{ "info": { "author": "Lucas Simpson", "author_email": "lucassimpson05@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Topic :: Software Development" ], "description": "Concurrent Flood Scraper\n========================\n\nIt's probably exactly what you think it is, based off the name\n--------------------------------------------------------------\n\nGET a page. scrape for urls, filter those according to some regex. Put all those in a master queue. Scrape page for any data you want. Repeat...\n\nThere's a small demo in the wikipedia_demo. There you can see how easy it is to set up to fit your web scraping needs!\n\n\nSpecifics\n=========\n\n1. Create a child class of concurrentfloodscraper.Scraper and implement the scrape_page(self, text) method. text is the raw html. In this method you do the specific scraping required. Note that only urls that match the class url_filter_regex will be added to the master queue.\n\n2. Annotate your Scraper subclass with concurrentfloodscraper.Route. The single parameter is a regex; URL's that match the regex will be parsed with that scraper.\n\n3. Repeat steps 1 and 2 for as many different types of pages you expect to be scraping from.\n\n4. Create an instance of concurrentfloodscraper.ConcurrentFloodScraper, pass it the root URL, the number of threads to use, and a page limit. Page limit defaults to None, which means 'go forever'.\n\n5. Start the ConcurrentFloodScraper instance, and enjoy the magic!", "description_content_type": null, "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/LucasSimpson/ConcurrentFloodScraper", "keywords": "crawl crawler scrape scraper web internet", "license": "MIT", "maintainer": "", "maintainer_email": "", "name": "concurrentfloodscraper", "package_url": "https://pypi.org/project/concurrentfloodscraper/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/concurrentfloodscraper/", "project_urls": { "Homepage": "https://github.com/LucasSimpson/ConcurrentFloodScraper" }, "release_url": "https://pypi.org/project/concurrentfloodscraper/1.0.1/", "requires_dist": [ "requests" ], "requires_python": "", "summary": "A concurrent flood web scraper.", "version": "1.0.1" }, "last_serial": 2642155, "releases": { "1.0.0": [ { "comment_text": "", "digests": { "md5": "f6ff065d8c786d35016f2de1452a22f2", "sha256": "717bad5c60d21f5be2a8e2d14bcb425dff1ec13760a842a018a947025fbc5f9a" }, "downloads": -1, "filename": "concurrentfloodscraper-1.0.0-py3-none-any.whl", "has_sig": false, "md5_digest": "f6ff065d8c786d35016f2de1452a22f2", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 9640, "upload_time": "2017-02-14T20:49:12", "url": "https://files.pythonhosted.org/packages/0e/a9/59d0b305df5e6be03c5a69533e2e5707f21946757ef55b51347787541195/concurrentfloodscraper-1.0.0-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "270bbb9ae6eaab4e43317df2a3b0a803", "sha256": "1c6ee38f60cdc141e8d95c11fcd88e17df2d98eb453767c39a137978eaf2d698" }, "downloads": -1, "filename": "concurrentfloodscraper-1.0.0.tar.gz", "has_sig": false, "md5_digest": "270bbb9ae6eaab4e43317df2a3b0a803", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6278, "upload_time": "2017-02-14T20:49:14", "url": 
"https://files.pythonhosted.org/packages/d4/15/786a012c9f4e0edd7955fea0114674bbaf54e29158dbb02f586bd996bffd/concurrentfloodscraper-1.0.0.tar.gz" } ], "1.0.1": [ { "comment_text": "", "digests": { "md5": "30db68b52d4375893c571360abe8ad4e", "sha256": "27a56763c000c81d987efc3bf82835772f0695899d088b61e434d29bf0fac8a8" }, "downloads": -1, "filename": "concurrentfloodscraper-1.0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "30db68b52d4375893c571360abe8ad4e", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 9780, "upload_time": "2017-02-14T20:59:20", "url": "https://files.pythonhosted.org/packages/e0/00/995311f710f0a7217b65cbf128a636d7d14e764a5e4883b9b5e6beb31a84/concurrentfloodscraper-1.0.1-py3-none-any.whl" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "30db68b52d4375893c571360abe8ad4e", "sha256": "27a56763c000c81d987efc3bf82835772f0695899d088b61e434d29bf0fac8a8" }, "downloads": -1, "filename": "concurrentfloodscraper-1.0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "30db68b52d4375893c571360abe8ad4e", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 9780, "upload_time": "2017-02-14T20:59:20", "url": "https://files.pythonhosted.org/packages/e0/00/995311f710f0a7217b65cbf128a636d7d14e764a5e4883b9b5e6beb31a84/concurrentfloodscraper-1.0.1-py3-none-any.whl" } ] }