{ "info": { "author": "Gregory Petukhov", "author_email": "lorien@lorien.name", "bugtrack_url": null, "classifiers": [ "License :: OSI Approved :: MIT License", "Programming Language :: Python", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: Implementation :: CPython", "Topic :: Internet :: WWW/HTTP", "Topic :: Software Development :: Libraries :: Application Frameworks", "Topic :: Software Development :: Libraries :: Python Modules" ], "description": "=======\nCrawler\n=======\n\n.. image:: https://travis-ci.org/lorien/crawler.png?branch=master\n :target: https://travis-ci.org/lorien/crawler\n\n.. image:: https://coveralls.io/repos/lorien/crawler/badge.svg?branch=master\n :target: https://coveralls.io/r/lorien/crawler?branch=master\n\n.. image:: https://pypip.in/download/crawler/badge.svg?period=month\n :target: https://pypi.python.org/pypi/crawler\n\n.. image:: https://pypip.in/version/crawler/badge.svg\n :target: https://pypi.python.org/pypi/crawler\n\n.. image:: https://landscape.io/github/lorien/crawler/master/landscape.png\n :target: https://landscape.io/github/lorien/crawler/master\n\nWeb scraping framework based on py3 asyncio & aiohttp libraries.\n\n\nUsage Example\n=============\n\n.. code:: python\n\n import re\n from itertools import islice\n\n from crawler import Crawler, Request\n\n RE_TITLE = re.compile(r'([^<]+)', re.S | re.I)\n\n class TestCrawler(Crawler):\n def task_generator(self):\n for host in islice(open('var/domains.txt'), 100):\n host = host.strip()\n if host:\n yield Request('http://%s/' % host, tag='page')\n\n def handler_page(self, req, res):\n print('Result of request to {}'.format(req.url))\n try:\n title = RE_TITLE.search(res.body).group(1)\n except AttributeError:\n title = 'N/A'\n print('Title: {}'.format(title))\n\n bot = TestCrawler(concurrency=10)\n bot.run()\n\n\nInstallation\n============\n\n.. code:: bash\n\n pip install crawler\n\n\nDependencies\n============\n\n* Python>=3.4\n* aiohttp", "description_content_type": null, "docs_url": null, "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "UNKNOWN", "keywords": null, "license": "MIT", "maintainer": null, "maintainer_email": null, "name": "crawler", "package_url": "https://pypi.org/project/crawler/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/crawler/", "project_urls": { "Download": "UNKNOWN", "Homepage": "UNKNOWN" }, "release_url": "https://pypi.org/project/crawler/0.0.2/", "requires_dist": null, "requires_python": null, "summary": "Web Scraping Framework based on py3 asyncio", "version": "0.0.2" }, "last_serial": 2168271, "releases": { "0.0.2": [ { "comment_text": "", "digests": { "md5": "272f2a88e1376ac09f2d310405ff2bb8", "sha256": "b6b5bcc2f2a64ac60251bee1494bd7ea98605ef1a8bf87db5194bea4bdd420d2" }, "downloads": -1, "filename": "crawler-0.0.2.tar.gz", "has_sig": false, "md5_digest": "272f2a88e1376ac09f2d310405ff2bb8", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6028, "upload_time": "2016-06-15T09:47:49", "url": "https://files.pythonhosted.org/packages/8d/42/2b042beebf63f6d490d38b698f06ee4fdd16a1d32fa2373a6b662a37a33d/crawler-0.0.2.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "272f2a88e1376ac09f2d310405ff2bb8", "sha256": "b6b5bcc2f2a64ac60251bee1494bd7ea98605ef1a8bf87db5194bea4bdd420d2" }, "downloads": -1, "filename": "crawler-0.0.2.tar.gz", "has_sig": false, "md5_digest": "272f2a88e1376ac09f2d310405ff2bb8", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6028, "upload_time": "2016-06-15T09:47:49", "url": "https://files.pythonhosted.org/packages/8d/42/2b042beebf63f6d490d38b698f06ee4fdd16a1d32fa2373a6b662a37a33d/crawler-0.0.2.tar.gz" } ] }