{ "info": { "author": "Scrapinghub", "author_email": "info@scrapinghub.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 2 - Pre-Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: ISC License (ISCL)", "Natural Language :: English", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5" ], "description": "===============================================\nscrapy-mosquitera - tools for filtered scraping\n===============================================\n\n\n.. image:: https://travis-ci.org/scrapinghub/scrapy-mosquitera.svg?branch=master\n :target: https://travis-ci.org/scrapinghub/scrapy-mosquitera\n\n.. image:: https://img.shields.io/pypi/v/scrapy-mosquitera.svg?maxAge=2592000\n :target: https://pypi.python.org/pypi/scrapy-mosquitera\n\n.. image:: https://img.shields.io/pypi/pyversions/scrapy-mosquitera.svg?maxAge=2592000\n\n.. image:: https://img.shields.io/pypi/l/scrapy-mosquitera.svg?maxAge=2592000\n\n\n\n.. epigraph::\n\n How can I scrape items off a site from the last five days?\n\n -- Scrapy User\n\n\nThat question started the development of **scrapy-mosquitera**, a tool to help\nyou restrict crawling and scraping scope using *matchers*.\n\nMatchers are simple Python functions that return the validity of an element\nunder certain restrictions.\n\nThe first goal in the project was date matching, but you can create your own\nmatcher for your own crawling and scraping needs.\n\n\nHow it works\n============\n\nIn the case where the dates are available in the URLs, you will just use\nthe matcher function directly in your code::\n\n\n from scrapy_mosquitera.matchers import date_matches\n\n date = scrape_date_from_url(url)\n\n if date_matches(data=date, after='5 days ago'):\n yield Request(url=url, callback=self.parse_item)\n\n\nTo handle the case when the date is only available at the time when you scrape\nthe items, **scrapy-mosquitera** provides a ``PaginationMixin`` to control the\ncrawl according to the dates scraped.\n\nHead on to the remaining of the `documentation`_ for more details.\n\n.. _documentation: http://scrapy-mosquitera.readthedocs.io\n\n\nInstallation\n============\n\nThe quick way::\n\n pip install scrapy-mosquitera", "description_content_type": null, "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/scrapinghub/scrapy-mosquitera", "keywords": "mosquitera,scrapy-mosquitera", "license": "BSD", "maintainer": "", "maintainer_email": "", "name": "scrapy-mosquitera", "package_url": "https://pypi.org/project/scrapy-mosquitera/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/scrapy-mosquitera/", "project_urls": { "Homepage": "https://github.com/scrapinghub/scrapy-mosquitera" }, "release_url": "https://pypi.org/project/scrapy-mosquitera/0.1.1/", "requires_dist": [ "scrapy (>=1.1.0rc3)", "six", "PyDispatcher (>=2.0.5)", "dateparser" ], "requires_python": "", "summary": "Restrict crawl and scraping scope using matchers.", "version": "0.1.1" }, "last_serial": 2124333, "releases": { "0.1.0": [ { "comment_text": "", "digests": { "md5": "66a840e9a3dee2130fc1efd557c580cf", "sha256": "05b7d6c918db3c834c57d6bde654a17726be83c2ba08980a91172f2fe4bd44b8" }, "downloads": -1, "filename": "scrapy_mosquitera-0.1.0-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "66a840e9a3dee2130fc1efd557c580cf", "packagetype": "bdist_wheel", "python_version": "3.5", "requires_python": null, "size": 8400, "upload_time": "2016-05-10T20:00:30", "url": "https://files.pythonhosted.org/packages/67/8c/2247293774befbdb7e682bcfdc11bfb77f33f2d8009a32ec46f43881cdf9/scrapy_mosquitera-0.1.0-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "c8cb064c164c998e55df12f58b189740", "sha256": "4297b2ef89c387b4e5f31179e0a6e0493a1d5bbe013d0b96917c1cf0927a18b5" }, "downloads": -1, "filename": "scrapy-mosquitera-0.1.0.tar.gz", "has_sig": false, "md5_digest": "c8cb064c164c998e55df12f58b189740", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 18169, "upload_time": "2016-05-10T20:00:44", "url": "https://files.pythonhosted.org/packages/c5/bb/d1c8fa3e2aecd8e78e96b490a430699054f6e7a778c502a80176af9dfabe/scrapy-mosquitera-0.1.0.tar.gz" } ], "0.1.1": [ { "comment_text": "", "digests": { "md5": "d8201af7533690b9db7bb70ceb3b1e8f", "sha256": "92472f527dfb33efcc6733641de622c0537b71ee89a14111fb651c8f6c4d2a70" }, "downloads": -1, "filename": "scrapy_mosquitera-0.1.1-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "d8201af7533690b9db7bb70ceb3b1e8f", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 8702, "upload_time": "2016-05-19T21:04:01", "url": "https://files.pythonhosted.org/packages/b0/79/d188e5de92c8699480fa464867982c50d0728408e54be275c14524a1aec3/scrapy_mosquitera-0.1.1-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "e7d52f82e90ad06f0b882db4c1d9db1a", "sha256": "2ba3752240999a9111851b0cd0e4d31e3f073cbd241bd7afcc64db420d0b62b7" }, "downloads": -1, "filename": "scrapy-mosquitera-0.1.1.tar.gz", "has_sig": false, "md5_digest": "e7d52f82e90ad06f0b882db4c1d9db1a", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 18356, "upload_time": "2016-05-19T21:04:29", "url": "https://files.pythonhosted.org/packages/0b/6d/4edc4532bc7181299cbee894b460d44b0b26d57ce09fce637077683735ad/scrapy-mosquitera-0.1.1.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "d8201af7533690b9db7bb70ceb3b1e8f", "sha256": "92472f527dfb33efcc6733641de622c0537b71ee89a14111fb651c8f6c4d2a70" }, "downloads": -1, "filename": "scrapy_mosquitera-0.1.1-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "d8201af7533690b9db7bb70ceb3b1e8f", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 8702, "upload_time": "2016-05-19T21:04:01", "url": "https://files.pythonhosted.org/packages/b0/79/d188e5de92c8699480fa464867982c50d0728408e54be275c14524a1aec3/scrapy_mosquitera-0.1.1-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "e7d52f82e90ad06f0b882db4c1d9db1a", "sha256": "2ba3752240999a9111851b0cd0e4d31e3f073cbd241bd7afcc64db420d0b62b7" }, "downloads": -1, "filename": "scrapy-mosquitera-0.1.1.tar.gz", "has_sig": false, "md5_digest": "e7d52f82e90ad06f0b882db4c1d9db1a", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 18356, "upload_time": "2016-05-19T21:04:29", "url": "https://files.pythonhosted.org/packages/0b/6d/4edc4532bc7181299cbee894b460d44b0b26d57ce09fce637077683735ad/scrapy-mosquitera-0.1.1.tar.gz" } ] }