{ "info": { "author": "Scrapinghub", "author_email": "info@scrapinghub.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.5" ], "description": "=================\nscrapy-deltafetch\n=================\n\n.. image:: https://travis-ci.org/scrapy-plugins/scrapy-deltafetch.svg?branch=master\n :target: https://travis-ci.org/scrapy-plugins/scrapy-deltafetch\n\n.. image:: https://codecov.io/gh/scrapy-plugins/scrapy-deltafetch/branch/master/graph/badge.svg\n :target: https://codecov.io/gh/scrapy-plugins/scrapy-deltafetch\n\nThis is a Scrapy spider middleware to ignore requests\nto pages containing items seen in previous crawls of the same spider,\nthus producing a \"delta crawl\" containing only new items.\n\nThis also speeds up the crawl, by reducing the number of requests that need\nto be crawled, and processed (typically, item requests are the most CPU\nintensive).\n\nRequirements\n============\n\nDeltaFetch middleware depends on Python's bsddb3_ package.\n\nOn Ubuntu/Debian, you may need to install ``libdb-dev`` if it's not installed already.\n\n.. _bsddb3: https://pypi.python.org/pypi/bsddb3\n\n\nInstallation\n============\n\nInstall scrapy-deltafetch using ``pip``::\n\n $ pip install scrapy-deltafetch\n\n\nConfiguration\n=============\n\n1. Add DeltaFetch middleware by including it in ``SPIDER_MIDDLEWARES``\n in your ``settings.py`` file::\n\n SPIDER_MIDDLEWARES = {\n 'scrapy_deltafetch.DeltaFetch': 100,\n }\n\n Here, priority ``100`` is just an example.\n Set its value depending on other middlewares you may have enabled already.\n\n2. 
Enable the middleware using ``DELTAFETCH_ENABLED`` in your ``settings.py``::\n\n DELTAFETCH_ENABLED = True\n\n\nUsage\n=====\n\nFollowing are the different options to control DeltaFetch middleware\nbehavior.\n\nSupported Scrapy settings\n-------------------------\n\n* ``DELTAFETCH_ENABLED`` \u2014 to enable (or disable) this extension\n* ``DELTAFETCH_DIR`` \u2014 directory where to store state\n* ``DELTAFETCH_RESET`` \u2014 reset the state, clearing out all seen requests\n\nThese usually go in your Scrapy project's ``settings.py``.\n\n\nSupported Scrapy spider arguments\n---------------------------------\n\n* ``deltafetch_reset`` \u2014 same effect as DELTAFETCH_RESET setting\n\nExample::\n\n $ scrapy crawl example -a deltafetch_reset=1\n\n\nSupported Scrapy request meta keys\n----------------------------------\n\n* ``deltafetch_key`` \u2014 used to define the lookup key for that request. By\n default it's Scrapy's default Request fingerprint function,\n but it can be changed to contain an item id, for example.\n This requires support from the spider, but makes the extension\n more efficient for sites that have many URLs for the same item.\n\n\n", "description_content_type": null, "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "http://github.com/scrapy-plugins/scrapy-deltafetch", "keywords": "", "license": "BSD", "maintainer": "", "maintainer_email": "", "name": "scrapy-deltafetch", "package_url": "https://pypi.org/project/scrapy-deltafetch/", "platform": "Any", "project_url": "https://pypi.org/project/scrapy-deltafetch/", "project_urls": { "Homepage": "http://github.com/scrapy-plugins/scrapy-deltafetch" }, "release_url": "https://pypi.org/project/scrapy-deltafetch/1.2.1/", "requires_dist": [ "Scrapy (>=1.1.0)", "bsddb3" ], "requires_python": "", "summary": "Scrapy middleware to ignore previously crawled pages", "version": "1.2.1" }, "last_serial": 2631608, "releases": { "0.9.1": [ { "comment_text": 
"", "digests": { "md5": "6efb0feb13dbf37403df860f4dd6b211", "sha256": "cb5d3aa6002e53f83c297da72151bfa649befdf1769a908a1575a95983ac5ffe" }, "downloads": -1, "filename": "scrapy_deltafetch-0.9.1-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "6efb0feb13dbf37403df860f4dd6b211", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 3867, "upload_time": "2016-06-27T16:45:03", "url": "https://files.pythonhosted.org/packages/b2/79/adaa11c3cb93f8c8cc76a6378126c0438e4c370c669dd67221e90ea3338d/scrapy_deltafetch-0.9.1-py2.py3-none-any.whl" } ], "0.9.2": [ { "comment_text": "", "digests": { "md5": "dfa5222eb96170fc33062babae70a0fd", "sha256": "2ad6c6cf806dc81d492396e6f51ac53a4d7085199798cb26e7a3e7bbbcdd743c" }, "downloads": -1, "filename": "scrapy_deltafetch-0.9.2-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "dfa5222eb96170fc33062babae70a0fd", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 3862, "upload_time": "2016-06-27T16:47:02", "url": "https://files.pythonhosted.org/packages/23/6e/10e64edb1b68a1973cc4838a95b144488fd402e32ce7a5ecd4c5b28a114c/scrapy_deltafetch-0.9.2-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "2e76bcec5039f81b1dc7d16c0a493d48", "sha256": "2ad8c2c049c8948e99596116c50fa4ad93e504ad772ac0768db66033e7f36fce" }, "downloads": -1, "filename": "scrapy-deltafetch-0.9.2.tar.gz", "has_sig": false, "md5_digest": "2e76bcec5039f81b1dc7d16c0a493d48", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3160, "upload_time": "2016-06-27T16:37:05", "url": "https://files.pythonhosted.org/packages/b3/55/d2ce7c1e6e2412265f5f8092673904fdd16ebd86b3a78412bfa75d75a025/scrapy-deltafetch-0.9.2.tar.gz" } ], "1.0.0": [ { "comment_text": "", "digests": { "md5": "335e57d33b2b75f9cfbbb7938739fb50", "sha256": "bc7bb30ada4264282e8572dc8c3e64d066fa075fba75ff58913fd4511c74e515" }, "downloads": -1, "filename": 
"scrapy_deltafetch-1.0.0-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "335e57d33b2b75f9cfbbb7938739fb50", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 3861, "upload_time": "2016-06-27T16:58:25", "url": "https://files.pythonhosted.org/packages/cd/e2/eb0a1c1535b34a87b9de5fb028bd7661198a9b28fbd4a94237e3fbb4baaf/scrapy_deltafetch-1.0.0-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "4b3cd8789e3e95f81d067a8a480c3b0d", "sha256": "1c901ae261fff68f69311919f3b62293ffc299d54d54bae47bbc70fb3aa17d7f" }, "downloads": -1, "filename": "scrapy-deltafetch-1.0.0.tar.gz", "has_sig": false, "md5_digest": "4b3cd8789e3e95f81d067a8a480c3b0d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3163, "upload_time": "2016-06-27T16:53:27", "url": "https://files.pythonhosted.org/packages/85/54/dfe30be88b55f99a1834740102fe0ea160255a2918d6776d164f7055fd51/scrapy-deltafetch-1.0.0.tar.gz" } ], "1.0.1": [ { "comment_text": "", "digests": { "md5": "ca26399fe240ab4ab07b68bca522b136", "sha256": "50b8ec61da52da5204255d929a780bc2203336305a59d721ed53a6cd0f5abcfb" }, "downloads": -1, "filename": "scrapy_deltafetch-1.0.1-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "ca26399fe240ab4ab07b68bca522b136", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 3868, "upload_time": "2016-06-27T17:21:10", "url": "https://files.pythonhosted.org/packages/8e/ff/28fb27053efb11c66540c1d5d334946eb7eb272b83aee2f63b9028ae2dfc/scrapy_deltafetch-1.0.1-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "df3f772f9b5402a811ae89ba4dbc734c", "sha256": "c9fe002622e86bd3b513f5db774475b1681786fe6c35294b29e6a8aeb1fd76a3" }, "downloads": -1, "filename": "scrapy-deltafetch-1.0.1.tar.gz", "has_sig": false, "md5_digest": "df3f772f9b5402a811ae89ba4dbc734c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3162, "upload_time": 
"2016-06-27T17:18:20", "url": "https://files.pythonhosted.org/packages/84/a5/747d1bb92bd4a160ec065416210c4049878d288b4518ffe07ef3f5f75825/scrapy-deltafetch-1.0.1.tar.gz" } ], "1.1.0": [ { "comment_text": "", "digests": { "md5": "17c737653446df48757e141c7258666b", "sha256": "b8dae335a1c056879b1e8204a225f00907233abb4cd3f61653cd79c5e62171ca" }, "downloads": -1, "filename": "scrapy_deltafetch-1.1.0-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "17c737653446df48757e141c7258666b", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 3870, "upload_time": "2016-06-29T20:28:27", "url": "https://files.pythonhosted.org/packages/1d/df/af2fe63eb3a44c65034539527d1a4989a93b962ba5ccca0fbb33b69ba324/scrapy_deltafetch-1.1.0-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "67dd2087fb441027cea23061a6cfabf8", "sha256": "8f2161a94175d06200d26061437fcc6ad1c65d84c3e8d9fcdbc522b6cb6935d3" }, "downloads": -1, "filename": "scrapy-deltafetch-1.1.0.tar.gz", "has_sig": false, "md5_digest": "67dd2087fb441027cea23061a6cfabf8", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3166, "upload_time": "2016-06-29T20:28:30", "url": "https://files.pythonhosted.org/packages/be/14/9524294b65a1582c128ddbb2fcc64102c492ee98abaf98fb174005692345/scrapy-deltafetch-1.1.0.tar.gz" } ], "1.2.0": [ { "comment_text": "", "digests": { "md5": "b96dd1ecc3ef5ac3cb234f62f6891e62", "sha256": "25b8ff108ff97a3b738cc905c6595ca8ca02fe705901f11ceb1c7024ed54c1f2" }, "downloads": -1, "filename": "scrapy_deltafetch-1.2.0-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "b96dd1ecc3ef5ac3cb234f62f6891e62", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 3893, "upload_time": "2016-12-07T10:28:19", "url": "https://files.pythonhosted.org/packages/e6/65/b9948295a0e4c080dd0d368a85372d25f47fef8ba53d9f0745134b37def6/scrapy_deltafetch-1.2.0-py2.py3-none-any.whl" }, { "comment_text": "", 
"digests": { "md5": "4c0941fc731b6a653154d8c97edca834", "sha256": "052666fc57e9f8a4a8d200351cb7920d577e0326fb91bae00b023be7b16e7861" }, "downloads": -1, "filename": "scrapy-deltafetch-1.2.0.tar.gz", "has_sig": false, "md5_digest": "4c0941fc731b6a653154d8c97edca834", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3181, "upload_time": "2016-12-07T10:28:21", "url": "https://files.pythonhosted.org/packages/3d/b3/cbadfaf57b32a5b0f8b8f81ac0435e004fe8a89768e0d2e96a9658cff164/scrapy-deltafetch-1.2.0.tar.gz" } ], "1.2.1": [ { "comment_text": "", "digests": { "md5": "1c28362a58b5aa50d7499915ffe91e20", "sha256": "8d45da18b415f7c0147c9dbe3cd7b3a9023eef64ac6f49f6a8d5c571f32f5ad8" }, "downloads": -1, "filename": "scrapy_deltafetch-1.2.1-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "1c28362a58b5aa50d7499915ffe91e20", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 3895, "upload_time": "2017-02-09T19:50:56", "url": "https://files.pythonhosted.org/packages/90/81/08bd21bc3ee364845d76adef09d20d85d75851c582a2e0bb7f959d49b8e5/scrapy_deltafetch-1.2.1-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "93361130614d9dac5ecc900241570f7c", "sha256": "08bb8156d2e6bbdf9f0aca749f4624fff89f1135c35b91c8bb8aeed6f40da1d4" }, "downloads": -1, "filename": "scrapy-deltafetch-1.2.1.tar.gz", "has_sig": false, "md5_digest": "93361130614d9dac5ecc900241570f7c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3176, "upload_time": "2017-02-09T19:50:58", "url": "https://files.pythonhosted.org/packages/45/12/a564137c81bfcf3145044d4e5854a053dbe7805f19ce3bc36ee91f6c617e/scrapy-deltafetch-1.2.1.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "1c28362a58b5aa50d7499915ffe91e20", "sha256": "8d45da18b415f7c0147c9dbe3cd7b3a9023eef64ac6f49f6a8d5c571f32f5ad8" }, "downloads": -1, "filename": "scrapy_deltafetch-1.2.1-py2.py3-none-any.whl", "has_sig": false, 
"md5_digest": "1c28362a58b5aa50d7499915ffe91e20", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 3895, "upload_time": "2017-02-09T19:50:56", "url": "https://files.pythonhosted.org/packages/90/81/08bd21bc3ee364845d76adef09d20d85d75851c582a2e0bb7f959d49b8e5/scrapy_deltafetch-1.2.1-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "93361130614d9dac5ecc900241570f7c", "sha256": "08bb8156d2e6bbdf9f0aca749f4624fff89f1135c35b91c8bb8aeed6f40da1d4" }, "downloads": -1, "filename": "scrapy-deltafetch-1.2.1.tar.gz", "has_sig": false, "md5_digest": "93361130614d9dac5ecc900241570f7c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3176, "upload_time": "2017-02-09T19:50:58", "url": "https://files.pythonhosted.org/packages/45/12/a564137c81bfcf3145044d4e5854a053dbe7805f19ce3bc36ee91f6c617e/scrapy-deltafetch-1.2.1.tar.gz" } ] }