{ "info": { "author": "ffteen", "author_email": "ffteen@qq.com", "bugtrack_url": null, "classifiers": [], "description": "# sbcrawler\nlight weight crawler\n\n\u8f7b\u91cf\u7ea7\u7684\u722c\u866b\u6846\u67b6sbcrawler\n\n## \u5199\u8fd9\u4e2a\u6846\u67b6\u7684\u52a8\u673a\n\n1. \u5e73\u65f6\u5199\u722c\u866b\u8fc7\u7a0b\u4e2d\uff0c\u53d1\u73b0\u901a\u5e38\u4e0d\u9700\u8981\u4ec0\u4e48\u9ad8\u5927\u4e0a\u7684\u5f02\u6b65\u3001\u5e76\u53d1\u3001\u5206\u5e03\u5f0f\u7b49\u529f\u80fd\u3002\n2. \u5c0f\u9700\u6c42\u5bf9\u9632\u6b62\u88ab\u5c01\uff0c\u4e2d\u65ad\u7ee7\u7eed\uff0c\u65e5\u5fd7\u8fdb\u5ea6\u7b49\u65b9\u9762\u6709\u66f4\u591a\u91cd\u590d\u6027\u7684\u4ee3\u7801\u3002\n3. sbcrawler\u5c31\u662f\u5b9e\u73b0\u4e00\u4e2a\u6700\u7b80\u5355\u7684\u722c\u866b\u6846\u67b6\uff0c\u8ba9\u4f60\u53ef\u4ee5\u4e13\u6ce8\u4e8e\u5199\u5185\u5bb9\u62bd\u53d6\u903b\u8f91\u3002\n\n## \u7279\u70b9\n- \u5355\u8fdb\u7a0b\uff0c\u975e\u5f02\u6b65\n- \u65ad\u70b9\u7eed\u722c\n- \u9519\u8bef\u65e5\u5fd7\u8bb0\u5f55\n\n## \u7528\u6cd5\u793a\u4f8b\n\n```python\n# -*- coding: utf-8 -*-\n\nfrom sbcrawler import Crawler\n\nclass MyCrawlerExample(Crawler):\n\n start_url = \"https://xxx.xxx.com/xxx/\" # \u8d77\u59cb\u79cd\u5b50url\n\n allowed_domain = \"https://xxx.xxx.com/\" # \u9650\u5236\u57df,\u8981\u5e26http\n\n def extract_links(self, html, task):\n # \u62bd\u53d6\u94fe\u63a5 \u52a0\u5230\u722c\u53d6\u4efb\u52a1\u5217\u8868\n if task.depth == 0 or task.depth == 1:\n html = html.find('.module_summary', first=True)\n if task.depth == 2:\n html = html.find(\"#in_list_main > table > tr:nth-child(6)\", first=True)\n if task.depth == 3:\n return\n\n super().extract_links(html, task)\n\n def extract_content(self, html, task):\n if task.depth == 3:\n title = html.find('#title', first=True)\n article = html.find('#article', first=True)\n return {'title': title.full_text, 'article': article.full_text}\n\n\nif __name__ == \"__main__\":\n crawler = MyCrawlerExample()\n crawler.start()\n\n```\n\n\n## \u5b89\u88c5\n```\npip install git+https://github.com/ffteen/sbcrawler.git\n```\n\n\n", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/ffteen/sbcrawler", "keywords": "", "license": "", "maintainer": "", "maintainer_email": "", "name": "sbcrawler", "package_url": "https://pypi.org/project/sbcrawler/", "platform": "", "project_url": "https://pypi.org/project/sbcrawler/", "project_urls": { "Homepage": "https://github.com/ffteen/sbcrawler" }, "release_url": "https://pypi.org/project/sbcrawler/0.1.2/", "requires_dist": [ "tornado", "requests", "requests-html" ], "requires_python": "", "summary": "A light weight crawler", "version": "0.1.2" }, "last_serial": 5327364, "releases": { "0.1.0": [ { "comment_text": "", "digests": { "md5": "b84b17eb4172b655e6199413413e5e4a", "sha256": "276caae9d1b4dac332b1cbd3ffe7af2262a40183ee841acc26ef83e915dcc4e4" }, "downloads": -1, "filename": "sbcrawler-0.1.0-py3-none-any.whl", "has_sig": false, "md5_digest": "b84b17eb4172b655e6199413413e5e4a", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 6744, "upload_time": "2019-05-26T13:58:46", "url": "https://files.pythonhosted.org/packages/81/3f/1e83210983771bfba9619c3a4da666429bb300c219551e74612e1eb93746/sbcrawler-0.1.0-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "6e93a527a16a92848f2e0151fcf1cb4f", "sha256": "541f4e1b0b7c1591e28efe9e9ae0ccfbebc9b576014286ca9bebe7956dd07783" }, "downloads": -1, "filename": "sbcrawler-0.1.0.tar.gz", "has_sig": false, "md5_digest": "6e93a527a16a92848f2e0151fcf1cb4f", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3015, "upload_time": "2019-05-26T13:58:48", "url": "https://files.pythonhosted.org/packages/f8/51/59e0a99a70c81b0da009ce2624c8593ec08e6ba83647a89b98aaa7807e1d/sbcrawler-0.1.0.tar.gz" } ], "0.1.1": [ { "comment_text": "", "digests": { "md5": "4a6e597fa8b6fcabf55ebc0cd25f16cd", "sha256": "b70c2e743f7eed8b9e06195a016e9e72d302ed4dc226ebe46aa438f89de64592" }, "downloads": -1, "filename": "sbcrawler-0.1.1-py3-none-any.whl", "has_sig": false, "md5_digest": "4a6e597fa8b6fcabf55ebc0cd25f16cd", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 7653, "upload_time": "2019-05-26T14:11:26", "url": "https://files.pythonhosted.org/packages/40/99/8b2862c2c6f7c76aa7be895b7bb01eb11f5b3ad8c64fd9c70e041d6fad89/sbcrawler-0.1.1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "64b4834feb294f39adec163f6961449d", "sha256": "a6daad4d2890ab2bc89fc345348197b392bec78322b176bf63178978253df053" }, "downloads": -1, "filename": "sbcrawler-0.1.1.tar.gz", "has_sig": false, "md5_digest": "64b4834feb294f39adec163f6961449d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4177, "upload_time": "2019-05-26T14:11:28", "url": "https://files.pythonhosted.org/packages/3d/79/09ae99ce9aa1c944b15d6ddf6f852a4bddf029d308ad2c83d36bf25f3d23/sbcrawler-0.1.1.tar.gz" } ], "0.1.2": [ { "comment_text": "", "digests": { "md5": "37caaa588588fe7fd3d865091f1f6fb0", "sha256": "b821b8d405af909d444f7d630b6ba583c3fc8d1e02889789619c3539a7907e77" }, "downloads": -1, "filename": "sbcrawler-0.1.2-py3-none-any.whl", "has_sig": false, "md5_digest": "37caaa588588fe7fd3d865091f1f6fb0", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 7659, "upload_time": "2019-05-28T15:36:25", "url": "https://files.pythonhosted.org/packages/ca/b5/084b7398b05b699aa4fcbc1f6cf3d19212c50e4e65667ecc04511f6895ef/sbcrawler-0.1.2-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "dd7f51905e159c5603b422086e4d153e", "sha256": "6d46bd4b081e1723c9953603481e26a550d263450f0a5fbf35f2a591453a9471" }, "downloads": -1, "filename": "sbcrawler-0.1.2.tar.gz", "has_sig": false, "md5_digest": "dd7f51905e159c5603b422086e4d153e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4190, "upload_time": "2019-05-28T15:36:28", "url": "https://files.pythonhosted.org/packages/95/d6/504e5efa4b7859a562543b64953185ae90f960b9c33440887752c7853ad3/sbcrawler-0.1.2.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "37caaa588588fe7fd3d865091f1f6fb0", "sha256": "b821b8d405af909d444f7d630b6ba583c3fc8d1e02889789619c3539a7907e77" }, "downloads": -1, "filename": "sbcrawler-0.1.2-py3-none-any.whl", "has_sig": false, "md5_digest": "37caaa588588fe7fd3d865091f1f6fb0", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 7659, "upload_time": "2019-05-28T15:36:25", "url": "https://files.pythonhosted.org/packages/ca/b5/084b7398b05b699aa4fcbc1f6cf3d19212c50e4e65667ecc04511f6895ef/sbcrawler-0.1.2-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "dd7f51905e159c5603b422086e4d153e", "sha256": "6d46bd4b081e1723c9953603481e26a550d263450f0a5fbf35f2a591453a9471" }, "downloads": -1, "filename": "sbcrawler-0.1.2.tar.gz", "has_sig": false, "md5_digest": "dd7f51905e159c5603b422086e4d153e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4190, "upload_time": "2019-05-28T15:36:28", "url": "https://files.pythonhosted.org/packages/95/d6/504e5efa4b7859a562543b64953185ae90f960b9c33440887752c7853ad3/sbcrawler-0.1.2.tar.gz" } ] }