{ "info": { "author": "teitei-tk", "author_email": "teitei.tk@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 3 - Alpha", "License :: OSI Approved :: MIT License", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", "Topic :: Software Development", "Topic :: Software Development :: Libraries", "Topic :: Software Development :: Libraries :: Python Modules" ], "description": "IcePick\n===================\n\nIcePick is a All in one Package library for easy Scraping\n\n--------------\n\nConcept\n-------\n\n- Lightweight Scraping Library\n- All in one Package library for easy Scraping\n\nRequirements\n------------\n\n- Python 3.4 or later(not support 2.x)\n- MongoDB\n\nDependencies Libraries\n----------------------\n\n- aiohttp\n- beautifulsoup4\n- pymongo >= 3.0\n- nose\n\nUsage\n-----\n\nScraping Flow,\n\n::\n\n Your Scraping Order(Order) -> Do Scraping(Picker) -> HTML Parse(Parser) -> Save in Database(Recorder)\n\nExample\n-------\n\nget a my repository filenames\n\n.. code:: python\n\n\n import icePick\n\n db = icePick.get_database('icePick_example', 'localhost')\n\n\n class GithubRepoParser(icePick.Parser):\n def serialize(self):\n result = {\n \"files\": [],\n }\n\n for v in self.bs.find_all(class_=\"js-directory-link\"):\n result['files'] += [v.text]\n return result\n\n\n class GithubRepoRecorder(icePick.Recorder):\n struct = icePick.Structure(files=list())\n\n class Meta:\n database = db\n\n\n class GithubRepoOrder(icePick.Order):\n recorder = GithubRepoRecorder\n parser = GithubRepoParser\n\n\n def main():\n document = {\n 'url': 'https://github.com/teitei-tk/ice-pick/tree/master',\n 'ua': 'Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',\n }\n\n print('---download start---')\n order = GithubRepoOrder(document.get('url'), document.get('ua'))\n picker = icePick.Picker([order])\n picker.run()\n print(\"---finish---\")\n\n if __name__ == \"__main__\":\n main()\n\n::\n\n >>> import icePick\n >>> db = icePick.get_database('icePick_example', 'localhost')\n >>> class GithubRepoRecorder(icePick.Recorder):\n ... struct = icePick.Structure(files=list())\n ... class Meta:\n ... database = db\n ...\n >>> records = GithubRepoRecorder.find()\n >>> records[0].files\n ['example', 'icePick', 'tests', 'LICENSE', 'README.md', 'circle.yml', 'requirements.txt']\n >>>\n\nTODO\n----\n\n- Crawling\n- Document\n\nLICENSE\n-------\n\n- MIT", "description_content_type": null, "docs_url": null, "download_url": "https://github.com/teitei-tk/ice-pick/archive/master.tar.gz", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/teitei-tk/ice-pick", "keywords": "scraping", "license": "MIT", "maintainer": null, "maintainer_email": null, "name": "icePick", "package_url": "https://pypi.org/project/icePick/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/icePick/", "project_urls": { "Download": "https://github.com/teitei-tk/ice-pick/archive/master.tar.gz", "Homepage": "https://github.com/teitei-tk/ice-pick" }, "release_url": "https://pypi.org/project/icePick/0.0.5/", "requires_dist": null, "requires_python": null, "summary": "icePick is a All in one Package library for easy Scraping", "version": "0.0.5" }, "last_serial": 1619475, "releases": { "0.0.1": [ { "comment_text": "", "digests": { "md5": "958bafa3b0439bafa2c7b962fe8557c3", "sha256": "95a629c4e14bf00ea329ab2ed7fab4b2642c1fcf9de0d40fbb0b509f7e7e8819" }, "downloads": -1, "filename": "icePick-0.0.1.tar.gz", "has_sig": false, "md5_digest": "958bafa3b0439bafa2c7b962fe8557c3", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6679, "upload_time": "2015-04-24T17:05:34", "url": "https://files.pythonhosted.org/packages/ff/6c/ea9d489a8dc4f3c41d11e37ad05420116a59b0e7cbd90b4a1dbc2d69489b/icePick-0.0.1.tar.gz" } ], "0.0.2": [], "0.0.3": [ { "comment_text": "", "digests": { "md5": "609951c0d60ac17381ec1c57692cd11c", "sha256": "1995e0437b730972e66775dafeaccfe433045b0164f1e8bf7a5be2ef3c43d0d7" }, "downloads": -1, "filename": "icePick-0.0.3.tar.gz", "has_sig": false, "md5_digest": "609951c0d60ac17381ec1c57692cd11c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6278, "upload_time": "2015-06-29T14:47:54", "url": "https://files.pythonhosted.org/packages/1a/ec/168a9bc7d32960b4918f7fe6cf36ba40d26dfe532926b5ee096482f83367/icePick-0.0.3.tar.gz" } ], "0.0.3.1": [ { "comment_text": "", "digests": { "md5": "724cc4edc2904c6fbd90bb54367b17ad", "sha256": "2b6c23714d077e93c66d1d9576f95a5710f296cf1108bd2b8ecaa1fd66be87cf" }, "downloads": -1, "filename": "icePick-0.0.3.1.tar.gz", "has_sig": false, "md5_digest": "724cc4edc2904c6fbd90bb54367b17ad", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6452, "upload_time": "2015-06-29T14:52:12", "url": "https://files.pythonhosted.org/packages/d0/21/09952e3b7e9b827a13288d394679e7556fdb5a1528a69c48c068496b372a/icePick-0.0.3.1.tar.gz" } ], "0.0.4": [ { "comment_text": "", "digests": { "md5": "5f76a8deae00956b2bd1c772e47245cf", "sha256": "33768f142b9a27b67d8345cc16b81eaf23e8e8dd39bfaf06faecb8d683769dc6" }, "downloads": -1, "filename": "icePick-0.0.4.tar.gz", "has_sig": false, "md5_digest": "5f76a8deae00956b2bd1c772e47245cf", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6421, "upload_time": "2015-06-30T15:12:12", "url": "https://files.pythonhosted.org/packages/3a/28/b86902f881d9e0bee8f92b7acf00ede5c6f837fffae610ceb47d6d16da46/icePick-0.0.4.tar.gz" } ], "0.0.4.1": [ { "comment_text": "", "digests": { "md5": "3ac28c9b872b69ae61c1fdbdbbebe76f", "sha256": "ba194d238a8ff21cbca9dbb2006f7c71f1cf8781cca9ba0c314adc1637f01d43" }, "downloads": -1, "filename": "icePick-0.0.4.1.tar.gz", "has_sig": false, "md5_digest": "3ac28c9b872b69ae61c1fdbdbbebe76f", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6425, "upload_time": "2015-06-30T15:20:13", "url": "https://files.pythonhosted.org/packages/5e/65/d37842a15a036f164e2712b1ebc98aae0605695481dc0823959ac389bfda/icePick-0.0.4.1.tar.gz" } ], "0.0.5": [ { "comment_text": "", "digests": { "md5": "a9d0a7cb3c7ecb8564415d4c114b5407", "sha256": "af0a13832add144af80b231ed0ed7232aa5ca7b2d2f2b317cb2aa0e8ecab154b" }, "downloads": -1, "filename": "icePick-0.0.5.tar.gz", "has_sig": false, "md5_digest": "a9d0a7cb3c7ecb8564415d4c114b5407", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6541, "upload_time": "2015-07-04T18:03:45", "url": "https://files.pythonhosted.org/packages/6d/d5/081c9f31bad2775a4b7cf7c9921ae0491df0c7ed7cf50830e4033feca89e/icePick-0.0.5.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "a9d0a7cb3c7ecb8564415d4c114b5407", "sha256": "af0a13832add144af80b231ed0ed7232aa5ca7b2d2f2b317cb2aa0e8ecab154b" }, "downloads": -1, "filename": "icePick-0.0.5.tar.gz", "has_sig": false, "md5_digest": "a9d0a7cb3c7ecb8564415d4c114b5407", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6541, "upload_time": "2015-07-04T18:03:45", "url": "https://files.pythonhosted.org/packages/6d/d5/081c9f31bad2775a4b7cf7c9921ae0491df0c7ed7cf50830e4033feca89e/icePick-0.0.5.tar.gz" } ] }