{ "info": { "author": "vilame", "author_email": "opaquism@hotmail.com", "bugtrack_url": null, "classifiers": [ "Environment :: Web Environment", "Intended Audience :: Developers", "Operating System :: OS Independent" ], "description": "\nCrawling framework for storing text data via sqlite3.\n=====================================================\n\nSupport for xpath and jsonpath syntax\n\nsqlite3: table_name: some, table_col: col_0, col_1\n==================================================\n\n.. code-block:: python\n\n import vspider\n\n def some(url):\n print(url)\n x @ url\n x * '//*[contains(@class,\"c-container\")]'\n x ** 'string(./h3/a)'\n x ** 'string(./h3/a/@href)'\n\n for i in range(10):\n url = f\"https://www.baidu.com/s?wd=\u4f60\u597d&pn={i*10}\"\n some(url)\n\nsqlite3: table_name: some,some2; table1_col: title,url; table2_col: test\n========================================================================\n\n.. code-block:: python\n\n import vspider,vthread \n\n @vhread.pool(10) # By using the Vthread function library, the efficiency can be greatly improved.\n def some(url):\n print(url)\n x @ url\n # The first way of collecting is to use * as the node, ** as the\n # configuration of the content address collected under the node.\n # applicable to data of type html_table.\n x * '//*[contains(@class,\"c-container\")]'\n x ** ('title','string(./h3/a)')\n x ** ('url', 'string(./h3/a/@href)')\n\n # The second way of collecting is \"directly collecting\" by <<.\n # It is suitable for a single page to collect only one set of data\n x(\"some2\") @ url\n x << (\"test_int_\",'string(//*[@id=\"page\"]/strong/span[2])',lambda i:i[:20])\n # setting the storage type with a suffix\n # Support:\n # _double_\n # _int_\n # _integer_\n # _str_\n # _string_\n # _date_\n\n # Both ** and << both configuration functions can use tuple and list to pass parameters.\n # If the third parameter exists, it will be used as the subsequent processing function of\n # the data collected by xpath, and the processed data will be inserted into the database.\n # defualt function: lambda i:i.strip(),if set it None, do nothing.\n\n for i in range(10):\n url = f\"https://www.baidu.com/s?wd=\u4f60\u597d&pn={i*10}\"\n some(url)\n \n", "description_content_type": "", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/cilame/vspider", "keywords": "", "license": "MIT", "maintainer": "", "maintainer_email": "", "name": "vspider", "package_url": "https://pypi.org/project/vspider/", "platform": "", "project_url": "https://pypi.org/project/vspider/", "project_urls": { "Homepage": "https://github.com/cilame/vspider" }, "release_url": "https://pypi.org/project/vspider/0.0.9/", "requires_dist": null, "requires_python": "", "summary": "minimalist crawling framework.", "version": "0.0.9" }, "last_serial": 4152697, "releases": { "0.0.1": [ { "comment_text": "", "digests": { "md5": "1b180324ecbb41f20c9be34a8693ca69", "sha256": "8cbca4b048e12434d4f83a858c12730f845387a269d5d86e8ff55273728a637b" }, "downloads": -1, "filename": "vspider-0.0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "1b180324ecbb41f20c9be34a8693ca69", "packagetype": "bdist_wheel", "python_version": "3.6", "requires_python": null, "size": 9514, "upload_time": "2018-08-02T07:02:33", "url": "https://files.pythonhosted.org/packages/65/00/504747639f829d6b28c4c5a74a288472b60f0a4e61d626faac406c9e55bf/vspider-0.0.1-py3-none-any.whl" } ], "0.0.2": [ { "comment_text": "", "digests": { "md5": "02ae90e1887ba4d08f2b5d29efbe3559", "sha256": "9759486ac83f1b42aed6f7326dba02d3bb4ab4a31d54ea5ef7382bb87c72bba2" }, "downloads": -1, "filename": "vspider-0.0.2-py3-none-any.whl", "has_sig": false, "md5_digest": "02ae90e1887ba4d08f2b5d29efbe3559", "packagetype": "bdist_wheel", "python_version": "3.6", "requires_python": null, "size": 9511, "upload_time": "2018-08-02T07:05:39", "url": "https://files.pythonhosted.org/packages/1f/50/a0748a30033b83b554d83ffa5ce6341e2f3be34caa17a9723e40e94ee5bd/vspider-0.0.2-py3-none-any.whl" } ], "0.0.3": [ { "comment_text": "", "digests": { "md5": "c112dd085f347640bc1016817a469a19", "sha256": "fb10fe259a47a949ed3fc29a137c697f9b4643e7131d47a19102e725489f59d0" }, "downloads": -1, "filename": "vspider-0.0.3-py3-none-any.whl", "has_sig": false, "md5_digest": "c112dd085f347640bc1016817a469a19", "packagetype": "bdist_wheel", "python_version": "3.6", "requires_python": null, "size": 11678, "upload_time": "2018-08-03T13:10:27", "url": "https://files.pythonhosted.org/packages/8b/4f/1b8771d16fd42b3e33116492254072da7e80a5a046d65454e37e60f5cf34/vspider-0.0.3-py3-none-any.whl" } ], "0.0.4": [ { "comment_text": "", "digests": { "md5": "c4b0237042b0204b5b9362ece8566c0e", "sha256": "6a809516a26e53dfcc0362fc4291e4d3bb07ac3f1a30f5ae90655cb2cc7a8d20" }, "downloads": -1, "filename": "vspider-0.0.4-py3-none-any.whl", "has_sig": false, "md5_digest": "c4b0237042b0204b5b9362ece8566c0e", "packagetype": "bdist_wheel", "python_version": "3.6", "requires_python": null, "size": 12409, "upload_time": "2018-08-04T04:48:49", "url": "https://files.pythonhosted.org/packages/22/55/81aeb15436121d9c620e91ad3415f068f1b3309dc8a4d778a9ed94ec5cf2/vspider-0.0.4-py3-none-any.whl" } ], "0.0.6": [ { "comment_text": "", "digests": { "md5": "3a4ca872f30b0faade7d998bf8feb45f", "sha256": "a241e91bd8eac8bc36df1229e69caae5fe28335febb4d8d080476ae14053ba96" }, "downloads": -1, "filename": "vspider-0.0.6-py3-none-any.whl", "has_sig": false, "md5_digest": "3a4ca872f30b0faade7d998bf8feb45f", "packagetype": "bdist_wheel", "python_version": "3.6", "requires_python": null, "size": 13143, "upload_time": "2018-08-04T18:26:44", "url": "https://files.pythonhosted.org/packages/94/c6/4045b8a2cd367fd9ea0b50dd216cab4dd4f7764a4434ff84c05d832638bf/vspider-0.0.6-py3-none-any.whl" } ], "0.0.7": [ { "comment_text": "", "digests": { "md5": "640efc2c93e338bb219f4ef72e7b3c71", "sha256": "38713127d3a654650b88ee4f526e6e211531c94dd4e3ae21f3544871afc093ea" }, "downloads": -1, "filename": "vspider-0.0.7-py3-none-any.whl", "has_sig": false, "md5_digest": "640efc2c93e338bb219f4ef72e7b3c71", "packagetype": "bdist_wheel", "python_version": "3.6", "requires_python": null, "size": 13703, "upload_time": "2018-08-05T13:15:10", "url": "https://files.pythonhosted.org/packages/1b/ce/067f338fea30f4ad6ccc0a86e2b30eeaed49b0d5fb950d546beefb2f8c9b/vspider-0.0.7-py3-none-any.whl" } ], "0.0.8": [ { "comment_text": "", "digests": { "md5": "b4d8ae66fecb0af8dad769a9a4914914", "sha256": "88eeb9c90d4a9b8b9d8061791e0b42964061cf51ebca8392798fa37d86570767" }, "downloads": -1, "filename": "vspider-0.0.8-py3-none-any.whl", "has_sig": false, "md5_digest": "b4d8ae66fecb0af8dad769a9a4914914", "packagetype": "bdist_wheel", "python_version": "3.6", "requires_python": null, "size": 13690, "upload_time": "2018-08-05T17:31:48", "url": "https://files.pythonhosted.org/packages/e5/cb/4e2e1b576a90d8abebf88e2309ffa462fc31c880b64ef1802c2fe424ce1d/vspider-0.0.8-py3-none-any.whl" } ], "0.0.9": [ { "comment_text": "", "digests": { "md5": "50fec20fe708c8b8d746e1b8342c85d3", "sha256": "20f5d36e402ab08ba48c57d322d6637f09304537b091f8066a344114350a7093" }, "downloads": -1, "filename": "vspider-0.0.9-py3-none-any.whl", "has_sig": false, "md5_digest": "50fec20fe708c8b8d746e1b8342c85d3", "packagetype": "bdist_wheel", "python_version": "3.6", "requires_python": null, "size": 15429, "upload_time": "2018-08-09T12:28:53", "url": "https://files.pythonhosted.org/packages/9b/86/0b77d08b6d5213f7462786410f650a1fedf4c5b3961fb81b7a5c9441a68e/vspider-0.0.9-py3-none-any.whl" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "50fec20fe708c8b8d746e1b8342c85d3", "sha256": "20f5d36e402ab08ba48c57d322d6637f09304537b091f8066a344114350a7093" }, "downloads": -1, "filename": "vspider-0.0.9-py3-none-any.whl", "has_sig": false, "md5_digest": "50fec20fe708c8b8d746e1b8342c85d3", "packagetype": "bdist_wheel", "python_version": "3.6", "requires_python": null, "size": 15429, "upload_time": "2018-08-09T12:28:53", "url": "https://files.pythonhosted.org/packages/9b/86/0b77d08b6d5213f7462786410f650a1fedf4c5b3961fb81b7a5c9441a68e/vspider-0.0.9-py3-none-any.whl" } ] }