{ "info": { "author": "Riccardo Bucco", "author_email": "riccardobucco@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3" ], "description": "# Wikipedia downloader\n*wikipedia_downloader* is a Python module that makes it easy to download Wikipedia data dumps.\n## Installation\nTo install *wikipedia_downloader*, simply run:\n```\npip install wikipedia_downloader\n```\n## Documentation\n### Functions\n- wikipedia_downloader.**download_sql_dump**(*language*, *file*, *dump=\"latest\"*, *target_dir=\".\"*)\n\n Downloads and decompresses a Wikipedia SQL dump.\n\n Arguments:\n - *language*: Wikipedia name (language code).\n - *file*: File name.\n - *dump*: Dump version.\n - *target_dir*: Target directory.\n\n #### Example\n ```Python\n import wikipedia_downloader as wpd\n wpd.download_sql_dump(\"en\", \"pagelinks\", dump=\"20190101\", target_dir=\"./dumps\")\n ```\n\n- wikipedia_downloader.**get_dataframe**(*language*, *file*, *dump=\"latest\"*, *select=None*, *where=None*)\n\n Builds a pandas.DataFrame from a Wikipedia SQL dump.\n\n Arguments:\n - *language*: Wikipedia name (language code).\n - *file*: File name.\n - *dump*: Dump version.\n - *select*: Columns to be kept.\n - *where*: Functions used to filter records.\n\n Returns: *pandas.DataFrame*\n\n #### Example\n ```python\n import wikipedia_downloader as wpd\n select = [\"page_id\", \"page_namespace\", \"page_title\"]\n where = {\"page_namespace\": lambda x: x == 0 or x == 14}\n df = wpd.get_dataframe(\"en\", \"page\", dump=\"20190101\", select=select, where=where)\n ```\n## License\nThis project is licensed under the MIT License - see the [LICENSE.md](LICENSE.md) file for details.\n\n", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/riccardobucco/wikipedia_downloader", "keywords": "wikipedia dump download", "license": "", "maintainer": "", "maintainer_email": "", "name": "wikipedia-downloader", "package_url": "https://pypi.org/project/wikipedia-downloader/", "platform": "", "project_url": "https://pypi.org/project/wikipedia-downloader/", "project_urls": { "Homepage": "https://github.com/riccardobucco/wikipedia_downloader" }, "release_url": "https://pypi.org/project/wikipedia-downloader/0.2/", "requires_dist": null, "requires_python": "", "summary": "Download Wikipedia data dumps", "version": "0.2" }, "last_serial": 4659862, "releases": { "0.1": [ { "comment_text": "", "digests": { "md5": "a48d616263948b898620adfe38c3dad8", "sha256": "374cbba2175a8c47caf102b6e266c765b2068aacea08603131cc8e00ca2d3a25" }, "downloads": -1, "filename": "wikipedia_downloader-0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "a48d616263948b898620adfe38c3dad8", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 3153, "upload_time": "2018-12-30T18:32:19", "url": "https://files.pythonhosted.org/packages/bc/f8/bfdc0b2671879c139b2e85acd0ce9b453f8697574ab294797d33f022f149/wikipedia_downloader-0.1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "b2b64cce266e62cdb54d85381fd6ab5c", "sha256": "cde6c224e01cdb9f3c39f0341f7851dcd8a83063903502ee3f2d7aa90f7ed002" }, "downloads": -1, "filename": "wikipedia_downloader-0.1.tar.gz", "has_sig": false, "md5_digest": "b2b64cce266e62cdb54d85381fd6ab5c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 1873, "upload_time": "2018-12-30T18:32:21", "url": "https://files.pythonhosted.org/packages/6f/56/6bfeff97e5d22dc90c44ba2070ed1345c980a5dd5749256402b248db99e9/wikipedia_downloader-0.1.tar.gz" } ], "0.2": [ { "comment_text": "", "digests": { "md5": "046189d1261137a4b8a6794edc771d39", "sha256": "ed4b68aa983e631385367a9553d37bff02c04617fae2f244755ce7d0402b97be" }, "downloads": -1, "filename": "wikipedia_downloader-0.2-py3-none-any.whl", "has_sig": false, "md5_digest": "046189d1261137a4b8a6794edc771d39", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 4083, "upload_time": "2019-01-04T11:27:21", "url": "https://files.pythonhosted.org/packages/b3/37/cbf8b1e1f85dec357cd61064bd8e71e96d58d0e8cf249f5e8de5d8a2660a/wikipedia_downloader-0.2-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "2230bbfb607d1befcf46ef6d67917bbe", "sha256": "71d881e37337bb4b89dc3efd7740064ad9bbabacd8775af7c3a388fa75b7185d" }, "downloads": -1, "filename": "wikipedia_downloader-0.2.tar.gz", "has_sig": false, "md5_digest": "2230bbfb607d1befcf46ef6d67917bbe", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2850, "upload_time": "2019-01-04T11:27:22", "url": "https://files.pythonhosted.org/packages/c2/a6/3dfe3d1a1715373f6a7f8171b51820440489e6ddea694ae44b0eee0abd10/wikipedia_downloader-0.2.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "046189d1261137a4b8a6794edc771d39", "sha256": "ed4b68aa983e631385367a9553d37bff02c04617fae2f244755ce7d0402b97be" }, "downloads": -1, "filename": "wikipedia_downloader-0.2-py3-none-any.whl", "has_sig": false, "md5_digest": "046189d1261137a4b8a6794edc771d39", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 4083, "upload_time": "2019-01-04T11:27:21", "url": "https://files.pythonhosted.org/packages/b3/37/cbf8b1e1f85dec357cd61064bd8e71e96d58d0e8cf249f5e8de5d8a2660a/wikipedia_downloader-0.2-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "2230bbfb607d1befcf46ef6d67917bbe", "sha256": "71d881e37337bb4b89dc3efd7740064ad9bbabacd8775af7c3a388fa75b7185d" }, "downloads": -1, "filename": "wikipedia_downloader-0.2.tar.gz", "has_sig": false, "md5_digest": "2230bbfb607d1befcf46ef6d67917bbe", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2850, "upload_time": "2019-01-04T11:27:22", "url": "https://files.pythonhosted.org/packages/c2/a6/3dfe3d1a1715373f6a7f8171b51820440489e6ddea694ae44b0eee0abd10/wikipedia_downloader-0.2.tar.gz" } ] }