{ "info": { "author": "Julien Romero", "author_email": "romerojulien34@gmail.com", "bugtrack_url": null, "classifiers": [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3" ], "description": "# PyCommonCrawl\n\nA python interface for [Common Crawl](https://commoncrawl.org/).\n\n## INSTALL\n\npip3 install pycommoncrawl\n\n## USAGE\n\n```python\nfrom pycommoncrawl.common_crawl_data_accessor import CommonCrawlDataAccessor\n\ncommon_crawl_data_accessor = CommonCrawlDataAccessor()\n\n# Iterate by line\nfor line in common_crawl_data_accessor.get_raw_resource_data(\"WAT\"):\n print(line)\n\n# Iterate by WARC bloc\nfor warc in common_crawl_data_accessor.get_raw_resource_data_per_warc(\"WAT\"):\n print(warc[\"Content-Length\"])\n```\n\n\n", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/Aunsiels/pycommoncrawl", "keywords": "", "license": "", "maintainer": "", "maintainer_email": "", "name": "pycommoncrawl", "package_url": "https://pypi.org/project/pycommoncrawl/", "platform": "", "project_url": "https://pypi.org/project/pycommoncrawl/", "project_urls": { "Homepage": "https://github.com/Aunsiels/pycommoncrawl" }, "release_url": "https://pypi.org/project/pycommoncrawl/0.2/", "requires_dist": [ "progressbar", "pytest", "warc3-wet" ], "requires_python": "", "summary": "An interface to access common crawl data", "version": "0.2" }, "last_serial": 5832681, "releases": { "0.1": [ { "comment_text": "", "digests": { "md5": "4c8619569e88d1d9564388e88f0a147a", "sha256": "65ae17cce4b79ba2ca3c9f91dd2f2f2f3016020e9c409bca0260ad5ba1aa5cf6" }, "downloads": -1, "filename": "pycommoncrawl-0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "4c8619569e88d1d9564388e88f0a147a", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 4949, "upload_time": "2019-09-15T17:50:46", "url": "https://files.pythonhosted.org/packages/46/77/143d23e41bd8d53a3043e3acc3e21d5bcc14bc1e3d59f584f5100ce91941/pycommoncrawl-0.1-py3-none-any.whl" } ], "0.2": [ { "comment_text": "", "digests": { "md5": "b363cc73a434f66611ad1128ded22f20", "sha256": "a6b9fd48bc7717281f78098ab91831751aa4527cedff093d90234b82574a9552" }, "downloads": -1, "filename": "pycommoncrawl-0.2-py3-none-any.whl", "has_sig": false, "md5_digest": "b363cc73a434f66611ad1128ded22f20", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 4993, "upload_time": "2019-09-15T17:59:23", "url": "https://files.pythonhosted.org/packages/c5/17/5362da86620e4a6c832f1614d69f271e4fc80b410706f1a195221e75f683/pycommoncrawl-0.2-py3-none-any.whl" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "b363cc73a434f66611ad1128ded22f20", "sha256": "a6b9fd48bc7717281f78098ab91831751aa4527cedff093d90234b82574a9552" }, "downloads": -1, "filename": "pycommoncrawl-0.2-py3-none-any.whl", "has_sig": false, "md5_digest": "b363cc73a434f66611ad1128ded22f20", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 4993, "upload_time": "2019-09-15T17:59:23", "url": "https://files.pythonhosted.org/packages/c5/17/5362da86620e4a6c832f1614d69f271e4fc80b410706f1a195221e75f683/pycommoncrawl-0.2-py3-none-any.whl" } ] }