{ "info": { "author": "Qadium Inc", "author_email": "sang@qadium.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 3 - Alpha", "Environment :: Console", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 2.7", "Programming Language :: Unix Shell", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Linguistic", "Topic :: Utilities" ], "description": "Common Crawl Job Library\r\n========================\r\n\r\n.. image:: https://img.shields.io/badge/License-Apache%202.0-blue.svg\r\n :target: https://opensource.org/licenses/Apache-2.0\r\n\r\n.. image:: https://travis-ci.org/qadium-memex/CommonCrawlJob.svg?branch=master\r\n :target: https://travis-ci.org/qadium-memex/CommonCrawlJob\r\n\r\n.. image:: https://badge.fury.io/py/CommonCrawlJob.svg\r\n :target: https://badge.fury.io/py/CommonCrawlJob\r\n\r\nThis work is supported by `Qadium Inc`_ as a part of the `DARPA Memex Program`_.\r\n\r\nInstallation\r\n------------\r\n\r\nThe easiest way to get started is using pip to install a copy of this library.\r\nThis will install the latest stable version hosted on ``PyPI``.\r\n\r\n.. code-block:: sh\r\n\r\n $ pip install CommonCrawlJob\r\n\r\nAnother way is to directly install the code from github to get the bleeding\r\nedge version of the code. If that is the case, you can still use pip by pointing\r\nit to github and specifying the protocol.\r\n\r\n.. 
code-block:: sh\r\n\r\n $ pip install -e git+https://github.com/qadium-memex/CommonCrawlJob.git#egg=ccjob\r\n\r\nCompatibility\r\n-------------\r\n\r\nUnfortunately, this code is not yet compatible with Python 3; Python/PyPy 2.7\r\nare the only implementations it is currently tested against.\r\nUnfortunately, the library for encoding ``WARC (Web Archive)`` file formats\r\nwill need to undergo a rewrite before it is possible to have deterministic IO behavior.\r\n\r\n.. _MRJob: https://pythonhosted.org/mrjob/\r\n.. _`Qadium Inc`: https://qadium.com\r\n.. _`Darpa Memex Program`: https://www.darpa.mil/program/memex", "description_content_type": null, "docs_url": "https://pythonhosted.org/CommonCrawlJob/", "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/qadium-memex/CommonCrawlJob", "keywords": "", "license": "Apache Software License v2", "maintainer": "", "maintainer_email": "", "name": "CommonCrawlJob", "package_url": "https://pypi.org/project/CommonCrawlJob/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/CommonCrawlJob/", "project_urls": { "Download": "UNKNOWN", "Homepage": "https://github.com/qadium-memex/CommonCrawlJob" }, "release_url": "https://pypi.org/project/CommonCrawlJob/0.1.0/", "requires_dist": null, "requires_python": null, "summary": "Extract data from common crawl using elastic map reduce", "version": "0.1.0" }, "last_serial": 2419101, "releases": { "0.0.0": [ { "comment_text": "", "digests": { "md5": "188c879db355721bb11cc14ce3b1bad9", "sha256": "a64b8252ffe2fbe260b09db2ebaa84c53f719a9f1a5343c98909c1520012eb5e" }, "downloads": -1, "filename": "CommonCrawlJob-0.0.0.tar.gz", "has_sig": false, "md5_digest": "188c879db355721bb11cc14ce3b1bad9", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6673, "upload_time": "2016-02-16T05:33:50", "url": "https://files.pythonhosted.org/packages/b6/36/4a08e634d40f82852d010007f356d1a568375ea0f65b00fab145f473f451/CommonCrawlJob-0.0.0.tar.gz" } ], 
"0.1.0": [ { "comment_text": "", "digests": { "md5": "63cab2db7db096c14336f83ce5af137f", "sha256": "19bff00a963a86956ef886e109244625576afd29df5ee43c27e40d0d33d8a2d2" }, "downloads": -1, "filename": "CommonCrawlJob-0.1.0-py2-none-any.whl", "has_sig": false, "md5_digest": "63cab2db7db096c14336f83ce5af137f", "packagetype": "bdist_wheel", "python_version": "2.7", "requires_python": null, "size": 11524, "upload_time": "2016-08-24T09:45:50", "url": "https://files.pythonhosted.org/packages/55/ca/aa387832f21c37e8020dcd414b1c58211d21c3358617d3aca61c67032868/CommonCrawlJob-0.1.0-py2-none-any.whl" }, { "comment_text": "", "digests": { "md5": "eb69b97cb7269d4880fab1921cd5b297", "sha256": "539c6934af1e84459b87a0465e09e8c21e7cf5bccc857cc1bda5db7fea476c7e" }, "downloads": -1, "filename": "CommonCrawlJob-0.1.0.tar.gz", "has_sig": false, "md5_digest": "eb69b97cb7269d4880fab1921cd5b297", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 312388, "upload_time": "2016-08-24T09:37:41", "url": "https://files.pythonhosted.org/packages/6c/56/8085072d352a8875c2c1efdf8ce4074d2b8b9181b6647de9e6c1fcf6a2d2/CommonCrawlJob-0.1.0.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "63cab2db7db096c14336f83ce5af137f", "sha256": "19bff00a963a86956ef886e109244625576afd29df5ee43c27e40d0d33d8a2d2" }, "downloads": -1, "filename": "CommonCrawlJob-0.1.0-py2-none-any.whl", "has_sig": false, "md5_digest": "63cab2db7db096c14336f83ce5af137f", "packagetype": "bdist_wheel", "python_version": "2.7", "requires_python": null, "size": 11524, "upload_time": "2016-08-24T09:45:50", "url": "https://files.pythonhosted.org/packages/55/ca/aa387832f21c37e8020dcd414b1c58211d21c3358617d3aca61c67032868/CommonCrawlJob-0.1.0-py2-none-any.whl" }, { "comment_text": "", "digests": { "md5": "eb69b97cb7269d4880fab1921cd5b297", "sha256": "539c6934af1e84459b87a0465e09e8c21e7cf5bccc857cc1bda5db7fea476c7e" }, "downloads": -1, "filename": "CommonCrawlJob-0.1.0.tar.gz", "has_sig": false, 
"md5_digest": "eb69b97cb7269d4880fab1921cd5b297", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 312388, "upload_time": "2016-08-24T09:37:41", "url": "https://files.pythonhosted.org/packages/6c/56/8085072d352a8875c2c1efdf8ce4074d2b8b9181b6647de9e6c1fcf6a2d2/CommonCrawlJob-0.1.0.tar.gz" } ] }