{
    "info": {
        "author": "Krzysztof Dorosz",
        "author_email": "cypreess@gmail.com",
        "bugtrack_url": null,
        "classifiers": [
            "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)",
            "Topic :: Text Processing",
            "Topic :: Text Processing :: Indexing",
            "Topic :: Text Processing :: Linguistic",
            "Topic :: Utilities"
        ],
        "description": "Welcome to Corpora!\n===================\n*Corpora* is a lightweight, fast and scalable corpus library able to store a collection of raw text documents with additional key-value headers. It uses Berkeley DB (bsddb3 module) for index managing what guarantee speed and bullet-proof. Text storage model is based on chunked flat, human readable text files. This architecture can easily scale up to millions documents, hundred of gigabytes collections.\n\nCorpora module provides four main features:\n  * create a new corpus,\n  * append documents to a corpus,\n  * random access to any document in a corpus using it's unique ``id``,\n  * sequential access to document collection (generator over collection).\n\nKey-Value document headers supports storing any kind of objects seriazable with yaml_. Corpora supports only append & read-only philosophy, for more information please read section :doc:`motivation`.\n\n.. _yaml: http://www.yaml.org/\n\nQuickstart\n----------\nInstallation:\n::\n    \n    > sudo pip install corpora\n\nBasic usage:\n\n   \n    >>> from corpora import Corpus\n    >>> Corpus.create('/tmp/test_corpus')\n    >>> c = Corpus('/tmp/test_corpus')\n    >>> c.add('First document', 1)\n    >>> c.add('Second document', 2)\n    >>> c.save_indexes()\n    >>> len(c)\n    2\n    >>> c[1]\n    ({'id': 1}, u'First document')\n    >>> c[2]\n    ({'id': 2}, u'Second document')\n    >>> for t in c:\n    ...    print t\n    ... \n    ({'id': 1}, u'First document')\n    ({'id': 2}, u'Second document')",
        "description_content_type": null,
        "docs_url": "https://pythonhosted.org/Corpora/",
        "download_url": "UNKNOWN",
        "downloads": {
            "last_day": -1,
            "last_month": -1,
            "last_week": -1
        },
        "home_page": "http://packages.python.org/Corpora",
        "keywords": "text utf corpus corpora nlp toolkit",
        "license": "LGPL",
        "maintainer": null,
        "maintainer_email": null,
        "name": "Corpora",
        "package_url": "https://pypi.org/project/Corpora/",
        "platform": "UNKNOWN",
        "project_url": "https://pypi.org/project/Corpora/",
        "project_urls": {
            "Download": "UNKNOWN",
            "Homepage": "http://packages.python.org/Corpora"
        },
        "release_url": "https://pypi.org/project/Corpora/1.0/",
        "requires_dist": null,
        "requires_python": null,
        "summary": "Lightweight, fast and scalable text corpus library.",
        "version": "1.0"
    },
    "last_serial": 784023,
    "releases": {
        "1.0": [
            {
                "comment_text": "",
                "digests": {
                    "md5": "02781c45591ff458819e13120394828d",
                    "sha256": "208a68da259c6c5ccd36a85d83c5cfdf43f1fb25aa77d21b7a2b0c6bfa1cd1db"
                },
                "downloads": -1,
                "filename": "Corpora-1.0.tar.gz",
                "has_sig": false,
                "md5_digest": "02781c45591ff458819e13120394828d",
                "packagetype": "sdist",
                "python_version": "source",
                "requires_python": null,
                "size": 5147,
                "upload_time": "2011-12-14T18:25:44",
                "url": "https://files.pythonhosted.org/packages/6c/f5/998ee3d19c64e42a5a3839858ede61ccd504c13f24fbe3bf48ddb6fd3592/Corpora-1.0.tar.gz"
            }
        ]
    },
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "md5": "02781c45591ff458819e13120394828d",
                "sha256": "208a68da259c6c5ccd36a85d83c5cfdf43f1fb25aa77d21b7a2b0c6bfa1cd1db"
            },
            "downloads": -1,
            "filename": "Corpora-1.0.tar.gz",
            "has_sig": false,
            "md5_digest": "02781c45591ff458819e13120394828d",
            "packagetype": "sdist",
            "python_version": "source",
            "requires_python": null,
            "size": 5147,
            "upload_time": "2011-12-14T18:25:44",
            "url": "https://files.pythonhosted.org/packages/6c/f5/998ee3d19c64e42a5a3839858ede61ccd504c13f24fbe3bf48ddb6fd3592/Corpora-1.0.tar.gz"
        }
    ]
}