{
    "info": {
        "author": "Daniel Nicolai",
        "author_email": "dalanicolai@gmail.com",
        "bugtrack_url": null,
        "classifiers": [
            "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)",
            "Operating System :: POSIX",
            "Programming Language :: Python :: 3"
        ],
        "description": "# pdf-contents-extractor \nExtract contents as text from a pdf- or djvu-document (for use in e.g. handyoutliner)\n\nrequires:\n* for PDF, pytesseract and PyMuPDF (both can be easily installed with pip)\n* for DJVU, the ddjvu command available in the path\n\nAfter installation, type in a terminal: extract_contents /path/filename startpage lastpage\n(e.g.: `extract_contents example.djvu 3 6`)\nwhere startpage and lastpage are the page numbers of the contents pages.\nThe script automatically recognizes the format (pdf or djvu)\n\nThe default tesseract language is English. Other language(s) can be set with the -l flag (e.g.: `-l eng+nld` for English and Dutch), but this requires the correct tesseract langpack to be installed.\n\nfor extra options and help type: extract_contents -h\n\nThe contents can be further edited in a text-editor and added to the pdf-file with handyoutliner (http://handyoutlinerfo.sourceforge.net/)\n\n#### Note on djvu\n\nFor djvu files the command djvutxt, on Linux, usually works great already (if an OCR layer is available). Example usage: `djvutxt -page=3-6 example.djvu contents.txt` \n\n\n",
        "description_content_type": "text/markdown",
        "docs_url": null,
        "download_url": "",
        "downloads": {
            "last_day": -1,
            "last_month": -1,
            "last_week": -1
        },
        "home_page": "https://github.com/dalanicolai/pdf-contents-extractor",
        "keywords": "",
        "license": "",
        "maintainer": "",
        "maintainer_email": "",
        "name": "pdf-contents-extractor",
        "package_url": "https://pypi.org/project/pdf-contents-extractor/",
        "platform": "",
        "project_url": "https://pypi.org/project/pdf-contents-extractor/",
        "project_urls": {
            "Homepage": "https://github.com/dalanicolai/pdf-contents-extractor"
        },
        "release_url": "https://pypi.org/project/pdf-contents-extractor/0.7/",
        "requires_dist": [
            "Pillow",
            "PyMuPDF",
            "pytesseract (==0.2.7)"
        ],
        "requires_python": "",
        "summary": "A simple script to extract contents section from a PDF or DJVU document",
        "version": "0.7"
    },
    "last_serial": 5556948,
    "releases": {
        "0.1": [
            {
                "comment_text": "",
                "digests": {
                    "md5": "94f5160cef5298ad7e3b630dfae3a689",
                    "sha256": "9cd78abe074992ca1bcdaddbf57e272941f884706243e641076aeffe63848864"
                },
                "downloads": -1,
                "filename": "pdf_contents_extractor-0.1-py3-none-any.whl",
                "has_sig": false,
                "md5_digest": "94f5160cef5298ad7e3b630dfae3a689",
                "packagetype": "bdist_wheel",
                "python_version": "py3",
                "requires_python": null,
                "size": 2046,
                "upload_time": "2019-07-15T20:40:33",
                "url": "https://files.pythonhosted.org/packages/fa/fa/5a627c57ccacfa35f2f376fa63f4ec0d9679acdd0c167ba3fe726ea16ce9/pdf_contents_extractor-0.1-py3-none-any.whl"
            }
        ],
        "0.2": [
            {
                "comment_text": "",
                "digests": {
                    "md5": "2c9b0d1eec365cc4843991cf4ddc6805",
                    "sha256": "b2c4d23f391dcbfba00d50625cc4fabf8b34cf9e4a692926d90e8d198c5a285a"
                },
                "downloads": -1,
                "filename": "pdf_contents_extractor-0.2-py3-none-any.whl",
                "has_sig": false,
                "md5_digest": "2c9b0d1eec365cc4843991cf4ddc6805",
                "packagetype": "bdist_wheel",
                "python_version": "py3",
                "requires_python": null,
                "size": 2092,
                "upload_time": "2019-07-15T22:26:56",
                "url": "https://files.pythonhosted.org/packages/0e/f1/71d6d29f9a7377f12b4d751c7e625ee13f6f037aa74d8ecdc665708072c4/pdf_contents_extractor-0.2-py3-none-any.whl"
            }
        ],
        "0.3": [
            {
                "comment_text": "",
                "digests": {
                    "md5": "6ae40444f1efcf0a3976ce8fe6cb361a",
                    "sha256": "a7d660f3366234dbced945ba533b8076d0a6173061053ea97974d26b6b7831e9"
                },
                "downloads": -1,
                "filename": "pdf_contents_extractor-0.3-py3-none-any.whl",
                "has_sig": false,
                "md5_digest": "6ae40444f1efcf0a3976ce8fe6cb361a",
                "packagetype": "bdist_wheel",
                "python_version": "py3",
                "requires_python": null,
                "size": 2445,
                "upload_time": "2019-07-16T09:56:14",
                "url": "https://files.pythonhosted.org/packages/c2/2d/453ac8a562d6367ff16b4b4878eb6d2d73339e414b5874a5fe5e79ef5452/pdf_contents_extractor-0.3-py3-none-any.whl"
            }
        ],
        "0.4": [
            {
                "comment_text": "",
                "digests": {
                    "md5": "dc36dcbfbd02ac63fcc1216695c0d9e5",
                    "sha256": "9bceb06a0369c69ae941924aa6d396a7af0ccd7554e1bfd34040acb67b95d7a1"
                },
                "downloads": -1,
                "filename": "pdf_contents_extractor-0.4-py3-none-any.whl",
                "has_sig": false,
                "md5_digest": "dc36dcbfbd02ac63fcc1216695c0d9e5",
                "packagetype": "bdist_wheel",
                "python_version": "py3",
                "requires_python": null,
                "size": 2562,
                "upload_time": "2019-07-16T10:25:20",
                "url": "https://files.pythonhosted.org/packages/1c/ba/466fb8e29c9caf09eb63a468dab4b727c924aae1c312c2b65af24f11f265/pdf_contents_extractor-0.4-py3-none-any.whl"
            }
        ],
        "0.5": [
            {
                "comment_text": "",
                "digests": {
                    "md5": "9d8fdb50679195b18f1dc71aec83654a",
                    "sha256": "47ffd70c8f12b6171f5ffd5fcc7650a7f2453d27662d774b71013176a1b51a41"
                },
                "downloads": -1,
                "filename": "pdf_contents_extractor-0.5-py3-none-any.whl",
                "has_sig": false,
                "md5_digest": "9d8fdb50679195b18f1dc71aec83654a",
                "packagetype": "bdist_wheel",
                "python_version": "py3",
                "requires_python": null,
                "size": 2611,
                "upload_time": "2019-07-16T10:47:40",
                "url": "https://files.pythonhosted.org/packages/53/c7/4ac976916c4a6573807f5caad7842ea4383516abb60dfcaab61d61572ba9/pdf_contents_extractor-0.5-py3-none-any.whl"
            }
        ],
        "0.6": [
            {
                "comment_text": "",
                "digests": {
                    "md5": "2c373dce722b6bf2df9f75e7f4b4ee0c",
                    "sha256": "811f93f60d21cf8f59fd189dc361b5509563b31596fd0d9405f0fc980fca8ee6"
                },
                "downloads": -1,
                "filename": "pdf_contents_extractor-0.6-py3-none-any.whl",
                "has_sig": false,
                "md5_digest": "2c373dce722b6bf2df9f75e7f4b4ee0c",
                "packagetype": "bdist_wheel",
                "python_version": "py3",
                "requires_python": null,
                "size": 2832,
                "upload_time": "2019-07-18T11:17:01",
                "url": "https://files.pythonhosted.org/packages/14/c5/00bbee878fd78e9f01153f885bf4b5f07abc181933eb30de1463076c176e/pdf_contents_extractor-0.6-py3-none-any.whl"
            }
        ],
        "0.7": [
            {
                "comment_text": "",
                "digests": {
                    "md5": "b052390e69338f38f923c8dd5fdd0542",
                    "sha256": "91bdc7ff682ada10592919d09fe5da91863377b1529dbccb701db3e4a0c96313"
                },
                "downloads": -1,
                "filename": "pdf_contents_extractor-0.7-py3-none-any.whl",
                "has_sig": false,
                "md5_digest": "b052390e69338f38f923c8dd5fdd0542",
                "packagetype": "bdist_wheel",
                "python_version": "py3",
                "requires_python": null,
                "size": 2993,
                "upload_time": "2019-07-19T14:59:48",
                "url": "https://files.pythonhosted.org/packages/ca/d9/df6d34e1535ed279b10b6ba0b46f9b16660092c9e2e767a57156a8813897/pdf_contents_extractor-0.7-py3-none-any.whl"
            }
        ]
    },
    "urls": [
        {
            "comment_text": "",
            "digests": {
                "md5": "b052390e69338f38f923c8dd5fdd0542",
                "sha256": "91bdc7ff682ada10592919d09fe5da91863377b1529dbccb701db3e4a0c96313"
            },
            "downloads": -1,
            "filename": "pdf_contents_extractor-0.7-py3-none-any.whl",
            "has_sig": false,
            "md5_digest": "b052390e69338f38f923c8dd5fdd0542",
            "packagetype": "bdist_wheel",
            "python_version": "py3",
            "requires_python": null,
            "size": 2993,
            "upload_time": "2019-07-19T14:59:48",
            "url": "https://files.pythonhosted.org/packages/ca/d9/df6d34e1535ed279b10b6ba0b46f9b16660092c9e2e767a57156a8813897/pdf_contents_extractor-0.7-py3-none-any.whl"
        }
    ]
}