{ "info": { "author": "Daniel Nicolai", "author_email": "dalanicolai@gmail.com", "bugtrack_url": null, "classifiers": [ "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)", "Operating System :: POSIX", "Programming Language :: Python :: 3" ], "description": "# pdf-contents-extractor \nExtract contents as text from a pdf- or djvu-document (for use in e.g. handyoutliner)\n\nrequires:\n* for PDF, pytesseract and PyMuPDF (both can be easily installed with pip)\n* for DJVU, the ddjvu command available in the path\n\nAfter installation type in a terminal: extract_contents /path/filename startpage lastpage\n(e.g.: `extract_contents example.djvu 3 6`)\nwhere startpage and lastpage are pagenumbers of the content pages.\nThe script automatically recognizes the format (pdf or djvu)\n\nThe default tesseract language is english. Another language(s) can be set with -l flag (e.g.: `-l eng+nld` for english and dutch) but it requires the correct tesseract langpack to be installed.\n\nfor extra options and help type: extract_contents -h\n\nThe contents can be further edited in a text-editor and added to the pdf-file with handyoutliner (http://handyoutlinerfo.sourceforge.net/)\n\n#### Note on djvu\n\nFor djvu files the command djvutxt, on linux, usually works great already (if OCR layer available). Example usage: `djvutxt -page=3-6 example.pdf contents.txt` \n\n\n", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/dalanicolai/pdf-contents-extractor", "keywords": "", "license": "", "maintainer": "", "maintainer_email": "", "name": "pdf-contents-extractor", "package_url": "https://pypi.org/project/pdf-contents-extractor/", "platform": "", "project_url": "https://pypi.org/project/pdf-contents-extractor/", "project_urls": { "Homepage": "https://github.com/dalanicolai/pdf-contents-extractor" }, "release_url": "https://pypi.org/project/pdf-contents-extractor/0.7/", "requires_dist": [ "Pillow", "PyMuPDF", "pytesseract (==0.2.7)" ], "requires_python": "", "summary": "A simple script to extract contents section from a PDF or DJVU document", "version": "0.7" }, "last_serial": 5556948, "releases": { "0.1": [ { "comment_text": "", "digests": { "md5": "94f5160cef5298ad7e3b630dfae3a689", "sha256": "9cd78abe074992ca1bcdaddbf57e272941f884706243e641076aeffe63848864" }, "downloads": -1, "filename": "pdf_contents_extractor-0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "94f5160cef5298ad7e3b630dfae3a689", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 2046, "upload_time": "2019-07-15T20:40:33", "url": "https://files.pythonhosted.org/packages/fa/fa/5a627c57ccacfa35f2f376fa63f4ec0d9679acdd0c167ba3fe726ea16ce9/pdf_contents_extractor-0.1-py3-none-any.whl" } ], "0.2": [ { "comment_text": "", "digests": { "md5": "2c9b0d1eec365cc4843991cf4ddc6805", "sha256": "b2c4d23f391dcbfba00d50625cc4fabf8b34cf9e4a692926d90e8d198c5a285a" }, "downloads": -1, "filename": "pdf_contents_extractor-0.2-py3-none-any.whl", "has_sig": false, "md5_digest": "2c9b0d1eec365cc4843991cf4ddc6805", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 2092, "upload_time": "2019-07-15T22:26:56", "url": "https://files.pythonhosted.org/packages/0e/f1/71d6d29f9a7377f12b4d751c7e625ee13f6f037aa74d8ecdc665708072c4/pdf_contents_extractor-0.2-py3-none-any.whl" } ], "0.3": [ { "comment_text": "", "digests": { "md5": "6ae40444f1efcf0a3976ce8fe6cb361a", "sha256": "a7d660f3366234dbced945ba533b8076d0a6173061053ea97974d26b6b7831e9" }, "downloads": -1, "filename": "pdf_contents_extractor-0.3-py3-none-any.whl", "has_sig": false, "md5_digest": "6ae40444f1efcf0a3976ce8fe6cb361a", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 2445, "upload_time": "2019-07-16T09:56:14", "url": "https://files.pythonhosted.org/packages/c2/2d/453ac8a562d6367ff16b4b4878eb6d2d73339e414b5874a5fe5e79ef5452/pdf_contents_extractor-0.3-py3-none-any.whl" } ], "0.4": [ { "comment_text": "", "digests": { "md5": "dc36dcbfbd02ac63fcc1216695c0d9e5", "sha256": "9bceb06a0369c69ae941924aa6d396a7af0ccd7554e1bfd34040acb67b95d7a1" }, "downloads": -1, "filename": "pdf_contents_extractor-0.4-py3-none-any.whl", "has_sig": false, "md5_digest": "dc36dcbfbd02ac63fcc1216695c0d9e5", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 2562, "upload_time": "2019-07-16T10:25:20", "url": "https://files.pythonhosted.org/packages/1c/ba/466fb8e29c9caf09eb63a468dab4b727c924aae1c312c2b65af24f11f265/pdf_contents_extractor-0.4-py3-none-any.whl" } ], "0.5": [ { "comment_text": "", "digests": { "md5": "9d8fdb50679195b18f1dc71aec83654a", "sha256": "47ffd70c8f12b6171f5ffd5fcc7650a7f2453d27662d774b71013176a1b51a41" }, "downloads": -1, "filename": "pdf_contents_extractor-0.5-py3-none-any.whl", "has_sig": false, "md5_digest": "9d8fdb50679195b18f1dc71aec83654a", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 2611, "upload_time": "2019-07-16T10:47:40", "url": "https://files.pythonhosted.org/packages/53/c7/4ac976916c4a6573807f5caad7842ea4383516abb60dfcaab61d61572ba9/pdf_contents_extractor-0.5-py3-none-any.whl" } ], "0.6": [ { "comment_text": "", "digests": { "md5": "2c373dce722b6bf2df9f75e7f4b4ee0c", "sha256": "811f93f60d21cf8f59fd189dc361b5509563b31596fd0d9405f0fc980fca8ee6" }, "downloads": -1, "filename": "pdf_contents_extractor-0.6-py3-none-any.whl", "has_sig": false, "md5_digest": "2c373dce722b6bf2df9f75e7f4b4ee0c", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 2832, "upload_time": "2019-07-18T11:17:01", "url": "https://files.pythonhosted.org/packages/14/c5/00bbee878fd78e9f01153f885bf4b5f07abc181933eb30de1463076c176e/pdf_contents_extractor-0.6-py3-none-any.whl" } ], "0.7": [ { "comment_text": "", "digests": { "md5": "b052390e69338f38f923c8dd5fdd0542", "sha256": "91bdc7ff682ada10592919d09fe5da91863377b1529dbccb701db3e4a0c96313" }, "downloads": -1, "filename": "pdf_contents_extractor-0.7-py3-none-any.whl", "has_sig": false, "md5_digest": "b052390e69338f38f923c8dd5fdd0542", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 2993, "upload_time": "2019-07-19T14:59:48", "url": "https://files.pythonhosted.org/packages/ca/d9/df6d34e1535ed279b10b6ba0b46f9b16660092c9e2e767a57156a8813897/pdf_contents_extractor-0.7-py3-none-any.whl" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "b052390e69338f38f923c8dd5fdd0542", "sha256": "91bdc7ff682ada10592919d09fe5da91863377b1529dbccb701db3e4a0c96313" }, "downloads": -1, "filename": "pdf_contents_extractor-0.7-py3-none-any.whl", "has_sig": false, "md5_digest": "b052390e69338f38f923c8dd5fdd0542", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 2993, "upload_time": "2019-07-19T14:59:48", "url": "https://files.pythonhosted.org/packages/ca/d9/df6d34e1535ed279b10b6ba0b46f9b16660092c9e2e767a57156a8813897/pdf_contents_extractor-0.7-py3-none-any.whl" } ] }