{ "info": { "author": "Mark Gemmill", "author_email": "bitbucket@markgemmill.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7" ], "description": "### pdftxt\n\nThe goal of this project is to provide an api to extract text\nfrom specific regions of a pdf document/page and a cli to assist\nidentifying the location of text within a document.\n\n### Installation\n\n ... pip install pdftxt\n\n\n### Basic Command Line Usage\n\nLet's say we have a PDF file (PDF-DOC.pdf) that looks like this:\n\n![Source File Image](https://bytebucket.org/mgemmill/pdftxt/raw/36ef6c80f953ac5d4eae712d5c7943c23e8914bc/assets/readme_src_doc_.jpg)\n\nThe `pdftxt` command:\n\n ... pdftxt PDF-DOC.pdf\n\nWill output a visual layout of the pdf document's pages and text elements to an html page:\n\n![Output File Image](https://bytebucket.org/mgemmill/pdftxt/raw/36ef6c80f953ac5d4eae712d5c7943c23e8914bc/assets/readme_output_doc_.jpg)\n\n\n### API Usage\n\n\n from pathlib import Path\n from pdftxt import api\n\n filepath = 'tests/Word_PDF.pdf'\n\n with api.PdfTxtContext(filepath) as pdf:\n\n for page in pdf:\n\n # To fetch text objects from specific region\n # of the page, first define the region:\n region = api.Region(400, 300, 512, 317)\n\n # Initialize layout parameters:\n params = api.PdfTxtParams()\n\n # Then analyze that area of the page for text objects:\n text = page.analyze(region, params)\n\n # Do whatever it is we need to do with the results:\n for txt in text:\n print(txt.text)\n", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://bitbucket.org/mgemmill/pdftxt", "keywords": "", "license": "BSD-3-Clause", "maintainer": "Mark Gemmill", "maintainer_email": "bitbucket@markgemmill.com", "name": "pdftxt", "package_url": "https://pypi.org/project/pdftxt/", "platform": "", "project_url": "https://pypi.org/project/pdftxt/", "project_urls": { "Homepage": "https://bitbucket.org/mgemmill/pdftxt" }, "release_url": "https://pypi.org/project/pdftxt/0.3.2/", "requires_dist": [ "pdfminer.six (>=20181108.0,<20181109.0)", "docopt (>=0.6.2,<0.7.0)" ], "requires_python": ">=3.6,<4.0", "summary": "PDF text extractor.", "version": "0.3.2" }, "last_serial": 4752043, "releases": { "0.2.0": [ { "comment_text": "", "digests": { "md5": "27ef1255fcc854cd15b63398125c9152", "sha256": "e3795e3a87ea87ac7f2b31601e243ebaf013c6ea7362d4aab6225ee76de0195a" }, "downloads": -1, "filename": "pdftxt-0.2.0-py3-none-any.whl", "has_sig": false, "md5_digest": "27ef1255fcc854cd15b63398125c9152", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": ">=3.6,<4.0", "size": 40383, "upload_time": "2019-01-22T15:01:49", "url": "https://files.pythonhosted.org/packages/27/d6/ffc7954ad45dbc1499c714524b24e04335bf6cd7d25fd7b086279b1ab6af/pdftxt-0.2.0-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "401f078f14eb2920ad876b09e41ff7f2", "sha256": "e16795b8250361fc5d30f7ad3848f1994bfed0c3e565377362b93dfebc19f250" }, "downloads": -1, "filename": "pdftxt-0.2.0.tar.gz", "has_sig": false, "md5_digest": "401f078f14eb2920ad876b09e41ff7f2", "packagetype": "sdist", "python_version": "source", "requires_python": ">=3.6,<4.0", "size": 12725, "upload_time": "2019-01-22T15:01:50", "url": "https://files.pythonhosted.org/packages/07/ac/0263fd411e9c70742f7d71d71dfb4a96c8093e1a68deb94794d30aa2cd56/pdftxt-0.2.0.tar.gz" } ], "0.3.0": [ { "comment_text": "", "digests": { "md5": "cb802fe5f5fd1359ca7850ef47b98f54", "sha256": "ab2ab027713dd3dd771ded8de338eb0ccfde36b55c3fb5986b50cfedecc2baba" }, "downloads": -1, "filename": "pdftxt-0.3.0-py3-none-any.whl", "has_sig": false, "md5_digest": "cb802fe5f5fd1359ca7850ef47b98f54", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": ">=3.6,<4.0", "size": 41902, "upload_time": "2019-01-27T23:41:01", "url": "https://files.pythonhosted.org/packages/2b/90/cd2b407f689752168f6a55523ee9e4285360e4339b2952b786d51012107a/pdftxt-0.3.0-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "7b1a31213390d5798eabc5a6c31be516", "sha256": "54500934b563d6b4b5dab6d21698cd293891a8cdbcf971fdf137ab545f3054f3" }, "downloads": -1, "filename": "pdftxt-0.3.0.tar.gz", "has_sig": false, "md5_digest": "7b1a31213390d5798eabc5a6c31be516", "packagetype": "sdist", "python_version": "source", "requires_python": ">=3.6,<4.0", "size": 13085, "upload_time": "2019-01-27T23:41:03", "url": "https://files.pythonhosted.org/packages/43/dd/bdf75a62dfd71a89344b58255b228b4e220157ca82060915c2716ec21b78/pdftxt-0.3.0.tar.gz" } ], "0.3.1": [ { "comment_text": "", "digests": { "md5": "4c73fcd68b7c62c5f0615301b509abbf", "sha256": "141c3eb838e205277f07629f1f01d1355c5eef91b626b75fb0a7671bd539cc28" }, "downloads": -1, "filename": "pdftxt-0.3.1-py3-none-any.whl", "has_sig": false, "md5_digest": "4c73fcd68b7c62c5f0615301b509abbf", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": ">=3.6,<4.0", "size": 43975, "upload_time": "2019-01-28T01:16:22", "url": "https://files.pythonhosted.org/packages/5f/d7/7d2e273cce2f9e7aea2ac076ca3212c2099c115d058b4571c3ba0c3d3b4d/pdftxt-0.3.1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "ae56779a3f9ebfc4556c2ae0c2e71e48", "sha256": "1c90037c72ce62b6cf86810c32bd266ffe1447a20538cd5358ba099dfd6530ee" }, "downloads": -1, "filename": "pdftxt-0.3.1.tar.gz", "has_sig": false, "md5_digest": "ae56779a3f9ebfc4556c2ae0c2e71e48", "packagetype": "sdist", "python_version": "source", "requires_python": ">=3.6,<4.0", "size": 13595, "upload_time": "2019-01-28T01:16:24", "url": "https://files.pythonhosted.org/packages/5f/30/8f22031fab0cae146379566403835efc49bd1859c239a479ce54f7f38eaf/pdftxt-0.3.1.tar.gz" } ], "0.3.2": [ { "comment_text": "", "digests": { "md5": "1a437ab1cdcb8944da2fe64b06db0cfe", "sha256": "d80ae0dc3a33b097d2ca7f51fb97db4451287cc75c49078c29544cce0d1f6bec" }, "downloads": -1, "filename": "pdftxt-0.3.2-py3-none-any.whl", "has_sig": false, "md5_digest": "1a437ab1cdcb8944da2fe64b06db0cfe", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": ">=3.6,<4.0", "size": 44241, "upload_time": "2019-01-28T21:54:34", "url": "https://files.pythonhosted.org/packages/9e/27/51efbdc795d4a83435da586cbe154946627ff879c99f616cd5487c93ce8a/pdftxt-0.3.2-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "e6c1b66a5860bb8a5b38a8d13f4a38a1", "sha256": "768cbd8caa754493d58100bca3ee97bf423f14212e93164169549a83a358bf83" }, "downloads": -1, "filename": "pdftxt-0.3.2.tar.gz", "has_sig": false, "md5_digest": "e6c1b66a5860bb8a5b38a8d13f4a38a1", "packagetype": "sdist", "python_version": "source", "requires_python": ">=3.6,<4.0", "size": 13737, "upload_time": "2019-01-28T21:54:35", "url": "https://files.pythonhosted.org/packages/92/1d/4d6702d16e9db6994a4cacf9daba215093deb0d380c2cdf3e616e1b0c7d7/pdftxt-0.3.2.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "1a437ab1cdcb8944da2fe64b06db0cfe", "sha256": "d80ae0dc3a33b097d2ca7f51fb97db4451287cc75c49078c29544cce0d1f6bec" }, "downloads": -1, "filename": "pdftxt-0.3.2-py3-none-any.whl", "has_sig": false, "md5_digest": "1a437ab1cdcb8944da2fe64b06db0cfe", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": ">=3.6,<4.0", "size": 44241, "upload_time": "2019-01-28T21:54:34", "url": "https://files.pythonhosted.org/packages/9e/27/51efbdc795d4a83435da586cbe154946627ff879c99f616cd5487c93ce8a/pdftxt-0.3.2-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "e6c1b66a5860bb8a5b38a8d13f4a38a1", "sha256": "768cbd8caa754493d58100bca3ee97bf423f14212e93164169549a83a358bf83" }, "downloads": -1, "filename": "pdftxt-0.3.2.tar.gz", "has_sig": false, "md5_digest": "e6c1b66a5860bb8a5b38a8d13f4a38a1", "packagetype": "sdist", "python_version": "source", "requires_python": ">=3.6,<4.0", "size": 13737, "upload_time": "2019-01-28T21:54:35", "url": "https://files.pythonhosted.org/packages/92/1d/4d6702d16e9db6994a4cacf9daba215093deb0d380c2cdf3e616e1b0c7d7/pdftxt-0.3.2.tar.gz" } ] }