{ "info": { "author": "Markus Konrad", "author_email": "markus.konrad@wzb.eu", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Topic :: Scientific/Engineering :: Information Analysis", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Utilities" ], "description": "This repository contains a set of tools written in Python 3 with the aim to extract tabular\ndata from scanned and OCR-processed documents available as PDF files. Before these files can be processed they need\nto be converted to XML files in pdf2xml format using poppler utils. Further information and examples can be found\nin the github repository.", "description_content_type": null, "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/WZBSocialScienceCenter/pdftabextract", "keywords": "datamining ocr pdf tabular data mining extract extraction", "license": "Apache 2.0", "maintainer": "", "maintainer_email": "", "name": "pdftabextract", "package_url": "https://pypi.org/project/pdftabextract/", "platform": "", "project_url": "https://pypi.org/project/pdftabextract/", "project_urls": { "Homepage": "https://github.com/WZBSocialScienceCenter/pdftabextract" }, "release_url": "https://pypi.org/project/pdftabextract/0.3.0/", "requires_dist": null, "requires_python": "", "summary": "A set of tools for data mining (OCR-processed) PDFs", "version": "0.3.0" }, "last_serial": 3473361, "releases": { "0.1.0": [ { "comment_text": "", "digests": { "md5": "48cf5d8ccbd045198a89202b10fbaa28", "sha256": "d076c565e50279a72926593b16a5eb8c54e0e0b9fe70599a66f37ddf090b6399" }, "downloads": -1, "filename": "pdftabextract-0.1.0-py3-none-any.whl", "has_sig": false, "md5_digest": "48cf5d8ccbd045198a89202b10fbaa28", "packagetype": "bdist_wheel", "python_version": "3.5", "requires_python": null, "size": 26316, "upload_time": "2017-02-16T10:15:39", "url": "https://files.pythonhosted.org/packages/2a/dc/1a267ec645383a096a11ae23ffcc00e3519fac0bf0b3f976c3e100e34da5/pdftabextract-0.1.0-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "fa3a6ccbfe71bee89b773a408076764a", "sha256": "6ad9ac250b2f914383aba66078bfc01e91b5c53874d2361c559826125dd1a840" }, "downloads": -1, "filename": "pdftabextract-0.1.0.tar.gz", "has_sig": false, "md5_digest": "fa3a6ccbfe71bee89b773a408076764a", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 26233, "upload_time": "2017-02-16T10:15:37", "url": "https://files.pythonhosted.org/packages/00/7c/7b1cc3647a5ab2fbdb41a8b300a8e01d92a8d01d7fd2d75927daa6e038c0/pdftabextract-0.1.0.tar.gz" } ], "0.1.1": [ { "comment_text": "", "digests": { "md5": "c8d0987c51fd24d5615b03a715c22fa6", "sha256": "c4a6423d760ccfc8d5e9adf8682956f81ea657b2dd18d9cc73a3554f685db208" }, "downloads": -1, "filename": "pdftabextract-0.1.1-py3-none-any.whl", "has_sig": false, "md5_digest": "c8d0987c51fd24d5615b03a715c22fa6", "packagetype": "bdist_wheel", "python_version": "3.5", "requires_python": null, "size": 26320, "upload_time": "2017-07-07T09:50:34", "url": "https://files.pythonhosted.org/packages/a8/3b/a419afb3d179c9e15d3c76faffac7df3103913cc2e7550b1ad8f40badcfe/pdftabextract-0.1.1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "89e2cd93eaeb2c07f1cf71b2fe65574e", "sha256": "331d86827b3a67ec9eca8774be1c67da168c8c9129d459972c9fce3d196523ea" }, "downloads": -1, "filename": "pdftabextract-0.1.1.tar.gz", "has_sig": false, "md5_digest": "89e2cd93eaeb2c07f1cf71b2fe65574e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 26420, "upload_time": "2017-07-07T09:50:31", "url": "https://files.pythonhosted.org/packages/62/0b/5a1a81d8188590d04199561f3a8c792c2c81781d7fe5c95dda0d09dad2e6/pdftabextract-0.1.1.tar.gz" } ], "0.1.2": [ { "comment_text": "", "digests": { "md5": "7349a6b003a5773c6c0d67c53c73fe62", "sha256": "7c62ad9795662c168c95ff3429324a12316c99bf4e790ffe18bf45e6c9bc166d" }, "downloads": -1, "filename": "pdftabextract-0.1.2-py3-none-any.whl", "has_sig": false, "md5_digest": "7349a6b003a5773c6c0d67c53c73fe62", "packagetype": "bdist_wheel", "python_version": "3.5", "requires_python": null, "size": 26326, "upload_time": "2017-07-10T06:56:50", "url": "https://files.pythonhosted.org/packages/6e/ff/9fc0d67954719ff5e07cb716738e05069216e6cc4108ccab25274994bd28/pdftabextract-0.1.2-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "4b91e44021a87b04b8bd5163a4a1b4c6", "sha256": "8251672785ae2fca9ff338c8a0e97099cace40aeed7e13f8836103f00c99f1c7" }, "downloads": -1, "filename": "pdftabextract-0.1.2.tar.gz", "has_sig": false, "md5_digest": "4b91e44021a87b04b8bd5163a4a1b4c6", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 26433, "upload_time": "2017-07-10T06:56:48", "url": "https://files.pythonhosted.org/packages/16/ab/3a7d48fda8b2b43af096dbae0e92af6ecf3120e4ec61c3334eb729f2b58a/pdftabextract-0.1.2.tar.gz" } ], "0.2.0": [ { "comment_text": "", "digests": { "md5": "3ef2e962b46509e3948f7754f3e5d656", "sha256": "ec36702c4e5fe1c629ef4a81453e3372b043e753d89ebd188a8e435976f97b30" }, "downloads": -1, "filename": "pdftabextract-0.2.0-py3-none-any.whl", "has_sig": false, "md5_digest": "3ef2e962b46509e3948f7754f3e5d656", "packagetype": "bdist_wheel", "python_version": "3.5", "requires_python": null, "size": 27029, "upload_time": "2017-12-20T13:14:47", "url": "https://files.pythonhosted.org/packages/99/09/750e01099b021b0a693367b0cfa81cf2d62ec22f93b583e0676e9fcd77e2/pdftabextract-0.2.0-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "e6e10c07db4b41942883271b0b6f998a", "sha256": "4e8643a36cff1fa99764284520a80b6651f39045d023be8472e61a6b1f0c9015" }, "downloads": -1, "filename": "pdftabextract-0.2.0.tar.gz", "has_sig": false, "md5_digest": "e6e10c07db4b41942883271b0b6f998a", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 27251, "upload_time": "2017-12-20T13:14:45", "url": "https://files.pythonhosted.org/packages/1c/10/13ddbf0183811e20dfeb87b82f67065075cb160a4093f57622860240b3bc/pdftabextract-0.2.0.tar.gz" } ], "0.3.0": [ { "comment_text": "", "digests": { "md5": "cf361171526695a4d0f45020344ec317", "sha256": "88ec8c4481d4de2bb5f675732e751c10cc31c5908545cbad011f3e0d40654f3c" }, "downloads": -1, "filename": "pdftabextract-0.3.0-py3-none-any.whl", "has_sig": false, "md5_digest": "cf361171526695a4d0f45020344ec317", "packagetype": "bdist_wheel", "python_version": "3.5", "requires_python": null, "size": 27997, "upload_time": "2018-01-09T08:00:24", "url": "https://files.pythonhosted.org/packages/1e/a9/dcf92e41100ba949e33ff7dc47ac8f6e905c5ed1890e6113eb0abd263f40/pdftabextract-0.3.0-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "4199c31bf926e7a830ac87138f0c56e1", "sha256": "822bc899123f360bd83d32f830c7d1fc4db16240f84eedb3009ff12a2d8a97e9" }, "downloads": -1, "filename": "pdftabextract-0.3.0.tar.gz", "has_sig": false, "md5_digest": "4199c31bf926e7a830ac87138f0c56e1", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 28167, "upload_time": "2018-01-09T08:00:22", "url": "https://files.pythonhosted.org/packages/cb/b4/9c47e9a73262f7155fdc94334d44e3f8a39c54be71ce0e6525feb2494176/pdftabextract-0.3.0.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "cf361171526695a4d0f45020344ec317", "sha256": "88ec8c4481d4de2bb5f675732e751c10cc31c5908545cbad011f3e0d40654f3c" }, "downloads": -1, "filename": "pdftabextract-0.3.0-py3-none-any.whl", "has_sig": false, "md5_digest": "cf361171526695a4d0f45020344ec317", "packagetype": "bdist_wheel", "python_version": "3.5", "requires_python": null, "size": 27997, "upload_time": "2018-01-09T08:00:24", "url": "https://files.pythonhosted.org/packages/1e/a9/dcf92e41100ba949e33ff7dc47ac8f6e905c5ed1890e6113eb0abd263f40/pdftabextract-0.3.0-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "4199c31bf926e7a830ac87138f0c56e1", "sha256": "822bc899123f360bd83d32f830c7d1fc4db16240f84eedb3009ff12a2d8a97e9" }, "downloads": -1, "filename": "pdftabextract-0.3.0.tar.gz", "has_sig": false, "md5_digest": "4199c31bf926e7a830ac87138f0c56e1", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 28167, "upload_time": "2018-01-09T08:00:22", "url": "https://files.pythonhosted.org/packages/cb/b4/9c47e9a73262f7155fdc94334d44e3f8a39c54be71ce0e6525feb2494176/pdftabextract-0.3.0.tar.gz" } ] }