{ "info": { "author": "naivefeeling", "author_email": "625424539@qq.com", "bugtrack_url": null, "classifiers": [ "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7" ], "description": "# NaivePDF \n*yet another pdf texts and tables extractor*\n\nThis project is inspired by [pdfminer](https://github.com/euske/pdfminer), \nand the pdf parts use, rewrite or redesign a lots of it's codes.\n\nThe main purpose of this project is to provide a tool \nthat can naively extract text lines and **bordered tables** from pdf files, \nand write them into a html file. \nIn most cases it works well.\n\nOn the other hand, it's pdf parts can be an alternative of pdfminer\nthat you can use it to extract texts, lines and shapes more simply.\n\n\n# How to Install\n* Python3.6+ required\n\n```cmd\npip install naivepdf\n```\n\n# Example of Use\n\nvery simple to use\n\n```python\n# encoding: utf-8\n\nfrom naivepdf.pdfdocument import PDFDocument\nfrom naivepdf.reconstructor import PageReconstructor\nfrom naivepdf.utils.html import html\n\n\ndef main():\n with open('examples/1206061047.pdf', 'rb') as fp:\n data = []\n doc = PDFDocument(fp)\n for i, page in enumerate(doc.pages):\n # as an alternative of pdfminer, just:\n # data.append(page.data)\n reconstructor = PageReconstructor(page)\n data.extend(reconstructor.reconstruct())\n\n # as an alternative of pdfminer, just:\n # return data\n with open('examples/1206061047.html', 'w', encoding='utf-8') as fp:\n html(fp, data)\n\n\nif __name__ == '__main__':\n main()\n\n```", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/naivefeeling/naivepdf.git", "keywords": "pdf,pdfparser,pdfextract", "license": "", "maintainer": "", "maintainer_email": "", "name": "naivepdf", "package_url": "https://pypi.org/project/naivepdf/", "platform": "", "project_url": "https://pypi.org/project/naivepdf/", "project_urls": { "Homepage": "https://github.com/naivefeeling/naivepdf.git" }, "release_url": "https://pypi.org/project/naivepdf/0.2/", "requires_dist": null, "requires_python": "", "summary": "yet another pdf texts and tables extractor", "version": "0.2" }, "last_serial": 5910351, "releases": { "0.2": [ { "comment_text": "", "digests": { "md5": "e36c6b108ace080e92aae0893c0c7c11", "sha256": "d4dde81361ceb2fd028c5ad750d29b15fe7ba604c7c502ab519825cced2f8b01" }, "downloads": -1, "filename": "naivepdf-0.2.tar.gz", "has_sig": false, "md5_digest": "e36c6b108ace080e92aae0893c0c7c11", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3004046, "upload_time": "2019-10-01T04:38:33", "url": "https://files.pythonhosted.org/packages/93/d8/99b92f026d8ff133931a21833be86bae0e8790c44354ec29fdbe82c47930/naivepdf-0.2.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "e36c6b108ace080e92aae0893c0c7c11", "sha256": "d4dde81361ceb2fd028c5ad750d29b15fe7ba604c7c502ab519825cced2f8b01" }, "downloads": -1, "filename": "naivepdf-0.2.tar.gz", "has_sig": false, "md5_digest": "e36c6b108ace080e92aae0893c0c7c11", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3004046, "upload_time": "2019-10-01T04:38:33", "url": "https://files.pythonhosted.org/packages/93/d8/99b92f026d8ff133931a21833be86bae0e8790c44354ec29fdbe82c47930/naivepdf-0.2.tar.gz" } ] }