{ "info": { "author": "Thorsten Weimann", "author_email": "weimann.th@yahoo.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Environment :: Console", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Topic :: Software Development :: Libraries :: Python Modules" ], "description": "TExtractor\n==========\n\nExtract text content from many filetypes in pure Python. This package extracts\npure text from many office filetypes. Only three external (pure Python)\nlibraries are needed to work. After extracting you get a list of words with\nthe most common stop words stripped out (only en, de).\n\nInstall with: `pip install TExtractor`\n\nUsage::\n\n >>> from textractor import TExtractor\n >>> extractor = TExtractor()\n >>> extractor.index('test.docx', lang='en')\n ['workflow_history', 'portal_workflow', 'review_history',\n 'implementation', 'organizations', 'Illustrations', ...]\n >>> extractor.index('test.pdf', lang='en')\n ['workflow_history', 'portal_workflow', 'review_history',\n 'implementation', 'organizations', 'Illustrations', ...]\n >>>\n\n\n\n", "description_content_type": "text/x-rst", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "http://bitbucket.org/whitie/textractor-py3/", "keywords": "text extract pdf docx", "license": "MIT", "maintainer": "", "maintainer_email": "", "name": "TExtractor", "package_url": "https://pypi.org/project/TExtractor/", "platform": "", "project_url": "https://pypi.org/project/TExtractor/", "project_urls": { "Homepage": "http://bitbucket.org/whitie/textractor-py3/" }, "release_url": "https://pypi.org/project/TExtractor/0.1.2/", "requires_dist": [ "pdfminer.six", "pluginbase", "chardet" ], "requires_python": "", "summary": "Extract text content from many filetypes.", "version": "0.1.2" }, "last_serial": 4448962, "releases": { "0.1": [ { "comment_text": "", "digests": { "md5": "e9108accc4fd73018f1e5491dc2ae732", "sha256": "019ded50394aa098a38bc7325906380a03ed560fc8d02189c0a9a419a063ae32" }, "downloads": -1, "filename": "TExtractor-0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "e9108accc4fd73018f1e5491dc2ae732", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 5529, "upload_time": "2018-10-31T18:42:50", "url": "https://files.pythonhosted.org/packages/03/12/8c3a1a1a4eb5204c72f21d4e1871ef3e2705ea53c197058319a50bedbf05/TExtractor-0.1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "d03de5ea8a348035e087b6a7671338bd", "sha256": "0912ca40d7579b93ad1f989863d63886fced0ad64feb7106a3cdf9163db03333" }, "downloads": -1, "filename": "TExtractor-0.1.tar.gz", "has_sig": false, "md5_digest": "d03de5ea8a348035e087b6a7671338bd", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 10082, "upload_time": "2018-10-31T18:42:52", "url": "https://files.pythonhosted.org/packages/dd/29/b8f7766d285d6dc92f3dd4051ae9c88870642a8261febd93ccd3d0bb8893/TExtractor-0.1.tar.gz" } ], "0.1.1": [ { "comment_text": "", "digests": { "md5": "ba96a5feb9a2b53279397d4d3aa8a434", "sha256": "8211423efa766667660f34a08f8d4f4ace85c4b439473212be94dce406511afc" }, "downloads": -1, "filename": "TExtractor-0.1.1-py3-none-any.whl", "has_sig": false, "md5_digest": "ba96a5feb9a2b53279397d4d3aa8a434", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 5704, "upload_time": "2018-10-31T19:05:22", "url": "https://files.pythonhosted.org/packages/f3/be/8302e36451eb31dc0d1e9ffd6a798b407a9b1a6dfd4d98911729808dfbdc/TExtractor-0.1.1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "25a52a87896fb4777acbb17a93639ec3", "sha256": "4cf2ae43c7ed966f252ba511928494e3f44626e8d8f5baf58318cd6c95c89480" }, "downloads": -1, "filename": "TExtractor-0.1.1.tar.gz", "has_sig": false, "md5_digest": "25a52a87896fb4777acbb17a93639ec3", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 10184, "upload_time": "2018-10-31T19:05:25", "url": "https://files.pythonhosted.org/packages/12/ac/1e9dd255cef84f263ec9453e332bb8dbeb83387388e7c69ef1b4258a11c8/TExtractor-0.1.1.tar.gz" } ], "0.1.2": [ { "comment_text": "", "digests": { "md5": "2e05d552001e183e88a473a05f6265b3", "sha256": "a4c7e77e786731000035064217f2ef9fbd8e7116bd497c3d46151229535d6c5a" }, "downloads": -1, "filename": "TExtractor-0.1.2-py3-none-any.whl", "has_sig": false, "md5_digest": "2e05d552001e183e88a473a05f6265b3", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 12464, "upload_time": "2018-11-04T06:36:17", "url": "https://files.pythonhosted.org/packages/c3/46/ee0f03fb43dc117bad87bc5e20d3d70a32b7f944021aa2e6a0000a724d39/TExtractor-0.1.2-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "20550312e85a00fd6b839023db79463f", "sha256": "cd4e5af2eb6d343815f83d6c900d9390ed6ea518071aeeb7a6b0224d8f9a0a20" }, "downloads": -1, "filename": "TExtractor-0.1.2.tar.gz", "has_sig": false, "md5_digest": "20550312e85a00fd6b839023db79463f", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 10198, "upload_time": "2018-11-04T06:36:19", "url": "https://files.pythonhosted.org/packages/de/b9/c94be3c965497db0b59e1a9715b5b7e75a919056f1bfb5adc8ea6a2a37b4/TExtractor-0.1.2.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "2e05d552001e183e88a473a05f6265b3", "sha256": "a4c7e77e786731000035064217f2ef9fbd8e7116bd497c3d46151229535d6c5a" }, "downloads": -1, "filename": "TExtractor-0.1.2-py3-none-any.whl", "has_sig": false, "md5_digest": "2e05d552001e183e88a473a05f6265b3", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 12464, "upload_time": "2018-11-04T06:36:17", "url": "https://files.pythonhosted.org/packages/c3/46/ee0f03fb43dc117bad87bc5e20d3d70a32b7f944021aa2e6a0000a724d39/TExtractor-0.1.2-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "20550312e85a00fd6b839023db79463f", "sha256": "cd4e5af2eb6d343815f83d6c900d9390ed6ea518071aeeb7a6b0224d8f9a0a20" }, "downloads": -1, "filename": "TExtractor-0.1.2.tar.gz", "has_sig": false, "md5_digest": "20550312e85a00fd6b839023db79463f", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 10198, "upload_time": "2018-11-04T06:36:19", "url": "https://files.pythonhosted.org/packages/de/b9/c94be3c965497db0b59e1a9715b5b7e75a919056f1bfb5adc8ea6a2a37b4/TExtractor-0.1.2.tar.gz" } ] }