{ "info": { "author": "Simon Blanchard", "author_email": "bnomis@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 5 - Production/Stable", "Environment :: Console", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Natural Language :: English", "Operating System :: POSIX", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.4", "Topic :: Office/Business :: Office Suites", "Topic :: Text Processing", "Topic :: Utilities" ], "description": "off2txt\n=======\n\nExtracts ASCII/Unicode text from Office files to separate files.\n\nUseful if you have a document containing two languages (e.g. English and\nChinese) and you want to separate the languages into text files for\nfurther processing and analysis.\n\nSupports Open XML file formats. That is, docx, pptx, and xlsx.\n\nWord and PowerPoint files are extracted to text files. Excel files are\nextracted to CSV files, columns are preserved.\n\nCan be used to make a CSV file from Excel without opening Excel.\n\nExamples\n--------\n\nExtract ASCII and Unicode Text From a Word Document\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. code:: shell\n\n $ off2txt -s word.docx\n\nThe above will make two files: word-ascii.txt and word-unicode.txt\n\nExtract ASCII and Unicode Text From an Excel Document\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n\n.. code:: shell\n\n $ off2txt -s excel.xlsx\n\nThe above will make two files: excel-ascii.csv and excel-unicode.csv\n\nNotes\n-----\n\nIf an extracted file would be empty, it is not created.\n\nExcel is different. Columns are preserved. So you may get a CSV file of\nempty columns. Cells are put in the extracted ASCII file if they\ncontain ASCII only; otherwise they are streamed to the Unicode file.\n\nUsage\n-----\n\n.. 
code:: shell\n\n usage: off2txt [options] File [File ...]\n\n off2txt: extract ASCII/Unicode text from Office files to separate files\n\n positional arguments:\n File Files to extract from\n\n optional arguments:\n -h, --help show this help message and exit\n --version show program's version number and exit\n --debug Turn on debug logging.\n --debug-log FILE Save debug logging to FILE.\n -a EXTENSION, --ascii EXTENSION\n Identifier to append to input file name to make ASCII\n output file name when splitting Unicode and ASCII\n text. Default ascii.\n -d DIRECTORY, --directory DIRECTORY\n Save extracted text to DIRECTORY. Ignored if the -o\n option is given.\n -e EXTENSION, --extension EXTENSION\n Extension to use for extracted text files. Default for\n Word and PowerPoint is txt. Default for Excel is csv.\n -o FILE, --output FILE\n Save extracted text to FILE. If not given, the output\n file is named the same as the input file but with a\n txt extension. The extension can be changed with the\n -e option. Files are opened in append mode unless the\n -X option is given.\n -s, --split Split ASCII and Unicode text into two separate files.\n Unicode files are named by adding -unicode before the\n file extension. The Unicode identifier can be changed\n with the -u option.\n -u EXTENSION, --unicode EXTENSION\n Identifier to append to input file name to make\n Unicode output file name when splitting Unicode and\n ASCII text. 
Default unicode.\n -A, --suppress-file-access-errors\n Do not print file/directory access errors.\n -X, --overwrite-output-files\n Truncate output files before writing.", "description_content_type": null, "docs_url": null, "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/bnomis/off2txt", "keywords": "office,text,extract", "license": "MIT", "maintainer": "", "maintainer_email": "", "name": "off2txt", "package_url": "https://pypi.org/project/off2txt/", "platform": "macosx,linux,unix", "project_url": "https://pypi.org/project/off2txt/", "project_urls": { "Download": "UNKNOWN", "Homepage": "https://github.com/bnomis/off2txt" }, "release_url": "https://pypi.org/project/off2txt/0.1.0/", "requires_dist": null, "requires_python": null, "summary": "off2txt: extract text from Office files", "version": "0.1.0" }, "last_serial": 1512352, "releases": { "0.1.0": [ { "comment_text": "", "digests": { "md5": "5ad349ec4d77736038ec3e44ffea384b", "sha256": "73f7b5626f432cd164fdc884bb2125ff8fd096022fb309d53620d990b3dd96c6" }, "downloads": -1, "filename": "off2txt-0.1.0-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "5ad349ec4d77736038ec3e44ffea384b", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 9826, "upload_time": "2015-04-20T09:19:43", "url": "https://files.pythonhosted.org/packages/87/e2/7a56fdf87832d639b7ebdab7cc02ace8be698cdca513b15bfb071264609d/off2txt-0.1.0-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "6fecb2037ef4f572a3d83fe7f3cd400c", "sha256": "7d07e26956fd009e19757304d264a067071d3bbbae62ad098f466832356118cc" }, "downloads": -1, "filename": "off2txt-0.1.0.tar.gz", "has_sig": false, "md5_digest": "6fecb2037ef4f572a3d83fe7f3cd400c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 187322, "upload_time": "2015-04-20T09:19:47", "url": 
"https://files.pythonhosted.org/packages/28/ed/b382a3e32ae388f570969ed039da2d6e0956e36ec0ad53546e8a81a07636/off2txt-0.1.0.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "5ad349ec4d77736038ec3e44ffea384b", "sha256": "73f7b5626f432cd164fdc884bb2125ff8fd096022fb309d53620d990b3dd96c6" }, "downloads": -1, "filename": "off2txt-0.1.0-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "5ad349ec4d77736038ec3e44ffea384b", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 9826, "upload_time": "2015-04-20T09:19:43", "url": "https://files.pythonhosted.org/packages/87/e2/7a56fdf87832d639b7ebdab7cc02ace8be698cdca513b15bfb071264609d/off2txt-0.1.0-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "6fecb2037ef4f572a3d83fe7f3cd400c", "sha256": "7d07e26956fd009e19757304d264a067071d3bbbae62ad098f466832356118cc" }, "downloads": -1, "filename": "off2txt-0.1.0.tar.gz", "has_sig": false, "md5_digest": "6fecb2037ef4f572a3d83fe7f3cd400c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 187322, "upload_time": "2015-04-20T09:19:47", "url": "https://files.pythonhosted.org/packages/28/ed/b382a3e32ae388f570969ed039da2d6e0956e36ec0ad53546e8a81a07636/off2txt-0.1.0.tar.gz" } ] }