{ "info": { "author": "Scott Colby", "author_email": "scolby33@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Environment :: Console", "Intended Audience :: End Users/Desktop", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3.5", "Topic :: Office/Business" ], "description": "address_extractor\n=================\nA script to extract US-style street addresses from a text file\n\n::\n\n $ address_extractor\n 1600 Pennsylvania Ave NW, Washington, DC 20500 ^D\n 1 lines in input\n ,1600 Pennsylvania Ave NW,Washington DC 20500\n $ address_extractor -o output.csv input.csv\n 4361 lines in input\n *snip*\n 11 lines unable to be parsed\n $ ls\n output.csv\n\n\n:code:`address_extractor` takes text or a text file containing address-like data, one address per line, and parses it into a uniform format with the `usaddress` package.\n\nInstallation\n------------\nThis package is available from PyPi via :code:`pip`::\n\n pip install address_extractor\n \nThis will install the module as well as the command-line script as :code:`address_extractor`.\n\nCommand-line Usage\n------------------\n\n::\n\n address_extractor [-h] [-o OUTPUT] [--remove-post-zip] [input]\n\n positional arguments:\n input the input file. Defaults to stdin.\n\n optional arguments:\n -h, --help show this help message and exit\n -o OUTPUT, --output OUTPUT\n the output file. Defaults to stdout. \n --remove-post-zip, -r\n when scanning the input lines, remove everything after\n a sequence of 5 digits followed by a comma. The\n parsing library used by this script chokes on\n addresses containing this kind of information, often a\n county name.\n\nLines that could not be parsed will be printed to :code:`STDERR`. They can be saved to a file with standard :code:`bash` redirection techniques:\n\n::\n\n $ address_extractor -o good_addresses.csv has_some_bad_addresses.txt 2> bad_addresses.txt\n\nUsage as a Module\n-----------------\n:code:`address_extractor` can be used as a Python module:\n\n::\n\n >>> import address_extractor\n >>> address_extractor.main(input=input_file_object, output=output_file_object, remove_post_zip=a_bool)\n\nThere are some small issues with this implementation:\n\n* If using :code:`sys.stdin` or :code:`sys.stdout` for input or output, respectively, the file objects will still be closed. This presents issues trying to use these in the future.\n* Errored lines are still printed to :code:`sys.stderr` which may not be expected.\n\nVersions and Stability\n----------------------\nThis package is uploaded as a 0.1.0 release. There are no tests and little error checking--it originated as a quick-'n-dirty script and I decided to release it as a package to gain familiarity with that process.\n\nIssues, comments, and pull requests are welcome at the GitHub page!\n", "description_content_type": null, "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/scolby33/address_extractor", "keywords": "addresses,mailing", "license": "MIT License", "maintainer": "", "maintainer_email": "", "name": "address_extractor", "package_url": "https://pypi.org/project/address_extractor/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/address_extractor/", "project_urls": { "Homepage": "https://github.com/scolby33/address_extractor" }, "release_url": "https://pypi.org/project/address_extractor/0.1.0.post1/", "requires_dist": [ "usaddress" ], "requires_python": "", "summary": "A script to extract US-style street addresses from a text file.", "version": "0.1.0.post1" }, "last_serial": 1794792, "releases": { "0.1.0": [], "0.1.0.post1": [ { "comment_text": "", "digests": { "md5": "0aabfd9ff75d960de3ca5ad311e67132", "sha256": "b5983231c123a92d0b6fb8c45ffb763d102b6692fc007fe34e2ac486902afe6b" }, "downloads": -1, "filename": "address_extractor-0.1.0.post1-py3-none-any.whl", "has_sig": false, "md5_digest": "0aabfd9ff75d960de3ca5ad311e67132", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 4685, "upload_time": "2015-10-31T05:52:51", "url": "https://files.pythonhosted.org/packages/40/a9/6e20672d4bbaa5e7c3871ca29429d1703acea6312159225f564657da23ad/address_extractor-0.1.0.post1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "c77704784852d775587cf51c100f104d", "sha256": "fd62961905c9bae63223b4624116571953a4a83b7c2b473ef3d4a51eccd40a9d" }, "downloads": -1, "filename": "address_extractor-0.1.0.post1.tar.gz", "has_sig": false, "md5_digest": "c77704784852d775587cf51c100f104d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4867, "upload_time": "2015-10-31T05:52:55", "url": "https://files.pythonhosted.org/packages/19/8a/a0b8e6676e7fe5a7d553a44d90608236236aeac7d72505b717b2049179b1/address_extractor-0.1.0.post1.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "0aabfd9ff75d960de3ca5ad311e67132", "sha256": "b5983231c123a92d0b6fb8c45ffb763d102b6692fc007fe34e2ac486902afe6b" }, "downloads": -1, "filename": "address_extractor-0.1.0.post1-py3-none-any.whl", "has_sig": false, "md5_digest": "0aabfd9ff75d960de3ca5ad311e67132", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 4685, "upload_time": "2015-10-31T05:52:51", "url": "https://files.pythonhosted.org/packages/40/a9/6e20672d4bbaa5e7c3871ca29429d1703acea6312159225f564657da23ad/address_extractor-0.1.0.post1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "c77704784852d775587cf51c100f104d", "sha256": "fd62961905c9bae63223b4624116571953a4a83b7c2b473ef3d4a51eccd40a9d" }, "downloads": -1, "filename": "address_extractor-0.1.0.post1.tar.gz", "has_sig": false, "md5_digest": "c77704784852d775587cf51c100f104d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4867, "upload_time": "2015-10-31T05:52:55", "url": "https://files.pythonhosted.org/packages/19/8a/a0b8e6676e7fe5a7d553a44d90608236236aeac7d72505b717b2049179b1/address_extractor-0.1.0.post1.tar.gz" } ] }