{ "info": { "author": "Chris Mutel", "author_email": "cmutel@gmail.com", "bugtrack_url": null, "classifiers": [ "Intended Audience :: Developers", "Intended Audience :: End Users/Desktop", "Intended Audience :: Science/Research", "License :: OSI Approved :: BSD License", "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", "Operating System :: POSIX", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Topic :: Scientific/Engineering :: Information Analysis" ], "description": "# Perdu\n\nPython library and webapp for matching against standard industry and product classifiers. Comes with NAICS, GS1, and USEEIO built-in.\n\n[![Build Status](https://travis-ci.org/cmutel/perdu.svg?branch=master)](https://travis-ci.org/cmutel/perdu) [![Build status](https://ci.appveyor.com/api/projects/status/46fi2vroxh6rruka?svg=true)](https://ci.appveyor.com/project/cmutel/perdu) [![Coverage Status](https://coveralls.io/repos/github/cmutel/perdu/badge.svg?branch=master)](https://coveralls.io/github/cmutel/perdu?branch=master)\n\n## Installation\n\nInstall using pip or conda:\n\n conda -c conda-forge -c cmutel perdu\n\n-or-\n\n pip install perdu\n\nDepends on:\n\n* appdirs\n* docopt\n* flask\n* peewee\n* rdflib\n* rdflib-jsonld\n* whoosh\n\n## Usage\n\nAs a webapp:\n\n conda_webapp\n\nAs a library:\n\n import perdu\n perdu.search_useeio(\"plastic toy\")\n\n# Search basics\n\nPerdu uses [whoosh](https://whoosh.readthedocs.io/en/latest/intro.html) as the search engine. When you first import it, Perdu will import the three built-in catalogues in around one minute.\n\n## Built-in catalogues\n\n* [NAICS](https://www.census.gov/eos/www/naics/)\n* [GS1 GPC](https://www.gs1.org/standards/gpc)\n* [US 2014 EEIO](https://cfpub.epa.gov/si/si_public_record_report.cfm?Lab=NRMRL&dirEntryId=336332)\n\n## Uploading data\n\nCurrently, the only possibility to upload data to the web interface is via CSV, with the first column being the item name or title, and the second (optional) column being the item description. See `perdu.test.fixtures` for examples.\n\n## Adding other catalogues\n\nSee the files in `perdu.extraction` for examples on how to extract data from PDFs (NAICS), XML (GS1), and JSON (USEEIO). Each search catalogue will have its own schema, but Perdu expects these schemas to have at least the columns `name`, `description`, and `code` (see examples in `perdu.searching`). New catalogues will need to have suitable functions provided in `perdu.webapp.search_mapping`.\n\n## Advanced searching\n\nIn addition to the default search method used in the web interface, Perdu also offers [search corrections](https://whoosh.readthedocs.io/en/latest/spelling.html) (`search_corrector_gs1`, `search_corrector_naics`, and `search_corrector_useeio`) and [disjunction maximization](https://whoosh.readthedocs.io/en/latest/api/qparser.html?highlight=dismaxparser#whoosh.qparser.DisMaxParser) (`search_gs1_disjoint`, `search_useeio_disjoint`, and `search_naics_disjoint`).", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/cmutel/perdu", "keywords": "", "license": "BSD 3-clause", "maintainer": "", "maintainer_email": "", "name": "perdu", "package_url": "https://pypi.org/project/perdu/", "platform": "", "project_url": "https://pypi.org/project/perdu/", "project_urls": { "Homepage": "https://github.com/cmutel/perdu" }, "release_url": "https://pypi.org/project/perdu/0.2.1/", "requires_dist": null, "requires_python": "", "summary": "Python library and webapp for searching standard industry and product classifiers", "version": "0.2.1", "yanked": false, "yanked_reason": null }, "last_serial": 6235510, "releases": { "0.1": [ { "comment_text": "", "digests": { "md5": "010fd612cb83b675a2e41e9477dfa177", "sha256": "db16f397b42c2f3c9185166909d887f6306003ef98304ab89c993e3ef4885d15" }, "downloads": -1, "filename": "perdu-0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "010fd612cb83b675a2e41e9477dfa177", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 623064, "upload_time": "2019-10-21T14:06:28", "upload_time_iso_8601": "2019-10-21T14:06:28.846810Z", "url": "https://files.pythonhosted.org/packages/9e/3b/5c0d390ac272eaa07b3a9cedf7f805560c79e277e56d90c9ec0d96e7c31c/perdu-0.1-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "b2c596858e3a829b69871d633a966911", "sha256": "d43d566ed90cadb0984c53ec1e71c12f2d13600c126c0bffa23e96c570ec8d88" }, "downloads": -1, "filename": "perdu-0.1.tar.gz", "has_sig": false, "md5_digest": "b2c596858e3a829b69871d633a966911", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 623286, "upload_time": "2019-10-21T14:06:34", "upload_time_iso_8601": "2019-10-21T14:06:34.651061Z", "url": "https://files.pythonhosted.org/packages/51/fb/b5eb52586dd49853e0eaab7b2f404b02311ca4caa906517441e887957837/perdu-0.1.tar.gz", "yanked": false, "yanked_reason": null } ], "0.1.1": [ { "comment_text": "", "digests": { "md5": "112fdc70e5d5e57e5b2765fa3b4ec800", "sha256": "1b73939e1f3797b4d2b07e4eb7aadfe91d356d8e9a02bdb8b93326c08b69a069" }, "downloads": -1, "filename": "perdu-0.1.1-py3-none-any.whl", "has_sig": false, "md5_digest": "112fdc70e5d5e57e5b2765fa3b4ec800", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 623103, "upload_time": "2019-10-21T14:45:03", "upload_time_iso_8601": "2019-10-21T14:45:03.054781Z", "url": "https://files.pythonhosted.org/packages/35/23/9a15aed9c449b9c579f63f355fd925a2c99d82e3eb91cceddc2aee8c2b8a/perdu-0.1.1-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "2f93a4d3770605044fa1c53a4e034e75", "sha256": "2a53a2dae0fa76f3750e4a0a418674f01f421b5481be2c66b6d5a4216843a507" }, "downloads": -1, "filename": "perdu-0.1.1.tar.gz", "has_sig": false, "md5_digest": "2f93a4d3770605044fa1c53a4e034e75", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 623212, "upload_time": "2019-10-21T14:45:05", "upload_time_iso_8601": "2019-10-21T14:45:05.542951Z", "url": "https://files.pythonhosted.org/packages/d7/87/785efd36e45bee89d5fc25a7c3413d034804135479f8246b44788f5ed53f/perdu-0.1.1.tar.gz", "yanked": false, "yanked_reason": null } ], "0.2": [ { "comment_text": "", "digests": { "md5": "37cd56b4ba0a385fcabf9c973b9e6f41", "sha256": "9106e8f6097e6b6e450c0da5e43e930c952fb596bb7dc44723f9c270f891a0b8" }, "downloads": -1, "filename": "perdu-0.2.tar.gz", "has_sig": false, "md5_digest": "37cd56b4ba0a385fcabf9c973b9e6f41", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 624426, "upload_time": "2019-11-29T08:57:25", "upload_time_iso_8601": "2019-11-29T08:57:25.622875Z", "url": "https://files.pythonhosted.org/packages/b5/f3/8852f1e8b57eaad4813b0530b8d1d2dcd1da286b177a35124a4d8869c2d9/perdu-0.2.tar.gz", "yanked": false, "yanked_reason": null } ], "0.2.1": [ { "comment_text": "", "digests": { "md5": "cd2075cc7f33689300f134aceac2e1d2", "sha256": "ad7e0dc431173d661e291b5e2f7266a36f7bba8ea7849cf873e9df82d96db1d2" }, "downloads": -1, "filename": "perdu-0.2.1.tar.gz", "has_sig": false, "md5_digest": "cd2075cc7f33689300f134aceac2e1d2", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 624804, "upload_time": "2019-12-03T15:05:55", "upload_time_iso_8601": "2019-12-03T15:05:55.010520Z", "url": "https://files.pythonhosted.org/packages/7e/56/bb3e08cb491c18d2ecc19d2ec04669fd0a41db25dea91a1835c871acc5e1/perdu-0.2.1.tar.gz", "yanked": false, "yanked_reason": null } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "cd2075cc7f33689300f134aceac2e1d2", "sha256": "ad7e0dc431173d661e291b5e2f7266a36f7bba8ea7849cf873e9df82d96db1d2" }, "downloads": -1, "filename": "perdu-0.2.1.tar.gz", "has_sig": false, "md5_digest": "cd2075cc7f33689300f134aceac2e1d2", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 624804, "upload_time": "2019-12-03T15:05:55", "upload_time_iso_8601": "2019-12-03T15:05:55.010520Z", "url": "https://files.pythonhosted.org/packages/7e/56/bb3e08cb491c18d2ecc19d2ec04669fd0a41db25dea91a1835c871acc5e1/perdu-0.2.1.tar.gz", "yanked": false, "yanked_reason": null } ], "vulnerabilities": [] }