{ "info": { "author": "UW Magellan Team", "author_email": "uwmagellan@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Environment :: Console", "Intended Audience :: Developers", "Intended Audience :: Education", "Intended Audience :: Science/Research", "License :: OSI Approved :: BSD License", "Operating System :: MacOS", "Operating System :: Microsoft :: Windows", "Operating System :: POSIX", "Operating System :: Unix", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Topic :: Scientific/Engineering", "Topic :: Software Development :: Libraries", "Topic :: Utilities" ], "description": "py_entitymatching\n=================\n\nThis project seeks to build a Python software package to match entities\nbetween two tables using supervised learning. This problem is often\nreferred as entity matching (EM). Given two tables A and B, the goal of\nEM is to discover the tuple pairs between two tables that refer to the\nsame real-world entities. There are two main steps involved in entity matching:\nblocking and matching. The blocking step aims to remove obvious non-matching\ntuple pairs and reduce the set considered for matching. Entity matching in\npractice involves many steps than just blocking and matching. While performing EM\nusers often execute many steps, e.g. exploring, cleaning, debugging, sampling,\nestimating accuracy, etc. 
Current EM systems however do not cover the entire\nEM pipeline, providing support only for a few steps (e.g., blocking, matching), while\nignoring less well-known yet equally critical steps (e.g., debugging, sampling).\nThis package seeks to support all the steps involved in EM pipeline.\n\nThe package is free, open-source, and BSD-licensed.\n\nImportant links\n===============\n\n* Project Homepage: https://sites.google.com/site/anhaidgroup/projects/magellan/py_entitymatching\n* Code repository: https://github.com/anhaidgroup/py_entitymatching\n* Issue Tracker: https://github.com/anhaidgroup/py_entitymatching/issues\n\nDependencies\n============\n\nThe required dependencies to build the packages are:\n\n* pandas (provides data structures to store and manage tables). Tested on version 0.23.2.\n* scikit-learn (provides implementations for common machine learning algorithms). Tested on version 0.18.0.\n* joblib (provides multiprocessing capabilities). Tested on version 0.12.0.\n* pyqt5 (provides tools to build GUIs). Tested on version 5.6.0.\n* py_stringsimjoin (provides implementations for string similarity joins). Tested on version 0.3.0.\n* py_stringmatching (provides a set of string tokenizers and string similarity functions). Tested on version 0.4.0.\n* cloudpickle (provides functions to serialize Python constructs). Tested on version 0.2.1.\n* pyprind (library to display progress indicators). Tested on version 2.10.0.\n* pyparsing (library to parse strings). Tested on version 2.2.0.\n* six (provides functions to write compatible code across Python 2 and 3). 
Tested on version 2.11.0.\n\nPlatforms\n=========\n\npy_entitymatching has been tested on Linux, OS X and Windows.", "description_content_type": "", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://sites.google.com/site/anhaidgroup/projects/magellan/py_entitymatching", "keywords": "", "license": "BSD", "maintainer": "", "maintainer_email": "", "name": "py-entitymatching", "package_url": "https://pypi.org/project/py-entitymatching/", "platform": "", "project_url": "https://pypi.org/project/py-entitymatching/", "project_urls": { "Homepage": "https://sites.google.com/site/anhaidgroup/projects/magellan/py_entitymatching" }, "release_url": "https://pypi.org/project/py-entitymatching/0.3.2/", "requires_dist": null, "requires_python": "", "summary": "Python library for end to end Entity Matching.", "version": "0.3.2" }, "last_serial": 5363325, "releases": { "0.0.0": [ { "comment_text": "", "digests": { "md5": "fec8c8ef8c76927befa4fd03f036279b", "sha256": "881d2fa80dd46ef62cbff4e98394082c3b65c2609242aa4822aae54933790e65" }, "downloads": -1, "filename": "py_entitymatching-0.0.0.tar.gz", "has_sig": false, "md5_digest": "fec8c8ef8c76927befa4fd03f036279b", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 462964, "upload_time": "2016-08-04T21:59:00", "url": "https://files.pythonhosted.org/packages/91/ac/d7e707b07a41d7a6e7a233c568aecd5ec7efdcdef9016926ea9538eb488e/py_entitymatching-0.0.0.tar.gz" } ], "0.1.0": [ { "comment_text": "", "digests": { "md5": "35462fd2aee00a94ff30567cb896dcba", "sha256": "23097c0954e106115610ed4cf641b58a92a3651a51663d26cda286f8c41c96ae" }, "downloads": -1, "filename": "py_entitymatching-0.1.0.tar.gz", "has_sig": false, "md5_digest": "35462fd2aee00a94ff30567cb896dcba", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 1928093, "upload_time": "2017-01-12T23:57:41", "url": 
"https://files.pythonhosted.org/packages/16/7d/e50f5c85e417b9384a3b4fa2e5ca9392e53863f22b1d9401ea933891cb60/py_entitymatching-0.1.0.tar.gz" } ], "0.2.0": [ { "comment_text": "", "digests": { "md5": "c19f017acafe04f07f6a86308fb35fc5", "sha256": "4a0dbf5d6d587ec0dadfdbaec7eb3b024cb513fff17532a1826944d7440ef4b5" }, "downloads": -1, "filename": "py_entitymatching-0.2.0.tar.gz", "has_sig": false, "md5_digest": "c19f017acafe04f07f6a86308fb35fc5", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 1949519, "upload_time": "2017-08-14T01:10:39", "url": "https://files.pythonhosted.org/packages/d8/e7/0d89a09da15eca81ae5babfd420e299a57bdcf7cef18fa9a398aaafd3c1d/py_entitymatching-0.2.0.tar.gz" } ], "0.3.0": [ { "comment_text": "", "digests": { "md5": "19aee01ad0158442f22cacc03417ddc2", "sha256": "f0356833aa05b9726a7203a024b2cb5123b925e8678a215464f6b3eb712d9411" }, "downloads": -1, "filename": "py_entitymatching-0.3.0.zip", "has_sig": false, "md5_digest": "19aee01ad0158442f22cacc03417ddc2", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2142688, "upload_time": "2017-11-27T01:12:45", "url": "https://files.pythonhosted.org/packages/28/b7/46ec8d7260e2774ce11ab700bf860141010557ededccb6b590ea516ea133/py_entitymatching-0.3.0.zip" } ], "0.3.1": [ { "comment_text": "", "digests": { "md5": "504b4eb075de8954d82d68aef0c108fd", "sha256": "a8a70ecfe7488a7d1d48f15f0b24c4e7965bc31a8096aa65e2ad9337afcc6dfa" }, "downloads": -1, "filename": "py_entitymatching-0.3.1.tar.gz", "has_sig": false, "md5_digest": "504b4eb075de8954d82d68aef0c108fd", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 1980655, "upload_time": "2018-05-10T23:01:48", "url": "https://files.pythonhosted.org/packages/ee/d3/2eacdb4ee0e268eb4c041fc2921e880262658b24e15ae470559fb1999eab/py_entitymatching-0.3.1.tar.gz" } ], "0.3.2": [ { "comment_text": "", "digests": { "md5": "384f656a594340c9797849831805ae17", "sha256": 
"f8e7a677901c4e35564d374b305e2beac13d615d49260787565877c0b07da1bb" }, "downloads": -1, "filename": "py_entitymatching-0.3.2.tar.gz", "has_sig": false, "md5_digest": "384f656a594340c9797849831805ae17", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2023610, "upload_time": "2019-06-05T16:47:51", "url": "https://files.pythonhosted.org/packages/d1/f0/aac98c3049180c83d421bd08d3094a69cf8aa4677e3b6e38a36f34742d63/py_entitymatching-0.3.2.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "384f656a594340c9797849831805ae17", "sha256": "f8e7a677901c4e35564d374b305e2beac13d615d49260787565877c0b07da1bb" }, "downloads": -1, "filename": "py_entitymatching-0.3.2.tar.gz", "has_sig": false, "md5_digest": "384f656a594340c9797849831805ae17", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2023610, "upload_time": "2019-06-05T16:47:51", "url": "https://files.pythonhosted.org/packages/d1/f0/aac98c3049180c83d421bd08d3094a69cf8aa4677e3b6e38a36f34742d63/py_entitymatching-0.3.2.tar.gz" } ] }