{ "info": { "author": "Avinash Kak", "author_email": "kak@purdue.edu", "bugtrack_url": null, "classifiers": [ "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.5", "Topic :: Utilities" ], "description": "Consult the module API page at\n\n https://engineering.purdue.edu/kak/distLSH/LocalitySensitiveHashing-1.0.1.html\n\nfor all information related to this module, including\ninformation regarding the latest changes to the code. The\npage at the URL shown above lists all of the module\nfunctionality you can invoke in your own code.\n\nLocality Sensitive Hashing (LSH) is a computationally\nefficient approach for finding nearest neighbors in large\ndatasets. The main idea in LSH is to avoid having to\ncompare every pair of data samples in a large dataset in\norder to find the nearest similar neighbors for the\ndifferent data samples. With LSH, one can expect a data\nsample and its closest similar neighbors to be hashed into\nthe same bucket with a high probability. By treating the\ndata samples placed in the same bucket as candidates for\nsimilarity checking, we significantly reduce the\ncomputational burden associated with similarity detection in\nlarge datasets.\n\nWhile LSH algorithms have traditionally been used for\nfinding nearest neighbors, this module goes a step further\nand explores using LSH for clustering the data. Strictly\nspeaking, this violates the basic mandate of LSH, which is\nto return just the nearest neighbors. (A data sample X being\nY's nearest neighbor and Y being Z's nearest neighbor, in\nthe sense neighbors are commonly defined with the Cosine\nmetric in LSH, does not imply that X and Z will always be\nsufficiently close to be considered each other's nearest\nneighbors.) Nonetheless, if you believe that your datafile\nconsists of non-overlapping data clusters, this module may\ndo a decent job of finding those clusters.\n\nTypical usage syntax for invoking LocalitySensitiveHashing\nin your own code:\n\n::\n\n from LocalitySensitiveHashing import *\n datafile = \"data_for_lsh.csv\"\n lsh = LocalitySensitiveHashing( \n datafile = datafile,\n dim = 10,\n r = 50, \n b = 100, \n expected_num_of_clusters = 10,\n )\n lsh.get_data_from_csv()\n lsh.initialize_hash_store()\n lsh.hash_all_data()\n similarity_groups = lsh.lsh_basic_for_neighborhood_clusters()\n coalesced_similarity_groups = lsh.merge_similarity_groups_with_coalescence( similarity_groups )\n merged_similarity_groups = lsh.merge_similarity_groups_with_l2norm_sample_based( coalesced_similarity_groups )\n lsh.write_clusters_to_file( merged_similarity_groups, \"clusters.txt\" )", "description_content_type": null, "docs_url": null, "download_url": "https://engineering.purdue.edu/kak/distLSH/LocalitySensitiveHashing-1.0.1.tar.gz#md5=5ad10c6eef6acdf7912e2960f552021e", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://engineering.purdue.edu/kak/distLSH/LocalitySensitiveHashing-1.0.1.html", "keywords": "locality sensitive hashing,nearest neighbor calculation,hashing with hyperplanes,clustering", "license": "Python Software Foundation License", "maintainer": null, "maintainer_email": null, "name": "LocalitySensitiveHashing", "package_url": "https://pypi.org/project/LocalitySensitiveHashing/", "platform": "All platforms", "project_url": "https://pypi.org/project/LocalitySensitiveHashing/", "project_urls": { "Download": "https://engineering.purdue.edu/kak/distLSH/LocalitySensitiveHashing-1.0.1.tar.gz#md5=5ad10c6eef6acdf7912e2960f552021e", "Homepage": "https://engineering.purdue.edu/kak/distLSH/LocalitySensitiveHashing-1.0.1.html" }, "release_url": "https://pypi.org/project/LocalitySensitiveHashing/1.0.1/", "requires_dist": null, "requires_python": null, "summary": "A Python implementation of Locality Sensitive Hashing for finding nearest neighbors and clusters in multidimensional numerical data", "version": "1.0.1" }, "last_serial": 2898990, "releases": { "1.0": [ { "comment_text": "", "digests": { "md5": "22057425f52f7ad352af30256ad0a2db", "sha256": "20b5f1f947930e92c4e3c3d74a1521a4e4ef77b2981e33744db014b3e7ea484e" }, "downloads": -1, "filename": "LocalitySensitiveHashing-1.0.tar.gz", "has_sig": false, "md5_digest": "22057425f52f7ad352af30256ad0a2db", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 63053, "upload_time": "2017-05-11T15:36:45", "url": "https://files.pythonhosted.org/packages/d0/f9/b9c322d0fc0c11cf46f3cb78ed960530a013e18f28f1c67c4effbb6c25ed/LocalitySensitiveHashing-1.0.tar.gz" } ], "1.0.1": [ { "comment_text": "", "digests": { "md5": "5ad10c6eef6acdf7912e2960f552021e", "sha256": "a4b38b34b94f5f251096f53655cd2fdff0dbdfa552ed89a3d4ec84e738f0e5c9" }, "downloads": -1, "filename": "LocalitySensitiveHashing-1.0.1.tar.gz", "has_sig": false, "md5_digest": "5ad10c6eef6acdf7912e2960f552021e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 63614, "upload_time": "2017-05-25T20:56:51", "url": "https://files.pythonhosted.org/packages/6d/33/4ac1ce68013e61a418968f3bded7571b4259f45655445c321d6d3fb84ac1/LocalitySensitiveHashing-1.0.1.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "5ad10c6eef6acdf7912e2960f552021e", "sha256": "a4b38b34b94f5f251096f53655cd2fdff0dbdfa552ed89a3d4ec84e738f0e5c9" }, "downloads": -1, "filename": "LocalitySensitiveHashing-1.0.1.tar.gz", "has_sig": false, "md5_digest": "5ad10c6eef6acdf7912e2960f552021e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 63614, "upload_time": "2017-05-25T20:56:51", "url": "https://files.pythonhosted.org/packages/6d/33/4ac1ce68013e61a418968f3bded7571b4259f45655445c321d6d3fb84ac1/LocalitySensitiveHashing-1.0.1.tar.gz" } ] }