{ "info": { "author": "Ke Sang", "author_email": "kesang0156357@gmail.com", "bugtrack_url": null, "classifiers": [], "description": "This module provides an effective reservoir sampling method, with or without\nweights. Reservoir sampling is used when you have a very large dataset of\nunknown size N, and you want to sample a subset of k of these N items\nin a single pass over one stream or one file. \n\nIf no weights are given, each sample has an equal chance of being selected\nfor the final subset; if weights are used, each sample is selected according \nto its weight.\n\n\n# to install\n pip install weightreservoir\n\n\n# to use as a module in python\n from weightreservoir import reservoir\n\n# to use uniform sampling\n uniform = reservoir.UniformSampling(size = 10)\n\n # to add one item into the stream and decide to sample it or not\n uniform.addOne(itemValue)\n\n # to add a list of items into the stream and decide to sample each of them or not\n uniform.addAll(itemValueList) \n\n # to get all the current items of the sampled dataset, returned as a list\n uniform.get()\n\n# to use weighted sampling\n weight_sample = reservoir.WeightSampling(size = 10)\n\n # to add one item into the stream and decide to sample it or not by its weight\n weight_sample.addOne(itemValue, itemWeight)\n\n # to add a list of items into the stream and decide to sample each of them or not by their weight\n weight_sample.addAll(itemValueList, itemWeightList) \n\n # to get all the current items of the sampled dataset, returned as a list\n weight_sample.get() \n\n", "description_content_type": null, "docs_url": null, "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/minddrummer/weightreservoir", "keywords": null, "license": "LICENSE.txt", "maintainer": null, "maintainer_email": null, "name": "weightreservoir", "package_url": "https://pypi.org/project/weightreservoir/", "platform": "UNKNOWN", 
"project_url": "https://pypi.org/project/weightreservoir/", "project_urls": { "Download": "UNKNOWN", "Homepage": "https://github.com/minddrummer/weightreservoir" }, "release_url": "https://pypi.org/project/weightreservoir/1.0/", "requires_dist": null, "requires_python": null, "summary": "reservoir sampling with or without weight from a stream of data", "version": "1.0" }, "last_serial": 2498905, "releases": { "1.0": [ { "comment_text": "", "digests": { "md5": "f2f45141af4d8831e48bf1f4b98c5eea", "sha256": "22ccce1186ad0f85dc66a5a0515e6147076951c165640b9e29ddce4d8105dbd5" }, "downloads": -1, "filename": "weightreservoir-1.0.tar.gz", "has_sig": false, "md5_digest": "f2f45141af4d8831e48bf1f4b98c5eea", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2029, "upload_time": "2016-12-04T18:31:48", "url": "https://files.pythonhosted.org/packages/9f/92/5d6b7b3820a99b06b93ce2d490a56e30f9ac512cca059208d7735f3b3a1f/weightreservoir-1.0.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "f2f45141af4d8831e48bf1f4b98c5eea", "sha256": "22ccce1186ad0f85dc66a5a0515e6147076951c165640b9e29ddce4d8105dbd5" }, "downloads": -1, "filename": "weightreservoir-1.0.tar.gz", "has_sig": false, "md5_digest": "f2f45141af4d8831e48bf1f4b98c5eea", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2029, "upload_time": "2016-12-04T18:31:48", "url": "https://files.pythonhosted.org/packages/9f/92/5d6b7b3820a99b06b93ce2d490a56e30f9ac512cca059208d7735f3b3a1f/weightreservoir-1.0.tar.gz" } ] }