{ "info": { "author": "brahle", "author_email": "brahle@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: Education", "Intended Audience :: Information Technology", "Intended Audience :: Science/Research", "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Topic :: Software Development :: Libraries :: Python Modules" ], "description": "Data Partitioner\n================\n\nSimple project that can be used to consistently partition a data set\ninto two parts - a test set and a training set. There are also helpful\nmethods that provide a way to partition into more groups of elements.\n\nInstallation\n============\n\nThe easiest way to install this module is to install it via ``pip``:\n\n::\n\n $ pip install data_partitioner\n\nUsage\n=====\n\nUsing this module is dead simple. The main module (``DatasetSuplier``)\noffers two methods that return the training set (``training_set()``) or\nthe test set (``test_set()``). Both of these methods are consitent, so\nno matter how many times you call them on the same object, they will\nreturn the same set of elements back.\n\nYou have two configuration options you can specify:\n\n- ``training_percent`` - the percent of the dataset used for the\n training set. It defaults to ``0.8``.\n- ``partitioning_function`` - the function that's used to partition the\n dataset.\n- It defaults to ``data_partitioner.pseudorandom_function``, which will\n randomly assign every element of the dataset to either the test set\n or the training set.\n- Another useful existing option you can set it to is\n ``data_partitioner.LinearFakeRandomFunction``, which will make sure\n that no elements in the training set come after any elements of the\n test set.\n- You can also manually write this callable, which will take one\n parameter as input - the index of the element currently considered.\n\nExample\n=======\n\n::\n\n from data_partitioner import DatasetSuplier\n\n dataset = [\n ('Alice', 10, 23, 401),\n ('Bob', 20, 40, 812),\n ('Christine', 41, 92, 533),\n ('Dave', 843, 12, -5),\n ('Elizabeth', 682, 33, -7),\n ('Fred', 95, 642, 34),\n ]\n suplier = DatasetSuplier(dataset)\n\n for iteration in range(100):\n for element in suplier.training_set():\n do_train(element[1])\n for element in suplier.test_set():\n do_evaluate(element[1])", "description_content_type": null, "docs_url": null, "download_url": null, "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/brahle/data_partitioner", "keywords": null, "license": "GNU Lesser General Public License (LGPL), Version 3", "maintainer": null, "maintainer_email": null, "name": "data-partitioner", "package_url": "https://pypi.org/project/data-partitioner/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/data-partitioner/", "project_urls": { "Homepage": "https://github.com/brahle/data_partitioner" }, "release_url": "https://pypi.org/project/data-partitioner/0.1/", "requires_dist": null, "requires_python": null, "summary": "Consistently partitions a dataset into a training set and a test set", "version": "0.1" }, "last_serial": 2764933, "releases": { "0.1": [ { "comment_text": "", "digests": { "md5": "c22acf4b57bb87cd423603036fb2b4aa", "sha256": "794f0244fa3bcee17e0b878f8f7e11b33ba72717e84b48cdc26df7f48f4be4eb" }, "downloads": -1, "filename": "data_partitioner-0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "c22acf4b57bb87cd423603036fb2b4aa", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 6038, "upload_time": "2017-04-10T01:52:42", "url": "https://files.pythonhosted.org/packages/2f/b1/51a87731b6d6f3745ffcc9ddecb9e9193f047f7404eceb6a1259cc223041/data_partitioner-0.1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "2878e365f05186bfd2eb80db802313ce", "sha256": "dd7a9bea91adb4655986197fde06d172b0a9f5f55696982a373e41969921f1c1" }, "downloads": -1, "filename": "data_partitioner-0.1.tar.gz", "has_sig": false, "md5_digest": "2878e365f05186bfd2eb80db802313ce", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3487, "upload_time": "2017-04-10T01:52:46", "url": "https://files.pythonhosted.org/packages/32/37/c00f967e6f48d3507eb23af76d4cb8232263c2b0443419cd71d7de13c074/data_partitioner-0.1.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "c22acf4b57bb87cd423603036fb2b4aa", "sha256": "794f0244fa3bcee17e0b878f8f7e11b33ba72717e84b48cdc26df7f48f4be4eb" }, "downloads": -1, "filename": "data_partitioner-0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "c22acf4b57bb87cd423603036fb2b4aa", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 6038, "upload_time": "2017-04-10T01:52:42", "url": "https://files.pythonhosted.org/packages/2f/b1/51a87731b6d6f3745ffcc9ddecb9e9193f047f7404eceb6a1259cc223041/data_partitioner-0.1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "2878e365f05186bfd2eb80db802313ce", "sha256": "dd7a9bea91adb4655986197fde06d172b0a9f5f55696982a373e41969921f1c1" }, "downloads": -1, "filename": "data_partitioner-0.1.tar.gz", "has_sig": false, "md5_digest": "2878e365f05186bfd2eb80db802313ce", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3487, "upload_time": "2017-04-10T01:52:46", "url": "https://files.pythonhosted.org/packages/32/37/c00f967e6f48d3507eb23af76d4cb8232263c2b0443419cd71d7de13c074/data_partitioner-0.1.tar.gz" } ] }