{ "info": { "author": "Kwame Porter Robinson", "author_email": "kwamepr@umich.edu", "bugtrack_url": null, "classifiers": [ "Development Status :: 2 - Pre-Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Natural Language :: English", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7" ], "description": "# adversarial_labeller\n---\n\nAdversarial labeller is a sklearn compatible labeller that scores instances as belonging to the test dataset or not to help model selection under data drift. Adversarial labeller is distributed under the MIT license.\n\n## Installation\n\n*Dependencies*\n\nAdversarial labeller requires:\n* Python (>= 3.7)\n* scikit-learn (>= 0.21.0)\n* imbalanced-learn (>= 0.5.0)\n* pandas (>= 0.25.0)\n\n*User installation*\n\nThe easiest way to install adversarial labeller is using \n```pip\npip install adversarial_labeller\n```\n\n*Example Usage*\n```python\nimport numpy as np\nimport pandas as pd\nfrom sklearn.datasets.samples_generator import make_blobs\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.ensemble import RandomForestClassifier\nfrom adversarial_labeller import AdversarialLabelerFactory, Scorer\n\nscoring_metric = accuracy_score\n\n# Our blob data generation parameters for this example\nnumber_of_samples = 1000\nnumber_of_test_samples = 300\n\n# Generate 1d blob data and label a portion as test data\n# ... 1d blob data can be visualized as a rug plot\nvariables, labels = \\\nmake_blobs(\n n_samples=number_of_samples,\n centers=2,\n n_features=1,\n random_state=0\n)\n\ndf = pd.DataFrame(\n{\n 'independent_variable':variables.flatten(),\n 'dependent_variable': labels,\n 'label': 0 # default to train data\n}\n)\ntest_indices = df.index[-number_of_test_samples:]\ntrain_indices = df.index[:-number_of_test_samples]\n\ndf.loc[test_indices,'label'] = 1 # ... 
now we mark instances that are test data\n\n# Now perturb the test samples to simulate data drift/different test distribution\ndf.loc[test_indices, \"independent_variable\"] +=\\\nnp.std(df.independent_variable)\n\n# ... now we have an example of data drift where adversarial labeling can be used to better estimate the actual test accuracy\n\nfeatures_for_labeller = df.independent_variable\nlabels_for_labeller = df.label\n\npipeline, flip_binary_predictions =\\\n AdversarialLabelerFactory(\n features = features_for_labeller,\n labels = labels_for_labeller,\n run_pipeline = False\n ).fit_with_best_params()\n\nscorer = Scorer(the_scorer=pipeline,\n flip_binary_predictions=flip_binary_predictions)\n\n# Now we evaluate a classifier on training data only, but using\n# our fancy adversarial labeller\n_X = df.loc[train_indices]\\\n .independent_variable\\\n .values\\\n .reshape(-1,1)\n\n_X_test = df.loc[test_indices]\\\n .independent_variable\\\n .values\\\n .reshape(-1,1)\n\n# ... sklearn wants firmly defined shapes\nclf_adver = RandomForestClassifier(n_estimators=100, random_state=1)\nadversarial_scores =\\\n cross_val_score(\n X=_X,\n y=df.loc[train_indices].dependent_variable,\n estimator=clf_adver,\n scoring=scorer.grade,\n cv=10,\n n_jobs=-1,\n verbose=1)\n# ... and we get ~ 0.70 - 0.68\naverage_adversarial_score =\\\n np.array(adversarial_scores).mean()\n\n# ... let's see how this compares with normal cross validation\nclf = RandomForestClassifier(n_estimators=100, random_state=1)\nscores =\\\n cross_val_score(\n X=_X,\n y=df.loc[train_indices].dependent_variable,\n estimator=clf,\n cv=10,\n n_jobs=-1,\n verbose=1)\n\n# ... and we get ~ 0.92\naverage_score =\\\n np.array(scores).mean()\n\n# now let's see how this compares with the actual test score\nclf_all = RandomForestClassifier(n_estimators=100, random_state=1)\nclf_all.fit(_X,\n df.loc[train_indices].dependent_variable)\n\n# ... 
actual test score is 0.70\nactual_score =\\\naccuracy_score(\n clf_all.predict(_X_test),\n df.loc[test_indices].dependent_variable\n)\n\nadversarial_result = abs(average_adversarial_score - actual_score)\nprint(f\"... adversarial labelled cross validation was {adversarial_result:.2f} points different than actual.\") # ... 0.00 - 0.02 points\n\ncross_val_result = abs(average_score - actual_score)\nprint(f\"... regular validation was {cross_val_result:.2f} points different than actual.\") # ... 0.23 points\n\n# See tests/ for additional examples, including against the Titanic and stock market trading\n```\n\n\n", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/robinsonkwame/adversarial_labeller", "keywords": "model selection,validation,data drift", "license": "MIT license", "maintainer": "", "maintainer_email": "", "name": "adversarial-labeller", "package_url": "https://pypi.org/project/adversarial-labeller/", "platform": "", "project_url": "https://pypi.org/project/adversarial-labeller/", "project_urls": { "Homepage": "https://github.com/robinsonkwame/adversarial_labeller" }, "release_url": "https://pypi.org/project/adversarial-labeller/0.1.8/", "requires_dist": [ "attrs (==19.3.0)", "bleach (==3.1.0)", "certifi (==2019.11.28)", "cffi (==1.13.2)", "chardet (==3.0.4)", "cryptography (==2.8)", "docutils (==0.15.2)", "idna (==2.8)", "imbalanced-learn (==0.6.1)", "jeepney (==0.4.1)", "joblib (==0.14.1)", "keyring (==21.0.0)", "more-itertools (==8.0.2)", "numpy (==1.18.0)", "packaging (==19.2)", "pandas (==0.25.3)", "pkginfo (==1.5.0.1)", "pluggy (==0.13.1)", "py (==1.8.1)", "pycparser (==2.19)", "pygments (==2.5.2)", "pyparsing (==2.4.6)", "pytest (==5.3.2)", "python-dateutil (==2.8.1)", "pytz (==2019.3)", "readme-renderer (==24.0)", "requests-toolbelt (==0.9.1)", "requests (==2.22.0)", "scikit-learn (==0.22)", "scipy (==1.4.1)", "six 
(==1.13.0)", "sklearn (==0.0)", "tqdm (==4.41.0)", "twine (==3.1.1)", "urllib3 (==1.25.7)", "wcwidth (==0.1.7)", "webencodings (==0.5.1)", "zipp (==0.6.0)", "importlib-metadata (==1.3.0) ; python_version < \"3.8\"", "secretstorage (==3.1.1) ; sys_platform == \"linux\"" ], "requires_python": "", "summary": "Sklearn compatible model instance labelling tool to help validate models in situations involving data drift.", "version": "0.1.8", "yanked": false, "yanked_reason": null }, "last_serial": 6369120, "releases": { "0.1.2": [ { "comment_text": "", "digests": { "md5": "339a6a7504d9e21678c70a632b1cc9ee", "sha256": "a4444ee6343450d7629138cc9b474980c9263cfdac3e424ac77348c352389d1e" }, "downloads": -1, "filename": "adversarial_labeller-0.1.2-py3-none-any.whl", "has_sig": false, "md5_digest": "339a6a7504d9e21678c70a632b1cc9ee", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 5760, "upload_time": "2019-10-19T16:20:51", "upload_time_iso_8601": "2019-10-19T16:20:51.536680Z", "url": "https://files.pythonhosted.org/packages/14/41/5a233be2f212eeb7c35302bd6a953190991a2e8361e8d21e57c980d88249/adversarial_labeller-0.1.2-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "9f7fc74e40966643b29321081a7d5530", "sha256": "3ed28ca0e46651a2067666f105a4cd87cf62439104da48a3610728b5fc0e9225" }, "downloads": -1, "filename": "adversarial labeller-0.1.2.tar.gz", "has_sig": false, "md5_digest": "9f7fc74e40966643b29321081a7d5530", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4194, "upload_time": "2019-10-19T16:20:53", "upload_time_iso_8601": "2019-10-19T16:20:53.920278Z", "url": "https://files.pythonhosted.org/packages/6c/35/54234d66809d9be51c46d74b305102f00ac644c98cc2dcc7cfe35a5c3c79/adversarial%20labeller-0.1.2.tar.gz", "yanked": false, "yanked_reason": null } ], "0.1.3": [ { "comment_text": "", "digests": { "md5": "de9acae1cb69ca9f216a767ae76caf59", "sha256": 
"d86620b95be8ebee54a5c807f3aa6e516c38f666bd5404f74672ea95e9be0807" }, "downloads": -1, "filename": "adversarial_labeller-0.1.3-py3-none-any.whl", "has_sig": false, "md5_digest": "de9acae1cb69ca9f216a767ae76caf59", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 5897, "upload_time": "2019-10-19T17:31:10", "upload_time_iso_8601": "2019-10-19T17:31:10.534784Z", "url": "https://files.pythonhosted.org/packages/7e/6b/b37928d79efe07d60c212064f563dd145805190dd9ffe2a4d3476ff0670e/adversarial_labeller-0.1.3-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "171ac904df9aa219dd2ec104002d6f8b", "sha256": "d83b70b8fa06c5f57c354b4cb5e4985b54dcc985971b79c35b97c99e2b55a11e" }, "downloads": -1, "filename": "adversarial labeller-0.1.3.tar.gz", "has_sig": false, "md5_digest": "171ac904df9aa219dd2ec104002d6f8b", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4331, "upload_time": "2019-10-19T17:31:12", "upload_time_iso_8601": "2019-10-19T17:31:12.428025Z", "url": "https://files.pythonhosted.org/packages/ed/fd/9cab01b957e9c5ebc69f12b72b4cab1005ef603c46524aa772c3245b1a78/adversarial%20labeller-0.1.3.tar.gz", "yanked": false, "yanked_reason": null } ], "0.1.4": [ { "comment_text": "", "digests": { "md5": "308c5f56dfc3aabdaa208acd37e2e12c", "sha256": "1ef81c151fa6c96b542114083fcb12ae5f87990a1c57da9fcc62bba325653dfa" }, "downloads": -1, "filename": "adversarial_labeller-0.1.4-py3-none-any.whl", "has_sig": false, "md5_digest": "308c5f56dfc3aabdaa208acd37e2e12c", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 5915, "upload_time": "2019-10-24T00:44:29", "upload_time_iso_8601": "2019-10-24T00:44:29.280034Z", "url": "https://files.pythonhosted.org/packages/94/a9/fae787a460b436702d23d88fb76762026dfcb728e3b15c48ea41d9d446ee/adversarial_labeller-0.1.4-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": 
"", "digests": { "md5": "9d41d0b3427eb215dc8460d853cc8a45", "sha256": "d1bca66344c84ee923f40c5e0a1fb139ce4a6045fc6305b61b399edaceb90d2e" }, "downloads": -1, "filename": "adversarial labeller-0.1.4.tar.gz", "has_sig": false, "md5_digest": "9d41d0b3427eb215dc8460d853cc8a45", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5370, "upload_time": "2019-10-24T00:44:30", "upload_time_iso_8601": "2019-10-24T00:44:30.744062Z", "url": "https://files.pythonhosted.org/packages/eb/ec/31f67037b9c825cc9ec7629bd55260d2c8603b36329a1c047e7e6e1bcfcb/adversarial%20labeller-0.1.4.tar.gz", "yanked": false, "yanked_reason": null } ], "0.1.5": [ { "comment_text": "", "digests": { "md5": "69c8e1d78a065621bbe95ceb7d202e1e", "sha256": "31775a5b60cbdcdeb44f108d7256d170bc3cf263a1cb87e2414f93412046e03a" }, "downloads": -1, "filename": "adversarial_labeller-0.1.5-py3-none-any.whl", "has_sig": false, "md5_digest": "69c8e1d78a065621bbe95ceb7d202e1e", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 7273, "upload_time": "2019-10-27T16:53:35", "upload_time_iso_8601": "2019-10-27T16:53:35.876513Z", "url": "https://files.pythonhosted.org/packages/86/36/e89f0cabcb516bd6922609465b0ce6e4050bb593309ef3829000b1e5384a/adversarial_labeller-0.1.5-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "07707dac69428aa3b57202a581a95442", "sha256": "69e2957ea5139a115ac559f8300c2d8f67c748af44a93780bac3ea332176e366" }, "downloads": -1, "filename": "adversarial labeller-0.1.5.tar.gz", "has_sig": false, "md5_digest": "07707dac69428aa3b57202a581a95442", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5954, "upload_time": "2019-10-27T16:53:37", "upload_time_iso_8601": "2019-10-27T16:53:37.662779Z", "url": "https://files.pythonhosted.org/packages/11/df/0ca4bf5c6a5745916993b3f3bb751514c2f03edb63da3778fe9d24cbb765/adversarial%20labeller-0.1.5.tar.gz", "yanked": 
false, "yanked_reason": null } ], "0.1.6": [ { "comment_text": "", "digests": { "md5": "9496393b6953be0ba2d78dd11e23480e", "sha256": "f405aa8ddb9bca6c8ca9ff460c3e42471b5666d6d3df5f1208d5663e9d21f263" }, "downloads": -1, "filename": "adversarial_labeller-0.1.6-py3-none-any.whl", "has_sig": false, "md5_digest": "9496393b6953be0ba2d78dd11e23480e", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 7701, "upload_time": "2019-12-28T17:34:40", "upload_time_iso_8601": "2019-12-28T17:34:40.511246Z", "url": "https://files.pythonhosted.org/packages/ab/34/2794c037c4395d66105cbaaecf08f4db4ca0540e64f926a2c2bafe709349/adversarial_labeller-0.1.6-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "102ba0497aab1c62061c8975f55fd02a", "sha256": "f08ee8b6af56a6cb82cda0b73b1933e84fce186f3934ab9b0f1101c2b2e9eddf" }, "downloads": -1, "filename": "adversarial labeller-0.1.6.tar.gz", "has_sig": false, "md5_digest": "102ba0497aab1c62061c8975f55fd02a", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6346, "upload_time": "2019-12-28T17:34:41", "upload_time_iso_8601": "2019-12-28T17:34:41.665600Z", "url": "https://files.pythonhosted.org/packages/d7/fd/85a821b545c3fd5c8eac3b581ab7d01af172acddc0d5ef7da07b11be2b4e/adversarial%20labeller-0.1.6.tar.gz", "yanked": false, "yanked_reason": null } ], "0.1.7": [ { "comment_text": "", "digests": { "md5": "33e8c3bdcec165895ac5199dd1840ad1", "sha256": "70dcb26878f55c8a0f1f725625518ed6f4b07682dbc1ad0776e2b2a0bdb093a9" }, "downloads": -1, "filename": "adversarial_labeller-0.1.7-py3-none-any.whl", "has_sig": false, "md5_digest": "33e8c3bdcec165895ac5199dd1840ad1", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 7573, "upload_time": "2019-12-28T18:31:18", "upload_time_iso_8601": "2019-12-28T18:31:18.777559Z", "url": 
"https://files.pythonhosted.org/packages/04/a0/fd1743150c3d02ae4593cf028ab03bc5d7167552c8b52713450346510649/adversarial_labeller-0.1.7-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "b367c7ef243c78db1894c9fd88d38a42", "sha256": "d673319dba1544395bd0fb681259b1679e62c19887b75f270f67bfe8da90d468" }, "downloads": -1, "filename": "adversarial labeller-0.1.7.tar.gz", "has_sig": false, "md5_digest": "b367c7ef243c78db1894c9fd88d38a42", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6231, "upload_time": "2019-12-28T18:31:20", "upload_time_iso_8601": "2019-12-28T18:31:20.294070Z", "url": "https://files.pythonhosted.org/packages/17/19/6f7e28d40ae5afc66283cd5f1ac9cf6eafbc54583a6567e51b68f14ae96e/adversarial%20labeller-0.1.7.tar.gz", "yanked": false, "yanked_reason": null } ], "0.1.8": [ { "comment_text": "", "digests": { "md5": "0aa802592a8bdd4e55a3e468d17d348b", "sha256": "281ff665aa6bbda4843ba734487860cdd48aa7bd49c0d15db40551d2167b0f54" }, "downloads": -1, "filename": "adversarial_labeller-0.1.8-py3-none-any.whl", "has_sig": false, "md5_digest": "0aa802592a8bdd4e55a3e468d17d348b", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 7910, "upload_time": "2019-12-28T18:44:11", "upload_time_iso_8601": "2019-12-28T18:44:11.682146Z", "url": "https://files.pythonhosted.org/packages/a6/29/df7a3ec3812142575459f90a3278167757e3852f16ebdda4a3ab6864de63/adversarial_labeller-0.1.8-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "ef04d8e76e952ea4c876a08b0e3c642e", "sha256": "5cdc9b594cf38e6e21eb4acc8d77dda6ef4882672ff790e67e3965e2319c76d7" }, "downloads": -1, "filename": "adversarial labeller-0.1.8.tar.gz", "has_sig": false, "md5_digest": "ef04d8e76e952ea4c876a08b0e3c642e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6547, "upload_time": "2019-12-28T18:44:13", 
"upload_time_iso_8601": "2019-12-28T18:44:13.105769Z", "url": "https://files.pythonhosted.org/packages/5e/26/0677577be4d9a6d9907cf32dc1c850ae15e4bb8e41bcf6583aaec14ade1e/adversarial%20labeller-0.1.8.tar.gz", "yanked": false, "yanked_reason": null } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "0aa802592a8bdd4e55a3e468d17d348b", "sha256": "281ff665aa6bbda4843ba734487860cdd48aa7bd49c0d15db40551d2167b0f54" }, "downloads": -1, "filename": "adversarial_labeller-0.1.8-py3-none-any.whl", "has_sig": false, "md5_digest": "0aa802592a8bdd4e55a3e468d17d348b", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 7910, "upload_time": "2019-12-28T18:44:11", "upload_time_iso_8601": "2019-12-28T18:44:11.682146Z", "url": "https://files.pythonhosted.org/packages/a6/29/df7a3ec3812142575459f90a3278167757e3852f16ebdda4a3ab6864de63/adversarial_labeller-0.1.8-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "ef04d8e76e952ea4c876a08b0e3c642e", "sha256": "5cdc9b594cf38e6e21eb4acc8d77dda6ef4882672ff790e67e3965e2319c76d7" }, "downloads": -1, "filename": "adversarial labeller-0.1.8.tar.gz", "has_sig": false, "md5_digest": "ef04d8e76e952ea4c876a08b0e3c642e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6547, "upload_time": "2019-12-28T18:44:13", "upload_time_iso_8601": "2019-12-28T18:44:13.105769Z", "url": "https://files.pythonhosted.org/packages/5e/26/0677577be4d9a6d9907cf32dc1c850ae15e4bb8e41bcf6583aaec14ade1e/adversarial%20labeller-0.1.8.tar.gz", "yanked": false, "yanked_reason": null } ], "vulnerabilities": [] }