{ "info": { "author": "Mark Howison", "author_email": "mhowison@ripl.org", "bugtrack_url": null, "classifiers": [ "Development Status :: 3 - Alpha", "Intended Audience :: Science/Research", "License :: Free for non-commercial use", "Natural Language :: English", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3.7", "Topic :: Scientific/Engineering" ], "description": "# Secure Infrastructure for Research with Administrative Data (SIRAD)\n\n`sirad` is an integration framework for data from administrative systems. It\ndeidentifies administrative data by removing and replacing personally\nidentifiable information (PII) with a global anonymized identifier, allowing\nresearchers to securely join data on an individual from multiple tables without\nknowing the individual's identity. It is developed by\n[Research Improving People's Lives (RIPL)](https://ripl.org).\n\nFor a worked example and further details, please see\n[sirad-example](https://github.com/ripl-org/sirad-example).\n\nTo learn more about the motivation for creating this package and its potential\nuses, please see our article in *Communications of the ACM*:\n\n> J.S. Hastings, M. Howison, T. Lawless, J. Ucles, P. White. (2019).\n> Unlocking Data to Improve Public Policy. *Communications of the ACM* **62**(10): 48-53.\n> doi:[10.1145/3335150](https://doi.org/10.1145/3335150)\n\n## Installation\n\nRequires Python 3.7 or later.\n\nTo install from PyPI using **pip**: \n`pip install sirad`\n\nTo install using **Anaconda Python**: \n`conda install -c ripl-org sirad`\n\nTo install a **development version** from the current directory: \n`pip install -e .`\n\n## Running\nThere is a single command line script included, `sirad`.\n\n`sirad` supports the following arguments:\n* `process` - split raw data files into data and PII files\n* `research` - create a versioned set of research files with a unique\n anonymous identifier\n\n## Configuration\n\nTo set configuration options, create a file called `sirad_config.py` and place\neither in the directory where you are executing the `sirad` command or\nsomewhere else on your Python path. See `_options` in `config.py` for a\ncomplete list of possible options and default values.\n\nThe following options are available:\n\n* `DATA_SALT`: secret salt used for hashing data values. This shouldn't be\n shared. A warning will be outputted if it is not set. Defaults to None.\n\n* `PII_SALT`: secret salt used for hashing pii values. This shouldn't be\n shared. A warning will be issued if it is not set. Defaults to None.\n\n* `LAYOUTS`: directory that contains layout files. Defaults to `layouts/`.\n\n* `RAW_DIR`, `DATA_DIR`, `PII_DIR`, `LINK_DIR`, `RESEARCH_DIR`: paths to where\n the original data, the processed files, and the research files will be saved.\n\n* `VERSION`: the current version number of the processed and research files.\n\n## Layout files\n\n`sirad` uses YAML files to define the layout, or structure, of raw data files.\nThese YAML files define each column in the incoming data and how it should be\nprocessed. More documentation to come on this YAML format.\n\nThe following file formats are supported:\n* csv - change delimiter with delimiter option\n* fixed with\n* xlsx (xls not currently supported)\n\n## Development\n\nSample test data is randomly generated using\n[Faker](https://github.com/joke2k/faker); none of the information identifies\nreal individuals.\n\n* tax.txt - sample tax return data. Includes first, last, DOB and SSN.\n* credit_scores.txt - sample credit score information. Includes first, last and\n DOB but no SSN.\n\nRun unit tests as:\n\n`python -m unittest discover`\n\n`sirad` can also be used as an API from custom Python code. Documentation to come.\n\n## Authors\n* Mark Howison\n* Ted Lawless\n* John Ucles\n* Preston White", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/ripl-org/sirad", "keywords": "", "license": "", "maintainer": "", "maintainer_email": "", "name": "sirad", "package_url": "https://pypi.org/project/sirad/", "platform": "", "project_url": "https://pypi.org/project/sirad/", "project_urls": { "Homepage": "https://github.com/ripl-org/sirad" }, "release_url": "https://pypi.org/project/sirad/0.3.2/", "requires_dist": null, "requires_python": "", "summary": "Secure Infrastructure for Research with Administrative Data", "version": "0.3.2", "yanked": false, "yanked_reason": null }, "last_serial": 12384838, "releases": { "0.1.2": [ { "comment_text": "", "digests": { "md5": "8653da6bf9805cde2ba5b9e85b1295e1", "sha256": "3cacdd76dac561ee71880b27565d04a1219da6f264ce148c6d18c5b19a2a1822" }, "downloads": -1, "filename": "sirad-0.1.2.tar.gz", "has_sig": false, "md5_digest": "8653da6bf9805cde2ba5b9e85b1295e1", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 12989, "upload_time": "2018-06-19T18:58:23", "upload_time_iso_8601": "2018-06-19T18:58:23.882951Z", "url": "https://files.pythonhosted.org/packages/1c/c2/a98757429690f68a221cb2bfc9ef5b88c2d8157d7dd909060eb5c3a4eff6/sirad-0.1.2.tar.gz", "yanked": false, "yanked_reason": null } ], "0.2.0": [ { "comment_text": "", "digests": { "md5": "09fac30918c0f744040b09aba81f7e14", "sha256": "dd35cf145533d0554414170c507b9b82a19b34600436e5f22f543aa318127417" }, "downloads": -1, "filename": "sirad-0.2.0.tar.gz", "has_sig": false, "md5_digest": "09fac30918c0f744040b09aba81f7e14", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 15026, "upload_time": "2019-10-11T14:38:24", "upload_time_iso_8601": "2019-10-11T14:38:24.712020Z", "url": "https://files.pythonhosted.org/packages/a3/58/bb1030809e649a14472564a099b23691b06889790f51fa06ada5cc9c80ca/sirad-0.2.0.tar.gz", "yanked": false, "yanked_reason": null } ], "0.2.1": [ { "comment_text": "", "digests": { "md5": "26fb4f19de1ccefb5d840e0d88ce51cf", "sha256": "37bc1aebb52897201d71ddd9b3401a3482206500b57d48c3b1b442819aff5881" }, "downloads": -1, "filename": "sirad-0.2.1.tar.gz", "has_sig": false, "md5_digest": "26fb4f19de1ccefb5d840e0d88ce51cf", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 15980, "upload_time": "2019-10-30T14:42:01", "upload_time_iso_8601": "2019-10-30T14:42:01.866783Z", "url": "https://files.pythonhosted.org/packages/e1/06/1a2b97bdc1e831b893b51075631cf848877706e2a27cf0518f152719c4e0/sirad-0.2.1.tar.gz", "yanked": false, "yanked_reason": null } ], "0.3.0": [ { "comment_text": "", "digests": { "md5": "8d9fb2853b241fe2d184ee6d92e5863d", "sha256": "f28b708c24ff74a94dd5f166543f118a92003f6911209b7d4ae25931dc5485af" }, "downloads": -1, "filename": "sirad-0.3.0.tar.gz", "has_sig": false, "md5_digest": "8d9fb2853b241fe2d184ee6d92e5863d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 18540, "upload_time": "2019-11-07T17:42:46", "upload_time_iso_8601": "2019-11-07T17:42:46.825698Z", "url": "https://files.pythonhosted.org/packages/4c/96/7a93930ac34260080c7ee54ad861a742dfc8c934d2dc4ca82ded4143c54a/sirad-0.3.0.tar.gz", "yanked": false, "yanked_reason": null } ], "0.3.1": [ { "comment_text": "", "digests": { "md5": "3e61eda71e691906e7b6bb98406fa53d", "sha256": "9d6c228ac00a2cf6be8c2dbcc2c625284443f2df0a59d2c36d093d7d7e40246f" }, "downloads": -1, "filename": "sirad-0.3.1.tar.gz", "has_sig": false, "md5_digest": "3e61eda71e691906e7b6bb98406fa53d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 20650, "upload_time": "2020-09-24T18:56:21", "upload_time_iso_8601": "2020-09-24T18:56:21.893355Z", "url": "https://files.pythonhosted.org/packages/7f/8b/689ffe93459d1d5e0971410a81d5ee7adbb35b2d1bcd21501e0735b44765/sirad-0.3.1.tar.gz", "yanked": false, "yanked_reason": null } ], "0.3.2": [ { "comment_text": "", "digests": { "md5": "da7cecf6a3d3ae0028758553f121d524", "sha256": "21b306c43ec74254fa34aea42063e0c6812d734b50d4bebb8d1386423b26d7e0" }, "downloads": -1, "filename": "sirad-0.3.2.tar.gz", "has_sig": false, "md5_digest": "da7cecf6a3d3ae0028758553f121d524", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 20638, "upload_time": "2021-12-22T21:17:51", "upload_time_iso_8601": "2021-12-22T21:17:51.668363Z", "url": "https://files.pythonhosted.org/packages/7b/08/6496e8fb3d52b80f4add7ad19d414b31eb2d68997370f61175ce29e98acf/sirad-0.3.2.tar.gz", "yanked": false, "yanked_reason": null } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "da7cecf6a3d3ae0028758553f121d524", "sha256": "21b306c43ec74254fa34aea42063e0c6812d734b50d4bebb8d1386423b26d7e0" }, "downloads": -1, "filename": "sirad-0.3.2.tar.gz", "has_sig": false, "md5_digest": "da7cecf6a3d3ae0028758553f121d524", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 20638, "upload_time": "2021-12-22T21:17:51", "upload_time_iso_8601": "2021-12-22T21:17:51.668363Z", "url": "https://files.pythonhosted.org/packages/7b/08/6496e8fb3d52b80f4add7ad19d414b31eb2d68997370f61175ce29e98acf/sirad-0.3.2.tar.gz", "yanked": false, "yanked_reason": null } ], "vulnerabilities": [] }