{ "info": { "author": "Jorge C. Leitao", "author_email": "", "bugtrack_url": null, "classifiers": [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3" ], "description": "[![Build Status](https://travis-ci.org/jorgecarleitao/schemaflow.svg?branch=master)](https://travis-ci.org/jorgecarleitao/schemaflow)\n[![Coverage Status](https://coveralls.io/repos/github/jorgecarleitao/schemaflow/badge.svg)](https://coveralls.io/github/jorgecarleitao/schemaflow)\n[![Documentation Status](https://readthedocs.org/projects/schemaflow/badge/?version=latest)](https://schemaflow.readthedocs.io/en/latest/?badge=latest)\n\n# SchemaFlow\n\nThis is a a package to write data pipelines for data science systematically in Python.\nThanks for checking it out.\n\nCheck out the very comprehensive documentation [here](https://schemaflow.readthedocs.io/en/latest/).\n\n## The problem that this package solves\n\nA major challenge in creating a robust data pipeline is guaranteeing interoperability between\npipes: how do we guarantee that the pipe that someone wrote is compatible\nwith others' pipe *without* running the whole pipeline multiple times until we get it right?\n\n## The solution that this package adopts\n\nThis package declares an API to define a stateful data transformation that gives \nthe developer the opportunity to declare what comes in, what comes out, and what states are modified\non each pipe and therefore the whole pipeline. Check out \n[`tests/test_pipeline.py`](https://github.com/jorgecarleitao/schemaflow/blob/master/tests/test_pipeline.py) or \n[`examples/end_to_end_kaggle.py`](https://github.com/jorgecarleitao/schemaflow/blob/master/examples/end_to_end_kaggle.py)\n\n## Install \n\n pip install schemaflow\n\nor, install the latest (recommended for now):\n\n git clone https://github.com/jorgecarleitao/schemaflow\n cd schemaflow && pip install -e .\n\n## Run examples\n\nWe provide one example that demonstrate the usage of SchemaFlow's API\non developing an end-to-end pipeline applied to \n[one of Kaggle's exercises](https://www.kaggle.com/c/house-prices-advanced-regression-techniques).\n\nTo run it, download the data in that exercise to `examples/all/` and run\n\n pip install -r examples/requirements.txt\n python examples/end_to_end_kaggle.py\n\nYou should see some prints to the console as well as the generation of 3 files at \n`examples/`: two plots and one `submission.txt`.\n\n## Run tests\n\n pip install -r tests/requirements.txt\n python -m unittest discover\n\n## Build documentation\n\n pip install -r docs/requirements.txt\n cd docs && make html && cd ..\n open docs/build/html/index.html\n\n\n", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/jorgecarleitao/schemaflow", "keywords": "", "license": "", "maintainer": "", "maintainer_email": "", "name": "schemaflow", "package_url": "https://pypi.org/project/schemaflow/", "platform": "", "project_url": "https://pypi.org/project/schemaflow/", "project_urls": { "Homepage": "https://github.com/jorgecarleitao/schemaflow" }, "release_url": "https://pypi.org/project/schemaflow/0.2.0/", "requires_dist": null, "requires_python": "", "summary": "A package to write schema-aware data pipelines", "version": "0.2.0" }, "last_serial": 4275507, "releases": { "0.1.0": [ { "comment_text": "", "digests": { "md5": "3a2b098bdb8030a081261271e6217775", "sha256": "7432f613f636fc0af9f3a30c66ae2225d38b3f44c25567f090ac4c7dd0f434fe" }, "downloads": -1, "filename": "schemaflow-0.1.0-py3-none-any.whl", "has_sig": false, "md5_digest": "3a2b098bdb8030a081261271e6217775", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 15443, "upload_time": "2018-09-08T08:10:09", "url": "https://files.pythonhosted.org/packages/13/83/3bbb5ba3cffcbf6189621dd4119bbd0c030df49b527b8a4b5acad2bdba0e/schemaflow-0.1.0-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "4ff4b131aaed302131c3c91a5d2399c7", "sha256": "c2e27edafc8875a03b89dc37c202a1ceefbcd0617cdbe50c66431fa0c5c548fb" }, "downloads": -1, "filename": "schemaflow-0.1.0.tar.gz", "has_sig": false, "md5_digest": "4ff4b131aaed302131c3c91a5d2399c7", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 11222, "upload_time": "2018-09-08T08:10:11", "url": "https://files.pythonhosted.org/packages/b4/23/23e2075cbec725f08a3bbf6ba62ee8e2cfadbb442446bc51bfb24cfa1dec/schemaflow-0.1.0.tar.gz" } ], "0.2.0": [ { "comment_text": "", "digests": { "md5": "bfc8f9a9559593e1bc58a79adf373e9a", "sha256": "db3f4930ab135176b85ba01067b608959f1bcb6ec64bbcbbe510b3ed73f3983c" }, "downloads": -1, "filename": "schemaflow-0.2.0-py3-none-any.whl", "has_sig": false, "md5_digest": "bfc8f9a9559593e1bc58a79adf373e9a", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 23482, "upload_time": "2018-09-15T20:38:17", "url": "https://files.pythonhosted.org/packages/3f/b3/78a1499748782bac31bff0208faca37da748839d04805aaf22ef8454ba1a/schemaflow-0.2.0-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "b1854ae7d328993317e61894edb6b3a0", "sha256": "1333fc45a8f6ffb7170e0d803b45bfbfc0d09c1a50380718bfb1fffea2bb931d" }, "downloads": -1, "filename": "schemaflow-0.2.0.tar.gz", "has_sig": false, "md5_digest": "b1854ae7d328993317e61894edb6b3a0", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 13870, "upload_time": "2018-09-15T20:38:19", "url": "https://files.pythonhosted.org/packages/9e/2e/7a630cb63a3f0aa05e30ecb308b29143a4f5706688421781f94421907ad5/schemaflow-0.2.0.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "bfc8f9a9559593e1bc58a79adf373e9a", "sha256": "db3f4930ab135176b85ba01067b608959f1bcb6ec64bbcbbe510b3ed73f3983c" }, "downloads": -1, "filename": "schemaflow-0.2.0-py3-none-any.whl", "has_sig": false, "md5_digest": "bfc8f9a9559593e1bc58a79adf373e9a", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 23482, "upload_time": "2018-09-15T20:38:17", "url": "https://files.pythonhosted.org/packages/3f/b3/78a1499748782bac31bff0208faca37da748839d04805aaf22ef8454ba1a/schemaflow-0.2.0-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "b1854ae7d328993317e61894edb6b3a0", "sha256": "1333fc45a8f6ffb7170e0d803b45bfbfc0d09c1a50380718bfb1fffea2bb931d" }, "downloads": -1, "filename": "schemaflow-0.2.0.tar.gz", "has_sig": false, "md5_digest": "b1854ae7d328993317e61894edb6b3a0", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 13870, "upload_time": "2018-09-15T20:38:19", "url": "https://files.pythonhosted.org/packages/9e/2e/7a630cb63a3f0aa05e30ecb308b29143a4f5706688421781f94421907ad5/schemaflow-0.2.0.tar.gz" } ] }