{ "info": { "author": "Vadim Markovtsev", "author_email": "vadim@sourced.tech", "bugtrack_url": null, "classifiers": [ "Development Status :: 3 - Alpha", "Environment :: Console", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Operating System :: POSIX", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Topic :: Software Development :: Libraries" ], "description": "[![Build Status](https://travis-ci.org/src-d/sparkpickle.svg?branch=master)](https://travis-ci.org/src-d/sparkpickle) [![PyPI](https://img.shields.io/pypi/v/sparkpickle.svg)](https://pypi.python.org/pypi/sparkpickle)\n\nSparkPickle\n===========\n\nPure Python implementation of reading SequenceFile-s with pickles written by\nSpark's [saveAsPickleFile()](http://spark.apache.org/docs/latest/api/python/pyspark.html#pyspark.RDD.saveAsPickleFile).\nThis is needed if you store the results from Spark in the efficient binary pickle\nformat and want to load them locally on your computer, without any Spark installation,\ngiven only the actual files.\n\n[Article about creating this project.](https://blog.sourced.tech/post/reading_pyspark_pickles_locally)\n\nInstallation\n------------\n```\npip install sparkpickle\n```\nSupports Python 2.7 and 3.x.\n\nUsage\n-----\nView the contents of the file via command line:\n```\npython -m sparkpickle /path/to/file\n```\n\nCode:\n```\nimport sparkpickle\n\nfor obj in sparkpickle.load_gen(\"/path/to/file\"):\n print(obj)\n```\n\nAPI\n---\nThere are 3 functions: `load()`, `loads()` and `load_gen()`. The first two\nare similar to those found in \"pickle\" package, whereas the last one is the\ngenerator which yields deserialized objects and thus provides the minimal\nmemory footprint.\n\nLicense\n-------\nApache 2.0.\n", "description_content_type": null, "docs_url": null, "download_url": "https://github.com/src-d/sparkpickle", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/src-d/sparkpickle", "keywords": "spark,pyspark,hadoop,rdd,pickle", "license": "Apache 2.0", "maintainer": null, "maintainer_email": null, "name": "sparkpickle", "package_url": "https://pypi.org/project/sparkpickle/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/sparkpickle/", "project_urls": { "Download": "https://github.com/src-d/sparkpickle", "Homepage": "https://github.com/src-d/sparkpickle" }, "release_url": "https://pypi.org/project/sparkpickle/1.0.1/", "requires_dist": null, "requires_python": null, "summary": "Provides functions for reading SequenceFile-s with Python pickles.", "version": "1.0.1" }, "last_serial": 2447478, "releases": { "1.0.0": [ { "comment_text": "", "digests": { "md5": "941e34e113eb10d8aa7409dd3e0c9ac2", "sha256": "dfbd411de993c07903b4b9a912d0581af4c845ac1e9ca57a75fc42797f23c175" }, "downloads": -1, "filename": "sparkpickle-1.0.0.tar.gz", "has_sig": false, "md5_digest": "941e34e113eb10d8aa7409dd3e0c9ac2", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 19990, "upload_time": "2016-11-07T17:56:57", "url": "https://files.pythonhosted.org/packages/59/fa/3253e8bab57e3f34a8292e93d3288e360dd6fca95ed927bab48f71453232/sparkpickle-1.0.0.tar.gz" } ], "1.0.1": [ { "comment_text": "", "digests": { "md5": "c356b21876db42d082c3861df527525a", "sha256": "8e60b3823a9462bb606f9b8648aa5e5101a336c2506aefffe840bc76d114cf24" }, "downloads": -1, "filename": "sparkpickle-1.0.1.tar.gz", "has_sig": false, "md5_digest": "c356b21876db42d082c3861df527525a", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 19858, "upload_time": "2016-11-07T18:07:36", "url": "https://files.pythonhosted.org/packages/2e/3d/f2af239ad6f9a1e0c851a105b663642fcbf11718aba1e014fc43d3382029/sparkpickle-1.0.1.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "c356b21876db42d082c3861df527525a", "sha256": "8e60b3823a9462bb606f9b8648aa5e5101a336c2506aefffe840bc76d114cf24" }, "downloads": -1, "filename": "sparkpickle-1.0.1.tar.gz", "has_sig": false, "md5_digest": "c356b21876db42d082c3861df527525a", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 19858, "upload_time": "2016-11-07T18:07:36", "url": "https://files.pythonhosted.org/packages/2e/3d/f2af239ad6f9a1e0c851a105b663642fcbf11718aba1e014fc43d3382029/sparkpickle-1.0.1.tar.gz" } ] }