{ "info": { "author": "Kevin Wurster", "author_email": "wursterk@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: Libraries", "Topic :: Text Processing", "Topic :: Utilities" ], "description": "================\nMapReduce Python\n================\n\nExperimental Pythonic MapReduce inspired by `Spotify's luigi framework `_.\n\n.. image:: https://travis-ci.org/geowurster/tinymr.svg?branch=master\n :target: https://travis-ci.org/geowurster/tinymr?branch=master\n\n.. image:: https://coveralls.io/repos/geowurster/tinymr/badge.svg?branch=master\n :target: https://coveralls.io/r/geowurster/tinymr?branch=master\n\n\nCanonical Word Count Example\n============================\n\nCurrently there are two MapReduce implementations, one that includes sorting and\none that does not. The example below would not benefit from sorting so we can\ntake advantage of the inherent optimization of not sorting. The API is the same\nbut ``tinymr.memory.MRSerial()`` sorts after partitioning and again between the\n``reducer()`` and ``final_reducer()``.\n\n.. code-block:: python\n\n import json\n import re\n import sys\n\n from tinymr.memory import MRSerial\n\n\n class WordCount(MRSerial):\n\n def __init__(self):\n self.pattern = re.compile('[\\W_]+')\n\n def mapper(self, item):\n for word in item.split():\n word = self.pattern.sub('', word)\n if word:\n yield word.lower(), 1\n\n def reducer(self, key, values):\n yield key, sum(values)\n\n def final_reducer(self, pairs):\n return {k: tuple(v)[0] for k, v in pairs}\n\n\n wc = WordCount()\n with open('LICENSE.txt') as f:\n out = wc(f)\n print(json.dumps(out, indent=4, sort_keys=True))\n\nTruncated output:\n\n.. code-block:: json\n\n {\n \"a\": 1,\n \"above\": 2,\n \"advised\": 1,\n \"all\": 1,\n \"and\": 8,\n \"andor\": 1\n }\n\n\nDeveloping\n==========\n\n.. code-block:: console\n\n $ git clone https://github.com/geowurster/tinymr.git\n $ cd tinymr\n $ pip install -e .\\[dev\\]\n $ py.test tests --cov tinymr --cov-report term-missing\n\n\nLicense\n=======\n\nSee ``LICENSE.txt``\n\n\nChangelog\n=========\n\nSee ``CHANGES.md``", "description_content_type": null, "docs_url": null, "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/geowurster/tinymr", "keywords": "experimental map reduce mapreduce", "license": "New BSD", "maintainer": null, "maintainer_email": null, "name": "tinymr", "package_url": "https://pypi.org/project/tinymr/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/tinymr/", "project_urls": { "Download": "UNKNOWN", "Homepage": "https://github.com/geowurster/tinymr" }, "release_url": "https://pypi.org/project/tinymr/0.1/", "requires_dist": null, "requires_python": null, "summary": "Pythonic in-memory MapReduce.", "version": "0.1" }, "last_serial": 2349389, "releases": { "0.1": [ { "comment_text": "", "digests": { "md5": "6c47547eecc5f6b3093da2da127b15a4", "sha256": "093f2359d19a279a1ba88fae77b3af25650245f30ec7c027a99e733e95d1bff9" }, "downloads": -1, "filename": "tinymr-0.1-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "6c47547eecc5f6b3093da2da127b15a4", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 15074, "upload_time": "2016-09-18T17:42:20", "url": "https://files.pythonhosted.org/packages/0b/3a/2b8c208985c7e8ee3be96a880b1757076c35bb67d98ae2a9a52948a1e709/tinymr-0.1-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "4e554c14e854da308529f9e201150fd7", "sha256": "bba34762221bcf14696679b11f81810bd5a09258c1a9eec4a1c63cfbc11fe980" }, "downloads": -1, "filename": "tinymr-0.1.tar.gz", "has_sig": false, "md5_digest": "4e554c14e854da308529f9e201150fd7", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 16810, "upload_time": "2016-09-18T17:42:23", "url": "https://files.pythonhosted.org/packages/d8/3c/adbb7418a860a1202b7bb8f1dfc296ff43b622512a330ca8919e5edd1b87/tinymr-0.1.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "6c47547eecc5f6b3093da2da127b15a4", "sha256": "093f2359d19a279a1ba88fae77b3af25650245f30ec7c027a99e733e95d1bff9" }, "downloads": -1, "filename": "tinymr-0.1-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "6c47547eecc5f6b3093da2da127b15a4", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 15074, "upload_time": "2016-09-18T17:42:20", "url": "https://files.pythonhosted.org/packages/0b/3a/2b8c208985c7e8ee3be96a880b1757076c35bb67d98ae2a9a52948a1e709/tinymr-0.1-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "4e554c14e854da308529f9e201150fd7", "sha256": "bba34762221bcf14696679b11f81810bd5a09258c1a9eec4a1c63cfbc11fe980" }, "downloads": -1, "filename": "tinymr-0.1.tar.gz", "has_sig": false, "md5_digest": "4e554c14e854da308529f9e201150fd7", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 16810, "upload_time": "2016-09-18T17:42:23", "url": "https://files.pythonhosted.org/packages/d8/3c/adbb7418a860a1202b7bb8f1dfc296ff43b622512a330ca8919e5edd1b87/tinymr-0.1.tar.gz" } ] }