{ "info": { "author": "Chao Li", "author_email": "chaoli.job@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 2 - Pre-Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Natural Language :: English", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7" ], "description": "Easy-Tokenizer\n==================\n\nDescription\n-----------\n\nMost tokenizers are eithor too cumbersom (Neural Network based), or too simple.\nThis simple rule based tokenizer is type, small, and sufficient good. Specially,\nit handles long strings very often parsed wrong by some simple tokenizers, deal\nurl, email, long digits rather well.\n\n\nTry with the following script:\n\n``easy_tokenizer -s input_text``\n\nor\n\n``easy_tokenizer -f input_file``\n\n\nCI Status\n------------\n\n.. image:: https://travis-ci.org/tilaboy/easy-tokenizer.svg?branch=master\n :target: https://travis-ci.org/tilaboy/easy-tokenizer\n\n.. image:: https://readthedocs.org/projects/easy-tokenizer/badge/?version=latest\n :target: https://easy-tokenizer.readthedocs.io/en/latest/?badge=latest\n :alt: Documentation Status\n\n\n.. image:: https://pyup.io/repos/github/tilaboy/easy-tokenizer/shield.svg\n :target: https://pyup.io/repos/github/tilaboy/easy-tokenizer/\n :alt: Updates\n\nRequirements\n------------\n\nPython 3.6+\n\nInstallation\n------------\n\n::\n\n pip install easy-tokenizer\n\n\nUsage\n-----\n\n- easy-tokenizer:\n\n input:\n\n - string: input string to tokenize\n\n - filename: input text file to tokenize\n\n - output: output filename, optional. print out to STDOUT when not set\n\n output:\n\n - a sequence of space separated tokens\n\nexamples:\n^^^^^^^^^\n\n::\n\n # string input\n easy-tokenizer -s \"this is a simple test.\"\n\n easy-tokenizer -f foo.txt\n easy-tokenizer -f foo.txt -o bar.txt\n\noutput will be \"this is a simple test .\"\n\nDevelopment\n-----------\n\nTo install package and its dependencies, run the following from project\nroot directory:\n\n::\n\n python setup.py install\n\nTo work the code and develop the package, run the following from project\nroot directory:\n\n::\n\n python setup.py develop\n\nTo run unit tests, execute the following from the project root\ndirectory:\n\n::\n\n python setup.py test\n\n\n", "description_content_type": "text/x-rst", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/tilaboy/easy-tokenizer", "keywords": "tokenizer", "license": "MIT license", "maintainer": "", "maintainer_email": "", "name": "easy-tokenizer", "package_url": "https://pypi.org/project/easy-tokenizer/", "platform": "", "project_url": "https://pypi.org/project/easy-tokenizer/", "project_urls": { "Homepage": "https://github.com/tilaboy/easy-tokenizer" }, "release_url": "https://pypi.org/project/easy-tokenizer/0.0.10/", "requires_dist": null, "requires_python": "", "summary": "tokenizer tool", "version": "0.0.10", "yanked": false, "yanked_reason": null }, "last_serial": 6741557, "releases": { "0.0.1": [ { "comment_text": "", "digests": { "md5": "1de05d6619dd18b6eec20bb6f3ac285b", "sha256": "248259cc3cd6c6a21749cd23cbff43cc5f14876dc9f25cbdf9b47932a74edc47" }, "downloads": -1, "filename": "easy_tokenizer-0.0.1-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "1de05d6619dd18b6eec20bb6f3ac285b", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 8039, "upload_time": "2019-10-23T13:48:23", 
"upload_time_iso_8601": "2019-10-23T13:48:23.940375Z", "url": "https://files.pythonhosted.org/packages/fa/e4/2ec4c8c8a9c80cca527a7c7bb0bbc58b3b92620c0ccbf1f6bcb2fe3bd5f6/easy_tokenizer-0.0.1-py2.py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "f3a97ffb0942b51f01c7d7114f61c715", "sha256": "369fce57e7360145663eb233d847135a712b9675947b1e3f30f77f6de1250260" }, "downloads": -1, "filename": "easy_tokenizer-0.0.1.tar.gz", "has_sig": false, "md5_digest": "f3a97ffb0942b51f01c7d7114f61c715", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 9287, "upload_time": "2019-10-23T13:48:38", "upload_time_iso_8601": "2019-10-23T13:48:38.451251Z", "url": "https://files.pythonhosted.org/packages/f0/3b/c2f330571bfaf2758df7fff8e89d7678fad13e798e94165beddc15205c6d/easy_tokenizer-0.0.1.tar.gz", "yanked": false, "yanked_reason": null } ], "0.0.10": [ { "comment_text": "", "digests": { "md5": "93ad5a67caefef773321cb5c8c2cdb76", "sha256": "29577595625fbbba7325a28dbb8d3f25510e01abbf38fd81b5e50c4666e02f52" }, "downloads": -1, "filename": "easy_tokenizer-0.0.10-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "93ad5a67caefef773321cb5c8c2cdb76", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 8939, "upload_time": "2020-03-03T15:35:09", "upload_time_iso_8601": "2020-03-03T15:35:09.210459Z", "url": "https://files.pythonhosted.org/packages/11/16/34cfdbbe64c1e7649d4f57eb009a3e7b558f8ff0271417c16f53baea14fd/easy_tokenizer-0.0.10-py2.py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "ef26ccae9b106844186661a2218ad267", "sha256": "d2da094a2e61637ae4db1d6fe5ae85ad6596e056a7c59f2d094d2ab31d937c63" }, "downloads": -1, "filename": "easy_tokenizer-0.0.10.tar.gz", "has_sig": false, "md5_digest": "ef26ccae9b106844186661a2218ad267", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 21323, "upload_time": "2020-03-03T15:35:12", "upload_time_iso_8601": "2020-03-03T15:35:12.243141Z", "url": "https://files.pythonhosted.org/packages/2c/94/6712dd75e5ace020714c98e6a17303fa1b5d4588e1f252deb88cde9bcf1c/easy_tokenizer-0.0.10.tar.gz", "yanked": false, "yanked_reason": null } ], "0.0.2": [ { "comment_text": "", "digests": { "md5": "edba49712ed88458561d5639117f54fa", "sha256": "b658e9c2120c0ef6c4c401cf27412f8a31a28073ba13b23c6e143675d30fead2" }, "downloads": -1, "filename": "easy_tokenizer-0.0.2-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "edba49712ed88458561d5639117f54fa", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 8303, "upload_time": "2019-10-23T16:19:40", "upload_time_iso_8601": "2019-10-23T16:19:40.811714Z", "url": "https://files.pythonhosted.org/packages/73/6d/dcb428441adc22a08fc99909479ae659717c3616be5e7635403c1443a54f/easy_tokenizer-0.0.2-py2.py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "7211d984fc40e2352a42f67db27a000e", "sha256": "1a9ea9c3a58b0f05a18deab6077ac79206b225845a6836ae9b5cc98fa0c59e8e" }, "downloads": -1, "filename": "easy_tokenizer-0.0.2.tar.gz", "has_sig": false, "md5_digest": "7211d984fc40e2352a42f67db27a000e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 18206, "upload_time": "2019-10-23T16:19:43", "upload_time_iso_8601": "2019-10-23T16:19:43.368023Z", "url": 
"https://files.pythonhosted.org/packages/4d/08/da9552b839a703dbc416ebe9c7a445736f8a2becae23fcce3fb2c92ac25f/easy_tokenizer-0.0.2.tar.gz", "yanked": false, "yanked_reason": null } ], "0.0.4": [ { "comment_text": "", "digests": { "md5": "ad0263ee654949856cbb61c2e14ff4c6", "sha256": "7256c4b005577af6416d4d17a045192642008622c0a146b25c0d4d03d9d749aa" }, "downloads": -1, "filename": "easy_tokenizer-0.0.4-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "ad0263ee654949856cbb61c2e14ff4c6", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 7830, "upload_time": "2019-10-24T02:24:12", "upload_time_iso_8601": "2019-10-24T02:24:12.590141Z", "url": "https://files.pythonhosted.org/packages/d4/d5/f255a53b6757781a6a4464ec8c0d4c7c44f06f442e96ed83f928fca6b386/easy_tokenizer-0.0.4-py2.py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "b93baa82f033aa3109a8852337c787f2", "sha256": "7fc0cf488a0bcaf4db3c4c0f4b49ed6c2d774869ab24e8e7cff97e831636dbf2" }, "downloads": -1, "filename": "easy_tokenizer-0.0.4.tar.gz", "has_sig": false, "md5_digest": "b93baa82f033aa3109a8852337c787f2", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 18074, "upload_time": "2019-10-24T02:24:14", "upload_time_iso_8601": "2019-10-24T02:24:14.295710Z", "url": "https://files.pythonhosted.org/packages/91/cc/e66158ee2e03c7fa11086457bb05205d5f6ea4a2b6d09844b4d1ee501f84/easy_tokenizer-0.0.4.tar.gz", "yanked": false, "yanked_reason": null } ], "0.0.5": [ { "comment_text": "", "digests": { "md5": "2e52c58f76d38994b2ebe95ff066ba3a", "sha256": "956b6eb3a9ce7836778dd41a1bceedb039122cae92f871c0981a78c630f38150" }, "downloads": -1, "filename": "easy_tokenizer-0.0.5-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "2e52c58f76d38994b2ebe95ff066ba3a", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 8649, "upload_time": "2019-10-24T02:34:10", "upload_time_iso_8601": "2019-10-24T02:34:10.386782Z", "url": "https://files.pythonhosted.org/packages/65/4e/944d93104823fed11a9eaa8e9c7e1e46f7bc1cc98b0824f560b44a704c97/easy_tokenizer-0.0.5-py2.py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "ce2116458f0c3694d5867bed46fe5c5d", "sha256": "c3ef01357be255c9a01aaee168fae86a154f6704bd87235b146af5c9a09c8508" }, "downloads": -1, "filename": "easy_tokenizer-0.0.5.tar.gz", "has_sig": false, "md5_digest": "ce2116458f0c3694d5867bed46fe5c5d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 13115, "upload_time": "2019-10-24T02:34:11", "upload_time_iso_8601": "2019-10-24T02:34:11.895602Z", "url": "https://files.pythonhosted.org/packages/ed/b4/8dc42b96da56237afb96b9e0b233643d154d137e2aa50eb8361c2897fcf2/easy_tokenizer-0.0.5.tar.gz", "yanked": false, "yanked_reason": null } ], "0.0.7": [ { "comment_text": "", "digests": { "md5": "4545d59e6e9be8a2fecf4daa48a715cd", "sha256": "8e7d367b9ceb1f7920ddf72fd2479b15a4e81b039f57a23cbf41387a89005aa5" }, "downloads": -1, "filename": "easy_tokenizer-0.0.7-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "4545d59e6e9be8a2fecf4daa48a715cd", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 8726, "upload_time": "2019-11-14T13:54:14", "upload_time_iso_8601": "2019-11-14T13:54:14.974783Z", "url": 
"https://files.pythonhosted.org/packages/60/ee/3c54b65ed6a1e0799b9ca1cce6423c06b83e54584d63f4b5b0a5baf2ea6a/easy_tokenizer-0.0.7-py2.py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "06044be448096f21ff00156e0c41b06b", "sha256": "762ab9f12702373999959bc6af051ecae85a48ed00a1995eee66e9f1d301a688" }, "downloads": -1, "filename": "easy_tokenizer-0.0.7.tar.gz", "has_sig": false, "md5_digest": "06044be448096f21ff00156e0c41b06b", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 13205, "upload_time": "2019-11-14T13:54:16", "upload_time_iso_8601": "2019-11-14T13:54:16.452568Z", "url": "https://files.pythonhosted.org/packages/64/11/d43a13ed36cd91099a766630ecdaa569ba362edaa4c538d29b7c8cc5bcc9/easy_tokenizer-0.0.7.tar.gz", "yanked": false, "yanked_reason": null } ], "0.0.8": [ { "comment_text": "", "digests": { "md5": "16c4198566b9f28f24d06e5a2ef534eb", "sha256": "148b339f4d456966b9f64cde8e7bcdf99214aa4a364913dd3c2b1cb0a7a9fd4f" }, "downloads": -1, "filename": "easy_tokenizer-0.0.8-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "16c4198566b9f28f24d06e5a2ef534eb", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 9110, "upload_time": "2019-11-14T15:45:06", "upload_time_iso_8601": "2019-11-14T15:45:06.539745Z", "url": "https://files.pythonhosted.org/packages/06/e9/dcae554b06f79049ad17fee6947f99fc81e5e0a236ade867454687c8fa5d/easy_tokenizer-0.0.8-py2.py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "f50d5558dc8cf934c226eb96326d7cad", "sha256": "28bd5453aa62b3d019596ea854292b91fcd21ea54f3357cfcef52b63605bffec" }, "downloads": -1, "filename": "easy_tokenizer-0.0.8.tar.gz", "has_sig": false, "md5_digest": "f50d5558dc8cf934c226eb96326d7cad", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 13414, "upload_time": "2019-11-14T15:45:08", "upload_time_iso_8601": "2019-11-14T15:45:08.113392Z", "url": "https://files.pythonhosted.org/packages/3a/2d/10cb4099dd695e2fe725ff63aecd762d424dca35a57826049a242d689c11/easy_tokenizer-0.0.8.tar.gz", "yanked": false, "yanked_reason": null } ], "0.0.9": [ { "comment_text": "", "digests": { "md5": "ed19596947e8677ed49a1f0e2fe51cad", "sha256": "2afcc03a8711cb0af3a733f88754784ebb26f33ca79369f2a06c2beaf08f81b2" }, "downloads": -1, "filename": "easy_tokenizer-0.0.9-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "ed19596947e8677ed49a1f0e2fe51cad", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 9328, "upload_time": "2020-01-16T06:50:44", "upload_time_iso_8601": "2020-01-16T06:50:44.435560Z", "url": "https://files.pythonhosted.org/packages/05/0a/6d3f18eeb85e70b12b98e2d8a87140e06406e95f2f7557cbf53d61fa91d7/easy_tokenizer-0.0.9-py2.py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "a6c6e10d1128447c3a870eed0fa9c984", "sha256": "25c2917bc84f67b75916ac510df1c980930fdcb765c9f5d91a0876eb236cf847" }, "downloads": -1, "filename": "easy_tokenizer-0.0.9.tar.gz", "has_sig": false, "md5_digest": "a6c6e10d1128447c3a870eed0fa9c984", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 14845, "upload_time": "2020-01-16T06:50:45", "upload_time_iso_8601": "2020-01-16T06:50:45.989194Z", "url": "https://files.pythonhosted.org/packages/ce/da/f5ccccae14ee2395c676ca0d4d6574986deb0b5bcbeba2205445e0dd5d87/easy_tokenizer-0.0.9.tar.gz", "yanked": false, 
"yanked_reason": null } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "93ad5a67caefef773321cb5c8c2cdb76", "sha256": "29577595625fbbba7325a28dbb8d3f25510e01abbf38fd81b5e50c4666e02f52" }, "downloads": -1, "filename": "easy_tokenizer-0.0.10-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "93ad5a67caefef773321cb5c8c2cdb76", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 8939, "upload_time": "2020-03-03T15:35:09", "upload_time_iso_8601": "2020-03-03T15:35:09.210459Z", "url": "https://files.pythonhosted.org/packages/11/16/34cfdbbe64c1e7649d4f57eb009a3e7b558f8ff0271417c16f53baea14fd/easy_tokenizer-0.0.10-py2.py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "ef26ccae9b106844186661a2218ad267", "sha256": "d2da094a2e61637ae4db1d6fe5ae85ad6596e056a7c59f2d094d2ab31d937c63" }, "downloads": -1, "filename": "easy_tokenizer-0.0.10.tar.gz", "has_sig": false, "md5_digest": "ef26ccae9b106844186661a2218ad267", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 21323, "upload_time": "2020-03-03T15:35:12", "upload_time_iso_8601": "2020-03-03T15:35:12.243141Z", "url": "https://files.pythonhosted.org/packages/2c/94/6712dd75e5ace020714c98e6a17303fa1b5d4588e1f252deb88cde9bcf1c/easy_tokenizer-0.0.10.tar.gz", "yanked": false, "yanked_reason": null } ], "vulnerabilities": [] }