{ "info": { "author": "HaveTwoBrush", "author_email": "kinggreenhall@gmail.com", "bugtrack_url": null, "classifiers": [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3" ], "description": "
\n
\n
\n\n---\n\n
\n \n \n \n \n \n
\n\n
\n [ \u4e2d\u6587 | English]\n
\n\n# cnlp = cn + nlp\n\n\ud83d\udd25 \u4e13\u6ce8\u4e8e\u4e2d\u6587\u7684\u81ea\u7136\u8bed\u8a00\u5904\u7406\u6846\u67b6\u3002\n\n## \u529f\u80fd\n\n1. [cnlp.data](https://cnlp.dovolopor.com/api/#data): \u4e00\u4e2a\u901a\u7528\u7684\u6587\u672c\u5904\u7406\u5668\u3002\n2. [cnlp.dataset](https://cnlp.dovolopor.com/api/#dataset): \u63d0\u4f9b\u5e38\u89c1\u7684\u6570\u636e\u96c6\u52a0\u8f7d\u3002\n3. [cnlp.model](https://cnlp.dovolopor.com/api/#model): \u63d0\u4f9b\u5e38\u89c1\u7684\u6a21\u578b\u3002\n\n## \u5b89\u88c5\n\n### 1 pip \u5b89\u88c5\n\n```shell\n$ pip install cnlp\n```\n\n### 2 \u624b\u52a8\u5b89\u88c5\n\n```shell\n$ git clone https://github.com/HaveTwoBrush/cnlp.git\n$ cd cnlp\n$ python setup.py install\n```\n\n## \u5feb\u901f\u4e0a\u624b\n\n### 1 cnlp.data\n\n1. \u6570\u636e\u96c6\u5212\u5206\n\n```python\nfrom cnlp.data import split\n\norigin_dataset = [\n \"\u4eca\u5929\u5929\u6c14\u4e0d\u9519\u554a\",\n \"\u6211\u60f3\u5403\u70e7\u70e4\",\n \"\u5730\u7403\u4eba\u5c31\u77e5\u9053\u5403\",\n \"\u8bf4\u7684\u597d\u50cf\u4f60\u4eec\u706b\u661f\u4eba\u4e0d\u7231\u5403\u4e00\u6837\",\n \"\u6ca1\u9519\u554a\uff01\",\n \"\u597d\u5427\",\n \"\u4f60\u8d62\u4e86\",\n \"\u90a3\u8bf7\u4f60\u79bb\u5f00\u5730\u7403\",\n \"\u5feb\u8d70\",\n \"\u5feb\u5feb\u8d70\"\n]\n\nprint(split(origin_dataset))\n#(['\u4eca\u5929\u5929\u6c14\u4e0d\u9519\u554a', '\u6211\u60f3\u5403\u70e7\u70e4', '\u5730\u7403\u4eba\u5c31\u77e5\u9053\u5403', '\u8bf4\u7684\u597d\u50cf\u4f60\u4eec\u706b\u661f\u4eba\u4e0d\u7231\u5403\u4e00\u6837', '\u6ca1\u9519\u554a\uff01', '\u597d\u5427', '\u4f60\u8d62\u4e86'],\n# ['\u90a3\u8bf7\u4f60\u79bb\u5f00\u5730\u7403', '\u5feb\u8d70', '\u5feb\u5feb\u8d70'])\n\n# \u652f\u6301\u5212\u5206 train val test \u4e09\u79cd\u6570\u636e\u96c6\nprint(split(origin_dataset, [0.5, 0.3, 0.2]))\n#(['\u4eca\u5929\u5929\u6c14\u4e0d\u9519\u554a', '\u6211\u60f3\u5403\u70e7\u70e4', '\u5730\u7403\u4eba\u5c31\u77e5\u9053\u5403', '\u8bf4\u7684\u597d\u50cf\u4f60\u4eec\u706b\u661f\u4eba\u4e0d\u7231\u5403\u4e00\u6837', '\u6ca1\u9519\u554a\uff01'],\n# ['\u597d\u5427', '\u4f60\u8d62\u4e86', '\u90a3\u8bf7\u4f60\u79bb\u5f00\u5730\u7403'],\n# ['\u8bf4\u7684\u597d\u50cf\u4f60\u4eec\u706b\u661f\u4eba\u4e0d\u7231\u5403\u4e00\u6837', '\u6ca1\u9519\u554a\uff01', '\u597d\u5427', '\u4f60\u8d62\u4e86', '\u90a3\u8bf7\u4f60\u79bb\u5f00\u5730\u7403', '\u5feb\u8d70', '\u5feb\u5feb\u8d70'])\n\n# \u652f\u6301\u6253\u4e71(shuffle)\nprint(split(origin_dataset, [0.5, 0.3, 0.2], True))\n#(['\u4f60\u8d62\u4e86', '\u8bf4\u7684\u597d\u50cf\u4f60\u4eec\u706b\u661f\u4eba\u4e0d\u7231\u5403\u4e00\u6837', '\u5feb\u8d70', '\u4eca\u5929\u5929\u6c14\u4e0d\u9519\u554a', '\u90a3\u8bf7\u4f60\u79bb\u5f00\u5730\u7403'],\n# ['\u6211\u60f3\u5403\u70e7\u70e4', '\u5feb\u5feb\u8d70', '\u5730\u7403\u4eba\u5c31\u77e5\u9053\u5403'],\n# ['\u4eca\u5929\u5929\u6c14\u4e0d\u9519\u554a', '\u90a3\u8bf7\u4f60\u79bb\u5f00\u5730\u7403', '\u6211\u60f3\u5403\u70e7\u70e4', '\u5feb\u5feb\u8d70', '\u5730\u7403\u4eba\u5c31\u77e5\u9053\u5403', '\u6ca1\u9519\u554a\uff01', '\u597d\u5427'])\n```\n\n2. token \u548c index \u76f8\u4e92\u8f6c\u5316\n\n```python\nfrom cnlp.data import Vocabulary\n\nvocab = Vocabulary()\n\ntokens = \"\u4e3a\u4e2d\u534e\u4e4b\u5d1b\u8d77\u800c\u8bfb\u4e66\"\nprint(vocab.token_to_index(tokens))\n# [257, 209, 523, 159, 3606, 1539, 454, 1771, 277]\n\ntokens_list = [\n \"\u4e3a\u4e2d\u534e\u4e4b\u5d1b\u8d77\u800c\u8bfb\u4e66\",\n \"\u597d\u597d\u5b66\u4e60\uff0c\u5929\u5929\u5411\u4e0a\"\n]\nprint(vocab.token_to_index(tokens_list))\n# [[257, 209, 523, 159, 3606, 1539, 454, 1771, 277], [617, 617, 1146, 170, 3, 179, 179, 530, 138]]\n\nindexs = [257, 209, 523, 159, 3606, 1539, 454, 1771, 277]\nprint(vocab.index_to_token(indexs))\n# ['\u4e3a', '\u4e2d', '\u534e', '\u4e4b', '@UNK@', '\u8d77', '\u800c', '\u8bfb', '\u4e66']\n\nindexs_list = [\n [257, 209, 523, 159, 3606, 1539, 454, 1771, 277],\n [617, 617, 1146, 170, 3, 179, 179, 530, 138]\n]\nprint(vocab.index_to_token(indexs_list))\n# [['\u4e3a', '\u4e2d', '\u534e', '\u4e4b', '@UNK@', '\u8d77', '\u800c', '\u8bfb', '\u4e66'], ['\u597d', '\u597d', '\u5b66', '\u4e60', '\uff0c', '\u5929', '\u5929', '\u5411', '\u4e0a']]\n```\n\n3. \u673a\u68b0\u5206\u8bcd\n\n```python\nfrom cnlp.data import Tokenizer\n\nt = Tokenizer()\n\nprint(t.cut(\"\u4e3a\u4e2d\u534e\u4e4b\u5d1b\u8d77\u800c\u8bfb\u4e66\"))\n# ['\u4e3a', '\u4e2d\u534e', '\u4e4b', '\u5d1b\u8d77', '\u800c', '\u8bfb\u4e66']\n```\n\n## \u6587\u6863\n\n\u8bf7\u5728 [https://cnlp.dovolopor.com](https://cnlp.dovolopor.com) \u4e2d\u67e5\u770b\u5b83\u3002\n\n## \u8d21\u732e\n\n\u5982\u679c\u60a8\u8ba1\u5212\u4e3a\u6b64\u9879\u76ee\u63d0\u4f9b\u65b0\u529f\u80fd\uff0c\u5b9e\u7528\u7a0b\u5e8f\u529f\u80fd\u6216\u6269\u5c55\uff0c\u8bf7\u9996\u5148\u6253\u5f00\u4e00\u4e2a [Issues](https://github.com/HaveTwoBrush/cnlp/issues) \u5e76\u4e0e\u6211\u4eec\u8ba8\u8bba\u8be5\u529f\u80fd\u3002\n\n## \u6c9f\u901a\n\n\u4f60\u53ef\u4ee5\u901a\u8fc7\u5fae\u4fe1\u548c\u6211\u6c9f\u901a\uff0c\u6211\u7684\u5fae\u4fe1\u53f7\u662f `kinggreenhall`\u3002\n\n## \u8bb8\u53ef\u8bc1\n\n[MIT License](./LICENSE)", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/HaveTwoBrush/cnlp", "keywords": "", "license": "MIT License", "maintainer": "", "maintainer_email": "", "name": "cnlp", "package_url": "https://pypi.org/project/cnlp/", "platform": "", "project_url": "https://pypi.org/project/cnlp/", "project_urls": { "Homepage": "https://github.com/HaveTwoBrush/cnlp" }, "release_url": "https://pypi.org/project/cnlp/0.0.12/", "requires_dist": null, "requires_python": "", "summary": "A natural language processing framework focused on Chinese.", "version": "0.0.12" }, "last_serial": 5986597, "releases": { "0.0.1": [ { "comment_text": "", "digests": { "md5": "6af980660eaa926531ad204a594258c1", "sha256": "b5c4aa8e715c9fbdbe31401ae9752ae59ac1ccd30f1a3b5187c5dffaabfc53be" }, "downloads": -1, "filename": "cnlp-0.0.1-py3.6.egg", "has_sig": false, "md5_digest": "6af980660eaa926531ad204a594258c1", "packagetype": "bdist_egg", "python_version": "3.6", "requires_python": null, "size": 1754, "upload_time": "2018-09-22T21:27:47", "url": "https://files.pythonhosted.org/packages/35/51/b60625f77c83c4245334a24389521f22a3329945de5da55079c18efb1831/cnlp-0.0.1-py3.6.egg" }, { "comment_text": "", "digests": { "md5": "124dd4a6c8bb395c00058cda1c66934e", "sha256": "92764f8667583f9272e6c0f01d52669bf1d0567eaaf19336f63e667a0596664c" }, "downloads": -1, "filename": "cnlp-0.0.1.tar.gz", "has_sig": false, "md5_digest": "124dd4a6c8bb395c00058cda1c66934e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 1228, "upload_time": "2018-07-29T12:53:24", "url": "https://files.pythonhosted.org/packages/a0/e6/05de5b23483d30dc5f510431c31f5c69025bb8a42d62a58510c0e2958d72/cnlp-0.0.1.tar.gz" } ], "0.0.10": [ { "comment_text": "", "digests": { "md5": "5a9ee12b7748f44e85d7bf29e1fe685f", "sha256": "c6419ee6db4a8f16883397a6887274b977ffc6832cc307d9c39bca96a0ad3241" }, "downloads": -1, "filename": "cnlp-0.0.10.tar.gz", "has_sig": false, "md5_digest": "5a9ee12b7748f44e85d7bf29e1fe685f", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5267732, "upload_time": "2019-09-07T11:47:52", "url": "https://files.pythonhosted.org/packages/13/38/0952addd4bb3ac2782bd3a7eb105c3aee5ecfbc0171009f32a4e3c653431/cnlp-0.0.10.tar.gz" } ], "0.0.11": [ { "comment_text": "", "digests": { "md5": "1ce151c2e88c112c69d1cfb7c058e323", "sha256": "c9b7caa956de15ea5a3cd26f41856d947103d5db836afe85452d676b9a05a7a3" }, "downloads": -1, "filename": "cnlp-0.0.11.tar.gz", "has_sig": false, "md5_digest": "1ce151c2e88c112c69d1cfb7c058e323", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5267420, "upload_time": "2019-09-08T09:21:01", "url": "https://files.pythonhosted.org/packages/5c/42/d24c2f1ae76e7c3cd2f0e22528d44ed6d9cfbd42f2b0f7b0691fab23bbd5/cnlp-0.0.11.tar.gz" } ], "0.0.12": [ { "comment_text": "", "digests": { "md5": "2bb5e192014da1884d596cb34fb36fb5", "sha256": "c09c9427b8093fe1a89fdec5eea69892cf8e49b38f4a1ee3fd3bbad99e383b6d" }, "downloads": -1, "filename": "cnlp-0.0.12.tar.gz", "has_sig": false, "md5_digest": "2bb5e192014da1884d596cb34fb36fb5", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5267580, "upload_time": "2019-10-16T22:03:24", "url": "https://files.pythonhosted.org/packages/bb/ac/83be1639219e2ba73a3f5c654ff9654e142915be2f96f23b9e943efce157/cnlp-0.0.12.tar.gz" } ], "0.0.2": [ { "comment_text": "", "digests": { "md5": "a8e15c82016f1baecdfa71ca38b8e9d0", "sha256": "20b70e62073eba67cde0239f0961c41ea84321e828c9664e349d7fd1efe9324b" }, "downloads": -1, "filename": "cnlp-0.0.2-py3.6.egg", "has_sig": false, "md5_digest": "a8e15c82016f1baecdfa71ca38b8e9d0", "packagetype": "bdist_egg", "python_version": "3.6", "requires_python": null, "size": 1753, "upload_time": "2018-09-22T21:27:49", "url": "https://files.pythonhosted.org/packages/f2/ca/050f255a3ad68845d76350ea23f6011f014c0e19710c199be00f42774d2e/cnlp-0.0.2-py3.6.egg" }, { "comment_text": "", "digests": { "md5": "b3bd3c5e41af90e4e1d23d940073b34b", "sha256": "35c3ff9b879b5c81aff7755538a37a35ccb3140a9180de6f9106674a9730e570" }, "downloads": -1, "filename": "cnlp-0.0.2.tar.gz", "has_sig": false, "md5_digest": "b3bd3c5e41af90e4e1d23d940073b34b", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 12611, "upload_time": "2018-08-15T09:34:18", "url": "https://files.pythonhosted.org/packages/e1/8d/baf6342c34043956973aef60fe8c4809be86e5801a0b03b4af73672deb55/cnlp-0.0.2.tar.gz" } ], "0.0.6": [ { "comment_text": "", "digests": { "md5": "e41b4c993a004bed3b5f0aea854360b9", "sha256": "581ae86f2ae459cd1957a845e686cae25418253bc93198947188ff125ca55c06" }, "downloads": -1, "filename": "cnlp-0.0.6.tar.gz", "has_sig": false, "md5_digest": "e41b4c993a004bed3b5f0aea854360b9", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5262322, "upload_time": "2019-01-18T23:01:56", "url": "https://files.pythonhosted.org/packages/22/e3/8ffe6873500fb289091177d6dd17a402f21031ae9a59e02ac625ea5fe476/cnlp-0.0.6.tar.gz" } ], "0.0.8": [ { "comment_text": "", "digests": { "md5": "5171628003f59c6b08c1eba8dcd65f86", "sha256": "94da2e93649610ad26a84d5f79bd53de1189eda1d4998d324e91e49e727238fb" }, "downloads": -1, "filename": "cnlp-0.0.8-py3.6.egg", "has_sig": false, "md5_digest": "5171628003f59c6b08c1eba8dcd65f86", "packagetype": "bdist_egg", "python_version": "3.6", "requires_python": null, "size": 10535011, "upload_time": "2019-05-26T22:03:43", "url": "https://files.pythonhosted.org/packages/af/69/20fb7cb61cefc9eeac1baed6c5b23ce2ac27e2baf4d408aac4c28af74a61/cnlp-0.0.8-py3.6.egg" }, { "comment_text": "", "digests": { "md5": "7a9120c7ba081a75ad7fc82d851ec2ff", "sha256": "cadb7c43ec7fb170a456aede882adea9f36fb8ef6e04d0af266266b1a92edcdf" }, "downloads": -1, "filename": "cnlp-0.0.8.tar.gz", "has_sig": false, "md5_digest": "7a9120c7ba081a75ad7fc82d851ec2ff", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5264720, "upload_time": "2019-02-15T16:13:27", "url": "https://files.pythonhosted.org/packages/8c/3f/b2e8a12a8fd1fe4f7af0327e2facbe9769acd4528b28cb8d715d29731bac/cnlp-0.0.8.tar.gz" } ], "0.0.9": [ { "comment_text": "", "digests": { "md5": "688c05a972575d0d6de58a2b00138401", "sha256": "da3028c78558909f3b05d6f15f1808e49fb29e9695bc9215ed024ca85995711d" }, "downloads": -1, "filename": "cnlp-0.0.9.tar.gz", "has_sig": false, "md5_digest": "688c05a972575d0d6de58a2b00138401", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5267641, "upload_time": "2019-05-26T22:04:07", "url": "https://files.pythonhosted.org/packages/17/d2/35b6864346ca0887cfd47951371770373642fa0aea8c5690fd2b03eabf52/cnlp-0.0.9.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "2bb5e192014da1884d596cb34fb36fb5", "sha256": "c09c9427b8093fe1a89fdec5eea69892cf8e49b38f4a1ee3fd3bbad99e383b6d" }, "downloads": -1, "filename": "cnlp-0.0.12.tar.gz", "has_sig": false, "md5_digest": "2bb5e192014da1884d596cb34fb36fb5", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5267580, "upload_time": "2019-10-16T22:03:24", "url": "https://files.pythonhosted.org/packages/bb/ac/83be1639219e2ba73a3f5c654ff9654e142915be2f96f23b9e943efce157/cnlp-0.0.12.tar.gz" } ] }