{ "info": { "author": "chen ru long", "author_email": "chenrulong0513.master@gmail.com", "bugtrack_url": null, "classifiers": [], "description": "pyjieba\n=======\n\n|996.icu| |LICENSE| |python|\n\nPython wrapper for cppjieba without any dependency, no need to compile\nby using gcc/c++\n\n1. Python\n \u5c01\u88c5C++\u7248\u672c\u7684jieba\u5206\u8bcd\u5668\uff0c\u6027\u80fd\u597d\uff0c\u5e76\u4e14\u5df2\u7ecf\u9884\u7f16\u8bd1\uff0c\u65e0\u9700\u5b89\u88c5\u65f6\u7f16\u8bd1\uff0c\u5373\u62ff\u5373\u7528\u3002\n2. \u5df2\u5728Centos7\u3001MaxOS10.13.6\u4e0a\u6d4b\u8bd5 \u3010Windows\u53ef\u80fd\u5728\u67d0\u4e9b\u60c5\u51b5\u4e0b\u8fd0\u884c\u5f02\u5e38\u3011\u3002\n3. C++\n \u4ee3\u7801\u6e90\u81ea\"\u7ed3\u5df4\"\u4e2d\u6587\u5206\u8bcd\u7684C++\u7248\u672c\uff0c\\ `CPPJieba `__\n\n\u4f7f\u7528\u8bf4\u660e\n--------\n\npip\u5b89\u88c5\n\n.. code:: shell\n\n pip install pyjieba\n\n\u624b\u52a8\u5b89\u88c5\n\n.. code:: shell\n\n cd pyjieba\n python setup.py install\n\n\u63a5\u53e3\u4f7f\u7528\n\n.. code:: Python\n\n # \u5bfc\u5165\u5305\n import pyjieba\n # \u521d\u59cb\u5316[\u53ef\u9009]\n pyjieba.initialize()\n # \u521d\u59cb\u5316\u65f6\uff0c\u53ef\u4ee5\u6307\u5b9a\u81ea\u5df1\u7684\u8bcd\u5178\n pyjieba.initialize(dictPath='yourpath', # \u9ed8\u8ba4\u8bcd\u5178\u8def\u5f84\uff0c \u9ed8\u8ba4\u5728\u5305\u4e0b\u8def\u5f84/dict/jieba.dict.utf8\n userPath='yourpath', # \u7528\u6237\u8bcd\u5178\uff0c\u9ed8\u8ba4\u5728\u5305\u4e0b\u8def\u5f84/dict/user.dict.utf8\uff0c\u591a\u4e2a\u8bcd\u5178\u4f7f\u7528\u82f1\u6587\u5206\u53f7;\u5206\u9694\u5f00\n idfPath='yourpath', # IDF\u8bcd\u5178\uff0c\u9ed8\u8ba4\u5728\u5305\u4e0b\u8def\u5f84/dict/idf.utf8\uff0c\u63d0\u53d6\u5173\u952e\u8bcd\u65f6\u4f7f\u7528\n stopwordsPath='yourpath' # \u505c\u7528\u8bcd\u8bcd\u5178\uff0c\u9ed8\u8ba4\u5728\u5305\u4e0b\u8def\u5f84/dict/stop_words.utf8\n )\n\n # \u5206\u8bcd\n sentence = '\u89c6\u89c9\u4e2d\u56fd\u518d\u6b21\u81f4\u6b49'\n pyjieba.cut(sentence)\n # \u8f93\u51fa\u7ed3\u679c ['\u89c6\u89c9', '\u4e2d\u56fd', '\u518d\u6b21', '\u81f4\u6b49']\n\n # \u8bcd\u6027\u6807\u6ce8\n pyjieba.tag(sentence)\n # \u8f93\u51fa\u7ed3\u679c ['\u89c6\u89c9/n', '\u4e2d\u56fd/ns', '\u518d\u6b21/d', '\u81f4\u6b49/v']\n\n # \u4f7f\u7528TFIDF\u63d0\u53d6\u5173\u952e\u8bcd\n pyjieba.keywordsTFIDF(sentence, # \u53e5\u5b50\uff0c\u53ef\u4ee5\u662f\u6587\u672c\uff0c\u4e5f\u53ef\u4ee5\u4f20\u5165\u5206\u8bcd\u7684\u7ed3\u679c\u4f8b\u5982 ['\u89c6\u89c9', '\u4e2d\u56fd', '\u518d\u6b21', '\u81f4\u6b49']\n topN=5, # \u6700\u591a\u8fd4\u56de\u51e0\u4e2a\u5173\u952e\u8bcd\uff0c \u53ef\u9009\n allowedPOS='ns,n,vn,v,x' # \u5173\u952e\u8bcd\u5c5e\u6027\u9650\u5236, \u53ef\u9009\n )\n # \u8f93\u51fa\u7ed3\u679c [('\u81f4\u6b49', 3.618718), ('\u89c6\u89c9', 2.680915), ('\u4e2d\u56fd', 1.009107)]\n\n # \u4f7f\u7528Textrank\u7b97\u6cd5\u63d0\u53d6\u5173\u952e\u8bcd\n pyjieba.keywordsTextrank(sentence, # \u53e5\u5b50\uff0c\u53ef\u4ee5\u662f\u6587\u672c\uff0c\u4e5f\u53ef\u4ee5\u4f20\u5165\u5206\u8bcd\u7684\u7ed3\u679c\u4f8b\u5982 ['\u89c6\u89c9', '\u4e2d\u56fd', '\u518d\u6b21', '\u81f4\u6b49']\n topN=5, # \u6700\u591a\u8fd4\u56de\u51e0\u4e2a\u5173\u952e\u8bcd\uff0c \u53ef\u9009\n allowedPOS='ns,n,vn,v,x' # \u5173\u952e\u8bcd\u5c5e\u6027\u9650\u5236, \u53ef\u9009\n )\n # \u8f93\u51fa\u7ed3\u679c [('\u89c6\u89c9', 1.0), ('\u81f4\u6b49', 0.996685), ('\u4e2d\u56fd', 0.992994)]\n\n\u6e90\u7801\u4fee\u6539\u4e0e\u7f16\u8bd1\u547d\u4ee4\n------------------\n\n\u5982\u679c\u9700\u8981\u8fdb\u884ccppjieba\u6e90\u7801\u4fee\u6539\uff0c\u9700\u8981\u5728\u4e09\u4e2a\u5e73\u53f0\u4e0a\u5206\u522b\u8fdb\u884c\u7f16\u8bd1\uff0c\u751f\u6210\u65b0\u7684so\u548cdll\u6587\u4ef6\n\n.. code:: shell\n\n > cd cppjieba_src\n\n # windows\n > g++ jiebaapi.cpp -fPIC -I deps -I include -std=c++11 -shared -o ../pyjieba/libs/cppjieba_API_win64.dll\n\n # MaxOS\n > g++ jiebaapi.cpp -fPIC -I deps -I include -std=c++11 -shared -o ../pyjieba/libs/cppjieba_API_osx64.dll\n\n # Linux\n > g++ jiebaapi.cpp -fPIC -I deps -I include -std=c++11 -shared -o ../pyjieba/libs/cppjieba_API_linux64.so\n\nBenchmark\n---------\n\n1. \u5e73\u53f0 Centos7, 8\u683816G\uff0cPython3.6\n2. \u5c0f\u8bf4\u6587\u672c\u957f\u5ea6\uff1a83791\n3. \u5faa\u73af\u5206\u8bcd\u6b21\u6570\uff1a10\n\n+--------+------------+-------------+\n| \u6b21\u6570 | pyjieba | jieba |\n+========+============+=============+\n| 1 | 3147.3ms | 11137.5ms |\n+--------+------------+-------------+\n| 2 | 4692.9ms | 12792.7ms |\n+--------+------------+-------------+\n| 3 | 3257.1ms | 10830.7ms |\n+--------+------------+-------------+\n\n\u603b\u4f53\u6765\u770b\uff0cpyjieba\u5e73\u5747\u8017\u65f6\u4e3ajieba\u76841/3\u3002\n\n\u9e23\u8c22\n----\n\n1. `CPPJieba `__\n \"\u7ed3\u5df4\"\u4e2d\u6587\u5206\u8bcd\u7684C++\u7248\u672c\n2. `jieba `__ \u7ed3\u5df4\u4e2d\u6587\u5206\u8bcd\n\n.. |996.icu| image:: https://img.shields.io/badge/link-996.icu-red.svg\n :target: https://996.icu\n.. |LICENSE| image:: https://img.shields.io/badge/license-Anti%20996-blue.svg\n :target: https://github.com/996icu/996.ICU/blob/master/LICENSE\n.. |python| image:: https://img.shields.io/badge/python-3.5%20%7C%203.6%20%7C%203.7-blue.svg\n\n\n", "description_content_type": "", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/chenrulongmaster/pyjieba", "keywords": "jieba,cppjieba,pyjieba", "license": "Apache-2.0", "maintainer": "", "maintainer_email": "", "name": "pyjieba", "package_url": "https://pypi.org/project/pyjieba/", "platform": "any", "project_url": "https://pypi.org/project/pyjieba/", "project_urls": { "Homepage": "https://github.com/chenrulongmaster/pyjieba" }, "release_url": "https://pypi.org/project/pyjieba/1.0/", "requires_dist": null, "requires_python": "", "summary": "CPPJieba python wrapper", "version": "1.0" }, "last_serial": 5159157, "releases": { "1.0": [ { "comment_text": "", "digests": { "md5": "32bb806695053753cfa1871ae3ba4e67", "sha256": "15e85ffebcc29cacc7202715e4162e6d5d9dbda5830404a321abcfbbda9b9bc3" }, "downloads": -1, "filename": "pyjieba-1.0-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "32bb806695053753cfa1871ae3ba4e67", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 4956388, "upload_time": "2019-04-18T09:27:29", "url": "https://files.pythonhosted.org/packages/94/0a/1c3a029e9a6dee38b76508d833c1c3c6a83845ccff83342150199bcea0cc/pyjieba-1.0-py2.py3-none-any.whl" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "32bb806695053753cfa1871ae3ba4e67", "sha256": "15e85ffebcc29cacc7202715e4162e6d5d9dbda5830404a321abcfbbda9b9bc3" }, "downloads": -1, "filename": "pyjieba-1.0-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "32bb806695053753cfa1871ae3ba4e67", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 4956388, "upload_time": "2019-04-18T09:27:29", "url": "https://files.pythonhosted.org/packages/94/0a/1c3a029e9a6dee38b76508d833c1c3c6a83845ccff83342150199bcea0cc/pyjieba-1.0-py2.py3-none-any.whl" } ] }