{ "info": { "author": "Kun JIN", "author_email": "jin.kun@flykun.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Topic :: Software Development :: Build Tools" ], "description": "\u8bcd\u9891\u3001\u4e92\u4fe1\u606f\u3001\u4fe1\u606f\u71b5\u53d1\u73b0\u4e2d\u6587\u65b0\u8bcd\n================================\n\n**\u65b0\u8bcd\u53d1\u73b0**\\ \u4efb\u52a1\u662f\u4e2d\u6587\u81ea\u7136\u8bed\u8a00\u5904\u7406\u7684\u91cd\u8981\u6b65\u9aa4\u3002\\ **\u65b0\u8bcd**\\ \u6709\u201c\u65b0\u201d\u5c31\u6709\u201c\u65e7\u201d\uff0c\u5c5e\u4e8e\u4e00\u4e2a\u76f8\u5bf9\u4e2a\u6982\u5ff5\uff0c\u5728\u76f8\u5bf9\u7684\u9886\u57df\uff08\u91d1\u878d\u3001\u533b\u7597\uff09\uff0c\u5728\u76f8\u5bf9\u7684\u65f6\u95f4\uff08\u8fc7\u53bb\u3001\u73b0\u5728\uff09\u90fd\u5b58\u5728\u65b0\u8bcd\u3002\\ `\u6587\u672c\u6316\u6398 `__\\ \u4f1a\u5148\u5c06\u6587\u672c\\ `\u5206\u8bcd `__\\ \uff0c\u800c\u901a\u7528\u5206\u8bcd\u5668\u7cbe\u5ea6\u4e0d\u8fc7\uff0c\u901a\u5e38\u9700\u8981\u6dfb\u52a0\\ **\u81ea\u5b9a\u4e49\u5b57\u5178**\\ \u8865\u8db3\u7cbe\u5ea6\uff0c\u6240\u4ee5\u53d1\u73b0\u65b0\u8bcd\u5e76\u52a0\u5165\u5b57\u5178\uff0c\u6210\u4e3a\u6587\u672c\u6316\u6398\u7684\u4e00\u4e2a\u91cd\u8981\u5de5\u4f5c\u3002\n\n`\u5355\u8bcd `__\\ \u7684\u5b9a\u4e49\uff0c\u6765\u81ea\u7ef4\u57fa\u767e\u79d1\u7684\u5b9a\u4e49\u5982\u4e0b\uff1a\n\n \u5728\u8bed\u8a00\u5b66\u4e2d\uff0c\\ **\u5355\u8bcd**\\ \uff08\u53c8\u79f0\u4e3a\u8bcd\u3001\u8bcd\u8bed\u3001\u5355\u5b57\uff1b\u82f1\u8bed\u5bf9\u5e94\u7528\u8bed\u4e3a\u201cword\u201d\uff09\u662f\u80fd\u72ec\u7acb\u8fd0\u7528\u5e76\u542b\u6709\u8bed\u4e49\u5185\u5bb9\u6216\u8bed\u7528\u5185\u5bb9\uff08\u5373\u5177\u6709\u8868\u9762\u542b\u4e49\u6216\u5b9e\u9645\u542b\u4e49\uff09\u7684\u6700\u5c0f\u5355\u4f4d\u3002\u5355\u8bcd\u7684\u96c6\u5408\u79f0\u4e3a\u8bcd\u6c47\u3001\u672f\u8bed\uff0c\u4f8b\u5982\uff1a\u6240\u6709\u4e2d\u6587\u5355\u8bcd\u7edf\u79f0\u4e3a\u201c\u4e2d\u6587\u8bcd\u6c47\u201d\uff0c\u533b\u5b66\u4e0a\u4e13\u7528\u7684\u8bcd\u7edf\u79f0\u4e3a\u201c\u533b\u5b66\u672f\u8bed\u201d\u7b49\u3002\u8bcd\u5178\u662f\u4e3a\u8bcd\u8bed\u63d0\u4f9b\u97f3\u97f5\u3001\u8bcd\u4e49\u89e3\u91ca\u3001\u4f8b\u53e5\u3001\u7528\u6cd5\u7b49\u7b49\u7684\u5de5\u5177\u4e66\uff0c\u6709\u7684\u8bcd\u5178\u53ea\u4fee\u5f55\u7279\u6b8a\u9886\u57df\u7684\u8bcd\u6c47\u3002\n\n\u5355\u4ece\u8bed\u4e49\u89d2\u5ea6\uff0c\u201c\u82f9\u679c\u201c\u7684\u6cd5\u8bed\u662f\u201dpomme\u201d\uff0c\u800c\u201c\u571f\u8c46\u201d\u7684\u6cd5\u8bed\u662f\u201cpomme de\nterre\u201d\uff0c\u82e5\u6309\u4e0a\u9762\u7684\u5b9a\u4e49\uff0c\u201c\u571f\u8c46\u201d\u662f\u8981\u88ab\u62c6\u7684\u9762\u76ee\u5168\u975e\uff0c\u4f46\u201cpomme de\nterre\u201d\u662f\u5374\u662f\u8868\u8fbe\u201c\u571f\u8c46\u201d\u8fd9\u4e2a\u8bed\u4e49\u7684\u6700\u5c0f\u5355\u4f4d\uff1b\u5728\u673a\u6784\u540d\u4e2d\uff0c\u8fd9\u7c7b\u95ee\u9898\u51fa\u73b0\u7684\u66f4\u9891\u7e41\uff0c\u201cParis\n3\u201d\u662f\u201c\u5df4\u9ece\u7b2c\u4e09\u5927\u5b66\u201d\u7684\u7b80\u79f0\uff0c\u5982\u679c\u201cParis\u201d\u548c\u201c3\u201d\u5206\u522b\u8868\u793a\u5730\u540d\u548c\u6570\u5b57\uff0c\u90a3\u8fd9\u4e24\u4e2a\u5c31\u65e0\u6cd5\u8868\u8fbe\u201c\u5df4\u9ece\u7b2c\u4e09\u5927\u5b66\u201d\u7684\u8bed\u4e49\u3002\u800c\u4e2d\u6587\u4e5f\u6709\u7c7b\u4f3c\u7684\u4f8b\u5b50\uff0c\u201c\u5317\u4eac\u5927\u5b66\u201d\u7684\u201d\u5317\u4eac\u201c\u548c\u201d\u5927\u5b66\u201c\u90fd\u53ef\u4ee5\u4f5c\u4e3a\u4e00\u4e2a\u6700\u5c0f\u5355\u4f4d\u6765\u4f7f\u7528\uff0c\u5206\u522b\u8868\u793a\u201d\u5730\u65b9\u540d\u201c\u548c\u201c\u5927\u5b66\u201d\uff0c\u5982\u679c\u8fd9\u6837\u5206\u8bcd\uff0c\u90a3\u4e48\u5c31\u53ef\u4ee5\u7406\u89e3\u4e3a\u201c\u5317\u4eac\u7684\u5927\u5b66\u201d\u4e86\uff0c\u6240\u4ee5\u201c\u5317\u4eac\u5927\u5b66\u201d\u662f\u4e00\u4e2a\u8868\u8fbe\u8bed\u4e49\u7684\u6700\u5c0f\u5355\u4f4d\u3002\u524d\u51e0\u5e74\u6709\u90e8\u7535\u5f71\u300a\u590f\u6d1b\u7279\u70e6\u607c\u300b\uff0c\u6211\u4eec\u662f\u8981\u7406\u89e3\u4e3a\u201c\u590f\u6d1b\u7279\n\u70e6\u607c\u201c\u8fd8\u662f\u201d\u590f\u6d1b \u7279 \u70e6\u607c\u201c\uff0c\u8fd9\u5c31\u662f\u5f88\u7ecf\u5178\u7684\u5206\u8bcd\u95ee\u9898\u3002\n\n\u4f46\u662f\u4ece\u8bed\u7528\u89d2\u5ea6\uff0c\u8fd9\u4e9b\u95ee\u9898\u4f3c\u4e4e\u80fd\u88ab\u89e3\u51b3\uff0c\u6211\u4eec\u77e5\u9053\u201cpomme de\nterre\u201d\u5728\u65e5\u5e38\u751f\u6d3b\u4e2d\u4e00\u822c\u4f5c\u4e3a\u201c\u571f\u8c46\u201d\u800c\u4e0d\u662f\u201c\u571f\u91cc\u7684\u82f9\u679c\u201d\uff0c\u5728\u5df4\u9ece\u5b66\u4e60\u90fd\u77e5\u9053\u201cParis\n3\u201d\uff0c\u5c31\u50cf\u6211\u4eec\u63d0\u5230\u201c\u5317\u4eac\u5927\u5b66\u201d\u7279\u6307\u90a3\u6240\u8457\u540d\u7684\u9ad8\u7b49\u5b66\u5e9c\u4e00\u6837\u3002\u770b\u8fc7\u7535\u5f71\u300a\u590f\u6d1b\u7279\u70e6\u607c\u300b\u7684\u89c2\u4f17\u5f88\u5bb9\u6613\u7684\u5c31\u80fd\u533a\u5206\u8fd9\u4e2a\u6807\u9898\u5e94\u8be5\u770b\u4e3a\u201c\u590f\u6d1b\n\u7279 \u70e6\u607c\u201d\u3002\n\n\u53d1\u73b0\u65b0\u8bcd\u7684\u65b9\u6cd5\uff0c\u300a\\ `\u4e92\u8054\u7f51\u65f6\u4ee3\u7684\u793e\u4f1a\u8bed\u8a00\u5b66\uff1a\u57fa\u4e8eSNS\u7684\u6587\u672c\u6570\u636e\u6316\u6398 `__\n\u300b\u4e00\u6587\uff0c\u91cc\u9762\u63d0\u5230\u7684\u7ed9\u6bcf\u4e00\u4e2a\u6587\u672c\u4e32\u8ba1\u7b97\\ **\u6587\u672c\u7247\u6bb5**\\ \u7684\\ **\u51dd\u56fa\u7a0b\u5ea6**\\ \u548c\u6587\u672c\u4e32\u5bf9\u5916\u7684\u4f7f\u7528\\ **\u81ea\u7531\u5ea6**\\ \uff0c\u901a\u8fc7\u8bbe\u5b9a\u9608\u503c\u6765\u5c06\u6587\u672c\u4e32\u5206\u7c7b\u4e3a\u8bcd\u548c\u975e\u8bcd\u4e24\u7c7b\u3002\u539f\u6587\u7ed9\u4e86\u5341\u5206\u901a\u4fd7\u6613\u61c2\u7684\u4f8b\u5b50\u6765\u89e3\u91ca\u51dd\u56fa\u5ea6\u548c\u81ea\u52a8\u5ea6\u3002\u8fd9\u91cc\u653e\u4e0a\u8ba1\u7b97\u65b9\u6cd5\u3002\u8fd9\u4e2a\u65b9\u6cd5\u8fd8\u6709\u8bb8\u591a\u5730\u65b9\u9700\u8981\u4f18\u5316\uff0c\u5728\u4e4b\u540e\u7684\u5b9e\u8df5\u4e2d\u6162\u6162\u8c03\u6574\u4e86\u3002\n\n\u73af\u5883\n----\n\n::\n\n python >= 3.5\n\n\u5b89\u88c5\n----\n\n.. code:: bash\n\n python setup.py install\n\n\u4f7f\u7528\u8bf4\u660e\n--------\n\n.. code:: python\n\n import wordiscovery as wd\n\n text = \"\u65b0\u8bcd\u53d1\u73b0\u4efb\u52a1\u662f\u4e2d\u6587\u81ea\u7136\u8bed\u8a00\u5904\u7406\u7684\u91cd\u8981\u6b65\u9aa4\u3002\n \u65b0\u8bcd\u6709\u65b0\u5c31\u6709\u65e7\uff0c\u5c5e\u4e8e\u4e00\u4e2a\u76f8\u5bf9\u4e2a\u6982\u5ff5\uff0c\u5728\u76f8\u5bf9\u7684\u9886\u57df\uff08\u91d1\u878d\u3001\u533b\u7597\uff09\uff0c\n \u5728\u76f8\u5bf9\u7684\u65f6\u95f4\uff08\u8fc7\u53bb\u3001\u73b0\u5728\uff09\u90fd\u5b58\u5728\u65b0\u8bcd\u3002\u6587\u672c\u6316\u6398\u4f1a\u5148\u5c06\u6587\u672c\u5206\u8bcd\uff0c\n \u800c\u901a\u7528\u5206\u8bcd\u5668\u7cbe\u5ea6\u4e0d\u8fc7\uff0c\u901a\u5e38\u9700\u8981\u6dfb\u52a0\u81ea\u5b9a\u4e49\u5b57\u5178\u8865\u8db3\u7cbe\u5ea6\uff0c\n \u6240\u4ee5\u53d1\u73b0\u65b0\u8bcd\u5e76\u52a0\u5165\u5b57\u5178\uff0c\u6210\u4e3a\u6587\u672c\u6316\u6398\u7684\u4e00\u4e2a\u91cd\u8981\u5de5\u4f5c\u3002\n \"\n\n f = wd.Wordiscovery()\n\n # \u89e3\u6790\u8fc7\u7a0b\u9ed8\u8ba4\u53c2\u6570, \u6839\u636e\u6587\u672c\u81ea\u7531\u8c03\u8282\u8fd9\u51e0\u4e2a\u9608\u503c\n # \u6700\u5c0f\u4fe1\u606f\u71b50.01\n # \u6700\u5c0f\u4e92\u4fe1\u606f4\n # \u6700\u5c0f\u8bcd\u98912\n f.parse(text) # f.parse(text, 0.01, 4, 2)\n # {'\u5206\u8bcd': (2, 5.18271944179699, 0.6931471805599453),\n # '\u5b57\u5178': (2, 6.2813317304651, 0.6931471805599453),\n # '\u6587\u672c': (3, 4.895037369345209, 0.6365141682948128),\n # '\u6587\u672c\u6316\u6398': (2, 5.588184549905154, 0.6931471805599453),\n # '\u65b0\u8bcd': (4, 4.371789225580661, 1.0397207708399179),\n # '\u76f8\u5bf9': (3, 4.3842117455792184, 0.6365141682948128),\n # '\u7cbe\u5ea6': (2, 6.2813317304651, 0.6931471805599453),\n # '\u901a\u5e38': (2, 5.18271944179699, 0.6931471805599453),\n # '\u91cd\u8981': (2, 5.028568761969732, 0.6931471805599453),\n # '\u9700\u8981': (2, 5.028568761969732, 0.6931471805599453),\n # '\u9886\u57df': (2, 6.2813317304651, 0.6931471805599453)}\n\n\u8be6\u7ec6\u8bf4\u660e\n--------\n\n`wordicovery\u89e3\u91ca `__", "description_content_type": null, "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/ushiao/wordiscovery", "keywords": "NLP,new word discorvery", "license": "MIT", "maintainer": "", "maintainer_email": "", "name": "wordiscovery", "package_url": "https://pypi.org/project/wordiscovery/", "platform": "", "project_url": "https://pypi.org/project/wordiscovery/", "project_urls": { "Homepage": "https://github.com/ushiao/wordiscovery" }, "release_url": "https://pypi.org/project/wordiscovery/0.1.4.6/", "requires_dist": null, "requires_python": "", "summary": "A Chinese new word discovery", "version": "0.1.4.6" }, "last_serial": 3413507, "releases": { "0.1.4.6": [ { "comment_text": "", "digests": { "md5": "b3999a340746be349fe514cb0d85c4f2", "sha256": "ecd1a57d4c407bc8936cc33f86b1e2441f00850ad4be81edb27f343639e1ea2b" }, "downloads": -1, "filename": "wordiscovery-0.1.4.6.tar.gz", "has_sig": false, "md5_digest": "b3999a340746be349fe514cb0d85c4f2", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 7001, "upload_time": "2017-12-13T10:03:04", "url": "https://files.pythonhosted.org/packages/a1/ea/3e3106e6c8ce96106fd231e863a007255e272b610c52b1a305f0efec15b1/wordiscovery-0.1.4.6.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "b3999a340746be349fe514cb0d85c4f2", "sha256": "ecd1a57d4c407bc8936cc33f86b1e2441f00850ad4be81edb27f343639e1ea2b" }, "downloads": -1, "filename": "wordiscovery-0.1.4.6.tar.gz", "has_sig": false, "md5_digest": "b3999a340746be349fe514cb0d85c4f2", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 7001, "upload_time": "2017-12-13T10:03:04", "url": "https://files.pythonhosted.org/packages/a1/ea/3e3106e6c8ce96106fd231e863a007255e272b610c52b1a305f0efec15b1/wordiscovery-0.1.4.6.tar.gz" } ] }