{ "info": { "author": "Li Hanju", "author_email": "99959828@qq.com", "bugtrack_url": null, "classifiers": [ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Natural Language :: Chinese (Simplified)", "Natural Language :: Chinese (Traditional)", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Topic :: Text Processing", "Topic :: Text Processing :: Indexing", "Topic :: Text Processing :: Linguistic" ], "description": "\njuba \u7b80\u4ecb\n=========\n\u201c\u5de8\u9738\u201d\u4e2d\u6587\u6587\u672c\u5904\u7406\uff1a\n\u5b8c\u6574\u6587\u6863\u89c1 \u4eca\u65e5\u5934\u6761 AiMath\u6587\u7ae0\u300aPython\u7b2c\u4e09\u65b9\u5e93juba\u4e2d\u6587\u6587\u672c\u5904\u7406\u8be6\u7ec6\u4f7f\u7528\u65b9\u6cd5\u300b\nGitHub: https://github.com/lihanju/juba\n\n\u529f\u80fd\u4e0e\u7279\u70b9\n=========\n- \u652f\u6301document term matrix(dtm) \u6587\u6863\u8bcd\u6c47\u77e9\u9635,\u6709\u4e09\u79cd\u6a21\u5f0f\uff1a\n - tf_dtm();\u8bcd\u9891\u6a21\u5f0f\uff1b\n - prob_dtm():\u6982\u7387\u6a21\u5f0f\uff1b\n - tfidf_dtm():\u8bcd\u9891\u9006\u6587\u6863\u9891\u7387\u6a21\u5f0f\u3002\n- \u652f\u6301term document matrix(tdm) \u8bcd\u6c47\u6587\u6863\u77e9\u9635,\u6709\u4e09\u79cd\u6a21\u5f0f\uff1a\n - tf_tdm();\u8bcd\u9891\u6a21\u5f0f\uff1b\n - prob_tdm():\u6982\u7387\u6a21\u5f0f\uff1b\n - tfidf_tdm():\u8bcd\u9891\u9006\u6587\u6863\u9891\u7387\u6a21\u5f0f\u3002\n- \u652f\u6301\u6587\u672c\u76f8\u4f3c\u6027\u5206\u6790\uff0c\u6709\u56db\u79cd\u65b9\u6cd5\uff1a\n -cosine_sim():\u4f59\u5f26\u76f8\u4f3c\u5ea6\uff1b\n -weight_jaccard_sim():\u6743\u91cdjaccard\u76f8\u4f3c\u5ea6\uff1b\n -jaccard_sim():jaccard\u76f8\u4f3c\u5ea6\uff1b\n -bm25_sim():bm25\u76f8\u4f3c\u5ea6\u3002\n- \u652f\u6301\u8bcd\u6c47\u5173\u8054\u5206\u6790\uff0c\u6709\u4e24\u79cd\u6a21\u5f0f\uff1a\n -two_term_assocs(word_one,word_two,tdm='tf_tdm',norm='False'):\u8ba1\u7b97\u4e24\u4e2a\u8bcd\u6c47\u7684\u76f8\u5173\u7cfb\u6570\uff1b\n -find_assocs(word,mu=0,tdm='tf_tdm',norm='False')\uff1a\u627e\u51faword\u7684\u76f8\u5173\u7cfb\u6570\u7684\u7edd\u5bf9\u503c\u4e0d\u5c11\u4e8emu\u7684\u6240\u6709\u8bcd\u6c47\u3002\n- \u652f\u6301\u4e2d\u6587\u6587\u5b57\u751f\u6210\u5668\uff0c\u81ea\u52a8\u64b0\u5199\u6587\u7ae0\uff1a\n -random_text(textlength,firstWord)\uff1a\u4ee5firstWord\u5f00\u59cb\uff0c\u751f\u6210textlength\u4e2a\u8bcd\u6c47\u7684\u6587\u7ae0\u3002\n- MIT \u6388\u6743\u534f\u8bae\n\n\u4f7f\u7528\u4f8b\u5b50\n=========\nimport jieba\nfrom juba import Similar,Markov\n\ndocs = [['\u901a\u4fe1', '\u6709\u6548'], ['\u9886\u57df', '\u81ea\u7136\u8bed\u8a00', '\u8ba1\u7b97\u673a\u79d1\u5b66', '\u81ea\u7136\u8bed\u8a00', '\u7406\u89e3', '\u81ea\u7136\u8bed\u8a00', '\u81ea\u7136\u8bed\u8a00', '\u7406\u8bba'], ['\u901a\u4fe1', '\u6709\u6548', '\u7406\u89e3', '\u7406\u8bba'],\n ['\u9886\u57df', '\u81ea\u7136\u8bed\u8a00', '\u8ba1\u7b97\u673a\u79d1\u5b66', '\u81ea\u7136\u8bed\u8a00', '\u7406\u89e3', '\u81ea\u7136\u8bed\u8a00', '\u81ea\u7136\u8bed\u8a00', '\u7406\u8bba', '\u901a\u4fe1', '\u6709\u6548', '\u7406\u89e3', '\u7406\u8bba'],['\u6211','\u559c\u6b22','\u4f60\u4eec'],\n ['\u6211','\u8c22\u8c22','\u60a8']]\nS=Similar(docs)\nS.tf_dtm()\nS.tf_tdm()\nS.cosine_sim(dtm='tf_dtm')\nS.two_term_assocs('\u6709\u6548','\u7406\u8bba',tdm='tf_tdm')\nS.find_assocs('\u8ba1\u7b97\u673a\u79d1\u5b66',tdm='tf_tdm',mu=0.7)\n\ntext='\u5728\u5168\u56fd\u7f51\u7edc\u5b89\u5168\u548c\u4fe1\u606f\u5316\u5de5\u4f5c\u4f1a\u8bae\u4e0a\uff0c\u4e60\u8fd1\u5e73\u603b\u4e66\u8bb0\u4ece\u515a\u957f\u671f\u6267\u653f\u548c\u56fd\u5bb6\u957f\u6cbb\u4e45\u5b89\u7684\u9ad8\u5ea6\uff0c\u6df1\u523b\u9610\u660e\u4e86\u7f51\u4fe1\u4e8b\u4e1a\u53d1\u5c55\u7684\u4e00\u7cfb\u5217\u65b9\u5411\u6027\u3001\u5168\u5c40\u6027\u3001\u6839\u672c\u6027\u95ee\u9898\uff0c\n\u5bf9\u52a0\u5f3a\u515a\u5bf9\u7f51\u4fe1\u5de5\u4f5c\u7684\u9886\u5bfc\u63d0\u51fa\u4e86\u660e\u786e\u8981\u6c42\uff0c\u4e3a\u65b0\u65f6\u4ee3\u7f51\u7edc\u5b89\u5168\u548c\u4fe1\u606f\u5316\u53d1\u5c55\u63d0\u4f9b\u4e86\u6839\u672c\u9075\u5faa\u3002\u575a\u6301\u6b63\u786e\u653f\u6cbb\u65b9\u5411\uff0c\u5c31\u8981\u4ee5\u4e60\u8fd1\u5e73\u65b0\u65f6\u4ee3\u4e2d\u56fd\u7279\u8272\u793e\u4f1a\u4e3b\u4e49\u601d\u60f3\u4e3a\u6307\u5bfc\uff0c\n\u628a\u7f51\u7edc\u5f3a\u56fd\u6218\u7565\u601d\u60f3\u8d2f\u7a7f\u5230\u7f51\u4fe1\u5de5\u4f5c\u5404\u65b9\u9762\u3001\u8bf8\u73af\u8282\u3002\u515a\u7684\u5341\u516b\u5927\u4ee5\u6765\uff0c\u6211\u4eec\u4e4b\u6240\u4ee5\u80fd\u63a8\u52a8\u7f51\u4fe1\u4e8b\u4e1a\u53d6\u5f97\u5386\u53f2\u6027\u6210\u5c31\uff0c\u6700\u6839\u672c\u7684\u5c31\u5728\u4e8e\u6709\u4ee5\u4e60\u8fd1\u5e73\u540c\u5fd7\u4e3a\u6838\u5fc3\u7684\u515a\u4e2d\u592e\u7684\u575a\u5f3a\u9886\u5bfc\uff0c\n\u6709\u7f51\u7edc\u5f3a\u56fd\u6218\u7565\u601d\u60f3\u7684\u6b63\u786e\u5f15\u9886\u3002'\ntext=list(jieba.cut(text))#\u4f7f\u7528jieba\u5bf9\u6587\u672c\u8fdb\u884c\u5206\u8bcd\nM=Markov(text)\nM.random_text(200,\"\u6211\")\n\n\u5b89\u88c5\u8bf4\u660e\n==========\n\u4ee3\u7801\u5bf9 Python 2/3 \u5747\u517c\u5bb9\n- \u5168\u81ea\u52a8\u5b89\u88c5\uff1apip install juba\n- \u624b\u52a8\u5b89\u88c5\uff1a\u5c06 juba \u76ee\u5f55\u653e\u7f6e\u4e8e\u5f53\u524d\u76ee\u5f55\u6216\u8005 site-packages \u76ee\u5f55\n- \u901a\u8fc7 ``import juba`` \u6765\u5f15\u7528\n\n\n\n", "description_content_type": "", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/lihanju/juba", "keywords": "", "license": "", "maintainer": "", "maintainer_email": "", "name": "juba", "package_url": "https://pypi.org/project/juba/", "platform": "", "project_url": "https://pypi.org/project/juba/", "project_urls": { "Homepage": "https://github.com/lihanju/juba" }, "release_url": "https://pypi.org/project/juba/0.1.2/", "requires_dist": null, "requires_python": "", "summary": "A Python library for Chinese text analysis.", "version": "0.1.2" }, "last_serial": 3911572, "releases": { "0.1.0": [ { "comment_text": "", "digests": { "md5": "6d9f1ef65f0f18441e9d3addf099924e", "sha256": "f27ce9c5065f50b8c37b29df1b9432cd770558c42063d178e444baa3d32a916a" }, "downloads": -1, "filename": "juba-0.1.0.tar.gz", "has_sig": false, "md5_digest": "6d9f1ef65f0f18441e9d3addf099924e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5408, "upload_time": "2018-05-30T03:56:47", "url": "https://files.pythonhosted.org/packages/33/78/752f58944f4c90f673fd0a6f464149fd6597d9c9a5b12a2e2bc0da877a7b/juba-0.1.0.tar.gz" } ], "0.1.1": [ { "comment_text": "", "digests": { "md5": "c2be14c6b06c533db8fe5e55f16e244d", "sha256": "1ea3d4316bf37dd18fc0c1b038979540bb10a8803343a266afaf5e65d93b9c85" }, "downloads": -1, "filename": "juba-0.1.1.tar.gz", "has_sig": false, "md5_digest": "c2be14c6b06c533db8fe5e55f16e244d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5392, "upload_time": "2018-05-30T05:26:27", "url": "https://files.pythonhosted.org/packages/c8/a8/6c38eef5c2f7ce72a167cfa0cad521982913360a9acd94b7d89d15365fcb/juba-0.1.1.tar.gz" } ], "0.1.2": [ { "comment_text": "", "digests": { "md5": "e1f50e2e9b170897bdb86f258b4160cd", "sha256": "f735f4d98e411a215a3e9e3304ccd9b41cb43ce8fb57e372451515d83f13777c" }, "downloads": -1, "filename": "juba-0.1.2-py3-none-any.whl", "has_sig": false, "md5_digest": "e1f50e2e9b170897bdb86f258b4160cd", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 5747, "upload_time": "2018-05-30T05:40:09", "url": "https://files.pythonhosted.org/packages/b3/4e/9d7aab48d97535688db8922852305a9a56b20e2a68c6a7de1364a5c8a82c/juba-0.1.2-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "07d18424dc60c74e8e5457280ec86c02", "sha256": "c50b429aca24d8a2bd3a2367fb201beb44bd63430da7d58785078d4e4c2dcbfb" }, "downloads": -1, "filename": "juba-0.1.2.tar.gz", "has_sig": false, "md5_digest": "07d18424dc60c74e8e5457280ec86c02", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5380, "upload_time": "2018-05-30T05:40:11", "url": "https://files.pythonhosted.org/packages/4c/a9/c50f2671ae18ee237ab870d39fb817c1d7f58e0fcac309df73e19a0b3429/juba-0.1.2.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "e1f50e2e9b170897bdb86f258b4160cd", "sha256": "f735f4d98e411a215a3e9e3304ccd9b41cb43ce8fb57e372451515d83f13777c" }, "downloads": -1, "filename": "juba-0.1.2-py3-none-any.whl", "has_sig": false, "md5_digest": "e1f50e2e9b170897bdb86f258b4160cd", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 5747, "upload_time": "2018-05-30T05:40:09", "url": "https://files.pythonhosted.org/packages/b3/4e/9d7aab48d97535688db8922852305a9a56b20e2a68c6a7de1364a5c8a82c/juba-0.1.2-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "07d18424dc60c74e8e5457280ec86c02", "sha256": "c50b429aca24d8a2bd3a2367fb201beb44bd63430da7d58785078d4e4c2dcbfb" }, "downloads": -1, "filename": "juba-0.1.2.tar.gz", "has_sig": false, "md5_digest": "07d18424dc60c74e8e5457280ec86c02", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5380, "upload_time": "2018-05-30T05:40:11", "url": "https://files.pythonhosted.org/packages/4c/a9/c50f2671ae18ee237ab870d39fb817c1d7f58e0fcac309df73e19a0b3429/juba-0.1.2.tar.gz" } ] }