{ "info": { "author": "yongzhuo", "author_email": "1903865025@qq.com", "bugtrack_url": null, "classifiers": [ "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy" ], "description": "# nlp_xiaojiang\n\n\n# AugmentText\n - \u56de\u8bd1\uff08\u6548\u679c\u6bd4\u8f83\u597d\uff09\n - EDA\uff08\u540c\u4e49\u8bcd\u66ff\u6362\u3001\u63d2\u5165\u3001\u4ea4\u6362\u548c\u5220\u9664\uff09\uff08\u6548\u679c\u8fd8\u884c\uff09\n - HMM-marko\uff08\u8d28\u91cf\u8f83\u5dee\uff09\n - syntax\uff08\u4f9d\u5b58\u53e5\u6cd5\u3001\u53e5\u6cd5\u3001\u8bed\u6cd5\u4e66\uff09\uff08\u7b80\u5355\u53e5\u8fd8\u53ef\uff09\n - seq2seq\uff08\u6df1\u5ea6\u5b66\u4e60\u540c\u4e49\u53e5\u751f\u6210\uff0c\u6548\u679c\u4e0d\u7406\u60f3\uff0cseq2seq\u4ee3\u7801\u5927\u90fd\u662f [https://github.com/qhduan/just_another_seq2seq] \u7684\uff0c\u6548\u679c\u4e0d\u7406\u60f3\uff09\n\n# ChatBot\n - \u68c0\u7d22\u5f0fChatBot\n - \u50cfES\u90a3\u6837\u76f4\u63a5\u68c0\u7d22(\u5982\u4f7f\u7528fuzzywuzzy)\uff0c\u53ea\u80fd\u5b57\u9762\u5339\u914d\n - \u6784\u9020\u53e5\u5411\u91cf\uff0c\u68c0\u7d22\u95ee\u7b54\u5e93\uff0c\u80fd\u591f\u68c0\u7d22\u6709\u540c\u4e49\u8bcd\u7684\u53e5\u5b50\n - \u751f\u6210\u5f0fChatBot\uff08todo\uff09\n - seq2seq\n - GAN\n\n# FeatureProject\n - bert\u53e5\u5411\u91cf\u3001\u6587\u672c\u76f8\u4f3c\u5ea6\n - bert/extract_keras_bert_feature.py:\u63d0\u53d6bert\u53e5\u5411\u91cf\u7279\u5f81\n - bert/tet_bert_keras_sim.py:\u6d4b\u8bd5bert\u53e5\u5411\u91cfcosin\u76f8\u4f3c\u5ea6\n - normalization_util\u6307\u7684\u662f\u6570\u636e\u5f52\u4e00\u5316\n - 0-1\u5f52\u4e00\u5316\u5904\u7406\n - \u5747\u503c\u5f52\u4e00\u5316\n - sig\u5f52\u4e00\u5316\u5904\u7406\n - sim feature\uff08ML\uff09\n - distance_text_or_vec:\u5404\u79cd\u8ba1\u7b97\u6587\u672c\u3001\u5411\u91cf\u8ddd\u79bb\u7b49\n - distance_vec_TS_SS\uff1aTS_SS\u8ba1\u7b97\u8bcd\u5411\u91cf\u8ddd\u79bb\n - cut_td_idf\uff1a\u5c06\u5c0f\u9ec4\u9e21\u8bed\u6599\u548cgossip\u7ed3\u5408\n - sentence_sim_feature\uff1a\u8ba1\u7b97\u4e24\u4e2a\u6587\u672c\u7684\u76f8\u4f3c\u5ea6\u6216\u8005\u8ddd\u79bb\uff0c\u4f8b\u5982qq\uff08\u95ee\u9898\u548c\u95ee\u9898\uff09\uff0c\u6216\u8005qa\uff08\u95ee\u9898\u548c\u7b54\u6848\uff09\n\n# run(\u53ef\u4ee5\u5728win10\u4e0b,pycharm\u4e0b\u8fd0\u884c)\n - 1.\u521b\u5efatf-idf\u6587\u4ef6\u7b49\uff08\u8fd0\u884c2\u9700\u8981\u5148\u8dd11\uff09:\n ```\n python cut_td_idf.py\n ```\n - 2.\u8ba1\u7b97\u4e24\u4e2a\u53e5\u5b50\u95f4\u7684\u5404\u79cd\u76f8\u4f3c\u5ea6\uff0c\u5148\u8ba1\u7b97\u4e00\u4e2a\u9884\u5b9a\u4e49\u7684\uff0c\u7136\u540e\u53ef\u8f93\u5165\u81ea\u5b9a\u4e49\u7684\uff08\u5148\u8dd11\uff09:\n ```\n python sentence_sim_feature.py\n ```\n - 3.chatbot_1\u8dd1\u8d77\u6765(fuzzy\u68c0\u7d22-\u6ca1)\uff08\u72ec\u7acb\uff09\uff1a\n ```\n python chatbot_fuzzy.py\n ```\n - 4.chatbot_2\u8dd1\u8d77\u6765(\u53e5\u5411\u91cf\u68c0\u7d22-\u8bcd)\uff08\u72ec\u7acb\uff09\uff1a\n ```\n python chatbot_sentence_vec_by_word.py\n ```\n - 5.chatbot_3\u8dd1\u8d77\u6765(\u53e5\u5411\u91cf\u68c0\u7d22-\u5b57)\uff08\u72ec\u7acb\uff09\uff1a\n ```\n python chatbot_sentence_vec_by_char.py\n ```\n - 6.\u6570\u636e\u589e\u5f3a\uff08eda)\uff1a python enhance_eda.py\n - 7.\u6570\u636e\u589e\u5f3a\uff08marko\uff09: python enhance_marko.py\n - 8.\u6570\u636e\u589e\u5f3a\uff08translate_account\uff09: python translate_tencent_secret.py\n - 9.\u6570\u636e\u589e\u5f3a\uff08translate_tools\uff09: python translate_translate.py\n - 10.\u6570\u636e\u589e\u5f3a\uff08translate_web\uff09: python translate_google.py\n - 11.\u6570\u636e\u589e\u5f3a\uff08augment_seq2seq\uff09: \u5148\u8dd1 python extract_char_webank.py\u751f\u6210\u6570\u636e\uff0c\n \u518d\u8dd1 python train_char_anti.py\n \u7136\u540e\u8dd1 python predict_char_anti.py\n - 12.\u7279\u5f81\u8ba1\u7b97(bert)\uff08\u63d0\u53d6\u7279\u5f81\u3001\u8ba1\u7b97\u76f8\u4f3c\u5ea6\uff09:\n ```\n run extract_keras_bert_feature.py\n run tet_bert_keras_sim.py\n ```\n\n# Data\n - chinese_L-12_H-768_A-12\uff08\u8c37\u6b4c\u9884\u8bad\u7ec3\u597d\u7684\u6a21\u578b\uff09\n github\u9879\u76ee\u4e2d\u53ea\u662f\u4e0a\u4f20\u90e8\u5206\u6570\u636e\uff0c\u9700\u8981\u7684\u524d\u5f80\u94fe\u63a5: https://pan.baidu.com/s/1I3vydhmFEQ9nuPG2fDou8Q \u63d0\u53d6\u7801: rket\n \u89e3\u538b\u540e\u5c31\u53ef\u4ee5\u5566\n - chinese_vector\n github\u9879\u76ee\u4e2d\u53ea\u662f\u4e0a\u4f20\u90e8\u5206\u6570\u636e\uff0c\u9700\u8981\u7684\u524d\u5f80\u94fe\u63a5: https://pan.baidu.com/s/1I3vydhmFEQ9nuPG2fDou8Q \u63d0\u53d6\u7801: rket\n - \u622a\u53d6\u7684\u90e8\u5206word2vec\u8bad\u7ec3\u8bcd\u5411\u91cf\uff08\u81ea\u5df1\u9700\u8981\u4e0b\u8f7d\u5168\u6548\u679c\u624d\u4f1a\u597d\uff09\n - w2v_model_wiki_char.vec\u3001w2v_model_wiki_word.vec\u90fd\u53ea\u6709\u90e8\u5206\n - corpus\n github\u9879\u76ee\u4e2d\u53ea\u662f\u4e0a\u4f20\u90e8\u5206\u6570\u636e\uff0c\u9700\u8981\u7684\u524d\u5f80\u94fe\u63a5: https://pan.baidu.com/s/1I3vydhmFEQ9nuPG2fDou8Q \u63d0\u53d6\u7801: rket\n - \u5c0f\u9ec4\u9e21\u548cgossip\u95ee\u7b54\u9884\u6599\uff08\u6570\u636e\u6ca1\u6e05\u6d17\uff09,chicken_and_gossip.txt\n - \u5fae\u4f17\u94f6\u884c\u548c\u652f\u4ed8\u5b9d\u6587\u672c\u76f8\u4f3c\u5ea6\u7ade\u8d5b\u6570\u636e\uff0c sim_webank.csv\n - sentence_vec_encode_char\n - 1.txt\uff08\u5b57\u5411\u91cf\u751f\u6210\u7684\u524d100000\u53e5\u5411\u91cf\uff09\n - sentence_vec_encode_word\n - 1.txt\uff08\u8bcd\u5411\u91cf\u751f\u6210\u7684\u524d100000\u53e5\u5411\u91cf\uff09\n - tf_idf\uff08chicken_and_gossip.txt\u751f\u6210\u7684tf-idf\uff09\n\n# requestments.txt\n - python_Levenshtei\n - \u8c03\u7528Levenshtein\uff0c\u6211\u7684python\u662f3.6\uff0c\n - \u6253\u5f00\u5176\u6e90\u6587\u4ef6: https://www.lfd.uci.edu/~gohlke/pythonlibs/\n - \u67e5\u627epython_Levenshtein-0.12.0-cp36-cp36m-win_amd64.whl\u4e0b\u8f7d\u5373\u53ef\n - pyemd\n - pyemd-0.5.1-cp36-cp36m-win_amd64.whl\n - pyhanlp\n - \u4e0b\u597d\u4f9d\u8d56JPype1-0.6.3-cp36-cp36m-win_amd64.whl\n\n# \u53c2\u8003/\u611f\u8c22\n* eda_chinese\uff1a[https://github.com/zhanlaoban/eda_nlp_for_Chinese](https://github.com/zhanlaoban/eda_nlp_for_Chinese)\n* \u4e3b\u8c13\u5bbe\u63d0\u53d6\u5668\uff1a[https://github.com/hankcs/MainPartExtractor](https://github.com/hankcs/MainPartExtractor)\n* HMM\u751f\u6210\u53e5\u5b50\uff1a[https://github.com/takeToDreamLand/SentenceGenerate_byMarkov](https://github.com/takeToDreamLand/SentenceGenerate_byMarkov)\n* \u540c\u4e49\u8bcd\u7b49\uff1a[https://github.com/fighting41love/funNLP/tree/master/data/](https://github.com/fighting41love/funNLP/tree/master/data/)\n* \u5c0f\u725b\u7ffb\u8bd1\uff1a[http://www.niutrans.com/index.html](http://www.niutrans.com/index.html)\n\n# \u5176\u4ed6\u8d44\u6599\n* NLP\u6570\u636e\u589e\u5f3a\u6c47\u603b:[https://github.com/quincyliang/nlp-data-augmentation](https://github.com/quincyliang/nlp-data-augmentation)\n* \u77e5\u4e4eNLP\u6570\u636e\u589e\u5f3a\u8bdd\u9898:[https://www.zhihu.com/question/305256736/answer/550873100](https://www.zhihu.com/question/305256736/answer/550873100)\n* chatbot_seq2seq_seqGan\uff08\u6bd4\u8f83\u597d\u7528\uff09\uff1a[https://github.com/qhduan/just_another_seq2seq](https://github.com/qhduan/just_another_seq2seq)\n* \u81ea\u5df1\u52a8\u624b\u505a\u804a\u5929\u673a\u5668\u4eba\u6559\u7a0b: [https://github.com/warmheartli/ChatBotCourse](https://github.com/warmheartli/ChatBotCourse)\n\n\n\n", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/yongzhuo/nlp_xiaojiang", "keywords": "", "license": "MIT", "maintainer": "", "maintainer_email": "", "name": "nlp-xiaojiang", "package_url": "https://pypi.org/project/nlp-xiaojiang/", "platform": "", "project_url": "https://pypi.org/project/nlp-xiaojiang/", "project_urls": { "Homepage": "https://github.com/yongzhuo/nlp_xiaojiang" }, "release_url": "https://pypi.org/project/nlp-xiaojiang/0.0.1/", "requires_dist": [ "scikit-learn (>=0.19.1)", "fuzzywuzzy (>=0.17.0)", "openpyxl (>=2.6.2)", "xpinyin (>=0.5.6)", "gensim (>=3.7.1)", "jieba (>=0.39)", "xlrd (>=1.2.0)", "tensorflow (>=1.8.0)", "keras-bert (>=0.41.0)", "Keras (>=2.2.0)", "pandas (>=0.23.0)", "h5py (>=2.7.1)", "numpy (>=1.16.1)", "pyemd (==0.5.1)", "pathlib", "translate", "PyExecJS", "stanfordcorenlp" ], "requires_python": "", "summary": "nlp of augment\u3001chatbot\u3001classification and featureproject of chinese text", "version": "0.0.1" }, "last_serial": 5487824, "releases": { "0.0.1": [ { "comment_text": "", "digests": { "md5": "3ca8b0323bdc9da449d54051d17c34a2", "sha256": "76034429322c14b469558ac80d2bfebb50797ea0e8014c46be36e7673a56a316" }, "downloads": -1, "filename": "nlp_xiaojiang-0.0.1-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "3ca8b0323bdc9da449d54051d17c34a2", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 217157, "upload_time": "2019-07-04T18:04:44", "url": "https://files.pythonhosted.org/packages/62/6c/6d1e026b8c2ff2bf336d0886af5570f73ee0d74c862a640153f76636da05/nlp_xiaojiang-0.0.1-py2.py3-none-any.whl" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "3ca8b0323bdc9da449d54051d17c34a2", "sha256": "76034429322c14b469558ac80d2bfebb50797ea0e8014c46be36e7673a56a316" }, "downloads": -1, "filename": "nlp_xiaojiang-0.0.1-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "3ca8b0323bdc9da449d54051d17c34a2", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 217157, "upload_time": "2019-07-04T18:04:44", "url": "https://files.pythonhosted.org/packages/62/6c/6d1e026b8c2ff2bf336d0886af5570f73ee0d74c862a640153f76636da05/nlp_xiaojiang-0.0.1-py2.py3-none-any.whl" } ] }