{ "info": { "author": "Mingli Yuan", "author_email": "mingli.yuan@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 3 - Alpha", "License :: OSI Approved :: BSD License", "Topic :: Utilities" ], "description": "# lang-detect: a tool to detect language\n\nDetects the language of a small piece of Unicode text without any dependency\non other libraries.\n\nCurrently we support detecting de, en, es, fr, it, ja, nl, pl, ru, zh-hans,\nzh-hant, and zh-yue.\n\nAfter some simple testing, we found that the results are better for longer\nsentences.\n\n## Method\n\nWe focus on the Basic Multilingual Plane of Unicode, and the current\nset of supported languages could be extended.\n\nFor each language, we use a uniformed ngram vector to represent the language\nitself. This vector can be found in the data folder.\n\nTo detect the language of a text, we generate the uniformed ngram vector for\nthis text and then compare the cosine of the angle between the text vector and\neach language vector.\n\nTo get the language vector, we use featured articles on Wikipedia as the corpus.\n\n## Usage\n\ncd to the project root\n\nbin/langdetect YOUR_SENTENCE_HERE", "description_content_type": null, "docs_url": null, "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/braingnp-org/lang-detect", "keywords": "nlp language unicode", "license": "BSD", "maintainer": null, "maintainer_email": null, "name": "lang-detect", "package_url": "https://pypi.org/project/lang-detect/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/lang-detect/", "project_urls": { "Download": "UNKNOWN", "Homepage": "https://github.com/braingnp-org/lang-detect" }, "release_url": "https://pypi.org/project/lang-detect/0.0.1/", "requires_dist": null, "requires_python": null, "summary": "a tool to detect the language of a small piece of unicode text without any dependency on other libraries.", "version": "0.0.1" }, 
"last_serial": 794044, "releases": { "0.0.1": [ { "comment_text": "", "digests": { "md5": "f80c43a3beb93cf25d7acfdde1d95603", "sha256": "c1fa4a594eab61f1d2cbf9fece10f91cd5b507a7155410764c8b579c4c6e8a09" }, "downloads": -1, "filename": "lang_detect-0.0.1-py2.6.egg", "has_sig": false, "md5_digest": "f80c43a3beb93cf25d7acfdde1d95603", "packagetype": "bdist_egg", "python_version": "2.6", "requires_python": null, "size": 1450, "upload_time": "2011-08-18T08:11:43", "url": "https://files.pythonhosted.org/packages/ce/63/a28dd6e7a709c6d758d70cc6690b834dc769300960affd1621cbacd170c8/lang_detect-0.0.1-py2.6.egg" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "f80c43a3beb93cf25d7acfdde1d95603", "sha256": "c1fa4a594eab61f1d2cbf9fece10f91cd5b507a7155410764c8b579c4c6e8a09" }, "downloads": -1, "filename": "lang_detect-0.0.1-py2.6.egg", "has_sig": false, "md5_digest": "f80c43a3beb93cf25d7acfdde1d95603", "packagetype": "bdist_egg", "python_version": "2.6", "requires_python": null, "size": 1450, "upload_time": "2011-08-18T08:11:43", "url": "https://files.pythonhosted.org/packages/ce/63/a28dd6e7a709c6d758d70cc6690b834dc769300960affd1621cbacd170c8/lang_detect-0.0.1-py2.6.egg" } ] }