{ "info": { "author": "Hideaki Takahashi", "author_email": "mymelo@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Natural Language :: Japanese", "Operating System :: Microsoft :: Windows", "Operating System :: OS Independent", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.2", "Topic :: Scientific/Engineering :: Information Analysis", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Linguistic" ], "description": "================================\r\n Japanese Tokenizers for Whoosh\r\n================================\r\n\r\nAbout\r\n=====\r\n\r\nTokenizers for Whoosh full text search library designed for Japanese language.\r\nThis package contains three Tokenizers.\r\n\r\n* IgoTokenizer\r\n\r\n + requires igo-python(http://pypi.python.org/pypi/igo-python/) and its dictionary.\r\n\r\n* TinySegmenterTokenizer\r\n\r\n + requires TinySegmenter in Python(https://code.google.com/p/mhagiwara/source/browse/trunk/nltk/jpbook/tinysegmenter.py)\r\n\r\n* MeCabTokenizer\r\n\r\n * requires MeCab python binding(http://mecab.sourceforge.net/bindings.html)\r\n\r\n\r\nHow To Use\r\n==========\r\n\r\nIgoTokenizer::\r\n\r\n import igo.Tagger\r\n import whooshjp\r\n from whooshjp.IgoTokenizer import IgoTokenizer\r\n\r\n tk = IgoTokenizer(igo.Tagger.Tagger('ipadic'))\r\n scm = Schema(title=TEXT(stored=True, analyzer=tk), path=ID(unique=True,stored=True), content=TEXT(analyzer=tk))\r\n\r\n\r\nTinySegmenterTokenizer::\r\n\r\n import tinysegmenter\r\n import whooshjp\r\n from whooshjp.TinySegmenterTokenizer import TinySegmenterTokenizer\r\n\r\n tk = TinySegmenterTokenizer(tinysegmenter.TinySegmenter())\r\n scm = Schema(title=TEXT(stored=True, analyzer=tk), path=ID(unique=True,stored=True), 
content=TEXT(analyzer=tk))\r\n\r\n\r\n\r\nChangelog for Japanese Tokenizers for Whoosh\r\n============================================\r\n\r\n2011-02-19 -- 0.1\r\n * first release.\r\n\r\n2011-02-21 -- 0.2\r\n * add TinySegmenterTokenizer\r\n * change module name\r\n\r\n2011-02-24 -- 0.3\r\n * add FeatureFilter\r\n\r\n2011-02-27 -- 0.4\r\n * add MeCabTokenizer\r\n * add a mode that does not pickle the igo tagger, to minimize index size.\r\n\r\n2011-04-17 -- 0.5\r\n * correct char offsets\r\n\r\n2011-04-17 -- 0.6\r\n * correct char offsets(TinySegmenterTokenizer)\r\n\r\n2012-04-14 -- 0.7\r\n * rename package(WhooshJapaneseTokenizer to whooshjp)\r\n * no longer import sub modules automatically\r\n * Python3 compatibility(3.2, 3.3)\r\n * Drop Python2.5 support", "description_content_type": null, "docs_url": null, "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/hideaki-t/whoosh-igo", "keywords": "japanese,tokenizer", "license": "Apache License, Version 2.0", "maintainer": "", "maintainer_email": "", "name": "whoosh-igo", "package_url": "https://pypi.org/project/whoosh-igo/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/whoosh-igo/", "project_urls": { "Download": "UNKNOWN", "Homepage": "https://github.com/hideaki-t/whoosh-igo" }, "release_url": "https://pypi.org/project/whoosh-igo/0.7/", "requires_dist": null, "requires_python": null, "summary": "tokenizers for Whoosh designed for Japanese language", "version": "0.7" }, "last_serial": 1557844, "releases": { "0.1": [ { "comment_text": "", "digests": { "md5": "8d7e75e39aca090edec2b4f17d1adddb", "sha256": "c89d13cab98cc6e360fd5797231d1c2619344d974af293a808936270bfa52cef" }, "downloads": -1, "filename": "whoosh-igo-0.1.tar.gz", "has_sig": false, "md5_digest": "8d7e75e39aca090edec2b4f17d1adddb", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2011, "upload_time": "2011-02-19T12:31:08", "url": 
"https://files.pythonhosted.org/packages/32/d1/6c27eb0d61350110e58e7c787983ad2e82e6c1d03ad2664d63e2b43345e5/whoosh-igo-0.1.tar.gz" } ], "0.2": [ { "comment_text": "", "digests": { "md5": "4591237050d6f1902da02084427b939e", "sha256": "979ad3493118f5f537942a7ebde4b8c5a54672830f800f6b059103f08d69c7cc" }, "downloads": -1, "filename": "whoosh-igo-0.2.tar.gz", "has_sig": false, "md5_digest": "4591237050d6f1902da02084427b939e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2467, "upload_time": "2011-02-21T04:56:49", "url": "https://files.pythonhosted.org/packages/a3/03/1970f87a4e463aa23dd8feb2aa5885498fc038e6fbe3e5426179e43ff1f9/whoosh-igo-0.2.tar.gz" } ], "0.4": [ { "comment_text": "", "digests": { "md5": "c0997a1f2dceea3f819c679c1a0ab9d6", "sha256": "e4c924023a7a645ec6d7dd76c4b9b1bc98b15724cc5be58bb46bebc5233677b0" }, "downloads": -1, "filename": "whoosh-igo-0.4.tar.gz", "has_sig": false, "md5_digest": "c0997a1f2dceea3f819c679c1a0ab9d6", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3158, "upload_time": "2011-02-26T16:56:54", "url": "https://files.pythonhosted.org/packages/e1/c0/adf4e67813bfaf6b0c7c48636ed045af3559e0e4d2e2bae76c29a546020d/whoosh-igo-0.4.tar.gz" } ], "0.5": [ { "comment_text": "", "digests": { "md5": "74ea5fc83267dd1b370e5267e4282459", "sha256": "9b414d86347f9e051071e3ab3b70aef00dcd96fc148a1018cd98596e611453c9" }, "downloads": -1, "filename": "whoosh-igo-0.5.tar.gz", "has_sig": false, "md5_digest": "74ea5fc83267dd1b370e5267e4282459", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3279, "upload_time": "2011-04-17T11:46:34", "url": "https://files.pythonhosted.org/packages/05/7d/20ef7ad1eb7dfff73fde642b53ac2fb9eb3c30f9faa414d5133e97f32741/whoosh-igo-0.5.tar.gz" } ], "0.6": [ { "comment_text": "", "digests": { "md5": "d85d269c0fef2884f6bf589f13eaae97", "sha256": "707d59362878e6ee17c85b97ad9b3a2a41b83e79d7121071f33084b6ed4e74ae" }, "downloads": 
-1, "filename": "whoosh-igo-0.6.tar.gz", "has_sig": false, "md5_digest": "d85d269c0fef2884f6bf589f13eaae97", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3290, "upload_time": "2011-04-22T18:26:37", "url": "https://files.pythonhosted.org/packages/bb/4f/b5e736f11cccab11369b5588c8520ea0040e651636b717cc6aa4d9e68cbc/whoosh-igo-0.6.tar.gz" } ], "0.7": [ { "comment_text": "", "digests": { "md5": "24f942dd1a5e59d72907893ad602f38c", "sha256": "9731410ae86c4980955b77be62d3a06592705a4fc89191136b2841d379597157" }, "downloads": -1, "filename": "whoosh-igo-0.7.tar.gz", "has_sig": false, "md5_digest": "24f942dd1a5e59d72907893ad602f38c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 8004, "upload_time": "2012-07-16T12:10:56", "url": "https://files.pythonhosted.org/packages/0c/d6/5c557c67716de1a6657da7e425cacf6c5abffb73ba8cb20609b21ee086c2/whoosh-igo-0.7.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "24f942dd1a5e59d72907893ad602f38c", "sha256": "9731410ae86c4980955b77be62d3a06592705a4fc89191136b2841d379597157" }, "downloads": -1, "filename": "whoosh-igo-0.7.tar.gz", "has_sig": false, "md5_digest": "24f942dd1a5e59d72907893ad602f38c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 8004, "upload_time": "2012-07-16T12:10:56", "url": "https://files.pythonhosted.org/packages/0c/d6/5c557c67716de1a6657da7e425cacf6c5abffb73ba8cb20609b21ee086c2/whoosh-igo-0.7.tar.gz" } ] }