{ "info": { "author": "Alexandre Kabbach", "author_email": "akb@3azouz.net", "bugtrack_url": null, "classifiers": [ "Development Status :: 2 - Pre-Alpha", "Environment :: Web Environment", "Intended Audience :: Developers", "Intended Audience :: Education", "Intended Audience :: Science/Research", "License :: OSI Approved :: MIT License", "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Linguistic" ], "description": "# Word2Vec\n\n[![GitHub release][release-image]][release-url]\n[![PyPI release][pypi-image]][pypi-url]\n[![Build][travis-image]][travis-url]\n[![MIT License][license-image]][license-url]\n\nThis is a re-implementation of Word2Vec relying on Tensorflow\n[Estimators](https://www.tensorflow.org/guide/estimators) and\n[Datasets](https://www.tensorflow.org/guide/datasets_for_estimators)\n\n## Install\nAfter a git clone:\n```shell\npython3 setup.py install\n```\n\n## Get data\nYou can download a sample of the English Wikipedia here:\n```shell\nwget http://129.194.21.122/~kabbach/enwiki.20190120.sample10.0.balanced.txt.7z\n```\n\n## Train Word2Vec\n```shell\nw2v train \\\n --data /absolute/path/to/enwiki.20190120.sample10.0.balanced.txt \\\n --outputdir /absolute/path/to/word2vec/models \\\n --alpha 0.025 \\\n --neg 5 \\\n --window 2 \\\n --epochs 5 \\\n --size 300 \\\n --min-count 50 \\\n --sample 1e-5 \\\n --train-mode skipgram \\\n --t-num-threads 20 \\\n --p-num-threads 25 \\\n --keep-checkpoint-max 3 \\\n --batch 1 \\\n --shuffling-buffer-size 10000 \\\n --save-summary-steps 10000 \\\n --save-checkpoints-steps 100000 \\\n --log-step-count-steps 
10000\n```\n\n[release-image]:https://img.shields.io/github/release/akb89/word2vec.svg?style=flat-square\n[release-url]:https://github.com/akb89/word2vec/releases/latest\n[pypi-image]:https://img.shields.io/pypi/v/tf-word2vec.svg?style=flat-square\n[pypi-url]:https://pypi.org/project/tf-word2vec/\n[travis-image]:https://img.shields.io/travis/akb89/word2vec.svg?style=flat-square\n[travis-url]:https://travis-ci.org/akb89/word2vec\n[license-image]:http://img.shields.io/badge/license-MIT-000000.svg?style=flat-square\n[license-url]:LICENSE.txt", "description_content_type": "text/markdown", "docs_url": null, "download_url": "https://github.com/akb89/word2vec/archive/0.1.6.tar.gz", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/akb89/word2vec", "keywords": "word2vec,word embeddings,tensorflow,estimators,datasets", "license": "MIT", "maintainer": "", "maintainer_email": "", "name": "tf-word2vec", "package_url": "https://pypi.org/project/tf-word2vec/", "platform": "any", "project_url": "https://pypi.org/project/tf-word2vec/", "project_urls": { "Download": "https://github.com/akb89/word2vec/archive/0.1.6.tar.gz", "Homepage": "https://github.com/akb89/word2vec" }, "release_url": "https://pypi.org/project/tf-word2vec/0.1.6/", "requires_dist": null, "requires_python": "", "summary": "Word2Vec implementation with Tensorflow Estimators and Datasets", "version": "0.1.6" }, "last_serial": 5222595, "releases": { "0.1.0": [ { "comment_text": "", "digests": { "md5": "2d3aca86db5cd7ebc85402176ad750a4", "sha256": "603f0bece67817ad310ef0d5b345a191b1ad8bb07a79afaae31939218c6ab64c" }, "downloads": -1, "filename": "tf-word2vec-0.1.0.tar.gz", "has_sig": false, "md5_digest": "2d3aca86db5cd7ebc85402176ad750a4", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 31154, "upload_time": "2019-04-17T19:51:53", "url": 
"https://files.pythonhosted.org/packages/14/e1/b154e3283575ca0df2326196d62c81c7f919d4cae661de9bd883257379cc/tf-word2vec-0.1.0.tar.gz" } ], "0.1.2": [ { "comment_text": "", "digests": { "md5": "1b45a73b84ca198d3407e93ccf466414", "sha256": "f157924b96f3eb995486a75f04b7ec3faadfd6130fd6a13634074a8be2363159" }, "downloads": -1, "filename": "tf-word2vec-0.1.2.tar.gz", "has_sig": false, "md5_digest": "1b45a73b84ca198d3407e93ccf466414", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 31251, "upload_time": "2019-04-17T20:02:16", "url": "https://files.pythonhosted.org/packages/95/86/4207613ba20411f7e9e2b7b57864a3cf334173ce593abb7e3f689f8e38bb/tf-word2vec-0.1.2.tar.gz" } ], "0.1.5": [ { "comment_text": "", "digests": { "md5": "95bfa9c351ce96eaf5efac445b551e03", "sha256": "5c81565603ddf9484445ade86ff426d3b2e211a6e89c72f7a66f275fdc1ae621" }, "downloads": -1, "filename": "tf-word2vec-0.1.5.tar.gz", "has_sig": false, "md5_digest": "95bfa9c351ce96eaf5efac445b551e03", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 31193, "upload_time": "2019-04-17T20:15:37", "url": "https://files.pythonhosted.org/packages/a1/94/9b39c3bc1a99a1f56dfd1860087019c1ccba21c8d89b6c7f5516cfcd1dfc/tf-word2vec-0.1.5.tar.gz" } ], "0.1.6": [ { "comment_text": "", "digests": { "md5": "d3d05acbd03eea377b6f568a7c25b808", "sha256": "d647c9d711d060262959edbfd3c700559fee25357739d242de49d740adbd972d" }, "downloads": -1, "filename": "tf-word2vec-0.1.6.tar.gz", "has_sig": false, "md5_digest": "d3d05acbd03eea377b6f568a7c25b808", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 31196, "upload_time": "2019-05-03T15:53:26", "url": "https://files.pythonhosted.org/packages/16/ef/55df1c947c12b3b181973e9e62db78db439483aa9383bfeff01779e6a461/tf-word2vec-0.1.6.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "d3d05acbd03eea377b6f568a7c25b808", "sha256": 
"d647c9d711d060262959edbfd3c700559fee25357739d242de49d740adbd972d" }, "downloads": -1, "filename": "tf-word2vec-0.1.6.tar.gz", "has_sig": false, "md5_digest": "d3d05acbd03eea377b6f568a7c25b808", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 31196, "upload_time": "2019-05-03T15:53:26", "url": "https://files.pythonhosted.org/packages/16/ef/55df1c947c12b3b181973e9e62db78db439483aa9383bfeff01779e6a461/tf-word2vec-0.1.6.tar.gz" } ] }