{ "info": { "author": "Michael Hohl", "author_email": "me@michaelhohl.net", "bugtrack_url": null, "classifiers": [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3" ], "description": "# twcrawl - Twitter Network Crawler\n\nStarts at any given Twitter accounts (for example `realDonaldTrump` and \n`elonmusk`) and follows their relationships to download the profiles of all \nTwitter users into a local database for later batch processing (like \nanalyzing the social sentiment).\n\n**Note:** This crawler has been designed to prioritize crawling of accounts with\nthe most followers. Depending on your use case you might need to first tweak \nthe parameters a bit.\n\n\n## Setup \n\nAll required dependencies are defined in the `requirements.txt` file. Run\n`pip install -r requirements.txt` to install all of them if needed. Then copy\nthe `config.example.json` into a `config.json` and fill in your Twitter\nAPI credentials.\n\n\n## Usage\n\nSimply run `src/main.py --users users.txt`. `users.txt` should be\na list of twitter handles to use as entry points of the crawling process, one\nper each line of the text file.\n\nThis will launch an endless running process, which crawls as many users as\npossible (and as fast as allowed by the Twitter API limits). You can pause the \nprocess by simply killing it and continue the crawling process by starting it \nagain by executing `src/main.py` again (no more need for the `-i` parameter).\n\nThe crawled database will be stored into `data/twitter.sqlite3` (or anywhere \nelse if you override the default values in your config file).\n\n", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/hohl/twcrawl", "keywords": "", "license": "", "maintainer": "", "maintainer_email": "", "name": "twcrawl", "package_url": "https://pypi.org/project/twcrawl/", "platform": "", "project_url": "https://pypi.org/project/twcrawl/", "project_urls": { "Homepage": "https://github.com/hohl/twcrawl" }, "release_url": "https://pypi.org/project/twcrawl/1.0.1/", "requires_dist": [ "twitter", "tweepy", "sqlalchemy" ], "requires_python": ">=3.6", "summary": "Twitter crawler to download a followers graph and statuses into a local database.", "version": "1.0.1" }, "last_serial": 5944300, "releases": { "1.0.1": [ { "comment_text": "", "digests": { "md5": "f525c68d9a3a9d0f34cc67082c2dce61", "sha256": "2d001b9976339b3734be4fd9dba11abed2dc5554830d242d1d58cefbaf998c52" }, "downloads": -1, "filename": "twcrawl-1.0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "f525c68d9a3a9d0f34cc67082c2dce61", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": ">=3.6", "size": 8933, "upload_time": "2019-10-08T11:18:11", "url": "https://files.pythonhosted.org/packages/b8/3c/ff76b93798764ffd40e7033af9ce2bbb318450ffa83ff78ffa5f92fd3bca/twcrawl-1.0.1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "a5cfd3cf327f1521fd11770414b7a953", "sha256": "203b6dcdab8586c891365b76c6b1af9401a533796d52576b159e9426e9d00cba" }, "downloads": -1, "filename": "twcrawl-1.0.1.tar.gz", "has_sig": false, "md5_digest": "a5cfd3cf327f1521fd11770414b7a953", "packagetype": "sdist", "python_version": "source", "requires_python": ">=3.6", "size": 6716, "upload_time": "2019-10-08T11:18:12", "url": "https://files.pythonhosted.org/packages/16/42/81fdfaf0efa9135d31dbfe5c5fd2c97d6cc9ad4e2167cfcb96b0a36909db/twcrawl-1.0.1.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "f525c68d9a3a9d0f34cc67082c2dce61", "sha256": "2d001b9976339b3734be4fd9dba11abed2dc5554830d242d1d58cefbaf998c52" }, "downloads": -1, "filename": "twcrawl-1.0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "f525c68d9a3a9d0f34cc67082c2dce61", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": ">=3.6", "size": 8933, "upload_time": "2019-10-08T11:18:11", "url": "https://files.pythonhosted.org/packages/b8/3c/ff76b93798764ffd40e7033af9ce2bbb318450ffa83ff78ffa5f92fd3bca/twcrawl-1.0.1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "a5cfd3cf327f1521fd11770414b7a953", "sha256": "203b6dcdab8586c891365b76c6b1af9401a533796d52576b159e9426e9d00cba" }, "downloads": -1, "filename": "twcrawl-1.0.1.tar.gz", "has_sig": false, "md5_digest": "a5cfd3cf327f1521fd11770414b7a953", "packagetype": "sdist", "python_version": "source", "requires_python": ">=3.6", "size": 6716, "upload_time": "2019-10-08T11:18:12", "url": "https://files.pythonhosted.org/packages/16/42/81fdfaf0efa9135d31dbfe5c5fd2c97d6cc9ad4e2167cfcb96b0a36909db/twcrawl-1.0.1.tar.gz" } ] }