{ "info": { "author": "Andrea Capitanelli", "author_email": "andrea.capitanelli@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 3 - Alpha", "License :: OSI Approved :: MIT License", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", "Topic :: Office/Business :: News/Diary" ], "description": "\n# Newsman\n\nA tool for scraping news from web.\n\nThere is no AI here, just good old-fashioned if-else rules.\n\n# Usage\n\nBasic usage:\n\n```\nimport newsman\n\n# start reading\nsrc = 'https://www.bbc.com'\nnews = newsman.read(src)\n\n# show articles\nfor article in news:\n print(f'Url: {article.url.url}')\n print(f'Title: {article.title}')\n print(f'Text: {article.text}')\n print(f'Main image: {article.main_image}')\n```\n\nCustomizing configuration:\n\n```\nimport newsman\n\n# get configuration\nconfig = newsman.get_config()\n\n# add a proxy for connection\nconfig['proxies'] = proxies\n\n# update accepted/rejected domains\nconfig['accepted_domains'] = ['a-good-site.com', 'the-best-one.org']\nconfig['rejected_domains'] += ['bad.com', 'very.bad.biz']\n\n# override default values for web retrieval\nconfig['link_depth'] = 2 # crawling depth level\nconfig['scan_limit'] = 10 # max. number of scanned sites\n\n# set pipes\npipes = ['byte2html', 'html2text', 'text2title', 'html2image']\nnews = newsman.News(config, pipes)\n\nsrc = 'https://www.bbc.com'\npages = news(src)\n```\n\nProxy information is formatted according to [Requests format](https://requests.kennethreitz.org/en/master/user/advanced/#proxies).\n\n# Installation\n\n` pip install newsman `\n\n\n", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/acapitanelli/newsman", "keywords": "press articles text extraction", "license": "MIT", "maintainer": "Andrea Capitanelli", "maintainer_email": "andrea.capitanelli@gmail.com", "name": "newsman", "package_url": "https://pypi.org/project/newsman/", "platform": "", "project_url": "https://pypi.org/project/newsman/", "project_urls": { "Homepage": "https://github.com/acapitanelli/newsman" }, "release_url": "https://pypi.org/project/newsman/1.1.0/", "requires_dist": [ "chardet", "requests" ], "requires_python": ">=3.6.0", "summary": "A tool for web news scraping.", "version": "1.1.0", "yanked": false, "yanked_reason": null }, "last_serial": 6129232, "releases": { "1.0.0": [ { "comment_text": "", "digests": { "md5": "6241340d0b48635dee4658556daf900a", "sha256": "7f7a754985c777a8481e6cf8aca999b4863f716432fa3e34f59ba6a0cf25299e" }, "downloads": -1, "filename": "newsman-1.0.0-py3-none-any.whl", "has_sig": false, "md5_digest": "6241340d0b48635dee4658556daf900a", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": ">=3.6.0", "size": 20260, "upload_time": "2019-10-30T13:29:07", "upload_time_iso_8601": "2019-10-30T13:29:07.638836Z", "url": "https://files.pythonhosted.org/packages/68/77/da1a17a4a6c8b99ddfb877c17855026a7881716abe8cf636570a1901c4eb/newsman-1.0.0-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "dd509637af983cdf325c8a409b2d8590", "sha256": "3f93058dd9368730949c74d20144cdaa9bfb0604230cf5a33759a89a3f7aa639" }, "downloads": -1, "filename": "newsman-1.0.0.tar.gz", "has_sig": false, "md5_digest": "dd509637af983cdf325c8a409b2d8590", "packagetype": "sdist", "python_version": "source", "requires_python": ">=3.6.0", "size": 13721, "upload_time": "2019-10-30T13:29:10", "upload_time_iso_8601": "2019-10-30T13:29:10.070923Z", "url": "https://files.pythonhosted.org/packages/10/c2/a2116523bbda2ab309ef4a6a62c68607345e79c74257bea1d565ab01a603/newsman-1.0.0.tar.gz", "yanked": false, "yanked_reason": null } ], "1.1.0": [ { "comment_text": "", "digests": { "md5": "af8aa9e0b4d1049e7e37058c7b671711", "sha256": "cbe1b49fefa255349926b9ee0854905799311309a8ea5227c5c9e98e9e985b0d" }, "downloads": -1, "filename": "newsman-1.1.0-py3-none-any.whl", "has_sig": false, "md5_digest": "af8aa9e0b4d1049e7e37058c7b671711", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": ">=3.6.0", "size": 19719, "upload_time": "2019-11-13T10:51:10", "upload_time_iso_8601": "2019-11-13T10:51:10.477228Z", "url": "https://files.pythonhosted.org/packages/08/a4/35836b3fa9679d6776cea459eb9b7ab9f5a2c10e193f9baf009357b82b42/newsman-1.1.0-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "40e1030f877cb6c7c1f30543d29650b6", "sha256": "8829be60ae71d9045b276c5b88e73472e5f2b205c9968b8be5581ca6097baee4" }, "downloads": -1, "filename": "newsman-1.1.0.tar.gz", "has_sig": false, "md5_digest": "40e1030f877cb6c7c1f30543d29650b6", "packagetype": "sdist", "python_version": "source", "requires_python": ">=3.6.0", "size": 13078, "upload_time": "2019-11-13T10:51:12", "upload_time_iso_8601": "2019-11-13T10:51:12.195741Z", "url": "https://files.pythonhosted.org/packages/89/ba/f5e57abc5d602528f268d72bd2e74dc8d6e4477beb3711a880ffe36c721b/newsman-1.1.0.tar.gz", "yanked": false, "yanked_reason": null } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "af8aa9e0b4d1049e7e37058c7b671711", "sha256": "cbe1b49fefa255349926b9ee0854905799311309a8ea5227c5c9e98e9e985b0d" }, "downloads": -1, "filename": "newsman-1.1.0-py3-none-any.whl", "has_sig": false, "md5_digest": "af8aa9e0b4d1049e7e37058c7b671711", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": ">=3.6.0", "size": 19719, "upload_time": "2019-11-13T10:51:10", "upload_time_iso_8601": "2019-11-13T10:51:10.477228Z", "url": "https://files.pythonhosted.org/packages/08/a4/35836b3fa9679d6776cea459eb9b7ab9f5a2c10e193f9baf009357b82b42/newsman-1.1.0-py3-none-any.whl", "yanked": false, "yanked_reason": null }, { "comment_text": "", "digests": { "md5": "40e1030f877cb6c7c1f30543d29650b6", "sha256": "8829be60ae71d9045b276c5b88e73472e5f2b205c9968b8be5581ca6097baee4" }, "downloads": -1, "filename": "newsman-1.1.0.tar.gz", "has_sig": false, "md5_digest": "40e1030f877cb6c7c1f30543d29650b6", "packagetype": "sdist", "python_version": "source", "requires_python": ">=3.6.0", "size": 13078, "upload_time": "2019-11-13T10:51:12", "upload_time_iso_8601": "2019-11-13T10:51:12.195741Z", "url": "https://files.pythonhosted.org/packages/89/ba/f5e57abc5d602528f268d72bd2e74dc8d6e4477beb3711a880ffe36c721b/newsman-1.1.0.tar.gz", "yanked": false, "yanked_reason": null } ], "vulnerabilities": [] }