{ "info": { "author": "Florian Schulze", "author_email": "florian.schulze@gmx.net", "bugtrack_url": null, "classifiers": [ "Development Status :: 5 - Production/Stable", "Environment :: Web Environment", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Internet :: WWW/HTTP", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing :: Filters", "Topic :: Text Processing :: Markup :: HTML", "Topic :: Utilities" ], "description": "collective.soupstrainer\n=======================\n\n\nQuite often there is a need to clean up HTML from some source, be it user\ninput or data gathered by scraping, which needs to be cleaned up. With the\nSoupStrainer class in collective.soupstrainer this is made easy. It uses\nbeautifulsoup4 to parse and clean up HTML. The constructor of the class takes\nfour arguments.\n\nexclusions\n This is a list of tuples with two items each. The first item is a list of\n tag names, the second item is a list of attributes. If the list of\n attributes is empty, then each tag in the first list is completely\n removed from the passed in HTML. If the list of tags is empty, then each\n attribute listed is completely removed. If there are both tags and\n attributes listed, then the attributes are only removed from matching\n tags.\n\nstyle_whitelist\n This is a white list of CSS styles allowed in 'style' attributes. All\n other styles are removed.\n\nclass_blacklist\n This is a black list for CSS classes. Each matching class is removed from\n 'class' attributes.\n\nparser\n This is the parser used by beautifulsoup4, when the strainer is called with\n a string. It must be an installed parser for beautifulsoup4, defaults to\n ``html.parser``\n\nAn instance of the SoupStrainer class can be called directly with one\nargument. The argument can either be a string, in which case it will\ninternally be parsed by beautifulsoup4 and the result will be unicode (or \nstring in python 3), or it can be a parsed HTML tree created by beautifulsoup4,\nin which case it will be modified in place and be returned again.\n\nChangelog\n=========\n\n2.1 (2019-02-06)\n----------------\n\n- Add support for Python 3 and PyPy.\n\n\n2.0 (2017-10-19)\n----------------\n\nBackwards incompatible changes\n++++++++++++++++++++++++++++++\n\n* Update to beautifulsoup4.\n\n* Add a parameter ``parser`` to ``SoupStrainer`` which specifies the parser\n used by beautifulsoup4.\n\n\n1.0 - 2008-11-14\n----------------\n\n* Initial release", "description_content_type": "", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/collective/collective.soupstrainer", "keywords": "html beautifulsoup clean filter rules", "license": "GPLv2+", "maintainer": "", "maintainer_email": "", "name": "collective.soupstrainer", "package_url": "https://pypi.org/project/collective.soupstrainer/", "platform": "", "project_url": "https://pypi.org/project/collective.soupstrainer/", "project_urls": { "Homepage": "https://github.com/collective/collective.soupstrainer" }, "release_url": "https://pypi.org/project/collective.soupstrainer/2.1/", "requires_dist": null, "requires_python": "", "summary": "Clean up HTML using BeautifulSoup and filter rules.", "version": "2.1" }, "last_serial": 4787502, "releases": { "1.0": [ { "comment_text": "", "digests": { "md5": "f8ac48e7ba6976a3dc61c74f9c37c3f9", "sha256": "7580a7cdb0c86ee18cccea5927e87a11971663d5a8ebcc68fcf28129ee9e2326" }, "downloads": -1, "filename": "collective.soupstrainer-1.0.zip", "has_sig": false, "md5_digest": "f8ac48e7ba6976a3dc61c74f9c37c3f9", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 14775, "upload_time": "2008-11-14T20:16:55", "url": "https://files.pythonhosted.org/packages/80/8d/71424a5039bf8ebcd44bb687f515d62fc093542e93e88d6103701dc2a312/collective.soupstrainer-1.0.zip" } ], "2.0": [ { "comment_text": "", "digests": { "md5": "25afd66949c1821e335690059b665be8", "sha256": "deffdb460a8c19f7fec26d58b1b22a8a85cb915e6fcae3b997eae788c80c867c" }, "downloads": -1, "filename": "collective.soupstrainer-2.0.tar.gz", "has_sig": false, "md5_digest": "25afd66949c1821e335690059b665be8", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 10266, "upload_time": "2017-10-19T09:47:36", "url": "https://files.pythonhosted.org/packages/6b/7c/ad398fbf9151d7244b53907550cecd9af854ee5f78456dee09f731200e93/collective.soupstrainer-2.0.tar.gz" } ], "2.1": [ { "comment_text": "", "digests": { "md5": "440fc3700d2a6cddfe802a0dd05dcdc3", "sha256": "0e9ddb790a2da97ab988d4c625e9d3c2668ced8ed7ab2fb04f989a91339608ce" }, "downloads": -1, "filename": "collective.soupstrainer-2.1.tar.gz", "has_sig": false, "md5_digest": "440fc3700d2a6cddfe802a0dd05dcdc3", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 11619, "upload_time": "2019-02-06T16:38:07", "url": "https://files.pythonhosted.org/packages/c2/a3/6d16963113fa3d9a5629cfd159934a4b63e65b7f94f30cd08d860ce41706/collective.soupstrainer-2.1.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "440fc3700d2a6cddfe802a0dd05dcdc3", "sha256": "0e9ddb790a2da97ab988d4c625e9d3c2668ced8ed7ab2fb04f989a91339608ce" }, "downloads": -1, "filename": "collective.soupstrainer-2.1.tar.gz", "has_sig": false, "md5_digest": "440fc3700d2a6cddfe802a0dd05dcdc3", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 11619, "upload_time": "2019-02-06T16:38:07", "url": "https://files.pythonhosted.org/packages/c2/a3/6d16963113fa3d9a5629cfd159934a4b63e65b7f94f30cd08d860ce41706/collective.soupstrainer-2.1.tar.gz" } ] }