{ "info": { "author": "Jack Diederich", "author_email": "jackdied@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: System Administrators", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 2.7", "Topic :: Software Development :: Libraries", "Topic :: Software Development :: Libraries :: Python Modules" ], "description": "boilerpot\n=========\n\nTemplating content from HTML. A Python do-alike to boilerpipe.\n\nboilerpipe (http://code.google.com/p/boilerpipe/) is a Java program that looks at HTML tags and tries to deduce where the actual content is sans navigation, headers & footers, etc.\n\nThis is a rough rewrite of that written in Python and should be considered super duper alpha. I did it during a 2-day company (Curata.com) hackathon. I haven't even run a comparison of its output against its step-father let alone done any corpus comparisons against commoncrawl.org. Consider yourself warned.\n\nThe only advantages over boilerpipe is that it is easier to interface with Python and the code is much more accessible: 500 lines of Python in one module versus 9000 lines of Java scattered accross a bazillion files and directories (I hate me some directories).", "description_content_type": null, "docs_url": null, "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "http://github.com/jackdied/boilerpot", "keywords": "html,scraping,templating", "license": "Apache License", "maintainer": null, "maintainer_email": null, "name": "boilerpot", "package_url": "https://pypi.org/project/boilerpot/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/boilerpot/", "project_urls": { "Download": "UNKNOWN", "Homepage": "http://github.com/jackdied/boilerpot" }, "release_url": "https://pypi.org/project/boilerpot/0.92/", "requires_dist": null, "requires_python": null, "summary": "HTML content extraction", "version": "0.92" }, "last_serial": 786955, "releases": { "0.91": [ { "comment_text": "", "digests": { "md5": "cb4d87b8d3ff6a2aaa5f261d6d02d487", "sha256": "257cce4b2215d18a2cc331fcfeaf0a80f909077f6926de72fbefc859713b7462" }, "downloads": -1, "filename": "boilerpot-0.91.tar.gz", "has_sig": false, "md5_digest": "cb4d87b8d3ff6a2aaa5f261d6d02d487", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 9094, "upload_time": "2013-05-08T18:18:51", "url": "https://files.pythonhosted.org/packages/ee/ce/b2e1779e9e9f515af5e4a3d8f1cf9897134677ed0e107f35ecb3f0f4c63c/boilerpot-0.91.tar.gz" } ], "0.92": [ { "comment_text": "", "digests": { "md5": "2852b1de154f093d964235ec52917452", "sha256": "1a495f3f428c28898261c704bb777ea7df75abdf86789356c79c9be252e25119" }, "downloads": -1, "filename": "boilerpot-0.92.tar.gz", "has_sig": false, "md5_digest": "2852b1de154f093d964235ec52917452", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 9378, "upload_time": "2013-05-08T18:36:03", "url": "https://files.pythonhosted.org/packages/b9/90/27e7b4bf2d47ca1689746fd120cc5ffa87826c4af2a29ca0a98f249dc461/boilerpot-0.92.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "2852b1de154f093d964235ec52917452", "sha256": "1a495f3f428c28898261c704bb777ea7df75abdf86789356c79c9be252e25119" }, "downloads": -1, "filename": "boilerpot-0.92.tar.gz", "has_sig": false, "md5_digest": "2852b1de154f093d964235ec52917452", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 9378, "upload_time": "2013-05-08T18:36:03", "url": "https://files.pythonhosted.org/packages/b9/90/27e7b4bf2d47ca1689746fd120cc5ffa87826c4af2a29ca0a98f249dc461/boilerpot-0.92.tar.gz" } ] }