{ "info": { "author": "Behzad Mehrabi", "author_email": "bezMehrabi@gmail.com", "bugtrack_url": null, "classifiers": [], "description": "# tojson\nPython HTML document to JSON\n\n#### convert HTML document to JSON\n```\n>>> from tojson import HTML\n>>> with open('sample.html', 'r') as src:\n... html = HTML(src.read(), text_skip=['html', 'head', 'body'])\n... html.tojson()) # return json format of html\n{\n \"html\": {\n \"head\": {\n \"title\": {\n \"text\": \"test\"\n }\n },\n \"body\": {\n \"bgcolor\": \"FFFFFF\",\n \"img\": {\n \"src\": \"clouds.jpg\",\n \"align\": \"bottom\"\n },\n \"a\": [\n {\n \"href\": \"http://somegreatsite.com\",\n \"text\": \"Link Name\"\n },\n {\n \"href\": \"mailto:support@yourcompany.com\",\n \"text\": \"support@yourcompany.com\"\n }\n ],\n \"h1\": {\n \"text\": \"This is a Header\"\n },\n \"h2\": {\n \"text\": \"This is a Medium Header\"\n },\n \"p\": [\n {\n \"text\": \"first paragraph\"\n },\n {\n \"text\": \"second paragraph!\"\n }\n ],\n \"b\": {\n \"text\": \"This is a new sentence without a paragraph break\",\n \"i\": {\n \"text\": \"This is a new sentence without a paragraph break\"\n }\n }\n }\n }\n}\n```\n#### iterate over tags and their values\n to get tuple contains (tag, value):\n```\n>>> from tojson import HTML\n>>> with open('sample.html', 'r') as src:\n... html = HTML(src.read(), text_skip=['html', 'head', 'body'])\n...\n>>> for item in html:\n... item\n...\n('html', {'head': {'title': {'text': 'test'}}, 'body': {'bgcolor': 'FFFFFF', 'img': {'src': 'clouds.jpg', 'align': 'bottom'}, 'a': [{'href': 'http://somegreatsite.com', 'text': 'Link Name'}, {'href': 'mailto:support@yourcompany.com', 'text': 'support@yourcompany.com'}], 'h1': {'text': 'This is a Header'}, 'h2': {'text': 'This is a Medium Header'}, 'p': [{'text': 'first paragraph'}, {'text': 'second paragraph!'}], 'b': {'text': 'This is a new sentence without a paragraph break', 'i': {'text': 'This is a new sentence without a paragraph break'}}}})\n('head', {'title': {'text': 'test'}})\n('title', {'text': 'test'})\n('body', {'bgcolor': 'FFFFFF', 'img': {'src': 'clouds.jpg', 'align': 'bottom'}, 'a': [{'href': 'http://somegreatsite.com', 'text': 'Link Name'}, {'href': 'mailto:support@yourcompany.com', 'text': 'support@yourcompany.com'}], 'h1': {'text': 'This is a Header'}, 'h2': {'text': 'This is a Medium Header'}, 'p': [{'text': 'first paragraph'}, {'text': 'second paragraph!'}], 'b': {'text': 'This is a new sentence without a paragraph break', 'i': {'text': 'This is a new sentence without a paragraph break'}}})\n('img', {'src': 'clouds.jpg', 'align': 'bottom'})\n('a', [{'href': 'http://somegreatsite.com', 'text': 'Link Name'}, {'href': 'mailto:support@yourcompany.com', 'text': 'support@yourcompany.com'}])\n('h1', {'text': 'This is a Header'})\n('h2', {'text': 'This is a Medium Header'})\n('p', [{'text': 'first paragraph'}, {'text': 'second paragraph!'}])\n('b', {'text': 'This is a new sentence without a paragraph break', 'i': {'text': 'This is a new sentence without a paragraph break'}})\n('i', {'text': 'This is a new sentence without a paragraph break'})\n```\n\n\n", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/Bezmehrabi/tojson.git", "keywords": "", "license": "LICENSE.txt", "maintainer": "", "maintainer_email": "", "name": "tojson", "package_url": "https://pypi.org/project/tojson/", "platform": "", "project_url": "https://pypi.org/project/tojson/", "project_urls": { "Homepage": "https://github.com/Bezmehrabi/tojson.git" }, "release_url": "https://pypi.org/project/tojson/0.1/", "requires_dist": null, "requires_python": "", "summary": "Python Convert HTML Document to JSON", "version": "0.1" }, "last_serial": 5583264, "releases": { "0.1": [ { "comment_text": "", "digests": { "md5": "f02ce877485bdc9675258801a5911175", "sha256": "7e0fe52c6fdceab230b2438c997cf0a56b89a5e9a9b529b44f49fb5c4db21fcd" }, "downloads": -1, "filename": "tojson-0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "f02ce877485bdc9675258801a5911175", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 3736, "upload_time": "2019-07-25T13:38:44", "url": "https://files.pythonhosted.org/packages/00/2c/8f8941b4280af7cf73c01fbc2869a2190bc7ba5ee95f9a3d3c5f1d56b186/tojson-0.1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "0ec8e78723eda70e69c739928dd3e50e", "sha256": "ea938c659529322970bc325f371615e92342ce1635901dd87d1384249b090c71" }, "downloads": -1, "filename": "tojson-0.1.tar.gz", "has_sig": false, "md5_digest": "0ec8e78723eda70e69c739928dd3e50e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2953, "upload_time": "2019-07-25T13:38:47", "url": "https://files.pythonhosted.org/packages/2d/85/5e0bc19a718f2844d6b624629411c4972c292cc7b966f90a65b7c8bcf3ae/tojson-0.1.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "f02ce877485bdc9675258801a5911175", "sha256": "7e0fe52c6fdceab230b2438c997cf0a56b89a5e9a9b529b44f49fb5c4db21fcd" }, "downloads": -1, "filename": "tojson-0.1-py3-none-any.whl", "has_sig": false, "md5_digest": "f02ce877485bdc9675258801a5911175", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 3736, "upload_time": "2019-07-25T13:38:44", "url": "https://files.pythonhosted.org/packages/00/2c/8f8941b4280af7cf73c01fbc2869a2190bc7ba5ee95f9a3d3c5f1d56b186/tojson-0.1-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "0ec8e78723eda70e69c739928dd3e50e", "sha256": "ea938c659529322970bc325f371615e92342ce1635901dd87d1384249b090c71" }, "downloads": -1, "filename": "tojson-0.1.tar.gz", "has_sig": false, "md5_digest": "0ec8e78723eda70e69c739928dd3e50e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2953, "upload_time": "2019-07-25T13:38:47", "url": "https://files.pythonhosted.org/packages/2d/85/5e0bc19a718f2844d6b624629411c4972c292cc7b966f90a65b7c8bcf3ae/tojson-0.1.tar.gz" } ] }