{ "info": { "author": "Louis Fogel", "author_email": "fogel@alexkarpinski.com", "bugtrack_url": null, "classifiers": [], "description": "JAWS -- Just Another Web Scraper\n================================\n# Introduction\nJAWS is a system for quickly designing web scrapers. It contains a framework for designing custom resources, parsers and outputs for entirely custom scrapers as well as a few implementations for common use cases.\n\n# Dependencies\nJAWS is written in Python, for Python2. The dependencies for the latest version are:\n* mechanize==0.2.5\n* requests==2.2.1\n\nJAWS can also be installed with easy_install or pip.\n\n# Components\nThe core components of the JAWS framework can be found in core.py.\n\n## Scraper\nThe Scraper class is a collection of all the core components into one object which can be easily instantiated and used to scrape all data into your specified output.\n\n## Resource\nThe JAWSResource class is the abstract class describing the interface by which pages are provided to the parser for scraping. A resource could be as simple as a file reader or as complex as a full Web crawler.\n\n## Parser\nThe JAWSParser class is the abstract class describing the way your scraper will turn input from the resource into a Python dictionary of keys and values to be fed to the output.\n\n## Output\nThe JAWSOutput class is the abstract class describing what to actually do with that data you have scraped. 
It could describe a file output format (a csv is probably simplest), a database interface, or whatever else you can think of.\n\n# Future Work\n* Automatic Schema Detection\n* JSON parser\n* Examples for README\n* Better documentation in code\n* Python3 compatibility\n\n# License\nAll code and content distributed with JAWS is released under the [GNU GPLv3](http://www.gnu.org/licenses/gpl-3.0.html) unless otherwise specified or prohibited.", "description_content_type": null, "docs_url": null, "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/iccelou91/JAWS", "keywords": null, "license": "LICENSE.txt", "maintainer": null, "maintainer_email": null, "name": "jaws-scraper", "package_url": "https://pypi.org/project/jaws-scraper/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/jaws-scraper/", "project_urls": { "Download": "UNKNOWN", "Homepage": "https://github.com/iccelou91/JAWS" }, "release_url": "https://pypi.org/project/jaws-scraper/0.1.0/", "requires_dist": null, "requires_python": null, "summary": "Just Another Web Scraper.", "version": "0.1.0" }, "last_serial": 1029992, "releases": { "0.1.0": [ { "comment_text": "", "digests": { "md5": "99eda24ccc889d2dad41aa47c4dd547c", "sha256": "6c3814f838c0586131f0c5748d7ea2b20b324183d47dfc8832ae986d7748db05" }, "downloads": -1, "filename": "jaws-scraper-0.1.0.tar.gz", "has_sig": false, "md5_digest": "99eda24ccc889d2dad41aa47c4dd547c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 16289, "upload_time": "2014-03-14T20:44:15", "url": "https://files.pythonhosted.org/packages/e8/12/0642fade06bdd35f6f31de21d95be6970477752b8684e359a70b712e7cc5/jaws-scraper-0.1.0.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "99eda24ccc889d2dad41aa47c4dd547c", "sha256": "6c3814f838c0586131f0c5748d7ea2b20b324183d47dfc8832ae986d7748db05" }, "downloads": -1, "filename": "jaws-scraper-0.1.0.tar.gz", 
"has_sig": false, "md5_digest": "99eda24ccc889d2dad41aa47c4dd547c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 16289, "upload_time": "2014-03-14T20:44:15", "url": "https://files.pythonhosted.org/packages/e8/12/0642fade06bdd35f6f31de21d95be6970477752b8684e359a70b712e7cc5/jaws-scraper-0.1.0.tar.gz" } ] }