{ "info": { "author": "F. Javier Alba", "author_email": "me@fjavieralba.com", "bugtrack_url": null, "classifiers": [], "description": "=======\nScraper\n=======\n\nMinimalist Python DOM Scraper\n\nDescription\n===========\n\nThis module is an easy to use HTML/XML scraper. It supports both XPath and Regular \nExpression retrieval.\n\nOnce you have a file you want to extract information from, you can extract\nmultiple pieces of information with a simple function call.\n\nYou should obtain the files you want to scrape by your own ways.\n\n\nInstallation\n============\n\n::\n\n pip install scraper\n\n\nUsage\n=====\n\nScrape using xpath:\n-------------------\n\n::\n\n import scraper\n import requests\n \n content = requests.get('https://github.com/explore').content\n \n conf = {'trending-repos' : {'xpath' : '//ol/li/h3/a[2]/@href'}}\n\n scraper.scrapes(content, conf)\n\n >>> {'trending-repos': ['/jamescryer/grumble.js', '/dominictarr/JSON.sh', '/JamieLottering/DropKick', '/harvesthq/chosen', '/velvia/ScalaStorm']}\n\nScrape using regexp:\n--------------------\n\n::\n\n import scraper\n import requests\n\n content = requests.get('http://wiki.nomasnumeros900.com/Air_Liquide').content\n \n conf = {\n 'numbers': \n {'regexp': '91[\\s\\d]+', \n 'transf': [lambda x: x.strip()], \n 'encoding': 'utf-8'}\n }\n\n scraper.scrapes(content, conf)\n\n >>> {'numbers': [u'915 029 300', u'915 029 560', u'915 029 330', u'91']}", "description_content_type": null, "docs_url": null, "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "http://pypi.python.org/pypi/scraper/", "keywords": null, "license": "LICENSE.txt", "maintainer": null, "maintainer_email": null, "name": "scraper", "package_url": "https://pypi.org/project/scraper/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/scraper/", "project_urls": { "Download": "UNKNOWN", "Homepage": "http://pypi.python.org/pypi/scraper/" }, "release_url": "https://pypi.org/project/scraper/0.1.0/", "requires_dist": null, "requires_python": null, "summary": "Configurable Python Web Scraper", "version": "0.1.0" }, "last_serial": 799328, "releases": { "0.1.0": [ { "comment_text": "", "digests": { "md5": "a118670e8eb77dbe4ab7c00701795e5c", "sha256": "2f24748bad11221408c92f420ac63f9c119a1884a7e790e084ec8047eafc9eaf" }, "downloads": -1, "filename": "scraper-0.1.0.tar.gz", "has_sig": false, "md5_digest": "a118670e8eb77dbe4ab7c00701795e5c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2626, "upload_time": "2012-10-31T11:03:06", "url": "https://files.pythonhosted.org/packages/d9/0b/24cd90d74f123f707ebf1a6d1ac78d76d46467e5cc9a249320004e85ea7d/scraper-0.1.0.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "a118670e8eb77dbe4ab7c00701795e5c", "sha256": "2f24748bad11221408c92f420ac63f9c119a1884a7e790e084ec8047eafc9eaf" }, "downloads": -1, "filename": "scraper-0.1.0.tar.gz", "has_sig": false, "md5_digest": "a118670e8eb77dbe4ab7c00701795e5c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2626, "upload_time": "2012-10-31T11:03:06", "url": "https://files.pythonhosted.org/packages/d9/0b/24cd90d74f123f707ebf1a6d1ac78d76d46467e5cc9a249320004e85ea7d/scraper-0.1.0.tar.gz" } ] }