{ "info": { "author": "Matias Bordese", "author_email": "mbordese@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4" ], "description": "demiurge\r\n========\r\n\r\nPyQuery-based scraping micro-framework.\r\nSupports Python 2.x and 3.x.\r\n\r\nDocumentation: http://demiurge.readthedocs.org\r\n\r\n\r\nInstalling demiurge\r\n-------------------\r\n\r\n $ pip install demiurge\r\n\r\n\r\nQuick start\r\n-----------\r\n\r\nDefine items to be scraped using a declarative (Django-inspired) syntax:\r\n\r\n >>> import demiurge\r\n >>> class TorrentDetails(demiurge.Item):\r\n ... label = demiurge.TextField(selector='strong')\r\n ... value = demiurge.TextField()\r\n ... def clean_value(self, value):\r\n ... unlabel = value[value.find(':') + 1:]\r\n ... return unlabel.strip()\r\n ... class Meta:\r\n ... selector = 'div#specifications p'\r\n ... \r\n >>> class Torrent(demiurge.Item):\r\n ... url = demiurge.AttributeValueField(\r\n ... selector='td:eq(2) a:eq(1)', attr='href')\r\n ... name = demiurge.TextField(selector='td:eq(2) a:eq(2)')\r\n ... size = demiurge.TextField(selector='td:eq(3)')\r\n ... details = demiurge.RelatedItem(\r\n ... TorrentDetails, selector='td:eq(2) a:eq(2)', attr='href')\r\n ... class Meta:\r\n ... selector = 'table.maintable:gt(0) tr:gt(0)'\r\n ... base_url = 'http://www.mininova.org'\r\n ... \r\n >>> \r\n >>> t = Torrent.one('/search/ubuntu/seeds')\r\n >>> t.name\r\n 'Ubuntu 7.10 Desktop Live CD'\r\n >>> t.size\r\n u'695.81\\xa0MB'\r\n >>> t.url\r\n '/get/1053846'\r\n >>> t.html\r\n u'