{ "info": { "author": "Matias Bordese", "author_email": "mbordese@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4" ], "description": "demiurge\r\n========\r\n\r\nPyQuery-based scraping micro-framework.\r\nSupports Python 2.x and 3.x.\r\n\r\nDocumentation: http://demiurge.readthedocs.org\r\n\r\n\r\nInstalling demiurge\r\n-------------------\r\n\r\n $ pip install demiurge\r\n\r\n\r\nQuick start\r\n-----------\r\n\r\nDefine items to be scraped using a declarative (Django-inspired) syntax:\r\n\r\n >>> import demiurge\r\n >>> class TorrentDetails(demiurge.Item):\r\n ... label = demiurge.TextField(selector='strong')\r\n ... value = demiurge.TextField()\r\n ... def clean_value(self, value):\r\n ... unlabel = value[value.find(':') + 1:]\r\n ... return unlabel.strip()\r\n ... class Meta:\r\n ... selector = 'div#specifications p'\r\n ... \r\n >>> class Torrent(demiurge.Item):\r\n ... url = demiurge.AttributeValueField(\r\n ... selector='td:eq(2) a:eq(1)', attr='href')\r\n ... name = demiurge.TextField(selector='td:eq(2) a:eq(2)')\r\n ... size = demiurge.TextField(selector='td:eq(3)')\r\n ... details = demiurge.RelatedItem(\r\n ... TorrentDetails, selector='td:eq(2) a:eq(2)', attr='href')\r\n ... class Meta:\r\n ... selector = 'table.maintable:gt(0) tr:gt(0)'\r\n ... base_url = 'http://www.mininova.org'\r\n ... \r\n >>> \r\n >>> t = Torrent.one('/search/ubuntu/seeds')\r\n >>> t.name\r\n 'Ubuntu 7.10 Desktop Live CD'\r\n >>> t.size\r\n u'695.81\\xa0MB'\r\n >>> t.url\r\n '/get/1053846'\r\n >>> t.html\r\n u'19\\xa0Dec\\xa007Software...'\r\n >>> results = Torrent.all('/search/ubuntu/seeds')\r\n >>> len(results)\r\n 116\r\n >>> for t in results[:3]:\r\n ... print t.name, t.size\r\n ...\r\n Ubuntu 7.10 Desktop Live CD 695.81 MB\r\n Super Ubuntu 2008.09 - VMware image 871.95 MB\r\n Portable Ubuntu 9.10 for Windows 559.78 MB\r\n ...\r\n >>> t = Torrent.one('/search/ubuntu/seeds')\r\n >>> for detail in t.details:\r\n ... print detail.label, detail.value\r\n ... \r\n Category: Software > GNU/Linux\r\n Total size: 695.81\u00a0megabyte\r\n Added: 2467 days ago by Distribution\r\n Share ratio: 17 seeds, 2 leechers\r\n Last updated: 35 minutes ago\r\n Downloads: 29,085\r\n\r\n\r\nSee documentation for details: http://demiurge.readthedocs.org\r\n\r\n\r\nWhy *demiurge*?\r\n---------------\r\n\r\nPlato, as the speaker Timaeus, refers to the Demiurge frequently in the Socratic\r\ndialogue Timaeus, c. 360 BC. The main character refers to the Demiurge as the\r\nentity who \"fashioned and shaped\" the material world. Timaeus describes the\r\nDemiurge as unreservedly benevolent, and hence desirous of a world as good as\r\npossible. The world remains imperfect, however, because the Demiurge created\r\nthe world out of a chaotic, indeterminate non-being.\r\n\r\nhttp://en.wikipedia.org/wiki/Demiurge", "description_content_type": null, "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/matiasb/demiurge", "keywords": "scraping pyquery framework scraper web data extraction", "license": "BSD", "maintainer": "", "maintainer_email": "", "name": "demiurge", "package_url": "https://pypi.org/project/demiurge/", "platform": "", "project_url": "https://pypi.org/project/demiurge/", "project_urls": { "Homepage": "https://github.com/matiasb/demiurge" }, "release_url": "https://pypi.org/project/demiurge/0.2/", "requires_dist": null, "requires_python": null, "summary": "Scraping micro-framework based on pyquery.", "version": "0.2" }, "last_serial": 1231718, "releases": { "0.1": [ { "comment_text": "", "digests": { "md5": "1ce9ba2bc20ab37610d7ed7f3fe2c5c0", "sha256": "9d6496e3e02495a16e6cef2d988645e64a3a58896148d9eec85bb94622cbc3c6" }, "downloads": -1, "filename": "demiurge-0.1.tar.gz", "has_sig": false, "md5_digest": "1ce9ba2bc20ab37610d7ed7f3fe2c5c0", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5199, "upload_time": "2014-04-18T20:23:39", "url": "https://files.pythonhosted.org/packages/a5/24/bcaeaf2c8ffea5bea605317afd7387416369b6605a5bf52011f28421afa3/demiurge-0.1.tar.gz" } ], "0.2": [ { "comment_text": "", "digests": { "md5": "2cdbfcd8bf629f2158847c4a4df7351c", "sha256": "be04a5a2b06a44d3ae3c94aa24a92625b2544147bf46227da9114a55936fa760" }, "downloads": -1, "filename": "demiurge-0.2.tar.gz", "has_sig": false, "md5_digest": "2cdbfcd8bf629f2158847c4a4df7351c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5463, "upload_time": "2014-09-20T18:27:19", "url": "https://files.pythonhosted.org/packages/56/8a/495fa2401c757c2bf336fa9d05c3e0c765fb2c9b9d8f1067e8e7506b429d/demiurge-0.2.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "2cdbfcd8bf629f2158847c4a4df7351c", "sha256": "be04a5a2b06a44d3ae3c94aa24a92625b2544147bf46227da9114a55936fa760" }, "downloads": -1, "filename": "demiurge-0.2.tar.gz", "has_sig": false, "md5_digest": "2cdbfcd8bf629f2158847c4a4df7351c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 5463, "upload_time": "2014-09-20T18:27:19", "url": "https://files.pythonhosted.org/packages/56/8a/495fa2401c757c2bf336fa9d05c3e0c765fb2c9b9d8f1067e8e7506b429d/demiurge-0.2.tar.gz" } ] }