{ "info": { "author": "Pavel Zhukov", "author_email": "gelios@gmail.com", "bugtrack_url": null, "classifiers": [], "description": "Description:\n============\nPbot contains two modules, Bot and Spider\n\nBot is a simple helper, created to save request state (cookies, referrer) between http requests. Also, it provides additional methods for adding cookies. With no dependencies this module is easy to use when you need to simulate browser.\n\nSpider it's pbot, armed by lxml (required). Provides additional methods for easy website crawling, see below.\n\nBot is very easy to use::\n\n from pbot.pbot import Bot\n bot = Bot(proxies={'http': 'localhost:3128'}) # You can provide proxies, during bot creation, or set later as bot.proxies\n bot.add_cookie({'name': 'sample', 'value': 1, 'domain': 'example.com'})\n response = bot.open('http://example.com') # Open with cookies and empty referrer\n bot.follow('http://google.com') # Open google with example.com as a referrer\n response = bot.response # Response saved, and can be read later\n bot.follow('http://example.com', post={'q': 'abc'}) # You can provide post and get as keyword arguments\n bot.refresh_connector() # Flush cookies and referrer\n\n\n\nSpider gives you special features::\n\n from pbot.spider import Spider\n bot = Spider() # or Spider(force_encoding='utf-8') to force encoding for parser\n bot.open('http://example.com')\n bot.tree.xpath('//a') # lxml tree can be accessed by .tree, response will be automatically read and parsed by lxml.html\n form = bot.xpath('//form[@id=\"main\"]') # xpath shortcut for bot.tree.xpath\n bot.submit(form) # Submit lxml form\n #\n # Crawler, recursively crawl from target page yielding xml_tree, query_url, real_url (real_url - url after all redirects).\n bot.crawl(self,\n url=None, # Target url to start crawling\n check_base=True, # Yield pages only on domain from url\n only_descendant=True, # Yield only pages that urls starts with url\n max_level=None, #Maximum level\n 
allowed_protocols=('http:', 'https:'),\n ignore_errors=True,\n ignore_starts=(), # Tuple/array, ignore urls that starts with ignore_starts (exclude some parts of site)\n check_mime=())", "description_content_type": null, "docs_url": null, "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "http://bitbucket.org/zeus/pbot", "keywords": "crawling,bot", "license": "GPL", "maintainer": null, "maintainer_email": null, "name": "pbot", "package_url": "https://pypi.org/project/pbot/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/pbot/", "project_urls": { "Download": "UNKNOWN", "Homepage": "http://bitbucket.org/zeus/pbot" }, "release_url": "https://pypi.org/project/pbot/1.4.0/", "requires_dist": null, "requires_python": null, "summary": "An simple site crawler with proxy support", "version": "1.4.0" }, "last_serial": 796087, "releases": { "1.0.0": [ { "comment_text": "", "digests": { "md5": "b70f3ef0797b743f45348ec0a7977d20", "sha256": "fb49ad70b26b8a74ac3b11180abcf453c388bad5b5d56a35a9f967813b7109a2" }, "downloads": -1, "filename": "pbot-1.0.0.tar.gz", "has_sig": false, "md5_digest": "b70f3ef0797b743f45348ec0a7977d20", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2156, "upload_time": "2010-11-13T15:23:19", "url": "https://files.pythonhosted.org/packages/d0/a6/87e045ac3b1fa69965c5ab8ff14b1de1642b6b780a786ec8707c87e4adc9/pbot-1.0.0.tar.gz" } ], "1.1.0": [ { "comment_text": "", "digests": { "md5": "844449489e160af63306f0de3849e33f", "sha256": "1fb9aaa217efa41fd9504da932c57f3bed4e9edc5e1ac20078cb5850f4132877" }, "downloads": -1, "filename": "pbot-1.1.0.tar.gz", "has_sig": false, "md5_digest": "844449489e160af63306f0de3849e33f", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 2168, "upload_time": "2010-11-13T22:30:41", "url": 
"https://files.pythonhosted.org/packages/62/4a/a410c5fd0a751b06493a76740adb39242fe04546b4eb1e24081a307dca01/pbot-1.1.0.tar.gz" } ], "1.2.0": [ { "comment_text": "", "digests": { "md5": "ea6d19b4757ccfa9cc17f7ead93142d7", "sha256": "8d114c76786fe809b0abc399b41961900a8f91b0aba3ceb4d7c6a8a49bdf3301" }, "downloads": -1, "filename": "pbot-1.2.0.tar.gz", "has_sig": false, "md5_digest": "ea6d19b4757ccfa9cc17f7ead93142d7", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3970, "upload_time": "2010-12-17T10:49:55", "url": "https://files.pythonhosted.org/packages/47/af/dc5139a10ade9de178f7467ff9ce8917da065342649bef0f18c40eed98a8/pbot-1.2.0.tar.gz" } ], "1.3.0": [ { "comment_text": "", "digests": { "md5": "7772ab7f8d5cbcf1099b32b41be0ca05", "sha256": "0a09366cf36e3d0615e94da1357ffae211279052c87aa777fce5f8c2b2346859" }, "downloads": -1, "filename": "pbot-1.3.0.tar.gz", "has_sig": false, "md5_digest": "7772ab7f8d5cbcf1099b32b41be0ca05", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4850, "upload_time": "2010-12-20T15:14:50", "url": "https://files.pythonhosted.org/packages/d7/d9/45ce65854ff21cf9d457a1989a64fa08bc7cc07a961a9f2e99be46c7c3fd/pbot-1.3.0.tar.gz" } ], "1.4.0": [ { "comment_text": "", "digests": { "md5": "adfa46604311542b76750811748b1353", "sha256": "84c182f459a81a4e0967c4e9034a5fda694d875daa98c063ac6443e72e838daf" }, "downloads": -1, "filename": "pbot-1.4.0.tar.gz", "has_sig": false, "md5_digest": "adfa46604311542b76750811748b1353", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4980, "upload_time": "2011-02-22T10:18:22", "url": "https://files.pythonhosted.org/packages/0e/71/38e17675ac35cc4e83b94e623517fe29530a97bc3087d1f754fc9ecd86ba/pbot-1.4.0.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "adfa46604311542b76750811748b1353", "sha256": "84c182f459a81a4e0967c4e9034a5fda694d875daa98c063ac6443e72e838daf" }, "downloads": -1, "filename": 
"pbot-1.4.0.tar.gz", "has_sig": false, "md5_digest": "adfa46604311542b76750811748b1353", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4980, "upload_time": "2011-02-22T10:18:22", "url": "https://files.pythonhosted.org/packages/0e/71/38e17675ac35cc4e83b94e623517fe29530a97bc3087d1f754fc9ecd86ba/pbot-1.4.0.tar.gz" } ] }