{ "info": { "author": "alex shu", "author_email": "daxia4444@qq.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Environment :: Web Environment", "Intended Audience :: Developers", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", "Programming Language :: Python :: 2.7" ], "description": "djangospider is a light web crawling framework. It has little code, but\ncan do high-speed crawling. It supports three crawl modes: multithreading,\nthe tornado IOLoop, and the twisted reactor. It is easy to understand how to use\nthe async crawler.\n\nRequirements:\n\n\tPython 2.7\n\tWorks on Linux\n\n\n\nInstall:\n\tDownload the zip package from GitHub, then unpack the zip package,\n\tfind the path of setup.py, and execute the command:\n\t$sudo python setup.py install\n\n\n\nThe entry function: Start(start_urls,mode)\n\n\tstart_urls parameter: a list whose elements are tuples:\n\n\t\tthe first item of each tuple is the URL to crawl,\n\t\tthe second item of each tuple is the callback for that URL.\n\n\tmode parameter: how the crawler runs; it has three values:\n\n\t\tif mode is int 1 : multithreading\n\t\tif mode is int 2 : tornado async\n\t\tif mode is int 3 : twisted async\n\n\nFor example:\n\n\tfrom djangospider.mycrawl.run import Start ,_crawl\n\n\tdef callback(response,url):\n\t\tprint \"get the %s\" %url\n\n\tstart_urls=[('http://github.com/',callback),]", "description_content_type": null, "docs_url": null, "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "UNKNOWN", "keywords": null, "license": "UNKNOWN", "maintainer": null, "maintainer_email": null, "name": "djangospider", "package_url": "https://pypi.org/project/djangospider/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/djangospider/", "project_urls": { "Download": "UNKNOWN", "Homepage": "UNKNOWN" }, "release_url": "https://pypi.org/project/djangospider/0.11/", "requires_dist": 
null, "requires_python": null, "summary": "three ways for spider by python", "version": "0.11" }, "last_serial": 1980708, "releases": { "0.1": [], "0.11": [ { "comment_text": "", "digests": { "md5": "215efa9bbf67ca505482cb401088da3d", "sha256": "da7f0144d3a0609e28f5d75ec561726a69c4cbc2b575a0d07df383a5638fdaae" }, "downloads": -1, "filename": "djangospider-0.11.tar.gz", "has_sig": false, "md5_digest": "215efa9bbf67ca505482cb401088da3d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 61441, "upload_time": "2016-02-28T16:04:02", "url": "https://files.pythonhosted.org/packages/40/65/d5e6f6295c2fbe8e66ec35d11f38286753e45570f0334a833a981fa54ec1/djangospider-0.11.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "215efa9bbf67ca505482cb401088da3d", "sha256": "da7f0144d3a0609e28f5d75ec561726a69c4cbc2b575a0d07df383a5638fdaae" }, "downloads": -1, "filename": "djangospider-0.11.tar.gz", "has_sig": false, "md5_digest": "215efa9bbf67ca505482cb401088da3d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 61441, "upload_time": "2016-02-28T16:04:02", "url": "https://files.pythonhosted.org/packages/40/65/d5e6f6295c2fbe8e66ec35d11f38286753e45570f0334a833a981fa54ec1/djangospider-0.11.tar.gz" } ] }