{ "info": { "author": "Siva Avis", "author_email": "forhacku@gmail.com", "bugtrack_url": null, "classifiers": [ "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", "Programming Language :: Python :: 3", "Topic :: Communications", "Topic :: Software Development :: Libraries" ], "description": "gskrawler\n==============\n\ngskcrawler will enter your domain and scan every page of your website, extracting page titles, descriptions, keywords, and links etc..\n----\n\n\nDescription: gskrawler\n ==============\n \n gskcrawler will enter your domain and scan every page of your website, extracting page titles, descriptions, keywords, and links etc..\n\n Requirements\n ============================\n BeautifulSoup4\n requests\n urllib3 1.22\n \n\n Commands\n ============================\n \n ------------\n\n\t\t\t\tgskrawler.head(url)\n\n\t\t \n\t\t------------\n\n\t\t\t\tgskrawler.title(url)\n\n\t\t<body>\n\t\t------------\n\n\t\t\t\tgskrawler.body(url)\n\n\t\tresponse in html format\n\t\t------------\n\n\t\t\t\tgskrawler.html(url)\n\n\t\tlinks in a website\n\t\t------------\n\n\t\t\t\tgskrawler.links(url)\n\n\t\tclass elements\n\t\t------------\n\n\t\t\t\tgskrawler.tagclass(url,tagname,classname)\n\n\t\tid elements\n\t\t------------\n\n\t\t\t\tgskrawler.tagid(url,tagname,idname)\n\n\t\temails in a website\n\t\t------------\n\n\t\t\t\tgskrawler.emails(url)\n\n\t\timages in a website\n\t\t------------\n\n\t\t\t\tgskrawler.images(url)\n\n\n ----\n \n Example Code\n ------------\n \n Open Python Interpreter::\n \n >>> import gskrawler\n >>> gskrawler.emails('https://www.fisglobal.com/')\n >>> gskrawler.images('https://www.fisglobal.com/')\n >>> gskrawler.head('https://www.fisglobal.com/')\n >>> gskrawler.tagclass('https://www.naukri.com/','ul','set')\n", "description_content_type": null, "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/username/", "keywords": "gskrawler scraping website emails data webcrawler", "license": "", "maintainer": "", "maintainer_email": "", "name": "gskrawler", "package_url": "https://pypi.org/project/gskrawler/", "platform": "", "project_url": "https://pypi.org/project/gskrawler/", "project_urls": { "Homepage": "https://github.com/username/" }, "release_url": "https://pypi.org/project/gskrawler/1.0.0/", "requires_dist": null, "requires_python": "", "summary": "gskrawler will enter your domain and scan every page of your website, extracting page titles, descriptions, keywords, and links etc..", "version": "1.0.0" }, "last_serial": 3419552, "releases": { "1.0.0": [ { "comment_text": "", "digests": { "md5": "346f65562428ab34096fd54cc65daa1d", "sha256": "0686faaf69bd5d18b8a643f528e434658c91840204a473878f05d7692da8d8dc" }, "downloads": -1, "filename": "gskrawler-1.0.0.tar.gz", "has_sig": false, "md5_digest": "346f65562428ab34096fd54cc65daa1d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 16248, "upload_time": "2017-12-15T10:35:36", "url": "https://files.pythonhosted.org/packages/02/c9/24fe4def0001451535a2390627b5d7e96f7a102d096aa130bd92da1ad713/gskrawler-1.0.0.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "346f65562428ab34096fd54cc65daa1d", "sha256": "0686faaf69bd5d18b8a643f528e434658c91840204a473878f05d7692da8d8dc" }, "downloads": -1, "filename": "gskrawler-1.0.0.tar.gz", "has_sig": false, "md5_digest": "346f65562428ab34096fd54cc65daa1d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 16248, "upload_time": "2017-12-15T10:35:36", "url": "https://files.pythonhosted.org/packages/02/c9/24fe4def0001451535a2390627b5d7e96f7a102d096aa130bd92da1ad713/gskrawler-1.0.0.tar.gz" } ] }