{ "info": { "author": "Lapis-Hong", "author_email": "dhq1125@163.com", "bugtrack_url": null, "classifiers": [ "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Natural Language :: English", "Programming Language :: Python", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Text Processing" ], "description": "# PageFlow\n*PageFlow* is a Python (2 and 3) library for crawling web page search results. \nIt provides a simple API and supports the Google, Baidu, and Bing search engines.\n[https://pypi.org/project/pageflow/]\n\n## Features\n- supports a pages argument instead of returning just the first page of results.\n- supports extracting information from redirect pages.\n\n\n## Installation\n### 1. using pip\n```shell\npip install pageflow\n```\n### 2. using setup.py\n``` shell\ngit clone https://github.com/Lapis-Hong/PageFlow.git \ncd PageFlow\npython setup.py install\n```\n\n## Usage\n```python\nfrom pageflow import PageFlow\n\nquery = \"python\"\npages = 1 # search results total pages\n\npf = PageFlow(\"baidu\", proxies=None)\n\n\n# Get search page html.\nhtml = pf.get_html(query=query, pages=pages)\n\n\n# The following results are all generators of SearchResult objects.\n# Get search result urls.\nurl = pf.get_url(query=query, pages=pages)\n\n# Get search result titles.\ntitle = pf.get_title(query=query, pages=pages)\n\n# Get search result abstract.\nabstract = pf.get_abstract(query=query, pages=pages)\n\n# Get search result redirect html.\nredirect_html = pf.get_redirect_html(query=query, pages=pages)\n\n# Get search result redirect content.\nredirect_content = pf.get_redirect_content(query=query, pages=pages)\n\n# Get search result title, 
abstract, url, redirect html and redirect content.\nresult_all = pf.get_all(query=query, pages=pages)\n```\n\n## References\nhttps://github.com/howie6879/magic_google \nhttps://github.com/meibenjin/GoogleSearchCrawler \nhttps://github.com/chrislinan/cx-extractor-python \n\n\n\n\n\n\n\n\n", "description_content_type": "text/markdown", "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/lapis-hong/PageFlow", "keywords": "pageflow,search result spider,web information extraction", "license": "MIT", "maintainer": "", "maintainer_email": "", "name": "pageflow", "package_url": "https://pypi.org/project/pageflow/", "platform": "", "project_url": "https://pypi.org/project/pageflow/", "project_urls": { "Homepage": "https://github.com/lapis-hong/PageFlow" }, "release_url": "https://pypi.org/project/pageflow/0.1/", "requires_dist": [ "requests (>=2.12)", "scrapy (>=1.6.0)", "cchardet" ], "requires_python": "", "summary": "Simple, powerful and pythonic web page search results crawler.", "version": "0.1" }, "last_serial": 5319098, "releases": { "0.1": [ { "comment_text": "", "digests": { "md5": "8407972348cf19449e6186844edd1395", "sha256": "4f4740a07f8b4d24f70605d6adf4300332609da8603d1caa1a36e4bdd685dd1c" }, "downloads": -1, "filename": "pageflow-0.1-py2-none-any.whl", "has_sig": false, "md5_digest": "8407972348cf19449e6186844edd1395", "packagetype": "bdist_wheel", "python_version": "py2", "requires_python": null, "size": 12795, "upload_time": "2019-05-26T15:09:46", "url": "https://files.pythonhosted.org/packages/5d/42/609573cf360730c1224375b6afb59356994738dd0d294669bf094a919065/pageflow-0.1-py2-none-any.whl" }, { "comment_text": "", "digests": { "md5": "96beda6eb6d15c7570f75937cd978370", "sha256": "123ccdeebd28889fe1688c4fb4db30176f920167295cf657044305299d1bec5a" }, "downloads": -1, "filename": "pageflow-0.1.tar.gz", "has_sig": false, "md5_digest": "96beda6eb6d15c7570f75937cd978370", 
"packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 10002, "upload_time": "2019-05-26T15:09:48", "url": "https://files.pythonhosted.org/packages/af/b8/16726119ea0ffb9659352c493b3d94eaf99a91915c85e201bfd18f3fd1e5/pageflow-0.1.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "8407972348cf19449e6186844edd1395", "sha256": "4f4740a07f8b4d24f70605d6adf4300332609da8603d1caa1a36e4bdd685dd1c" }, "downloads": -1, "filename": "pageflow-0.1-py2-none-any.whl", "has_sig": false, "md5_digest": "8407972348cf19449e6186844edd1395", "packagetype": "bdist_wheel", "python_version": "py2", "requires_python": null, "size": 12795, "upload_time": "2019-05-26T15:09:46", "url": "https://files.pythonhosted.org/packages/5d/42/609573cf360730c1224375b6afb59356994738dd0d294669bf094a919065/pageflow-0.1-py2-none-any.whl" }, { "comment_text": "", "digests": { "md5": "96beda6eb6d15c7570f75937cd978370", "sha256": "123ccdeebd28889fe1688c4fb4db30176f920167295cf657044305299d1bec5a" }, "downloads": -1, "filename": "pageflow-0.1.tar.gz", "has_sig": false, "md5_digest": "96beda6eb6d15c7570f75937cd978370", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 10002, "upload_time": "2019-05-26T15:09:48", "url": "https://files.pythonhosted.org/packages/af/b8/16726119ea0ffb9659352c493b3d94eaf99a91915c85e201bfd18f3fd1e5/pageflow-0.1.tar.gz" } ] }