{ "info": { "author": "Sanhe Hu", "author_email": "husanhe@gmail.com", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Natural Language :: English", "Operating System :: MacOS", "Operating System :: Microsoft :: Windows", "Operating System :: Unix", "Programming Language :: Python", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", "Programming Language :: Python :: 3.5" ], "description": ".. image:: https://travis-ci.org/MacHu-GWU/crawl_trulia-project.svg?branch=master\r\n\r\n.. image:: https://img.shields.io/pypi/v/crawl_trulia.svg\r\n\r\n.. image:: https://img.shields.io/pypi/l/crawl_trulia.svg\r\n\r\n.. image:: https://img.shields.io/pypi/pyversions/crawl_trulia.svg\r\n\r\n\r\nWelcome to crawl_trulia Documentation\r\n===============================================================================\r\nThis is a small project provide url route, html parse tools to crawl www.trulia.com.\r\n\r\n\r\n**Quick Links**\r\n-------------------------------------------------------------------------------\r\n- `GitHub Homepage `_\r\n- `PyPI download `_\r\n- `Install `_\r\n- `Issue submit and feature request `_\r\n\r\n\r\n**Usage**\r\n-------------------------------------------------------------------------------\r\nA real example:\r\n\r\n.. code-block:: python\r\n\r\n >>> from crawl_trulia.urlencoder import urlencoder\r\n >>> from crawl_trulia.htmlparser import htmlparser\r\n >>> from crawlib.spider import spider # install crawlib first\r\n\r\n # use address, city and zipcode\r\n >>> address = \"22 Yew Rd\"\r\n >>> city = \"Baltimore\"\r\n >>> zipcode = \"21221\"\r\n\r\n >>> url = urlencoder.by_address_city_and_zipcode(address, city, zipcode)\r\n >>> html = spider.get_html(url)\r\n >>> house_detail_data = htmlparser.get_house_detail(html)\r\n >>> house_detail_data\r\n {\r\n \"features\": {}, \r\n \"public_records\": {\r\n \"AC\": \"a/c\", \r\n \"basement_type\": \"improved basement (finished)\", \r\n \"bathroom\": 2, \r\n \"build_year\": 1986, \r\n \"county\": \"baltimore county\", \r\n \"exterior_walls\": \"siding (alum/vinyl)\", \r\n \"heating\": \"heat pump\", \r\n \"lot_size\": 7505, \r\n \"lot_size_unit\": \"sqft\", \r\n \"partial_bathroom\": 1, \r\n \"roof\": \"composition shingle\", \r\n \"sqft\": 998\r\n }\r\n }\r\n\r\n # usually combination of address and zipcode is enough\r\n >>> address = \"2004 Birch Rd\"\r\n >>> zipcode = \"21221\"\r\n\r\n >>> url = urlencoder.by_address_and_zipcode(address, zipcode)\r\n >>> html = spider.get_html(url)\r\n >>> house_detail_data = htmlparser.get_house_detail(html)\r\n\r\n \r\n.. _install:\r\n\r\nInstall\r\n-------------------------------------------------------------------------------\r\n\r\n``crawl_trulia`` is released on PyPI, so all you need is:\r\n\r\n.. code-block:: console\r\n\r\n $ pip install crawl_trulia\r\n\r\nTo upgrade to latest version:\r\n\r\n.. code-block:: console\r\n\r\n $ pip install --upgrade crawl_trulia", "description_content_type": null, "docs_url": null, "download_url": "https://github.com/MacHu-GWU/crawl_trulia-project/tarball/2017-02-07", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/MacHu-GWU/crawl_trulia-project", "keywords": null, "license": "MIT", "maintainer": null, "maintainer_email": null, "name": "crawl_trulia", "package_url": "https://pypi.org/project/crawl_trulia/", "platform": "Windows,MacOS,Unix", "project_url": "https://pypi.org/project/crawl_trulia/", "project_urls": { "Download": "https://github.com/MacHu-GWU/crawl_trulia-project/tarball/2017-02-07", "Homepage": "https://github.com/MacHu-GWU/crawl_trulia-project" }, "release_url": "https://pypi.org/project/crawl_trulia/0.0.4/", "requires_dist": null, "requires_python": null, "summary": "Trulia Crawler Tool Set", "version": "0.0.4" }, "last_serial": 2626276, "releases": { "0.0.1": [ { "comment_text": "", "digests": { "md5": "c2f1e0f0098317d1637cd23f78b2ca54", "sha256": "06c3933d587018fcb2c4d933dccf3082da7e36c19c904ac00e74d69c3a189a8a" }, "downloads": -1, "filename": "crawl_trulia-0.0.1.zip", "has_sig": false, "md5_digest": "c2f1e0f0098317d1637cd23f78b2ca54", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 11989, "upload_time": "2017-01-24T00:28:40", "url": "https://files.pythonhosted.org/packages/73/90/a0b59c27117a4930bd33d7ed25cf7968ad6f2a7faf19558b4df5cbb0c2e4/crawl_trulia-0.0.1.zip" } ], "0.0.2": [ { "comment_text": "", "digests": { "md5": "3131b1a59112c889dd99ef7c914d4285", "sha256": "724f2c30d2514e88720661d0c368844c174cb3902fd7bb03003963645e108086" }, "downloads": -1, "filename": "crawl_trulia-0.0.2.zip", "has_sig": false, "md5_digest": "3131b1a59112c889dd99ef7c914d4285", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 12107, "upload_time": "2017-01-27T19:18:54", "url": "https://files.pythonhosted.org/packages/94/de/96930796aab7cc4f8c7ee39fb6bcec1302f0fae4166d668da0b1817c671f/crawl_trulia-0.0.2.zip" } ], "0.0.3": [ { "comment_text": "", "digests": { "md5": "7b7c4fb4fd2608743cf96cf327a86408", "sha256": "937d3d56ad37340279eda58a362a65add0e6ccc6312de2761dff83cac5083c4d" }, "downloads": -1, "filename": "crawl_trulia-0.0.3.zip", "has_sig": false, "md5_digest": "7b7c4fb4fd2608743cf96cf327a86408", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 12592, "upload_time": "2017-02-01T23:07:34", "url": "https://files.pythonhosted.org/packages/ff/1d/db4eb3c18866f2d002f144d631c005185801d6d9664e4991f6e719faddd5/crawl_trulia-0.0.3.zip" } ], "0.0.4": [ { "comment_text": "", "digests": { "md5": "d9ef2488f8c372d8d590bfb604130c28", "sha256": "eb11b1c974b52fcc6330e543338c6051f534d61e3c891dc4841646696b3f9124" }, "downloads": -1, "filename": "crawl_trulia-0.0.4.zip", "has_sig": false, "md5_digest": "d9ef2488f8c372d8d590bfb604130c28", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 22507, "upload_time": "2017-02-07T20:27:06", "url": "https://files.pythonhosted.org/packages/d0/09/234199a82d99a59c3eea0ba5d644bd7279ca65d5fd144ac3ab47f5ef1ceb/crawl_trulia-0.0.4.zip" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "d9ef2488f8c372d8d590bfb604130c28", "sha256": "eb11b1c974b52fcc6330e543338c6051f534d61e3c891dc4841646696b3f9124" }, "downloads": -1, "filename": "crawl_trulia-0.0.4.zip", "has_sig": false, "md5_digest": "d9ef2488f8c372d8d590bfb604130c28", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 22507, "upload_time": "2017-02-07T20:27:06", "url": "https://files.pythonhosted.org/packages/d0/09/234199a82d99a59c3eea0ba5d644bd7279ca65d5fd144ac3ab47f5ef1ceb/crawl_trulia-0.0.4.zip" } ] }