{ "info": { "author": "rozap", "author_email": "chrisd1891@gmail.com", "bugtrack_url": null, "classifiers": [], "description": "corpcrawl\n=============\n\n# About\nThis is a python scraper for the Securities and Exchange Commission EDGAR database. It looks at the Form 10k\nfilings that publicly held corporations are required to file with the SEC. It then attempts to extract the \nsubsidiary relationships from the 10k exhibit 21.1. \n\nCorpcrawl is a storage agnostic scraper, so you'll need to implement your own storage scheme. \n\n\n# Installation\nYou can get the package from PyPi through Pip. \n \n pip install corpcrawl\n \nFrom a python console you can try\n\n import corpcrawl\n \nIf it works, then you can get started\n\n# How to use\n\n## Running it\nFirst import the required pieces\n \n from corpcrawl.crawler import CorpCrawl\n from corpcrawl.backend import Backend\n \n def main()\n my_backend = MyBackend()\n crawler = CorpCrawl(cache_path = '/an/absolute/path/to/some/dir', backend = my_backend)\n c.crawl(years = [2011, 2012], quarters = [1, 2, 3, 4])\n \n \n class MyBackend(Backend):\n\n def get_company(self, name):\n pass\n\n def add_company(self, comp):\n print \"Adding %s\" % str(comp)\n \n \n \n \n \n \nThis is about as simple of a backend as can be. This code will crawl the filings and print out the name of each\ncompany and subsidiary that it finds for all quarters of years 2011 and 2012.\nObviously you will want to hook it up to a database or something.\n\n# Notes\nThe SEC data is very unstructured. As such, there are a lot of errors. This is version 0.0.1, so instead of\nimplementing your own methods to massage the data that it returns, it would be better if you could contribute\nso make the core parser better.", "description_content_type": null, "docs_url": null, "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/chrisd1891/corpcrawl/", "keywords": null, "license": "MIT", "maintainer": null, "maintainer_email": null, "name": "corpcrawl", "package_url": "https://pypi.org/project/corpcrawl/", "platform": "any", "project_url": "https://pypi.org/project/corpcrawl/", "project_urls": { "Download": "UNKNOWN", "Homepage": "https://github.com/chrisd1891/corpcrawl/" }, "release_url": "https://pypi.org/project/corpcrawl/0.0.37/", "requires_dist": null, "requires_python": null, "summary": "Corpcrawl EDGAR Scraper", "version": "0.0.37" }, "last_serial": 788460, "releases": { "0.0.1": [ { "comment_text": "", "digests": { "md5": "605f62587cc02b14a612aff9f7cc0c90", "sha256": "e6c50701d82cce3b95e91ac28d4eac19805d0e4bbdce152f76760113dfa71f85" }, "downloads": -1, "filename": "corpcrawl-0.0.1.tar.gz", "has_sig": false, "md5_digest": "605f62587cc02b14a612aff9f7cc0c90", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3526, "upload_time": "2013-03-12T03:24:24", "url": "https://files.pythonhosted.org/packages/92/1a/9b489fd50effafe66ed78d3e9f8e86ba4819479cc36ece934cb52d72b0af/corpcrawl-0.0.1.tar.gz" } ], "0.0.2": [ { "comment_text": "", "digests": { "md5": "930ecb08429d991c9ee387582206e59e", "sha256": "2a5d2e230f95b2761ff241fdca47350d7f51be0c2e177569992fd8bad4595ae6" }, "downloads": -1, "filename": "corpcrawl-0.0.2.tar.gz", "has_sig": false, "md5_digest": "930ecb08429d991c9ee387582206e59e", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4148, "upload_time": "2013-03-12T04:13:30", "url": "https://files.pythonhosted.org/packages/eb/ff/4eda7e225e148bb831959ddf6b5f6529ffed43383b4d93fb9e89f70d2f9c/corpcrawl-0.0.2.tar.gz" } ], "0.0.3": [ { "comment_text": "", "digests": { "md5": "7c87670f3b8c189e6d585a7d22fb4646", "sha256": "751834a41367fdf99380e0e155eea305e748931a6d3be9167f99188bfb330c40" }, "downloads": -1, "filename": "corpcrawl-0.0.3.tar.gz", "has_sig": false, "md5_digest": "7c87670f3b8c189e6d585a7d22fb4646", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4175, "upload_time": "2013-03-14T04:34:38", "url": "https://files.pythonhosted.org/packages/50/ec/6e44c00fdfbbe8cfdc088864dd26b18dcc474e21bd27200d149f8f1f0a82/corpcrawl-0.0.3.tar.gz" } ], "0.0.30": [ { "comment_text": "", "digests": { "md5": "6e646532f24b4a88ceb11cf68720517d", "sha256": "3e6bbec3c35a2e0d23227e4fabbcdffb8c563372b3030ceba2df0a070f9fe292" }, "downloads": -1, "filename": "corpcrawl-0.0.30.tar.gz", "has_sig": false, "md5_digest": "6e646532f24b4a88ceb11cf68720517d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4333, "upload_time": "2013-03-14T04:44:59", "url": "https://files.pythonhosted.org/packages/af/53/5250f06652e943f1d34a9d1faafd6951a886270e593da2c530dd6b33d309/corpcrawl-0.0.30.tar.gz" } ], "0.0.31": [ { "comment_text": "", "digests": { "md5": "baba1cace7d0222cf8acb9c0816b67a6", "sha256": "38f61e03df8cb1301e811672a3cc0384484676eb749ff2076df2bdfa15aac8b4" }, "downloads": -1, "filename": "corpcrawl-0.0.31.tar.gz", "has_sig": false, "md5_digest": "baba1cace7d0222cf8acb9c0816b67a6", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4331, "upload_time": "2013-03-14T05:01:25", "url": "https://files.pythonhosted.org/packages/34/31/c998fb4b78bb7c5eb52f5acec94dc11fdb5200fb59f2346cfe84418259ec/corpcrawl-0.0.31.tar.gz" } ], "0.0.32": [ { "comment_text": "", "digests": { "md5": "fcb0fb136dda9a463c9b538cc4cf99e6", "sha256": "29cbf54de7c669fdc555e06c14b29c7393d2708f911d94b89adab92158adec5b" }, "downloads": -1, "filename": "corpcrawl-0.0.32.tar.gz", "has_sig": false, "md5_digest": "fcb0fb136dda9a463c9b538cc4cf99e6", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4450, "upload_time": "2013-03-14T05:11:15", "url": "https://files.pythonhosted.org/packages/c7/c4/b9e83c7bf61ce70a94beb0e9b957a5b6be6bde821dd5cdd14497560bc75a/corpcrawl-0.0.32.tar.gz" } ], "0.0.33": [ { "comment_text": "", "digests": { "md5": "5bc8edae406360d7a5a4da3f33cb68a2", "sha256": "2da6f33bc4e3b52c0645ad2b23e8446ecfc5b388141aa1920806ae412eb93144" }, "downloads": -1, "filename": "corpcrawl-0.0.33.tar.gz", "has_sig": false, "md5_digest": "5bc8edae406360d7a5a4da3f33cb68a2", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 16148, "upload_time": "2013-03-14T05:16:18", "url": "https://files.pythonhosted.org/packages/a1/c7/20f655b7e588e8186f519f06dcdfe3acbadce230376b0116e09aeebc8289/corpcrawl-0.0.33.tar.gz" } ], "0.0.34": [ { "comment_text": "", "digests": { "md5": "505b4f3b7041e1f6a5f33ac09f9c40e7", "sha256": "1c0bf8b0f8baa248109fc6ad470b6c096ff24f001af4ff544ad49756119e3fd6" }, "downloads": -1, "filename": "corpcrawl-0.0.34.tar.gz", "has_sig": false, "md5_digest": "505b4f3b7041e1f6a5f33ac09f9c40e7", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 16077, "upload_time": "2013-03-14T05:32:51", "url": "https://files.pythonhosted.org/packages/4d/ac/137a2f88c138787d5784d49be5ce9a01f68a6d4f6979992653597c6a7087/corpcrawl-0.0.34.tar.gz" } ], "0.0.35": [ { "comment_text": "", "digests": { "md5": "0146a30e69797c396618a737e41c4dcb", "sha256": "71136fda16e836134363162385ba97d8beb86d04cc700bce8b0770b766c28eb7" }, "downloads": -1, "filename": "corpcrawl-0.0.35.tar.gz", "has_sig": false, "md5_digest": "0146a30e69797c396618a737e41c4dcb", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 16082, "upload_time": "2013-03-14T05:42:38", "url": "https://files.pythonhosted.org/packages/4f/5a/e61bd273f97a5b6de26c5228011a54b65ef3432b23b019fd41c7e30297a3/corpcrawl-0.0.35.tar.gz" } ], "0.0.36": [ { "comment_text": "", "digests": { "md5": "d3bc16b7b22ea9809eb6b09c4e6df72d", "sha256": "90f16b6d3a68abc588464414fef0933a97b6703320bf3a891cc25abd1ba4bd32" }, "downloads": -1, "filename": "corpcrawl-0.0.36.tar.gz", "has_sig": false, "md5_digest": "d3bc16b7b22ea9809eb6b09c4e6df72d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 16176, "upload_time": "2013-03-14T06:13:29", "url": "https://files.pythonhosted.org/packages/ce/71/dd6fecf99bd340fcfdc32d7204fdf702f4fb0d2bc053febbe3bee03019ba/corpcrawl-0.0.36.tar.gz" } ], "0.0.37": [ { "comment_text": "", "digests": { "md5": "7110980ed14a0735f4e984547a14bb7f", "sha256": "c29bca0e249d24286f01dd7630eabe953b4498615a6b9de26c2de563d7803214" }, "downloads": -1, "filename": "corpcrawl-0.0.37.tar.gz", "has_sig": false, "md5_digest": "7110980ed14a0735f4e984547a14bb7f", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 16586, "upload_time": "2013-03-14T06:38:02", "url": "https://files.pythonhosted.org/packages/1b/11/2d93c6021fc0bd1bcc54b08fb80b304299057a12e554cfff1d9316274dff/corpcrawl-0.0.37.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "7110980ed14a0735f4e984547a14bb7f", "sha256": "c29bca0e249d24286f01dd7630eabe953b4498615a6b9de26c2de563d7803214" }, "downloads": -1, "filename": "corpcrawl-0.0.37.tar.gz", "has_sig": false, "md5_digest": "7110980ed14a0735f4e984547a14bb7f", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 16586, "upload_time": "2013-03-14T06:38:02", "url": "https://files.pythonhosted.org/packages/1b/11/2d93c6021fc0bd1bcc54b08fb80b304299057a12e554cfff1d9316274dff/corpcrawl-0.0.37.tar.gz" } ] }