{ "info": { "author": "", "author_email": "", "bugtrack_url": null, "classifiers": [], "description": "schul-cloud-url-crawler\n=======================\n\n.. image:: https://travis-ci.org/schul-cloud/url-crawler.svg?branch=master\n :target: https://travis-ci.org/schul-cloud/url-crawler\n :alt: Build Status\n\n.. image:: https://badge.fury.io/py/schul-cloud-url-crawler.svg\n :target: https://pypi.python.org/pypi/schul-cloud-url-crawler\n :alt: Python Package Index\n\nThis crawler fetches ressources from urls and posts them to a server.\n\nPurpose\n-------\n\nThe purpose of this crawler is:\n\n- We can provide test data to the API.\n- It can crawl ressources which are not active and cannot post.\n- Other crawl services can use this crawler to upload their conversions.\n- It has the full crawler logic but does not transform into other formats.\n\n - Maybe we can create recommendations or a library for crawlers from this case.\n\nRequirements\n------------\n\nThe crawler should work as follows:\n\n- Provide urls\n\n - as command line arguments\n - as a link to a file with one url per line\n\n- Provide ressources_\n\n - as one ressource in a file\n - as a list of ressources\n\nThe crawler must be invoked to crawl.\n\nExample\n~~~~~~~\n\nThis example gets a ressource from the url and post it to the api.\n\n.. code:: shell\n\n python3 -m ressource_url_crawler http://localhost:8080 \\\n https://raw.githubusercontent.com/schul-cloud/ressources-api-v1/master/schemas/ressource/examples/valid/example-website.json\n\nAuthentication\n~~~~~~~~~~~~~~\n\nYou can specify the authentication_ like this:\n\n- ``--basic=username:password`` for basic authentication\n- ``--apikey=apikey`` for api key authentication\n\nFurther Requirements\n--------------------\n\n- **The crawler does not post ressources twice.**\n This can be implemented by\n\n - caching the ressources locally, to see if they changed\n\n - compare ressource\n - compare timestamp\n\n - removing the ressources from the database if they are updated after posting new ressources.\n\nThis may require some form of state for the crawler.\nThe state could be added to the ressources in a ``X-Ressources-Url-Crawler-Source`` field.\nThis allows local caching and requires getting the objects from the database.\n\n.. _ressources: https://github.com/schul-cloud/ressources-api-v1#ressources-api\n.. _authentication: https://github.com/schul-cloud/ressources-api-v1#authorization\n\n\n", "description_content_type": null, "docs_url": null, "download_url": "", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://github.com/schul-cloud/url-crawler", "keywords": "Schul-Cloud Content API,Crawler", "license": "", "maintainer": "", "maintainer_email": "", "name": "schul-cloud-url-crawler", "package_url": "https://pypi.org/project/schul-cloud-url-crawler/", "platform": "", "project_url": "https://pypi.org/project/schul-cloud-url-crawler/", "project_urls": { "Homepage": "https://github.com/schul-cloud/url-crawler" }, "release_url": "https://pypi.org/project/schul-cloud-url-crawler/1.0.17/", "requires_dist": [ "certifi (==2017.4.17)", "click (==6.7)", "jsonschema (==2.6.0)", "python-dateutil (==2.6.0)", "requests (==2.13.0)", "schul-cloud-resources-api-v1 (==1.0.0.80)", "six (==1.10.0)", "urllib3 (==1.21)" ], "requires_python": "", "summary": "Crawler for Schul-Cloud Ressources", "version": "1.0.17" }, "last_serial": 2871092, "releases": { "1.0.10": [ { "comment_text": "", "digests": { "md5": "439ae5151c5f4b546cdfa1a9e0cfdba8", "sha256": "e9a2d76044b24b45bfbf344f0962fdefbcb1d17be374842cd161712c9ba6651f" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.10-py3-none-any.whl", "has_sig": false, "md5_digest": "439ae5151c5f4b546cdfa1a9e0cfdba8", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 15475, "upload_time": "2017-05-12T09:56:06", "url": "https://files.pythonhosted.org/packages/d3/62/c1f6db7cd853dffc8a90cbc15439bc7ca2e524adb77631eeb3d24adb2cf7/schul_cloud_url_crawler-1.0.10-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "00946199f87c34fe97d32a9bd23db9c6", "sha256": "fdc12e243154f1258afeca04dc6f49e18650270ec6081f74fb553f6a46453a62" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.10.tar.gz", "has_sig": false, "md5_digest": "00946199f87c34fe97d32a9bd23db9c6", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 10606, "upload_time": "2017-05-12T09:56:07", "url": "https://files.pythonhosted.org/packages/a1/ce/c4a4f216c5a0993ee52166cccc3fe7aa5faaf95980c373f9d59118fae1b8/schul_cloud_url_crawler-1.0.10.tar.gz" } ], "1.0.11": [ { "comment_text": "", "digests": { "md5": "8bf358bbd591a08022e63577effa93e2", "sha256": "4b895be163a846fbdf261f40079ce35189087ead14dba53162be3c67012781c0" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.11-py3-none-any.whl", "has_sig": false, "md5_digest": "8bf358bbd591a08022e63577effa93e2", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 16064, "upload_time": "2017-05-12T11:58:33", "url": "https://files.pythonhosted.org/packages/c8/4c/689f854354e70803a733d3a243f9aa2d1f398a480cc1ffee47688e656ef8/schul_cloud_url_crawler-1.0.11-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "87a0f54d868368e78ec60b79769ab703", "sha256": "3634921018b2627870bf92c0a322317e368185a6a339b263fd4fb6f81d47b11d" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.11.tar.gz", "has_sig": false, "md5_digest": "87a0f54d868368e78ec60b79769ab703", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 11210, "upload_time": "2017-05-12T11:58:35", "url": "https://files.pythonhosted.org/packages/8a/5c/0967a59caf607fdc8f42726c5cb4dce1a9b2753344ffaba321316ca43950/schul_cloud_url_crawler-1.0.11.tar.gz" } ], "1.0.12": [ { "comment_text": "", "digests": { "md5": "b4206e4559092e8639e69fe07dbb87cb", "sha256": "07dc5a65fcec12d6d2bba7702e2d6c5a511bfe1cd3bd806e1bf65481c1e422ac" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.12-py3-none-any.whl", "has_sig": false, "md5_digest": "b4206e4559092e8639e69fe07dbb87cb", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 18545, "upload_time": "2017-05-12T17:17:02", "url": "https://files.pythonhosted.org/packages/4a/90/8347a3721dafc21c6f643f6107933d7f6ad71e333fc5130f553227bbd176/schul_cloud_url_crawler-1.0.12-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "345140a073f5d259824f8777295c4aab", "sha256": "b8028d99f857a53e64f41c43e7b63359ba1977aaf9331b4ee8f6e1d71813ba42" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.12.tar.gz", "has_sig": false, "md5_digest": "345140a073f5d259824f8777295c4aab", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 12939, "upload_time": "2017-05-12T17:17:03", "url": "https://files.pythonhosted.org/packages/ae/69/f59c5dc3eb6e3c56882ae1c4847a8e46b46dd4fc0207c740b45a5be1f221/schul_cloud_url_crawler-1.0.12.tar.gz" } ], "1.0.13": [ { "comment_text": "", "digests": { "md5": "76bfc6ee9f2715c1e207a00352bf47a6", "sha256": "0e25e2951770e21f6ab8fe474204f7aec0fee8cb593651225db8e9f996bba1c9" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.13-py3-none-any.whl", "has_sig": false, "md5_digest": "76bfc6ee9f2715c1e207a00352bf47a6", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 19876, "upload_time": "2017-05-13T06:30:26", "url": "https://files.pythonhosted.org/packages/54/57/b2556126f7ad6fd9a11419c1fcb2f3d93d8fcd36937437c3aefbc5c78b41/schul_cloud_url_crawler-1.0.13-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "03b77b1c62d7602049f1cc4ab4c36bdb", "sha256": "222b21e1b8c3df2fb334932b8b0304b45eda8cc951a4b1977b12f2814ff7265b" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.13.tar.gz", "has_sig": false, "md5_digest": "03b77b1c62d7602049f1cc4ab4c36bdb", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 14057, "upload_time": "2017-05-13T06:30:27", "url": "https://files.pythonhosted.org/packages/9a/3f/f3b27bfe50f8d2b6236f626cba49dbb97c007a7de6c264019d4bbdeda069/schul_cloud_url_crawler-1.0.13.tar.gz" } ], "1.0.14": [ { "comment_text": "", "digests": { "md5": "f3ab98e1ca9188f7ad19c3f7ec0cc7ab", "sha256": "5a2269889b6c784551b51ee8e981b81e6760585ed0cc9da81b298536a7ce7380" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.14-py3-none-any.whl", "has_sig": false, "md5_digest": "f3ab98e1ca9188f7ad19c3f7ec0cc7ab", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 20244, "upload_time": "2017-05-13T06:39:52", "url": "https://files.pythonhosted.org/packages/a3/ff/1db5027ff7a67121aea7dbb5465e22311227039c78dee8ebfbdb75f835e9/schul_cloud_url_crawler-1.0.14-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "799330add0853c3b16c7e011a68696a7", "sha256": "5ed16205f9d5e8362360d12bdc3968b087befc4b26d8ffa4f836a49caf0a228e" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.14.tar.gz", "has_sig": false, "md5_digest": "799330add0853c3b16c7e011a68696a7", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 14155, "upload_time": "2017-05-13T06:39:53", "url": "https://files.pythonhosted.org/packages/60/7e/c361c63cc803d8e3988b7d2bebf9a4b302d3cffb1d4f4e9809c83af886af/schul_cloud_url_crawler-1.0.14.tar.gz" } ], "1.0.16": [ { "comment_text": "", "digests": { "md5": "ff15d0ca897c1e24013ffa73c7b93921", "sha256": "5d0c1bfcdc7274996bdf61134050f1f28630752ae943e858afbe4b4f0d21cbf7" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.16-py3-none-any.whl", "has_sig": false, "md5_digest": "ff15d0ca897c1e24013ffa73c7b93921", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 20117, "upload_time": "2017-05-13T07:03:21", "url": "https://files.pythonhosted.org/packages/cb/0c/8fb8eb98d652c9bedd8ccb7e0a7252f75340a4261c3ca9c78308701c1e64/schul_cloud_url_crawler-1.0.16-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "fe59a6068c4b9cf2807b08f64651b2a4", "sha256": "0f064874d9b06a02009de967aea26cd59a7301a3c1e5a1db632965547abdd8a1" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.16.tar.gz", "has_sig": false, "md5_digest": "fe59a6068c4b9cf2807b08f64651b2a4", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 14140, "upload_time": "2017-05-13T07:03:23", "url": "https://files.pythonhosted.org/packages/03/88/d782086c0201b5b054cf3b9429854d8d042116de2062ca8b25b4911c4cf5/schul_cloud_url_crawler-1.0.16.tar.gz" } ], "1.0.17": [ { "comment_text": "", "digests": { "md5": "338e6f6b272bdae3528e6c5e441b850b", "sha256": "b67a81c05bc4b51eba0e5693a52dd0b7672f948d571769d51fda4df0ca6f6f58" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.17-py3-none-any.whl", "has_sig": false, "md5_digest": "338e6f6b272bdae3528e6c5e441b850b", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 20167, "upload_time": "2017-05-13T07:20:15", "url": "https://files.pythonhosted.org/packages/a1/5e/b4c3c51a07c207146695d9f672a2d9d5caf99e4c0b578d73131b8c5e8d02/schul_cloud_url_crawler-1.0.17-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "806146a56b38dbf3f14115c726c5260c", "sha256": "cfdc59458d10c3c38aaa1e79288ee935abc5a3ae1e9f05fdb1898514fd538131" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.17.tar.gz", "has_sig": false, "md5_digest": "806146a56b38dbf3f14115c726c5260c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 14195, "upload_time": "2017-05-13T07:20:17", "url": "https://files.pythonhosted.org/packages/40/ee/5161e6791606a2fe6c459a9711dfd3a915f3ecdd72505eaeea8b7635f7a1/schul_cloud_url_crawler-1.0.17.tar.gz" } ], "1.0.5": [ { "comment_text": "", "digests": { "md5": "9f250681a9c26ece5ad9b7b71ddb9b04", "sha256": "f4d72a9baec87d23de28fecca7a4d60fdc01d179dd1b0b5c1b46fa12b9dd0825" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.5-py3-none-any.whl", "has_sig": false, "md5_digest": "9f250681a9c26ece5ad9b7b71ddb9b04", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 7842, "upload_time": "2017-05-04T11:14:02", "url": "https://files.pythonhosted.org/packages/7f/42/a1520aa97e078a4aa03dedec3bc01759602bb8300cc9225f5916c2e2d32d/schul_cloud_url_crawler-1.0.5-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "b1495e24ca4e41fd19bb68f0cf495086", "sha256": "1893fd22ff2b397fe7bac41762689e39e7cac20cdbde4364613929e763c929b2" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.5.tar.gz", "has_sig": false, "md5_digest": "b1495e24ca4e41fd19bb68f0cf495086", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4672, "upload_time": "2017-05-04T11:14:03", "url": "https://files.pythonhosted.org/packages/5c/ba/b2662d83b1cb07715555b763eb1579c14e04e844f54d8e145f952128a600/schul_cloud_url_crawler-1.0.5.tar.gz" } ], "1.0.6": [ { "comment_text": "", "digests": { "md5": "aca9e8f319796229d4369cefc5839685", "sha256": "26c84ed1073e5946a8464851dd07998638cd99c781756bda0541553d29257c72" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.6-py3-none-any.whl", "has_sig": false, "md5_digest": "aca9e8f319796229d4369cefc5839685", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 8046, "upload_time": "2017-05-04T11:19:29", "url": "https://files.pythonhosted.org/packages/23/43/1bd113dcecbc1a1b50354f8eb747f394a3faef0b001540e5f68572474194/schul_cloud_url_crawler-1.0.6-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "2557da9a7e90101cd31775525ee5853b", "sha256": "9a5d58a05cb0634135611f70de2b3e88eb84c6e86f41d1a1f80071603601d03b" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.6.tar.gz", "has_sig": false, "md5_digest": "2557da9a7e90101cd31775525ee5853b", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 4800, "upload_time": "2017-05-04T11:19:31", "url": "https://files.pythonhosted.org/packages/2e/69/795d3afa1af1dd9c9a07298c4a49fd985b524522fe529dd7a7c6c72604f8/schul_cloud_url_crawler-1.0.6.tar.gz" } ], "1.0.9": [ { "comment_text": "", "digests": { "md5": "b3cc6a99ccc061fa4c296505b409b422", "sha256": "5421029c8a43eadc9bbb8f1d23cf03f79e6253f19499044a857b484caa98d055" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.9-py3-none-any.whl", "has_sig": false, "md5_digest": "b3cc6a99ccc061fa4c296505b409b422", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 13165, "upload_time": "2017-05-11T12:23:33", "url": "https://files.pythonhosted.org/packages/2d/1f/e9ae8e0b4307e66b5a38586a7216779fbde874a5a8d1386f5cf5336f285f/schul_cloud_url_crawler-1.0.9-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "f7549f789b7a4da108bcf0c7cc364454", "sha256": "f1cf21ab108c870da93152953a9751ca650071ca051c5f3e3c5cae55279823bc" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.9.tar.gz", "has_sig": false, "md5_digest": "f7549f789b7a4da108bcf0c7cc364454", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 8885, "upload_time": "2017-05-11T12:23:34", "url": "https://files.pythonhosted.org/packages/be/97/dd97e399e7277488374e652fe217bd7efd9f28af6cbcc961c305177b2364/schul_cloud_url_crawler-1.0.9.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "338e6f6b272bdae3528e6c5e441b850b", "sha256": "b67a81c05bc4b51eba0e5693a52dd0b7672f948d571769d51fda4df0ca6f6f58" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.17-py3-none-any.whl", "has_sig": false, "md5_digest": "338e6f6b272bdae3528e6c5e441b850b", "packagetype": "bdist_wheel", "python_version": "py3", "requires_python": null, "size": 20167, "upload_time": "2017-05-13T07:20:15", "url": "https://files.pythonhosted.org/packages/a1/5e/b4c3c51a07c207146695d9f672a2d9d5caf99e4c0b578d73131b8c5e8d02/schul_cloud_url_crawler-1.0.17-py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "806146a56b38dbf3f14115c726c5260c", "sha256": "cfdc59458d10c3c38aaa1e79288ee935abc5a3ae1e9f05fdb1898514fd538131" }, "downloads": -1, "filename": "schul_cloud_url_crawler-1.0.17.tar.gz", "has_sig": false, "md5_digest": "806146a56b38dbf3f14115c726c5260c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 14195, "upload_time": "2017-05-13T07:20:17", "url": "https://files.pythonhosted.org/packages/40/ee/5161e6791606a2fe6c459a9711dfd3a915f3ecdd72505eaeea8b7635f7a1/schul_cloud_url_crawler-1.0.17.tar.gz" } ] }