{ "info": { "author": "Anthon van der Neut", "author_email": "a.van.der.neut@ruamel.eu", "bugtrack_url": null, "classifiers": [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python" ], "description": "ruamel.pdfdouble\n================\n\n\nthis package provides the ``pdfdbl`` command::\n\n pdfdbl scan dir1 dir2\n\nThis will walk down the directories provided as argument and for the PDF\nfiles found, create a hash based on (in order)::\n\n- metadata if unique\n- images, if the number of images matches the number of pages\n- text\n\nThis assumes that ``pdfinfo``, ``pdfimages`` and `pdftotext`` from the\n``poppler-utils`` package are avaialable.\n\nA \"database\" is build up in ``~/.config/pdfdbl/pdf.lst``\nagainst which further scans are tested.\n\nRemoving markings\n-----------------\n\nIn ruamel/pdfdouble/pdfdouble.py there are two methods that can be enhanced\nto filter out markings in the PDF that make them less unique and make\nvirtually the same files to have different hashes.\n\nFor text the method ``PdfData.filter_for_marking`` should be extended to remove\nand markings from the string that is its arguments and return the result.\n\nFor scanned images the method ``PdfData.process_image_and_update`` needs to be\nenhanced, e.g. by cutting off the images bottom and top X lines, and\nby removing any gray background text by setting all black pixels to white.\nThis function needs to update the hash passed in using the ``.update()`` method\npassing in the filtered data.\n\nRestrictions\n------------\n\nThe current \"database\" cannot handle paths that contain newlines\n\n\nThis utility is currently Python 2.7 only.", "description_content_type": null, "docs_url": null, "download_url": null, "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://bitbucket.org/ruamel/pdfdouble", "keywords": null, "license": "MIT license", "maintainer": null, "maintainer_email": null, "name": "ruamel.pdfdouble", "package_url": "https://pypi.org/project/ruamel.pdfdouble/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/ruamel.pdfdouble/", "project_urls": { "Homepage": "https://bitbucket.org/ruamel/pdfdouble" }, "release_url": "https://pypi.org/project/ruamel.pdfdouble/0.1.2/", "requires_dist": null, "requires_python": null, "summary": "scan and find double PDF files, by creating a hash database", "version": "0.1.2" }, "last_serial": 1699224, "releases": { "0.1": [ { "comment_text": "", "digests": { "md5": "09ae32a20a88c9f613332bfb318102b1", "sha256": "3918c12ee6a922b70c3e0755a0b83afd0ad3ae6e42cf73d125ac85fcfe23130b" }, "downloads": -1, "filename": "ruamel.pdfdouble-0.1.tar.gz", "has_sig": false, "md5_digest": "09ae32a20a88c9f613332bfb318102b1", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6442, "upload_time": "2015-03-20T15:05:48", "url": "https://files.pythonhosted.org/packages/25/ea/8fcddf559e5d9f76dbfc850e6a027a5e6bec3d3a552262fcf4a8c9aca6ed/ruamel.pdfdouble-0.1.tar.gz" } ], "0.1.1": [ { "comment_text": "", "digests": { "md5": "c543a8bd23cecdc170fd8f91d0a1e00d", "sha256": "a5b82766d9a0cc3e1aab7a12da841edd30c3fbe1a44b6bf0ceae4fee09d6cd46" }, "downloads": -1, "filename": "ruamel.pdfdouble-0.1.1.tar.gz", "has_sig": false, "md5_digest": "c543a8bd23cecdc170fd8f91d0a1e00d", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 6602, "upload_time": "2015-08-27T09:16:35", "url": "https://files.pythonhosted.org/packages/04/08/564df5177fbd0faf93f2f6df0bd5fd5e3b35929f22cdc9ddcf599da5f83b/ruamel.pdfdouble-0.1.1.tar.gz" } ], "0.1.2": [ { "comment_text": "", "digests": { "md5": "9b48c5254362501d25c43f502f7ce939", "sha256": "01f4aab4e90d7222115e02211aa575db6b8155c68920999639cf01dbb45d3bd1" }, "downloads": -1, "filename": "ruamel.pdfdouble-0.1.2-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "9b48c5254362501d25c43f502f7ce939", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 9696, "upload_time": "2015-08-29T09:26:25", "url": "https://files.pythonhosted.org/packages/b3/a0/f96dea16434710cffb34441271ebe9345cbe680caf1912b112f963ecd015/ruamel.pdfdouble-0.1.2-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "527139f4728083d66383af6da5ef7e35", "sha256": "8986e56d81e751ed204ced7c34f10ab0b8a2a5e3aabe56d287acfbbb86847f3f" }, "downloads": -1, "filename": "ruamel.pdfdouble-0.1.2.tar.gz", "has_sig": false, "md5_digest": "527139f4728083d66383af6da5ef7e35", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 11334, "upload_time": "2015-08-29T09:26:21", "url": "https://files.pythonhosted.org/packages/b7/d2/faaf85f5fbefd36edfc2066a14a8b15b8f675653a0cdb6f2560884a36b17/ruamel.pdfdouble-0.1.2.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "9b48c5254362501d25c43f502f7ce939", "sha256": "01f4aab4e90d7222115e02211aa575db6b8155c68920999639cf01dbb45d3bd1" }, "downloads": -1, "filename": "ruamel.pdfdouble-0.1.2-py2.py3-none-any.whl", "has_sig": false, "md5_digest": "9b48c5254362501d25c43f502f7ce939", "packagetype": "bdist_wheel", "python_version": "py2.py3", "requires_python": null, "size": 9696, "upload_time": "2015-08-29T09:26:25", "url": "https://files.pythonhosted.org/packages/b3/a0/f96dea16434710cffb34441271ebe9345cbe680caf1912b112f963ecd015/ruamel.pdfdouble-0.1.2-py2.py3-none-any.whl" }, { "comment_text": "", "digests": { "md5": "527139f4728083d66383af6da5ef7e35", "sha256": "8986e56d81e751ed204ced7c34f10ab0b8a2a5e3aabe56d287acfbbb86847f3f" }, "downloads": -1, "filename": "ruamel.pdfdouble-0.1.2.tar.gz", "has_sig": false, "md5_digest": "527139f4728083d66383af6da5ef7e35", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 11334, "upload_time": "2015-08-29T09:26:21", "url": "https://files.pythonhosted.org/packages/b7/d2/faaf85f5fbefd36edfc2066a14a8b15b8f675653a0cdb6f2560884a36b17/ruamel.pdfdouble-0.1.2.tar.gz" } ] }