{ "info": { "author": "Niels Ranosch", "author_email": "ranosch@mfo.de", "bugtrack_url": null, "classifiers": [], "description": "It is alarming, that very often, special characters like umlauts break when\nconverting through different encodings. (You might want to take a look at the\nGerman Amazon Marketplace.)\nA broken umlaut is still valid in the target encoding and therefore can only be\ndetect through heuristics (magic).\n\nVersion 0.5: supporting utf-8 and latin1\nFor a full changeset, take a look at bitbucket.org/niels_mfo/encoding_repair\n(bug reports will also be accepted there)\n\nA common case that breaks a special character is the following:\n - An input string is coded in utf-8 (which uses multibyte chars)\n - It is interpreted as being a valid latin1 string\n - Latin1 has a valid representation for nearly all bytes\n - Latin1 uses single-byte chars\n - Now both bytes of the multi-byte char are interpreted as chars\n - The special char broke off into two different (valid!) characters\n\nThis scenario has many pitfalls:\n - The characters are irreversebly broken.\n - ... regardless of what you do with the string.\n - You can convert it through all encodings and the umlauts won't come back.\n - Only through a few heuristical replaces, this module is able to help you.\n\nThis module assumes, that a few special characters are always correct. They are\nstored in the list 'umlauts'. Furthermore, the module assumes, that their\nrepresentation, that would be correct in the other encoding, is always broken in\nthe target-encoding.\n\nNOTE:\nThis only happens, because people don't use unicode. If everybody would\nconsequently use unicode strings, I would not have to write this module.\nThe best and actually only way to handle encodings correctly is the following:\n - An input string comes into your programm.\n - If it is unicode, jump to point 6.\n - If it isn't, you might already need to repair umlauts.\n - You need to make sure, that you know the right encoding of the input\n string, because it is hardly possible to guess.\n - Convert it to unicode.\n - Use the unicode string throuout your whole programm.\n - If you can return unicode, return unicode.\n - If you are in doubt, return unicode.\n - If you really need to return anything else, return utf-8.\n - If you are certain, that the programm, which will take your output is not\n able to handle neither unicode nor utf-8, you better write a bug report.", "description_content_type": null, "docs_url": null, "download_url": "UNKNOWN", "downloads": { "last_day": -1, "last_month": -1, "last_week": -1 }, "home_page": "https://bitbucket.org/niels_mfo/encoding_repair", "keywords": "encoding utf-8 latin1 character magic umlaut special unicode", "license": "MIT", "maintainer": null, "maintainer_email": null, "name": "encoding_repair", "package_url": "https://pypi.org/project/encoding_repair/", "platform": "UNKNOWN", "project_url": "https://pypi.org/project/encoding_repair/", "project_urls": { "Download": "UNKNOWN", "Homepage": "https://bitbucket.org/niels_mfo/encoding_repair" }, "release_url": "https://pypi.org/project/encoding_repair/0.7dev/", "requires_dist": null, "requires_python": null, "summary": "Helpers to repair encodings (especially umlauts)", "version": "0.7dev" }, "last_serial": 791643, "releases": { "0.3dev": [ { "comment_text": "", "digests": { "md5": "8210b61f22cc05baf17ff3db5198f965", "sha256": "05a1b10fd0c04b4ddae729ae9c25b1c9fb0ac2aa6ca67c4a8be6c5561c69c45c" }, "downloads": -1, "filename": "encoding_repair-0.3dev.tar.gz", "has_sig": false, "md5_digest": "8210b61f22cc05baf17ff3db5198f965", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3621, "upload_time": "2011-08-25T14:19:26", "url": "https://files.pythonhosted.org/packages/94/f4/ec78186608776e5ae296da03b6defc361f4a62c19be5d75ebb2044260b5a/encoding_repair-0.3dev.tar.gz" } ], "0.5dev": [ { "comment_text": "", "digests": { "md5": "0f35173ec65e37e0bda21653bffdb0ac", "sha256": "d9a858275d6e7ea029121e6ffacda73b21e318e02c29615caa614a292435398c" }, "downloads": -1, "filename": "encoding_repair-0.5dev.tar.gz", "has_sig": false, "md5_digest": "0f35173ec65e37e0bda21653bffdb0ac", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3615, "upload_time": "2011-08-25T14:20:52", "url": "https://files.pythonhosted.org/packages/70/ab/c1d39d27524a17f8240d00906f5430192fb64cb1944434bfd3fea7aa6d1f/encoding_repair-0.5dev.tar.gz" } ], "0.6dev": [ { "comment_text": "", "digests": { "md5": "cbd3abe893aedc8b0ae1eba2aeb37077", "sha256": "c0c47cf290b6eb6143d691c98228f96d3dda0e821a8c1fbf965bce10e4426a0f" }, "downloads": -1, "filename": "encoding_repair-0.6dev.tar.gz", "has_sig": false, "md5_digest": "cbd3abe893aedc8b0ae1eba2aeb37077", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3638, "upload_time": "2011-08-25T14:34:05", "url": "https://files.pythonhosted.org/packages/57/5a/4f94905e04b18b5046d2b26849a0ba2569280afc299df6bf4719bebbd5a2/encoding_repair-0.6dev.tar.gz" } ], "0.7dev": [ { "comment_text": "", "digests": { "md5": "274dd750d0e4111620b17508cbeb322c", "sha256": "ccb765b64dac98bc9905b8d5370255c25c9dc66f80dee5c149b420f83494e407" }, "downloads": -1, "filename": "encoding_repair-0.7dev.tar.gz", "has_sig": false, "md5_digest": "274dd750d0e4111620b17508cbeb322c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3644, "upload_time": "2011-08-31T13:52:43", "url": "https://files.pythonhosted.org/packages/e5/97/4b2bbc9dc979911c8074829602e8ad7b8e7f4b232b413f8e78d595c50f08/encoding_repair-0.7dev.tar.gz" } ] }, "urls": [ { "comment_text": "", "digests": { "md5": "274dd750d0e4111620b17508cbeb322c", "sha256": "ccb765b64dac98bc9905b8d5370255c25c9dc66f80dee5c149b420f83494e407" }, "downloads": -1, "filename": "encoding_repair-0.7dev.tar.gz", "has_sig": false, "md5_digest": "274dd750d0e4111620b17508cbeb322c", "packagetype": "sdist", "python_version": "source", "requires_python": null, "size": 3644, "upload_time": "2011-08-31T13:52:43", "url": "https://files.pythonhosted.org/packages/e5/97/4b2bbc9dc979911c8074829602e8ad7b8e7f4b232b413f8e78d595c50f08/encoding_repair-0.7dev.tar.gz" } ] }