PK qI~֐pyphonetics/__init__.py"""A Python 3 phonetics library.""" from .phonetics import (Soundex, Metaphone, MatchingRatingApproach, FuzzySoundex, Lein, RefinedSoundex) __version__ = '0.4' PK qIuPEpyphonetics/exceptions.pyclass UnicodeException(Exception): pass class WrongLengthException(Exception): pass class DistanceMetricError(Exception): pass PK qI_%pyphonetics/utils.pyfrom itertools import groupby from .exceptions import WrongLengthException def translation(first, second): """Create an index of mapped letters (zip to dict).""" if len(first) != len(second): raise WrongLengthException('The lists are not of the same length!') return dict(zip(first, second)) def squeeze(word): """Squeeze the given sequence by dropping consecutive duplicates.""" return ''.join(x[0] for x in groupby(word)) PK qISF22(pyphonetics/distance_metrics/__init__.pyfrom .levenshtein import * from .hamming import * PK qIh)  'pyphonetics/distance_metrics/hamming.pyfrom ..exceptions import WrongLengthException def hamming_distance(word1, word2): """ Computes the Hamming distance. [Reference]: https://en.wikipedia.org/wiki/Hamming_distance [Article]: Hamming, Richard W. (1950), "Error detecting and error correcting codes", Bell System Technical Journal 29 (2): 147–160 """ from operator import ne if len(word1) != len(word2): raise WrongLengthException('The words need to be of the same length!') return sum(map(ne, word1, word2)) PK qIQ)+pyphonetics/distance_metrics/levenshtein.pydef levenshtein_distance(word1, word2): """ Computes the Levenshtein distance. [Reference]: https://en.wikipedia.org/wiki/Levenshtein_distance [Article]: Levenshtein, Vladimir I. (February 1966). "Binary codes capable of correcting deletions, insertions,and reversals". Soviet Physics Doklady 10 (8): 707–710. [Implementation]: https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python """ if len(word1) < len(word2): return levenshtein_distance(word2, word1) if len(word2) == 0: return len(word1) previous_row = list(range(len(word2) + 1)) for i, char1 in enumerate(word1): current_row = [i + 1] for j, char2 in enumerate(word2): insertions = previous_row[j + 1] + 1 deletions = current_row[j] + 1 substitutions = previous_row[j] + (char1 != char2) current_row.append(min(insertions, deletions, substitutions)) previous_row = current_row return previous_row[-1] PK qII|I!pyphonetics/phonetics/__init__.pyfrom .soundex import * from .metaphone import * from .mra import * from .fuzzy_soundex import * from .lein import * from .refined_soundex import * PK qI\ܣ= = &pyphonetics/phonetics/fuzzy_soundex.pyimport re from unidecode import unidecode from ..utils import squeeze, translation from ..exceptions import UnicodeException from .phonetic_algorithm import PhoneticAlgorithm class FuzzySoundex(PhoneticAlgorithm): """ Implementation of the "Fuzzy Soundex" algorithm. [Reference]: http://wayback.archive.org/web/20100629121128/http://www.ir.iit.edu/publications/downloads/IEEESoundexV5.pdf [Article]: Holmes, David and M. Catherine McCabe. "Improving Precision and Recall for Soundex Retrieval." """ def __init__(self): super().__init__() self.translations = translation( 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', '0193017-07745501769301-7-9' ) self.rules = [ (r'CA', r'KA'), (r'CC', r'KK'), (r'CK', r'KK'), (r'CE', r'SE'), (r'CHL', r'KL'), (r'CL', r'KL'), (r'CHR', r'KR'), (r'CR', r'KR'), (r'CI', r'SI'), (r'CO', r'KO'), (r'CU', r'KU'), (r'CY', r'SY'), (r'DG', r'GG'), (r'GH', r'HH'), (r'MAC', r'MK'), (r'MC', r'MK'), (r'NST', r'NSS'), (r'PF', r'FF'), (r'PH', r'FF'), (r'SCH', r'SSS'), (r'TIO', r'SIO'), (r'TIA', r'SIO'), (r'TCH', r'CHH'), ] self.set1 = ['CS', 'CZ', 'TS', 'TZ'] self.set2 = ['HR', 'WR'] self.set3 = ['KN', 'NG'] self.set4 = 'HWY' def phonetics(self, word): if not isinstance(word, str): raise UnicodeException('Expected a unicode string!') if not word: return '' word = unidecode(word).upper() # Substitutions for beginnings first_two, rest = word[:2], word[2:] if first_two in self.set1: word = 'SS' + rest elif first_two == 'GN': word = 'NN' + rest elif first_two in self.set2: word = 'RR' + rest elif first_two == 'HW': word = 'WW' + rest elif first_two in self.set3: word = 'NN' + rest # Substitutions for endings last_two, initial = word[-2:], word[0:-2] if last_two == 'CH': word = initial + 'KK' elif last_two == 'NT': word = initial + 'TT' elif last_two == 'RT': word = initial + 'RR' elif word[-3:] == 'RDT': word = word[0:-3] + 'RR' # Applying the rules for rule in self.rules: word = re.sub(rule[0], rule[1], word) # Catch the first letter first_letter = word[0] # Translating code = ''.join(self.translations.get(char, char) for char in word) # Removing hyphens code = code.replace('-', '') # Squeezing the code code = squeeze(code) # Dealing with initials code = first_letter if code[0] in self.set4 \ else first_letter + code[1:] # Dropping vowels code = code.replace('0', '') return code PK qI$ōpyphonetics/phonetics/lein.pyimport re from unidecode import unidecode from ..utils import squeeze, translation from ..exceptions import UnicodeException from .phonetic_algorithm import PhoneticAlgorithm class Lein(PhoneticAlgorithm): """ The Lein name coding procedure. [Reference]: http://naldc.nal.usda.gov/download/27833/PDF """ def __init__(self): super().__init__() self.translations = translation( 'DTMNLRBFPVCJKGQSXZ', '112233444455555555' ) self.pad = lambda code: '{}0000'.format(code)[:4] def phonetics(self, word): if not isinstance(word, str): raise UnicodeException('Expected a unicode string!') word = unidecode(word).upper() word = re.sub(r'[^A-Z]\s', r'', word) # Keep the 1st letter first, code = word[0], word[1:] # Drop vowels and Y, W & H code = re.sub(r'[AEIOUYWH]', r'', code) # Drop consecutive duplicates and truncate to 4 chars code = squeeze(code)[0: 4] # Translations code = ''.join(self.translations.get(char, char) for char in code) return self.pad(first + code) PK qI"Y''"pyphonetics/phonetics/metaphone.pyimport re from unidecode import unidecode from ..exceptions import UnicodeException from .phonetic_algorithm import PhoneticAlgorithm class Metaphone(PhoneticAlgorithm): """ The metaphone algorithm. [Reference]: https://en.wikipedia.org/wiki/Metaphone [Author]: Lawrence Philips, 1990 """ def __init__(self): super().__init__() self.rules = [ (r'[^a-z]', r''), (r'([bcdfhjklmnpqrstvwxyz])\1+', r'\1'), (r'^ae', r'E'), (r'^[gkp]n', r'N'), (r'^wr', r'R'), (r'^x', r'S'), (r'^wh', r'W'), (r'mb$', r'M'), (r'(?!^)sch', r'SK'), (r'th', r'0'), (r't?ch|sh', r'X'), (r'c(?=ia)', r'X'), (r'[st](?=i[ao])', r'X'), (r's?c(?=[iey])', r'S'), (r'[cq]', r'K'), (r'dg(?=[iey])', r'J'), (r'd', r'T'), (r'g(?=h[^aeiou])', r''), (r'gn(ed)?', r'N'), (r'([^g]|^)g(?=[iey])', r'\1J'), (r'g+', r'K'), (r'ph', r'F'), (r'([aeiou])h(?=\b|[^aeiou])', r'\1'), (r'[wy](?![aeiou])', r''), (r'z', r'S'), (r'v', r'F'), (r'(?!^)[aeiou]+', r'') ] def phonetics(self, word): if not isinstance(word, str): raise UnicodeException('Expected a unicode string!') code = unidecode(word).lower() for item in self.rules: code = re.sub(item[0], item[1], code) return code.upper() PK qI:pyphonetics/phonetics/mra.pyimport re from unidecode import unidecode from ..utils import squeeze from ..exceptions import UnicodeException from .phonetic_algorithm import PhoneticAlgorithm class MatchingRatingApproach(PhoneticAlgorithm): """ Functions related to the computation of the Match Rating Approach codex. [Reference]: https://en.wikipedia.org/wiki/Match_rating_approach [Article]: Moore, G B.; Kuhns, J L.; Treffzs, J L.; Montgomery, C A. (Feb 1, 1977). Accessing Individual Records from Personal Data Files Using Nonunique Identifiers. US National Institute of Standards and Technology. p. 17. NIST SP - 500-2. """ def __init__(self): super().__init__() def phonetics(self, word): if not isinstance(word, str): raise UnicodeException('Expected a unicode string!') codex = unidecode(word).upper() codex = re.sub(r'[^A-Z]', r'', codex) # Dropping non - leading vowels codex = codex[0] + re.sub(r'[AEIOU]', r'', codex[1:]) # Dropping consecutive consonants codex = squeeze(codex) # Returning the codex offset = min(3, len(codex) - 3) return codex[:3] + codex[len(codex) - offset:offset + len(codex)] PK qIUW?ww+pyphonetics/phonetics/phonetic_algorithm.pyfrom ..distance_metrics import levenshtein_distance, hamming_distance from ..exceptions import DistanceMetricError class PhoneticAlgorithm: """ The main Phonetic Algorithm class, to ensure a unified API for all the included algorithms. """ def __init__(self): self.distances = { 'levenshtein': levenshtein_distance, 'hamming': hamming_distance, } def phonetics(self, word): """Get the phonetic representation of the word.""" pass def sounds_like(self, word1, word2): """Compare the phonetic representations of 2 words, and return a boolean value.""" return self.phonetics(word1) == self.phonetics(word2) def similarity(self, word1, word2, metric='levenshtein'): """Get the similarity of the words, using the supported distance metrics.""" if metric in self.distances: distance_func = self.distances[metric] return distance_func(self.phonetics(word1), self.phonetics(word2)) else: raise DistanceMetricError('Distance metric not supported! Choose from levenshtein, hamming.') PK qI[(pyphonetics/phonetics/refined_soundex.pyimport re from unidecode import unidecode from ..utils import translation, squeeze from ..exceptions import UnicodeException from .phonetic_algorithm import PhoneticAlgorithm class RefinedSoundex(PhoneticAlgorithm): """ The Refined Soundex algorithm. [Reference]: https://en.wikipedia.org/wiki/Soundex [Authors]: Robert C. Russel, Margaret King Odell """ def __init__(self): super().__init__() self.translations = translation( 'AEIOUYWHBPFVCKSGJQXZDTLMNR', '000000DD112233344555667889' ) def phonetics(self, word): if not isinstance(word, str): raise UnicodeException('Expected a unicode string!') word = unidecode(word).upper() word = re.sub(r'[^A-Z]', r'', word) first_letter = word[0] tail = ''.join(self.translations[char] for char in word if self.translations[char] != 'D') return first_letter + squeeze(tail) PK qIk۹ pyphonetics/phonetics/soundex.pyimport re from unidecode import unidecode from ..utils import translation, squeeze from ..exceptions import UnicodeException from .phonetic_algorithm import PhoneticAlgorithm class Soundex(PhoneticAlgorithm): """ The Soundex algorithm. [Reference]: https://en.wikipedia.org/wiki/Soundex [Authors]: Robert C. Russel, Margaret King Odell """ def __init__(self): super().__init__() self.translations = translation( 'AEIOUYWHBPFVCSKGJQXZDTLMNR', '000000DD111122222222334556' ) self.pad = lambda code: '{}0000'.format(code)[:4] def phonetics(self, word): if not isinstance(word, str): raise UnicodeException('Expected a unicode string!') word = unidecode(word).upper() word = re.sub(r'[^A-Z]', r'', word) first_letter = word[0] tail = ''.join(self.translations[char] for char in word if self.translations[char] != 'D') # Dropping first code's letter if duplicate if tail[0] == self.translations[first_letter]: tail = tail[1:] code = squeeze(tail).replace('0', '') return self.pad(first_letter + code) PK!H;@QPpyphonetics-0.4.dist-info/WHEEL1 0 RZq+D-Dv;_[*7Fp ܦpv/fݞoL(*IPK!HjaS."pyphonetics-0.4.dist-info/METADATAUێ6}W ^y/EF؎uKKcYK\;eYN/A"s89sf4kt^]Mq\a vo[1ʳ|]ȞRKrwCA#k~5 ,oU-'F6nʨLaP{_]`H] [_B(ܤuVG^%<߱ג{/ӣkakB(Kx_PK)a4+%t3H$x3}^+da+!23Q?𡄨E[ÖCTH5/pG%RhZ$f8 ($NGkU"*"grXVh:2~7f(t>Cզ+kj3jNH ^ }o aأK:h4{uz[Je35_HB|XZ>sGxv#ML{O`OΨ!2Dp[[;"i4槻K1GZ@#pker0@'!M}.rC|#}d-|h.1|b'.M.gdz#z1/v7cHq Du?m .IOJR¤:{؁Cp{%"_]]K?:YApv.1A*O8GXi5:J_>E=e^ݼ:X{k:k w =l6MeN|gKgOEj"-\NB_X̖?_PK!Hy# pyphonetics-0.4.dist-info/RECORD}K8-ppq1 QuCq w%A_?=SUvػdM]bD 4~wCً3Eq9ۅzXڍ`feD0)*K;~ ؒow{w;'hs#pP^leʇ"-9>;n$$!LjKiBj֣NyҎ8AXAjgz/ND+$\٣k 6#Ax iW ߼%{8ͻQ1ٲ E һ6>+ם]\6R5X,q #}z%$GIv> :oW^oЯSY;4o^2 KD9\"q4Hʚ ='$ٖkz@iQ?GA Q񨅩ٖ-zkY4Ňop;l풜pP[SþasYs%ES)\m[, N?_@Y6'2WPg;@ &c` U A(UM=_p7k1e) qn@FvSӳF6D1AvPK qI~֐pyphonetics/__init__.pyPK qIuPENpyphonetics/exceptions.pyPK qI_%pyphonetics/utils.pyPK qISF22(pyphonetics/distance_metrics/__init__.pyPK qIh)  'pyphonetics/distance_metrics/hamming.pyPK qIQ)+pyphonetics/distance_metrics/levenshtein.pyPK qII|I!6 pyphonetics/phonetics/__init__.pyPK qI\ܣ= = & pyphonetics/phonetics/fuzzy_soundex.pyPK qI$ōpyphonetics/phonetics/lein.pyPK qI"Y''"Qpyphonetics/phonetics/metaphone.pyPK qI:#pyphonetics/phonetics/mra.pyPK qIUW?ww+(pyphonetics/phonetics/phonetic_algorithm.pyPK qI[(-pyphonetics/phonetics/refined_soundex.pyPK qIk۹ 1pyphonetics/phonetics/soundex.pyPK!H;@QP6pyphonetics-0.4.dist-info/WHEELPK!HjaS."&7pyphonetics-0.4.dist-info/METADATAPK!Hy# :pyphonetics-0.4.dist-info/RECORDPKB>