How to use the textdistance.libraries.LibraryBase class in textdistance

To help you get started, we’ve selected a few textdistance examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github life4 / textdistance / textdistance / libraries.py View on Github external
return False
        return True


class SameLengthTextLibrary(SameLengthLibrary, TextLibrary):
    """Library wrapper combining SameLengthLibrary and TextLibrary.

    Defines nothing of its own; all behavior is inherited from the two bases.
    """


# Manager holding the library wrappers registered below under algorithm names.
prototype = LibrariesManager()

# Wrappers registered under 'DamerauLevenshtein'.
# Wrapper arguments appear to be (module path, attribute name in that module).
prototype.register('DamerauLevenshtein', LibraryBase('abydos.distance', 'damerau_levenshtein'))
prototype.register('DamerauLevenshtein', LibraryBase('pylev', 'damerau_levenshtein'))
prototype.register('DamerauLevenshtein', LibraryBase('pyxdameraulevenshtein', 'damerau_levenshtein_distance'))
prototype.register('DamerauLevenshtein', TextLibrary('jellyfish', 'damerau_levenshtein_distance'))

# Wrappers registered under 'Hamming'; the SameLength* wrappers reject
# sequences whose lengths differ.
prototype.register('Hamming', LibraryBase('abydos.distance', 'hamming'))
prototype.register('Hamming', SameLengthLibrary('distance', 'hamming'))
prototype.register('Hamming', SameLengthTextLibrary('Levenshtein', 'hamming'))
prototype.register('Hamming', TextLibrary('jellyfish', 'hamming_distance'))

# Wrappers registered under 'Jaro'.
prototype.register('Jaro', TextLibrary('jellyfish', 'jaro_distance'))
prototype.register('Jaro', TextLibrary('Levenshtein', 'jaro'))
prototype.register('Jaro', TextLibrary('py_stringmatching.similarity_measure.jaro', 'jaro'))

# Wrappers registered under 'JaroWinkler'.
# NOTE(review): `conditions` presumably limits a wrapper to objects whose
# attributes match the given values; confirm in LibraryBase.
# libraries.register('JaroWinkler', LibraryBase('py_stringmatching.similarity_measure.jaro_winkler', 'jaro_winkler'))
prototype.register('JaroWinkler', TextLibrary('jellyfish', 'jaro_winkler', conditions=dict(winklerize=True)))
# https://github.com/life4/textdistance/issues/39
# prototype.register('JaroWinkler', TextLibrary('Levenshtein', 'jaro_winkler', conditions=dict(winklerize=True)))

# Wrappers registered under 'Levenshtein'.
prototype.register('Levenshtein', LibraryBase('abydos.distance', 'levenshtein'))
prototype.register('Levenshtein', LibraryBase('distance', 'levenshtein'))
prototype.register('Levenshtein', LibraryBase('pylev', 'levenshtein'))
github life4 / textdistance / textdistance / libraries.py View on Github external
# Wrappers registered under 'Hamming'; the SameLength* wrappers reject
# sequences whose lengths differ.
prototype.register('Hamming', LibraryBase('abydos.distance', 'hamming'))
prototype.register('Hamming', SameLengthLibrary('distance', 'hamming'))
prototype.register('Hamming', SameLengthTextLibrary('Levenshtein', 'hamming'))
prototype.register('Hamming', TextLibrary('jellyfish', 'hamming_distance'))

# Wrappers registered under 'Jaro'.
prototype.register('Jaro', TextLibrary('jellyfish', 'jaro_distance'))
prototype.register('Jaro', TextLibrary('Levenshtein', 'jaro'))
prototype.register('Jaro', TextLibrary('py_stringmatching.similarity_measure.jaro', 'jaro'))

# Wrappers registered under 'JaroWinkler'.
# NOTE(review): `conditions` presumably limits a wrapper to objects whose
# attributes match the given values; confirm in LibraryBase.
# libraries.register('JaroWinkler', LibraryBase('py_stringmatching.similarity_measure.jaro_winkler', 'jaro_winkler'))
prototype.register('JaroWinkler', TextLibrary('jellyfish', 'jaro_winkler', conditions=dict(winklerize=True)))
# https://github.com/life4/textdistance/issues/39
# prototype.register('JaroWinkler', TextLibrary('Levenshtein', 'jaro_winkler', conditions=dict(winklerize=True)))

# Wrappers registered under 'Levenshtein'.
prototype.register('Levenshtein', LibraryBase('abydos.distance', 'levenshtein'))
prototype.register('Levenshtein', LibraryBase('distance', 'levenshtein'))
prototype.register('Levenshtein', LibraryBase('pylev', 'levenshtein'))
prototype.register('Levenshtein', TextLibrary('jellyfish', 'levenshtein_distance'))
prototype.register('Levenshtein', TextLibrary('Levenshtein', 'distance'))
prototype.register('Levenshtein', TextLibrary('py_stringmatching.similarity_measure.levenshtein', 'levenshtein'))
github life4 / textdistance / textdistance / libraries.py View on Github external
# Wrappers registered under 'Hamming'; the SameLength* wrappers reject
# sequences whose lengths differ.
prototype.register('Hamming', SameLengthLibrary('distance', 'hamming'))
prototype.register('Hamming', SameLengthTextLibrary('Levenshtein', 'hamming'))
prototype.register('Hamming', TextLibrary('jellyfish', 'hamming_distance'))

# Wrappers registered under 'Jaro'.
prototype.register('Jaro', TextLibrary('jellyfish', 'jaro_distance'))
prototype.register('Jaro', TextLibrary('Levenshtein', 'jaro'))
prototype.register('Jaro', TextLibrary('py_stringmatching.similarity_measure.jaro', 'jaro'))

# Wrappers registered under 'JaroWinkler'.
# NOTE(review): `conditions` presumably limits a wrapper to objects whose
# attributes match the given values; confirm in LibraryBase.
# libraries.register('JaroWinkler', LibraryBase('py_stringmatching.similarity_measure.jaro_winkler', 'jaro_winkler'))
prototype.register('JaroWinkler', TextLibrary('jellyfish', 'jaro_winkler', conditions=dict(winklerize=True)))
# https://github.com/life4/textdistance/issues/39
# prototype.register('JaroWinkler', TextLibrary('Levenshtein', 'jaro_winkler', conditions=dict(winklerize=True)))

# Wrappers registered under 'Levenshtein'.
prototype.register('Levenshtein', LibraryBase('abydos.distance', 'levenshtein'))
prototype.register('Levenshtein', LibraryBase('distance', 'levenshtein'))
prototype.register('Levenshtein', LibraryBase('pylev', 'levenshtein'))
prototype.register('Levenshtein', TextLibrary('jellyfish', 'levenshtein_distance'))
prototype.register('Levenshtein', TextLibrary('Levenshtein', 'distance'))
prototype.register('Levenshtein', TextLibrary('py_stringmatching.similarity_measure.levenshtein', 'levenshtein'))
github life4 / textdistance / textdistance / libraries.py View on Github external
def prepare(self, *sequences):
        """Normalize the input sequences to text.

        If the first sequence is a tuple or list, every sequence is joined
        into one string. Each sequence is then passed through ``unicode``
        when that name exists (Python 2); otherwise the step is skipped.

        Returns a list when any conversion ran, else the original
        ``sequences`` tuple.
        """
        first = sequences[0]
        if isinstance(first, (tuple, list)):
            # letters were given as a list/tuple: glue them into strings
            sequences = [u''.join(letters) for letters in sequences]

        try:
            # Python 2 only; the name lookup raises NameError on Python 3
            sequences = [unicode(seq) for seq in sequences]
        except NameError:
            pass

        return sequences


class SameLengthLibrary(LibraryBase):
    """Library wrapper that is usable only for sequences of equal length."""

    def check_conditions(self, obj, *sequences):
        """Return True when the parent check passes and all lengths match.

        ``obj`` and ``sequences`` are forwarded unchanged to the parent
        class's ``check_conditions``; a falsy result there yields False.
        """
        parent_ok = super(SameLengthLibrary, self).check_conditions(obj, *sequences)
        if not parent_ok:
            return False
        # only same-length sequences are accepted
        lengths = [len(seq) for seq in sequences]
        return min(lengths) == max(lengths)


class SameLengthTextLibrary(SameLengthLibrary, TextLibrary):
    """Library wrapper combining SameLengthLibrary and TextLibrary.

    Defines nothing of its own; all behavior is inherited from the two bases.
    """


# Manager holding the library wrappers registered below under algorithm names.
prototype = LibrariesManager()

# First wrapper registered under 'DamerauLevenshtein'.
# Wrapper arguments appear to be (module path, attribute name in that module).
prototype.register('DamerauLevenshtein', LibraryBase('abydos.distance', 'damerau_levenshtein'))
github life4 / textdistance / textdistance / libraries.py View on Github external
return False
        # compare only same length iterators
        if min(map(len, sequences)) != max(map(len, sequences)):
            return False
        return True


class SameLengthTextLibrary(SameLengthLibrary, TextLibrary):
    """Library wrapper combining SameLengthLibrary and TextLibrary.

    Defines nothing of its own; all behavior is inherited from the two bases.
    """


# Manager holding the library wrappers registered below under algorithm names.
prototype = LibrariesManager()

# Wrappers registered under 'DamerauLevenshtein'.
# Wrapper arguments appear to be (module path, attribute name in that module).
prototype.register('DamerauLevenshtein', LibraryBase('abydos.distance', 'damerau_levenshtein'))
prototype.register('DamerauLevenshtein', LibraryBase('pylev', 'damerau_levenshtein'))
prototype.register('DamerauLevenshtein', LibraryBase('pyxdameraulevenshtein', 'damerau_levenshtein_distance'))
prototype.register('DamerauLevenshtein', TextLibrary('jellyfish', 'damerau_levenshtein_distance'))

# Wrappers registered under 'Hamming'; the SameLength* wrappers reject
# sequences whose lengths differ.
prototype.register('Hamming', LibraryBase('abydos.distance', 'hamming'))
prototype.register('Hamming', SameLengthLibrary('distance', 'hamming'))
prototype.register('Hamming', SameLengthTextLibrary('Levenshtein', 'hamming'))
prototype.register('Hamming', TextLibrary('jellyfish', 'hamming_distance'))

# Wrappers registered under 'Jaro'.
prototype.register('Jaro', TextLibrary('jellyfish', 'jaro_distance'))
prototype.register('Jaro', TextLibrary('Levenshtein', 'jaro'))
prototype.register('Jaro', TextLibrary('py_stringmatching.similarity_measure.jaro', 'jaro'))

# Wrappers registered under 'JaroWinkler'.
# NOTE(review): `conditions` presumably limits a wrapper to objects whose
# attributes match the given values; confirm in LibraryBase.
# libraries.register('JaroWinkler', LibraryBase('py_stringmatching.similarity_measure.jaro_winkler', 'jaro_winkler'))
prototype.register('JaroWinkler', TextLibrary('jellyfish', 'jaro_winkler', conditions=dict(winklerize=True)))
# https://github.com/life4/textdistance/issues/39
# prototype.register('JaroWinkler', TextLibrary('Levenshtein', 'jaro_winkler', conditions=dict(winklerize=True)))
github life4 / textdistance / textdistance / libraries.py View on Github external
# Wrappers registered under 'Hamming'; the SameLength* wrappers reject
# sequences whose lengths differ.
prototype.register('Hamming', LibraryBase('abydos.distance', 'hamming'))
prototype.register('Hamming', SameLengthLibrary('distance', 'hamming'))
prototype.register('Hamming', SameLengthTextLibrary('Levenshtein', 'hamming'))
prototype.register('Hamming', TextLibrary('jellyfish', 'hamming_distance'))

# Wrappers registered under 'Jaro'.
prototype.register('Jaro', TextLibrary('jellyfish', 'jaro_distance'))
prototype.register('Jaro', TextLibrary('Levenshtein', 'jaro'))
prototype.register('Jaro', TextLibrary('py_stringmatching.similarity_measure.jaro', 'jaro'))

# Wrappers registered under 'JaroWinkler'.
# NOTE(review): `conditions` presumably limits a wrapper to objects whose
# attributes match the given values; confirm in LibraryBase.
# libraries.register('JaroWinkler', LibraryBase('py_stringmatching.similarity_measure.jaro_winkler', 'jaro_winkler'))
prototype.register('JaroWinkler', TextLibrary('jellyfish', 'jaro_winkler', conditions=dict(winklerize=True)))
# https://github.com/life4/textdistance/issues/39
# prototype.register('JaroWinkler', TextLibrary('Levenshtein', 'jaro_winkler', conditions=dict(winklerize=True)))

# Wrappers registered under 'Levenshtein'.
prototype.register('Levenshtein', LibraryBase('abydos.distance', 'levenshtein'))
prototype.register('Levenshtein', LibraryBase('distance', 'levenshtein'))
prototype.register('Levenshtein', LibraryBase('pylev', 'levenshtein'))
prototype.register('Levenshtein', TextLibrary('jellyfish', 'levenshtein_distance'))
prototype.register('Levenshtein', TextLibrary('Levenshtein', 'distance'))
prototype.register('Levenshtein', TextLibrary('py_stringmatching.similarity_measure.levenshtein', 'levenshtein'))
github life4 / textdistance / textdistance / libraries.py View on Github external
if not super(SameLengthLibrary, self).check_conditions(obj, *sequences):
            return False
        # compare only same length iterators
        if min(map(len, sequences)) != max(map(len, sequences)):
            return False
        return True


class SameLengthTextLibrary(SameLengthLibrary, TextLibrary):
    """Library wrapper combining SameLengthLibrary and TextLibrary.

    Defines nothing of its own; all behavior is inherited from the two bases.
    """


# Manager holding the library wrappers registered below under algorithm names.
prototype = LibrariesManager()

# Wrappers registered under 'DamerauLevenshtein'.
# Wrapper arguments appear to be (module path, attribute name in that module).
prototype.register('DamerauLevenshtein', LibraryBase('abydos.distance', 'damerau_levenshtein'))
prototype.register('DamerauLevenshtein', LibraryBase('pylev', 'damerau_levenshtein'))
prototype.register('DamerauLevenshtein', LibraryBase('pyxdameraulevenshtein', 'damerau_levenshtein_distance'))
prototype.register('DamerauLevenshtein', TextLibrary('jellyfish', 'damerau_levenshtein_distance'))

# Wrappers registered under 'Hamming'; the SameLength* wrappers reject
# sequences whose lengths differ.
prototype.register('Hamming', LibraryBase('abydos.distance', 'hamming'))
prototype.register('Hamming', SameLengthLibrary('distance', 'hamming'))
prototype.register('Hamming', SameLengthTextLibrary('Levenshtein', 'hamming'))
prototype.register('Hamming', TextLibrary('jellyfish', 'hamming_distance'))

# Wrappers registered under 'Jaro'.
prototype.register('Jaro', TextLibrary('jellyfish', 'jaro_distance'))
prototype.register('Jaro', TextLibrary('Levenshtein', 'jaro'))
prototype.register('Jaro', TextLibrary('py_stringmatching.similarity_measure.jaro', 'jaro'))

# Wrappers registered under 'JaroWinkler'.
# NOTE(review): `conditions` presumably limits a wrapper to objects whose
# attributes match the given values; confirm in LibraryBase.
# libraries.register('JaroWinkler', LibraryBase('py_stringmatching.similarity_measure.jaro_winkler', 'jaro_winkler'))
prototype.register('JaroWinkler', TextLibrary('jellyfish', 'jaro_winkler', conditions=dict(winklerize=True)))
# https://github.com/life4/textdistance/issues/39
# prototype.register('JaroWinkler', TextLibrary('Levenshtein', 'jaro_winkler', conditions=dict(winklerize=True)))
github life4 / textdistance / textdistance / libraries.py View on Github external
def check_conditions(self, obj, *sequences):
        """Return True when the parent check passes and all lengths match.

        ``obj`` and ``sequences`` are forwarded unchanged to the parent
        class's ``check_conditions``; a falsy result there yields False.
        """
        if super(SameLengthLibrary, self).check_conditions(obj, *sequences):
            sizes = [len(item) for item in sequences]
            # usable only when every sequence has the same length
            return min(sizes) == max(sizes)
        return False


class SameLengthTextLibrary(SameLengthLibrary, TextLibrary):
    """Library wrapper combining SameLengthLibrary and TextLibrary.

    Defines nothing of its own; all behavior is inherited from the two bases.
    """


# Manager holding the library wrappers registered below under algorithm names.
prototype = LibrariesManager()

# Wrappers registered under 'DamerauLevenshtein'.
# Wrapper arguments appear to be (module path, attribute name in that module).
prototype.register('DamerauLevenshtein', LibraryBase('abydos.distance', 'damerau_levenshtein'))
prototype.register('DamerauLevenshtein', LibraryBase('pylev', 'damerau_levenshtein'))
prototype.register('DamerauLevenshtein', LibraryBase('pyxdameraulevenshtein', 'damerau_levenshtein_distance'))
prototype.register('DamerauLevenshtein', TextLibrary('jellyfish', 'damerau_levenshtein_distance'))

# Wrappers registered under 'Hamming'; the SameLength* wrappers reject
# sequences whose lengths differ.
prototype.register('Hamming', LibraryBase('abydos.distance', 'hamming'))
prototype.register('Hamming', SameLengthLibrary('distance', 'hamming'))
prototype.register('Hamming', SameLengthTextLibrary('Levenshtein', 'hamming'))
prototype.register('Hamming', TextLibrary('jellyfish', 'hamming_distance'))

# Wrappers registered under 'Jaro'.
prototype.register('Jaro', TextLibrary('jellyfish', 'jaro_distance'))
prototype.register('Jaro', TextLibrary('Levenshtein', 'jaro'))
prototype.register('Jaro', TextLibrary('py_stringmatching.similarity_measure.jaro', 'jaro'))

# Wrappers registered under 'JaroWinkler'.
# NOTE(review): `conditions` presumably limits a wrapper to objects whose
# attributes match the given values; confirm in LibraryBase.
# libraries.register('JaroWinkler', LibraryBase('py_stringmatching.similarity_measure.jaro_winkler', 'jaro_winkler'))
prototype.register('JaroWinkler', TextLibrary('jellyfish', 'jaro_winkler', conditions=dict(winklerize=True)))
# https://github.com/life4/textdistance/issues/39
github life4 / textdistance / textdistance / libraries.py View on Github external
return

            # get object from module
            obj = getattr(module, self.func_name)
            # init class
            if self.presets is not None:
                obj = obj(**self.presets)
            # get needed attribute
            if self.attr:
                obj = getattr(obj, self.attr)
            self.func = obj

        return self.func


class TextLibrary(LibraryBase):
    def check_conditions(self, obj, *sequences):
        if not super(TextLibrary, self).check_conditions(obj, *sequences):
            return False
        # compare only by letters
        if getattr(obj, 'qval', 0) != 1:
            return False
        return True

    def prepare(self, *sequences):
        # convert list of letters to string
        if isinstance(sequences[0], (tuple, list)):
            sequences = list(map(lambda x: u''.join(x), sequences))

        # convert to unicode for python2
        try:
            sequences = list(map(unicode, sequences))