Source code for munin.distance.levenshtein

#!/usr/bin/env python
# encoding: utf-8

"""
Overview
--------

Distance Function that is able to compare two strings using the
Damerau-Levenshtein-Distance.

For computation the ``pyxdameraulevenshtein`` module is used, which is
implemented in Cython.

Reference
---------
"""

# Internal:
from munin.distance import DistanceFunction

# External:
from pyxdameraulevenshtein import normalized_damerau_levenshtein_distance


class LevenshteinDistance(DistanceFunction):
    """Compute the normalized Damerau-Levenshtein distance of two word sets.

    **Takes:** two sized collections of words (typically of length 1).
    """

    def do_compute(self, lefts, rights):
        """Return the mean best-match distance between two word collections.

        Every word of the longer collection is paired with its closest word
        (lowest normalized Damerau-Levenshtein distance) in the shorter
        collection; the result is the average of those minimal distances.

        :param lefts: Sized collection of words.
        :param rights: Sized collection of words.
        :returns: The averaged distance, or ``1.0`` if either collection
                  is empty.
        """
        lev = normalized_damerau_levenshtein_distance
        smaller, larger = sorted((lefts, rights), key=len)

        # Nothing to compare against: min() over an empty sequence would
        # raise ValueError and, with both sides empty, the division below
        # would raise ZeroDivisionError. Report the maximum normalized
        # distance instead of crashing.
        if len(smaller) == 0:
            return 1.0

        dist_sum = sum(
            min(lev(word_b, word_a) for word_b in smaller)
            for word_a in larger
        )
        return dist_sum / len(larger)
if __name__ == '__main__':
    import unittest

    class TestLevenshteinDistance(unittest.TestCase):
        """Smoke test that hammers ``do_compute`` with a fixed word pair."""

        def test_cmp(self):
            # No assertion is made on the result: the test only checks that
            # one million consecutive computations run without raising.
            distance = LevenshteinDistance()
            left, right = ('berta', ), ('berte', )
            for _ in range(1000000):
                distance.do_compute(left, right)

    unittest.main()

Related Topics

Useful links:

Package:

Github: