aboutsummaryrefslogtreecommitdiff
path: root/core/strings/strings.odin
diff options
context:
space:
mode:
author hikari <ftphikari@gmail.com> 2022-04-21 20:49:32 +0300
committer hikari <ftphikari@gmail.com> 2022-04-21 20:49:32 +0300
commit eee97f7f62bbd65dd03ea3ec8668fef3fcfc685c (patch)
tree 1c4c8538a52376e2399b389ffb673ded2b10e950 /core/strings/strings.odin
parent 3dd9da1b668324f55eb0fbdda2b96e1ea1bd1a4d (diff)
strings: add levenshtein_distance procedure
Diffstat (limited to 'core/strings/strings.odin')
-rw-r--r-- core/strings/strings.odin 59
1 files changed, 59 insertions, 0 deletions
diff --git a/core/strings/strings.odin b/core/strings/strings.odin
index 8e774b367..87bbb42cf 100644
--- a/core/strings/strings.odin
+++ b/core/strings/strings.odin
@@ -1809,3 +1809,62 @@ fields_iterator :: proc(s: ^string) -> (field: string, ok: bool) {
s^ = s[len(s):]
return
}
+
+// `levenshtein_distance` returns the Levenshtein edit distance between 2 strings,
+// i.e. the minimum number of single-rune insertions, deletions and substitutions
+// needed to turn `a` into `b`. Lengths and comparisons are in runes, not bytes.
+// This is a single-row-version of the Wagner–Fischer algorithm, based on C code by Martin Ettl.
+//
+// Inputs:
+// - a, b: the strings to compare
+// - allocator: used only for the temporary cost row when `b` is long (default is context.allocator)
+//
+// Returns the edit distance; if either string is empty this is the rune count of the other.
+//
+// Note: allocator isn't used if the length of string b in runes is smaller than 70.
+levenshtein_distance :: proc(a, b: string, allocator := context.allocator) -> int {
+	// Number of cost cells kept in a local array; a row needs `n + 1` cells,
+	// so `b` may hold up to 69 runes before the allocator is needed.
+	LEVENSHTEIN_STACK_COSTS :: 70
+
+	m, n := utf8.rune_count_in_string(a), utf8.rune_count_in_string(b)
+
+	if m == 0 do return n
+	if n == 0 do return m
+
+	// The working row is mutated on every step, so it lives in a local
+	// fixed-size array rather than being written through a constant slice literal.
+	stack_costs: [LEVENSHTEIN_STACK_COSTS]int
+	costs: []int
+
+	needs_allocation := n + 1 > LEVENSHTEIN_STACK_COSTS
+	if needs_allocation {
+		costs = make([]int, n + 1, allocator)
+	} else {
+		costs = stack_costs[:n + 1]
+	}
+
+	// Only the allocator-backed row has to be released.
+	defer if needs_allocation {
+		delete(costs, allocator)
+	}
+
+	// Row 0: turning the empty prefix of `a` into the first k runes of `b` takes k insertions.
+	for k in 0..=n {
+		costs[k] = k
+	}
+
+	// `i` and `j` count runes; the index yielded by `for .. in string` would be
+	// a byte offset, so the counters are maintained by hand.
+	i: int
+	for c1 in a {
+		// Column 0: deleting the first i + 1 runes of `a`.
+		costs[0] = i + 1
+		// `corner` is the previous row's value diagonally up-left of the current cell.
+		corner := i
+		j: int
+		for c2 in b {
+			// `upper` is the previous row's value for this column, read before it is overwritten.
+			upper := costs[j + 1]
+			if c1 == c2 {
+				// Matching runes: carry the diagonal over at no cost.
+				costs[j + 1] = corner
+			} else {
+				// Cheapest of deletion (upper), substitution (corner) and insertion (left), plus one edit.
+				t := upper if upper < corner else corner
+				costs[j + 1] = (costs[j] if costs[j] < t else t) + 1
+			}
+
+			corner = upper
+			j += 1
+		}
+
+		i += 1
+	}
+
+	return costs[n]
+}