Levenshtein distance/Alignment
You are encouraged to solve this task according to the task description, using any language you may know.
The Levenshtein distance algorithm returns the number of atomic operations (insertion, deletion or edition) that must be performed on a string in order to obtain an other one, but it does not say anything about the actual operations used or their order.
An alignment is a notation used to describe the operations used to turn a string into an other. At some point in the strings, the minus character ('-') is placed in order to signify that a character must be added at this very place. For instance, an alignment between the words 'place' and 'palace' is:
P-LACE PALACE
- Task
Write a function that shows the alignment of two strings for the corresponding levenshtein distance.
As an example, use the words "rosettacode" and "raisethysword".
You can either implement an algorithm, or use a dedicated library (thus showing us how it is named in your language).
11l
F levenshteinAlign(aa, bb)
V a = aa.lowercase()
V b = bb.lowercase()
V costs = [[0] * (b.len + 1)] * (a.len + 1)
L(j) 0 .. b.len
costs[0][j] = j
L(i) 1 .. a.len
costs[i][0] = i
L(j) 1 .. b.len
V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1])
costs[i][j] = min(1 + min(costs[i - 1][j], costs[i][j - 1]), tmp)
V aPathRev = ‘’
V bPathRev = ‘’
V i = a.len
V j = b.len
L i != 0 & j != 0
V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1])
I costs[i][j] == tmp
aPathRev ‘’= a[--i]
bPathRev ‘’= b[--j]
E I costs[i][j] == 1 + costs[i - 1][j]
aPathRev ‘’= a[--i]
bPathRev ‘’= ‘-’
E I costs[i][j] == 1 + costs[i][j - 1]
aPathRev ‘’= ‘-’
bPathRev ‘’= b[--j]
E
assert(0B, ‘Internal error’)
R (reversed(aPathRev), reversed(bPathRev))
V result = levenshteinAlign(‘place’, ‘palace’)
print(result[0])
print(result[1])
print()
result = levenshteinAlign(‘rosettacode’, ‘raisethysword’)
print(result[0])
print(result[1])
- Output:
p-lace palace r-oset-tacode raisethysword
Arturo
print join.with:"\n" levenshtein.align "place" "palace"
print join.with:"\n" levenshtein.align "rosettacode" "raisethysword"
- Output:
p-lace palace r-oset-tacode raisethysword
C
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct edit_s edit_t, *edit;
struct edit_s {
char c1, c2;
int n;
edit next;
};
void leven(char *a, char *b)
{
int i, j, la = strlen(a), lb = strlen(b);
edit *tbl = malloc(sizeof(edit) * (1 + la));
tbl[0] = calloc((1 + la) * (1 + lb), sizeof(edit_t));
for (i = 1; i <= la; i++)
tbl[i] = tbl[i-1] + (1+lb);
for (i = la; i >= 0; i--) {
char *aa = a + i;
for (j = lb; j >= 0; j--) {
char *bb = b + j;
if (!*aa && !*bb) continue;
edit e = &tbl[i][j];
edit repl = &tbl[i+1][j+1];
edit dela = &tbl[i+1][j];
edit delb = &tbl[i][j+1];
e->c1 = *aa;
e->c2 = *bb;
if (!*aa) {
e->next = delb;
e->n = e->next->n + 1;
continue;
}
if (!*bb) {
e->next = dela;
e->n = e->next->n + 1;
continue;
}
e->next = repl;
if (*aa == *bb) {
e->n = e->next->n;
continue;
}
if (e->next->n > delb->n) {
e->next = delb;
e->c1 = 0;
}
if (e->next->n > dela->n) {
e->next = dela;
e->c1 = *aa;
e->c2 = 0;
}
e->n = e->next->n + 1;
}
}
edit p = tbl[0];
printf("%s -> %s: %d edits\n", a, b, p->n);
while (p->next) {
if (p->c1 == p->c2)
printf("%c", p->c1);
else {
putchar('(');
if (p->c1) putchar(p->c1);
putchar(',');
if (p->c2) putchar(p->c2);
putchar(')');
}
p = p->next;
}
putchar('\n');
free(tbl[0]);
free(tbl);
}
int main(void)
{
leven("raisethysword", "rosettacode");
return 0;
}
- Output:
raisethysword -> rosettacode: 8 edits r(a,o)(i,)set(h,t)(y,a)(s,c)(w,)o(r,d)(d,e)
C#
using System;
using System.Text;
public class LevenshteinAlignment
{
public static string[] Alignment(string a, string b)
{
a = a.ToLower();
b = b.ToLower();
int[,] costs = new int[a.Length + 1, b.Length + 1];
for (int j = 0; j <= b.Length; j++)
costs[0, j] = j;
for (int i = 1; i <= a.Length; i++)
{
costs[i, 0] = i;
for (int j = 1; j <= b.Length; j++)
{
costs[i, j] = Math.Min(1 + Math.Min(costs[i - 1, j], costs[i, j - 1]),
a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1);
}
}
StringBuilder aPathRev = new StringBuilder();
StringBuilder bPathRev = new StringBuilder();
for (int i = a.Length, j = b.Length; i != 0 && j != 0;)
{
if (costs[i, j] == (a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1))
{
aPathRev.Append(a[--i]);
bPathRev.Append(b[--j]);
}
else if (costs[i, j] == 1 + costs[i - 1, j])
{
aPathRev.Append(a[--i]);
bPathRev.Append('-');
}
else if (costs[i, j] == 1 + costs[i, j - 1])
{
aPathRev.Append('-');
bPathRev.Append(b[--j]);
}
}
return new string[] { Reverse(aPathRev.ToString()), Reverse(bPathRev.ToString()) };
}
private static string Reverse(string s)
{
char[] charArray = s.ToCharArray();
Array.Reverse(charArray);
return new string(charArray);
}
public static void Main(string[] args)
{
string[] result = Alignment("rosettacode", "raisethysword");
Console.WriteLine(result[0]);
Console.WriteLine(result[1]);
}
}
- Output:
r-oset-tacode raisethysword
C++
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>
std::string to_lower_case(const std::string& text) {
std::string result = text;
std::transform(result.begin(), result.end(), result.begin(),
[](char ch){ return std::tolower(ch); });
return result;
}
std::vector<std::string> levenshtein_alignment(std::string a, std::string b) {
a = to_lower_case(a);
b = to_lower_case(b);
std::vector<std::vector<int32_t>> costs{ a.length() + 1, std::vector<int32_t>( b.length() + 1, 0 ) };
for ( uint64_t j = 0; j <= b.length(); ++j )
costs[0][j] = j;
for ( uint64_t i = 1; i <= a.length(); ++i ) {
costs[i][0] = i;
for ( uint64_t j = 1; j <= b.length(); ++j ) {
costs[i][j] = std::min(std::min( costs[i - 1][j], costs[i][j - 1]) + 1,
a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1);
}
}
std::string a_path_reversed, b_path_reversed;
uint64_t i = a.length(), j = b.length();
while ( i != 0 && j != 0 ) {
if ( costs[i][j] == ( a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1 ) ) {
a_path_reversed += a[--i];
b_path_reversed += b[--j];
} else if ( costs[i][j] == costs[i - 1][j] + 1 ) {
a_path_reversed += a[--i];
b_path_reversed += "-";
} else if ( costs[i][j] == costs[i][j - 1] + 1 ) {
a_path_reversed += "-";
b_path_reversed += b[--j];
}
}
std::reverse(a_path_reversed.begin(), a_path_reversed.end());
std::reverse(b_path_reversed.begin(), b_path_reversed.end());
return std::vector<std::string>{ a_path_reversed, b_path_reversed };
}
int main() {
std::vector<std::string> result = levenshtein_alignment("place", "palace");
std::cout << result[0] << std::endl;
std::cout << result[1] << std::endl;
std::cout << std::endl;
result = levenshtein_alignment("rosettacode", "raisethysword");
std::cout << result[0] << std::endl;
std::cout << result[1] << std::endl;
}
- Output:
p-lace palace r-oset-tacode raisethysword
D
Using the standard library.
void main() {
import std.stdio, std.algorithm;
immutable s1 = "rosettacode";
immutable s2 = "raisethysword";
string s1b, s2b;
size_t pos1, pos2;
foreach (immutable c; levenshteinDistanceAndPath(s1, s2)[1]) {
final switch (c) with (EditOp) {
case none, substitute:
s1b ~= s1[pos1++];
s2b ~= s2[pos2++];
break;
case insert:
s1b ~= "_";
s2b ~= s2[pos2++];
break;
case remove:
s1b ~= s1[pos1++];
s2b ~= "_";
break;
}
}
writeln(s1b, "\n", s2b);
}
- Output:
r_oset_tacode raisethysword
EasyLang
global dparr[] dpcols .
proc dpnew a b . .
len dparr[] a * b
dpcols = b
.
func dp r c .
return dparr[r * dpcols + c + 1]
.
proc dps r c v . .
dparr[r * dpcols + c + 1] = v
.
proc align a$ b$ . ar$ br$ .
dpnew (len a$ + 1) (len b$ + 1)
for i = 1 to len a$
dps i 0 i
.
for j = 0 to len b$
dps 0 j j
.
for i = 1 to len a$
for j = 1 to len b$
if substr a$ i 1 = substr b$ j 1
dps i j dp (i - 1) (j - 1)
else
dps i j lower (lower dp (i - 1) j dp i (j - 1)) dp (i - 1) (j - 1) + 1
.
.
.
ar$ = "" ; br$ = ""
i = len a$ ; j = len b$
while i <> 0 and j <> 0
if substr a$ i 1 = substr b$ j 1 or dp i j = dp (i - 1) (j - 1) + 1
ar$ = substr a$ i 1 & ar$
i -= 1
br$ = substr b$ j 1 & br$
j -= 1
elif dp i j = dp (i - 1) j + 1
ar$ = substr a$ i 1 & ar$
i -= 1
br$ = "-" & br$
else
ar$ = "-" & ar$
br$ = substr b$ j 1 & br$
j -= 1
.
.
.
align "rosettacode" "raisethysword" ar$ br$
print ar$ ; print br$
- Output:
r-oset-tacode raisethysword
Eiffel
distance (source, target: STRING): INTEGER
-- Minimum number of operations to turn `source' into `target'.
local
l_distance: ARRAY2 [INTEGER]
del, ins, subst: INTEGER
do
create l_distance.make (source.count, target.count)
⟳ ic:(1 |..| source.count) ¦ l_distance [ic, 1] := ic - 1 ⟲
⟳ ij:(1 |..| target.count) ¦ l_distance [1, ij] := ij - 1 ⟲
⟳ ic:(2 |..| source.count) ¦
⟳ jc:(2 |..| target.count) ¦
if source [ic] = target [jc] then -- same char
l_distance [ic, jc] := l_distance [ic - 1, jc - 1]
else -- diff char
del := l_distance [ic - 1, jc] -- delete?
ins := l_distance [ic, jc - 1] -- insert?
subst := l_distance [ic - 1, jc -1] -- substitute/swap?
l_distance [ic, jc] := del.min (ins.min (subst)) + 1
end
⟲
⟲
Result:= l_distance [source.count, target.count]
end
- Output:
Result = 8
The ⟳ ¦ ⟲ represents the "symbolic form" of an Eiffel across loop. In the case above, the first two ⟳ (loops) initialize the first column and the first row, and then two nested ⟳ (loops) to walk the distance computation.
Also—the `ic' is this programmers shorthand for ITERATION_CURSOR. Therefore, in the nested loops, the `ic' is the iteration cursor following `i' and the `ij' is following `j' as indexes on the source, target, and l_distance.
FreeBASIC
#define min(a, b) iif((a) < (b), (a), (b))
Sub LevenshteinAlignment(s1 As String, s2 As String)
Dim As String align1 = "", align2 = ""
Dim As Integer i, j, len1, len2
len1 = Len(s1):
len2 = Len(s2)
Dim As Integer dp(len1+1, len2+1)
For i = 0 To len1
dp(i, 0) = i
Next i
For j = 0 To len2
dp(0, j) = j
Next j
For i = 1 To len1
For j = 1 To len2
If Mid(s1, i, 1) = Mid(s2, j, 1) Then
dp(i, j) = dp(i-1, j-1)
Else
dp(i, j) = min(min(dp(i-1, j), dp(i, j-1)), dp(i-1, j-1)) + 1
End If
Next j
Next i
i = len1: j = len2
While i > 0 And j > 0
If Mid(s1, i, 1) = Mid(s2, j, 1) Then
align1 = Mid(s1, i, 1) + align1
align2 = Mid(s2, j, 1) + align2
i -= 1: j -= 1
Elseif dp(i, j) = dp(i-1, j-1) + 1 Then
align1 = Mid(s1, i, 1) + align1
align2 = Mid(s2, j, 1) + align2
i -= 1: j -= 1
Elseif dp(i, j) = dp(i-1, j) + 1 Then
align1 = Mid(s1, i, 1) + align1
align2 = "-" + align2
i -= 1
Else
align1 = "-" + align1
align2 = Mid(s2, j, 1) + align2
j -= 1
End If
Wend
While i > 0
align1 = Mid(s1, i, 1) + align1
align2 = "-" + align2
i -= 1
Wend
While j > 0
align1 = "-" + align1
align2 = Mid(s2, j, 1) + align2
j -= 1
Wend
Print "Levenshtein Distance: "; dp(len1, len2)
Print align1
Print align2
Print
End Sub
LevenshteinAlignment("rosettacode", "raisethysword")
LevenshteinAlignment("place", "palace")
Sleep
- Output:
Levenshtein Distance: 8 r-oset-tacode raisethysword Levenshtein Distance: 1 p-lace palace
Go
Alignment computed by the Needleman-Wunch algorithm, which is used in bioinformatics.
package main
import (
"fmt"
"github.com/biogo/biogo/align"
ab "github.com/biogo/biogo/alphabet"
"github.com/biogo/biogo/feat"
"github.com/biogo/biogo/seq/linear"
)
func main() {
// Alphabets for things like DNA are predefined in biogo, but we
// define our own here.
lc := ab.Must(ab.NewAlphabet("-abcdefghijklmnopqrstuvwxyz",
feat.Undefined, '-', 0, true))
// Construct scoring matrix for Needleman-Wunch algorithm.
// We leave zeros on the diagonal for the Levenshtein distance of an
// exact match and put -1s everywhere else for the Levenshtein distance
// of an edit.
nw := make(align.NW, lc.Len())
for i := range nw {
r := make([]int, lc.Len())
nw[i] = r
for j := range r {
if j != i {
r[j] = -1
}
}
}
// define input sequences
a := &linear.Seq{Seq: ab.BytesToLetters([]byte("rosettacode"))}
a.Alpha = lc
b := &linear.Seq{Seq: ab.BytesToLetters([]byte("raisethysword"))}
b.Alpha = lc
// perform alignment
aln, err := nw.Align(a, b)
// format and display result
if err != nil {
fmt.Println(err)
return
}
fa := align.Format(a, b, aln, '-')
fmt.Printf("%s\n%s\n", fa[0], fa[1])
aa := fmt.Sprint(fa[0])
ba := fmt.Sprint(fa[1])
ma := make([]byte, len(aa))
for i := range ma {
if aa[i] == ba[i] {
ma[i] = ' '
} else {
ma[i] = '|'
}
}
fmt.Println(string(ma))
}
- Output:
The lines after the alignment point out the 8 edits.
r-oset-tacode raisethysword || |||| ||
Haskell
The Wagner–Fischer matrix construction is adopted from Levenshtein_distance#Haskell, however it is reversed in order to simplify it's traversing.
costs :: String -> String -> [[Int]]
costs s1 s2 = reverse $ reverse <$> matrix
where
matrix = scanl transform [0 .. length s1] s2
transform ns@(n:ns1) c = scanl calc (n + 1) $ zip3 s1 ns ns1
where
calc z (c1, x, y) = minimum [ y + 1, z + 1
, x + fromEnum (c1 /= c)]
levenshteinDistance :: String -> String -> Int
levenshteinDistance s1 s2 = head.head $ costs s1 s2
A sample alignment
The alignment is built in the same way as it is done in Levenshtein_distance/Alignment#Java. Instead of indices the Wagner–Fischer matrix and strings are traversed by list truncation, as zippers.
alignment :: String -> String -> (String, String)
alignment s1 s2 = go (costs s1 s2) (reverse s1) (reverse s2) ([],[])
where
go c _ _ r | isEmpty c = r
go _ [] s r = ('-' <$ s, reverse s) <> r
go _ s [] r = (reverse s, '-' <$ s) <> r
go c s1@(h1:t1) s2@(h2:t2) (r1, r2) =
let temp = (get.nextCol.nextRow $ c) + if h1 == h2 then 0 else 1
in case get c of
x | x == temp -> go (nextRow.nextCol $ c) t1 t2 (h1:r1, h2:r2)
| x == 1 + (get.nextCol $ c) -> go (nextCol c) s1 t2 ('-':r1, h2:r2)
| x == 1 + (get.nextRow $ c) -> go (nextRow c) t1 s2 (h1:r1, '-':r2)
-- Functions which treat table as zipper
get ((h:_):_) = h
nextRow = map tail
nextCol = tail
isEmpty c = null c || null (head c)
λ> alignment "palace" "place" ("palace","p-lace") λ> alignment "rosettacode" "raisethysword" ("r-oset-tacode","raisethysword") λ> alignment "rosettacode" "rat" ("rosettacode","r-----a---t")
All alignments
The alternative solution, which extensively produces all minimal alignments for given strings.
-- Produces all possible alignments for two strings.
allAlignments :: String -> String -> [[(Char, Char)]]
allAlignments s1 s2 = go (length s2 - length s1) s1 s2
where
go _ s [] = [(\x -> (x, '-')) <$> s]
go _ [] s = [(\x -> ('-' ,x)) <$> s]
go n s1@(h1:t1) s2@(h2:t2) = (h1, h2) <:> go n t1 t2
++ case compare n 0 of
LT -> (h1, '-') <:> go (n+1) t1 s2
EQ -> []
GT -> ('-', h2) <:> go (n-1) s1 t2
x <:> l = fmap (x :) l
-- Returns a lazy list of all optimal alignments.
levenshteinAlignments :: String -> String -> [(String, String)]
levenshteinAlignments s1 s2 = unzip <$> best
where
best = filter ((lev ==) . dist) $ allAlignments s1 s2
lev = levenshteinDistance s1 s2
dist = length . filter (uncurry (/=))
λ> mapM_ print $ levenshteinAlignments "rosettacode" "raisethysword" ("ro-settac-ode","raisethysword") ("ro-setta-code","raisethysword") ("ro-sett-acode","raisethysword") ("ro-set-tacode","raisethysword") ("r-osettac-ode","raisethysword") ("r-osetta-code","raisethysword") ("r-osett-acode","raisethysword") ("r-oset-tacode","raisethysword") λ> mapM_ print $ levenshteinAlignments "rosettacode" "rat" ("rosettacode","ra--t------") ("rosettacode","ra---t-----") ("rosettacode","r-a-t------") ("rosettacode","r-a--t-----") ("rosettacode","r--at------") ("rosettacode","r--a-t-----") ("rosettacode","r---at-----") ("rosettacode","r-----at---") ("rosettacode","r-----a-t--") ("rosettacode","r-----a--t-") ("rosettacode","r-----a---t")
J
Translation of java:
levalign=:4 :0
assert. x <:&# y
D=. x +/&i.&>:&# y
for_i. 1+i.#x do.
for_j. 1+i.#y do.
if. ((<:i){x)=(<:j){y do.
D=. (D {~<<:i,j) (<i,j)} D
else.
min=. 1+<./D{~(i,j) <@:-"1#:1 2 3
D=. min (<i,j)} D
end.
end.
end.
A=. B=. ''
ij=. x ,&# y
while. */ij do.
'd00 d01 d10 d11'=. D{~ ij <@:-"1#:i.4
'x1 y1'=. (ij-1){each x;y
if. d00 = d11+x1~:y1 do.
A=. A,x1 [ B=. B,y1 [ ij=. ij-1
elseif. d00 = 1+d10 do.
A=. A,x1 [ B=. B,'-'[ ij=. ij-1 0
elseif. d00 = 1+d01 do.
A=. A,'-'[ B=. B,y1 [ ij=. ij-0 1
end.
end.
A,:&|.B
)
Task examples:
'place' levalign 'palace'
p-lace
palace
'rosettacode' levalign 'raisethysword'
r-oset-tacode
raisethysword
Java
public class LevenshteinAlignment {
public static String[] alignment(String a, String b) {
a = a.toLowerCase();
b = b.toLowerCase();
// i == 0
int[][] costs = new int[a.length()+1][b.length()+1];
for (int j = 0; j <= b.length(); j++)
costs[0][j] = j;
for (int i = 1; i <= a.length(); i++) {
costs[i][0] = i;
for (int j = 1; j <= b.length(); j++) {
costs[i][j] = Math.min(1 + Math.min(costs[i-1][j], costs[i][j-1]), a.charAt(i - 1) == b.charAt(j - 1) ? costs[i-1][j-1] : costs[i-1][j-1] + 1);
}
}
// walk back through matrix to figure out path
StringBuilder aPathRev = new StringBuilder();
StringBuilder bPathRev = new StringBuilder();
for (int i = a.length(), j = b.length(); i != 0 && j != 0; ) {
if (costs[i][j] == (a.charAt(i - 1) == b.charAt(j - 1) ? costs[i-1][j-1] : costs[i-1][j-1] + 1)) {
aPathRev.append(a.charAt(--i));
bPathRev.append(b.charAt(--j));
} else if (costs[i][j] == 1 + costs[i-1][j]) {
aPathRev.append(a.charAt(--i));
bPathRev.append('-');
} else if (costs[i][j] == 1 + costs[i][j-1]) {
aPathRev.append('-');
bPathRev.append(b.charAt(--j));
}
}
return new String[]{aPathRev.reverse().toString(), bPathRev.reverse().toString()};
}
public static void main(String[] args) {
String[] result = alignment("rosettacode", "raisethysword");
System.out.println(result[0]);
System.out.println(result[1]);
}
}
- Output:
r-oset-tacode raisethysword
jq
Adapted from Wren
Works with jq, the C implementation of jq
Works with gojq, the Go implementation of jq
Works with jaq, the Rust implementation of jq
def reverseString: explode | reverse | implode;
def levenshteinAlign($x; $y):
def min($a;$b): [$a,$b]|min;
($x|length + 1) as $a1
| ($y|length + 1) as $b1
| ($x | ascii_downcase) as $a
| ($y | ascii_downcase) as $b
| [range(0; $b1) | 0] as $zeros
| {costs: [range(0; $a1) | $zeros]}
| reduce range(0; $b1) as $j (.; .costs[0][$j] = $j)
| reduce range(1; $a1) as $i (.;
.costs[$i][0] = $i
| reduce range(1; $b1) as $j (.;
(.costs[$i - 1][$j - 1] + (if $a[$i - 1: $i] == $b[$j - 1: $j] then 0 else 1 end)) as $temp
| .costs[$i][$j] = min( $temp; 1 + min(.costs[$i - 1][$j]; .costs[$i][$j - 1] )) ))
# walk back through matrix to figure out path
| .aPathRev = ""
| .bPathRev = ""
| .i = ($a|length)
| .j = ($b|length)
| until (.i == 0 or .j == 0;
(.costs[.i - 1][.j - 1] + (if $a[.i - 1: .i] == $b[.j - 1: .j] then 0 else 1 end)) as $temp
| .costs[.i][.j] as $cij
| if $cij == $temp
then .i += -1
| .aPathRev += $a[.i: .i+1]
| .j += -1
| .bPathRev += $b[.j: .j+1]
elif $cij == 1 + .costs[.i-1][.j]
then .i += -1
| .aPathRev += $a[.i:.i+1]
| .bPathRev += "-"
elif $cij == 1 + .costs[.i][.j-1]
then .aPathRev += "-"
| .j += -1
| .bPathRev += $b[.j: .j+1]
end)
| [.aPathRev, .bPathRev ] | map(reverseString);
levenshteinAlign("place"; "palace"),
levenshteinAlign("rosettacode";"raisethysword")
- Output:
[ "p-lace", "palace" ] [ "r-oset-tacode", "raisethysword" ]
Julia
function levenshteinalign(a::AbstractString, b::AbstractString)
a = lowercase(a)
b = lowercase(b)
len_a = length(a)
len_b = length(b)
costs = Matrix{Int}(len_a + 1, len_b + 1)
costs[1, :] .= 0:len_b
@inbounds for i in 2:(len_a + 1)
costs[i, 1] = i
for j in 2:(len_b + 1)
tmp = ifelse(a[i-1] == b[j-1], costs[i-1, j-1], costs[i-1, j-1] + 1)
costs[i, j] = min(1 + min(costs[i-1, j], costs[i, j-1]), tmp)
end
end
apathrev = IOBuffer()
bpathrev = IOBuffer()
local i = len_a + 1
local j = len_b + 1
@inbounds while i != 1 && j != 1
tmp = ifelse(a[i-1] == b[j-1], costs[i-1, j-1], costs[i-1, j-1] + 1)
if costs[i, j] == tmp
i -= 1
j -= 1
print(apathrev, a[i])
print(bpathrev, b[j])
elseif costs[i, j] == 1 + costs[i-1, j]
i -= 1
print(apathrev, a[i])
print(bpathrev, '-')
elseif costs[i, j] == 1 + costs[i, j-1]
j -= 1
print(apathrev, '-')
print(bpathrev, b[j])
end
end
return reverse(String(take!(apathrev))), reverse(String(take!(bpathrev)))
end
foreach(println, levenshteinalign("rosettacode", "raisethysword"))
foreach(println, levenshteinalign("place", "palace"))
- Output:
r-oset-tacode raisethysword p-lace palace
Kotlin
// version 1.1.3
fun levenshteinAlign(a: String, b: String): Array<String> {
val aa = a.toLowerCase()
val bb = b.toLowerCase()
val costs = Array(a.length + 1) { IntArray(b.length + 1) }
for (j in 0..b.length) costs[0][j] = j
for (i in 1..a.length) {
costs[i][0] = i
for (j in 1..b.length) {
val temp = costs[i - 1][j - 1] + (if (aa[i - 1] == bb[j - 1]) 0 else 1)
costs[i][j] = minOf(1 + minOf(costs[i - 1][j], costs[i][j - 1]), temp)
}
}
// walk back through matrix to figure out path
val aPathRev = StringBuilder()
val bPathRev = StringBuilder()
var i = a.length
var j = b.length
while (i != 0 && j != 0) {
val temp = costs[i - 1][j - 1] + (if (aa[i - 1] == bb[j - 1]) 0 else 1)
when (costs[i][j]) {
temp -> {
aPathRev.append(aa[--i])
bPathRev.append(bb[--j])
}
1 + costs[i-1][j] -> {
aPathRev.append(aa[--i])
bPathRev.append('-')
}
1 + costs[i][j-1] -> {
aPathRev.append('-')
bPathRev.append(bb[--j])
}
}
}
return arrayOf(aPathRev.reverse().toString(), bPathRev.reverse().toString())
}
fun main(args: Array<String>) {
var result = levenshteinAlign("place", "palace")
println(result[0])
println(result[1])
println()
result = levenshteinAlign("rosettacode","raisethysword")
println(result[0])
println(result[1])
}
- Output:
p-lace palace r-oset-tacode raisethysword
Mathematica / Wolfram Language
DamerauLevenshteinDistance["rosettacode", "raisethysword"]
- Output:
8
Nim
import algorithm, sequtils, strutils
proc levenshteinAlign(a, b: string): tuple[a, b: string] =
let a = a.toLower()
let b = b.toLower()
var costs = newSeqWith(a.len + 1, newSeq[int](b.len + 1))
for j in 0..b.len: costs[0][j] = j
for i in 1..a.len:
costs[i][0] = i
for j in 1..b.len:
let tmp = costs[i - 1][j - 1] + ord(a[i - 1] != b[j - 1])
costs[i][j] = min(1 + min(costs[i - 1][j], costs[i][j - 1]), tmp)
# Walk back through matrix to figure out path.
var aPathRev, bPathRev: string
var i = a.len
var j = b.len
while i != 0 and j != 0:
let tmp = costs[i - 1][j - 1] + ord(a[i - 1] != b[j - 1])
if costs[i][j] == tmp:
dec i
dec j
aPathRev.add a[i]
bPathRev.add b[j]
elif costs[i][j] == 1 + costs[i-1][j]:
dec i
aPathRev.add a[i]
bPathRev.add '-'
elif costs[i][j] == 1 + costs[i][j-1]:
dec j
aPathRev.add '-'
bPathRev.add b[j]
else:
quit "Internal error"
result = (reversed(aPathRev).join(), reversed(bPathRev).join())
when isMainModule:
var result = levenshteinAlign("place", "palace")
echo result.a
echo result.b
echo ""
result = levenshteinAlign("rosettacode","raisethysword")
echo result.a
echo result.b
- Output:
p-lace palace r-oset-tacode raisethysword
Perl
use strict;
use warnings;
use List::Util qw(min);
sub levenshtein_distance_alignment {
my @s = ('^', split //, shift);
my @t = ('^', split //, shift);
my @A;
@{$A[$_][0]}{qw(d s t)} = ($_, join('', @s[1 .. $_]), ('~' x $_)) for 0 .. $#s;
@{$A[0][$_]}{qw(d s t)} = ($_, ('-' x $_), join '', @t[1 .. $_]) for 0 .. $#t;
for my $i (1 .. $#s) {
for my $j (1 .. $#t) {
if ($s[$i] ne $t[$j]) {
$A[$i][$j]{d} = 1 + (
my $min = min $A[$i-1][$j]{d}, $A[$i][$j-1]{d}, $A[$i-1][$j-1]{d}
);
@{$A[$i][$j]}{qw(s t)} =
$A[$i-1][$j]{d} == $min ? ($A[$i-1][$j]{s}.$s[$i], $A[$i-1][$j]{t}.'-') :
$A[$i][$j-1]{d} == $min ? ($A[$i][$j-1]{s}.'-', $A[$i][$j-1]{t}.$t[$j]) :
($A[$i-1][$j-1]{s}.$s[$i], $A[$i-1][$j-1]{t}.$t[$j]);
}
else {
@{$A[$i][$j]}{qw(d s t)} = (
$A[$i-1][$j-1]{d},
$A[$i-1][$j-1]{s}.$s[$i],
$A[$i-1][$j-1]{t}.$t[$j]
);
}
}
}
return @{$A[-1][-1]}{'s', 't'};
}
print join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword";
- Output:
ro-settac-o-de raisethysword-
Phix
plus the indicator from Go
function LevenshteinAlignment(string a, b) integer la = length(a)+1, lb = length(b)+1 sequence costs = repeat(repeat(0,lb),la) for j=1 to lb do costs[1][j] = j end for for i=2 to la do costs[i][1] = i for j=2 to lb do integer tmp1 = 1+min(costs[i-1][j], costs[i][j-1]), tmp2 = costs[i-1][j-1] + (a[i-1]!=b[j-1]) costs[i][j] = min(tmp1, tmp2) end for end for -- walk back through matrix to figure out the path string arev = "", brev = "" integer i = la, j = lb while i>1 and j>1 do integer tmp = costs[i-1][j-1] + (a[i-1]!=b[j-1]) switch costs[i][j] do case tmp: i -= 1; arev &= a[i] j -= 1; brev &= b[j] case 1 + costs[i-1][j]: i -= 1; arev &= a[i] brev &= '-' case 1 + costs[i][j-1]: arev &= '-' j -= 1; brev &= b[j] end switch end while return {reverse(arev),reverse(brev)} end function procedure test(string a,b) {a,b} = LevenshteinAlignment(a,b) string c = sq_add(repeat(' ',length(a)),sq_mul(sq_ne(a,b),'|'-' ')) printf(1,"%s\n%s\n%s\n\n",{a,b,c}) end procedure test("rosettacode", "raisethysword") test("place", "palace")
- Output:
r-oset-tacode raisethysword || |||| || p-lace palace |
Python
from difflib import ndiff
def levenshtein(str1, str2):
result = ""
pos, removed = 0, 0
for x in ndiff(str1, str2):
if pos<len(str1) and str1[pos] == x[2]:
pos += 1
result += x[2]
if x[0] == "-":
removed += 1
continue
else:
if removed > 0:
removed -=1
else:
result += "-"
print(result)
levenshtein("place","palace")
levenshtein("rosettacode","raisethysword")
- Output:
p-lace ro-settac-o-de
Racket
Simple version (no aligment)
First we will analyze this solution that only computes the distance. See http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html for a discussion of the code.
#lang racket
(define (memoize f)
(local ([define table (make-hash)])
(lambda args
(dict-ref! table args (λ () (apply f args))))))
(define levenshtein
(memoize
(lambda (s t)
(cond
[(and (empty? s) (empty? t)) 0]
[(empty? s) (length t)]
[(empty? t) (length s)]
[else
(if (equal? (first s) (first t))
(levenshtein (rest s) (rest t))
(min (add1 (levenshtein (rest s) t))
(add1 (levenshtein s (rest t)))
(add1 (levenshtein (rest s) (rest t)))))]))))
Demonstration:
(levenshtein (string->list "rosettacode")
(string->list "raisethysword"))
- Output:
8
Complete version
Now we extend the code from http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html to show also the alignment. The code is very similar, but it stores the partial results (number of edits and alignment of each substring) in a lev structure.
#lang racket
(struct lev (n s t))
(define (lev-add old n sx tx)
(lev (+ n (lev-n old))
(cons sx (lev-s old))
(cons tx (lev-t old))))
(define (list-repeat n v)
(build-list n (lambda (_) v)))
(define (memoize f)
(local ([define table (make-hash)])
(lambda args
(dict-ref! table args (λ () (apply f args))))))
(define levenshtein/list
(memoize
(lambda (s t)
(cond
[(and (empty? s) (empty? t))
(lev 0 '() '())]
[(empty? s)
(lev (length t) (list-repeat (length t) #\-) t)]
[(empty? t)
(lev (length s) s (list-repeat (length s) #\-))]
[else
(if (equal? (first s) (first t))
(lev-add (levenshtein/list (rest s) (rest t))
0 (first s) (first t))
(argmin lev-n (list (lev-add (levenshtein/list (rest s) t)
1 (first s) #\-)
(lev-add (levenshtein/list s (rest t))
1 #\- (first t))
(lev-add (levenshtein/list (rest s) (rest t))
1 (first s) (first t)))))]))))
(define (levenshtein s t)
(let ([result (levenshtein/list (string->list s)
(string->list t))])
(values (lev-n result)
(list->string (lev-s result))
(list->string (lev-t result)))))
Demonstration:
(let-values ([(dist exp-s exp-t)
(levenshtein "rosettacode" "raisethysword")])
(printf "levenshtein: ~a edits\n" dist)
(displayln exp-s)
(displayln exp-t))
- Output:
levenshtein: 8 edits r-oset-taco-de raisethysword-
Raku
(formerly Perl 6)
sub align ( Str $σ, Str $t ) {
my @s = flat *, $σ.comb;
my @t = flat *, $t.comb;
my @A;
@A[$_][ 0]<d s t> = $_, @s[1..$_].join, '-' x $_ for ^@s;
@A[ 0][$_]<d s t> = $_, '-' x $_, @t[1..$_].join for ^@t;
for 1 ..^ @s X 1..^ @t -> (\i, \j) {
if @s[i] ne @t[j] {
@A[i][j]<d> = 1 + my $min =
min @A[i-1][j]<d>, @A[i][j-1]<d>, @A[i-1][j-1]<d>;
@A[i][j]<s t> =
@A[i-1][j]<d> == $min ?? (@A[i-1][j]<s t> Z~ @s[i], '-') !!
@A[i][j-1]<d> == $min ?? (@A[i][j-1]<s t> Z~ '-', @t[j]) !!
(@A[i-1][j-1]<s t> Z~ @s[i], @t[j]);
} else {
@A[i][j]<d s t> = @A[i-1][j-1]<d s t> Z~ '', @s[i], @t[j];
}
}
return @A[*-1][*-1]<s t>;
}
.say for align 'rosettacode', 'raisethysword';
- Output:
ro-settac-o-de raisethysword-
Ruby
uses "lcs" from here
require 'lcs'
def levenshtein_align(a, b)
apos, bpos = LCS.new(a, b).backtrack2
c = ""
d = ""
x0 = y0 = -1
dx = dy = 0
apos.zip(bpos) do |x,y|
diff = x + dx - y - dy
if diff < 0
dx -= diff
c += "-" * (-diff)
elsif diff > 0
dy += diff
d += "-" * diff
end
c += a[x0+1..x]
x0 = x
d += b[y0+1..y]
y0 = y
end
c += a[x0+1..-1]
d += b[y0+1..-1]
diff = a.length + y0 - b.length - x0
if diff < 0
c += "-" * (-diff)
elsif diff > 0
d += "-" * diff
end
[c, d]
end
puts levenshtein_align("rosettacode", "raisethysword")
- Output:
r-oset-taco-de raisethysword-
Rust
Cargo.toml
[dependencies] edit-distance = "^1.0.0"
src/main.rs
extern crate edit_distance;
edit_distance("rosettacode", "raisethysword");
Scala
- Output:
Best seen running in your browser either by ScalaFiddle (ES aka JavaScript, non JVM) or [1].
import scala.collection.mutable
import scala.collection.parallel.ParSeq
object LevenshteinAlignment extends App {
val vlad = new Levenshtein("rosettacode", "raisethysword")
val alignment = vlad.revLevenstein()
class Levenshtein(s1: String, s2: String) {
val memoizedCosts = mutable.Map[(Int, Int), Int]()
def revLevenstein(): (String, String) = {
def revLev: (Int, Int, String, String) => (String, String) = {
case (_, 0, revS1, revS2) => (revS1, revS2)
case (0, _, revS1, revS2) => (revS1, revS2)
case (i, j, revS1, revS2) =>
if (memoizedCosts(i, j) == (memoizedCosts(i - 1, j - 1)
+ (if (s1(i - 1) != s2(j - 1)) 1 else 0)))
revLev(i - 1, j - 1, s1(i - 1) + revS1, s2(j - 1) + revS2)
else if (memoizedCosts(i, j) == 1 + memoizedCosts(i - 1, j))
revLev(i - 1, j, s1(i - 1) + revS1, "-" + revS2)
else
revLev(i, j - 1, "-" + revS1, s2(j - 1) + revS2)
}
revLev(s1.length, s2.length, "", "")
}
private def levenshtein: Int = {
def lev: ((Int, Int)) => Int = {
case (k1, k2) =>
memoizedCosts.getOrElseUpdate((k1, k2), (k1, k2) match {
case (i, 0) => i
case (0, j) => j
case (i, j) =>
ParSeq(1 + lev((i - 1, j)),
1 + lev((i, j - 1)),
lev((i - 1, j - 1))
+ (if (s1(i - 1) != s2(j - 1)) 1 else 0)).min
})
}
lev((s1.length, s2.length))
}
levenshtein
}
println(alignment._1)
println(alignment._2)
}
Sidef
func align(s, t) {
s.chars!.prepend!('^')
t.chars!.prepend!('^')
var A = []
{|i| A[i][0]{@|<d s t>} = (i, s.slice(1).first(i).join, '~' * i) } << ^s
{|i| A[0][i]{@|<d s t>} = (i, '-' * i, t.slice(1).first(i).join) } << ^t
for i (1 .. s.end) {
for j (1 .. t.end) {
if (s[i] != t[j]) {
A[i][j]{:d} = 1+(
var min = Math.min(A[i-1][j]{:d}, A[i][j-1]{:d}, A[i-1][j-1]{:d})
)
A[i][j]{@|<s t>} = (A[i-1][j]{:d} == min
? [A[i-1][j]{:s}+s[i], A[i-1][j]{:t}+'-']
: (A[i][j-1]{:d} == min
? [A[i][j-1]{:s}+'-', A[i][j-1]{:t}+t[j]]
: [A[i-1][j-1]{:s}+s[i], A[i-1][j-1]{:t}+t[j]]))...
}
else {
A[i][j]{@|<d s t>} = (
A[i-1][j-1]{:d},
A[i-1][j-1]{:s}+s[i],
A[i-1][j-1]{:t}+t[j],
)
}
}
}
return [A[-1][-1]{@|<s t>}]
}
align("rosettacode", "raisethysword").each { .say }
- Output:
ro-settac-o-de raisethysword-
Tcl
package require struct::list
proc levenshtein/align {a b} {
lassign [struct::list longestCommonSubsequence [split $a ""] [split $b ""]]\
apos bpos
set c ""
set d ""
set x0 [set y0 -1]
set dx [set dy 0]
foreach x $apos y $bpos {
if {$x+$dx < $y+$dy} {
set n [expr {($y+$dy)-($x+$dx)}]
incr dx $n
append c [string repeat "-" $n]
} elseif {$x+$dx > $y+$dy} {
set n [expr {($x+$dx)-($y+$dy)}]
incr dy $n
append d [string repeat "-" $n]
}
append c [string range $a $x0+1 $x]
set x0 $x
append d [string range $b $y0+1 $y]
set y0 $y
}
append c [string range $a $x0+1 end]
append d [string range $b $y0+1 end]
set al [string length $a]
set bl [string length $b]
if {$al+$y0 < $bl+$x0} {
append c [string repeat "-" [expr {$bl+$x0-$y0-$al}]]
} elseif {$bl+$x0 < $al+$y0} {
append d [string repeat "-" [expr {$al+$y0-$x0-$bl}]]
}
return $c\n$d
}
puts [levenshtein/align "rosettacode" "raisethysword"]
- Output:
r-oset-taco-de raisethysword-
Wren
import "./str" for Str
var levenshteinAlign = Fn.new { |a, b|
a = Str.lower(a)
b = Str.lower(b)
var costs = List.filled(a.count+1, null)
for (i in 0..a.count) costs[i] = List.filled(b.count+1, 0)
for (j in 0..b.count) costs[0][j] = j
for (i in 1..a.count) {
costs[i][0] = i
for (j in 1..b.count) {
var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1)
costs[i][j] = temp.min(1 + costs[i - 1][j].min(costs[i][j - 1]))
}
}
// walk back through matrix to figure out path
var aPathRev = ""
var bPathRev = ""
var i = a.count
var j = b.count
while (i != 0 && j != 0) {
var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1)
var cij = costs[i][j]
if (cij == temp) {
i = i - 1
aPathRev = aPathRev + a[i]
j = j - 1
bPathRev = bPathRev + b[j]
} else if (cij == 1 + costs[i-1][j]) {
i = i - 1
aPathRev = aPathRev + a[i]
bPathRev = bPathRev + "-"
} else if (cij == 1 + costs[i][j-1]) {
aPathRev = aPathRev + "-"
j = j - 1
bPathRev = bPathRev+ b[j]
}
}
return [aPathRev[-1..0], bPathRev[-1..0]]
}
var result = levenshteinAlign.call("place", "palace")
System.print(result[0])
System.print(result[1])
System.print()
result = levenshteinAlign.call("rosettacode","raisethysword")
System.print(result[0])
System.print(result[1])
- Output:
p-lace palace r-oset-tacode raisethysword
zkl
fcn alignment(a,b){
a,b = a.toLower(), b.toLower();
costs := (a.len()+1).pump(List(),'wrap(a){ [1..b.len()].pump(List(a)) });
foreach i,j in (a.len()+1, [1..b.len()]){
costs[i][j] = ( 1 + costs[i-1][j].min(costs[i][j-1]) )
.min(if(a[i-1] == b[j-1]) costs[i-1][j-1] else costs[i-1][j-1] + 1);
}
// walk back through matrix to figure out path
aPathRev,bPathRev := Data(),Data(); // byte buckets
i,j := a.len(), b.len();
while(i!=0 and j!= 0){
if (costs[i][j] ==
( if(a[i-1]==b[j-1]) costs[i-1][j-1] else costs[i-1][j-1]+1 )){
aPathRev.append(a[i-=1]);
bPathRev.append(b[j-=1]);
} else if(costs[i][j] == 1+costs[i-1][j]){
aPathRev.append(a[i-=1]);
bPathRev.append("-");
} else if (costs[i][j] == 1+costs[i][j-1]){
aPathRev.append("-");
bPathRev.append(b[j-=1]);
}
}
return(aPathRev.text.reverse(), bPathRev.text.reverse())
}
result := alignment("rosettacode", "raisethysword");
println(result[0]);
println(result[1]);
- Output:
r-oset-tacode raisethysword