Levenshtein distance/Alignment: Difference between revisions
m (whitespace cleanup) |
|||
(65 intermediate revisions by 31 users not shown) | |||
Line 1: | Line 1: | ||
{{ |
{{task}} |
||
The [[Levenshtein distance]] algorithm returns the number of atomic operations (insertion, deletion or edition) that must be performed on a string in order to obtain an other one, but it does not say anything about the actual operations used or their order. |
The [[Levenshtein distance]] algorithm returns the number of atomic operations (insertion, deletion or edition) that must be performed on a string in order to obtain an other one, but it does not say anything about the actual operations used or their order. |
||
Line 5: | Line 5: | ||
An alignment is a notation used to describe the operations used to turn a string into an other. At some point in the strings, the minus character ('-') is placed in order to signify that a character must be added at this very place. For instance, an alignment between the words 'place' and 'palace' is: |
An alignment is a notation used to describe the operations used to turn a string into an other. At some point in the strings, the minus character ('-') is placed in order to signify that a character must be added at this very place. For instance, an alignment between the words 'place' and 'palace' is: |
||
<pre> |
<pre> |
||
P-LACE |
|||
PALACE</pre> |
|||
PALACE |
|||
</pre> |
|||
For this task, write a function that shows the alignment of two strings for the corresponding levenshtein distance. As an example, use the words "rosettacode" and "raisethysword". |
|||
;Task: |
|||
Write a function that shows the alignment of two strings for the corresponding levenshtein distance. |
|||
As an example, use the words "rosettacode" and "raisethysword". |
|||
You can either implement an algorithm, or use a dedicated library (thus showing us how it is named in your language). |
You can either implement an algorithm, or use a dedicated library (thus showing us how it is named in your language). |
||
<br><br> |
|||
=={{header|11l}}== |
|||
{{trans|Nim}} |
|||
<syntaxhighlight lang="11l">F levenshteinAlign(aa, bb) |
|||
V a = aa.lowercase() |
|||
V b = bb.lowercase() |
|||
V costs = [[0] * (b.len + 1)] * (a.len + 1) |
|||
L(j) 0 .. b.len |
|||
costs[0][j] = j |
|||
L(i) 1 .. a.len |
|||
costs[i][0] = i |
|||
L(j) 1 .. b.len |
|||
V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1]) |
|||
costs[i][j] = min(1 + min(costs[i - 1][j], costs[i][j - 1]), tmp) |
|||
V aPathRev = ‘’ |
|||
V bPathRev = ‘’ |
|||
V i = a.len |
|||
V j = b.len |
|||
L i != 0 & j != 0 |
|||
V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1]) |
|||
I costs[i][j] == tmp |
|||
aPathRev ‘’= a[--i] |
|||
bPathRev ‘’= b[--j] |
|||
E I costs[i][j] == 1 + costs[i - 1][j] |
|||
aPathRev ‘’= a[--i] |
|||
bPathRev ‘’= ‘-’ |
|||
E I costs[i][j] == 1 + costs[i][j - 1] |
|||
aPathRev ‘’= ‘-’ |
|||
bPathRev ‘’= b[--j] |
|||
E |
|||
assert(0B, ‘Internal error’) |
|||
R (reversed(aPathRev), reversed(bPathRev)) |
|||
V result = levenshteinAlign(‘place’, ‘palace’) |
|||
print(result[0]) |
|||
print(result[1]) |
|||
print() |
|||
result = levenshteinAlign(‘rosettacode’, ‘raisethysword’) |
|||
print(result[0]) |
|||
print(result[1])</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
p-lace |
|||
palace |
|||
r-oset-tacode |
|||
raisethysword |
|||
</pre> |
|||
=={{header|Arturo}}== |
|||
<syntaxhighlight lang="rebol">print join.with:"\n" levenshtein.align "place" "palace" |
|||
print join.with:"\n" levenshtein.align "rosettacode" "raisethysword"</syntaxhighlight> |
|||
{{out}} |
|||
<pre>p-lace |
|||
palace |
|||
r-oset-tacode |
|||
raisethysword</pre> |
|||
=={{header|C}}== |
|||
<syntaxhighlight lang="c">#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#include <string.h> |
|||
typedef struct edit_s edit_t, *edit; |
|||
struct edit_s { |
|||
char c1, c2; |
|||
int n; |
|||
edit next; |
|||
}; |
|||
void leven(char *a, char *b) |
|||
{ |
|||
int i, j, la = strlen(a), lb = strlen(b); |
|||
edit *tbl = malloc(sizeof(edit) * (1 + la)); |
|||
tbl[0] = calloc((1 + la) * (1 + lb), sizeof(edit_t)); |
|||
for (i = 1; i <= la; i++) |
|||
tbl[i] = tbl[i-1] + (1+lb); |
|||
for (i = la; i >= 0; i--) { |
|||
char *aa = a + i; |
|||
for (j = lb; j >= 0; j--) { |
|||
char *bb = b + j; |
|||
if (!*aa && !*bb) continue; |
|||
edit e = &tbl[i][j]; |
|||
edit repl = &tbl[i+1][j+1]; |
|||
edit dela = &tbl[i+1][j]; |
|||
edit delb = &tbl[i][j+1]; |
|||
e->c1 = *aa; |
|||
e->c2 = *bb; |
|||
if (!*aa) { |
|||
e->next = delb; |
|||
e->n = e->next->n + 1; |
|||
continue; |
|||
} |
|||
if (!*bb) { |
|||
e->next = dela; |
|||
e->n = e->next->n + 1; |
|||
continue; |
|||
} |
|||
e->next = repl; |
|||
if (*aa == *bb) { |
|||
e->n = e->next->n; |
|||
continue; |
|||
} |
|||
if (e->next->n > delb->n) { |
|||
e->next = delb; |
|||
e->c1 = 0; |
|||
} |
|||
if (e->next->n > dela->n) { |
|||
e->next = dela; |
|||
e->c1 = *aa; |
|||
e->c2 = 0; |
|||
} |
|||
e->n = e->next->n + 1; |
|||
} |
|||
} |
|||
edit p = tbl[0]; |
|||
printf("%s -> %s: %d edits\n", a, b, p->n); |
|||
while (p->next) { |
|||
if (p->c1 == p->c2) |
|||
printf("%c", p->c1); |
|||
else { |
|||
putchar('('); |
|||
if (p->c1) putchar(p->c1); |
|||
putchar(','); |
|||
if (p->c2) putchar(p->c2); |
|||
putchar(')'); |
|||
} |
|||
p = p->next; |
|||
} |
|||
putchar('\n'); |
|||
free(tbl[0]); |
|||
free(tbl); |
|||
} |
|||
int main(void) |
|||
{ |
|||
leven("raisethysword", "rosettacode"); |
|||
return 0; |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
raisethysword -> rosettacode: 8 edits |
|||
r(a,o)(i,)set(h,t)(y,a)(s,c)(w,)o(r,d)(d,e) |
|||
</pre> |
|||
=={{header|C#}}== |
|||
{{trans|Java}} |
|||
<syntaxhighlight lang="C#"> |
|||
using System; |
|||
using System.Text; |
|||
public class LevenshteinAlignment |
|||
{ |
|||
public static string[] Alignment(string a, string b) |
|||
{ |
|||
a = a.ToLower(); |
|||
b = b.ToLower(); |
|||
int[,] costs = new int[a.Length + 1, b.Length + 1]; |
|||
for (int j = 0; j <= b.Length; j++) |
|||
costs[0, j] = j; |
|||
for (int i = 1; i <= a.Length; i++) |
|||
{ |
|||
costs[i, 0] = i; |
|||
for (int j = 1; j <= b.Length; j++) |
|||
{ |
|||
costs[i, j] = Math.Min(1 + Math.Min(costs[i - 1, j], costs[i, j - 1]), |
|||
a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1); |
|||
} |
|||
} |
|||
StringBuilder aPathRev = new StringBuilder(); |
|||
StringBuilder bPathRev = new StringBuilder(); |
|||
for (int i = a.Length, j = b.Length; i != 0 && j != 0;) |
|||
{ |
|||
if (costs[i, j] == (a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1)) |
|||
{ |
|||
aPathRev.Append(a[--i]); |
|||
bPathRev.Append(b[--j]); |
|||
} |
|||
else if (costs[i, j] == 1 + costs[i - 1, j]) |
|||
{ |
|||
aPathRev.Append(a[--i]); |
|||
bPathRev.Append('-'); |
|||
} |
|||
else if (costs[i, j] == 1 + costs[i, j - 1]) |
|||
{ |
|||
aPathRev.Append('-'); |
|||
bPathRev.Append(b[--j]); |
|||
} |
|||
} |
|||
return new string[] { Reverse(aPathRev.ToString()), Reverse(bPathRev.ToString()) }; |
|||
} |
|||
private static string Reverse(string s) |
|||
{ |
|||
char[] charArray = s.ToCharArray(); |
|||
Array.Reverse(charArray); |
|||
return new string(charArray); |
|||
} |
|||
public static void Main(string[] args) |
|||
{ |
|||
string[] result = Alignment("rosettacode", "raisethysword"); |
|||
Console.WriteLine(result[0]); |
|||
Console.WriteLine(result[1]); |
|||
} |
|||
} |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
r-oset-tacode |
|||
raisethysword |
|||
</pre> |
|||
=={{header|C++}}== |
|||
<syntaxhighlight lang="c++"> |
|||
#include <algorithm> |
|||
#include <cstdint> |
|||
#include <iostream> |
|||
#include <string> |
|||
#include <vector> |
|||
std::string to_lower_case(const std::string& text) { |
|||
std::string result = text; |
|||
std::transform(result.begin(), result.end(), result.begin(), |
|||
[](char ch){ return std::tolower(ch); }); |
|||
return result; |
|||
} |
|||
std::vector<std::string> levenshtein_alignment(std::string a, std::string b) { |
|||
a = to_lower_case(a); |
|||
b = to_lower_case(b); |
|||
std::vector<std::vector<int32_t>> costs{ a.length() + 1, std::vector<int32_t>( b.length() + 1, 0 ) }; |
|||
for ( uint64_t j = 0; j <= b.length(); ++j ) |
|||
costs[0][j] = j; |
|||
for ( uint64_t i = 1; i <= a.length(); ++i ) { |
|||
costs[i][0] = i; |
|||
for ( uint64_t j = 1; j <= b.length(); ++j ) { |
|||
costs[i][j] = std::min(std::min( costs[i - 1][j], costs[i][j - 1]) + 1, |
|||
a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1); |
|||
} |
|||
} |
|||
std::string a_path_reversed, b_path_reversed; |
|||
uint64_t i = a.length(), j = b.length(); |
|||
while ( i != 0 && j != 0 ) { |
|||
if ( costs[i][j] == ( a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1 ) ) { |
|||
a_path_reversed += a[--i]; |
|||
b_path_reversed += b[--j]; |
|||
} else if ( costs[i][j] == costs[i - 1][j] + 1 ) { |
|||
a_path_reversed += a[--i]; |
|||
b_path_reversed += "-"; |
|||
} else if ( costs[i][j] == costs[i][j - 1] + 1 ) { |
|||
a_path_reversed += "-"; |
|||
b_path_reversed += b[--j]; |
|||
} |
|||
} |
|||
std::reverse(a_path_reversed.begin(), a_path_reversed.end()); |
|||
std::reverse(b_path_reversed.begin(), b_path_reversed.end()); |
|||
return std::vector<std::string>{ a_path_reversed, b_path_reversed }; |
|||
} |
|||
int main() { |
|||
std::vector<std::string> result = levenshtein_alignment("place", "palace"); |
|||
std::cout << result[0] << std::endl; |
|||
std::cout << result[1] << std::endl; |
|||
std::cout << std::endl; |
|||
result = levenshtein_alignment("rosettacode", "raisethysword"); |
|||
std::cout << result[0] << std::endl; |
|||
std::cout << result[1] << std::endl; |
|||
} |
|||
</syntaxhighlight> |
|||
{{ out }} |
|||
<pre> |
|||
p-lace |
|||
palace |
|||
r-oset-tacode |
|||
raisethysword |
|||
</pre> |
|||
=={{header|D}}== |
|||
Using the standard library. |
|||
<syntaxhighlight lang="d">void main() { |
|||
import std.stdio, std.algorithm; |
|||
immutable s1 = "rosettacode"; |
|||
immutable s2 = "raisethysword"; |
|||
string s1b, s2b; |
|||
size_t pos1, pos2; |
|||
foreach (immutable c; levenshteinDistanceAndPath(s1, s2)[1]) { |
|||
final switch (c) with (EditOp) { |
|||
case none, substitute: |
|||
s1b ~= s1[pos1++]; |
|||
s2b ~= s2[pos2++]; |
|||
break; |
|||
case insert: |
|||
s1b ~= "_"; |
|||
s2b ~= s2[pos2++]; |
|||
break; |
|||
case remove: |
|||
s1b ~= s1[pos1++]; |
|||
s2b ~= "_"; |
|||
break; |
|||
} |
|||
} |
|||
writeln(s1b, "\n", s2b); |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre>r_oset_tacode |
|||
raisethysword</pre> |
|||
=={{header|EasyLang}}== |
|||
{{trans|FreeBASIC}} |
|||
<syntaxhighlight> |
|||
global dparr[] dpcols . |
|||
proc dpnew a b . . |
|||
len dparr[] a * b |
|||
dpcols = b |
|||
. |
|||
func dp r c . |
|||
return dparr[r * dpcols + c + 1] |
|||
. |
|||
proc dps r c v . . |
|||
dparr[r * dpcols + c + 1] = v |
|||
. |
|||
proc align a$ b$ . ar$ br$ . |
|||
dpnew (len a$ + 1) (len b$ + 1) |
|||
for i = 1 to len a$ |
|||
dps i 0 i |
|||
. |
|||
for j = 0 to len b$ |
|||
dps 0 j j |
|||
. |
|||
for i = 1 to len a$ |
|||
for j = 1 to len b$ |
|||
if substr a$ i 1 = substr b$ j 1 |
|||
dps i j dp (i - 1) (j - 1) |
|||
else |
|||
dps i j lower (lower dp (i - 1) j dp i (j - 1)) dp (i - 1) (j - 1) + 1 |
|||
. |
|||
. |
|||
. |
|||
ar$ = "" ; br$ = "" |
|||
i = len a$ ; j = len b$ |
|||
while i <> 0 and j <> 0 |
|||
if substr a$ i 1 = substr b$ j 1 or dp i j = dp (i - 1) (j - 1) + 1 |
|||
ar$ = substr a$ i 1 & ar$ |
|||
i -= 1 |
|||
br$ = substr b$ j 1 & br$ |
|||
j -= 1 |
|||
elif dp i j = dp (i - 1) j + 1 |
|||
ar$ = substr a$ i 1 & ar$ |
|||
i -= 1 |
|||
br$ = "-" & br$ |
|||
else |
|||
ar$ = "-" & ar$ |
|||
br$ = substr b$ j 1 & br$ |
|||
j -= 1 |
|||
. |
|||
. |
|||
. |
|||
align "rosettacode" "raisethysword" ar$ br$ |
|||
print ar$ ; print br$ |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
r-oset-tacode |
|||
raisethysword |
|||
</pre> |
|||
=={{header|Eiffel}}== |
|||
<syntaxhighlight lang="eiffel"> |
|||
distance (source, target: STRING): INTEGER |
|||
-- Minimum number of operations to turn `source' into `target'. |
|||
local |
|||
l_distance: ARRAY2 [INTEGER] |
|||
del, ins, subst: INTEGER |
|||
do |
|||
create l_distance.make (source.count, target.count) |
|||
⟳ ic:(1 |..| source.count) ¦ l_distance [ic, 1] := ic - 1 ⟲ |
|||
⟳ ij:(1 |..| target.count) ¦ l_distance [1, ij] := ij - 1 ⟲ |
|||
⟳ ic:(2 |..| source.count) ¦ |
|||
⟳ jc:(2 |..| target.count) ¦ |
|||
if source [ic] = target [jc] then -- same char |
|||
l_distance [ic, jc] := l_distance [ic - 1, jc - 1] |
|||
else -- diff char |
|||
del := l_distance [ic - 1, jc] -- delete? |
|||
ins := l_distance [ic, jc - 1] -- insert? |
|||
subst := l_distance [ic - 1, jc -1] -- substitute/swap? |
|||
l_distance [ic, jc] := del.min (ins.min (subst)) + 1 |
|||
end |
|||
⟲ |
|||
⟲ |
|||
Result:= l_distance [source.count, target.count] |
|||
end |
|||
</syntaxhighlight>{{out}} |
|||
Result = 8 |
|||
The ⟳ ¦ ⟲ represents the "symbolic form" of an Eiffel across loop. In the case above, the first two ⟳ (loops) initialize the first column and the first row, and then two nested ⟳ (loops) to walk the distance computation. |
|||
Also—the `ic' is this programmers shorthand for ITERATION_CURSOR. Therefore, in the nested loops, the `ic' is the iteration cursor following `i' and the `ij' is following `j' as indexes on the source, target, and l_distance. |
|||
=={{header|FreeBASIC}}== |
|||
<syntaxhighlight lang="vbnet">#define min(a, b) iif((a) < (b), (a), (b)) |
|||
Sub LevenshteinAlignment(s1 As String, s2 As String) |
|||
Dim As String align1 = "", align2 = "" |
|||
Dim As Integer i, j, len1, len2 |
|||
len1 = Len(s1): |
|||
len2 = Len(s2) |
|||
Dim As Integer dp(len1+1, len2+1) |
|||
For i = 0 To len1 |
|||
dp(i, 0) = i |
|||
Next i |
|||
For j = 0 To len2 |
|||
dp(0, j) = j |
|||
Next j |
|||
For i = 1 To len1 |
|||
For j = 1 To len2 |
|||
If Mid(s1, i, 1) = Mid(s2, j, 1) Then |
|||
dp(i, j) = dp(i-1, j-1) |
|||
Else |
|||
dp(i, j) = min(min(dp(i-1, j), dp(i, j-1)), dp(i-1, j-1)) + 1 |
|||
End If |
|||
Next j |
|||
Next i |
|||
i = len1: j = len2 |
|||
While i > 0 And j > 0 |
|||
If Mid(s1, i, 1) = Mid(s2, j, 1) Then |
|||
align1 = Mid(s1, i, 1) + align1 |
|||
align2 = Mid(s2, j, 1) + align2 |
|||
i -= 1: j -= 1 |
|||
Elseif dp(i, j) = dp(i-1, j-1) + 1 Then |
|||
align1 = Mid(s1, i, 1) + align1 |
|||
align2 = Mid(s2, j, 1) + align2 |
|||
i -= 1: j -= 1 |
|||
Elseif dp(i, j) = dp(i-1, j) + 1 Then |
|||
align1 = Mid(s1, i, 1) + align1 |
|||
align2 = "-" + align2 |
|||
i -= 1 |
|||
Else |
|||
align1 = "-" + align1 |
|||
align2 = Mid(s2, j, 1) + align2 |
|||
j -= 1 |
|||
End If |
|||
Wend |
|||
While i > 0 |
|||
align1 = Mid(s1, i, 1) + align1 |
|||
align2 = "-" + align2 |
|||
i -= 1 |
|||
Wend |
|||
While j > 0 |
|||
align1 = "-" + align1 |
|||
align2 = Mid(s2, j, 1) + align2 |
|||
j -= 1 |
|||
Wend |
|||
Print "Levenshtein Distance: "; dp(len1, len2) |
|||
Print align1 |
|||
Print align2 |
|||
Print |
|||
End Sub |
|||
LevenshteinAlignment("rosettacode", "raisethysword") |
|||
LevenshteinAlignment("place", "palace") |
|||
Sleep</syntaxhighlight> |
|||
{{out}} |
|||
<pre>Levenshtein Distance: 8 |
|||
r-oset-tacode |
|||
raisethysword |
|||
Levenshtein Distance: 1 |
|||
p-lace |
|||
palace</pre> |
|||
=={{header|Go}}== |
|||
{{libheader|biogo}} |
|||
Alignment computed by the Needleman-Wunch algorithm, which is used in bioinformatics. |
|||
<syntaxhighlight lang="go">package main |
|||
import ( |
|||
"fmt" |
|||
"github.com/biogo/biogo/align" |
|||
ab "github.com/biogo/biogo/alphabet" |
|||
"github.com/biogo/biogo/feat" |
|||
"github.com/biogo/biogo/seq/linear" |
|||
) |
|||
func main() { |
|||
// Alphabets for things like DNA are predefined in biogo, but we |
|||
// define our own here. |
|||
lc := ab.Must(ab.NewAlphabet("-abcdefghijklmnopqrstuvwxyz", |
|||
feat.Undefined, '-', 0, true)) |
|||
// Construct scoring matrix for Needleman-Wunch algorithm. |
|||
// We leave zeros on the diagonal for the Levenshtein distance of an |
|||
// exact match and put -1s everywhere else for the Levenshtein distance |
|||
// of an edit. |
|||
nw := make(align.NW, lc.Len()) |
|||
for i := range nw { |
|||
r := make([]int, lc.Len()) |
|||
nw[i] = r |
|||
for j := range r { |
|||
if j != i { |
|||
r[j] = -1 |
|||
} |
|||
} |
|||
} |
|||
// define input sequences |
|||
a := &linear.Seq{Seq: ab.BytesToLetters([]byte("rosettacode"))} |
|||
a.Alpha = lc |
|||
b := &linear.Seq{Seq: ab.BytesToLetters([]byte("raisethysword"))} |
|||
b.Alpha = lc |
|||
// perform alignment |
|||
aln, err := nw.Align(a, b) |
|||
// format and display result |
|||
if err != nil { |
|||
fmt.Println(err) |
|||
return |
|||
} |
|||
fa := align.Format(a, b, aln, '-') |
|||
fmt.Printf("%s\n%s\n", fa[0], fa[1]) |
|||
aa := fmt.Sprint(fa[0]) |
|||
ba := fmt.Sprint(fa[1]) |
|||
ma := make([]byte, len(aa)) |
|||
for i := range ma { |
|||
if aa[i] == ba[i] { |
|||
ma[i] = ' ' |
|||
} else { |
|||
ma[i] = '|' |
|||
} |
|||
} |
|||
fmt.Println(string(ma)) |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
The lines after the alignment point out the 8 edits. |
|||
<pre> |
|||
r-oset-tacode |
|||
raisethysword |
|||
|| |||| || |
|||
</pre> |
|||
=={{header|Haskell}}== |
|||
The Wagner–Fischer matrix construction is adopted from [[Levenshtein_distance#Haskell]], however it is reversed in order to simplify it's traversing. |
|||
<syntaxhighlight lang="haskell">costs :: String -> String -> [[Int]] |
|||
costs s1 s2 = reverse $ reverse <$> matrix |
|||
where |
|||
matrix = scanl transform [0 .. length s1] s2 |
|||
transform ns@(n:ns1) c = scanl calc (n + 1) $ zip3 s1 ns ns1 |
|||
where |
|||
calc z (c1, x, y) = minimum [ y + 1, z + 1 |
|||
, x + fromEnum (c1 /= c)] |
|||
levenshteinDistance :: String -> String -> Int |
|||
levenshteinDistance s1 s2 = head.head $ costs s1 s2</syntaxhighlight> |
|||
=== A sample alignment === |
|||
The alignment is built in the same way as it is done in [[Levenshtein_distance/Alignment#Java]]. |
|||
Instead of indices the Wagner–Fischer matrix and strings are traversed by list truncation, as zippers. |
|||
<syntaxhighlight lang="haskell">alignment :: String -> String -> (String, String) |
|||
alignment s1 s2 = go (costs s1 s2) (reverse s1) (reverse s2) ([],[]) |
|||
where |
|||
go c _ _ r | isEmpty c = r |
|||
go _ [] s r = ('-' <$ s, reverse s) <> r |
|||
go _ s [] r = (reverse s, '-' <$ s) <> r |
|||
go c s1@(h1:t1) s2@(h2:t2) (r1, r2) = |
|||
let temp = (get.nextCol.nextRow $ c) + if h1 == h2 then 0 else 1 |
|||
in case get c of |
|||
x | x == temp -> go (nextRow.nextCol $ c) t1 t2 (h1:r1, h2:r2) |
|||
| x == 1 + (get.nextCol $ c) -> go (nextCol c) s1 t2 ('-':r1, h2:r2) |
|||
| x == 1 + (get.nextRow $ c) -> go (nextRow c) t1 s2 (h1:r1, '-':r2) |
|||
-- Functions which treat table as zipper |
|||
get ((h:_):_) = h |
|||
nextRow = map tail |
|||
nextCol = tail |
|||
isEmpty c = null c || null (head c)</syntaxhighlight> |
|||
<pre>λ> alignment "palace" "place" |
|||
("palace","p-lace") |
|||
λ> alignment "rosettacode" "raisethysword" |
|||
("r-oset-tacode","raisethysword") |
|||
λ> alignment "rosettacode" "rat" |
|||
("rosettacode","r-----a---t")</pre> |
|||
=== All alignments === |
|||
The alternative solution, which extensively produces all minimal alignments for given strings. |
|||
<syntaxhighlight lang="haskell">-- Produces all possible alignments for two strings. |
|||
allAlignments :: String -> String -> [[(Char, Char)]] |
|||
allAlignments s1 s2 = go (length s2 - length s1) s1 s2 |
|||
where |
|||
go _ s [] = [(\x -> (x, '-')) <$> s] |
|||
go _ [] s = [(\x -> ('-' ,x)) <$> s] |
|||
go n s1@(h1:t1) s2@(h2:t2) = (h1, h2) <:> go n t1 t2 |
|||
++ case compare n 0 of |
|||
LT -> (h1, '-') <:> go (n+1) t1 s2 |
|||
EQ -> [] |
|||
GT -> ('-', h2) <:> go (n-1) s1 t2 |
|||
x <:> l = fmap (x :) l |
|||
-- Returns a lazy list of all optimal alignments. |
|||
levenshteinAlignments :: String -> String -> [(String, String)] |
|||
levenshteinAlignments s1 s2 = unzip <$> best |
|||
where |
|||
best = filter ((lev ==) . dist) $ allAlignments s1 s2 |
|||
lev = levenshteinDistance s1 s2 |
|||
dist = length . filter (uncurry (/=))</syntaxhighlight> |
|||
<pre>λ> mapM_ print $ levenshteinAlignments "rosettacode" "raisethysword" |
|||
("ro-settac-ode","raisethysword") |
|||
("ro-setta-code","raisethysword") |
|||
("ro-sett-acode","raisethysword") |
|||
("ro-set-tacode","raisethysword") |
|||
("r-osettac-ode","raisethysword") |
|||
("r-osetta-code","raisethysword") |
|||
("r-osett-acode","raisethysword") |
|||
("r-oset-tacode","raisethysword") |
|||
λ> mapM_ print $ levenshteinAlignments "rosettacode" "rat" |
|||
("rosettacode","ra--t------") |
|||
("rosettacode","ra---t-----") |
|||
("rosettacode","r-a-t------") |
|||
("rosettacode","r-a--t-----") |
|||
("rosettacode","r--at------") |
|||
("rosettacode","r--a-t-----") |
|||
("rosettacode","r---at-----") |
|||
("rosettacode","r-----at---") |
|||
("rosettacode","r-----a-t--") |
|||
("rosettacode","r-----a--t-") |
|||
("rosettacode","r-----a---t")</pre> |
|||
=={{header|J}}== |
|||
Translation of java: |
|||
<syntaxhighlight lang="j">levalign=:4 :0 |
|||
assert. x <:&# y |
|||
D=. x +/&i.&>:&# y |
|||
for_i. 1+i.#x do. |
|||
for_j. 1+i.#y do. |
|||
if. ((<:i){x)=(<:j){y do. |
|||
D=. (D {~<<:i,j) (<i,j)} D |
|||
else. |
|||
min=. 1+<./D{~(i,j) <@:-"1#:1 2 3 |
|||
D=. min (<i,j)} D |
|||
end. |
|||
end. |
|||
end. |
|||
A=. B=. '' |
|||
ij=. x ,&# y |
|||
while. */ij do. |
|||
'd00 d01 d10 d11'=. D{~ ij <@:-"1#:i.4 |
|||
'x1 y1'=. (ij-1){each x;y |
|||
if. d00 = d11+x1~:y1 do. |
|||
A=. A,x1 [ B=. B,y1 [ ij=. ij-1 |
|||
elseif. d00 = 1+d10 do. |
|||
A=. A,x1 [ B=. B,'-'[ ij=. ij-1 0 |
|||
elseif. d00 = 1+d01 do. |
|||
A=. A,'-'[ B=. B,y1 [ ij=. ij-0 1 |
|||
end. |
|||
end. |
|||
A,:&|.B |
|||
)</syntaxhighlight> |
|||
Task examples: |
|||
<syntaxhighlight lang="j"> 'place' levalign 'palace' |
|||
p-lace |
|||
palace |
|||
'rosettacode' levalign 'raisethysword' |
|||
r-oset-tacode |
|||
raisethysword |
|||
</syntaxhighlight> |
|||
=={{header|Java}}== |
|||
<syntaxhighlight lang="java">public class LevenshteinAlignment { |
|||
public static String[] alignment(String a, String b) { |
|||
a = a.toLowerCase(); |
|||
b = b.toLowerCase(); |
|||
// i == 0 |
|||
int[][] costs = new int[a.length()+1][b.length()+1]; |
|||
for (int j = 0; j <= b.length(); j++) |
|||
costs[0][j] = j; |
|||
for (int i = 1; i <= a.length(); i++) { |
|||
costs[i][0] = i; |
|||
for (int j = 1; j <= b.length(); j++) { |
|||
costs[i][j] = Math.min(1 + Math.min(costs[i-1][j], costs[i][j-1]), a.charAt(i - 1) == b.charAt(j - 1) ? costs[i-1][j-1] : costs[i-1][j-1] + 1); |
|||
} |
|||
} |
|||
// walk back through matrix to figure out path |
|||
StringBuilder aPathRev = new StringBuilder(); |
|||
StringBuilder bPathRev = new StringBuilder(); |
|||
for (int i = a.length(), j = b.length(); i != 0 && j != 0; ) { |
|||
if (costs[i][j] == (a.charAt(i - 1) == b.charAt(j - 1) ? costs[i-1][j-1] : costs[i-1][j-1] + 1)) { |
|||
aPathRev.append(a.charAt(--i)); |
|||
bPathRev.append(b.charAt(--j)); |
|||
} else if (costs[i][j] == 1 + costs[i-1][j]) { |
|||
aPathRev.append(a.charAt(--i)); |
|||
bPathRev.append('-'); |
|||
} else if (costs[i][j] == 1 + costs[i][j-1]) { |
|||
aPathRev.append('-'); |
|||
bPathRev.append(b.charAt(--j)); |
|||
} |
|||
} |
|||
return new String[]{aPathRev.reverse().toString(), bPathRev.reverse().toString()}; |
|||
} |
|||
public static void main(String[] args) { |
|||
String[] result = alignment("rosettacode", "raisethysword"); |
|||
System.out.println(result[0]); |
|||
System.out.println(result[1]); |
|||
} |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
r-oset-tacode |
|||
raisethysword |
|||
</pre> |
|||
=={{header|jq}}== |
|||
'''Adapted from [[#Wren|Wren]]''' |
|||
'''Works with jq, the C implementation of jq''' |
|||
'''Works with gojq, the Go implementation of jq''' |
|||
'''Works with jaq, the Rust implementation of jq''' |
|||
<syntaxhighlight lang="jq"> |
|||
def reverseString: explode | reverse | implode; |
|||
def levenshteinAlign($x; $y): |
|||
def min($a;$b): [$a,$b]|min; |
|||
($x|length + 1) as $a1 |
|||
| ($y|length + 1) as $b1 |
|||
| ($x | ascii_downcase) as $a |
|||
| ($y | ascii_downcase) as $b |
|||
| [range(0; $b1) | 0] as $zeros |
|||
| {costs: [range(0; $a1) | $zeros]} |
|||
| reduce range(0; $b1) as $j (.; .costs[0][$j] = $j) |
|||
| reduce range(1; $a1) as $i (.; |
|||
.costs[$i][0] = $i |
|||
| reduce range(1; $b1) as $j (.; |
|||
(.costs[$i - 1][$j - 1] + (if $a[$i - 1: $i] == $b[$j - 1: $j] then 0 else 1 end)) as $temp |
|||
| .costs[$i][$j] = min( $temp; 1 + min(.costs[$i - 1][$j]; .costs[$i][$j - 1] )) )) |
|||
# walk back through matrix to figure out path |
|||
| .aPathRev = "" |
|||
| .bPathRev = "" |
|||
| .i = ($a|length) |
|||
| .j = ($b|length) |
|||
| until (.i == 0 or .j == 0; |
|||
(.costs[.i - 1][.j - 1] + (if $a[.i - 1: .i] == $b[.j - 1: .j] then 0 else 1 end)) as $temp |
|||
| .costs[.i][.j] as $cij |
|||
| if $cij == $temp |
|||
then .i += -1 |
|||
| .aPathRev += $a[.i: .i+1] |
|||
| .j += -1 |
|||
| .bPathRev += $b[.j: .j+1] |
|||
elif $cij == 1 + .costs[.i-1][.j] |
|||
then .i += -1 |
|||
| .aPathRev += $a[.i:.i+1] |
|||
| .bPathRev += "-" |
|||
elif $cij == 1 + .costs[.i][.j-1] |
|||
then .aPathRev += "-" |
|||
| .j += -1 |
|||
| .bPathRev += $b[.j: .j+1] |
|||
end) |
|||
| [.aPathRev, .bPathRev ] | map(reverseString); |
|||
levenshteinAlign("place"; "palace"), |
|||
levenshteinAlign("rosettacode";"raisethysword") |
|||
</syntaxhighlight> |
|||
{{output}} |
|||
<pre> |
|||
[ |
|||
"p-lace", |
|||
"palace" |
|||
] |
|||
[ |
|||
"r-oset-tacode", |
|||
"raisethysword" |
|||
] |
|||
</pre> |
|||
=={{header|Julia}}== |
|||
{{works with|Julia|0.6}} |
|||
{{trans|Java}} |
|||
<syntaxhighlight lang="julia">function levenshteinalign(a::AbstractString, b::AbstractString) |
|||
a = lowercase(a) |
|||
b = lowercase(b) |
|||
len_a = length(a) |
|||
len_b = length(b) |
|||
costs = Matrix{Int}(len_a + 1, len_b + 1) |
|||
costs[1, :] .= 0:len_b |
|||
@inbounds for i in 2:(len_a + 1) |
|||
costs[i, 1] = i |
|||
for j in 2:(len_b + 1) |
|||
tmp = ifelse(a[i-1] == b[j-1], costs[i-1, j-1], costs[i-1, j-1] + 1) |
|||
costs[i, j] = min(1 + min(costs[i-1, j], costs[i, j-1]), tmp) |
|||
end |
|||
end |
|||
apathrev = IOBuffer() |
|||
bpathrev = IOBuffer() |
|||
local i = len_a + 1 |
|||
local j = len_b + 1 |
|||
@inbounds while i != 1 && j != 1 |
|||
tmp = ifelse(a[i-1] == b[j-1], costs[i-1, j-1], costs[i-1, j-1] + 1) |
|||
if costs[i, j] == tmp |
|||
i -= 1 |
|||
j -= 1 |
|||
print(apathrev, a[i]) |
|||
print(bpathrev, b[j]) |
|||
elseif costs[i, j] == 1 + costs[i-1, j] |
|||
i -= 1 |
|||
print(apathrev, a[i]) |
|||
print(bpathrev, '-') |
|||
elseif costs[i, j] == 1 + costs[i, j-1] |
|||
j -= 1 |
|||
print(apathrev, '-') |
|||
print(bpathrev, b[j]) |
|||
end |
|||
end |
|||
return reverse(String(take!(apathrev))), reverse(String(take!(bpathrev))) |
|||
end |
|||
foreach(println, levenshteinalign("rosettacode", "raisethysword")) |
|||
foreach(println, levenshteinalign("place", "palace"))</syntaxhighlight> |
|||
{{out}} |
|||
<pre>r-oset-tacode |
|||
raisethysword |
|||
p-lace |
|||
palace</pre> |
|||
=={{header|Kotlin}}== |
|||
{{trans|Java}} |
|||
<syntaxhighlight lang="scala">// version 1.1.3 |
|||
fun levenshteinAlign(a: String, b: String): Array<String> { |
|||
val aa = a.toLowerCase() |
|||
val bb = b.toLowerCase() |
|||
val costs = Array(a.length + 1) { IntArray(b.length + 1) } |
|||
for (j in 0..b.length) costs[0][j] = j |
|||
for (i in 1..a.length) { |
|||
costs[i][0] = i |
|||
for (j in 1..b.length) { |
|||
val temp = costs[i - 1][j - 1] + (if (aa[i - 1] == bb[j - 1]) 0 else 1) |
|||
costs[i][j] = minOf(1 + minOf(costs[i - 1][j], costs[i][j - 1]), temp) |
|||
} |
|||
} |
|||
// walk back through matrix to figure out path |
|||
val aPathRev = StringBuilder() |
|||
val bPathRev = StringBuilder() |
|||
var i = a.length |
|||
var j = b.length |
|||
while (i != 0 && j != 0) { |
|||
val temp = costs[i - 1][j - 1] + (if (aa[i - 1] == bb[j - 1]) 0 else 1) |
|||
when (costs[i][j]) { |
|||
temp -> { |
|||
aPathRev.append(aa[--i]) |
|||
bPathRev.append(bb[--j]) |
|||
} |
|||
1 + costs[i-1][j] -> { |
|||
aPathRev.append(aa[--i]) |
|||
bPathRev.append('-') |
|||
} |
|||
1 + costs[i][j-1] -> { |
|||
aPathRev.append('-') |
|||
bPathRev.append(bb[--j]) |
|||
} |
|||
} |
|||
} |
|||
return arrayOf(aPathRev.reverse().toString(), bPathRev.reverse().toString()) |
|||
} |
|||
fun main(args: Array<String>) { |
|||
var result = levenshteinAlign("place", "palace") |
|||
println(result[0]) |
|||
println(result[1]) |
|||
println() |
|||
result = levenshteinAlign("rosettacode","raisethysword") |
|||
println(result[0]) |
|||
println(result[1]) |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
p-lace |
|||
palace |
|||
r-oset-tacode |
|||
raisethysword |
|||
</pre> |
|||
=={{header|Mathematica}} / {{header|Wolfram Language}}== |
|||
{{incorrect}} |
|||
{{works with|Mathematica|7}} |
|||
<syntaxhighlight lang="mathematica">DamerauLevenshteinDistance["rosettacode", "raisethysword"]</syntaxhighlight> |
|||
{{out}}<pre>8</pre> |
|||
=={{header|Nim}}== |
|||
{{trans|Kotlin}} |
|||
<syntaxhighlight lang="nim">import algorithm, sequtils, strutils |
|||
proc levenshteinAlign(a, b: string): tuple[a, b: string] = |
|||
let a = a.toLower() |
|||
let b = b.toLower() |
|||
var costs = newSeqWith(a.len + 1, newSeq[int](b.len + 1)) |
|||
for j in 0..b.len: costs[0][j] = j |
|||
for i in 1..a.len: |
|||
costs[i][0] = i |
|||
for j in 1..b.len: |
|||
let tmp = costs[i - 1][j - 1] + ord(a[i - 1] != b[j - 1]) |
|||
costs[i][j] = min(1 + min(costs[i - 1][j], costs[i][j - 1]), tmp) |
|||
# Walk back through matrix to figure out path. |
|||
var aPathRev, bPathRev: string |
|||
var i = a.len |
|||
var j = b.len |
|||
while i != 0 and j != 0: |
|||
let tmp = costs[i - 1][j - 1] + ord(a[i - 1] != b[j - 1]) |
|||
if costs[i][j] == tmp: |
|||
dec i |
|||
dec j |
|||
aPathRev.add a[i] |
|||
bPathRev.add b[j] |
|||
elif costs[i][j] == 1 + costs[i-1][j]: |
|||
dec i |
|||
aPathRev.add a[i] |
|||
bPathRev.add '-' |
|||
elif costs[i][j] == 1 + costs[i][j-1]: |
|||
dec j |
|||
aPathRev.add '-' |
|||
bPathRev.add b[j] |
|||
else: |
|||
quit "Internal error" |
|||
result = (reversed(aPathRev).join(), reversed(bPathRev).join()) |
|||
when isMainModule: |
|||
var result = levenshteinAlign("place", "palace") |
|||
echo result.a |
|||
echo result.b |
|||
echo "" |
|||
result = levenshteinAlign("rosettacode","raisethysword") |
|||
echo result.a |
|||
echo result.b</syntaxhighlight> |
|||
{{out}} |
|||
<pre>p-lace |
|||
palace |
|||
r-oset-tacode |
|||
raisethysword</pre> |
|||
=={{header|Perl}}== |
=={{header|Perl}}== |
||
< |
<syntaxhighlight lang="perl">use strict; |
||
use warnings; |
use warnings; |
||
Line 48: | Line 1,067: | ||
} |
} |
||
print join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword";</ |
print join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword";</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>ro-settac-o-de |
<pre>ro-settac-o-de |
||
raisethysword-</pre> |
raisethysword-</pre> |
||
=={{header| |
=={{header|Phix}}== |
||
{{trans|Kotlin}} plus the indicator from Go |
|||
<!--<syntaxhighlight lang="phix">(phixonline)--> |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">LevenshteinAlignment</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">a</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">la</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">)+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> |
|||
<span style="color: #000000;">lb</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">b</span><span style="color: #0000FF;">)+</span><span style="color: #000000;">1</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">costs</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">lb</span><span style="color: #0000FF;">),</span><span style="color: #000000;">la</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">lb</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">j</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">2</span> <span style="color: #008080;">to</span> <span style="color: #000000;">la</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span> |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">=</span><span style="color: #000000;">2</span> <span style="color: #008080;">to</span> <span style="color: #000000;">lb</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">tmp1</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">min</span><span style="color: #0000FF;">(</span><span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]),</span> |
|||
<span style="color: #000000;">tmp2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]!=</span><span style="color: #000000;">b</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span> |
|||
<span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">min</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tmp1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">tmp2</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
<span style="color: #000080;font-style:italic;">-- walk back through matrix to figure out the path</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">arev</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span><span style="color: #0000FF;">,</span> |
|||
<span style="color: #000000;">brev</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">la</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">lb</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">></span><span style="color: #000000;">1</span> <span style="color: #008080;">and</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">></span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">tmp</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]!=</span><span style="color: #000000;">b</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span> |
|||
<span style="color: #008080;">switch</span> <span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #008080;">case</span> <span style="color: #000000;">tmp</span><span style="color: #0000FF;">:</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">;</span> <span style="color: #000000;">arev</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">a</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #000000;">j</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">;</span> <span style="color: #000000;">brev</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">case</span> <span style="color: #000000;">1</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]:</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">;</span> <span style="color: #000000;">arev</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">a</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #000000;">brev</span> <span style="color: #0000FF;">&=</span> <span style="color: #008000;">'-'</span> |
|||
<span style="color: #008080;">case</span> <span style="color: #000000;">1</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]:</span> <span style="color: #000000;">arev</span> <span style="color: #0000FF;">&=</span> <span style="color: #008000;">'-'</span> |
|||
<span style="color: #000000;">j</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">;</span> <span style="color: #000000;">brev</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">switch</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #0000FF;">{</span><span style="color: #7060A8;">reverse</span><span style="color: #0000FF;">(</span><span style="color: #000000;">arev</span><span style="color: #0000FF;">),</span><span style="color: #7060A8;">reverse</span><span style="color: #0000FF;">(</span><span style="color: #000000;">brev</span><span style="color: #0000FF;">)}</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
<span style="color: #008080;">procedure</span> <span style="color: #000000;">test</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">a</span><span style="color: #0000FF;">,</span><span style="color: #000000;">b</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #0000FF;">{</span><span style="color: #000000;">a</span><span style="color: #0000FF;">,</span><span style="color: #000000;">b</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">LevenshteinAlignment</span><span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">,</span><span style="color: #000000;">b</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">c</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sq_add</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">)),</span><span style="color: #7060A8;">sq_mul</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">sq_ne</span><span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">,</span><span style="color: #000000;">b</span><span style="color: #0000FF;">),</span><span style="color: #008000;">'|'</span><span style="color: #0000FF;">-</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">))</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s\n%s\n%s\n\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">a</span><span style="color: #0000FF;">,</span><span style="color: #000000;">b</span><span style="color: #0000FF;">,</span><span style="color: #000000;">c</span><span style="color: #0000FF;">})</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span> |
|||
<span style="color: #000000;">test</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rosettacode"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"raisethysword"</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">test</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"place"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"palace"</span><span style="color: #0000FF;">)</span> |
|||
<!--</syntaxhighlight>--> |
|||
{{out}} |
|||
<pre> |
|||
r-oset-tacode |
|||
raisethysword |
|||
|| |||| || |
|||
p-lace |
|||
palace |
|||
| |
|||
</pre> |
|||
=={{header|Python}}== |
|||
<syntaxhighlight lang="python">from difflib import ndiff |
|||
def levenshtein(str1, str2): |
|||
result = "" |
|||
pos, removed = 0, 0 |
|||
for x in ndiff(str1, str2): |
|||
if pos<len(str1) and str1[pos] == x[2]: |
|||
pos += 1 |
|||
result += x[2] |
|||
if x[0] == "-": |
|||
removed += 1 |
|||
continue |
|||
else: |
|||
if removed > 0: |
|||
removed -=1 |
|||
else: |
|||
result += "-" |
|||
print(result) |
|||
levenshtein("place","palace") |
|||
levenshtein("rosettacode","raisethysword")</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
p-lace |
|||
ro-settac-o-de |
|||
</pre> |
|||
=={{header|Racket}}== |
|||
===Simple version (no aligment)=== |
|||
First we will analyze this solution that only computes the distance. |
|||
See http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html |
|||
for a discussion of the code. |
|||
<syntaxhighlight lang="racket">#lang racket |
|||
(define (memoize f) |
|||
(local ([define table (make-hash)]) |
|||
(lambda args |
|||
(dict-ref! table args (λ () (apply f args)))))) |
|||
(define levenshtein |
|||
(memoize |
|||
(lambda (s t) |
|||
(cond |
|||
[(and (empty? s) (empty? t)) 0] |
|||
[(empty? s) (length t)] |
|||
[(empty? t) (length s)] |
|||
[else |
|||
(if (equal? (first s) (first t)) |
|||
(levenshtein (rest s) (rest t)) |
|||
(min (add1 (levenshtein (rest s) t)) |
|||
(add1 (levenshtein s (rest t))) |
|||
(add1 (levenshtein (rest s) (rest t)))))]))))</syntaxhighlight> |
|||
'''Demonstration:''' |
|||
<syntaxhighlight lang="racket">(levenshtein (string->list "rosettacode") |
|||
(string->list "raisethysword"))</syntaxhighlight> |
|||
{{out}} |
|||
<pre>8</pre> |
|||
===Complete version=== |
|||
Now we extend the code from http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html to show also the alignment. The code is very similar, but it stores the partial results (number of edits and alignment of each substring) in a lev structure. |
|||
<syntaxhighlight lang="racket">#lang racket |
|||
(struct lev (n s t)) |
|||
(define (lev-add old n sx tx) |
|||
(lev (+ n (lev-n old)) |
|||
(cons sx (lev-s old)) |
|||
(cons tx (lev-t old)))) |
|||
(define (list-repeat n v) |
|||
(build-list n (lambda (_) v))) |
|||
(define (memoize f) |
|||
(local ([define table (make-hash)]) |
|||
(lambda args |
|||
(dict-ref! table args (λ () (apply f args)))))) |
|||
(define levenshtein/list |
|||
(memoize |
|||
(lambda (s t) |
|||
(cond |
|||
[(and (empty? s) (empty? t)) |
|||
(lev 0 '() '())] |
|||
[(empty? s) |
|||
(lev (length t) (list-repeat (length t) #\-) t)] |
|||
[(empty? t) |
|||
(lev (length s) s (list-repeat (length s) #\-))] |
|||
[else |
|||
(if (equal? (first s) (first t)) |
|||
(lev-add (levenshtein/list (rest s) (rest t)) |
|||
0 (first s) (first t)) |
|||
(argmin lev-n (list (lev-add (levenshtein/list (rest s) t) |
|||
1 (first s) #\-) |
|||
(lev-add (levenshtein/list s (rest t)) |
|||
1 #\- (first t)) |
|||
(lev-add (levenshtein/list (rest s) (rest t)) |
|||
1 (first s) (first t)))))])))) |
|||
(define (levenshtein s t) |
|||
(let ([result (levenshtein/list (string->list s) |
|||
(string->list t))]) |
|||
(values (lev-n result) |
|||
(list->string (lev-s result)) |
|||
(list->string (lev-t result)))))</syntaxhighlight> |
|||
'''Demonstration:''' |
|||
<syntaxhighlight lang="racket">(let-values ([(dist exp-s exp-t) |
|||
(levenshtein "rosettacode" "raisethysword")]) |
|||
(printf "levenshtein: ~a edits\n" dist) |
|||
(displayln exp-s) |
|||
(displayln exp-t))</syntaxhighlight> |
|||
{{out}} |
|||
<pre>levenshtein: 8 edits |
|||
r-oset-taco-de |
|||
raisethysword-</pre> |
|||
=={{header|Raku}}== |
|||
(formerly Perl 6) |
|||
{{trans|Perl}} |
{{trans|Perl}} |
||
< |
<syntaxhighlight lang="raku" line>sub align ( Str $σ, Str $t ) { |
||
my @s = *, $σ.comb; |
my @s = flat *, $σ.comb; |
||
my @t = *, $t.comb; |
my @t = flat *, $t.comb; |
||
my @A; |
my @A; |
||
Line 63: | Line 1,255: | ||
@A[ 0][$_]<d s t> = $_, '-' x $_, @t[1..$_].join for ^@t; |
@A[ 0][$_]<d s t> = $_, '-' x $_, @t[1..$_].join for ^@t; |
||
for 1 ..^ @s X 1..^ @t -> \i, \j { |
for 1 ..^ @s X 1..^ @t -> (\i, \j) { |
||
if @s[i] ne @t[j] { |
if @s[i] ne @t[j] { |
||
@A[i][j]<d> = 1 + my $min = |
@A[i][j]<d> = 1 + my $min = |
||
Line 79: | Line 1,271: | ||
} |
} |
||
.say for align |
.say for align 'rosettacode', 'raisethysword';</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>ro-settac-o-de |
<pre>ro-settac-o-de |
||
raisethysword-</pre> |
raisethysword-</pre> |
||
=={{header| |
=={{header|Ruby}}== |
||
{{trans|Tcl}} |
|||
This solution only computes the distance. |
|||
uses "lcs" from [[Longest common subsequence#Ruby|here]] |
|||
See http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html |
|||
<syntaxhighlight lang="ruby">require 'lcs' |
|||
for a discussion of the code. |
|||
def levenshtein_align(a, b) |
|||
<lang racket>#lang racket |
|||
apos, bpos = LCS.new(a, b).backtrack2 |
|||
c = "" |
|||
d = "" |
|||
x0 = y0 = -1 |
|||
dx = dy = 0 |
|||
apos.zip(bpos) do |x,y| |
|||
diff = x + dx - y - dy |
|||
if diff < 0 |
|||
dx -= diff |
|||
c += "-" * (-diff) |
|||
elsif diff > 0 |
|||
dy += diff |
|||
d += "-" * diff |
|||
end |
|||
c += a[x0+1..x] |
|||
x0 = x |
|||
d += b[y0+1..y] |
|||
y0 = y |
|||
end |
|||
c += a[x0+1..-1] |
|||
d += b[y0+1..-1] |
|||
diff = a.length + y0 - b.length - x0 |
|||
if diff < 0 |
|||
c += "-" * (-diff) |
|||
elsif diff > 0 |
|||
d += "-" * diff |
|||
end |
|||
[c, d] |
|||
end |
|||
puts levenshtein_align("rosettacode", "raisethysword")</syntaxhighlight> |
|||
(define (memoize f) |
|||
(local ([define table (make-hash)]) |
|||
(lambda args |
|||
(dict-ref! table args (λ () (apply f args)))))) |
|||
{{out}} |
|||
(define levenshtein |
|||
<pre> |
|||
(memoize |
|||
r-oset-taco-de |
|||
(lambda (s t) |
|||
raisethysword- |
|||
(cond |
|||
</pre> |
|||
[(and (empty? s) (empty? t)) 0] |
|||
[(empty? s) (length t)] |
|||
=={{header|Rust}}== |
|||
[(empty? t) (length s)] |
|||
'''Cargo.toml''' |
|||
[else |
|||
<pre>[dependencies] |
|||
(if (equal? (first s) (first t)) |
|||
edit-distance = "^1.0.0"</pre> |
|||
(levenshtein (rest s) (rest t)) |
|||
(min (add1 (levenshtein (rest s) t)) |
|||
'''src/main.rs''' |
|||
(add1 (levenshtein s (rest t))) |
|||
<syntaxhighlight lang="rust">extern crate edit_distance; |
|||
(add1 (levenshtein (rest s) (rest t)))))]))))</lang> |
|||
Demonstration: |
|||
edit_distance("rosettacode", "raisethysword");</syntaxhighlight> |
|||
<lang racket>(levenshtein (string->list "rosettacode") |
|||
(string->list "raisethysword"))</lang> |
|||
=={{header|Scala}}== |
|||
{{Out}}Best seen running in your browser either by [https://scastie.scala-lang.org/I8BAESkNTjukVPzsWOUyPA ScalaFiddle (ES aka JavaScript, non JVM)] or [https://scastie.scala-lang.org/I8BAESkNTjukVPzsWOUyPA]. |
|||
<syntaxhighlight lang="scala">import scala.collection.mutable |
|||
import scala.collection.parallel.ParSeq |
|||
object LevenshteinAlignment extends App { |
|||
val vlad = new Levenshtein("rosettacode", "raisethysword") |
|||
val alignment = vlad.revLevenstein() |
|||
class Levenshtein(s1: String, s2: String) { |
|||
val memoizedCosts = mutable.Map[(Int, Int), Int]() |
|||
def revLevenstein(): (String, String) = { |
|||
def revLev: (Int, Int, String, String) => (String, String) = { |
|||
case (_, 0, revS1, revS2) => (revS1, revS2) |
|||
case (0, _, revS1, revS2) => (revS1, revS2) |
|||
case (i, j, revS1, revS2) => |
|||
if (memoizedCosts(i, j) == (memoizedCosts(i - 1, j - 1) |
|||
+ (if (s1(i - 1) != s2(j - 1)) 1 else 0))) |
|||
revLev(i - 1, j - 1, s1(i - 1) + revS1, s2(j - 1) + revS2) |
|||
else if (memoizedCosts(i, j) == 1 + memoizedCosts(i - 1, j)) |
|||
revLev(i - 1, j, s1(i - 1) + revS1, "-" + revS2) |
|||
else |
|||
revLev(i, j - 1, "-" + revS1, s2(j - 1) + revS2) |
|||
} |
|||
revLev(s1.length, s2.length, "", "") |
|||
} |
|||
private def levenshtein: Int = { |
|||
def lev: ((Int, Int)) => Int = { |
|||
case (k1, k2) => |
|||
memoizedCosts.getOrElseUpdate((k1, k2), (k1, k2) match { |
|||
case (i, 0) => i |
|||
case (0, j) => j |
|||
case (i, j) => |
|||
ParSeq(1 + lev((i - 1, j)), |
|||
1 + lev((i, j - 1)), |
|||
lev((i - 1, j - 1)) |
|||
+ (if (s1(i - 1) != s2(j - 1)) 1 else 0)).min |
|||
}) |
|||
} |
|||
lev((s1.length, s2.length)) |
|||
} |
|||
levenshtein |
|||
} |
|||
println(alignment._1) |
|||
println(alignment._2) |
|||
}</syntaxhighlight> |
|||
=={{header|Sidef}}== |
|||
{{trans|Perl}} |
|||
<syntaxhighlight lang="ruby">func align(s, t) { |
|||
s.chars!.prepend!('^') |
|||
t.chars!.prepend!('^') |
|||
var A = [] |
|||
{|i| A[i][0]{@|<d s t>} = (i, s.slice(1).first(i).join, '~' * i) } << ^s |
|||
{|i| A[0][i]{@|<d s t>} = (i, '-' * i, t.slice(1).first(i).join) } << ^t |
|||
for i (1 .. s.end) { |
|||
for j (1 .. t.end) { |
|||
if (s[i] != t[j]) { |
|||
A[i][j]{:d} = 1+( |
|||
var min = Math.min(A[i-1][j]{:d}, A[i][j-1]{:d}, A[i-1][j-1]{:d}) |
|||
) |
|||
A[i][j]{@|<s t>} = (A[i-1][j]{:d} == min |
|||
? [A[i-1][j]{:s}+s[i], A[i-1][j]{:t}+'-'] |
|||
: (A[i][j-1]{:d} == min |
|||
? [A[i][j-1]{:s}+'-', A[i][j-1]{:t}+t[j]] |
|||
: [A[i-1][j-1]{:s}+s[i], A[i-1][j-1]{:t}+t[j]]))... |
|||
} |
|||
else { |
|||
A[i][j]{@|<d s t>} = ( |
|||
A[i-1][j-1]{:d}, |
|||
A[i-1][j-1]{:s}+s[i], |
|||
A[i-1][j-1]{:t}+t[j], |
|||
) |
|||
} |
|||
} |
|||
} |
|||
return [A[-1][-1]{@|<s t>}] |
|||
} |
|||
align("rosettacode", "raisethysword").each { .say }</syntaxhighlight> |
|||
{{out}} |
|||
ro-settac-o-de |
|||
raisethysword- |
|||
=={{header|Tcl}}== |
|||
{{tcllib|struct::list}} |
|||
<syntaxhighlight lang="tcl">package require struct::list |
|||
proc levenshtein/align {a b} { |
|||
lassign [struct::list longestCommonSubsequence [split $a ""] [split $b ""]]\ |
|||
apos bpos |
|||
set c "" |
|||
set d "" |
|||
set x0 [set y0 -1] |
|||
set dx [set dy 0] |
|||
foreach x $apos y $bpos { |
|||
if {$x+$dx < $y+$dy} { |
|||
set n [expr {($y+$dy)-($x+$dx)}] |
|||
incr dx $n |
|||
append c [string repeat "-" $n] |
|||
} elseif {$x+$dx > $y+$dy} { |
|||
set n [expr {($x+$dx)-($y+$dy)}] |
|||
incr dy $n |
|||
append d [string repeat "-" $n] |
|||
} |
|||
append c [string range $a $x0+1 $x] |
|||
set x0 $x |
|||
append d [string range $b $y0+1 $y] |
|||
set y0 $y |
|||
} |
|||
append c [string range $a $x0+1 end] |
|||
append d [string range $b $y0+1 end] |
|||
set al [string length $a] |
|||
set bl [string length $b] |
|||
if {$al+$y0 < $bl+$x0} { |
|||
append c [string repeat "-" [expr {$bl+$x0-$y0-$al}]] |
|||
} elseif {$bl+$x0 < $al+$y0} { |
|||
append d [string repeat "-" [expr {$al+$y0-$x0-$bl}]] |
|||
} |
|||
return $c\n$d |
|||
} |
|||
puts [levenshtein/align "rosettacode" "raisethysword"]</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
r-oset-taco-de |
|||
raisethysword- |
|||
</pre> |
|||
=={{header|Wren}}== |
|||
{{trans|Kotlin}} |
|||
{{libheader|Wren-str}} |
|||
<syntaxhighlight lang="wren">import "./str" for Str |
|||
var levenshteinAlign = Fn.new { |a, b| |
|||
a = Str.lower(a) |
|||
b = Str.lower(b) |
|||
var costs = List.filled(a.count+1, null) |
|||
for (i in 0..a.count) costs[i] = List.filled(b.count+1, 0) |
|||
for (j in 0..b.count) costs[0][j] = j |
|||
for (i in 1..a.count) { |
|||
costs[i][0] = i |
|||
for (j in 1..b.count) { |
|||
var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1) |
|||
costs[i][j] = temp.min(1 + costs[i - 1][j].min(costs[i][j - 1])) |
|||
} |
|||
} |
|||
// walk back through matrix to figure out path |
|||
var aPathRev = "" |
|||
var bPathRev = "" |
|||
var i = a.count |
|||
var j = b.count |
|||
while (i != 0 && j != 0) { |
|||
var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1) |
|||
var cij = costs[i][j] |
|||
if (cij == temp) { |
|||
i = i - 1 |
|||
aPathRev = aPathRev + a[i] |
|||
j = j - 1 |
|||
bPathRev = bPathRev + b[j] |
|||
} else if (cij == 1 + costs[i-1][j]) { |
|||
i = i - 1 |
|||
aPathRev = aPathRev + a[i] |
|||
bPathRev = bPathRev + "-" |
|||
} else if (cij == 1 + costs[i][j-1]) { |
|||
aPathRev = aPathRev + "-" |
|||
j = j - 1 |
|||
bPathRev = bPathRev+ b[j] |
|||
} |
|||
} |
|||
return [aPathRev[-1..0], bPathRev[-1..0]] |
|||
} |
|||
var result = levenshteinAlign.call("place", "palace") |
|||
System.print(result[0]) |
|||
System.print(result[1]) |
|||
System.print() |
|||
result = levenshteinAlign.call("rosettacode","raisethysword") |
|||
System.print(result[0]) |
|||
System.print(result[1])</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
p-lace |
|||
palace |
|||
r-oset-tacode |
|||
raisethysword |
|||
</pre> |
|||
=={{header|zkl}}== |
|||
{{trans|Java}} |
|||
<syntaxhighlight lang="zkl">fcn alignment(a,b){ |
|||
a,b = a.toLower(), b.toLower(); |
|||
costs := (a.len()+1).pump(List(),'wrap(a){ [1..b.len()].pump(List(a)) }); |
|||
foreach i,j in (a.len()+1, [1..b.len()]){ |
|||
costs[i][j] = ( 1 + costs[i-1][j].min(costs[i][j-1]) ) |
|||
.min(if(a[i-1] == b[j-1]) costs[i-1][j-1] else costs[i-1][j-1] + 1); |
|||
} |
|||
// walk back through matrix to figure out path |
|||
aPathRev,bPathRev := Data(),Data(); // byte buckets |
|||
i,j := a.len(), b.len(); |
|||
while(i!=0 and j!= 0){ |
|||
if (costs[i][j] == |
|||
( if(a[i-1]==b[j-1]) costs[i-1][j-1] else costs[i-1][j-1]+1 )){ |
|||
aPathRev.append(a[i-=1]); |
|||
bPathRev.append(b[j-=1]); |
|||
} else if(costs[i][j] == 1+costs[i-1][j]){ |
|||
aPathRev.append(a[i-=1]); |
|||
bPathRev.append("-"); |
|||
} else if (costs[i][j] == 1+costs[i][j-1]){ |
|||
aPathRev.append("-"); |
|||
bPathRev.append(b[j-=1]); |
|||
} |
|||
} |
|||
return(aPathRev.text.reverse(), bPathRev.text.reverse()) |
|||
}</syntaxhighlight> |
|||
<syntaxhighlight lang="zkl">result := alignment("rosettacode", "raisethysword"); |
|||
println(result[0]); |
|||
println(result[1]);</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
r-oset-tacode |
|||
raisethysword |
|||
</pre> |
Latest revision as of 08:13, 25 May 2024
The Levenshtein distance algorithm returns the number of atomic operations (insertion, deletion or edition) that must be performed on a string in order to obtain an other one, but it does not say anything about the actual operations used or their order.
![Task](http://static.miraheze.org/rosettacodewiki/thumb/b/ba/Rcode-button-task-crushed.png/64px-Rcode-button-task-crushed.png)
You are encouraged to solve this task according to the task description, using any language you may know.
An alignment is a notation used to describe the operations used to turn a string into an other. At some point in the strings, the minus character ('-') is placed in order to signify that a character must be added at this very place. For instance, an alignment between the words 'place' and 'palace' is:
P-LACE PALACE
- Task
Write a function that shows the alignment of two strings for the corresponding levenshtein distance.
As an example, use the words "rosettacode" and "raisethysword".
You can either implement an algorithm, or use a dedicated library (thus showing us how it is named in your language).
11l
F levenshteinAlign(aa, bb)
V a = aa.lowercase()
V b = bb.lowercase()
V costs = [[0] * (b.len + 1)] * (a.len + 1)
L(j) 0 .. b.len
costs[0][j] = j
L(i) 1 .. a.len
costs[i][0] = i
L(j) 1 .. b.len
V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1])
costs[i][j] = min(1 + min(costs[i - 1][j], costs[i][j - 1]), tmp)
V aPathRev = ‘’
V bPathRev = ‘’
V i = a.len
V j = b.len
L i != 0 & j != 0
V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1])
I costs[i][j] == tmp
aPathRev ‘’= a[--i]
bPathRev ‘’= b[--j]
E I costs[i][j] == 1 + costs[i - 1][j]
aPathRev ‘’= a[--i]
bPathRev ‘’= ‘-’
E I costs[i][j] == 1 + costs[i][j - 1]
aPathRev ‘’= ‘-’
bPathRev ‘’= b[--j]
E
assert(0B, ‘Internal error’)
R (reversed(aPathRev), reversed(bPathRev))
V result = levenshteinAlign(‘place’, ‘palace’)
print(result[0])
print(result[1])
print()
result = levenshteinAlign(‘rosettacode’, ‘raisethysword’)
print(result[0])
print(result[1])
- Output:
p-lace palace r-oset-tacode raisethysword
Arturo
print join.with:"\n" levenshtein.align "place" "palace"
print join.with:"\n" levenshtein.align "rosettacode" "raisethysword"
- Output:
p-lace palace r-oset-tacode raisethysword
C
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct edit_s edit_t, *edit;
struct edit_s {
char c1, c2;
int n;
edit next;
};
void leven(char *a, char *b)
{
int i, j, la = strlen(a), lb = strlen(b);
edit *tbl = malloc(sizeof(edit) * (1 + la));
tbl[0] = calloc((1 + la) * (1 + lb), sizeof(edit_t));
for (i = 1; i <= la; i++)
tbl[i] = tbl[i-1] + (1+lb);
for (i = la; i >= 0; i--) {
char *aa = a + i;
for (j = lb; j >= 0; j--) {
char *bb = b + j;
if (!*aa && !*bb) continue;
edit e = &tbl[i][j];
edit repl = &tbl[i+1][j+1];
edit dela = &tbl[i+1][j];
edit delb = &tbl[i][j+1];
e->c1 = *aa;
e->c2 = *bb;
if (!*aa) {
e->next = delb;
e->n = e->next->n + 1;
continue;
}
if (!*bb) {
e->next = dela;
e->n = e->next->n + 1;
continue;
}
e->next = repl;
if (*aa == *bb) {
e->n = e->next->n;
continue;
}
if (e->next->n > delb->n) {
e->next = delb;
e->c1 = 0;
}
if (e->next->n > dela->n) {
e->next = dela;
e->c1 = *aa;
e->c2 = 0;
}
e->n = e->next->n + 1;
}
}
edit p = tbl[0];
printf("%s -> %s: %d edits\n", a, b, p->n);
while (p->next) {
if (p->c1 == p->c2)
printf("%c", p->c1);
else {
putchar('(');
if (p->c1) putchar(p->c1);
putchar(',');
if (p->c2) putchar(p->c2);
putchar(')');
}
p = p->next;
}
putchar('\n');
free(tbl[0]);
free(tbl);
}
int main(void)
{
leven("raisethysword", "rosettacode");
return 0;
}
- Output:
raisethysword -> rosettacode: 8 edits r(a,o)(i,)set(h,t)(y,a)(s,c)(w,)o(r,d)(d,e)
C#
using System;
using System.Text;
public class LevenshteinAlignment
{
public static string[] Alignment(string a, string b)
{
a = a.ToLower();
b = b.ToLower();
int[,] costs = new int[a.Length + 1, b.Length + 1];
for (int j = 0; j <= b.Length; j++)
costs[0, j] = j;
for (int i = 1; i <= a.Length; i++)
{
costs[i, 0] = i;
for (int j = 1; j <= b.Length; j++)
{
costs[i, j] = Math.Min(1 + Math.Min(costs[i - 1, j], costs[i, j - 1]),
a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1);
}
}
StringBuilder aPathRev = new StringBuilder();
StringBuilder bPathRev = new StringBuilder();
for (int i = a.Length, j = b.Length; i != 0 && j != 0;)
{
if (costs[i, j] == (a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1))
{
aPathRev.Append(a[--i]);
bPathRev.Append(b[--j]);
}
else if (costs[i, j] == 1 + costs[i - 1, j])
{
aPathRev.Append(a[--i]);
bPathRev.Append('-');
}
else if (costs[i, j] == 1 + costs[i, j - 1])
{
aPathRev.Append('-');
bPathRev.Append(b[--j]);
}
}
return new string[] { Reverse(aPathRev.ToString()), Reverse(bPathRev.ToString()) };
}
private static string Reverse(string s)
{
char[] charArray = s.ToCharArray();
Array.Reverse(charArray);
return new string(charArray);
}
public static void Main(string[] args)
{
string[] result = Alignment("rosettacode", "raisethysword");
Console.WriteLine(result[0]);
Console.WriteLine(result[1]);
}
}
- Output:
r-oset-tacode raisethysword
C++
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>
std::string to_lower_case(const std::string& text) {
std::string result = text;
std::transform(result.begin(), result.end(), result.begin(),
[](char ch){ return std::tolower(ch); });
return result;
}
std::vector<std::string> levenshtein_alignment(std::string a, std::string b) {
a = to_lower_case(a);
b = to_lower_case(b);
std::vector<std::vector<int32_t>> costs{ a.length() + 1, std::vector<int32_t>( b.length() + 1, 0 ) };
for ( uint64_t j = 0; j <= b.length(); ++j )
costs[0][j] = j;
for ( uint64_t i = 1; i <= a.length(); ++i ) {
costs[i][0] = i;
for ( uint64_t j = 1; j <= b.length(); ++j ) {
costs[i][j] = std::min(std::min( costs[i - 1][j], costs[i][j - 1]) + 1,
a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1);
}
}
std::string a_path_reversed, b_path_reversed;
uint64_t i = a.length(), j = b.length();
while ( i != 0 && j != 0 ) {
if ( costs[i][j] == ( a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1 ) ) {
a_path_reversed += a[--i];
b_path_reversed += b[--j];
} else if ( costs[i][j] == costs[i - 1][j] + 1 ) {
a_path_reversed += a[--i];
b_path_reversed += "-";
} else if ( costs[i][j] == costs[i][j - 1] + 1 ) {
a_path_reversed += "-";
b_path_reversed += b[--j];
}
}
std::reverse(a_path_reversed.begin(), a_path_reversed.end());
std::reverse(b_path_reversed.begin(), b_path_reversed.end());
return std::vector<std::string>{ a_path_reversed, b_path_reversed };
}
int main() {
std::vector<std::string> result = levenshtein_alignment("place", "palace");
std::cout << result[0] << std::endl;
std::cout << result[1] << std::endl;
std::cout << std::endl;
result = levenshtein_alignment("rosettacode", "raisethysword");
std::cout << result[0] << std::endl;
std::cout << result[1] << std::endl;
}
- Output:
p-lace palace r-oset-tacode raisethysword
D
Using the standard library.
void main() {
import std.stdio, std.algorithm;
immutable s1 = "rosettacode";
immutable s2 = "raisethysword";
string s1b, s2b;
size_t pos1, pos2;
foreach (immutable c; levenshteinDistanceAndPath(s1, s2)[1]) {
final switch (c) with (EditOp) {
case none, substitute:
s1b ~= s1[pos1++];
s2b ~= s2[pos2++];
break;
case insert:
s1b ~= "_";
s2b ~= s2[pos2++];
break;
case remove:
s1b ~= s1[pos1++];
s2b ~= "_";
break;
}
}
writeln(s1b, "\n", s2b);
}
- Output:
r_oset_tacode raisethysword
EasyLang
global dparr[] dpcols .
proc dpnew a b . .
len dparr[] a * b
dpcols = b
.
func dp r c .
return dparr[r * dpcols + c + 1]
.
proc dps r c v . .
dparr[r * dpcols + c + 1] = v
.
proc align a$ b$ . ar$ br$ .
dpnew (len a$ + 1) (len b$ + 1)
for i = 1 to len a$
dps i 0 i
.
for j = 0 to len b$
dps 0 j j
.
for i = 1 to len a$
for j = 1 to len b$
if substr a$ i 1 = substr b$ j 1
dps i j dp (i - 1) (j - 1)
else
dps i j lower (lower dp (i - 1) j dp i (j - 1)) dp (i - 1) (j - 1) + 1
.
.
.
ar$ = "" ; br$ = ""
i = len a$ ; j = len b$
while i <> 0 and j <> 0
if substr a$ i 1 = substr b$ j 1 or dp i j = dp (i - 1) (j - 1) + 1
ar$ = substr a$ i 1 & ar$
i -= 1
br$ = substr b$ j 1 & br$
j -= 1
elif dp i j = dp (i - 1) j + 1
ar$ = substr a$ i 1 & ar$
i -= 1
br$ = "-" & br$
else
ar$ = "-" & ar$
br$ = substr b$ j 1 & br$
j -= 1
.
.
.
align "rosettacode" "raisethysword" ar$ br$
print ar$ ; print br$
- Output:
r-oset-tacode raisethysword
Eiffel
distance (source, target: STRING): INTEGER
-- Minimum number of operations to turn `source' into `target'.
local
l_distance: ARRAY2 [INTEGER]
del, ins, subst: INTEGER
do
create l_distance.make (source.count, target.count)
⟳ ic:(1 |..| source.count) ¦ l_distance [ic, 1] := ic - 1 ⟲
⟳ ij:(1 |..| target.count) ¦ l_distance [1, ij] := ij - 1 ⟲
⟳ ic:(2 |..| source.count) ¦
⟳ jc:(2 |..| target.count) ¦
if source [ic] = target [jc] then -- same char
l_distance [ic, jc] := l_distance [ic - 1, jc - 1]
else -- diff char
del := l_distance [ic - 1, jc] -- delete?
ins := l_distance [ic, jc - 1] -- insert?
subst := l_distance [ic - 1, jc -1] -- substitute/swap?
l_distance [ic, jc] := del.min (ins.min (subst)) + 1
end
⟲
⟲
Result:= l_distance [source.count, target.count]
end
- Output:
Result = 8
The ⟳ ¦ ⟲ represents the "symbolic form" of an Eiffel across loop. In the case above, the first two ⟳ (loops) initialize the first column and the first row, and then two nested ⟳ (loops) to walk the distance computation.
Also—the `ic' is this programmers shorthand for ITERATION_CURSOR. Therefore, in the nested loops, the `ic' is the iteration cursor following `i' and the `ij' is following `j' as indexes on the source, target, and l_distance.
FreeBASIC
#define min(a, b) iif((a) < (b), (a), (b))
Sub LevenshteinAlignment(s1 As String, s2 As String)
Dim As String align1 = "", align2 = ""
Dim As Integer i, j, len1, len2
len1 = Len(s1):
len2 = Len(s2)
Dim As Integer dp(len1+1, len2+1)
For i = 0 To len1
dp(i, 0) = i
Next i
For j = 0 To len2
dp(0, j) = j
Next j
For i = 1 To len1
For j = 1 To len2
If Mid(s1, i, 1) = Mid(s2, j, 1) Then
dp(i, j) = dp(i-1, j-1)
Else
dp(i, j) = min(min(dp(i-1, j), dp(i, j-1)), dp(i-1, j-1)) + 1
End If
Next j
Next i
i = len1: j = len2
While i > 0 And j > 0
If Mid(s1, i, 1) = Mid(s2, j, 1) Then
align1 = Mid(s1, i, 1) + align1
align2 = Mid(s2, j, 1) + align2
i -= 1: j -= 1
Elseif dp(i, j) = dp(i-1, j-1) + 1 Then
align1 = Mid(s1, i, 1) + align1
align2 = Mid(s2, j, 1) + align2
i -= 1: j -= 1
Elseif dp(i, j) = dp(i-1, j) + 1 Then
align1 = Mid(s1, i, 1) + align1
align2 = "-" + align2
i -= 1
Else
align1 = "-" + align1
align2 = Mid(s2, j, 1) + align2
j -= 1
End If
Wend
While i > 0
align1 = Mid(s1, i, 1) + align1
align2 = "-" + align2
i -= 1
Wend
While j > 0
align1 = "-" + align1
align2 = Mid(s2, j, 1) + align2
j -= 1
Wend
Print "Levenshtein Distance: "; dp(len1, len2)
Print align1
Print align2
Print
End Sub
LevenshteinAlignment("rosettacode", "raisethysword")
LevenshteinAlignment("place", "palace")
Sleep
- Output:
Levenshtein Distance: 8 r-oset-tacode raisethysword Levenshtein Distance: 1 p-lace palace
Go
Alignment computed by the Needleman-Wunch algorithm, which is used in bioinformatics.
package main
import (
"fmt"
"github.com/biogo/biogo/align"
ab "github.com/biogo/biogo/alphabet"
"github.com/biogo/biogo/feat"
"github.com/biogo/biogo/seq/linear"
)
func main() {
// Alphabets for things like DNA are predefined in biogo, but we
// define our own here.
lc := ab.Must(ab.NewAlphabet("-abcdefghijklmnopqrstuvwxyz",
feat.Undefined, '-', 0, true))
// Construct scoring matrix for Needleman-Wunch algorithm.
// We leave zeros on the diagonal for the Levenshtein distance of an
// exact match and put -1s everywhere else for the Levenshtein distance
// of an edit.
nw := make(align.NW, lc.Len())
for i := range nw {
r := make([]int, lc.Len())
nw[i] = r
for j := range r {
if j != i {
r[j] = -1
}
}
}
// define input sequences
a := &linear.Seq{Seq: ab.BytesToLetters([]byte("rosettacode"))}
a.Alpha = lc
b := &linear.Seq{Seq: ab.BytesToLetters([]byte("raisethysword"))}
b.Alpha = lc
// perform alignment
aln, err := nw.Align(a, b)
// format and display result
if err != nil {
fmt.Println(err)
return
}
fa := align.Format(a, b, aln, '-')
fmt.Printf("%s\n%s\n", fa[0], fa[1])
aa := fmt.Sprint(fa[0])
ba := fmt.Sprint(fa[1])
ma := make([]byte, len(aa))
for i := range ma {
if aa[i] == ba[i] {
ma[i] = ' '
} else {
ma[i] = '|'
}
}
fmt.Println(string(ma))
}
- Output:
The lines after the alignment point out the 8 edits.
r-oset-tacode raisethysword || |||| ||
Haskell
The Wagner–Fischer matrix construction is adopted from Levenshtein_distance#Haskell, however it is reversed in order to simplify it's traversing.
costs :: String -> String -> [[Int]]
costs s1 s2 = reverse $ reverse <$> matrix
where
matrix = scanl transform [0 .. length s1] s2
transform ns@(n:ns1) c = scanl calc (n + 1) $ zip3 s1 ns ns1
where
calc z (c1, x, y) = minimum [ y + 1, z + 1
, x + fromEnum (c1 /= c)]
levenshteinDistance :: String -> String -> Int
levenshteinDistance s1 s2 = head.head $ costs s1 s2
A sample alignment
The alignment is built in the same way as it is done in Levenshtein_distance/Alignment#Java. Instead of indices the Wagner–Fischer matrix and strings are traversed by list truncation, as zippers.
alignment :: String -> String -> (String, String)
alignment s1 s2 = go (costs s1 s2) (reverse s1) (reverse s2) ([],[])
where
go c _ _ r | isEmpty c = r
go _ [] s r = ('-' <$ s, reverse s) <> r
go _ s [] r = (reverse s, '-' <$ s) <> r
go c s1@(h1:t1) s2@(h2:t2) (r1, r2) =
let temp = (get.nextCol.nextRow $ c) + if h1 == h2 then 0 else 1
in case get c of
x | x == temp -> go (nextRow.nextCol $ c) t1 t2 (h1:r1, h2:r2)
| x == 1 + (get.nextCol $ c) -> go (nextCol c) s1 t2 ('-':r1, h2:r2)
| x == 1 + (get.nextRow $ c) -> go (nextRow c) t1 s2 (h1:r1, '-':r2)
-- Functions which treat table as zipper
get ((h:_):_) = h
nextRow = map tail
nextCol = tail
isEmpty c = null c || null (head c)
λ> alignment "palace" "place" ("palace","p-lace") λ> alignment "rosettacode" "raisethysword" ("r-oset-tacode","raisethysword") λ> alignment "rosettacode" "rat" ("rosettacode","r-----a---t")
All alignments
The alternative solution, which extensively produces all minimal alignments for given strings.
-- Produces all possible alignments for two strings.
allAlignments :: String -> String -> [[(Char, Char)]]
allAlignments s1 s2 = go (length s2 - length s1) s1 s2
where
go _ s [] = [(\x -> (x, '-')) <$> s]
go _ [] s = [(\x -> ('-' ,x)) <$> s]
go n s1@(h1:t1) s2@(h2:t2) = (h1, h2) <:> go n t1 t2
++ case compare n 0 of
LT -> (h1, '-') <:> go (n+1) t1 s2
EQ -> []
GT -> ('-', h2) <:> go (n-1) s1 t2
x <:> l = fmap (x :) l
-- Returns a lazy list of all optimal alignments.
levenshteinAlignments :: String -> String -> [(String, String)]
levenshteinAlignments s1 s2 = unzip <$> best
where
best = filter ((lev ==) . dist) $ allAlignments s1 s2
lev = levenshteinDistance s1 s2
dist = length . filter (uncurry (/=))
λ> mapM_ print $ levenshteinAlignments "rosettacode" "raisethysword" ("ro-settac-ode","raisethysword") ("ro-setta-code","raisethysword") ("ro-sett-acode","raisethysword") ("ro-set-tacode","raisethysword") ("r-osettac-ode","raisethysword") ("r-osetta-code","raisethysword") ("r-osett-acode","raisethysword") ("r-oset-tacode","raisethysword") λ> mapM_ print $ levenshteinAlignments "rosettacode" "rat" ("rosettacode","ra--t------") ("rosettacode","ra---t-----") ("rosettacode","r-a-t------") ("rosettacode","r-a--t-----") ("rosettacode","r--at------") ("rosettacode","r--a-t-----") ("rosettacode","r---at-----") ("rosettacode","r-----at---") ("rosettacode","r-----a-t--") ("rosettacode","r-----a--t-") ("rosettacode","r-----a---t")
J
Translation of java:
levalign=:4 :0
assert. x <:&# y
D=. x +/&i.&>:&# y
for_i. 1+i.#x do.
for_j. 1+i.#y do.
if. ((<:i){x)=(<:j){y do.
D=. (D {~<<:i,j) (<i,j)} D
else.
min=. 1+<./D{~(i,j) <@:-"1#:1 2 3
D=. min (<i,j)} D
end.
end.
end.
A=. B=. ''
ij=. x ,&# y
while. */ij do.
'd00 d01 d10 d11'=. D{~ ij <@:-"1#:i.4
'x1 y1'=. (ij-1){each x;y
if. d00 = d11+x1~:y1 do.
A=. A,x1 [ B=. B,y1 [ ij=. ij-1
elseif. d00 = 1+d10 do.
A=. A,x1 [ B=. B,'-'[ ij=. ij-1 0
elseif. d00 = 1+d01 do.
A=. A,'-'[ B=. B,y1 [ ij=. ij-0 1
end.
end.
A,:&|.B
)
Task examples:
'place' levalign 'palace'
p-lace
palace
'rosettacode' levalign 'raisethysword'
r-oset-tacode
raisethysword
Java
public class LevenshteinAlignment {
public static String[] alignment(String a, String b) {
a = a.toLowerCase();
b = b.toLowerCase();
// i == 0
int[][] costs = new int[a.length()+1][b.length()+1];
for (int j = 0; j <= b.length(); j++)
costs[0][j] = j;
for (int i = 1; i <= a.length(); i++) {
costs[i][0] = i;
for (int j = 1; j <= b.length(); j++) {
costs[i][j] = Math.min(1 + Math.min(costs[i-1][j], costs[i][j-1]), a.charAt(i - 1) == b.charAt(j - 1) ? costs[i-1][j-1] : costs[i-1][j-1] + 1);
}
}
// walk back through matrix to figure out path
StringBuilder aPathRev = new StringBuilder();
StringBuilder bPathRev = new StringBuilder();
for (int i = a.length(), j = b.length(); i != 0 && j != 0; ) {
if (costs[i][j] == (a.charAt(i - 1) == b.charAt(j - 1) ? costs[i-1][j-1] : costs[i-1][j-1] + 1)) {
aPathRev.append(a.charAt(--i));
bPathRev.append(b.charAt(--j));
} else if (costs[i][j] == 1 + costs[i-1][j]) {
aPathRev.append(a.charAt(--i));
bPathRev.append('-');
} else if (costs[i][j] == 1 + costs[i][j-1]) {
aPathRev.append('-');
bPathRev.append(b.charAt(--j));
}
}
return new String[]{aPathRev.reverse().toString(), bPathRev.reverse().toString()};
}
public static void main(String[] args) {
String[] result = alignment("rosettacode", "raisethysword");
System.out.println(result[0]);
System.out.println(result[1]);
}
}
- Output:
r-oset-tacode raisethysword
jq
Adapted from Wren
Works with jq, the C implementation of jq
Works with gojq, the Go implementation of jq
Works with jaq, the Rust implementation of jq
def reverseString: explode | reverse | implode;
def levenshteinAlign($x; $y):
def min($a;$b): [$a,$b]|min;
($x|length + 1) as $a1
| ($y|length + 1) as $b1
| ($x | ascii_downcase) as $a
| ($y | ascii_downcase) as $b
| [range(0; $b1) | 0] as $zeros
| {costs: [range(0; $a1) | $zeros]}
| reduce range(0; $b1) as $j (.; .costs[0][$j] = $j)
| reduce range(1; $a1) as $i (.;
.costs[$i][0] = $i
| reduce range(1; $b1) as $j (.;
(.costs[$i - 1][$j - 1] + (if $a[$i - 1: $i] == $b[$j - 1: $j] then 0 else 1 end)) as $temp
| .costs[$i][$j] = min( $temp; 1 + min(.costs[$i - 1][$j]; .costs[$i][$j - 1] )) ))
# walk back through matrix to figure out path
| .aPathRev = ""
| .bPathRev = ""
| .i = ($a|length)
| .j = ($b|length)
| until (.i == 0 or .j == 0;
(.costs[.i - 1][.j - 1] + (if $a[.i - 1: .i] == $b[.j - 1: .j] then 0 else 1 end)) as $temp
| .costs[.i][.j] as $cij
| if $cij == $temp
then .i += -1
| .aPathRev += $a[.i: .i+1]
| .j += -1
| .bPathRev += $b[.j: .j+1]
elif $cij == 1 + .costs[.i-1][.j]
then .i += -1
| .aPathRev += $a[.i:.i+1]
| .bPathRev += "-"
elif $cij == 1 + .costs[.i][.j-1]
then .aPathRev += "-"
| .j += -1
| .bPathRev += $b[.j: .j+1]
end)
| [.aPathRev, .bPathRev ] | map(reverseString);
levenshteinAlign("place"; "palace"),
levenshteinAlign("rosettacode";"raisethysword")
- Output:
[ "p-lace", "palace" ] [ "r-oset-tacode", "raisethysword" ]
Julia
function levenshteinalign(a::AbstractString, b::AbstractString)
a = lowercase(a)
b = lowercase(b)
len_a = length(a)
len_b = length(b)
costs = Matrix{Int}(len_a + 1, len_b + 1)
costs[1, :] .= 0:len_b
@inbounds for i in 2:(len_a + 1)
costs[i, 1] = i
for j in 2:(len_b + 1)
tmp = ifelse(a[i-1] == b[j-1], costs[i-1, j-1], costs[i-1, j-1] + 1)
costs[i, j] = min(1 + min(costs[i-1, j], costs[i, j-1]), tmp)
end
end
apathrev = IOBuffer()
bpathrev = IOBuffer()
local i = len_a + 1
local j = len_b + 1
@inbounds while i != 1 && j != 1
tmp = ifelse(a[i-1] == b[j-1], costs[i-1, j-1], costs[i-1, j-1] + 1)
if costs[i, j] == tmp
i -= 1
j -= 1
print(apathrev, a[i])
print(bpathrev, b[j])
elseif costs[i, j] == 1 + costs[i-1, j]
i -= 1
print(apathrev, a[i])
print(bpathrev, '-')
elseif costs[i, j] == 1 + costs[i, j-1]
j -= 1
print(apathrev, '-')
print(bpathrev, b[j])
end
end
return reverse(String(take!(apathrev))), reverse(String(take!(bpathrev)))
end
foreach(println, levenshteinalign("rosettacode", "raisethysword"))
foreach(println, levenshteinalign("place", "palace"))
- Output:
r-oset-tacode raisethysword p-lace palace
Kotlin
// version 1.1.3
fun levenshteinAlign(a: String, b: String): Array<String> {
val aa = a.toLowerCase()
val bb = b.toLowerCase()
val costs = Array(a.length + 1) { IntArray(b.length + 1) }
for (j in 0..b.length) costs[0][j] = j
for (i in 1..a.length) {
costs[i][0] = i
for (j in 1..b.length) {
val temp = costs[i - 1][j - 1] + (if (aa[i - 1] == bb[j - 1]) 0 else 1)
costs[i][j] = minOf(1 + minOf(costs[i - 1][j], costs[i][j - 1]), temp)
}
}
// walk back through matrix to figure out path
val aPathRev = StringBuilder()
val bPathRev = StringBuilder()
var i = a.length
var j = b.length
while (i != 0 && j != 0) {
val temp = costs[i - 1][j - 1] + (if (aa[i - 1] == bb[j - 1]) 0 else 1)
when (costs[i][j]) {
temp -> {
aPathRev.append(aa[--i])
bPathRev.append(bb[--j])
}
1 + costs[i-1][j] -> {
aPathRev.append(aa[--i])
bPathRev.append('-')
}
1 + costs[i][j-1] -> {
aPathRev.append('-')
bPathRev.append(bb[--j])
}
}
}
return arrayOf(aPathRev.reverse().toString(), bPathRev.reverse().toString())
}
fun main(args: Array<String>) {
var result = levenshteinAlign("place", "palace")
println(result[0])
println(result[1])
println()
result = levenshteinAlign("rosettacode","raisethysword")
println(result[0])
println(result[1])
}
- Output:
p-lace palace r-oset-tacode raisethysword
Mathematica / Wolfram Language
DamerauLevenshteinDistance["rosettacode", "raisethysword"]
- Output:
8
Nim
import algorithm, sequtils, strutils
proc levenshteinAlign(a, b: string): tuple[a, b: string] =
let a = a.toLower()
let b = b.toLower()
var costs = newSeqWith(a.len + 1, newSeq[int](b.len + 1))
for j in 0..b.len: costs[0][j] = j
for i in 1..a.len:
costs[i][0] = i
for j in 1..b.len:
let tmp = costs[i - 1][j - 1] + ord(a[i - 1] != b[j - 1])
costs[i][j] = min(1 + min(costs[i - 1][j], costs[i][j - 1]), tmp)
# Walk back through matrix to figure out path.
var aPathRev, bPathRev: string
var i = a.len
var j = b.len
while i != 0 and j != 0:
let tmp = costs[i - 1][j - 1] + ord(a[i - 1] != b[j - 1])
if costs[i][j] == tmp:
dec i
dec j
aPathRev.add a[i]
bPathRev.add b[j]
elif costs[i][j] == 1 + costs[i-1][j]:
dec i
aPathRev.add a[i]
bPathRev.add '-'
elif costs[i][j] == 1 + costs[i][j-1]:
dec j
aPathRev.add '-'
bPathRev.add b[j]
else:
quit "Internal error"
result = (reversed(aPathRev).join(), reversed(bPathRev).join())
when isMainModule:
var result = levenshteinAlign("place", "palace")
echo result.a
echo result.b
echo ""
result = levenshteinAlign("rosettacode","raisethysword")
echo result.a
echo result.b
- Output:
p-lace palace r-oset-tacode raisethysword
Perl
use strict;
use warnings;
use List::Util qw(min);
sub levenshtein_distance_alignment {
my @s = ('^', split //, shift);
my @t = ('^', split //, shift);
my @A;
@{$A[$_][0]}{qw(d s t)} = ($_, join('', @s[1 .. $_]), ('~' x $_)) for 0 .. $#s;
@{$A[0][$_]}{qw(d s t)} = ($_, ('-' x $_), join '', @t[1 .. $_]) for 0 .. $#t;
for my $i (1 .. $#s) {
for my $j (1 .. $#t) {
if ($s[$i] ne $t[$j]) {
$A[$i][$j]{d} = 1 + (
my $min = min $A[$i-1][$j]{d}, $A[$i][$j-1]{d}, $A[$i-1][$j-1]{d}
);
@{$A[$i][$j]}{qw(s t)} =
$A[$i-1][$j]{d} == $min ? ($A[$i-1][$j]{s}.$s[$i], $A[$i-1][$j]{t}.'-') :
$A[$i][$j-1]{d} == $min ? ($A[$i][$j-1]{s}.'-', $A[$i][$j-1]{t}.$t[$j]) :
($A[$i-1][$j-1]{s}.$s[$i], $A[$i-1][$j-1]{t}.$t[$j]);
}
else {
@{$A[$i][$j]}{qw(d s t)} = (
$A[$i-1][$j-1]{d},
$A[$i-1][$j-1]{s}.$s[$i],
$A[$i-1][$j-1]{t}.$t[$j]
);
}
}
}
return @{$A[-1][-1]}{'s', 't'};
}
print join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword";
- Output:
ro-settac-o-de raisethysword-
Phix
plus the indicator from Go
function LevenshteinAlignment(string a, b) integer la = length(a)+1, lb = length(b)+1 sequence costs = repeat(repeat(0,lb),la) for j=1 to lb do costs[1][j] = j end for for i=2 to la do costs[i][1] = i for j=2 to lb do integer tmp1 = 1+min(costs[i-1][j], costs[i][j-1]), tmp2 = costs[i-1][j-1] + (a[i-1]!=b[j-1]) costs[i][j] = min(tmp1, tmp2) end for end for -- walk back through matrix to figure out the path string arev = "", brev = "" integer i = la, j = lb while i>1 and j>1 do integer tmp = costs[i-1][j-1] + (a[i-1]!=b[j-1]) switch costs[i][j] do case tmp: i -= 1; arev &= a[i] j -= 1; brev &= b[j] case 1 + costs[i-1][j]: i -= 1; arev &= a[i] brev &= '-' case 1 + costs[i][j-1]: arev &= '-' j -= 1; brev &= b[j] end switch end while return {reverse(arev),reverse(brev)} end function procedure test(string a,b) {a,b} = LevenshteinAlignment(a,b) string c = sq_add(repeat(' ',length(a)),sq_mul(sq_ne(a,b),'|'-' ')) printf(1,"%s\n%s\n%s\n\n",{a,b,c}) end procedure test("rosettacode", "raisethysword") test("place", "palace")
- Output:
r-oset-tacode raisethysword || |||| || p-lace palace |
Python
from difflib import ndiff
def levenshtein(str1, str2):
result = ""
pos, removed = 0, 0
for x in ndiff(str1, str2):
if pos<len(str1) and str1[pos] == x[2]:
pos += 1
result += x[2]
if x[0] == "-":
removed += 1
continue
else:
if removed > 0:
removed -=1
else:
result += "-"
print(result)
levenshtein("place","palace")
levenshtein("rosettacode","raisethysword")
- Output:
p-lace ro-settac-o-de
Racket
Simple version (no aligment)
First we will analyze this solution that only computes the distance. See http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html for a discussion of the code.
#lang racket
(define (memoize f)
(local ([define table (make-hash)])
(lambda args
(dict-ref! table args (λ () (apply f args))))))
(define levenshtein
(memoize
(lambda (s t)
(cond
[(and (empty? s) (empty? t)) 0]
[(empty? s) (length t)]
[(empty? t) (length s)]
[else
(if (equal? (first s) (first t))
(levenshtein (rest s) (rest t))
(min (add1 (levenshtein (rest s) t))
(add1 (levenshtein s (rest t)))
(add1 (levenshtein (rest s) (rest t)))))]))))
Demonstration:
(levenshtein (string->list "rosettacode")
(string->list "raisethysword"))
- Output:
8
Complete version
Now we extend the code from http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html to show also the alignment. The code is very similar, but it stores the partial results (number of edits and alignment of each substring) in a lev structure.
#lang racket
(struct lev (n s t))
(define (lev-add old n sx tx)
(lev (+ n (lev-n old))
(cons sx (lev-s old))
(cons tx (lev-t old))))
(define (list-repeat n v)
(build-list n (lambda (_) v)))
(define (memoize f)
(local ([define table (make-hash)])
(lambda args
(dict-ref! table args (λ () (apply f args))))))
(define levenshtein/list
(memoize
(lambda (s t)
(cond
[(and (empty? s) (empty? t))
(lev 0 '() '())]
[(empty? s)
(lev (length t) (list-repeat (length t) #\-) t)]
[(empty? t)
(lev (length s) s (list-repeat (length s) #\-))]
[else
(if (equal? (first s) (first t))
(lev-add (levenshtein/list (rest s) (rest t))
0 (first s) (first t))
(argmin lev-n (list (lev-add (levenshtein/list (rest s) t)
1 (first s) #\-)
(lev-add (levenshtein/list s (rest t))
1 #\- (first t))
(lev-add (levenshtein/list (rest s) (rest t))
1 (first s) (first t)))))]))))
(define (levenshtein s t)
(let ([result (levenshtein/list (string->list s)
(string->list t))])
(values (lev-n result)
(list->string (lev-s result))
(list->string (lev-t result)))))
Demonstration:
(let-values ([(dist exp-s exp-t)
(levenshtein "rosettacode" "raisethysword")])
(printf "levenshtein: ~a edits\n" dist)
(displayln exp-s)
(displayln exp-t))
- Output:
levenshtein: 8 edits r-oset-taco-de raisethysword-
Raku
(formerly Perl 6)
sub align ( Str $σ, Str $t ) {
my @s = flat *, $σ.comb;
my @t = flat *, $t.comb;
my @A;
@A[$_][ 0]<d s t> = $_, @s[1..$_].join, '-' x $_ for ^@s;
@A[ 0][$_]<d s t> = $_, '-' x $_, @t[1..$_].join for ^@t;
for 1 ..^ @s X 1..^ @t -> (\i, \j) {
if @s[i] ne @t[j] {
@A[i][j]<d> = 1 + my $min =
min @A[i-1][j]<d>, @A[i][j-1]<d>, @A[i-1][j-1]<d>;
@A[i][j]<s t> =
@A[i-1][j]<d> == $min ?? (@A[i-1][j]<s t> Z~ @s[i], '-') !!
@A[i][j-1]<d> == $min ?? (@A[i][j-1]<s t> Z~ '-', @t[j]) !!
(@A[i-1][j-1]<s t> Z~ @s[i], @t[j]);
} else {
@A[i][j]<d s t> = @A[i-1][j-1]<d s t> Z~ '', @s[i], @t[j];
}
}
return @A[*-1][*-1]<s t>;
}
.say for align 'rosettacode', 'raisethysword';
- Output:
ro-settac-o-de raisethysword-
Ruby
uses "lcs" from here
require 'lcs'
def levenshtein_align(a, b)
apos, bpos = LCS.new(a, b).backtrack2
c = ""
d = ""
x0 = y0 = -1
dx = dy = 0
apos.zip(bpos) do |x,y|
diff = x + dx - y - dy
if diff < 0
dx -= diff
c += "-" * (-diff)
elsif diff > 0
dy += diff
d += "-" * diff
end
c += a[x0+1..x]
x0 = x
d += b[y0+1..y]
y0 = y
end
c += a[x0+1..-1]
d += b[y0+1..-1]
diff = a.length + y0 - b.length - x0
if diff < 0
c += "-" * (-diff)
elsif diff > 0
d += "-" * diff
end
[c, d]
end
puts levenshtein_align("rosettacode", "raisethysword")
- Output:
r-oset-taco-de raisethysword-
Rust
Cargo.toml
[dependencies] edit-distance = "^1.0.0"
src/main.rs
extern crate edit_distance;
edit_distance("rosettacode", "raisethysword");
Scala
- Output:
Best seen running in your browser either by ScalaFiddle (ES aka JavaScript, non JVM) or [1].
import scala.collection.mutable
import scala.collection.parallel.ParSeq
object LevenshteinAlignment extends App {
val vlad = new Levenshtein("rosettacode", "raisethysword")
val alignment = vlad.revLevenstein()
class Levenshtein(s1: String, s2: String) {
val memoizedCosts = mutable.Map[(Int, Int), Int]()
def revLevenstein(): (String, String) = {
def revLev: (Int, Int, String, String) => (String, String) = {
case (_, 0, revS1, revS2) => (revS1, revS2)
case (0, _, revS1, revS2) => (revS1, revS2)
case (i, j, revS1, revS2) =>
if (memoizedCosts(i, j) == (memoizedCosts(i - 1, j - 1)
+ (if (s1(i - 1) != s2(j - 1)) 1 else 0)))
revLev(i - 1, j - 1, s1(i - 1) + revS1, s2(j - 1) + revS2)
else if (memoizedCosts(i, j) == 1 + memoizedCosts(i - 1, j))
revLev(i - 1, j, s1(i - 1) + revS1, "-" + revS2)
else
revLev(i, j - 1, "-" + revS1, s2(j - 1) + revS2)
}
revLev(s1.length, s2.length, "", "")
}
private def levenshtein: Int = {
def lev: ((Int, Int)) => Int = {
case (k1, k2) =>
memoizedCosts.getOrElseUpdate((k1, k2), (k1, k2) match {
case (i, 0) => i
case (0, j) => j
case (i, j) =>
ParSeq(1 + lev((i - 1, j)),
1 + lev((i, j - 1)),
lev((i - 1, j - 1))
+ (if (s1(i - 1) != s2(j - 1)) 1 else 0)).min
})
}
lev((s1.length, s2.length))
}
levenshtein
}
println(alignment._1)
println(alignment._2)
}
Sidef
func align(s, t) {
s.chars!.prepend!('^')
t.chars!.prepend!('^')
var A = []
{|i| A[i][0]{@|<d s t>} = (i, s.slice(1).first(i).join, '~' * i) } << ^s
{|i| A[0][i]{@|<d s t>} = (i, '-' * i, t.slice(1).first(i).join) } << ^t
for i (1 .. s.end) {
for j (1 .. t.end) {
if (s[i] != t[j]) {
A[i][j]{:d} = 1+(
var min = Math.min(A[i-1][j]{:d}, A[i][j-1]{:d}, A[i-1][j-1]{:d})
)
A[i][j]{@|<s t>} = (A[i-1][j]{:d} == min
? [A[i-1][j]{:s}+s[i], A[i-1][j]{:t}+'-']
: (A[i][j-1]{:d} == min
? [A[i][j-1]{:s}+'-', A[i][j-1]{:t}+t[j]]
: [A[i-1][j-1]{:s}+s[i], A[i-1][j-1]{:t}+t[j]]))...
}
else {
A[i][j]{@|<d s t>} = (
A[i-1][j-1]{:d},
A[i-1][j-1]{:s}+s[i],
A[i-1][j-1]{:t}+t[j],
)
}
}
}
return [A[-1][-1]{@|<s t>}]
}
align("rosettacode", "raisethysword").each { .say }
- Output:
ro-settac-o-de raisethysword-
Tcl
package require struct::list
proc levenshtein/align {a b} {
lassign [struct::list longestCommonSubsequence [split $a ""] [split $b ""]]\
apos bpos
set c ""
set d ""
set x0 [set y0 -1]
set dx [set dy 0]
foreach x $apos y $bpos {
if {$x+$dx < $y+$dy} {
set n [expr {($y+$dy)-($x+$dx)}]
incr dx $n
append c [string repeat "-" $n]
} elseif {$x+$dx > $y+$dy} {
set n [expr {($x+$dx)-($y+$dy)}]
incr dy $n
append d [string repeat "-" $n]
}
append c [string range $a $x0+1 $x]
set x0 $x
append d [string range $b $y0+1 $y]
set y0 $y
}
append c [string range $a $x0+1 end]
append d [string range $b $y0+1 end]
set al [string length $a]
set bl [string length $b]
if {$al+$y0 < $bl+$x0} {
append c [string repeat "-" [expr {$bl+$x0-$y0-$al}]]
} elseif {$bl+$x0 < $al+$y0} {
append d [string repeat "-" [expr {$al+$y0-$x0-$bl}]]
}
return $c\n$d
}
puts [levenshtein/align "rosettacode" "raisethysword"]
- Output:
r-oset-taco-de raisethysword-
Wren
import "./str" for Str
var levenshteinAlign = Fn.new { |a, b|
a = Str.lower(a)
b = Str.lower(b)
var costs = List.filled(a.count+1, null)
for (i in 0..a.count) costs[i] = List.filled(b.count+1, 0)
for (j in 0..b.count) costs[0][j] = j
for (i in 1..a.count) {
costs[i][0] = i
for (j in 1..b.count) {
var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1)
costs[i][j] = temp.min(1 + costs[i - 1][j].min(costs[i][j - 1]))
}
}
// walk back through matrix to figure out path
var aPathRev = ""
var bPathRev = ""
var i = a.count
var j = b.count
while (i != 0 && j != 0) {
var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1)
var cij = costs[i][j]
if (cij == temp) {
i = i - 1
aPathRev = aPathRev + a[i]
j = j - 1
bPathRev = bPathRev + b[j]
} else if (cij == 1 + costs[i-1][j]) {
i = i - 1
aPathRev = aPathRev + a[i]
bPathRev = bPathRev + "-"
} else if (cij == 1 + costs[i][j-1]) {
aPathRev = aPathRev + "-"
j = j - 1
bPathRev = bPathRev+ b[j]
}
}
return [aPathRev[-1..0], bPathRev[-1..0]]
}
var result = levenshteinAlign.call("place", "palace")
System.print(result[0])
System.print(result[1])
System.print()
result = levenshteinAlign.call("rosettacode","raisethysword")
System.print(result[0])
System.print(result[1])
- Output:
p-lace palace r-oset-tacode raisethysword
zkl
fcn alignment(a,b){
a,b = a.toLower(), b.toLower();
costs := (a.len()+1).pump(List(),'wrap(a){ [1..b.len()].pump(List(a)) });
foreach i,j in (a.len()+1, [1..b.len()]){
costs[i][j] = ( 1 + costs[i-1][j].min(costs[i][j-1]) )
.min(if(a[i-1] == b[j-1]) costs[i-1][j-1] else costs[i-1][j-1] + 1);
}
// walk back through matrix to figure out path
aPathRev,bPathRev := Data(),Data(); // byte buckets
i,j := a.len(), b.len();
while(i!=0 and j!= 0){
if (costs[i][j] ==
( if(a[i-1]==b[j-1]) costs[i-1][j-1] else costs[i-1][j-1]+1 )){
aPathRev.append(a[i-=1]);
bPathRev.append(b[j-=1]);
} else if(costs[i][j] == 1+costs[i-1][j]){
aPathRev.append(a[i-=1]);
bPathRev.append("-");
} else if (costs[i][j] == 1+costs[i][j-1]){
aPathRev.append("-");
bPathRev.append(b[j-=1]);
}
}
return(aPathRev.text.reverse(), bPathRev.text.reverse())
}
result := alignment("rosettacode", "raisethysword");
println(result[0]);
println(result[1]);
- Output:
r-oset-tacode raisethysword