Levenshtein distance/Alignment: Difference between revisions
(22 intermediate revisions by 14 users not shown) | |||
Line 18: | Line 18: | ||
You can either implement an algorithm, or use a dedicated library (thus showing us how it is named in your language). |
You can either implement an algorithm, or use a dedicated library (thus showing us how it is named in your language). |
||
<br><br> |
<br><br> |
||
=={{header|11l}}== |
|||
{{trans|Nim}} |
|||
<syntaxhighlight lang="11l">F levenshteinAlign(aa, bb) |
|||
V a = aa.lowercase() |
|||
V b = bb.lowercase() |
|||
V costs = [[0] * (b.len + 1)] * (a.len + 1) |
|||
L(j) 0 .. b.len |
|||
costs[0][j] = j |
|||
L(i) 1 .. a.len |
|||
costs[i][0] = i |
|||
L(j) 1 .. b.len |
|||
V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1]) |
|||
costs[i][j] = min(1 + min(costs[i - 1][j], costs[i][j - 1]), tmp) |
|||
V aPathRev = ‘’ |
|||
V bPathRev = ‘’ |
|||
V i = a.len |
|||
V j = b.len |
|||
L i != 0 & j != 0 |
|||
V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1]) |
|||
I costs[i][j] == tmp |
|||
aPathRev ‘’= a[--i] |
|||
bPathRev ‘’= b[--j] |
|||
E I costs[i][j] == 1 + costs[i - 1][j] |
|||
aPathRev ‘’= a[--i] |
|||
bPathRev ‘’= ‘-’ |
|||
E I costs[i][j] == 1 + costs[i][j - 1] |
|||
aPathRev ‘’= ‘-’ |
|||
bPathRev ‘’= b[--j] |
|||
E |
|||
assert(0B, ‘Internal error’) |
|||
R (reversed(aPathRev), reversed(bPathRev)) |
|||
V result = levenshteinAlign(‘place’, ‘palace’) |
|||
print(result[0]) |
|||
print(result[1]) |
|||
print() |
|||
result = levenshteinAlign(‘rosettacode’, ‘raisethysword’) |
|||
print(result[0]) |
|||
print(result[1])</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
p-lace |
|||
palace |
|||
r-oset-tacode |
|||
raisethysword |
|||
</pre> |
|||
=={{header|Arturo}}== |
|||
<syntaxhighlight lang="rebol">print join.with:"\n" levenshtein.align "place" "palace" |
|||
print join.with:"\n" levenshtein.align "rosettacode" "raisethysword"</syntaxhighlight> |
|||
{{out}} |
|||
<pre>p-lace |
|||
palace |
|||
r-oset-tacode |
|||
raisethysword</pre> |
|||
=={{header|C}}== |
=={{header|C}}== |
||
< |
<syntaxhighlight lang="c">#include <stdio.h> |
||
#include <stdlib.h> |
#include <stdlib.h> |
||
#include <string.h> |
#include <string.h> |
||
Line 108: | Line 173: | ||
leven("raisethysword", "rosettacode"); |
leven("raisethysword", "rosettacode"); |
||
return 0; |
return 0; |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
raisethysword -> rosettacode: 8 edits |
raisethysword -> rosettacode: 8 edits |
||
r(a,o)(i,)set(h,t)(y,a)(s,c)(w,)o(r,d)(d,e) |
r(a,o)(i,)set(h,t)(y,a)(s,c)(w,)o(r,d)(d,e) |
||
</pre> |
|||
=={{header|C#}}== |
|||
{{trans|Java}} |
|||
<syntaxhighlight lang="C#"> |
|||
using System; |
|||
using System.Text; |
|||
public class LevenshteinAlignment |
|||
{ |
|||
public static string[] Alignment(string a, string b) |
|||
{ |
|||
a = a.ToLower(); |
|||
b = b.ToLower(); |
|||
int[,] costs = new int[a.Length + 1, b.Length + 1]; |
|||
for (int j = 0; j <= b.Length; j++) |
|||
costs[0, j] = j; |
|||
for (int i = 1; i <= a.Length; i++) |
|||
{ |
|||
costs[i, 0] = i; |
|||
for (int j = 1; j <= b.Length; j++) |
|||
{ |
|||
costs[i, j] = Math.Min(1 + Math.Min(costs[i - 1, j], costs[i, j - 1]), |
|||
a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1); |
|||
} |
|||
} |
|||
StringBuilder aPathRev = new StringBuilder(); |
|||
StringBuilder bPathRev = new StringBuilder(); |
|||
for (int i = a.Length, j = b.Length; i != 0 && j != 0;) |
|||
{ |
|||
if (costs[i, j] == (a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1)) |
|||
{ |
|||
aPathRev.Append(a[--i]); |
|||
bPathRev.Append(b[--j]); |
|||
} |
|||
else if (costs[i, j] == 1 + costs[i - 1, j]) |
|||
{ |
|||
aPathRev.Append(a[--i]); |
|||
bPathRev.Append('-'); |
|||
} |
|||
else if (costs[i, j] == 1 + costs[i, j - 1]) |
|||
{ |
|||
aPathRev.Append('-'); |
|||
bPathRev.Append(b[--j]); |
|||
} |
|||
} |
|||
return new string[] { Reverse(aPathRev.ToString()), Reverse(bPathRev.ToString()) }; |
|||
} |
|||
private static string Reverse(string s) |
|||
{ |
|||
char[] charArray = s.ToCharArray(); |
|||
Array.Reverse(charArray); |
|||
return new string(charArray); |
|||
} |
|||
public static void Main(string[] args) |
|||
{ |
|||
string[] result = Alignment("rosettacode", "raisethysword"); |
|||
Console.WriteLine(result[0]); |
|||
Console.WriteLine(result[1]); |
|||
} |
|||
} |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
r-oset-tacode |
|||
raisethysword |
|||
</pre> |
|||
=={{header|C++}}== |
|||
<syntaxhighlight lang="c++"> |
|||
#include <algorithm> |
|||
#include <cstdint> |
|||
#include <iostream> |
|||
#include <string> |
|||
#include <vector> |
|||
std::string to_lower_case(const std::string& text) { |
|||
std::string result = text; |
|||
std::transform(result.begin(), result.end(), result.begin(), |
|||
[](char ch){ return std::tolower(ch); }); |
|||
return result; |
|||
} |
|||
std::vector<std::string> levenshtein_alignment(std::string a, std::string b) { |
|||
a = to_lower_case(a); |
|||
b = to_lower_case(b); |
|||
std::vector<std::vector<int32_t>> costs{ a.length() + 1, std::vector<int32_t>( b.length() + 1, 0 ) }; |
|||
for ( uint64_t j = 0; j <= b.length(); ++j ) |
|||
costs[0][j] = j; |
|||
for ( uint64_t i = 1; i <= a.length(); ++i ) { |
|||
costs[i][0] = i; |
|||
for ( uint64_t j = 1; j <= b.length(); ++j ) { |
|||
costs[i][j] = std::min(std::min( costs[i - 1][j], costs[i][j - 1]) + 1, |
|||
a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1); |
|||
} |
|||
} |
|||
std::string a_path_reversed, b_path_reversed; |
|||
uint64_t i = a.length(), j = b.length(); |
|||
while ( i != 0 && j != 0 ) { |
|||
if ( costs[i][j] == ( a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1 ) ) { |
|||
a_path_reversed += a[--i]; |
|||
b_path_reversed += b[--j]; |
|||
} else if ( costs[i][j] == costs[i - 1][j] + 1 ) { |
|||
a_path_reversed += a[--i]; |
|||
b_path_reversed += "-"; |
|||
} else if ( costs[i][j] == costs[i][j - 1] + 1 ) { |
|||
a_path_reversed += "-"; |
|||
b_path_reversed += b[--j]; |
|||
} |
|||
} |
|||
std::reverse(a_path_reversed.begin(), a_path_reversed.end()); |
|||
std::reverse(b_path_reversed.begin(), b_path_reversed.end()); |
|||
return std::vector<std::string>{ a_path_reversed, b_path_reversed }; |
|||
} |
|||
int main() { |
|||
std::vector<std::string> result = levenshtein_alignment("place", "palace"); |
|||
std::cout << result[0] << std::endl; |
|||
std::cout << result[1] << std::endl; |
|||
std::cout << std::endl; |
|||
result = levenshtein_alignment("rosettacode", "raisethysword"); |
|||
std::cout << result[0] << std::endl; |
|||
std::cout << result[1] << std::endl; |
|||
} |
|||
</syntaxhighlight> |
|||
{{ out }} |
|||
<pre> |
|||
p-lace |
|||
palace |
|||
r-oset-tacode |
|||
raisethysword |
|||
</pre> |
</pre> |
||
=={{header|D}}== |
=={{header|D}}== |
||
Using the standard library. |
Using the standard library. |
||
< |
<syntaxhighlight lang="d">void main() { |
||
import std.stdio, std.algorithm; |
import std.stdio, std.algorithm; |
||
Line 144: | Line 354: | ||
writeln(s1b, "\n", s2b); |
writeln(s1b, "\n", s2b); |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>r_oset_tacode |
<pre>r_oset_tacode |
||
raisethysword</pre> |
raisethysword</pre> |
||
=={{header|EasyLang}}== |
|||
{{trans|FreeBASIC}} |
|||
<syntaxhighlight> |
|||
global dparr[] dpcols . |
|||
proc dpnew a b . . |
|||
len dparr[] a * b |
|||
dpcols = b |
|||
. |
|||
func dp r c . |
|||
return dparr[r * dpcols + c + 1] |
|||
. |
|||
proc dps r c v . . |
|||
dparr[r * dpcols + c + 1] = v |
|||
. |
|||
proc align a$ b$ . ar$ br$ . |
|||
dpnew (len a$ + 1) (len b$ + 1) |
|||
for i = 1 to len a$ |
|||
dps i 0 i |
|||
. |
|||
for j = 0 to len b$ |
|||
dps 0 j j |
|||
. |
|||
for i = 1 to len a$ |
|||
for j = 1 to len b$ |
|||
if substr a$ i 1 = substr b$ j 1 |
|||
dps i j dp (i - 1) (j - 1) |
|||
else |
|||
dps i j lower (lower dp (i - 1) j dp i (j - 1)) dp (i - 1) (j - 1) + 1 |
|||
. |
|||
. |
|||
. |
|||
ar$ = "" ; br$ = "" |
|||
i = len a$ ; j = len b$ |
|||
while i <> 0 and j <> 0 |
|||
if substr a$ i 1 = substr b$ j 1 or dp i j = dp (i - 1) (j - 1) + 1 |
|||
ar$ = substr a$ i 1 & ar$ |
|||
i -= 1 |
|||
br$ = substr b$ j 1 & br$ |
|||
j -= 1 |
|||
elif dp i j = dp (i - 1) j + 1 |
|||
ar$ = substr a$ i 1 & ar$ |
|||
i -= 1 |
|||
br$ = "-" & br$ |
|||
else |
|||
ar$ = "-" & ar$ |
|||
br$ = substr b$ j 1 & br$ |
|||
j -= 1 |
|||
. |
|||
. |
|||
. |
|||
align "rosettacode" "raisethysword" ar$ br$ |
|||
print ar$ ; print br$ |
|||
</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
r-oset-tacode |
|||
raisethysword |
|||
</pre> |
|||
=={{header|Eiffel}}== |
=={{header|Eiffel}}== |
||
< |
<syntaxhighlight lang="eiffel"> |
||
distance (source, target: STRING): INTEGER |
distance (source, target: STRING): INTEGER |
||
-- Minimum number of operations to turn `source' into `target'. |
-- Minimum number of operations to turn `source' into `target'. |
||
Line 160: | Line 429: | ||
⟳ ic:(1 |..| source.count) ¦ l_distance [ic, 1] := ic - 1 ⟲ |
⟳ ic:(1 |..| source.count) ¦ l_distance [ic, 1] := ic - 1 ⟲ |
||
⟳ ij:(1 |..| target.count) ¦ l_distance [1, ij] := ij - 1 ⟲ |
⟳ ij:(1 |..| target.count) ¦ l_distance [1, ij] := ij - 1 ⟲ |
||
⟳ ic:(1 |..| source.count) ¦ |
|||
⟳ ic:(2 |..| source.count) ¦ |
|||
⟳ jc:(2 |..| target.count) ¦ |
|||
if ic > 1 and jc > 1 then |
|||
if source [ic] = target [jc] then -- same char |
|||
l_distance [ic, jc] := l_distance [ic - 1, jc - 1] |
|||
else -- diff char |
|||
del := l_distance [ic - 1, jc] -- delete? |
|||
ins := l_distance [ic, jc - 1] -- insert? |
|||
subst := l_distance [ic - 1, jc -1] -- substitute/swap? |
|||
l_distance [ic, jc] := del.min (ins.min (subst)) + 1 |
|||
end |
|||
end |
end |
||
⟲ |
⟲ |
||
Line 176: | Line 444: | ||
Result:= l_distance [source.count, target.count] |
Result:= l_distance [source.count, target.count] |
||
end |
end |
||
</ |
</syntaxhighlight>{{out}} |
||
Result = 8 |
Result = 8 |
||
The ⟳ ¦ ⟲ represents the "symbolic form" of an Eiffel across loop. In the case above, |
The ⟳ ¦ ⟲ represents the "symbolic form" of an Eiffel across loop. In the case above, the first two ⟳ (loops) initialize the first column and the first row, and then two nested ⟳ (loops) to walk the distance computation. |
||
Also—the `ic' is this programmers shorthand for ITERATION_CURSOR. Therefore, in the nested loops, the `ic' is the iteration cursor following `i' and the `ij' is following `j' as indexes on the source, target, and l_distance. |
Also—the `ic' is this programmers shorthand for ITERATION_CURSOR. Therefore, in the nested loops, the `ic' is the iteration cursor following `i' and the `ij' is following `j' as indexes on the source, target, and l_distance. |
||
=={{header|FreeBASIC}}== |
|||
<syntaxhighlight lang="vbnet">#define min(a, b) iif((a) < (b), (a), (b)) |
|||
Sub LevenshteinAlignment(s1 As String, s2 As String) |
|||
Dim As String align1 = "", align2 = "" |
|||
Dim As Integer i, j, len1, len2 |
|||
len1 = Len(s1): |
|||
len2 = Len(s2) |
|||
Dim As Integer dp(len1+1, len2+1) |
|||
For i = 0 To len1 |
|||
dp(i, 0) = i |
|||
Next i |
|||
For j = 0 To len2 |
|||
dp(0, j) = j |
|||
Next j |
|||
For i = 1 To len1 |
|||
For j = 1 To len2 |
|||
If Mid(s1, i, 1) = Mid(s2, j, 1) Then |
|||
dp(i, j) = dp(i-1, j-1) |
|||
Else |
|||
dp(i, j) = min(min(dp(i-1, j), dp(i, j-1)), dp(i-1, j-1)) + 1 |
|||
End If |
|||
Next j |
|||
Next i |
|||
i = len1: j = len2 |
|||
While i > 0 And j > 0 |
|||
If Mid(s1, i, 1) = Mid(s2, j, 1) Then |
|||
align1 = Mid(s1, i, 1) + align1 |
|||
align2 = Mid(s2, j, 1) + align2 |
|||
i -= 1: j -= 1 |
|||
Elseif dp(i, j) = dp(i-1, j-1) + 1 Then |
|||
align1 = Mid(s1, i, 1) + align1 |
|||
align2 = Mid(s2, j, 1) + align2 |
|||
i -= 1: j -= 1 |
|||
Elseif dp(i, j) = dp(i-1, j) + 1 Then |
|||
align1 = Mid(s1, i, 1) + align1 |
|||
align2 = "-" + align2 |
|||
i -= 1 |
|||
Else |
|||
align1 = "-" + align1 |
|||
align2 = Mid(s2, j, 1) + align2 |
|||
j -= 1 |
|||
End If |
|||
Wend |
|||
While i > 0 |
|||
align1 = Mid(s1, i, 1) + align1 |
|||
align2 = "-" + align2 |
|||
i -= 1 |
|||
Wend |
|||
While j > 0 |
|||
align1 = "-" + align1 |
|||
align2 = Mid(s2, j, 1) + align2 |
|||
j -= 1 |
|||
Wend |
|||
Print "Levenshtein Distance: "; dp(len1, len2) |
|||
Print align1 |
|||
Print align2 |
|||
Print |
|||
End Sub |
|||
LevenshteinAlignment("rosettacode", "raisethysword") |
|||
LevenshteinAlignment("place", "palace") |
|||
Sleep</syntaxhighlight> |
|||
{{out}} |
|||
<pre>Levenshtein Distance: 8 |
|||
r-oset-tacode |
|||
raisethysword |
|||
Levenshtein Distance: 1 |
|||
p-lace |
|||
palace</pre> |
|||
=={{header|Go}}== |
=={{header|Go}}== |
||
{{libheader|biogo}} |
{{libheader|biogo}} |
||
Alignment computed by the Needleman-Wunch algorithm, which is used in bioinformatics. |
Alignment computed by the Needleman-Wunch algorithm, which is used in bioinformatics. |
||
< |
<syntaxhighlight lang="go">package main |
||
import ( |
import ( |
||
Line 241: | Line 588: | ||
} |
} |
||
fmt.Println(string(ma)) |
fmt.Println(string(ma)) |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
The lines after the alignment point out the 8 edits. |
The lines after the alignment point out the 8 edits. |
||
Line 249: | Line 596: | ||
|| |||| || |
|| |||| || |
||
</pre> |
</pre> |
||
=={{header|Haskell}}== |
|||
The Wagner–Fischer matrix construction is adopted from [[Levenshtein_distance#Haskell]], however it is reversed in order to simplify it's traversing. |
|||
<syntaxhighlight lang="haskell">costs :: String -> String -> [[Int]] |
|||
costs s1 s2 = reverse $ reverse <$> matrix |
|||
where |
|||
matrix = scanl transform [0 .. length s1] s2 |
|||
transform ns@(n:ns1) c = scanl calc (n + 1) $ zip3 s1 ns ns1 |
|||
where |
|||
calc z (c1, x, y) = minimum [ y + 1, z + 1 |
|||
, x + fromEnum (c1 /= c)] |
|||
levenshteinDistance :: String -> String -> Int |
|||
levenshteinDistance s1 s2 = head.head $ costs s1 s2</syntaxhighlight> |
|||
=== A sample alignment === |
|||
The alignment is built in the same way as it is done in [[Levenshtein_distance/Alignment#Java]]. |
|||
Instead of indices the Wagner–Fischer matrix and strings are traversed by list truncation, as zippers. |
|||
<syntaxhighlight lang="haskell">alignment :: String -> String -> (String, String) |
|||
alignment s1 s2 = go (costs s1 s2) (reverse s1) (reverse s2) ([],[]) |
|||
where |
|||
go c _ _ r | isEmpty c = r |
|||
go _ [] s r = ('-' <$ s, reverse s) <> r |
|||
go _ s [] r = (reverse s, '-' <$ s) <> r |
|||
go c s1@(h1:t1) s2@(h2:t2) (r1, r2) = |
|||
let temp = (get.nextCol.nextRow $ c) + if h1 == h2 then 0 else 1 |
|||
in case get c of |
|||
x | x == temp -> go (nextRow.nextCol $ c) t1 t2 (h1:r1, h2:r2) |
|||
| x == 1 + (get.nextCol $ c) -> go (nextCol c) s1 t2 ('-':r1, h2:r2) |
|||
| x == 1 + (get.nextRow $ c) -> go (nextRow c) t1 s2 (h1:r1, '-':r2) |
|||
-- Functions which treat table as zipper |
|||
get ((h:_):_) = h |
|||
nextRow = map tail |
|||
nextCol = tail |
|||
isEmpty c = null c || null (head c)</syntaxhighlight> |
|||
<pre>λ> alignment "palace" "place" |
|||
("palace","p-lace") |
|||
λ> alignment "rosettacode" "raisethysword" |
|||
("r-oset-tacode","raisethysword") |
|||
λ> alignment "rosettacode" "rat" |
|||
("rosettacode","r-----a---t")</pre> |
|||
=== All alignments === |
|||
The alternative solution, which extensively produces all minimal alignments for given strings. |
|||
<syntaxhighlight lang="haskell">-- Produces all possible alignments for two strings. |
|||
allAlignments :: String -> String -> [[(Char, Char)]] |
|||
allAlignments s1 s2 = go (length s2 - length s1) s1 s2 |
|||
where |
|||
go _ s [] = [(\x -> (x, '-')) <$> s] |
|||
go _ [] s = [(\x -> ('-' ,x)) <$> s] |
|||
go n s1@(h1:t1) s2@(h2:t2) = (h1, h2) <:> go n t1 t2 |
|||
++ case compare n 0 of |
|||
LT -> (h1, '-') <:> go (n+1) t1 s2 |
|||
EQ -> [] |
|||
GT -> ('-', h2) <:> go (n-1) s1 t2 |
|||
x <:> l = fmap (x :) l |
|||
-- Returns a lazy list of all optimal alignments. |
|||
levenshteinAlignments :: String -> String -> [(String, String)] |
|||
levenshteinAlignments s1 s2 = unzip <$> best |
|||
where |
|||
best = filter ((lev ==) . dist) $ allAlignments s1 s2 |
|||
lev = levenshteinDistance s1 s2 |
|||
dist = length . filter (uncurry (/=))</syntaxhighlight> |
|||
<pre>λ> mapM_ print $ levenshteinAlignments "rosettacode" "raisethysword" |
|||
("ro-settac-ode","raisethysword") |
|||
("ro-setta-code","raisethysword") |
|||
("ro-sett-acode","raisethysword") |
|||
("ro-set-tacode","raisethysword") |
|||
("r-osettac-ode","raisethysword") |
|||
("r-osetta-code","raisethysword") |
|||
("r-osett-acode","raisethysword") |
|||
("r-oset-tacode","raisethysword") |
|||
λ> mapM_ print $ levenshteinAlignments "rosettacode" "rat" |
|||
("rosettacode","ra--t------") |
|||
("rosettacode","ra---t-----") |
|||
("rosettacode","r-a-t------") |
|||
("rosettacode","r-a--t-----") |
|||
("rosettacode","r--at------") |
|||
("rosettacode","r--a-t-----") |
|||
("rosettacode","r---at-----") |
|||
("rosettacode","r-----at---") |
|||
("rosettacode","r-----a-t--") |
|||
("rosettacode","r-----a--t-") |
|||
("rosettacode","r-----a---t")</pre> |
|||
=={{header|J}}== |
|||
Translation of java: |
|||
<syntaxhighlight lang="j">levalign=:4 :0 |
|||
assert. x <:&# y |
|||
D=. x +/&i.&>:&# y |
|||
for_i. 1+i.#x do. |
|||
for_j. 1+i.#y do. |
|||
if. ((<:i){x)=(<:j){y do. |
|||
D=. (D {~<<:i,j) (<i,j)} D |
|||
else. |
|||
min=. 1+<./D{~(i,j) <@:-"1#:1 2 3 |
|||
D=. min (<i,j)} D |
|||
end. |
|||
end. |
|||
end. |
|||
A=. B=. '' |
|||
ij=. x ,&# y |
|||
while. */ij do. |
|||
'd00 d01 d10 d11'=. D{~ ij <@:-"1#:i.4 |
|||
'x1 y1'=. (ij-1){each x;y |
|||
if. d00 = d11+x1~:y1 do. |
|||
A=. A,x1 [ B=. B,y1 [ ij=. ij-1 |
|||
elseif. d00 = 1+d10 do. |
|||
A=. A,x1 [ B=. B,'-'[ ij=. ij-1 0 |
|||
elseif. d00 = 1+d01 do. |
|||
A=. A,'-'[ B=. B,y1 [ ij=. ij-0 1 |
|||
end. |
|||
end. |
|||
A,:&|.B |
|||
)</syntaxhighlight> |
|||
Task examples: |
|||
<syntaxhighlight lang="j"> 'place' levalign 'palace' |
|||
p-lace |
|||
palace |
|||
'rosettacode' levalign 'raisethysword' |
|||
r-oset-tacode |
|||
raisethysword |
|||
</syntaxhighlight> |
|||
=={{header|Java}}== |
=={{header|Java}}== |
||
< |
<syntaxhighlight lang="java">public class LevenshteinAlignment { |
||
public static String[] alignment(String a, String b) { |
public static String[] alignment(String a, String b) { |
||
Line 290: | Line 777: | ||
System.out.println(result[1]); |
System.out.println(result[1]); |
||
} |
} |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
r-oset-tacode |
r-oset-tacode |
||
raisethysword |
raisethysword |
||
</pre> |
|||
=={{header|jq}}== |
|||
'''Adapted from [[#Wren|Wren]]''' |
|||
'''Works with jq, the C implementation of jq''' |
|||
'''Works with gojq, the Go implementation of jq''' |
|||
'''Works with jaq, the Rust implementation of jq''' |
|||
<syntaxhighlight lang="jq"> |
|||
def reverseString: explode | reverse | implode; |
|||
def levenshteinAlign($x; $y): |
|||
def min($a;$b): [$a,$b]|min; |
|||
($x|length + 1) as $a1 |
|||
| ($y|length + 1) as $b1 |
|||
| ($x | ascii_downcase) as $a |
|||
| ($y | ascii_downcase) as $b |
|||
| [range(0; $b1) | 0] as $zeros |
|||
| {costs: [range(0; $a1) | $zeros]} |
|||
| reduce range(0; $b1) as $j (.; .costs[0][$j] = $j) |
|||
| reduce range(1; $a1) as $i (.; |
|||
.costs[$i][0] = $i |
|||
| reduce range(1; $b1) as $j (.; |
|||
(.costs[$i - 1][$j - 1] + (if $a[$i - 1: $i] == $b[$j - 1: $j] then 0 else 1 end)) as $temp |
|||
| .costs[$i][$j] = min( $temp; 1 + min(.costs[$i - 1][$j]; .costs[$i][$j - 1] )) )) |
|||
# walk back through matrix to figure out path |
|||
| .aPathRev = "" |
|||
| .bPathRev = "" |
|||
| .i = ($a|length) |
|||
| .j = ($b|length) |
|||
| until (.i == 0 or .j == 0; |
|||
(.costs[.i - 1][.j - 1] + (if $a[.i - 1: .i] == $b[.j - 1: .j] then 0 else 1 end)) as $temp |
|||
| .costs[.i][.j] as $cij |
|||
| if $cij == $temp |
|||
then .i += -1 |
|||
| .aPathRev += $a[.i: .i+1] |
|||
| .j += -1 |
|||
| .bPathRev += $b[.j: .j+1] |
|||
elif $cij == 1 + .costs[.i-1][.j] |
|||
then .i += -1 |
|||
| .aPathRev += $a[.i:.i+1] |
|||
| .bPathRev += "-" |
|||
elif $cij == 1 + .costs[.i][.j-1] |
|||
then .aPathRev += "-" |
|||
| .j += -1 |
|||
| .bPathRev += $b[.j: .j+1] |
|||
end) |
|||
| [.aPathRev, .bPathRev ] | map(reverseString); |
|||
levenshteinAlign("place"; "palace"), |
|||
levenshteinAlign("rosettacode";"raisethysword") |
|||
</syntaxhighlight> |
|||
{{output}} |
|||
<pre> |
|||
[ |
|||
"p-lace", |
|||
"palace" |
|||
] |
|||
[ |
|||
"r-oset-tacode", |
|||
"raisethysword" |
|||
] |
|||
</pre> |
</pre> |
||
Line 301: | Line 854: | ||
{{trans|Java}} |
{{trans|Java}} |
||
< |
<syntaxhighlight lang="julia">function levenshteinalign(a::AbstractString, b::AbstractString) |
||
a = lowercase(a) |
a = lowercase(a) |
||
b = lowercase(b) |
b = lowercase(b) |
||
Line 343: | Line 896: | ||
foreach(println, levenshteinalign("rosettacode", "raisethysword")) |
foreach(println, levenshteinalign("rosettacode", "raisethysword")) |
||
foreach(println, levenshteinalign("place", "palace"))</ |
foreach(println, levenshteinalign("place", "palace"))</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 353: | Line 906: | ||
=={{header|Kotlin}}== |
=={{header|Kotlin}}== |
||
{{trans|Java}} |
{{trans|Java}} |
||
< |
<syntaxhighlight lang="scala">// version 1.1.3 |
||
fun levenshteinAlign(a: String, b: String): Array<String> { |
fun levenshteinAlign(a: String, b: String): Array<String> { |
||
Line 403: | Line 956: | ||
println(result[0]) |
println(result[0]) |
||
println(result[1]) |
println(result[1]) |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 417: | Line 970: | ||
{{incorrect}} |
{{incorrect}} |
||
{{works with|Mathematica|7}} |
{{works with|Mathematica|7}} |
||
< |
<syntaxhighlight lang="mathematica">DamerauLevenshteinDistance["rosettacode", "raisethysword"]</syntaxhighlight> |
||
{{out}}<pre>8</pre> |
{{out}}<pre>8</pre> |
||
Line 423: | Line 976: | ||
=={{header|Nim}}== |
=={{header|Nim}}== |
||
{{trans|Kotlin}} |
{{trans|Kotlin}} |
||
< |
<syntaxhighlight lang="nim">import algorithm, sequtils, strutils |
||
proc levenshteinAlign(a, b: string): tuple[a, b: string] = |
proc levenshteinAlign(a, b: string): tuple[a, b: string] = |
||
Line 469: | Line 1,022: | ||
result = levenshteinAlign("rosettacode","raisethysword") |
result = levenshteinAlign("rosettacode","raisethysword") |
||
echo result.a |
echo result.a |
||
echo result.b</ |
echo result.b</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 479: | Line 1,032: | ||
=={{header|Perl}}== |
=={{header|Perl}}== |
||
< |
<syntaxhighlight lang="perl">use strict; |
||
use warnings; |
use warnings; |
||
Line 514: | Line 1,067: | ||
} |
} |
||
print join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword";</ |
print join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword";</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>ro-settac-o-de |
<pre>ro-settac-o-de |
||
Line 521: | Line 1,074: | ||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
{{trans|Kotlin}} plus the indicator from Go |
{{trans|Kotlin}} plus the indicator from Go |
||
<!--<syntaxhighlight lang="phix">(phixonline)--> |
|||
<lang Phix>function LevenshteinAlignment(string a, b) |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">LevenshteinAlignment</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">a</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">)</span> |
|||
integer la = length(a)+1, |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">la</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">)+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> |
|||
lb = length(b)+1 |
|||
<span style="color: #000000;">lb</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">b</span><span style="color: #0000FF;">)+</span><span style="color: #000000;">1</span> |
|||
sequence costs = repeat(repeat(0,lb),la) |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">costs</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">lb</span><span style="color: #0000FF;">),</span><span style="color: #000000;">la</span><span style="color: #0000FF;">)</span> |
|||
for j=1 to lb do |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">lb</span> <span style="color: #008080;">do</span> |
|||
costs[1][j] = j |
|||
<span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">j</span> |
|||
end for |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
for i=2 to la do |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">2</span> <span style="color: #008080;">to</span> <span style="color: #000000;">la</span> <span style="color: #008080;">do</span> |
|||
costs[i][1] = i |
|||
<span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span> |
|||
for j=2 to lb do |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">=</span><span style="color: #000000;">2</span> <span style="color: #008080;">to</span> <span style="color: #000000;">lb</span> <span style="color: #008080;">do</span> |
|||
integer tmp1 = 1+min(costs[i-1][j], costs[i][j-1]), |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">tmp1</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">min</span><span style="color: #0000FF;">(</span><span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]),</span> |
|||
tmp2 = costs[i-1][j-1] + (a[i-1]!=b[j-1]) |
|||
<span style="color: #000000;">tmp2</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]!=</span><span style="color: #000000;">b</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span> |
|||
costs[i][j] = min(tmp1, tmp2) |
|||
<span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">min</span><span style="color: #0000FF;">(</span><span style="color: #000000;">tmp1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">tmp2</span><span style="color: #0000FF;">)</span> |
|||
end for |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
end for |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
-- walk back through matrix to figure out the path |
|||
<span style="color: #000080;font-style:italic;">-- walk back through matrix to figure out the path</span> |
|||
string arev = "", |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">arev</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span><span style="color: #0000FF;">,</span> |
|||
brev = "" |
|||
<span style="color: #000000;">brev</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span> |
|||
integer i = la, j = lb |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">la</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">lb</span> |
|||
while i>1 and j>1 do |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">></span><span style="color: #000000;">1</span> <span style="color: #008080;">and</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">></span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span> |
|||
integer tmp = costs[i-1][j-1] + (a[i-1]!=b[j-1]) |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">tmp</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">+</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]!=</span><span style="color: #000000;">b</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">])</span> |
|||
switch costs[i][j] do |
|||
<span style="color: #008080;">switch</span> <span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span> |
|||
case tmp: i -= 1; arev &= a[i] |
|||
<span style="color: #008080;">case</span> <span style="color: #000000;">tmp</span><span style="color: #0000FF;">:</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">;</span> <span style="color: #000000;">arev</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">a</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> |
|||
j -= 1; brev &= b[j] |
|||
<span style="color: #000000;">j</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">;</span> <span style="color: #000000;">brev</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> |
|||
case 1 + costs[i-1][j]: i -= 1; arev &= a[i] |
|||
<span style="color: #008080;">case</span> <span style="color: #000000;">1</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]:</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">;</span> <span style="color: #000000;">arev</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">a</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> |
|||
brev &= '-' |
|||
<span style="color: #000000;">brev</span> <span style="color: #0000FF;">&=</span> <span style="color: #008000;">'-'</span> |
|||
case 1 + costs[i][j-1]: arev &= '-' |
|||
<span style="color: #008080;">case</span> <span style="color: #000000;">1</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">costs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">][</span><span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]:</span> <span style="color: #000000;">arev</span> <span style="color: #0000FF;">&=</span> <span style="color: #008000;">'-'</span> |
|||
j -= 1; brev &= b[j] |
|||
<span style="color: #000000;">j</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">;</span> <span style="color: #000000;">brev</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">b</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> |
|||
end switch |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">switch</span> |
|||
end while |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
return {reverse(arev),reverse(brev)} |
|||
<span style="color: #008080;">return</span> <span style="color: #0000FF;">{</span><span style="color: #7060A8;">reverse</span><span style="color: #0000FF;">(</span><span style="color: #000000;">arev</span><span style="color: #0000FF;">),</span><span style="color: #7060A8;">reverse</span><span style="color: #0000FF;">(</span><span style="color: #000000;">brev</span><span style="color: #0000FF;">)}</span> |
|||
end function |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
procedure test(string a,b) |
|||
<span style="color: #008080;">procedure</span> <span style="color: #000000;">test</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">a</span><span style="color: #0000FF;">,</span><span style="color: #000000;">b</span><span style="color: #0000FF;">)</span> |
|||
{a,b} = LevenshteinAlignment(a,b) |
|||
<span style="color: #0000FF;">{</span><span style="color: #000000;">a</span><span style="color: #0000FF;">,</span><span style="color: #000000;">b</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">LevenshteinAlignment</span><span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">,</span><span style="color: #000000;">b</span><span style="color: #0000FF;">)</span> |
|||
string c = sq_add(repeat(' ',length(a)),sq_mul(sq_ne(a,b),'|'-' ')) |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">c</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sq_add</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">)),</span><span style="color: #7060A8;">sq_mul</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">sq_ne</span><span style="color: #0000FF;">(</span><span style="color: #000000;">a</span><span style="color: #0000FF;">,</span><span style="color: #000000;">b</span><span style="color: #0000FF;">),</span><span style="color: #008000;">'|'</span><span style="color: #0000FF;">-</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">))</span> |
|||
printf(1,"%s\n%s\n%s\n\n",{a,b,c}) |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s\n%s\n%s\n\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">a</span><span style="color: #0000FF;">,</span><span style="color: #000000;">b</span><span style="color: #0000FF;">,</span><span style="color: #000000;">c</span><span style="color: #0000FF;">})</span> |
|||
end procedure |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span> |
|||
test("rosettacode", "raisethysword") |
|||
<span style="color: #000000;">test</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"rosettacode"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"raisethysword"</span><span style="color: #0000FF;">)</span> |
|||
test("place", "palace")</lang> |
|||
<span style="color: #000000;">test</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"place"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"palace"</span><span style="color: #0000FF;">)</span> |
|||
<!--</syntaxhighlight>--> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 573: | Line 1,128: | ||
=={{header|Python}}== |
=={{header|Python}}== |
||
< |
<syntaxhighlight lang="python">from difflib import ndiff |
||
def levenshtein(str1, str2): |
def levenshtein(str1, str2): |
||
Line 593: | Line 1,148: | ||
levenshtein("place","palace") |
levenshtein("place","palace") |
||
levenshtein("rosettacode","raisethysword") |
levenshtein("rosettacode","raisethysword")</syntaxhighlight> |
||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 607: | Line 1,161: | ||
for a discussion of the code. |
for a discussion of the code. |
||
< |
<syntaxhighlight lang="racket">#lang racket |
||
(define (memoize f) |
(define (memoize f) |
||
Line 626: | Line 1,180: | ||
(min (add1 (levenshtein (rest s) t)) |
(min (add1 (levenshtein (rest s) t)) |
||
(add1 (levenshtein s (rest t))) |
(add1 (levenshtein s (rest t))) |
||
(add1 (levenshtein (rest s) (rest t)))))]))))</ |
(add1 (levenshtein (rest s) (rest t)))))]))))</syntaxhighlight> |
||
'''Demonstration:''' |
'''Demonstration:''' |
||
< |
<syntaxhighlight lang="racket">(levenshtein (string->list "rosettacode") |
||
(string->list "raisethysword"))</ |
(string->list "raisethysword"))</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>8</pre> |
<pre>8</pre> |
||
Line 635: | Line 1,189: | ||
===Complete version=== |
===Complete version=== |
||
Now we extend the code from http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html to show also the alignment. The code is very similar, but it stores the partial results (number of edits and alignment of each substring) in a lev structure. |
Now we extend the code from http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html to show also the alignment. The code is very similar, but it stores the partial results (number of edits and alignment of each substring) in a lev structure. |
||
< |
<syntaxhighlight lang="racket">#lang racket |
||
(struct lev (n s t)) |
(struct lev (n s t)) |
||
Line 678: | Line 1,232: | ||
(values (lev-n result) |
(values (lev-n result) |
||
(list->string (lev-s result)) |
(list->string (lev-s result)) |
||
(list->string (lev-t result)))))</ |
(list->string (lev-t result)))))</syntaxhighlight> |
||
'''Demonstration:''' |
'''Demonstration:''' |
||
< |
<syntaxhighlight lang="racket">(let-values ([(dist exp-s exp-t) |
||
(levenshtein "rosettacode" "raisethysword")]) |
(levenshtein "rosettacode" "raisethysword")]) |
||
(printf "levenshtein: ~a edits\n" dist) |
(printf "levenshtein: ~a edits\n" dist) |
||
(displayln exp-s) |
(displayln exp-s) |
||
(displayln exp-t))</ |
(displayln exp-t))</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>levenshtein: 8 edits |
<pre>levenshtein: 8 edits |
||
Line 693: | Line 1,247: | ||
(formerly Perl 6) |
(formerly Perl 6) |
||
{{trans|Perl}} |
{{trans|Perl}} |
||
<lang |
<syntaxhighlight lang="raku" line>sub align ( Str $σ, Str $t ) { |
||
my @s = flat *, $σ.comb; |
my @s = flat *, $σ.comb; |
||
my @t = flat *, $t.comb; |
my @t = flat *, $t.comb; |
||
Line 717: | Line 1,271: | ||
} |
} |
||
.say for align 'rosettacode', 'raisethysword';</ |
.say for align 'rosettacode', 'raisethysword';</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>ro-settac-o-de |
<pre>ro-settac-o-de |
||
Line 725: | Line 1,279: | ||
{{trans|Tcl}} |
{{trans|Tcl}} |
||
uses "lcs" from [[Longest common subsequence#Ruby|here]] |
uses "lcs" from [[Longest common subsequence#Ruby|here]] |
||
< |
<syntaxhighlight lang="ruby">require 'lcs' |
||
def levenshtein_align(a, b) |
def levenshtein_align(a, b) |
||
Line 760: | Line 1,314: | ||
end |
end |
||
puts levenshtein_align("rosettacode", "raisethysword")</ |
puts levenshtein_align("rosettacode", "raisethysword")</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 774: | Line 1,328: | ||
'''src/main.rs''' |
'''src/main.rs''' |
||
< |
<syntaxhighlight lang="rust">extern crate edit_distance; |
||
edit_distance("rosettacode", "raisethysword");</ |
edit_distance("rosettacode", "raisethysword");</syntaxhighlight> |
||
=={{header|Scala}}== |
=={{header|Scala}}== |
||
{{Out}}Best seen running in your browser either by [https://scastie.scala-lang.org/I8BAESkNTjukVPzsWOUyPA ScalaFiddle (ES aka JavaScript, non JVM)] or [https://scastie.scala-lang.org/I8BAESkNTjukVPzsWOUyPA]. |
{{Out}}Best seen running in your browser either by [https://scastie.scala-lang.org/I8BAESkNTjukVPzsWOUyPA ScalaFiddle (ES aka JavaScript, non JVM)] or [https://scastie.scala-lang.org/I8BAESkNTjukVPzsWOUyPA]. |
||
< |
<syntaxhighlight lang="scala">import scala.collection.mutable |
||
import scala.collection.parallel.ParSeq |
import scala.collection.parallel.ParSeq |
||
Line 830: | Line 1,384: | ||
println(alignment._2) |
println(alignment._2) |
||
}</ |
}</syntaxhighlight> |
||
=={{header|Sidef}}== |
=={{header|Sidef}}== |
||
{{trans|Perl}} |
{{trans|Perl}} |
||
< |
<syntaxhighlight lang="ruby">func align(s, t) { |
||
s.chars!.prepend!('^') |
s.chars!.prepend!('^') |
||
t.chars!.prepend!('^') |
t.chars!.prepend!('^') |
||
var A = [] |
var A = [] |
||
{|i| A[i][0]{@|<d s t>} = (i, s. |
{|i| A[i][0]{@|<d s t>} = (i, s.slice(1).first(i).join, '~' * i) } << ^s |
||
{|i| A[0][i]{@|<d s t>} = (i, '-' * i, t. |
{|i| A[0][i]{@|<d s t>} = (i, '-' * i, t.slice(1).first(i).join) } << ^t |
||
for i (1 .. s.end) { |
for i (1 .. s.end) { |
||
Line 866: | Line 1,420: | ||
} |
} |
||
align("rosettacode", "raisethysword").each { .say }</ |
align("rosettacode", "raisethysword").each { .say }</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
ro-settac-o-de |
ro-settac-o-de |
||
Line 873: | Line 1,427: | ||
=={{header|Tcl}}== |
=={{header|Tcl}}== |
||
{{tcllib|struct::list}} |
{{tcllib|struct::list}} |
||
< |
<syntaxhighlight lang="tcl">package require struct::list |
||
proc levenshtein/align {a b} { |
proc levenshtein/align {a b} { |
||
lassign [struct::list longestCommonSubsequence [split $a ""] [split $b ""]]\ |
lassign [struct::list longestCommonSubsequence [split $a ""] [split $b ""]]\ |
||
Line 908: | Line 1,462: | ||
} |
} |
||
puts [levenshtein/align "rosettacode" "raisethysword"]</ |
puts [levenshtein/align "rosettacode" "raisethysword"]</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 918: | Line 1,472: | ||
{{trans|Kotlin}} |
{{trans|Kotlin}} |
||
{{libheader|Wren-str}} |
{{libheader|Wren-str}} |
||
<syntaxhighlight lang="wren">import "./str" for Str |
|||
{{libheader|Wren-math}} |
|||
<lang ecmascript>import "/str" for Str |
|||
import "/math" for Math |
|||
var levenshteinAlign = Fn.new { |a, b| |
var levenshteinAlign = Fn.new { |a, b| |
||
Line 932: | Line 1,484: | ||
for (j in 1..b.count) { |
for (j in 1..b.count) { |
||
var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1) |
var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1) |
||
costs[i][j] = |
costs[i][j] = temp.min(1 + costs[i - 1][j].min(costs[i][j - 1])) |
||
} |
} |
||
} |
} |
||
Line 967: | Line 1,519: | ||
result = levenshteinAlign.call("rosettacode","raisethysword") |
result = levenshteinAlign.call("rosettacode","raisethysword") |
||
System.print(result[0]) |
System.print(result[0]) |
||
System.print(result[1])</ |
System.print(result[1])</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 980: | Line 1,532: | ||
=={{header|zkl}}== |
=={{header|zkl}}== |
||
{{trans|Java}} |
{{trans|Java}} |
||
< |
<syntaxhighlight lang="zkl">fcn alignment(a,b){ |
||
a,b = a.toLower(), b.toLower(); |
a,b = a.toLower(), b.toLower(); |
||
costs := (a.len()+1).pump(List(),'wrap(a){ [1..b.len()].pump(List(a)) }); |
costs := (a.len()+1).pump(List(),'wrap(a){ [1..b.len()].pump(List(a)) }); |
||
Line 1,004: | Line 1,556: | ||
} |
} |
||
return(aPathRev.text.reverse(), bPathRev.text.reverse()) |
return(aPathRev.text.reverse(), bPathRev.text.reverse()) |
||
}</ |
}</syntaxhighlight> |
||
< |
<syntaxhighlight lang="zkl">result := alignment("rosettacode", "raisethysword"); |
||
println(result[0]); |
println(result[0]); |
||
println(result[1]);</ |
println(result[1]);</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
Latest revision as of 08:13, 25 May 2024
You are encouraged to solve this task according to the task description, using any language you may know.
The Levenshtein distance algorithm returns the number of atomic operations (insertion, deletion or edition) that must be performed on a string in order to obtain an other one, but it does not say anything about the actual operations used or their order.
An alignment is a notation used to describe the operations used to turn a string into an other. At some point in the strings, the minus character ('-') is placed in order to signify that a character must be added at this very place. For instance, an alignment between the words 'place' and 'palace' is:
P-LACE PALACE
- Task
Write a function that shows the alignment of two strings for the corresponding levenshtein distance.
As an example, use the words "rosettacode" and "raisethysword".
You can either implement an algorithm, or use a dedicated library (thus showing us how it is named in your language).
11l
F levenshteinAlign(aa, bb)
V a = aa.lowercase()
V b = bb.lowercase()
V costs = [[0] * (b.len + 1)] * (a.len + 1)
L(j) 0 .. b.len
costs[0][j] = j
L(i) 1 .. a.len
costs[i][0] = i
L(j) 1 .. b.len
V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1])
costs[i][j] = min(1 + min(costs[i - 1][j], costs[i][j - 1]), tmp)
V aPathRev = ‘’
V bPathRev = ‘’
V i = a.len
V j = b.len
L i != 0 & j != 0
V tmp = costs[i - 1][j - 1] + Int(a[i - 1] != b[j - 1])
I costs[i][j] == tmp
aPathRev ‘’= a[--i]
bPathRev ‘’= b[--j]
E I costs[i][j] == 1 + costs[i - 1][j]
aPathRev ‘’= a[--i]
bPathRev ‘’= ‘-’
E I costs[i][j] == 1 + costs[i][j - 1]
aPathRev ‘’= ‘-’
bPathRev ‘’= b[--j]
E
assert(0B, ‘Internal error’)
R (reversed(aPathRev), reversed(bPathRev))
V result = levenshteinAlign(‘place’, ‘palace’)
print(result[0])
print(result[1])
print()
result = levenshteinAlign(‘rosettacode’, ‘raisethysword’)
print(result[0])
print(result[1])
- Output:
p-lace palace r-oset-tacode raisethysword
Arturo
print join.with:"\n" levenshtein.align "place" "palace"
print join.with:"\n" levenshtein.align "rosettacode" "raisethysword"
- Output:
p-lace palace r-oset-tacode raisethysword
C
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct edit_s edit_t, *edit;
struct edit_s {
char c1, c2;
int n;
edit next;
};
void leven(char *a, char *b)
{
int i, j, la = strlen(a), lb = strlen(b);
edit *tbl = malloc(sizeof(edit) * (1 + la));
tbl[0] = calloc((1 + la) * (1 + lb), sizeof(edit_t));
for (i = 1; i <= la; i++)
tbl[i] = tbl[i-1] + (1+lb);
for (i = la; i >= 0; i--) {
char *aa = a + i;
for (j = lb; j >= 0; j--) {
char *bb = b + j;
if (!*aa && !*bb) continue;
edit e = &tbl[i][j];
edit repl = &tbl[i+1][j+1];
edit dela = &tbl[i+1][j];
edit delb = &tbl[i][j+1];
e->c1 = *aa;
e->c2 = *bb;
if (!*aa) {
e->next = delb;
e->n = e->next->n + 1;
continue;
}
if (!*bb) {
e->next = dela;
e->n = e->next->n + 1;
continue;
}
e->next = repl;
if (*aa == *bb) {
e->n = e->next->n;
continue;
}
if (e->next->n > delb->n) {
e->next = delb;
e->c1 = 0;
}
if (e->next->n > dela->n) {
e->next = dela;
e->c1 = *aa;
e->c2 = 0;
}
e->n = e->next->n + 1;
}
}
edit p = tbl[0];
printf("%s -> %s: %d edits\n", a, b, p->n);
while (p->next) {
if (p->c1 == p->c2)
printf("%c", p->c1);
else {
putchar('(');
if (p->c1) putchar(p->c1);
putchar(',');
if (p->c2) putchar(p->c2);
putchar(')');
}
p = p->next;
}
putchar('\n');
free(tbl[0]);
free(tbl);
}
int main(void)
{
leven("raisethysword", "rosettacode");
return 0;
}
- Output:
raisethysword -> rosettacode: 8 edits r(a,o)(i,)set(h,t)(y,a)(s,c)(w,)o(r,d)(d,e)
C#
using System;
using System.Text;
public class LevenshteinAlignment
{
public static string[] Alignment(string a, string b)
{
a = a.ToLower();
b = b.ToLower();
int[,] costs = new int[a.Length + 1, b.Length + 1];
for (int j = 0; j <= b.Length; j++)
costs[0, j] = j;
for (int i = 1; i <= a.Length; i++)
{
costs[i, 0] = i;
for (int j = 1; j <= b.Length; j++)
{
costs[i, j] = Math.Min(1 + Math.Min(costs[i - 1, j], costs[i, j - 1]),
a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1);
}
}
StringBuilder aPathRev = new StringBuilder();
StringBuilder bPathRev = new StringBuilder();
for (int i = a.Length, j = b.Length; i != 0 && j != 0;)
{
if (costs[i, j] == (a[i - 1] == b[j - 1] ? costs[i - 1, j - 1] : costs[i - 1, j - 1] + 1))
{
aPathRev.Append(a[--i]);
bPathRev.Append(b[--j]);
}
else if (costs[i, j] == 1 + costs[i - 1, j])
{
aPathRev.Append(a[--i]);
bPathRev.Append('-');
}
else if (costs[i, j] == 1 + costs[i, j - 1])
{
aPathRev.Append('-');
bPathRev.Append(b[--j]);
}
}
return new string[] { Reverse(aPathRev.ToString()), Reverse(bPathRev.ToString()) };
}
private static string Reverse(string s)
{
char[] charArray = s.ToCharArray();
Array.Reverse(charArray);
return new string(charArray);
}
public static void Main(string[] args)
{
string[] result = Alignment("rosettacode", "raisethysword");
Console.WriteLine(result[0]);
Console.WriteLine(result[1]);
}
}
- Output:
r-oset-tacode raisethysword
C++
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>
std::string to_lower_case(const std::string& text) {
std::string result = text;
std::transform(result.begin(), result.end(), result.begin(),
[](char ch){ return std::tolower(ch); });
return result;
}
std::vector<std::string> levenshtein_alignment(std::string a, std::string b) {
a = to_lower_case(a);
b = to_lower_case(b);
std::vector<std::vector<int32_t>> costs{ a.length() + 1, std::vector<int32_t>( b.length() + 1, 0 ) };
for ( uint64_t j = 0; j <= b.length(); ++j )
costs[0][j] = j;
for ( uint64_t i = 1; i <= a.length(); ++i ) {
costs[i][0] = i;
for ( uint64_t j = 1; j <= b.length(); ++j ) {
costs[i][j] = std::min(std::min( costs[i - 1][j], costs[i][j - 1]) + 1,
a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1);
}
}
std::string a_path_reversed, b_path_reversed;
uint64_t i = a.length(), j = b.length();
while ( i != 0 && j != 0 ) {
if ( costs[i][j] == ( a[i - 1] == b[j - 1] ? costs[i - 1][j - 1] : costs[i - 1][j - 1] + 1 ) ) {
a_path_reversed += a[--i];
b_path_reversed += b[--j];
} else if ( costs[i][j] == costs[i - 1][j] + 1 ) {
a_path_reversed += a[--i];
b_path_reversed += "-";
} else if ( costs[i][j] == costs[i][j - 1] + 1 ) {
a_path_reversed += "-";
b_path_reversed += b[--j];
}
}
std::reverse(a_path_reversed.begin(), a_path_reversed.end());
std::reverse(b_path_reversed.begin(), b_path_reversed.end());
return std::vector<std::string>{ a_path_reversed, b_path_reversed };
}
int main() {
std::vector<std::string> result = levenshtein_alignment("place", "palace");
std::cout << result[0] << std::endl;
std::cout << result[1] << std::endl;
std::cout << std::endl;
result = levenshtein_alignment("rosettacode", "raisethysword");
std::cout << result[0] << std::endl;
std::cout << result[1] << std::endl;
}
- Output:
p-lace palace r-oset-tacode raisethysword
D
Using the standard library.
void main() {
import std.stdio, std.algorithm;
immutable s1 = "rosettacode";
immutable s2 = "raisethysword";
string s1b, s2b;
size_t pos1, pos2;
foreach (immutable c; levenshteinDistanceAndPath(s1, s2)[1]) {
final switch (c) with (EditOp) {
case none, substitute:
s1b ~= s1[pos1++];
s2b ~= s2[pos2++];
break;
case insert:
s1b ~= "_";
s2b ~= s2[pos2++];
break;
case remove:
s1b ~= s1[pos1++];
s2b ~= "_";
break;
}
}
writeln(s1b, "\n", s2b);
}
- Output:
r_oset_tacode raisethysword
EasyLang
global dparr[] dpcols .
proc dpnew a b . .
len dparr[] a * b
dpcols = b
.
func dp r c .
return dparr[r * dpcols + c + 1]
.
proc dps r c v . .
dparr[r * dpcols + c + 1] = v
.
proc align a$ b$ . ar$ br$ .
dpnew (len a$ + 1) (len b$ + 1)
for i = 1 to len a$
dps i 0 i
.
for j = 0 to len b$
dps 0 j j
.
for i = 1 to len a$
for j = 1 to len b$
if substr a$ i 1 = substr b$ j 1
dps i j dp (i - 1) (j - 1)
else
dps i j lower (lower dp (i - 1) j dp i (j - 1)) dp (i - 1) (j - 1) + 1
.
.
.
ar$ = "" ; br$ = ""
i = len a$ ; j = len b$
while i <> 0 and j <> 0
if substr a$ i 1 = substr b$ j 1 or dp i j = dp (i - 1) (j - 1) + 1
ar$ = substr a$ i 1 & ar$
i -= 1
br$ = substr b$ j 1 & br$
j -= 1
elif dp i j = dp (i - 1) j + 1
ar$ = substr a$ i 1 & ar$
i -= 1
br$ = "-" & br$
else
ar$ = "-" & ar$
br$ = substr b$ j 1 & br$
j -= 1
.
.
.
align "rosettacode" "raisethysword" ar$ br$
print ar$ ; print br$
- Output:
r-oset-tacode raisethysword
Eiffel
distance (source, target: STRING): INTEGER
-- Minimum number of operations to turn `source' into `target'.
local
l_distance: ARRAY2 [INTEGER]
del, ins, subst: INTEGER
do
create l_distance.make (source.count, target.count)
⟳ ic:(1 |..| source.count) ¦ l_distance [ic, 1] := ic - 1 ⟲
⟳ ij:(1 |..| target.count) ¦ l_distance [1, ij] := ij - 1 ⟲
⟳ ic:(2 |..| source.count) ¦
⟳ jc:(2 |..| target.count) ¦
if source [ic] = target [jc] then -- same char
l_distance [ic, jc] := l_distance [ic - 1, jc - 1]
else -- diff char
del := l_distance [ic - 1, jc] -- delete?
ins := l_distance [ic, jc - 1] -- insert?
subst := l_distance [ic - 1, jc -1] -- substitute/swap?
l_distance [ic, jc] := del.min (ins.min (subst)) + 1
end
⟲
⟲
Result:= l_distance [source.count, target.count]
end
- Output:
Result = 8
The ⟳ ¦ ⟲ represents the "symbolic form" of an Eiffel across loop. In the case above, the first two ⟳ (loops) initialize the first column and the first row, and then two nested ⟳ (loops) to walk the distance computation.
Also—the `ic' is this programmers shorthand for ITERATION_CURSOR. Therefore, in the nested loops, the `ic' is the iteration cursor following `i' and the `ij' is following `j' as indexes on the source, target, and l_distance.
FreeBASIC
#define min(a, b) iif((a) < (b), (a), (b))
Sub LevenshteinAlignment(s1 As String, s2 As String)
Dim As String align1 = "", align2 = ""
Dim As Integer i, j, len1, len2
len1 = Len(s1):
len2 = Len(s2)
Dim As Integer dp(len1+1, len2+1)
For i = 0 To len1
dp(i, 0) = i
Next i
For j = 0 To len2
dp(0, j) = j
Next j
For i = 1 To len1
For j = 1 To len2
If Mid(s1, i, 1) = Mid(s2, j, 1) Then
dp(i, j) = dp(i-1, j-1)
Else
dp(i, j) = min(min(dp(i-1, j), dp(i, j-1)), dp(i-1, j-1)) + 1
End If
Next j
Next i
i = len1: j = len2
While i > 0 And j > 0
If Mid(s1, i, 1) = Mid(s2, j, 1) Then
align1 = Mid(s1, i, 1) + align1
align2 = Mid(s2, j, 1) + align2
i -= 1: j -= 1
Elseif dp(i, j) = dp(i-1, j-1) + 1 Then
align1 = Mid(s1, i, 1) + align1
align2 = Mid(s2, j, 1) + align2
i -= 1: j -= 1
Elseif dp(i, j) = dp(i-1, j) + 1 Then
align1 = Mid(s1, i, 1) + align1
align2 = "-" + align2
i -= 1
Else
align1 = "-" + align1
align2 = Mid(s2, j, 1) + align2
j -= 1
End If
Wend
While i > 0
align1 = Mid(s1, i, 1) + align1
align2 = "-" + align2
i -= 1
Wend
While j > 0
align1 = "-" + align1
align2 = Mid(s2, j, 1) + align2
j -= 1
Wend
Print "Levenshtein Distance: "; dp(len1, len2)
Print align1
Print align2
Print
End Sub
LevenshteinAlignment("rosettacode", "raisethysword")
LevenshteinAlignment("place", "palace")
Sleep
- Output:
Levenshtein Distance: 8 r-oset-tacode raisethysword Levenshtein Distance: 1 p-lace palace
Go
Alignment computed by the Needleman-Wunch algorithm, which is used in bioinformatics.
package main
import (
"fmt"
"github.com/biogo/biogo/align"
ab "github.com/biogo/biogo/alphabet"
"github.com/biogo/biogo/feat"
"github.com/biogo/biogo/seq/linear"
)
func main() {
// Alphabets for things like DNA are predefined in biogo, but we
// define our own here.
lc := ab.Must(ab.NewAlphabet("-abcdefghijklmnopqrstuvwxyz",
feat.Undefined, '-', 0, true))
// Construct scoring matrix for Needleman-Wunch algorithm.
// We leave zeros on the diagonal for the Levenshtein distance of an
// exact match and put -1s everywhere else for the Levenshtein distance
// of an edit.
nw := make(align.NW, lc.Len())
for i := range nw {
r := make([]int, lc.Len())
nw[i] = r
for j := range r {
if j != i {
r[j] = -1
}
}
}
// define input sequences
a := &linear.Seq{Seq: ab.BytesToLetters([]byte("rosettacode"))}
a.Alpha = lc
b := &linear.Seq{Seq: ab.BytesToLetters([]byte("raisethysword"))}
b.Alpha = lc
// perform alignment
aln, err := nw.Align(a, b)
// format and display result
if err != nil {
fmt.Println(err)
return
}
fa := align.Format(a, b, aln, '-')
fmt.Printf("%s\n%s\n", fa[0], fa[1])
aa := fmt.Sprint(fa[0])
ba := fmt.Sprint(fa[1])
ma := make([]byte, len(aa))
for i := range ma {
if aa[i] == ba[i] {
ma[i] = ' '
} else {
ma[i] = '|'
}
}
fmt.Println(string(ma))
}
- Output:
The lines after the alignment point out the 8 edits.
r-oset-tacode raisethysword || |||| ||
Haskell
The Wagner–Fischer matrix construction is adopted from Levenshtein_distance#Haskell, however it is reversed in order to simplify it's traversing.
costs :: String -> String -> [[Int]]
costs s1 s2 = reverse $ reverse <$> matrix
where
matrix = scanl transform [0 .. length s1] s2
transform ns@(n:ns1) c = scanl calc (n + 1) $ zip3 s1 ns ns1
where
calc z (c1, x, y) = minimum [ y + 1, z + 1
, x + fromEnum (c1 /= c)]
levenshteinDistance :: String -> String -> Int
levenshteinDistance s1 s2 = head.head $ costs s1 s2
A sample alignment
The alignment is built in the same way as it is done in Levenshtein_distance/Alignment#Java. Instead of indices the Wagner–Fischer matrix and strings are traversed by list truncation, as zippers.
alignment :: String -> String -> (String, String)
alignment s1 s2 = go (costs s1 s2) (reverse s1) (reverse s2) ([],[])
where
go c _ _ r | isEmpty c = r
go _ [] s r = ('-' <$ s, reverse s) <> r
go _ s [] r = (reverse s, '-' <$ s) <> r
go c s1@(h1:t1) s2@(h2:t2) (r1, r2) =
let temp = (get.nextCol.nextRow $ c) + if h1 == h2 then 0 else 1
in case get c of
x | x == temp -> go (nextRow.nextCol $ c) t1 t2 (h1:r1, h2:r2)
| x == 1 + (get.nextCol $ c) -> go (nextCol c) s1 t2 ('-':r1, h2:r2)
| x == 1 + (get.nextRow $ c) -> go (nextRow c) t1 s2 (h1:r1, '-':r2)
-- Functions which treat table as zipper
get ((h:_):_) = h
nextRow = map tail
nextCol = tail
isEmpty c = null c || null (head c)
λ> alignment "palace" "place" ("palace","p-lace") λ> alignment "rosettacode" "raisethysword" ("r-oset-tacode","raisethysword") λ> alignment "rosettacode" "rat" ("rosettacode","r-----a---t")
All alignments
The alternative solution, which extensively produces all minimal alignments for given strings.
-- Produces all possible alignments for two strings.
allAlignments :: String -> String -> [[(Char, Char)]]
allAlignments s1 s2 = go (length s2 - length s1) s1 s2
where
go _ s [] = [(\x -> (x, '-')) <$> s]
go _ [] s = [(\x -> ('-' ,x)) <$> s]
go n s1@(h1:t1) s2@(h2:t2) = (h1, h2) <:> go n t1 t2
++ case compare n 0 of
LT -> (h1, '-') <:> go (n+1) t1 s2
EQ -> []
GT -> ('-', h2) <:> go (n-1) s1 t2
x <:> l = fmap (x :) l
-- Returns a lazy list of all optimal alignments.
levenshteinAlignments :: String -> String -> [(String, String)]
levenshteinAlignments s1 s2 = unzip <$> best
where
best = filter ((lev ==) . dist) $ allAlignments s1 s2
lev = levenshteinDistance s1 s2
dist = length . filter (uncurry (/=))
λ> mapM_ print $ levenshteinAlignments "rosettacode" "raisethysword" ("ro-settac-ode","raisethysword") ("ro-setta-code","raisethysword") ("ro-sett-acode","raisethysword") ("ro-set-tacode","raisethysword") ("r-osettac-ode","raisethysword") ("r-osetta-code","raisethysword") ("r-osett-acode","raisethysword") ("r-oset-tacode","raisethysword") λ> mapM_ print $ levenshteinAlignments "rosettacode" "rat" ("rosettacode","ra--t------") ("rosettacode","ra---t-----") ("rosettacode","r-a-t------") ("rosettacode","r-a--t-----") ("rosettacode","r--at------") ("rosettacode","r--a-t-----") ("rosettacode","r---at-----") ("rosettacode","r-----at---") ("rosettacode","r-----a-t--") ("rosettacode","r-----a--t-") ("rosettacode","r-----a---t")
J
Translation of java:
levalign=:4 :0
assert. x <:&# y
D=. x +/&i.&>:&# y
for_i. 1+i.#x do.
for_j. 1+i.#y do.
if. ((<:i){x)=(<:j){y do.
D=. (D {~<<:i,j) (<i,j)} D
else.
min=. 1+<./D{~(i,j) <@:-"1#:1 2 3
D=. min (<i,j)} D
end.
end.
end.
A=. B=. ''
ij=. x ,&# y
while. */ij do.
'd00 d01 d10 d11'=. D{~ ij <@:-"1#:i.4
'x1 y1'=. (ij-1){each x;y
if. d00 = d11+x1~:y1 do.
A=. A,x1 [ B=. B,y1 [ ij=. ij-1
elseif. d00 = 1+d10 do.
A=. A,x1 [ B=. B,'-'[ ij=. ij-1 0
elseif. d00 = 1+d01 do.
A=. A,'-'[ B=. B,y1 [ ij=. ij-0 1
end.
end.
A,:&|.B
)
Task examples:
'place' levalign 'palace'
p-lace
palace
'rosettacode' levalign 'raisethysword'
r-oset-tacode
raisethysword
Java
public class LevenshteinAlignment {
public static String[] alignment(String a, String b) {
a = a.toLowerCase();
b = b.toLowerCase();
// i == 0
int[][] costs = new int[a.length()+1][b.length()+1];
for (int j = 0; j <= b.length(); j++)
costs[0][j] = j;
for (int i = 1; i <= a.length(); i++) {
costs[i][0] = i;
for (int j = 1; j <= b.length(); j++) {
costs[i][j] = Math.min(1 + Math.min(costs[i-1][j], costs[i][j-1]), a.charAt(i - 1) == b.charAt(j - 1) ? costs[i-1][j-1] : costs[i-1][j-1] + 1);
}
}
// walk back through matrix to figure out path
StringBuilder aPathRev = new StringBuilder();
StringBuilder bPathRev = new StringBuilder();
for (int i = a.length(), j = b.length(); i != 0 && j != 0; ) {
if (costs[i][j] == (a.charAt(i - 1) == b.charAt(j - 1) ? costs[i-1][j-1] : costs[i-1][j-1] + 1)) {
aPathRev.append(a.charAt(--i));
bPathRev.append(b.charAt(--j));
} else if (costs[i][j] == 1 + costs[i-1][j]) {
aPathRev.append(a.charAt(--i));
bPathRev.append('-');
} else if (costs[i][j] == 1 + costs[i][j-1]) {
aPathRev.append('-');
bPathRev.append(b.charAt(--j));
}
}
return new String[]{aPathRev.reverse().toString(), bPathRev.reverse().toString()};
}
public static void main(String[] args) {
String[] result = alignment("rosettacode", "raisethysword");
System.out.println(result[0]);
System.out.println(result[1]);
}
}
- Output:
r-oset-tacode raisethysword
jq
Adapted from Wren
Works with jq, the C implementation of jq
Works with gojq, the Go implementation of jq
Works with jaq, the Rust implementation of jq
def reverseString: explode | reverse | implode;
def levenshteinAlign($x; $y):
def min($a;$b): [$a,$b]|min;
($x|length + 1) as $a1
| ($y|length + 1) as $b1
| ($x | ascii_downcase) as $a
| ($y | ascii_downcase) as $b
| [range(0; $b1) | 0] as $zeros
| {costs: [range(0; $a1) | $zeros]}
| reduce range(0; $b1) as $j (.; .costs[0][$j] = $j)
| reduce range(1; $a1) as $i (.;
.costs[$i][0] = $i
| reduce range(1; $b1) as $j (.;
(.costs[$i - 1][$j - 1] + (if $a[$i - 1: $i] == $b[$j - 1: $j] then 0 else 1 end)) as $temp
| .costs[$i][$j] = min( $temp; 1 + min(.costs[$i - 1][$j]; .costs[$i][$j - 1] )) ))
# walk back through matrix to figure out path
| .aPathRev = ""
| .bPathRev = ""
| .i = ($a|length)
| .j = ($b|length)
| until (.i == 0 or .j == 0;
(.costs[.i - 1][.j - 1] + (if $a[.i - 1: .i] == $b[.j - 1: .j] then 0 else 1 end)) as $temp
| .costs[.i][.j] as $cij
| if $cij == $temp
then .i += -1
| .aPathRev += $a[.i: .i+1]
| .j += -1
| .bPathRev += $b[.j: .j+1]
elif $cij == 1 + .costs[.i-1][.j]
then .i += -1
| .aPathRev += $a[.i:.i+1]
| .bPathRev += "-"
elif $cij == 1 + .costs[.i][.j-1]
then .aPathRev += "-"
| .j += -1
| .bPathRev += $b[.j: .j+1]
end)
| [.aPathRev, .bPathRev ] | map(reverseString);
levenshteinAlign("place"; "palace"),
levenshteinAlign("rosettacode";"raisethysword")
- Output:
[ "p-lace", "palace" ] [ "r-oset-tacode", "raisethysword" ]
Julia
function levenshteinalign(a::AbstractString, b::AbstractString)
a = lowercase(a)
b = lowercase(b)
len_a = length(a)
len_b = length(b)
costs = Matrix{Int}(len_a + 1, len_b + 1)
costs[1, :] .= 0:len_b
@inbounds for i in 2:(len_a + 1)
costs[i, 1] = i
for j in 2:(len_b + 1)
tmp = ifelse(a[i-1] == b[j-1], costs[i-1, j-1], costs[i-1, j-1] + 1)
costs[i, j] = min(1 + min(costs[i-1, j], costs[i, j-1]), tmp)
end
end
apathrev = IOBuffer()
bpathrev = IOBuffer()
local i = len_a + 1
local j = len_b + 1
@inbounds while i != 1 && j != 1
tmp = ifelse(a[i-1] == b[j-1], costs[i-1, j-1], costs[i-1, j-1] + 1)
if costs[i, j] == tmp
i -= 1
j -= 1
print(apathrev, a[i])
print(bpathrev, b[j])
elseif costs[i, j] == 1 + costs[i-1, j]
i -= 1
print(apathrev, a[i])
print(bpathrev, '-')
elseif costs[i, j] == 1 + costs[i, j-1]
j -= 1
print(apathrev, '-')
print(bpathrev, b[j])
end
end
return reverse(String(take!(apathrev))), reverse(String(take!(bpathrev)))
end
foreach(println, levenshteinalign("rosettacode", "raisethysword"))
foreach(println, levenshteinalign("place", "palace"))
- Output:
r-oset-tacode raisethysword p-lace palace
Kotlin
// version 1.1.3
fun levenshteinAlign(a: String, b: String): Array<String> {
val aa = a.toLowerCase()
val bb = b.toLowerCase()
val costs = Array(a.length + 1) { IntArray(b.length + 1) }
for (j in 0..b.length) costs[0][j] = j
for (i in 1..a.length) {
costs[i][0] = i
for (j in 1..b.length) {
val temp = costs[i - 1][j - 1] + (if (aa[i - 1] == bb[j - 1]) 0 else 1)
costs[i][j] = minOf(1 + minOf(costs[i - 1][j], costs[i][j - 1]), temp)
}
}
// walk back through matrix to figure out path
val aPathRev = StringBuilder()
val bPathRev = StringBuilder()
var i = a.length
var j = b.length
while (i != 0 && j != 0) {
val temp = costs[i - 1][j - 1] + (if (aa[i - 1] == bb[j - 1]) 0 else 1)
when (costs[i][j]) {
temp -> {
aPathRev.append(aa[--i])
bPathRev.append(bb[--j])
}
1 + costs[i-1][j] -> {
aPathRev.append(aa[--i])
bPathRev.append('-')
}
1 + costs[i][j-1] -> {
aPathRev.append('-')
bPathRev.append(bb[--j])
}
}
}
return arrayOf(aPathRev.reverse().toString(), bPathRev.reverse().toString())
}
fun main(args: Array<String>) {
var result = levenshteinAlign("place", "palace")
println(result[0])
println(result[1])
println()
result = levenshteinAlign("rosettacode","raisethysword")
println(result[0])
println(result[1])
}
- Output:
p-lace palace r-oset-tacode raisethysword
Mathematica / Wolfram Language
DamerauLevenshteinDistance["rosettacode", "raisethysword"]
- Output:
8
Nim
import algorithm, sequtils, strutils
proc levenshteinAlign(a, b: string): tuple[a, b: string] =
let a = a.toLower()
let b = b.toLower()
var costs = newSeqWith(a.len + 1, newSeq[int](b.len + 1))
for j in 0..b.len: costs[0][j] = j
for i in 1..a.len:
costs[i][0] = i
for j in 1..b.len:
let tmp = costs[i - 1][j - 1] + ord(a[i - 1] != b[j - 1])
costs[i][j] = min(1 + min(costs[i - 1][j], costs[i][j - 1]), tmp)
# Walk back through matrix to figure out path.
var aPathRev, bPathRev: string
var i = a.len
var j = b.len
while i != 0 and j != 0:
let tmp = costs[i - 1][j - 1] + ord(a[i - 1] != b[j - 1])
if costs[i][j] == tmp:
dec i
dec j
aPathRev.add a[i]
bPathRev.add b[j]
elif costs[i][j] == 1 + costs[i-1][j]:
dec i
aPathRev.add a[i]
bPathRev.add '-'
elif costs[i][j] == 1 + costs[i][j-1]:
dec j
aPathRev.add '-'
bPathRev.add b[j]
else:
quit "Internal error"
result = (reversed(aPathRev).join(), reversed(bPathRev).join())
when isMainModule:
var result = levenshteinAlign("place", "palace")
echo result.a
echo result.b
echo ""
result = levenshteinAlign("rosettacode","raisethysword")
echo result.a
echo result.b
- Output:
p-lace palace r-oset-tacode raisethysword
Perl
use strict;
use warnings;
use List::Util qw(min);
sub levenshtein_distance_alignment {
my @s = ('^', split //, shift);
my @t = ('^', split //, shift);
my @A;
@{$A[$_][0]}{qw(d s t)} = ($_, join('', @s[1 .. $_]), ('~' x $_)) for 0 .. $#s;
@{$A[0][$_]}{qw(d s t)} = ($_, ('-' x $_), join '', @t[1 .. $_]) for 0 .. $#t;
for my $i (1 .. $#s) {
for my $j (1 .. $#t) {
if ($s[$i] ne $t[$j]) {
$A[$i][$j]{d} = 1 + (
my $min = min $A[$i-1][$j]{d}, $A[$i][$j-1]{d}, $A[$i-1][$j-1]{d}
);
@{$A[$i][$j]}{qw(s t)} =
$A[$i-1][$j]{d} == $min ? ($A[$i-1][$j]{s}.$s[$i], $A[$i-1][$j]{t}.'-') :
$A[$i][$j-1]{d} == $min ? ($A[$i][$j-1]{s}.'-', $A[$i][$j-1]{t}.$t[$j]) :
($A[$i-1][$j-1]{s}.$s[$i], $A[$i-1][$j-1]{t}.$t[$j]);
}
else {
@{$A[$i][$j]}{qw(d s t)} = (
$A[$i-1][$j-1]{d},
$A[$i-1][$j-1]{s}.$s[$i],
$A[$i-1][$j-1]{t}.$t[$j]
);
}
}
}
return @{$A[-1][-1]}{'s', 't'};
}
print join "\n", levenshtein_distance_alignment "rosettacode", "raisethysword";
- Output:
ro-settac-o-de raisethysword-
Phix
plus the indicator from Go
function LevenshteinAlignment(string a, b) integer la = length(a)+1, lb = length(b)+1 sequence costs = repeat(repeat(0,lb),la) for j=1 to lb do costs[1][j] = j end for for i=2 to la do costs[i][1] = i for j=2 to lb do integer tmp1 = 1+min(costs[i-1][j], costs[i][j-1]), tmp2 = costs[i-1][j-1] + (a[i-1]!=b[j-1]) costs[i][j] = min(tmp1, tmp2) end for end for -- walk back through matrix to figure out the path string arev = "", brev = "" integer i = la, j = lb while i>1 and j>1 do integer tmp = costs[i-1][j-1] + (a[i-1]!=b[j-1]) switch costs[i][j] do case tmp: i -= 1; arev &= a[i] j -= 1; brev &= b[j] case 1 + costs[i-1][j]: i -= 1; arev &= a[i] brev &= '-' case 1 + costs[i][j-1]: arev &= '-' j -= 1; brev &= b[j] end switch end while return {reverse(arev),reverse(brev)} end function procedure test(string a,b) {a,b} = LevenshteinAlignment(a,b) string c = sq_add(repeat(' ',length(a)),sq_mul(sq_ne(a,b),'|'-' ')) printf(1,"%s\n%s\n%s\n\n",{a,b,c}) end procedure test("rosettacode", "raisethysword") test("place", "palace")
- Output:
r-oset-tacode raisethysword || |||| || p-lace palace |
Python
from difflib import ndiff
def levenshtein(str1, str2):
result = ""
pos, removed = 0, 0
for x in ndiff(str1, str2):
if pos<len(str1) and str1[pos] == x[2]:
pos += 1
result += x[2]
if x[0] == "-":
removed += 1
continue
else:
if removed > 0:
removed -=1
else:
result += "-"
print(result)
levenshtein("place","palace")
levenshtein("rosettacode","raisethysword")
- Output:
p-lace ro-settac-o-de
Racket
Simple version (no aligment)
First we will analyze this solution that only computes the distance. See http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html for a discussion of the code.
#lang racket
(define (memoize f)
(local ([define table (make-hash)])
(lambda args
(dict-ref! table args (λ () (apply f args))))))
(define levenshtein
(memoize
(lambda (s t)
(cond
[(and (empty? s) (empty? t)) 0]
[(empty? s) (length t)]
[(empty? t) (length s)]
[else
(if (equal? (first s) (first t))
(levenshtein (rest s) (rest t))
(min (add1 (levenshtein (rest s) t))
(add1 (levenshtein s (rest t)))
(add1 (levenshtein (rest s) (rest t)))))]))))
Demonstration:
(levenshtein (string->list "rosettacode")
(string->list "raisethysword"))
- Output:
8
Complete version
Now we extend the code from http://blog.racket-lang.org/2012/08/dynamic-programming-versus-memoization.html to show also the alignment. The code is very similar, but it stores the partial results (number of edits and alignment of each substring) in a lev structure.
#lang racket
(struct lev (n s t))
(define (lev-add old n sx tx)
(lev (+ n (lev-n old))
(cons sx (lev-s old))
(cons tx (lev-t old))))
(define (list-repeat n v)
(build-list n (lambda (_) v)))
(define (memoize f)
(local ([define table (make-hash)])
(lambda args
(dict-ref! table args (λ () (apply f args))))))
(define levenshtein/list
(memoize
(lambda (s t)
(cond
[(and (empty? s) (empty? t))
(lev 0 '() '())]
[(empty? s)
(lev (length t) (list-repeat (length t) #\-) t)]
[(empty? t)
(lev (length s) s (list-repeat (length s) #\-))]
[else
(if (equal? (first s) (first t))
(lev-add (levenshtein/list (rest s) (rest t))
0 (first s) (first t))
(argmin lev-n (list (lev-add (levenshtein/list (rest s) t)
1 (first s) #\-)
(lev-add (levenshtein/list s (rest t))
1 #\- (first t))
(lev-add (levenshtein/list (rest s) (rest t))
1 (first s) (first t)))))]))))
(define (levenshtein s t)
(let ([result (levenshtein/list (string->list s)
(string->list t))])
(values (lev-n result)
(list->string (lev-s result))
(list->string (lev-t result)))))
Demonstration:
(let-values ([(dist exp-s exp-t)
(levenshtein "rosettacode" "raisethysword")])
(printf "levenshtein: ~a edits\n" dist)
(displayln exp-s)
(displayln exp-t))
- Output:
levenshtein: 8 edits r-oset-taco-de raisethysword-
Raku
(formerly Perl 6)
sub align ( Str $σ, Str $t ) {
my @s = flat *, $σ.comb;
my @t = flat *, $t.comb;
my @A;
@A[$_][ 0]<d s t> = $_, @s[1..$_].join, '-' x $_ for ^@s;
@A[ 0][$_]<d s t> = $_, '-' x $_, @t[1..$_].join for ^@t;
for 1 ..^ @s X 1..^ @t -> (\i, \j) {
if @s[i] ne @t[j] {
@A[i][j]<d> = 1 + my $min =
min @A[i-1][j]<d>, @A[i][j-1]<d>, @A[i-1][j-1]<d>;
@A[i][j]<s t> =
@A[i-1][j]<d> == $min ?? (@A[i-1][j]<s t> Z~ @s[i], '-') !!
@A[i][j-1]<d> == $min ?? (@A[i][j-1]<s t> Z~ '-', @t[j]) !!
(@A[i-1][j-1]<s t> Z~ @s[i], @t[j]);
} else {
@A[i][j]<d s t> = @A[i-1][j-1]<d s t> Z~ '', @s[i], @t[j];
}
}
return @A[*-1][*-1]<s t>;
}
.say for align 'rosettacode', 'raisethysword';
- Output:
ro-settac-o-de raisethysword-
Ruby
uses "lcs" from here
require 'lcs'
def levenshtein_align(a, b)
apos, bpos = LCS.new(a, b).backtrack2
c = ""
d = ""
x0 = y0 = -1
dx = dy = 0
apos.zip(bpos) do |x,y|
diff = x + dx - y - dy
if diff < 0
dx -= diff
c += "-" * (-diff)
elsif diff > 0
dy += diff
d += "-" * diff
end
c += a[x0+1..x]
x0 = x
d += b[y0+1..y]
y0 = y
end
c += a[x0+1..-1]
d += b[y0+1..-1]
diff = a.length + y0 - b.length - x0
if diff < 0
c += "-" * (-diff)
elsif diff > 0
d += "-" * diff
end
[c, d]
end
puts levenshtein_align("rosettacode", "raisethysword")
- Output:
r-oset-taco-de raisethysword-
Rust
Cargo.toml
[dependencies] edit-distance = "^1.0.0"
src/main.rs
extern crate edit_distance;
edit_distance("rosettacode", "raisethysword");
Scala
- Output:
Best seen running in your browser either by ScalaFiddle (ES aka JavaScript, non JVM) or [1].
import scala.collection.mutable
import scala.collection.parallel.ParSeq
object LevenshteinAlignment extends App {
val vlad = new Levenshtein("rosettacode", "raisethysword")
val alignment = vlad.revLevenstein()
class Levenshtein(s1: String, s2: String) {
val memoizedCosts = mutable.Map[(Int, Int), Int]()
def revLevenstein(): (String, String) = {
def revLev: (Int, Int, String, String) => (String, String) = {
case (_, 0, revS1, revS2) => (revS1, revS2)
case (0, _, revS1, revS2) => (revS1, revS2)
case (i, j, revS1, revS2) =>
if (memoizedCosts(i, j) == (memoizedCosts(i - 1, j - 1)
+ (if (s1(i - 1) != s2(j - 1)) 1 else 0)))
revLev(i - 1, j - 1, s1(i - 1) + revS1, s2(j - 1) + revS2)
else if (memoizedCosts(i, j) == 1 + memoizedCosts(i - 1, j))
revLev(i - 1, j, s1(i - 1) + revS1, "-" + revS2)
else
revLev(i, j - 1, "-" + revS1, s2(j - 1) + revS2)
}
revLev(s1.length, s2.length, "", "")
}
private def levenshtein: Int = {
def lev: ((Int, Int)) => Int = {
case (k1, k2) =>
memoizedCosts.getOrElseUpdate((k1, k2), (k1, k2) match {
case (i, 0) => i
case (0, j) => j
case (i, j) =>
ParSeq(1 + lev((i - 1, j)),
1 + lev((i, j - 1)),
lev((i - 1, j - 1))
+ (if (s1(i - 1) != s2(j - 1)) 1 else 0)).min
})
}
lev((s1.length, s2.length))
}
levenshtein
}
println(alignment._1)
println(alignment._2)
}
Sidef
func align(s, t) {
s.chars!.prepend!('^')
t.chars!.prepend!('^')
var A = []
{|i| A[i][0]{@|<d s t>} = (i, s.slice(1).first(i).join, '~' * i) } << ^s
{|i| A[0][i]{@|<d s t>} = (i, '-' * i, t.slice(1).first(i).join) } << ^t
for i (1 .. s.end) {
for j (1 .. t.end) {
if (s[i] != t[j]) {
A[i][j]{:d} = 1+(
var min = Math.min(A[i-1][j]{:d}, A[i][j-1]{:d}, A[i-1][j-1]{:d})
)
A[i][j]{@|<s t>} = (A[i-1][j]{:d} == min
? [A[i-1][j]{:s}+s[i], A[i-1][j]{:t}+'-']
: (A[i][j-1]{:d} == min
? [A[i][j-1]{:s}+'-', A[i][j-1]{:t}+t[j]]
: [A[i-1][j-1]{:s}+s[i], A[i-1][j-1]{:t}+t[j]]))...
}
else {
A[i][j]{@|<d s t>} = (
A[i-1][j-1]{:d},
A[i-1][j-1]{:s}+s[i],
A[i-1][j-1]{:t}+t[j],
)
}
}
}
return [A[-1][-1]{@|<s t>}]
}
align("rosettacode", "raisethysword").each { .say }
- Output:
ro-settac-o-de raisethysword-
Tcl
package require struct::list
proc levenshtein/align {a b} {
lassign [struct::list longestCommonSubsequence [split $a ""] [split $b ""]]\
apos bpos
set c ""
set d ""
set x0 [set y0 -1]
set dx [set dy 0]
foreach x $apos y $bpos {
if {$x+$dx < $y+$dy} {
set n [expr {($y+$dy)-($x+$dx)}]
incr dx $n
append c [string repeat "-" $n]
} elseif {$x+$dx > $y+$dy} {
set n [expr {($x+$dx)-($y+$dy)}]
incr dy $n
append d [string repeat "-" $n]
}
append c [string range $a $x0+1 $x]
set x0 $x
append d [string range $b $y0+1 $y]
set y0 $y
}
append c [string range $a $x0+1 end]
append d [string range $b $y0+1 end]
set al [string length $a]
set bl [string length $b]
if {$al+$y0 < $bl+$x0} {
append c [string repeat "-" [expr {$bl+$x0-$y0-$al}]]
} elseif {$bl+$x0 < $al+$y0} {
append d [string repeat "-" [expr {$al+$y0-$x0-$bl}]]
}
return $c\n$d
}
puts [levenshtein/align "rosettacode" "raisethysword"]
- Output:
r-oset-taco-de raisethysword-
Wren
import "./str" for Str
var levenshteinAlign = Fn.new { |a, b|
a = Str.lower(a)
b = Str.lower(b)
var costs = List.filled(a.count+1, null)
for (i in 0..a.count) costs[i] = List.filled(b.count+1, 0)
for (j in 0..b.count) costs[0][j] = j
for (i in 1..a.count) {
costs[i][0] = i
for (j in 1..b.count) {
var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1)
costs[i][j] = temp.min(1 + costs[i - 1][j].min(costs[i][j - 1]))
}
}
// walk back through matrix to figure out path
var aPathRev = ""
var bPathRev = ""
var i = a.count
var j = b.count
while (i != 0 && j != 0) {
var temp = costs[i - 1][j - 1] + ((a[i - 1] == b[j - 1]) ? 0 : 1)
var cij = costs[i][j]
if (cij == temp) {
i = i - 1
aPathRev = aPathRev + a[i]
j = j - 1
bPathRev = bPathRev + b[j]
} else if (cij == 1 + costs[i-1][j]) {
i = i - 1
aPathRev = aPathRev + a[i]
bPathRev = bPathRev + "-"
} else if (cij == 1 + costs[i][j-1]) {
aPathRev = aPathRev + "-"
j = j - 1
bPathRev = bPathRev+ b[j]
}
}
return [aPathRev[-1..0], bPathRev[-1..0]]
}
var result = levenshteinAlign.call("place", "palace")
System.print(result[0])
System.print(result[1])
System.print()
result = levenshteinAlign.call("rosettacode","raisethysword")
System.print(result[0])
System.print(result[1])
- Output:
p-lace palace r-oset-tacode raisethysword
zkl
fcn alignment(a,b){
a,b = a.toLower(), b.toLower();
costs := (a.len()+1).pump(List(),'wrap(a){ [1..b.len()].pump(List(a)) });
foreach i,j in (a.len()+1, [1..b.len()]){
costs[i][j] = ( 1 + costs[i-1][j].min(costs[i][j-1]) )
.min(if(a[i-1] == b[j-1]) costs[i-1][j-1] else costs[i-1][j-1] + 1);
}
// walk back through matrix to figure out path
aPathRev,bPathRev := Data(),Data(); // byte buckets
i,j := a.len(), b.len();
while(i!=0 and j!= 0){
if (costs[i][j] ==
( if(a[i-1]==b[j-1]) costs[i-1][j-1] else costs[i-1][j-1]+1 )){
aPathRev.append(a[i-=1]);
bPathRev.append(b[j-=1]);
} else if(costs[i][j] == 1+costs[i-1][j]){
aPathRev.append(a[i-=1]);
bPathRev.append("-");
} else if (costs[i][j] == 1+costs[i][j-1]){
aPathRev.append("-");
bPathRev.append(b[j-=1]);
}
}
return(aPathRev.text.reverse(), bPathRev.text.reverse())
}
result := alignment("rosettacode", "raisethysword");
println(result[0]);
println(result[1]);
- Output:
r-oset-tacode raisethysword