Levenshtein distance: Difference between revisions
Content added Content deleted
Thundergnat (talk | contribs) (Rename Perl 6 -> Raku, alphabetize, minor clean-up) |
|||
Line 647: | Line 647: | ||
return 0; |
return 0; |
||
}</lang> |
}</lang> |
||
=={{header|C sharp|C#}}== |
|||
This is a straightforward translation of the Wikipedia pseudocode. |
|||
<lang csharp>using System; |
|||
namespace LevenshteinDistance |
|||
{ |
|||
class Program |
|||
{ |
|||
static int LevenshteinDistance(string s, string t) |
|||
{ |
|||
int n = s.Length; |
|||
int m = t.Length; |
|||
int[,] d = new int[n + 1, m + 1]; |
|||
if (n == 0) |
|||
{ |
|||
return m; |
|||
} |
|||
if (m == 0) |
|||
{ |
|||
return n; |
|||
} |
|||
for (int i = 0; i <= n; i++) |
|||
d[i, 0] = i; |
|||
for (int j = 0; j <= m; j++) |
|||
d[0, j] = j; |
|||
for (int j = 1; j <= m; j++) |
|||
for (int i = 1; i <= n; i++) |
|||
if (s[i - 1] == t[j - 1]) |
|||
d[i, j] = d[i - 1, j - 1]; //no operation |
|||
else |
|||
d[i, j] = Math.Min(Math.Min( |
|||
d[i - 1, j] + 1, //a deletion |
|||
d[i, j - 1] + 1), //an insertion |
|||
d[i - 1, j - 1] + 1 //a substitution |
|||
); |
|||
return d[n, m]; |
|||
} |
|||
static void Main(string[] args) |
|||
{ |
|||
if (args.Length == 2) |
|||
Console.WriteLine("{0} -> {1} = {2}", |
|||
args[0], args[1], LevenshteinDistance(args[0], args[1])); |
|||
else |
|||
Console.WriteLine("Usage:-\n\nLevenshteinDistance <string1> <string2>"); |
|||
} |
|||
} |
|||
}</lang> |
|||
{{out|Example output}} |
|||
<pre> |
|||
> LevenshteinDistance kitten sitting |
|||
kitten -> sitting = 3 |
|||
> LevenshteinDistance rosettacode raisethysword |
|||
rosettacode -> raisethysword = 8 |
|||
</pre> |
|||
=={{header|C++}}== |
=={{header|C++}}== |
||
Line 714: | Line 775: | ||
</pre> |
</pre> |
||
=={{header| |
=={{header|Clojure}}== |
||
This is a straightforward translation of the Wikipedia pseudocode. |
|||
<lang csharp>using System; |
|||
===Recursive Version=== |
|||
namespace LevenshteinDistance |
|||
<lang lisp>(defn levenshtein [str1 str2] |
|||
{ |
|||
(let [len1 (count str1) |
|||
class Program |
|||
len2 (count str2)] |
|||
{ |
|||
(cond (zero? len1) len2 |
|||
static int LevenshteinDistance(string s, string t) |
|||
(zero? len2) len1 |
|||
:else |
|||
(let [cost (if (= (first str1) (first str2)) 0 1)] |
|||
(min (inc (levenshtein (rest str1) str2)) |
|||
(inc (levenshtein str1 (rest str2))) |
|||
(+ cost |
|||
(levenshtein (rest str1) (rest str2)))))))) |
|||
{ |
|||
return m; |
|||
} |
|||
if (m == 0) |
|||
{ |
|||
return n; |
|||
} |
|||
(println (levenshtein "rosettacode" "raisethysword"))</lang> |
|||
for (int i = 0; i <= n; i++) |
|||
{{out}} |
|||
d[i, 0] = i; |
|||
<pre>8</pre> |
|||
for (int j = 0; j <= m; j++) |
|||
d[0, j] = j; |
|||
for (int j = 1; j <= m; j++) |
|||
for (int i = 1; i <= n; i++) |
|||
if (s[i - 1] == t[j - 1]) |
|||
d[i, j] = d[i - 1, j - 1]; //no operation |
|||
else |
|||
d[i, j] = Math.Min(Math.Min( |
|||
d[i - 1, j] + 1, //a deletion |
|||
d[i, j - 1] + 1), //an insertion |
|||
d[i - 1, j - 1] + 1 //a substitution |
|||
); |
|||
return d[n, m]; |
|||
} |
|||
===Iterative version=== |
|||
static void Main(string[] args) |
|||
<lang lisp>(defn levenshtein [w1 w2] |
|||
{ |
|||
(letfn [(cell-value [same-char? prev-row cur-row col-idx] |
|||
if (args.Length == 2) |
|||
(min (inc (nth prev-row col-idx)) |
|||
(inc (last cur-row)) |
|||
(+ (nth prev-row (dec col-idx)) (if same-char? |
|||
else |
|||
0 |
|||
Console.WriteLine("Usage:-\n\nLevenshteinDistance <string1> <string2>"); |
|||
1))))] |
|||
} |
|||
(loop [row-idx 1 |
|||
} |
|||
max-rows (inc (count w2)) |
|||
}</lang> |
|||
prev-row (range (inc (count w1)))] |
|||
{{out|Example output}} |
|||
(if (= row-idx max-rows) |
|||
<pre> |
|||
(last prev-row) |
|||
> LevenshteinDistance kitten sitting |
|||
(let [ch2 (nth w2 (dec row-idx)) |
|||
kitten -> sitting = 3 |
|||
next-prev-row (reduce (fn [cur-row i] |
|||
(let [same-char? (= (nth w1 (dec i)) ch2)] |
|||
> LevenshteinDistance rosettacode raisethysword |
|||
(conj cur-row (cell-value same-char? |
|||
rosettacode -> raisethysword = 8 |
|||
prev-row |
|||
</pre> |
|||
cur-row |
|||
i)))) |
|||
[row-idx] (range 1 (count prev-row)))] |
|||
(recur (inc row-idx) max-rows next-prev-row))))))</lang> |
|||
=={{header|COBOL}}== |
=={{header|COBOL}}== |
||
Line 901: | Line 943: | ||
{{out}} |
{{out}} |
||
<pre>8</pre> |
<pre>8</pre> |
||
=={{header|Clojure}}== |
|||
===Recursive Version=== |
|||
<lang lisp>(defn levenshtein [str1 str2] |
|||
(let [len1 (count str1) |
|||
len2 (count str2)] |
|||
(cond (zero? len1) len2 |
|||
(zero? len2) len1 |
|||
:else |
|||
(let [cost (if (= (first str1) (first str2)) 0 1)] |
|||
(min (inc (levenshtein (rest str1) str2)) |
|||
(inc (levenshtein str1 (rest str2))) |
|||
(+ cost |
|||
(levenshtein (rest str1) (rest str2)))))))) |
|||
(println (levenshtein "rosettacode" "raisethysword"))</lang> |
|||
{{out}} |
|||
<pre>8</pre> |
|||
===Iterative version=== |
|||
<lang lisp>(defn levenshtein [w1 w2] |
|||
(letfn [(cell-value [same-char? prev-row cur-row col-idx] |
|||
(min (inc (nth prev-row col-idx)) |
|||
(inc (last cur-row)) |
|||
(+ (nth prev-row (dec col-idx)) (if same-char? |
|||
0 |
|||
1))))] |
|||
(loop [row-idx 1 |
|||
max-rows (inc (count w2)) |
|||
prev-row (range (inc (count w1)))] |
|||
(if (= row-idx max-rows) |
|||
(last prev-row) |
|||
(let [ch2 (nth w2 (dec row-idx)) |
|||
next-prev-row (reduce (fn [cur-row i] |
|||
(let [same-char? (= (nth w1 (dec i)) ch2)] |
|||
(conj cur-row (cell-value same-char? |
|||
prev-row |
|||
cur-row |
|||
i)))) |
|||
[row-idx] (range 1 (count prev-row)))] |
|||
(recur (inc row-idx) max-rows next-prev-row))))))</lang> |
|||
=={{header|Crystal}}== |
=={{header|Crystal}}== |
||
Line 2,728: | Line 2,729: | ||
8 |
8 |
||
</lang> |
</lang> |
||
=={{header|Mathematica}}== |
=={{header|Mathematica}}== |
||
Line 2,735: | Line 2,735: | ||
EditDistance["rosettacode","raisethysword"] |
EditDistance["rosettacode","raisethysword"] |
||
->8</lang> |
->8</lang> |
||
=={{header|MATLAB}}== |
=={{header|MATLAB}}== |
||
<lang MATLAB> |
<lang MATLAB> |
||
Line 3,203: | Line 3,204: | ||
print leven('rosettacode', 'raisethysword'), "\n";</lang> |
print leven('rosettacode', 'raisethysword'), "\n";</lang> |
||
=={{header|Perl 6}}== |
|||
{{works with|rakudo|2015-09-16}} |
|||
Implementation of the wikipedia algorithm. Since column 0 and row 0 are used for base distances, the original algorithm would require us to compare "@s[$i-1] eq @t[$j-1]", and reference the $m and $n separately. Prepending an unused value (undef) onto @s and @t makes their indices align with the $i,$j numbering of @d, and lets us use .end instead of $m,$n. |
|||
<lang perl6>sub levenshtein_distance ( Str $s, Str $t --> Int ) { |
|||
my @s = *, |$s.comb; |
|||
my @t = *, |$t.comb; |
|||
my @d; |
|||
@d[$_; 0] = $_ for ^@s.end; |
|||
@d[ 0; $_] = $_ for ^@t.end; |
|||
for 1..@s.end X 1..@t.end -> ($i, $j) { |
|||
@d[$i; $j] = @s[$i] eq @t[$j] |
|||
?? @d[$i-1; $j-1] # No operation required when eq |
|||
!! ( @d[$i-1; $j ], # Deletion |
|||
@d[$i ; $j-1], # Insertion |
|||
@d[$i-1; $j-1], # Substitution |
|||
).min + 1; |
|||
} |
|||
return @d[*-1][*-1]; |
|||
} |
|||
my @a = [<kitten sitting>], [<saturday sunday>], [<rosettacode raisethysword>]; |
|||
for @a -> [$s, $t] { |
|||
say "levenshtein_distance('$s', '$t') == ", levenshtein_distance($s, $t); |
|||
}</lang> |
|||
{{out}} |
|||
<pre>levenshtein_distance('kitten', 'sitting') == 3 |
|||
levenshtein_distance('saturday', 'sunday') == 3 |
|||
levenshtein_distance('rosettacode', 'raisethysword') == 8</pre> |
|||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
Line 3,539: | Line 3,507: | ||
rosettacode raisethysword 8 |
rosettacode raisethysword 8 |
||
</pre> |
</pre> |
||
=={{header|Processing}}== |
=={{header|Processing}}== |
||
Line 3,560: | Line 3,527: | ||
return costs[b.length()]; |
return costs[b.length()]; |
||
}</lang> |
}</lang> |
||
=={{header|Prolog}}== |
=={{header|Prolog}}== |
||
Line 3,978: | Line 3,944: | ||
<pre>3 |
<pre>3 |
||
8</pre> |
8</pre> |
||
=={{header|Raku}}== |
|||
(formerly Perl 6) |
|||
{{works with|rakudo|2015-09-16}} |
|||
Implementation of the wikipedia algorithm. Since column 0 and row 0 are used for base distances, the original algorithm would require us to compare "@s[$i-1] eq @t[$j-1]", and reference the $m and $n separately. Prepending an unused value (undef) onto @s and @t makes their indices align with the $i,$j numbering of @d, and lets us use .end instead of $m,$n. |
|||
<lang perl6>sub levenshtein_distance ( Str $s, Str $t --> Int ) { |
|||
my @s = *, |$s.comb; |
|||
my @t = *, |$t.comb; |
|||
my @d; |
|||
@d[$_; 0] = $_ for ^@s.end; |
|||
@d[ 0; $_] = $_ for ^@t.end; |
|||
for 1..@s.end X 1..@t.end -> ($i, $j) { |
|||
@d[$i; $j] = @s[$i] eq @t[$j] |
|||
?? @d[$i-1; $j-1] # No operation required when eq |
|||
!! ( @d[$i-1; $j ], # Deletion |
|||
@d[$i ; $j-1], # Insertion |
|||
@d[$i-1; $j-1], # Substitution |
|||
).min + 1; |
|||
} |
|||
return @d[*-1][*-1]; |
|||
} |
|||
my @a = [<kitten sitting>], [<saturday sunday>], [<rosettacode raisethysword>]; |
|||
for @a -> [$s, $t] { |
|||
say "levenshtein_distance('$s', '$t') == ", levenshtein_distance($s, $t); |
|||
}</lang> |
|||
{{out}} |
|||
<pre>levenshtein_distance('kitten', 'sitting') == 3 |
|||
levenshtein_distance('saturday', 'sunday') == 3 |
|||
levenshtein_distance('rosettacode', 'raisethysword') == 8</pre> |
|||
=={{header|REXX}}== |
=={{header|REXX}}== |
||
Line 4,487: | Line 4,487: | ||
n + 1); |
n + 1); |
||
</lang> |
</lang> |
||
=={{header|Sidef}}== |
=={{header|Sidef}}== |
||
===Recursive=== |
===Recursive=== |
||
Line 4,521: | Line 4,522: | ||
<lang ruby>say lev(%c'kitten', %c'sitting'); # prints: 3 |
<lang ruby>say lev(%c'kitten', %c'sitting'); # prints: 3 |
||
say lev(%c'rosettacode', %c'raisethysword'); # prints: 8</lang> |
say lev(%c'rosettacode', %c'raisethysword'); # prints: 8</lang> |
||
=={{header|Simula}}== |
=={{header|Simula}}== |
||
<lang simula>BEGIN |
<lang simula>BEGIN |
||
Line 4,692: | Line 4,694: | ||
3 |
3 |
||
</pre> |
</pre> |
||
=={{header|Vala}}== |
=={{header|Vala}}== |
||
<lang vala>class LevenshteinDistance : Object { |
<lang vala>class LevenshteinDistance : Object { |