Jump to content

Levenshtein distance: Difference between revisions

Line 4,100:
<pre>3
8</pre>
 
=={{header|Picat}}==
Here are three versions:
* iterative (algorithm from Wikipedia): levenshtein/2
* recursive with tabling: levenshtein_rec/2
* mode directive tabling (from the Prolog version): levenshtein_mode/2
 
<lang Picat>go =>
S = [
["kitten","sitting"],
["rosettacode","raisethysword"],
["levenshtein","levenstein"],
["saturday", "sunday"],
["stop", "tops"],
["saturday", "sunday"]
],
foreach([W1,W2] in S)
% clear the table cache
initialize_table,
println(iter=[W1,W2,levenshtein(W1,W2)]),
println(recu=[W1,W2,levenshtein_rec(W1,W2)]),
println(mode=[W1,W2,levenshtein_mode(W1,W2)]),
nl
end,
nl.
 
%
% Based on the iterative algorithm at Wikipedia.
% (Picat is 1-based so some adjustments are needed.)
%
levenshtein(S,T) = Dist =>
M = 1+S.length,
N = 1+T.length,
D = new_array(M,N),
 
foreach(I in 1..M)
D[I,1] := I-1
end,
foreach(J in 1..N)
D[1,J] := J-1
end,
foreach(J in 2..N, I in 2..M)
if S[I-1] == T[J-1] then
D[I,J] := D[I-1,J-1] % no operation required
else
D[I,J] := min([D[I-1,J ] + 1, % a deletion
D[I ,J-1] + 1, % an insertion
D[I-1,J-1] + 1] % a substitution
)
end
end,
 
Dist = D[M,N].
 
 
%
% Tabled recursive version.
%
table
levenshtein_rec(S,T) = Dist =>
Dist1 = 0,
if S.length = 0 then Dist1 := T.length
elseif T.length = 0 then Dist1 := S.length
elseif S[1] = T[1] then Dist1 := levenshtein_rec(S.tail(), T.tail())
else
A = levenshtein_rec(S.tail(), T.tail()),
B = levenshtein_rec(S , T.tail()),
C = levenshtein_rec(S.tail(), T),
if A > B then
A := B
elseif A > C then
A := C
end,
Dist1 := A + 1
end,
Dist = Dist1.
 
%
% Mode directed tabling (from the Prolog version)
%
levenshtein_mode(S,T) = Dist =>
lev(S, T, Dist).
table (+,+,min)
lev([], [], 0).
lev([X|L], [X|R], D) :- !, lev(L, R, D).
lev([_|L], [_|R], D) :- lev(L, R, H), D is H+1.
lev([_|L], R, D) :- lev(L, R, H), D is H+1.
lev(L, [_|R], D) :- lev(L, R, H), D is H+1.</lang>
 
Output:
<pre>goal = go
iter = [kitten,sitting,3]
recu = [kitten,sitting,3]
mode = [kitten,sitting,3]
 
iter = [rosettacode,raisethysword,8]
recu = [rosettacode,raisethysword,8]
mode = [rosettacode,raisethysword,8]
 
iter = [levenshtein,levenstein,1]
recu = [levenshtein,levenstein,1]
mode = [levenshtein,levenstein,1]
 
iter = [saturday,sunday,3]
recu = [saturday,sunday,3]
mode = [saturday,sunday,3]
 
iter = [stop,tops,2]
recu = [stop,tops,2]
mode = [stop,tops,2]
 
iter = [saturday,sunday,3]
recu = [saturday,sunday,3]
mode = [saturday,sunday,3]</pre>
 
Benchmarking the methods with larger strings of random lengths (between 1 and 2000).
<lang Picat>go2 =>
_ = random2(),
Len = 2000,
S1 = generate_string(Len),
S2 = generate_string(Len),
println(len_s1=S1.len),
println(len_s2=S2.len),
nl,
println("iter:"),
time(L1 = levenshtein(S1,S2)),
println("rec:"),
time(L2 = levenshtein_rec(S1,S2)),
println("mode:"),
time(L3 = levenshtein_mode(S1,S2)),
println(distances=[iter=L1,rec=L2,mode=L3]),
nl.
 
 
%
% Generate a random string of max length MaxLen
%
generate_string(MaxLen) = S =>
Alpha = "abcdefghijklmnopqrstuvxyz",
Len = Alpha.length,
S := [Alpha[random(1,Len)] : _ in 1..random(1,MaxLen)].</lang>
 
Here is sample run. The mode directed version is clearly the fastest.
<pre>len_s1 = 1968
len_s2 = 1529
 
iter:
CPU time 9.039 seconds.
 
rec:
CPU time 10.439 seconds.
 
mode:
CPU time 1.402 seconds.
 
distances=[iter = 1607,rec = 1607,mode = 1607]
</pre>
 
=={{header|PicoLisp}}==
495

edits

Cookies help us deliver our services. By using our services, you agree to our use of cookies.