Jaro-Winkler distance: Difference between revisions

m
→‎{{header|Phix}}: use unix_dict()
m (→‎{{header|Phix}}: use unix_dict())
Line 1,354:
 
=={{header|Phix}}==
Uses jaro() from [[Jaro_distance#Phix]] (reproduced below for your convenience) and unixdict.txtthe standard unix_dict()
<!--<lang Phix>function jaroWinklerDist(string s, tphixonline)-->
<span style="color: #008080;">function</span> <span style="color: #000000;">jaro</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">str1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">str2</span><span style="color: #0000FF;">)</span>
integer lm = min({length(s),length(t),4}),
<span style="color: #000000;">str1</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">trim</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">upper</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str1</span><span style="color: #0000FF;">))</span>
l = sum(sq_eq(s[1..lm],t[1..lm]))
<span style="color: #000000;">str2</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">trim</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">upper</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str2</span><span style="color: #0000FF;">))</span>
return (1-jaro(s, t))*(1-l*0.1)
<span style="color: #004080;">integer</span> <span style="color: #000000;">len1</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str1</span><span style="color: #0000FF;">),</span>
end function
<span style="color: #000000;">len2</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str2</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">match_distance</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">floor</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">max</span><span style="color: #0000FF;">(</span><span style="color: #000000;">len1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">len2</span><span style="color: #0000FF;">)/</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">match_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">half_transposed</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">len1</span><span style="color: #0000FF;">==</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #000000;">len2</span><span style="color: #0000FF;">==</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000080;font-style:italic;">-- count the number of matches</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">m1</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #004600;">false</span><span style="color: #0000FF;">,</span><span style="color: #000000;">len1</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">m2</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #004600;">false</span><span style="color: #0000FF;">,</span><span style="color: #000000;">len2</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">len1</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #7060A8;">max</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">match_distance</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">to</span> <span style="color: #7060A8;">min</span><span style="color: #0000FF;">(</span><span style="color: #000000;">len2</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">match_distance</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">m2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">str1</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">str2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">m1</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
<span style="color: #000000;">m2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
<span style="color: #000000;">match_count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">exit</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">match_count</span><span style="color: #0000FF;">==</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000080;font-style:italic;">-- count the number of half-transpositions</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">len1</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">m1</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">while</span> <span style="color: #008080;">not</span> <span style="color: #000000;">m2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #000000;">half_transposed</span> <span style="color: #0000FF;">+=</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">str1</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]!=</span><span style="color: #000000;">str2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">])</span>
<span style="color: #000000;">k</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">transpositions</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">floor</span><span style="color: #0000FF;">(</span><span style="color: #000000;">half_transposed</span><span style="color: #0000FF;">/</span><span style="color: #000000;">2</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">not_transposed</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">match_count</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">transpositions</span>
<span style="color: #000080;font-style:italic;">--
-- return the average of:
-- percentage/fraction of the first string matched,
-- percentage/fraction of the second string matched, and
-- percentage/fraction of matches that were not transposed.
--</span>
<span style="color: #008080;">return</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">match_count</span><span style="color: #0000FF;">/</span><span style="color: #000000;">len1</span> <span style="color: #0000FF;">+</span>
<span style="color: #000000;">match_count</span><span style="color: #0000FF;">/</span><span style="color: #000000;">len2</span> <span style="color: #0000FF;">+</span>
<span style="color: #000000;">not_transposed</span><span style="color: #0000FF;">/</span><span style="color: #000000;">match_count</span><span style="color: #0000FF;">)/</span><span style="color: #000000;">3</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
constant mispelt = {"accomodate", "definately", "goverment", "occured",
<span style="color: #008080;">function</span> <span style="color: #000000;">jaroWinklerDist</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">)</span>
"publically", "recieve", "seperate", "untill", "wich"},
<span style="color: #004080;">integer</span> <span style="color: #000000;">lm</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">min</span><span style="color: #0000FF;">({</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">),</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">),</span><span style="color: #000000;">4</span><span style="color: #0000FF;">}),</span>
words = get_text(join_path({"demo","unixdict.txt"}),GT_LF_STRIPPED),
<span style="color: #000000;">l</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sum</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">sq_eq</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">lm</span><span style="color: #0000FF;">],</span><span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">lm</span><span style="color: #0000FF;">]))</span>
nom = length(mispelt),
<span style="color: #008080;">return</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">-</span><span style="color: #000000;">jaro</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">))*(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">-</span><span style="color: #000000;">l</span><span style="color: #0000FF;">*</span><span style="color: #000000;">0.1</span><span style="color: #0000FF;">)</span>
now = length(words)
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
sequence jwds = repeat(0,now)
for m=1 to nom do
<span style="color: #008080;">constant</span> <span style="color: #000000;">mispelt</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"accomodate"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"definately"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"goverment"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"occured"</span><span style="color: #0000FF;">,</span>
string ms = mispelt[m]
<span style="color: #008000;">"publically"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"recieve"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"seperate"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"untill"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"wich"</span><span style="color: #0000FF;">},</span>
printf(1,"\nMisspelt word: %s :\n", ms)
<span style="color: #000000;">words</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">unix_dict</span><span style="color: #0000FF;">()</span>
for w=1 to now do
<span style="color: #004080;">sequence</span> <span style="color: #000000;">jwds</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">))</span>
jwds[w] = jaroWinklerDist(ms,words[w])
<span style="color: #008080;">for</span> <span style="color: #000000;">m</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">mispelt</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
end for
<span style="color: #004080;">string</span> <span style="color: #000000;">ms</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">mispelt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">m</span><span style="color: #0000FF;">]</span>
sequence tags = custom_sort(jwds,tagset(now))
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\nMisspelt word: %s :\n"</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">ms</span><span style="color: #0000FF;">)</span>
for j=1 to 6 do
<span style="color: #008080;">for</span> <span style="color: #000000;">w</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
integer tj = tags[j]
<span style="color: #000000;">jwds</span><span style="color: #0000FF;">[</span><span style="color: #000000;">w</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">jaroWinklerDist</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ms</span><span style="color: #0000FF;">,</span><span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">w</span><span style="color: #0000FF;">])</span>
-- if jwds[tj]>0.15 then exit end if
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
printf(1,"%0.4f %s\n", {jwds[tj], words[tj]})
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tags</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">custom_sort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">jwds</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">)))</span>
end for
<span style="color: #008080;">for</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">6</span> <span style="color: #008080;">do</span>
end for</lang>
<span style="color: #004080;">integer</span> <span style="color: #000000;">tj</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tags</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span>
<span style="color: #000080;font-style:italic;">-- if jwds[tj]&gt;0.15 then exit end if</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%0.4f %s\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">jwds</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tj</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tj</span><span style="color: #0000FF;">]})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
end for<!--</lang>-->
Output identical to <del>Go/Wren</del> Algol68
 
7,806

edits