Jaro-Winkler distance: Difference between revisions
m
→{{header|Phix}}: use unix_dict()
m (→{{header|Phix}}: use unix_dict()) |
|||
Line 1,354:
=={{header|Phix}}==
Uses jaro() from [[Jaro_distance#Phix]] (reproduced below for your convenience) and
<!--<lang Phix>
<span style="color: #008080;">function</span> <span style="color: #000000;">jaro</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">str1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">str2</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">str1</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">trim</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">upper</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str1</span><span style="color: #0000FF;">))</span>
<span style="color: #000000;">str2</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">trim</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">upper</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str2</span><span style="color: #0000FF;">))</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">len1</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str1</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">len2</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str2</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">match_distance</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">floor</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">max</span><span style="color: #0000FF;">(</span><span style="color: #000000;">len1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">len2</span><span style="color: #0000FF;">)/</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">match_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">half_transposed</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">len1</span><span style="color: #0000FF;">==</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #000000;">len2</span><span style="color: #0000FF;">==</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000080;font-style:italic;">-- count the number of matches</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">m1</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #004600;">false</span><span style="color: #0000FF;">,</span><span style="color: #000000;">len1</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">m2</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #004600;">false</span><span style="color: #0000FF;">,</span><span style="color: #000000;">len2</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">len1</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #7060A8;">max</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">match_distance</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">to</span> <span style="color: #7060A8;">min</span><span style="color: #0000FF;">(</span><span style="color: #000000;">len2</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">match_distance</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">m2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">str1</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">str2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">m1</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
<span style="color: #000000;">m2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
<span style="color: #000000;">match_count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">exit</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">match_count</span><span style="color: #0000FF;">==</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000080;font-style:italic;">-- count the number of half-transpositions</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">len1</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">m1</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">while</span> <span style="color: #008080;">not</span> <span style="color: #000000;">m2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #000000;">half_transposed</span> <span style="color: #0000FF;">+=</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">str1</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]!=</span><span style="color: #000000;">str2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">])</span>
<span style="color: #000000;">k</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">transpositions</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">floor</span><span style="color: #0000FF;">(</span><span style="color: #000000;">half_transposed</span><span style="color: #0000FF;">/</span><span style="color: #000000;">2</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">not_transposed</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">match_count</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">transpositions</span>
<span style="color: #000080;font-style:italic;">--
-- return the average of:
-- percentage/fraction of the first string matched,
-- percentage/fraction of the second string matched, and
-- percentage/fraction of matches that were not transposed.
--</span>
<span style="color: #008080;">return</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">match_count</span><span style="color: #0000FF;">/</span><span style="color: #000000;">len1</span> <span style="color: #0000FF;">+</span>
<span style="color: #000000;">match_count</span><span style="color: #0000FF;">/</span><span style="color: #000000;">len2</span> <span style="color: #0000FF;">+</span>
<span style="color: #000000;">not_transposed</span><span style="color: #0000FF;">/</span><span style="color: #000000;">match_count</span><span style="color: #0000FF;">)/</span><span style="color: #000000;">3</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">jaroWinklerDist</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">lm</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">min</span><span style="color: #0000FF;">({</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">),</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">t</span><span style="color: #0000FF;">),</span><span style="color: #000000;">4</span><span style="color: #0000FF;">}),</span>
<span style="color: #000000;">l</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sum</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">sq_eq</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">lm</span><span style="color: #0000FF;">],</span><span style="color: #000000;">t</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">lm</span><span style="color: #0000FF;">]))</span>
<span style="color: #008080;">return</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">-</span><span style="color: #000000;">jaro</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">t</span><span style="color: #0000FF;">))*(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">-</span><span style="color: #000000;">l</span><span style="color: #0000FF;">*</span><span style="color: #000000;">0.1</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">mispelt</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #008000;">"accomodate"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"definately"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"goverment"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"occured"</span><span style="color: #0000FF;">,</span>
<span style="color: #008000;">"publically"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"recieve"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"seperate"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"untill"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"wich"</span><span style="color: #0000FF;">},</span>
<span style="color: #000000;">words</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">unix_dict</span><span style="color: #0000FF;">()</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">jwds</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">))</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">m</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">mispelt</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">ms</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">mispelt</span><span style="color: #0000FF;">[</span><span style="color: #000000;">m</span><span style="color: #0000FF;">]</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"\nMisspelt word: %s :\n"</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">ms</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">w</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">jwds</span><span style="color: #0000FF;">[</span><span style="color: #000000;">w</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">jaroWinklerDist</span><span style="color: #0000FF;">(</span><span style="color: #000000;">ms</span><span style="color: #0000FF;">,</span><span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">w</span><span style="color: #0000FF;">])</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">tags</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">custom_sort</span><span style="color: #0000FF;">(</span><span style="color: #000000;">jwds</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">words</span><span style="color: #0000FF;">)))</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">6</span> <span style="color: #008080;">do</span>
end for</lang>▼
<span style="color: #004080;">integer</span> <span style="color: #000000;">tj</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">tags</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span>
<span style="color: #000080;font-style:italic;">-- if jwds[tj]>0.15 then exit end if</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%0.4f %s\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">jwds</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tj</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">words</span><span style="color: #0000FF;">[</span><span style="color: #000000;">tj</span><span style="color: #0000FF;">]})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
Output identical to <del>Go/Wren</del> Algol68
|