Jaro similarity: Difference between revisions

m
→‎{{header|Phix}}: added syntax colouring the hard way
m (Added Delphi reference to Pascal code)
m (→‎{{header|Phix}}: added syntax colouring the hard way)
Line 2,095:
 
=={{header|Phix}}==
<lang Phix>function jaro(string str1, str2)
<!--<lang Phix>-->
<span style="color: #008080;">function</span> <span style="color: #000000;">jaro</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">str1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">str2</span><span style="color: #0000FF;">)</span>
str1 = trim(upper(str1))
<span style="color: #000000;">str1</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">trim</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">upper</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str1</span><span style="color: #0000FF;">))</span>
str2 = trim(upper(str2))
<span style="color: #000000;">str2</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">trim</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">upper</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str2</span><span style="color: #0000FF;">))</span>
integer len1 = length(str1),
<span style="color: #004080;">integer</span> <span style="color: #000000;">len1</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str1</span><span style="color: #0000FF;">),</span>
len2 = length(str2),
<span style="color: #000000;">len2</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str2</span><span style="color: #0000FF;">),</span>
match_distance = floor(max(len1,len2)/2)-1,
<span style="color: #000000;">match_distance</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">floor</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">max</span><span style="color: #0000FF;">(</span><span style="color: #000000;">len1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">len2</span><span style="color: #0000FF;">)/</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span>
match_count = 0,
<span style="color: #000000;">match_count</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
half_transposed = 0
<span style="color: #000000;">half_transposed</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
 
if len1==0 then return len2==0 end if
<span style="color: #008080;">if</span> <span style="color: #000000;">len1</span><span style="color: #0000FF;">==</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #000000;">len2</span><span style="color: #0000FF;">==</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
 
-- count the number of matches
<span style="color: #000080;font-style:italic;">-- count the number of matches</span>
sequence m1 = repeat(false,len1),
<span style="color: #004080;">sequence</span> <span style="color: #000000;">m1</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #004600;">false</span><span style="color: #0000FF;">,</span><span style="color: #000000;">len1</span><span style="color: #0000FF;">),</span>
m2 = repeat(false,len2)
<span style="color: #000000;">m2</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #004600;">false</span><span style="color: #0000FF;">,</span><span style="color: #000000;">len2</span><span style="color: #0000FF;">)</span>
for i=1 to len1 do
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">len1</span> <span style="color: #008080;">do</span>
for k=max(1,i-match_distance)
<span style="color: #008080;">for</span> <span style="color: #000000;">k</span><span style="color: #0000FF;">=</span><span style="color: #7060A8;">max</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">match_distance</span><span style="color: #0000FF;">)</span>
to min(len2,i+match_distance) do
<span style="color: #008080;">to</span> <span style="color: #7060A8;">min</span><span style="color: #0000FF;">(</span><span style="color: #000000;">len2</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">match_distance</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
if not m2[k] then
<span style="color: #008080;">if</span> <span style="color: #008080;">not</span> <span style="color: #000000;">m2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
if str1[i]=str2[k] then
<span style="color: #008080;">if</span> <span style="color: #000000;">str1</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]=</span><span style="color: #000000;">str2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
m1[i] = true
<span style="color: #000000;">m1</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
m2[k] = true
<span style="color: #000000;">m2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">true</span>
match_count += 1
<span style="color: #000000;">match_count</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
exit
<span style="color: #008080;">exit</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end for
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
end for
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
if match_count==0 then return 0 end if
<span style="color: #008080;">if</span> <span style="color: #000000;">match_count</span><span style="color: #0000FF;">==</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
-- count the number of half-transpositions
<span style="color: #000080;font-style:italic;">-- count the number of half-transpositions</span>
integer k = 1
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
for i=1 to len1 do
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">len1</span> <span style="color: #008080;">do</span>
if m1[i] then
<span style="color: #008080;">if</span> <span style="color: #000000;">m1</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span>
while not m2[k] do k += 1 end while
<span style="color: #008080;">while</span> <span style="color: #008080;">not</span> <span style="color: #000000;">m2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> <span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
half_transposed += (str1[i]!=str2[k])
<span style="color: #000000;">half_transposed</span> <span style="color: #0000FF;">+=</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">str1</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]!=</span><span style="color: #000000;">str2</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">])</span>
k += 1
<span style="color: #000000;">k</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end for
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
integer transpositions = floor(half_transposed/2),
<span style="color: #004080;">integer</span> <span style="color: #000000;">transpositions</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">floor</span><span style="color: #0000FF;">(</span><span style="color: #000000;">half_transposed</span><span style="color: #0000FF;">/</span><span style="color: #000000;">2</span><span style="color: #0000FF;">),</span>
not_transposed = match_count - transpositions
<span style="color: #000000;">not_transposed</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">match_count</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">transpositions</span>
--
<span style="color: #000080;font-style:italic;">--
-- return the average of:
-- return the average of:
-- percentage/fraction of the first string matched,
-- percentage/fraction of the first string matched,
-- percentage/fraction of the second string matched, and
-- percentage/fraction of the second string matched, and
-- percentage/fraction of matches that were not transposed.
-- percentage/fraction of matches that were not transposed.
--
--</span>
return (match_count/len1 +
<span style="color: #008080;">return</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">match_count</span><span style="color: #0000FF;">/</span><span style="color: #000000;">len1</span> <span style="color: #0000FF;">+</span>
match_count/len2 +
<span style="color: #000000;">match_count</span><span style="color: #0000FF;">/</span><span style="color: #000000;">len2</span> <span style="color: #0000FF;">+</span>
not_transposed/match_count)/3
<span style="color: #000000;">not_transposed</span><span style="color: #0000FF;">/</span><span style="color: #000000;">match_count</span><span style="color: #0000FF;">)/</span><span style="color: #000000;">3</span>
end function
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
 
constant testcouples = {{"CRATE","TRACE"},
<span style="color: #008080;">constant</span> <span style="color: #000000;">testcouples</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{{</span><span style="color: #008000;">"CRATE"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"TRACE"</span><span style="color: #0000FF;">},</span>
{"JONES","JOHNSON"},
<span style="color: #0000FF;">{</span><span style="color: #008000;">"JONES"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"JOHNSON"</span><span style="color: #0000FF;">},</span>
{"ABCVWXYZ","CABVWXYZ"},
<span style="color: #0000FF;">{</span><span style="color: #008000;">"ABCVWXYZ"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"CABVWXYZ"</span><span style="color: #0000FF;">},</span>
{"DWAYNE","DUANE"},
<span style="color: #0000FF;">{</span><span style="color: #008000;">"DWAYNE"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"DUANE"</span><span style="color: #0000FF;">},</span>
{"MARTHA", "MARHTA"},
<span style="color: #0000FF;">{</span><span style="color: #008000;">"MARTHA"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"MARHTA"</span><span style="color: #0000FF;">},</span>
{"DIXON", "DICKSONX"},
<span style="color: #0000FF;">{</span><span style="color: #008000;">"DIXON"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"DICKSONX"</span><span style="color: #0000FF;">},</span>
{"JELLYFISH", "SMELLYFISH"}}
<span style="color: #0000FF;">{</span><span style="color: #008000;">"JELLYFISH"</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">"SMELLYFISH"</span><span style="color: #0000FF;">}}</span>
 
for i=1 to length(testcouples) do
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">testcouples</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
string {s1, s2} = testcouples[i]
<span style="color: #004080;">string</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">s1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">s2</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">testcouples</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
printf(1,"%f <== jaro(\"%s\", \"%s\")\n",{jaro(s1,s2),s1,s2})
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%f &lt;== jaro(\"%s\", \"%s\")\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">jaro</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s2</span><span style="color: #0000FF;">),</span><span style="color: #000000;">s1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s2</span><span style="color: #0000FF;">})</span>
end for</lang>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</lang>-->
{{out}}
<pre>
7,806

edits