Jaro similarity: Difference between revisions

Add C# implementation
(Add C# implementation)
 
(23 intermediate revisions by 15 users not shown)
Line 1:
{{task}}
 
The Jaro distance is a measure of edit distance between two strings; its inverse, called the ''Jaro similarity'', is a measure of two strings' similarity: the higher the value, the more similar the strings are. The score is normalized such that   '''0'''   equates to no similarities and   '''1'''   is an exact match.
The Jaro distance is a measure of similarity between two strings.
 
The higher the Jaro distance for two strings is, the more similar the strings are.
 
The score is normalized such that   '''0'''   equates to no similarity and   '''1'''   is an exact match.
 
 
;;Definition
 
The Jaro similarity &nbsp; <math>d_j</math> &nbsp; of two given strings &nbsp; <math>s_1</math> &nbsp; and &nbsp; <math>s_2</math> &nbsp; is
 
: <math>d_j = \left\{
Line 24 ⟶ 20:
 
 
Two characters from &nbsp; <math>s_1</math> &nbsp; and &nbsp; <math>s_2</math> &nbsp; respectively, are considered ''matching'' only if they are the same and not farther apart than &nbsp; <math>\left\lfloor\frac{\max(|s_1|,|s_2|)}{2}\right\rfloor-1</math> characters.
 
Each character of &nbsp; <math>s_1</math> &nbsp; is compared with all its matching
characters in &nbsp; <math>s_2</math>.
 
Each character of &nbsp; <math>s_1</math> &nbsp; is compared with all its matching characters in &nbsp; <math>s_2</math>. Each difference in position is half a ''transposition''; that is, the number of transpositions is half the number of characters which are common to the two strings but occupy different positions in each one.
The number of matching (but different sequence order) characters
divided by 2 defines the number of ''transpositions''.
 
 
Line 50 ⟶ 42:
;Task
 
Implement the Jaro-distance algorithm and show the similarity scores for each of the following pairs:
 
* ("MARTHA", "MARHTA")
Line 64 ⟶ 56:
{{trans|Python}}
 
<langsyntaxhighlight lang="11l">F jaro(s, t)
V s_len = s.len
V t_len = t.len
Line 111 ⟶ 103:
(‘DIXON’, ‘DICKSONX’),
(‘JELLYFISH’, ‘SMELLYFISH’)]
print(‘jaro('#.', '#.') = #.10’.format(s, t, jaro(s, t)))</langsyntaxhighlight>
 
{{out}}
Line 121 ⟶ 113:
 
=={{header|Action!}}==
<langsyntaxhighlight lang="action">
DEFINE STRING="CHAR ARRAY" ; sys.act
DEFINE ASCII_SpaceBar="32"
Line 152 ⟶ 144:
IF Max<(I-L) THEN Max=I-L FI
Min=S2
IF Min>(I+L-1) THEN Min=I+L-1 FI
FOR K=Max TO Min DO
IF str1(I)=str2(K) THEN
Line 191 ⟶ 183:
PUTE()
 
SCopy(Word_1,"MARTHALIGITA") SCopy(Word_2,"MARHTALIGA")
PrintF("%S - %S%E",Word_1,Word_2)
result=JaroDistance(Word_1,Word_2)
PrintF("Jaro Distance=%U%E%E",result)
 
SCopy(Word_1,"DIXONZEILANE") SCopy(Word_2,"DICKSONXZEIDONE")
PrintF("%S - %S%E",Word_1,Word_2)
result=JaroDistance(Word_1,Word_2)
Line 206 ⟶ 198:
PrintF("Jaro Distance=%U%E%E",result)
RETURN
</syntaxhighlight>
</lang>
{{out}}
<pre>MARTHA, MARHTA: 94
Line 214 ⟶ 206:
 
=={{header|Ada}}==
<langsyntaxhighlight Adalang="ada">with Ada.Text_IO;
 
procedure Jaro_Distances is
Line 304 ⟶ 296:
Show_Jaro ("JELLYFISH", "SMELLYFISH");
Show_Jaro (S1 (3 .. 8), S1 (13 .. 18));
end Jaro_Distances;</langsyntaxhighlight>
 
{{out}}
Line 314 ⟶ 306:
=={{header|ARM Assembly}}==
{{works with|as|Raspberry Pi}}
<syntaxhighlight lang="arm assembly">
<lang ARM Assembly>
 
/* ARM assembly Raspberry PI */
Line 668 ⟶ 660:
 
 
</syntaxhighlight>
</lang>
 
=={{header|Arturo}}==
 
<syntaxhighlight lang="rebol">loop [
; Each inner block is one pair of strings to compare; the loop body below
; passes the pair's first and last element to `jaro` and prints the result
; rounded with round.to: 3.
["MARTHA" "MARHTA"]
["DIXON" "DICKSONX"]
["JELLYFISH" "SMELLYFISH"]
] 'pair ->
print [pair "-> Jaro similarity:" round.to: 3 jaro first pair last pair]</syntaxhighlight>
 
{{out}}
 
<pre>[MARTHA MARHTA] -> Jaro similarity: 0.944
[DIXON DICKSONX] -> Jaro similarity: 0.767
[JELLYFISH SMELLYFISH] -> Jaro similarity: 0.896</pre>
 
=={{header|AWK}}==
<syntaxhighlight lang="awk">
<lang AWK>
# syntax: GAWK -f JARO_DISTANCE.AWK
BEGIN {
Line 724 ⟶ 731:
function max(x,y) { return((x > y) ? x : y) }
function min(x,y) { return((x < y) ? x : y) }
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 732 ⟶ 739:
0.8962963 'JELLYFISH' 'SMELLYFISH'
</pre>
 
=={{header|BBC BASIC}}==
{{works with|BBC BASIC for Windows}}
<syntaxhighlight lang="bbcbasic"> PRINT "Jaro similarity between the two strings:"
REM Report the similarity of each test pair. Besides the three task pairs
REM this also exercises unequal lengths (DWAYNE/DUANE), a pair with no
REM common character ("a"/"b") and two empty strings.
PROCDescribeJaro("MARTHA", "MARHTA")
PROCDescribeJaro("DIXON", "DICKSONX")
PROCDescribeJaro("JELLYFISH", "SMELLYFISH")
PROCDescribeJaro("DWAYNE", "DUANE")
PROCDescribeJaro("a", "b")
PROCDescribeJaro("", "")
END
 
REM FNMax / FNMin: the larger / smaller of a and b, computed arithmetically
REM from their sum and absolute difference instead of with a comparison.
DEF FNMax(a, b)=(a + b + ABS(a - b)) / 2
DEF FNMin(a, b)=(a + b - ABS(a - b)) / 2
 
REM PROCDescribeJaro: print the two words followed by their Jaro similarity.
REM m% counts matched characters; t% counts matches found out of position.
REM word1$ is progressively overwritten with spaces as its characters match.
DEF PROCDescribeJaro(word1$, word2$)
LOCAL d%, i%, j%, k%, l1%, l2%, m%, t%

PRINT " '" word1$ "' and '" word2$ "'" TAB(30) "= ";
REM Two empty strings are reported as an exact match (1).
IF word1$ == "" IF word2$ == "" PRINT;1 : ENDPROC
l1%=LENword1$
l2%=LENword2$
REM Make word1$ the longer string, so that l1% >= l2% from here on.
IF l1% < l2% SWAP l1%, l2% SWAP word1$, word2$

REM d% is the search radius: half the longer length, minus one.
d%=l1% / 2 - 1
REM j% is the position in word1$ currently lined up with i% in word2$.
j%=1
FOR i%=1 TO l2%
IF MID$(word2$, i%, 1) == MID$(word1$, j%, 1) THEN
REM Aligned match: count it and blank the character so it cannot match again.
m%+=1
MID$(word1$, j%)=" "
ELSE
REM Not aligned: look in word1$ within d% positions either side of i%.
FOR k%=FNMax(1, i% - d%) TO FNMin(l1%, i% + d%)
IF MID$(word2$, i%, 1) == MID$(word1$, k%, 1) THEN
REM Out-of-position match: counts towards both t% and m%.
REM NOTE(review): the scan does not stop at the first hit, so one character
REM of word2$ may consume several characters of word1$ - confirm intended.
t%+=1
m%+=1
MID$(word1$, k%)=" "
IF k% > j% j%=k%
ENDIF
NEXT
ENDIF
j%+=1
NEXT
IF m% == 0 THEN
REM No characters in common at all.
PRINT;0
ELSE
REM Mean of m/shorter length, m/longer length and (m - t/2)/m;
REM t% >> 1 halves the out-of-position count.
PRINT;(m% / l2% + m% / l1% + ((m% - (t% >> 1)) / m%)) / 3
ENDIF
ENDPROC</syntaxhighlight>
{{out}}
<pre>Jaro similarity between the two strings:
'MARTHA' and 'MARHTA' = 0.944444444
'DIXON' and 'DICKSONX' = 0.766666667
'JELLYFISH' and 'SMELLYFISH' = 0.896296296
'DWAYNE' and 'DUANE' = 0.822222222
'a' and 'b' = 0
'' and '' = 1</pre>
 
=={{header|C}}==
<langsyntaxhighlight Clang="c">#include <stdlib.h>
#include <string.h>
#include <ctype.h>
Line 823 ⟶ 886:
printf("%f\n", jaro("DIXON", "DICKSONX"));
printf("%f\n", jaro("JELLYFISH", "SMELLYFISH"));
}</langsyntaxhighlight>
{{out}}
<pre>
Line 829 ⟶ 892:
0.766667
0.896296
</pre>
 
=={{header|C#}}==
{{trans|Java}}
<syntaxhighlight lang="C#">
using System;
 
/// <summary>
/// Jaro similarity between two strings: 0 means nothing in common,
/// 1 means an exact match.
/// </summary>
public class JaroDistance {
    /// <summary>
    /// Computes the Jaro similarity of <paramref name="s"/> and <paramref name="t"/>.
    /// </summary>
    /// <param name="s">First string.</param>
    /// <param name="t">Second string.</param>
    /// <returns>
    /// A value in [0, 1]; 1 when both strings are empty, 0 when no characters match.
    /// </returns>
    public static double Jaro(string s, string t) {
        int s_len = s.Length;
        int t_len = t.Length;

        if (s_len == 0 && t_len == 0) return 1;

        // The raw formula max(|s|,|t|)/2 - 1 is -1 when both strings have at
        // most one character, which empties every search window and made
        // Jaro("A", "A") return 0. Clamp so a character can always match the
        // one at its own position.
        int match_distance = Math.Max(0, Math.Max(s_len, t_len) / 2 - 1);

        bool[] s_matches = new bool[s_len];
        bool[] t_matches = new bool[t_len];

        int matches = 0;
        int transpositions = 0;

        // Pass 1: pair each character of s with the first unused equal
        // character of t that lies within match_distance positions.
        for (int i = 0; i < s_len; i++) {
            int start = Math.Max(0, i - match_distance);
            int end = Math.Min(i + match_distance + 1, t_len);

            for (int j = start; j < end; j++) {
                if (t_matches[j]) continue;
                if (s[i] != t[j]) continue;
                s_matches[i] = true;
                t_matches[j] = true;
                matches++;
                break;
            }
        }

        if (matches == 0) return 0;

        // Pass 2: walk the matched characters of both strings in order; every
        // position where they differ is half a transposition.
        int k = 0;
        for (int i = 0; i < s_len; i++) {
            if (!s_matches[i]) continue;
            while (!t_matches[k]) k++;
            if (s[i] != t[k]) transpositions++;
            k++;
        }

        return (((double)matches / s_len) +
                ((double)matches / t_len) +
                (((double)matches - transpositions / 2.0) / matches)) / 3.0;
    }

    /// <summary>Prints the similarity of the three task string pairs.</summary>
    public static void Main(string[] args) {
        Console.WriteLine(Jaro("MARTHA", "MARHTA"));
        Console.WriteLine(Jaro("DIXON", "DICKSONX"));
        Console.WriteLine(Jaro("JELLYFISH", "SMELLYFISH"));
    }
}
</syntaxhighlight>
{{out}}
<pre>
0.944444444444445
0.766666666666667
0.896296296296296
 
</pre>
 
=={{header|C++}}==
{{trans|C}}
<langsyntaxhighlight lang="cpp">#include <algorithm>
#include <iostream>
#include <string>
Line 881 ⟶ 1,008:
cout << jaro("JELLYFISH", "SMELLYFISH") << endl;
return 0;
}</langsyntaxhighlight>
 
=={{header|Clojure}}==
<langsyntaxhighlight lang="clojure">
(ns test-project-intellij.core
(:gen-class))
Line 966 ⟶ 1,093:
(println (jaro "DIXON" "DICKSONX"))
(println (jaro "JELLYFISH" "SMELLYFISH"))
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 973 ⟶ 1,100:
0.8962963
</pre>
 
=={{header|CLU}}==
{{trans|C}}
<syntaxhighlight lang="clu">max = proc [T: type] (a, b: T) returns (T)
where T has lt: proctype (T,T) returns (bool)
% Generic maximum: returns b when a<b, otherwise a.
% The where clause only requires T to provide an lt operation.
if a<b then return(b) else return(a) end
end max
 
% Generic minimum: returns a when a<b, otherwise b.
% The where clause only requires T to provide an lt operation.
min = proc [T: type] (a, b: T) returns (T)
where T has lt: proctype (T,T) returns (bool)
if a<b then return(a) else return(b) end
end min
 
% Jaro similarity of s1 and s2.
% Returns 1.0 when both strings are empty, 0.0 when exactly one is empty or
% when no characters match, and otherwise
%   (m/|s1| + m/|s2| + (m - t)/m) / 3
% where m is the number of matched characters and t is half the number of
% matched characters that appear in a different order.
jaro = proc (s1, s2: string) returns (real)
    s1_len: int := string$size(s1)
    s2_len: int := string$size(s2)
    if s1_len = 0 & s2_len = 0 then return(1.0)
    elseif s1_len = 0 | s2_len = 0 then return(0.0)
    end

    % Search radius. The raw formula gives -1 when both strings have a single
    % character, which emptied every window and made jaro("A","A") return 0.0;
    % clamp it so a character can always match the one at its own position.
    dist: int := max[int](s1_len, s2_len)/2 - 1
    if dist < 0 then dist := 0 end

    s1_match: array[bool] := array[bool]$fill(1,s1_len,false)
    s2_match: array[bool] := array[bool]$fill(1,s2_len,false)

    matches: real := 0.0
    transpositions: real := 0.0

    % Pass 1: pair each character of s1 with the first unused equal character
    % of s2 that lies within dist positions.
    for i: int in int$from_to(1, s1_len) do
        start: int := max[int](1, i-dist)
        end_: int := min[int](i+dist, s2_len)
        for k: int in int$from_to(start, end_) do
            if s2_match[k] then continue end
            if s1[i] ~= s2[k] then continue end
            s1_match[i] := true
            s2_match[k] := true
            matches := matches + 1.0
            break
        end
    end

    if matches=0.0 then return(0.0) end

    % Pass 2: walk the matched characters of both strings in order and count
    % the positions where they differ.
    k: int := 1
    for i: int in int$from_to(1, s1_len) do
        if ~s1_match[i] then continue end
        while ~s2_match[k] do k := k + 1 end
        if s1[i] ~= s2[k] then
            transpositions := transpositions + 1.0
        end
        k := k+1
    end

    % Each out-of-order character is half a transposition.
    transpositions := transpositions / 2.0
    return( ((matches / real$i2r(s1_len)) +
             (matches / real$i2r(s2_len)) +
             ((matches - transpositions) / matches)) / 3.0)
end jaro
 
% Program entry point: writes the Jaro similarity of the three task string
% pairs to the primary output stream, one value per line, each formatted
% through f_form(value, 1, 6).
start_up = proc ()
po: stream := stream$primary_output()
stream$putl(po, f_form(jaro("MARTHA", "MARHTA"), 1, 6))
stream$putl(po, f_form(jaro("DIXON", "DICKSONX"), 1, 6))
stream$putl(po, f_form(jaro("JELLYFISH", "SMELLYFISH"), 1, 6))
end start_up</syntaxhighlight>
{{out}}
<pre>0.944444
0.766667
0.896296</pre>
 
=={{header|COBOL}}==
{{trans|Java}}
<langsyntaxhighlight lang="cobol">
identification division.
program-id. JaroDistance.
Line 1,075 ⟶ 1,270:
((matches - transpositions / 2) / matches)) / 3
.
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 1,085 ⟶ 1,280:
=={{header|CoffeeScript}}==
{{trans|C++}}
<langsyntaxhighlight lang="coffeescript">jaro = (s1, s2) ->
l1 = s1.length
l2 = s2.length
Line 1,114 ⟶ 1,309:
console.log jaro "MARTHA", "MARHTA"
console.log jaro "DIXON", "DICKSONX"
console.log jaro "JELLYFISH", "SMELLYFISH"</langsyntaxhighlight>
{{Out}}
<pre>0.9444444444444445
Line 1,122 ⟶ 1,317:
=={{header|Crystal}}==
{{trans|Ruby}}
<langsyntaxhighlight lang="ruby">def jaro(s, t)
return 1.0 if s == t
Line 1,166 ⟶ 1,361:
JELLYFISH SMELLYFISH
).each_slice(2) { |(s ,t)| puts "jaro(#{s}, #{t}) = #{"%.10f" % jaro(s, t)}" }
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 1,176 ⟶ 1,371:
=={{header|D}}==
{{trans|Kotlin}}
<langsyntaxhighlight Dlang="d">auto jaro(in string s1, in string s2) {
int s1_len = cast(int) s1.length;
int s2_len = cast(int) s2.length;
Line 1,215 ⟶ 1,410:
writeln(jaro( "DIXON", "DICKSONX"));
writeln(jaro("JELLYFISH", "SMELLYFISH"));
}</langsyntaxhighlight>
<pre>0.944444
0.766667
Line 1,226 ⟶ 1,421:
{{trans|Ruby}}
{{works with|Elixir|1.3}}
<langsyntaxhighlight lang="elixir">defmodule Jaro do
def distance(s, t) when is_binary(s) and is_binary(t), do:
distance(to_charlist(s), to_charlist(t))
Line 1,283 ⟶ 1,478:
|> Enum.each(fn [s,t] ->
:io.format "jaro(~s, ~s) = ~.10f~n", [inspect(s), inspect(t), Jaro.distance(s, t)]
end)</langsyntaxhighlight>
 
{{out}}
Line 1,293 ⟶ 1,488:
 
Elixir has a built-in function (<code>String.jaro_distance</code>).
 
=={{header|Emacs Lisp}}==
{{trans|Python}}
<syntaxhighlight lang="lisp">
;; `cl-loop' and `cl-count-if' come from cl-lib, which is not guaranteed to
;; be loaded already (for example under `emacs -Q --batch').
(require 'cl-lib)

(let ()
  (defun jaro (s1 s2)
    "Return the Jaro similarity of strings S1 and S2.
The result is 1.0 when both strings are empty, 0 when no characters
match, and otherwise (m/|S1| + m/|S2| + (m - tr/2)/m) / 3, where m is
the number of matched characters and tr the number of matched
characters that appear in a different order."
    (let* ((len1 (length s1))
           (len2 (length s2))
           ;; Search radius.  The raw formula yields -1 when both strings
           ;; have a single character, which would empty every window and
           ;; score \"A\" against \"A\" as 0, so clamp it at 0.
           (mw (max 0 (1- (/ (max len1 len2) 2))))
           (mflags1 (make-vector len1 nil))
           (mflags2 (make-vector len2 nil))
           (cur2 0)
           (tr 0)
           m)
      (if (and (= len1 0) (= len2 0))
          ;; Two empty strings are an exact match.
          1.0
        ;; Pass 1: pair each character of S1 with the first unused equal
        ;; character of S2 within MW positions.  The check on MFLAGS1
        ;; keeps a character of S1 from matching twice.
        (dotimes (idx len1)
          (cl-loop for i from (max 0 (- idx mw)) to (min (1- len2) (+ idx mw))
                   do (when (and (not (aref mflags1 idx))
                                 (not (aref mflags2 i))
                                 (equal (aref s1 idx) (aref s2 i)))
                        (aset mflags1 idx t)
                        (aset mflags2 i t))))
        (setq m (cl-count-if #'identity mflags1))
        ;; Pass 2: walk the matched characters of both strings in order
        ;; and count the positions where they differ.
        (dotimes (cur1 len1)
          (when (aref mflags1 cur1)
            (while (not (aref mflags2 cur2))
              (setq cur2 (1+ cur2)))
            (unless (equal (aref s1 cur1) (aref s2 cur2))
              (setq tr (1+ tr)))
            (setq cur2 (1+ cur2))))
        (if (= m 0)
            0
          (/ (+ (/ (float m) len1)
                (/ (float m) len2)
                (/ (- m (/ (float tr) 2)) m))
             3)))))

  ;; Demo: print the similarity of the three task string pairs.
  (let ((params '(("MARTHA" "MARHTA")
                  ("DIXON" "DICKSONX")
                  ("JELLYFISH" "SMELLYFISH"))))
    (dolist (p params)
      (message "jaro(%s, %s) = %f"
               (nth 0 p) (nth 1 p)
               (jaro (nth 0 p) (nth 1 p))))))
 
</syntaxhighlight>
 
{{out}}
<pre>
jaro(MARTHA, MARHTA) = 0.944444
jaro(DIXON, DICKSONX) = 0.766667
jaro(JELLYFISH, SMELLYFISH) = 0.896296
</pre>
 
=={{header|F_Sharp|F#}}==
<langsyntaxhighlight lang="fsharp">
// Calculate Jaro distance of 2 strings. Nigel Galloway: August 7th., 2020
let fG n g=Seq.map2(fun n g->if g=1 then Some n else None) n g |> Seq.choose id
Line 1,308 ⟶ 1,571:
printfn "DIXON DICKSONX->%f" (J "DIXON" "DICKSONX")
printfn "JELLYFISH SMELLYFISH->%f" (J "JELLYFISH" "SMELLYFISH")
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 1,317 ⟶ 1,580:
=={{header|Factor}}==
{{works with|Factor|0.99 development release 2019-03-17+}}
<langsyntaxhighlight lang="factor">USING: formatting fry generalizations kernel locals make math
math.order sequences sequences.extras ;
IN: rosetta-code.jaro-distance
Line 1,350 ⟶ 1,613:
] 2 4 mnapply ;
 
MAIN: jaro-demo</langsyntaxhighlight>
{{out}}
<pre>
Line 1,360 ⟶ 1,623:
 
=={{header|FreeBASIC}}==
<langsyntaxhighlight lang="freebasic">' version 09-10-2016
' compile with: fbc -s console
 
Line 1,416 ⟶ 1,679:
Print : Print "hit any key to end program"
Sleep
End</langsyntaxhighlight>
{{out}}
<pre> jaro (MARTHA, MARHTA) = 0.9444444444444444
Line 1,423 ⟶ 1,686:
 
=={{header|Go}}==
<langsyntaxhighlight lang="go">package main
 
import "fmt"
Line 1,491 ⟶ 1,754:
fmt.Printf("%f\n", jaro("DIXON", "DICKSONX"))
fmt.Printf("%f\n", jaro("JELLYFISH", "SMELLYFISH"))
}</langsyntaxhighlight>
{{out}}
<pre>
Line 1,500 ⟶ 1,763:
 
=={{header|Haskell}}==
<langsyntaxhighlight Haskelllang="haskell">import Data.List (elemIndex, intercalate, sortOn)
import Data.Maybe (mapMaybe)
import Text.Printf (printf)
Line 1,557 ⟶ 1,820:
("DIXON", "DICKSONX"),
("JELLYFISH", "SMELLYFISH")
]</langsyntaxhighlight>
{{Out}}
<pre>DWAYNE -> DUANE -> 0.822
Line 1,570 ⟶ 1,833:
{{trans|Kotlin}}
{{works with|Neko|2.1.0}}
<langsyntaxhighlight Haxelang="haxe">class Jaro {
private static function jaro(s1: String, s2: String): Float {
var s1_len = s1.length;
Line 1,608 ⟶ 1,871:
Sys.println(jaro("JELLYFISH", "SMELLYFISH"));
}
}</langsyntaxhighlight>
{{Out}}
<pre>0.944444444444445
Line 1,615 ⟶ 1,878:
 
=={{header|IS-BASIC}}==
<langsyntaxhighlight ISlang="is-BASICbasic">100 PROGRAM "Jaro.bas"
110 DO
120 READ IF MISSING EXIT DO:A$,B$
Line 1,651 ⟶ 1,914:
440 DATA DIXON,DICKSONX
450 DATA JELLYFISH,SMELLYFISH
460 DATA DWAYNE,DUANE</langsyntaxhighlight>
 
=={{header|J}}==
Line 1,657 ⟶ 1,920:
Implementation:
 
<langsyntaxhighlight Jlang="j">jaro=: dyad define
d=. ((x >.&# y)%2)-1
e=. (x =/y) * d >: |x -/&(i.@#) y
Line 1,667 ⟶ 1,930:
s2=. #y
((m%s1)+(m%s2)+(m-t)%m)%3
)</langsyntaxhighlight>
 
Task examples:
 
<langsyntaxhighlight Jlang="j"> 'MARTHA' jaro 'MARHTA'
0.944444
'DIXON' jaro 'DICKSONX'
0.766667
'JELLYFISH' jaro 'SMELLYFISH'
0.896296</langsyntaxhighlight>
 
=={{header|Java}}==
<langsyntaxhighlight lang="java">public class JaroDistance {
public static double jaro(String s, String t) {
int s_len = s.length();
Line 1,728 ⟶ 1,991:
System.out.println(jaro("JELLYFISH", "SMELLYFISH"));
}
}</langsyntaxhighlight>
{{out}}
<pre>
Line 1,737 ⟶ 2,000:
 
=={{header|jq}}==
{{trans|Wren}}
{{works with|jq}}
def jaro(s1; s2):
'''Works with gojq, the Go implementation of jq'''
<syntaxhighlight lang="jq">def jaro($s1; $s2):
def when(p; q): if p then q else . end;
($s1|length) as $le1
| (s1$s2|length) as $len1le2
| if |$le1 (s2|length)== as0 and $len2le2 == 0 then 1
|elif (( [$len1,le1 $len2]== |0 max )or /$le2 2== -0 1)then as $match_standard0
else ((((if $le2 > $le1 then $le2 else $le1 end) / 2) | floor) - 1) as $dist
| {m:0, p:0}
| {matches: 0, matches2: [], matches2: [], transpos: 0 }
| reduce range(0; $len1) as $l1
| reduce range(.0; s1[$l1:$l1+1]le1) as $t1i (.;
(($i - $dist) | reduceif . < range(0; $len2 then 0 else . end) as $l2start
| (($i + $dist + 1) | if (.; s2[> $l2:le2 then $l2+1]le2 else . end) as $t2stop
| when(.k $t1 == $t2;start
| until(.k >= $stop;
when( ($l2-$l1) <= $match_standard and ($l1-$l2) <= $match_standard;
if (.matches2[.k] or $s1[$i:$i+1] != $s2[.mk:.k+=1]) |not
then | when(.matches1[$l2i] == $l1; .p += 1) ) ) )true
| ((.m-matches2[.p)/2)k] as= $ttrue
| ( (.m/$len1) + (.m/$len2) + (( | .m-$t)/.m)matches ) /+= 31
; | .k = $stop
else .k += 1
end) )
jaro("MARTHA";"MARHTA")
| if .matches == 0 then 0
, jaro("DIXON"; "DICKSONX")
else .k = 0
, jaro("JELLYFISH";"SMELLYFISH")
| reduce range(0; $le1) as $i (.;
if .matches1[$i]
then until(.k >= $le2 or .matches2[.k]; .k += 1)
| if .k < $le2 and ($s1[$i:$i+1] != $s2[.k:.k+1]) then .transpos += 1 else . end
| .k += 1
else .
end )
| .transpos /= 2
| (.matches/$le1 + .matches/$le2 + ((.matches - .transpos)/.matches)) / 3
end
end ;
 
def task:
Output:
[["MARTHA","MARHTA"],
["DIXON", "DICKSONX"],
["JELLYFISH","SMELLYFISH"],
["ABC","DEF"]][]
| (jaro(.[0]; .[1]) * 1000 | floor / 1000) as $d
| "jaro(\(.[0]); \(.[1])) => \($d)";
 
task</syntaxhighlight>
{{out}}
<pre>
jaro(MARTHA; MARHTA) => 0.944
0.9444444444444444
jaro(DIXON; DICKSONX) => 0.766
0.6833333333333332
jaro(JELLYFISH; SMELLYFISH) => 0.896
0.8870370370370371
jaro(ABC; DEF) => 0
</pre>
 
=={{header|Julia}}==
{{works with|Julia|01.65}}
<langsyntaxhighlight lang="julia">function jarodistance(s1::AbstractString, s2::AbstractString)
m = t = p = l1 = l2 = 0
matchstd = max(length(s1), length(s2)) / 2 - 1
for i(i1, c1) in enumerate(s1[1:end])
l1for +=(i2, 1c2) in enumerate(s2)
(c1 == c2) && (abs(i2 - i1) ≤ matchstd) && (m += 1)
l2 = 0
for j in s2[ (c1 == c2) && (i2 == i1) && (p += 1:end])
l2 += 1
if i == j
if abs(l2 - l1) ≤ matchstd m += 1 end
if l2 == l1 p += 1 end
end
end
end
t = (m - p) / 2
d = 1 / 3 * (m / length(s1) + m / length(s2) + (m - t) / m)
return d
end
 
@show jarodistance("MARTHA", "MARHTA")
const testcouples = (("MARTHA", "MARHTA"), ("DIXON", "DICKSONX"), ("JELLYFISH", "SMELLYFISH"))
@show jarodistance("DIXON", "DICKSONX")
for (s1, s2) in testcouples
@show println("jarodistance(\"$s1\JELLYFISH", \"$s2\SMELLYFISH") = ", @sprintf "%2.2f" jarodistance(s1, s2))
</syntaxhighlight>
end</lang>
 
{{out}}
<pre>
<pre>jarodistance("MARTHA", "MARHTA") = 0.94
jarodistance("DIXONMARTHA", "DICKSONXMARHTA") = 0.689444444444444444
jarodistance("JELLYFISHDIXON", "SMELLYFISHDICKSONX") = 0.89</pre>6833333333333332
jarodistance("JELLYFISH", "SMELLYFISH") = 0.8870370370370371
</pre>
 
=={{header|Kotlin}}==
{{trans|Java}}
<langsyntaxhighlight lang="scala">object Jaro {
fun distance(s1: String, s2: String): Double {
val s1_len = s1.length
Line 1,838 ⟶ 2,118:
println(Jaro.distance("DIXON", "DICKSONX"))
println(Jaro.distance("JELLYFISH", "SMELLYFISH"))
}</langsyntaxhighlight>
 
=={{header|Mathematica}} / {{header|Wolfram Language}}==
<syntaxhighlight lang="mathematica">ClearAll[JaroDistance]
(* JaroDistance[s, t] gives the Jaro similarity of the strings s and t.
   It returns 1 when both strings are empty, 0 when no characters match, and
   otherwise the numeric value of (m/|s| + m/|t| + (m - tr/2)/m)/3, where m is
   the number of matched characters and tr the number of matched characters
   that appear in a different order. *)
JaroDistance[s_String, t_String] := Module[{slen, tlen, maxdistance, smatches, tmatches, matches, transpositions, start, end, k, schar, tchar},
  slen = StringLength[s];
  tlen = StringLength[t];
  schar = Characters[s];
  tchar = Characters[t];
  If[slen == tlen == 0,
    1
    ,
    (* Search radius. The raw formula is -1 when both strings have a single
       character, which would empty every window, so clamp it at 0. *)
    maxdistance = Max[0, Floor[Max[slen, tlen]/2] - 1];
    smatches = ConstantArray[False, slen];
    tmatches = ConstantArray[False, tlen];
    matches = transpositions = 0;
    (* Pass 1: pair each character of s with the first unused equal character
       of t within maxdistance positions. *)
    Do[
      start = Max[1, i - maxdistance];
      (* Do's iterator {j, start, end} includes end, so the upper bound is
         i + maxdistance with no extra + 1; otherwise the window reaches one
         position too far to the right. *)
      end = Min[i + maxdistance, tlen];
      Do[
        If[! tmatches[[j]] && schar[[i]] === tchar[[j]],
          smatches[[i]] = True;
          tmatches[[j]] = True;
          matches++;
          Break[];
        ]
        ,
        {j, start, end}
      ]
      ,
      {i, slen}
    ];
    If[matches == 0,
      0
      ,
      (* Pass 2: walk the matched characters of both strings in order and
         count the positions where they differ. *)
      k = 1;
      Do[
        If[smatches[[i]],
          While[! tmatches[[k]],
            k++;
          ];
          If[schar[[i]] =!= tchar[[k]],
            transpositions++;
          ];
          k++;
        ]
        ,
        {i, slen}
      ];
      (* Each out-of-order character is half a transposition. *)
      N[(matches/slen + matches/tlen + (matches - transpositions/2)/matches)/3]
    ]
  ]
]
JaroDistance["DWAYNE", "DUANE"]
JaroDistance["MARTHA", "MARHTA"]
JaroDistance["DIXON", "DICKSONX"]
JaroDistance["JELLYFISH", "SMELLYFISH"]</syntaxhighlight>
{{out}}
<pre>0.822222
0.944444
0.766667
0.896296</pre>
 
=={{header|Nim}}==
{{trans|Kotlin}}
<langsyntaxhighlight Nimlang="nim">import lenientops
 
func jaro(s1, s2: string): float =
Line 1,874 ⟶ 2,220:
echo jaro("MARTHA", "MARHTA")
echo jaro("DIXON", "DICKSONX")
echo jaro("JELLYFISH", "SMELLYFISH")</langsyntaxhighlight>
 
{{out}}
Line 1,883 ⟶ 2,229:
=={{header|Objeck}}==
{{trans|Java}}
<langsyntaxhighlight lang="objeck">class JaroDistance {
function : Main(args : String[]) ~ Nil {
Jaro("MARTHA", "MARHTA")->PrintLine();
Line 1,932 ⟶ 2,278:
((matches->As(Float) - transpositions/2.0) / matches)) / 3.0;
}
}</langsyntaxhighlight>
 
{{output}}
Line 1,946 ⟶ 2,292:
{{Works with|PARI/GP|2.7.4 and above}}
 
<langsyntaxhighlight lang="parigp">
\\Jaro distance between 2 strings s1 and s2.
\\ 4/12/16 aev
Line 1,982 ⟶ 2,328:
jaroDist("DWAYNE","DUANE");
}
</langsyntaxhighlight>
 
{{Output}}
Line 1,994 ⟶ 2,340:
 
=={{header|Pascal}}==
<langsyntaxhighlight lang="pascal">
program Jaro_distance;
 
Line 2,055 ⟶ 2,401:
writeln(formatfloat('0.######', ssJaroWinkler('JELLYFISH', 'SMELLYFISH')));
{$IFNDEF LINUX}readln;{$ENDIF}
end.</langsyntaxhighlight>
{{out}}
<pre>
Line 2,065 ⟶ 2,411:
 
=={{header|Perl}}==
<langsyntaxhighlight lang="perl">use strict;
use warnings;
use List::Util qw(min max);
Line 2,102 ⟶ 2,448:
printf "%.3f\n", jaro(@$_[0], @$_[1]) for
['MARTHA', 'MARHTA'], ['DIXON', 'DICKSONX'], ['JELLYFISH', 'SMELLYFISH'],
['I repeat myself', 'I repeat myself'], ['', ''];</langsyntaxhighlight>
{{out}}
<pre>0.944
Line 2,111 ⟶ 2,457:
 
=={{header|Phix}}==
<!--<langsyntaxhighlight Phixlang="phix">(phixonline)-->
<span style="color: #008080;">function</span> <span style="color: #000000;">jaro</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">str1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">str2</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">str1</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">trim</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">upper</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str1</span><span style="color: #0000FF;">))</span>
Line 2,176 ⟶ 2,522:
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%f &lt;== jaro(\"%s\", \"%s\")\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">jaro</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s2</span><span style="color: #0000FF;">),</span><span style="color: #000000;">s1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s2</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</langsyntaxhighlight>-->
{{out}}
<pre>
Line 2,193 ⟶ 2,539:
 
{{Works with|Python|3}}
<langsyntaxhighlight lang="python">'''Jaro distance'''
 
from __future__ import division
Line 2,256 ⟶ 2,602:
 
if __name__ == '__main__':
main()</langsyntaxhighlight>
{{out}}
<pre>jaro('MARTHA', 'MARHTA') = 0.9444444444
Line 2,266 ⟶ 2,612:
{{Trans|Haskell}}
{{Works with|Python|3}}
<langsyntaxhighlight lang="python">'''Jaro distance between two strings'''
 
from functools import reduce
Line 2,506 ⟶ 2,852:
# MAIN ---
if __name__ == '__main__':
main()</langsyntaxhighlight>
{{Out}}
<pre>Jaro distances:
Line 2,522 ⟶ 2,868:
Returns an exact value for the Jaro distance.
 
<langsyntaxhighlight lang="racket">#lang racket/base
;; {{trans|C}}
(require data/bit-vector)
Line 2,589 ⟶ 2,935:
(exact->inexact (jaro-distance "DIXON" "DICKSONX")); 0.766667
(jaro-distance "JELLYFISH" "SMELLYFISH"); 0.896296
(exact->inexact (jaro-distance "JELLYFISH" "SMELLYFISH"))); 0.896296</langsyntaxhighlight>
 
{{out}}
Line 2,603 ⟶ 2,949:
(formerly Perl 6)
{{trans|Perl}}
<syntaxhighlight lang="raku" perl6line>sub jaro ($s, $t) {
return 1 if $s eq $t;
 
my $s_lens-len = + my @s = $s.comb;
my $t_lent-len = + my @t = $t.comb;
my $match_distancematch-distance = ($s_lens-len max $t_lent-len) div 2 - 1;
 
my ($matches, @s_matchess-matches, @t_matchest-matches);
for ^@s -> $i {
my $start = 0 max $i - $match_distancematch-distance;
my $end = $i + $match_distancematch-distance min ($t_lent-len - 1);
 
for $start .. $end -> $j {
next if @t_matchest-matches[$j] or @s[$i] ne @t[$j];
(@s_matchess-matches[$i], @t_matchest-matches[$j]) = (1, 1);
$matches++ and last;
}
Line 2,625 ⟶ 2,971:
my ($k, $transpositions) = (0, 0);
for ^@s -> $i {
next unless @s_matchess-matches[$i];
$k++ until @t_matchest-matches[$k];
$transpositions++ if @s[$i] ne @t[$k];
$k++;
}
 
( $matches/$s_lens-len + $matches/$t_lent-len + (($matches - $transpositions/2) / $matches) ) / 3
}
 
say jaro(.key, .value).fmt: '%.3f' for
'MARTHA' => 'MARHTA', 'DIXON' => 'DICKSONX', 'JELLYFISH' => 'SMELLYFISH',
'I repeat myself' => 'I repeat myself', '' => '';</lang>
</syntaxhighlight>
{{out}}
<pre>0.944
Line 2,645 ⟶ 2,992:
 
=={{header|REXX}}==
<langsyntaxhighlight lang="rexx">/*REXX program computes the Jaro distance between two strings (or a list of strings).*/
@.= /*define a default for the @. array. */
parse arg @.1 /*obtain an optional character string. */
Line 2,676 ⟶ 3,023:
 
if m==0 then return 0
return (m/L1 + m/L2 + (m-t)/m) / 3</langsyntaxhighlight>
{{out|output|text=&nbsp; when using the default inputs:}}
<pre>
Line 2,686 ⟶ 3,033:
 
=={{header|Ring}}==
<langsyntaxhighlight lang="ring">
# Project : Jaro distance
 
Line 2,736 ⟶ 3,083:
b = temp
return [a, b]
</syntaxhighlight>
</lang>
Output:
<pre>
Line 2,745 ⟶ 3,092:
 
=={{header|Ruby}}==
<langsyntaxhighlight lang="ruby">def jaro(s, t)
return 1.0 if s == t
Line 2,792 ⟶ 3,139:
).each_slice(2) do |s,t|
puts "jaro(#{s.inspect}, #{t.inspect}) = #{'%.10f' % jaro(s, t)}"
end</langsyntaxhighlight>
{{out}}
<pre>
Line 2,802 ⟶ 3,149:
=={{header|Rust}}==
{{trans|C++}}
<langsyntaxhighlight lang="rust">use std::cmp;
 
pub fn jaro(s1: &str, s2: &str) -> f64 {
Line 2,842 ⟶ 3,189:
let pairs = [("MARTHA", "MARHTA"), ("DIXON", "DICKSONX"), ("JELLYFISH", "SMELLYFISH")];
for p in pairs.iter() { println!("{}/{} = {}", p.0, p.1, jaro(p.0, p.1)); }
}</langsyntaxhighlight>
{{Out}}
<pre>MARTHA/MARHTA = 0.9444444444444445
Line 2,850 ⟶ 3,197:
=={{header|Scala}}==
{{trans|Java}}
<langsyntaxhighlight lang="scala">object Jaro extends App {
 
def distance(s1: String, s2: String): Double = {
Line 2,886 ⟶ 3,233:
val strings = List(("MARTHA", "MARHTA"), ("DIXON", "DICKSONX"), ("JELLYFISH", "SMELLYFISH"))
strings.foreach { s => println(distance(s._1, s._2)) }
}</langsyntaxhighlight>
 
=={{header|Sidef}}==
<langsyntaxhighlight lang="ruby">func jaro(s, t) {
 
return 1 if (s == t)
Line 2,939 ⟶ 3,286:
] {
say "jaro(#{pair.map{.join.dump}.join(', ')}) = #{'%.10f' % jaro(pair...)}"
}</langsyntaxhighlight>
{{out}}
<pre>
Line 2,950 ⟶ 3,297:
Here we use the [https://ideas.repec.org/c/boc/bocode/s457850a.html jarowinkler] package from SSC. To install the package, type
 
<syntaxhighlight lang ="stata">ssc install jarowinkler</langsyntaxhighlight>
 
Now the program for the task:
 
<langsyntaxhighlight lang="stata">clear
input str20 a str20 b
DWAYNE DUANE
Line 2,965 ⟶ 3,312:
format %8.3f jaro
format %-20s a b
list a b jaro</langsyntaxhighlight>
 
'''Output'''
Line 2,979 ⟶ 3,326:
 
=={{header|Swift}}==
<langsyntaxhighlight Swiftlang="swift"> func jaroWinklerMatch(_ s: String, _ t: String) -> Double {
let s_len: Int = s.count
let t_len: Int = t.count
Line 3,068 ⟶ 3,415:
print("DIXON/DICKSONX:", jaroWinklerMatch("DIXON", "DICKSONX"))
print("JELLYFISH/SMELLYFISH:", jaroWinklerMatch("JELLYFISH", "SMELLYFISH"))
</syntaxhighlight>
</lang>
 
{{out}}
Line 3,079 ⟶ 3,426:
 
=={{header|Tcl}}==
<langsyntaxhighlight Tcllang="tcl">proc jaro {s1 s2} {
set l1 [string length $s1]
set l2 [string length $s2]
Line 3,116 ⟶ 3,463:
} {
puts "[jaro $s $t]:\t$s / $t"
}</langsyntaxhighlight>
 
{{out}}
Line 3,125 ⟶ 3,472:
 
=={{header|Turbo-Basic XL}}==
<langsyntaxhighlight lang="turbobasic">
10 DIM Word_1$(20), Word_2$(20), Z$(20)
11 CLS
Line 3,167 ⟶ 3,514:
12310 ? "Jaro Winkler Distance=";Result
12320 ENDPROC
</syntaxhighlight>
</lang>
{{out}}
<pre>MARTHA, MARHTA: 0.9444444433
Line 3,174 ⟶ 3,521:
 
=={{header|VBA}}==
<syntaxhighlight lang="vb">
<lang vb>
Option Explicit
 
Line 3,219 ⟶ 3,566:
JaroWinkler = JaroWinkler + (1 - JaroWinkler) * l * WorksheetFunction.Min(0.25, p)
End Function
</syntaxhighlight>
</lang>
 
=={{header|V (Vlang)}}==
 
{{trans|Python}}
<syntaxhighlight lang="v (vlang)">import math
 
// jaro returns the Jaro similarity of str1 and str2.
// The result is 1 when both strings are empty, 0 when exactly one is empty
// or when no characters match, and otherwise
// (m/|str1| + m/|str2| + (m - t)/m) / 3, where m is the number of matched
// characters and t is half the number of matched characters that appear in
// a different order.
fn jaro(str1 string, str2 string) f64 {
	s1_len := str1.len
	s2_len := str2.len
	if s1_len == 0 && s2_len == 0 {
		return 1
	}
	if s1_len == 0 || s2_len == 0 {
		return 0
	}
	// The raw formula is -1 when both strings have a single character, which
	// would empty every search window; clamp it at 0.
	match_distance := math.max<int>(0, math.max<int>(s1_len, s2_len) / 2 - 1)
	mut str1_matches := []bool{len: s1_len}
	mut str2_matches := []bool{len: s2_len}
	mut matches := 0
	mut transpositions := 0.0
	// Pass 1: pair each character of str1 with the first unused equal
	// character of str2 within match_distance positions.
	for i in 0 .. s1_len {
		start := math.max<int>(0, i - match_distance)
		// `start..end` excludes `end`, so the bound needs the extra + 1 to
		// keep index i + match_distance inside the window.
		end := math.min<int>(i + match_distance + 1, s2_len)
		for k in start .. end {
			if str2_matches[k] {
				continue
			}
			if str1[i] != str2[k] {
				continue
			}
			str1_matches[i] = true
			str2_matches[k] = true
			matches++
			break
		}
	}
	if matches == 0 {
		return 0
	}
	// Pass 2: walk the matched characters of both strings in order and count
	// the positions where they differ.
	mut k := 0
	for i in 0 .. s1_len {
		if !str1_matches[i] {
			continue
		}
		for !str2_matches[k] {
			k++
		}
		if str1[i] != str2[k] {
			transpositions++
		}
		k++
	}
	// Each out-of-order character is half a transposition.
	transpositions /= 2
	return (matches / f64(s1_len) +
		matches / f64(s2_len) +
		(matches - transpositions) / matches) / 3
}
// main prints the Jaro similarity of the three task string pairs, one per line.
fn main() {
println(jaro("MARTHA", "MARHTA"))
println(jaro("DIXON", "DICKSONX"))
println(jaro("JELLYFISH", "SMELLYFISH"))
}</syntaxhighlight>
{{out}}
<pre>0.9444444444444445
0.7666666666666666
0.8962962962962964
</pre>
 
=={{header|Wren}}==
{{trans|Go}}
{{libheader|Wren-fmt}}
<langsyntaxhighlight ecmascriptlang="wren">import "./fmt" for Fmt
 
var jaro = Fn.new { |s1, s2|
Line 3,268 ⟶ 3,685:
System.print(Fmt.f(0, jaro.call("MARTHA", "MARHTA")))
System.print(Fmt.f(0, jaro.call("DIXON", "DICKSONX")))
System.print(Fmt.f(0, jaro.call("JELLYFISH", "SMELLYFISH")))</langsyntaxhighlight>
 
{{out}}
Line 3,278 ⟶ 3,695:
 
=={{header|zkl}}==
<langsyntaxhighlight lang="zkl"> //-->String of matched characters, ordered
fcn _jaro(str1,str2, matchDistance){
cs:=Sink(String);
Line 3,298 ⟶ 3,715:
( matches/s1Len + matches/s2Len +
((matches - transpositions)/matches) ) / 3.0
}</langsyntaxhighlight>
<langsyntaxhighlight lang="zkl">foreach s,t in (T(
T("MARTHA","MARHTA"), T("DIXON","DICKSONX"), T("JELLYFISH","SMELLYFISH"))){
println(0'|jaro("%s","%s") = %.10f|.fmt(s,t,jaro(s,t)));
}</langsyntaxhighlight>
{{out}}
<pre>
Line 3,312 ⟶ 3,729:
=={{header|ZX Spectrum Basic}}==
{{trans|FreeBASIC}}
<langsyntaxhighlight lang="zxbasic">10 LET a$="MARTHA": LET b$="MARHTA": PRINT a$;", ";b$;": ";: GO SUB 1000: PRINT jaro
20 LET a$="DIXON": LET b$="DICKSONX": PRINT a$;", ";b$;": ";: GO SUB 1000: PRINT jaro
30 LET a$="JELLYFISH": LET b$="SMELLYFISH": PRINT a$;", ";b$;": ";: GO SUB 1000: PRINT jaro
Line 3,333 ⟶ 3,750:
5000 REM Functions
5010 DEF FN x(a,b)=(a AND a>b)+(b AND a<b)+(a AND a=b): REM max function
5020 DEF FN n(a,b)=(a AND a<b)+(b AND a>b)+(a AND a=b): REM min function</langsyntaxhighlight>
{{out}}
<pre>MARTHA, MARHTA: 0.94444444
337

edits