CloudFlare suffered a massive security issue affecting all of its customers, including Rosetta Code. All passwords not changed since February 19th 2017 have been expired, and session cookie longevity will be reduced until late March.--Michael Mol (talk) 05:15, 25 February 2017 (UTC)

Jaro distance

From Rosetta Code
Task
Jaro distance
You are encouraged to solve this task according to the task description, using any language you may know.

The Jaro distance is a measure of similarity between two strings.

The higher the Jaro distance for two strings is, the more similar the strings are.

The score is normalized such that   0   equates to no similarity and   1   is an exact match.


Definition

The Jaro distance     of two given strings     and     is

Where:

  •   is the number of matching characters;
  •   is half the number of transpositions.


Two characters from     and     respectively, are considered matching only if they are the same and not farther than   .

Each character of     is compared with all its matching characters in   .

The number of matching (but different sequence order) characters divided by 2 defines the number of transpositions.


Example

Given the strings     DWAYNE   and     DUANE   we find:


We find a Jaro score of:


Task

Implement the Jaro-distance algorithm and show the distances for each of the following pairs:

  • ("MARTHA", "MARHTA")
  • ("DIXON", "DICKSONX")
  • ("JELLYFISH", "SMELLYFISH")


See also



AWK[edit]

 
# syntax: GAWK -f JARO_DISTANCE.AWK
BEGIN {
main("DWAYNE","DUANE")
main("MARTHA","MARHTA")
main("DIXON","DICKSONX")
main("JELLYFISH","SMELLYFISH")
exit(0)
}
function main(str1,str2) {
printf("%9.7f '%s' '%s'\n",jaro(str1,str2),str1,str2)
}
function jaro(str1,str2, begin,end,i,j,k,leng1,leng2,match_distance,matches,str1_arr,str2_arr,transpositions) {
leng1 = length(str1)
leng2 = length(str2)
if (leng1 == 0 && leng2 == 0) { # both strings are empty
return(1)
}
if (leng1 == 0 || leng2 == 0) { # only one string is empty
return(0)
}
match_distance = int(max(leng1,leng2)/2-1)
for (i=1; i<=leng1; i++) { # find matches
begin = int(max(0,i-match_distance))
end = int(min(i+match_distance+1,leng2))
for (j=begin; j<=end; j++) {
if (str2_arr[j]) { continue }
if (substr(str1,i,1) != substr(str2,j,1)) { continue }
str1_arr[i] = 1
str2_arr[j] = 1
matches++
break
}
}
if (matches == 0) {
return(0)
}
k = 0
for (i=1; i<=leng1; i++) { # count transpositions
if (!str1_arr[i]) { continue }
while (!str2_arr[k]) {
k++
}
if (substr(str1,i,1) != substr(str2,k,1)) {
transpositions++
}
k++
}
transpositions /= 2
return((matches/leng1)+(matches/leng2)+((matches-transpositions)/matches))/3
}
function max(x,y) { return((x > y) ? x : y) }
function min(x,y) { return((x < y) ? x : y) }
 
Output:
0.8222222 'DWAYNE' 'DUANE'
0.9444444 'MARTHA' 'MARHTA'
0.7666667 'DIXON' 'DICKSONX'
0.8962963 'JELLYFISH' 'SMELLYFISH'

C[edit]

#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
 
#define TRUE 1
#define FALSE 0
 
#define max(a, b) ((a) > (b) ? (a) : (b))
#define min(a, b) ((a) < (b) ? (a) : (b))
 
double jaro(const char *str1, const char *str2) {
// length of the strings
int str1_len = strlen(str1);
int str2_len = strlen(str2);
 
// if both strings are empty return 1
// if only one of the strings is empty return 0
if (str1_len == 0) return str2_len == 0 ? 1.0 : 0.0;
 
// max distance between two chars to be considered matching
// floor() is ommitted due to integer division rules
int match_distance = (int) max(str1_len, str2_len)/2 - 1;
 
// arrays of bools that signify if that char in the matching string has a match
int *str1_matches = calloc(str1_len, sizeof(int));
int *str2_matches = calloc(str2_len, sizeof(int));
 
// number of matches and transpositions
double matches = 0.0;
double transpositions = 0.0;
 
// find the matches
for (int i = 0; i < str1_len; i++) {
// start and end take into account the match distance
int start = max(0, i - match_distance);
int end = min(i + match_distance + 1, str2_len);
 
for (int k = start; k < end; k++) {
// if str2 already has a match continue
if (str2_matches[k]) continue;
// if str1 and str2 are not
if (str1[i] != str2[k]) continue;
// otherwise assume there is a match
str1_matches[i] = TRUE;
str2_matches[k] = TRUE;
matches++;
break;
}
}
 
// if there are no matches return 0
if (matches == 0) {
free(str1_matches);
free(str2_matches);
return 0.0;
}
 
// count transpositions
int k = 0;
for (int i = 0; i < str1_len; i++) {
// if there are no matches in str1 continue
if (!str1_matches[i]) continue;
// while there is no match in str2 increment k
while (!str2_matches[k]) k++;
// increment transpositions
if (str1[i] != str2[k]) transpositions++;
k++;
}
 
// divide the number of transpositions by two as per the algorithm specs
// this division is valid because the counted transpositions include both
// instances of the transposed characters.
transpositions /= 2.0;
 
// free the allocated memory
free(str1_matches);
free(str2_matches);
 
// return the Jaro distance
return ((matches / str1_len) +
(matches / str2_len) +
((matches - transpositions) / matches)) / 3.0;
}
 
int main() {
printf("%f\n", jaro("MARTHA", "MARHTA"));
printf("%f\n", jaro("DIXON", "DICKSONX"));
printf("%f\n", jaro("JELLYFISH", "SMELLYFISH"));
}
Output:
0.944444
0.766667
0.896296

C++[edit]

Translation of: C
#include <algorithm>
#include <iostream>
#include <string>
 
double jaro(const std::string s1, const std::string s2) {
const uint l1 = s1.length(), l2 = s2.length();
if (l1 == 0)
return l2 == 0 ? 1.0 : 0.0;
const uint match_distance = std::max(l1, l2) / 2 - 1;
bool s1_matches[l1];
bool s2_matches[l2];
std::fill(s1_matches, s1_matches + l1, false);
std::fill(s2_matches, s2_matches + l2, false);
uint matches = 0;
for (uint i = 0; i < l1; i++)
{
const int end = std::min(i + match_distance + 1, l2);
for (int k = std::max(0u, i - match_distance); k < end; k++)
if (!s2_matches[k] && s1[i] == s2[k])
{
s1_matches[i] = true;
s2_matches[k] = true;
matches++;
break;
}
}
if (matches == 0)
return 0.0;
double t = 0.0;
uint k = 0;
for (uint i = 0; i < l1; i++)
if (s1_matches[i])
{
while (!s2_matches[k]) k++;
if (s1[i] != s2[k]) t += 0.5;
k++;
}
 
const double m = matches;
return (m / l1 + m / l2 + (m - t) / m) / 3.0;
}
 
int main() {
using namespace std;
cout << jaro("MARTHA", "MARHTA") << endl;
cout << jaro("DIXON", "DICKSONX") << endl;
cout << jaro("JELLYFISH", "SMELLYFISH") << endl;
return 0;
}

Clojure[edit]

 
(ns test-project-intellij.core
(:gen-class))
 
(defn find-matches [s t]
" find match locations in the two strings "
" s_matches is set to true wherever there is a match in t and t_matches is set conversely "
(let [s_len (count s)
t_len (count t)
match_distance (int (- (/ (max s_len t_len) 2) 1))
matches 0
transpositions 0
fn-start (fn [i] (max 0 (- i match_distance))) ; function to compute starting position
fn-end (fn [i] (min (+ i match_distance 1) (- t_len 1))) ] ; function to compute end position
(loop [i 0
start (fn-start i)
end (fn-end i)
k start
s_matches (vec (repeat (count s) false))
t_matches (vec (repeat (count t) false))
matches 0]
 
(if (< i s_len)
 
(if (<= k end)
 
(if (get t_matches k)
; continue with next k
(recur i start end (inc k) s_matches t_matches matches)
 
(if (= (get s i) (get t k))
; match a position, so update matches, s_matches, t_matches to reflect match
(recur (inc i) (fn-start (inc i)) (fn-end (inc i)) (fn-start (inc i)) (assoc s_matches i true) (assoc t_matches k true) (inc matches))
; no match so try next k
(recur i start end (inc k) s_matches t_matches matches)))
 
; End of k iterations, so increment i and set k to start based upon i
(recur (inc i) (fn-start (inc i)) (fn-end (inc i)) (fn-start (inc i)) s_matches t_matches matches))
 
; End of i iterations
[matches s_matches t_matches]))))
 
(defn count-transpositions [s t s_matches t_matches]
" Utility function to count the number of transpositions "
(let [s_len (count s)]
(loop [i 0
k 0
transpositions 0]
 
(if (< i s_len)
; still elements in s (since i < s_len)
(if (not (get s_matches i nil))
; skip to next i since there are no matches in s
(recur (inc i) k transpositions)
; checking for match in t
(if (not (get t_matches k nil))
; keeping looping around as long as there are no matches in t
(recur i (inc k) transpositions)
(if (not= (get s i) (get t k))
; increment transposition count (if strings don't equal at match location)
(recur (inc i) (inc k) (inc transpositions))
; was a match, so advance i and k without increasing transpositions count
(recur (inc i) (inc k) transpositions))))
; Return count
transpositions))))
 
(defn jaro [s t]
" Main Jaro Distance routine"
(if (= s t)
1
(let [[matches s_matches t_matches] (find-matches s t)]
(if (= 0 matches)
0
(let [s_len (count s)
t_len (count t)
transpositions (count-transpositions s t s_matches t_matches)]
(float (/ (+ (/ matches s_len) (/ matches t_len) (/ (- matches (/ transpositions 2)) matches)) 3)))))))
 
 
(println (jaro "MARTHA" "MARHTA"))
(println (jaro "DIXON" "DICKSONX"))
(println (jaro "JELLYFISH" "SMELLYFISH"))
 
Output:
0.9444444
0.76666665
0.8962963

CoffeeScript[edit]

Translation of: C++
jaro = (s1, s2) ->
l1 = s1.length
l2 = s2.length
if l1 == 0 then return if l2 == 0 then 1.0 else 0.0
match_distance = Math.max(l1, l2) / 2 - 1
s1_matches = []
s2_matches = []
m = 0
for i in [0...l1]
end = Math.min(i + match_distance + 1, l2)
for k in [Math.max(0, i - match_distance)...end]
if !s2_matches[k] and s1[i] == s2[k]
s1_matches[i] = true
s2_matches[k] = true
m++
break
if m == 0
0.0
else
t = 0.0
k = 0
for i in [0...l1]
if s1_matches[i]
until s2_matches[k] then k++
if s1[i] != s2[k++] then t += 0.5
(m / l1 + m / l2 + (m - t) / m) / 3.0
 
console.log jaro "MARTHA", "MARHTA"
console.log jaro "DIXON", "DICKSONX"
console.log jaro "JELLYFISH", "SMELLYFISH"
Output:
0.9444444444444445
0.7666666666666666
0.8962962962962964

D[edit]

Translation of: Kotlin
auto jaro(in string s1, in string s2) {
int s1_len = cast(int) s1.length;
int s2_len = cast(int) s2.length;
if (s1_len == 0 && s2_len == 0) return 1;
 
import std.algorithm.comparison: min, max;
auto match_distance = max(s1_len, s2_len) / 2 - 1;
auto s1_matches = new bool[s1_len];
auto s2_matches = new bool[s2_len];
int matches = 0;
for (auto i = 0; i < s1_len; i++) {
auto start = max(0, i - match_distance);
auto end = min(i + match_distance + 1, s2_len);
for (auto j = start; j < end; j++)
if (!s2_matches[j] && s1[i] == s2[j]) {
s1_matches[i] = true;
s2_matches[j] = true;
matches++;
break;
}
}
if (matches == 0) return 0;
 
auto t = 0.0;
auto k = 0;
for (auto i = 0; i < s1_len; i++)
if (s1_matches[i]) {
while (!s2_matches[k]) k++;
if (s1[i] != s2[k++]) t += 0.5;
}
double m = matches;
return (m / s1_len + m / s2_len + (m - t) / m) / 3.0;
}
 
void main() {
import std.stdio: writeln;
writeln(jaro( "MARTHA", "MARHTA"));
writeln(jaro( "DIXON", "DICKSONX"));
writeln(jaro("JELLYFISH", "SMELLYFISH"));
}
0.944444
0.766667
0.896296

Elixir[edit]

Translation of: Ruby
Works with: Elixir version 1.3
defmodule Jaro do
def distance(s, t) when is_binary(s) and is_binary(t), do:
distance(to_charlist(s), to_charlist(t))
def distance(x, x), do: 1.0
def distance(s, t) do
s_len = length(s)
t_len = length(t)
{s_matches, t_matches, matches} = matching(s, t, s_len, t_len)
if matches == 0 do
0.0
else
{k, transpositions} = transposition(s, t, s_matches, t_matches)
((matches / s_len) +
(matches / t_len) +
((matches - transpositions/2) / matches)) / 3
end
end
 
defp matching(s, t, s_len, t_len) do
match_distance = div(max(s_len, t_len), 2) - 1
ac0 = {List.duplicate(false, s_len), List.duplicate(false, t_len), 0}
Enum.reduce(0..s_len-1, ac0, fn i,acc ->
j_start = max(0, i-match_distance)
j_end = min(i+match_distance, t_len-1)
Enum.reduce_while(j_start..j_end, acc, fn j,{sm,tm,m} ->
if Enum.at(tm, j) or Enum.at(s, i) != Enum.at(t, j) do
{:cont, {sm, tm, m}}
else
{:halt, { List.replace_at(sm, i, true),
List.replace_at(tm, j, true),
m + 1 }}
end
end)
end)
end
 
defp transposition(s, t, s_matches, t_matches) do
Enum.reduce(0..length(s)-1, {0,0}, fn i,{k,transpositions} ->
if Enum.at(s_matches, i) do
k = k + (Enum.drop(t_matches, k)
|> Enum.take_while(fn matche -> not matche end)
|> length)
if Enum.at(s, i) == Enum.at(t, k), do: {k+1, transpositions},
else: {k+1, transpositions+1}
else
{k, transpositions}
end
end)
end
end
 
~w( MARTHA MARHTA
DIXON DICKSONX
JELLYFISH SMELLYFISH )c
|> Enum.chunk(2)
|> Enum.each(fn [s,t] ->
 :io.format "jaro(~s, ~s) = ~.10f~n", [inspect(s), inspect(t), Jaro.distance(s, t)]
end)
Output:
jaro('MARTHA', 'MARHTA') = 0.9444444444
jaro('DIXON', 'DICKSONX') = 0.7666666667
jaro('JELLYFISH', 'SMELLYFISH') = 0.8962962963

Elixir has a built-in function (String.jaro_distance).

FreeBASIC[edit]

' version 09-10-2016
' compile with: fbc -s console
 
#Macro max(x, y)
IIf((x) > (y), (x), (y))
#EndMacro
 
#Macro min(x, y)
IIf((x) < (y), (x), (y))
#EndMacro
 
Function jaro(word1 As String, word2 As String) As Double
 
If Len(word1) > Len(word2) Then Swap word1, word2
 
Dim As Long i, j, j1, m, t
Dim As Long s1 = Len(word1)
Dim As Long s2 = Len(word2)
Dim As Long max_dist = s2 \ 2 -1 ' integer division
 
For i = 0 To s1 -1
If word1[i] = word2[j] Then
m = m +1
word2[j] = 32
Else
For j1 = max(0, i - max_dist) To min(s2 -1, i + max_dist)
If word1[i] = word2[j1] Then
t = t +1
m = m +1
word2[j1] = 32
If j1 > j Then j = j1
End If
Next
End If
j = j + 1
Next
 
If m = 0 Then Return 0
 
t = t \ 2
Return (m / s1 + m / s2 + ((m - t) / m)) / 3
 
End Function
 
' ------=< MAIN >=------
 
Print
Print " jaro (MARTHA, MARHTA) ="; jaro("MARTHA", "MARHTA")
Print " jaro (DIXON, DICKSONX) ="; jaro("DIXON", "DICKSONX")
Print " jaro (JELLYFISH, SMELLYFISH) ="; jaro("JELLYFISH", "SMELLYFISH")
 
 
' empty keyboard buffer
While Inkey <> "" : Wend
Print : Print "hit any key to end program"
Sleep
End
Output:
 jaro (MARTHA,    MARHTA)     = 0.9444444444444444
 jaro (DIXON,     DICKSONX)   = 0.7666666666666667
 jaro (JELLYFISH, SMELLYFISH) = 0.8962962962962963

Go[edit]

package main
 
import "fmt"
 
func jaro(str1, str2 string) float64 {
if len(str1) == 0 && len(str2) == 0 {
return 1
}
if len(str1) == 0 || len(str2) == 0 {
return 0
}
match_distance := len(str1)
if len(str2) > match_distance {
match_distance = len(str2)
}
match_distance = match_distance/2 - 1
str1_matches := make([]bool, len(str1))
str2_matches := make([]bool, len(str2))
matches := 0.
transpositions := 0.
for i := range str1 {
start := i - match_distance
if start < 0 {
start = 0
}
end := i + match_distance + 1
if end > len(str2) {
end = len(str2)
}
for k := start; k < end; k++ {
if str2_matches[k] {
continue
}
if str1[i] != str2[k] {
continue
}
str1_matches[i] = true
str2_matches[k] = true
matches++
break
}
}
if matches == 0 {
return 0
}
k := 0
for i := range str1 {
if !str1_matches[i] {
continue
}
for !str2_matches[k] {
k++
}
if str1[i] != str2[k] {
transpositions++
}
k++
}
transpositions /= 2
return (matches/float64(len(str1)) +
matches/float64(len(str2)) +
(matches-transpositions)/matches) / 3
}
 
func main() {
fmt.Printf("%f\n", jaro("MARTHA", "MARHTA"))
fmt.Printf("%f\n", jaro("DIXON", "DICKSONX"))
fmt.Printf("%f\n", jaro("JELLYFISH", "SMELLYFISH"))
}
Output:
0.944444
0.766667
0.896296

Haskell[edit]

import Data.Ord (comparing)
import Text.Printf (printf)
import Data.List (sortBy, elemIndex, intercalate)
 
jaro :: String -> String -> Float
jaro x y =
let f = (fromIntegral . length)
[m, t] = [f, fromIntegral . transpositions] <*> [matches x y]
[s1, s2] = [f] <*> [x, y]
in if m == 0
then 0
else (1 / 3) * ((m / s1) + (m / s2) + ((m - t) / m))
where
matches :: String -> String -> [(Int, Char)]
matches s1 s2 =
let [(l1, xs), (l2, ys)] =
sortBy (comparing fst) ((length >>= (,)) <$> [s1, s2])
r = quot l2 2 - 1
in foldr
(\(c, n) a ->
let offset = max 0 (n - (r + 1)) -- initial chars out of range ?
-- Any index for this char within range ?
in case elemIndex c (drop offset (take (n + r) ys)) of
Just i -> (offset + i, c) : a
Nothing -> a)
[]
(zip xs [1 ..])
transpositions :: [(Int, Char)] -> Int
transpositions ics =
foldr
(\((x, _), (y, _)) a ->
if x > y -- This character was expected before the previous one.
then a + 1
else a)
0
(zip ics (tail ics))
 
-- TEST ----------------------------------------------------------------------
main :: IO ()
main =
mapM_ putStrLn $
(\(s1, s2) -> intercalate " -> " [s1, s2, printf "%.3f\n" $ jaro s1 s2]) <$>
[ ("DWAYNE", "DUANE")
, ("MARTHA", "MARHTA")
, ("DIXON", "DICKSONX")
, ("JELLYFISH", "SMELLYFISH")
]
Output:
DWAYNE -> DUANE -> 0.822

MARTHA -> MARHTA -> 0.944

DIXON -> DICKSONX -> 0.767

JELLYFISH -> SMELLYFISH -> 0.896

Haxe[edit]

Translation of: Kotlin
Works with: Neko version 2.1.0
class Jaro {
private static function jaro(s1: String, s2: String): Float {
var s1_len = s1.length;
var s2_len = s2.length;
if (s1_len == 0 && s2_len == 0) return 1;
 
var match_distance = Std.int(Math.max(s1_len, s2_len)) / 2 - 1;
var matches = { s1: [for(n in 0...s1_len) false], s2: [for(n in 0...s2_len) false] };
var m = 0;
for (i in 0...s1_len) {
var start = Std.int(Math.max(0, i - match_distance));
var end = Std.int(Math.min(i + match_distance + 1, s2_len));
for (j in start...end)
if (!matches.s2[j] && s1.charAt(i) == s2.charAt(j)) {
matches.s1[i] = true;
matches.s2[j] = true;
m++;
break;
}
}
if (m == 0) return 0;
 
var k = 0;
var t = 0.;
for (i in 0...s1_len)
if (matches.s1[i]) {
while (!matches.s2[k]) k++;
if (s1.charAt(i) != s2.charAt(k++)) t += 0.5;
}
 
return (m / s1_len + m / s2_len + (m - t) / m) / 3.0;
}
 
public static function main() {
Sys.println(jaro( "MARTHA", "MARHTA"));
Sys.println(jaro( "DIXON", "DICKSONX"));
Sys.println(jaro("JELLYFISH", "SMELLYFISH"));
}
}
Output:
0.944444444444445
0.766666666666667
0.896296296296296

J[edit]

Implementation:

jaro=: dyad define
d=. ((x >.&# y)%2)-1
e=. (x =/y) * d >: |x -/&(i.@#) y
xm=. (+./"1 e)#x
ym=. (+./"2 e)#y
m=. xm <.&# ym
t=. (+/xm ~:&(m&{.) ym)%2
s1=. #x
s2=. #y
((m%s1)+(m%s2)+(m-t)%m)%3
)

Task examples:

   'MARTHA' jaro 'MARHTA'
0.944444
'DIXON' jaro 'DICKSONX'
0.766667
'JELLYFISH' jaro 'SMELLYFISH'
0.896296

Java[edit]

public class JaroDistance {
public static double jaro(String s, String t) {
int s_len = s.length();
int t_len = t.length();
 
if (s_len == 0 && t_len == 0) return 1;
 
int match_distance = Integer.max(s_len, t_len) / 2 - 1;
 
boolean[] s_matches = new boolean[s_len];
boolean[] t_matches = new boolean[t_len];
 
int matches = 0;
int transpositions = 0;
 
for (int i = 0; i < s_len; i++) {
int start = Integer.max(0, i-match_distance);
int end = Integer.min(i+match_distance+1, t_len);
 
for (int j = start; j < end; j++) {
if (t_matches[j]) continue;
if (s.charAt(i) != t.charAt(j)) continue;
s_matches[i] = true;
t_matches[j] = true;
matches++;
break;
}
}
 
if (matches == 0) return 0;
 
int k = 0;
for (int i = 0; i < s_len; i++) {
if (!s_matches[i]) continue;
while (!t_matches[k]) k++;
if (s.charAt(i) != t.charAt(k)) transpositions++;
k++;
}
 
return (((double)matches / s_len) +
((double)matches / t_len) +
(((double)matches - transpositions/2.0) / matches)) / 3.0;
}
 
public static void main(String[] args) {
System.out.println(jaro( "MARTHA", "MARHTA"));
System.out.println(jaro( "DIXON", "DICKSONX"));
System.out.println(jaro("JELLYFISH", "SMELLYFISH"));
}
}
Output:
0.9444444444444445
0.7666666666666666
0.8962962962962964

jq[edit]

   def jaro(s1; s2):
   
     def when(p; q): if p then q else . end;
   
     (s1|length) as $len1
     | (s2|length) as $len2
     | (( [$len1, $len2] | max ) / 2 - 1) as $match_standard
     | {m:0, p:0}
     | reduce range(0; $len1) as $l1
       (.; s1[$l1:$l1+1] as $t1
           | reduce range(0; $len2) as $l2
             (.; s2[$l2:$l2+1] as $t2
                 | when( $t1 == $t2;
                         when( ($l2-$l1) <= $match_standard and ($l1-$l2) <= $match_standard;
   		                .m+=1) 
                         | when($l2 == $l1; .p += 1) ) ) )
     | ((.m-.p)/2) as $t
     | ( (.m/$len1) + (.m/$len2) + ((.m-$t)/.m) ) / 3
   ;
   
   jaro("MARTHA";"MARHTA")
   , jaro("DIXON"; "DICKSONX") 
   , jaro("JELLYFISH";"SMELLYFISH")

Output:

    0.9444444444444444
    0.6833333333333332
    0.8870370370370371

Julia[edit]

function jaro(s1,s2)
  
  s1::AbstractString
  s2::AbstractString
  m=0
  t=0 
  p=0
  l1=0
  l2=0
  match_standard=((max(length(s1),length(s2)))/2)-1
  for i in s1[1:end]
   l1+=1
   l2=0
   for j in s2[1:end]
    l2+=1
    if i==j
     if l2-l1<=match_standard && l1-l2<=match_standard
       m+=1
     end
     if l2==l1
       p+=1        
     end        
    end
   end
  end        
  t=(m-p)/2
  d=1/3*((m/length(s1))+(m/length(s2))+((m-t)/m))
  println(d)
end
 "MARTHA","MARHTA" :  0.9444444444444444444
 "DIXON","DICKSONX" : 0.6833333333333333333
 "JELLYFISH","SMELLYFISH" : 0.8870370370370
 

Kotlin[edit]

Translation of: Java
object Jaro {
fun distance(s1: String, s2: String): Double {
val s1_len = s1.length
val s2_len = s2.length
if (s1_len == 0 && s2_len == 0) return 1.0
val match_distance = Math.max(s1_len, s2_len) / 2 - 1
val s1_matches = BooleanArray(s1_len)
val s2_matches = BooleanArray(s2_len)
var matches = 0
for (i in 0..s1_len - 1) {
val start = Math.max(0, i - match_distance)
val end = Math.min(i + match_distance + 1, s2_len)
(start..end - 1).find { j -> !s2_matches[j] && s1[i] == s2[j] } ?. let {
s1_matches[i] = true
s2_matches[it] = true
matches++
}
}
if (matches == 0) return 0.0
var t = 0.0
var k = 0
(0..s1_len - 1).filter { s1_matches[it] }.forEach { i ->
while (!s2_matches[k]) k++
if (s1[i] != s2[k]) t += 0.5
k++
}
 
val m = matches.toDouble()
return (m / s1_len + m / s2_len + (m - t) / m) / 3.0
}
}
 
fun main(args: Array<String>) {
println(Jaro.distance("MARTHA", "MARHTA"))
println(Jaro.distance("DIXON", "DICKSONX"))
println(Jaro.distance("JELLYFISH", "SMELLYFISH"))
}

PARI/GP[edit]

This version was translated from Java and Perl.

Works with: PARI/GP version 2.7.4 and above
 
\\Jaro distance between 2 strings s1 and s2.
\\ 4/12/16 aev
jaroDist(s1,s2)={
my(vt1=Vecsmall(s1),vt2=Vecsmall(s2),n1=#s1,n2=#s2,d,
md=max(n1,n2)\2-1,cs,ce,mc=0,tr=0,k=1,ds,
s1m=vector(n1,z,0),s2m=vector(n2,z,0));
if(!n1||!n2, return(0));
for(i=1,n1,
cs=max(1,i-md);
ce=min(i+md+1,n2);
for(j=cs,ce,
if(s2m[j],next);
if(vt1[i]!=vt2[j], next);
mc++; s1m[i]=1; s2m[j]=1; break;
);\\fend j
);\\fend i
if(!mc, return(0));
for(i=1,n1,
if(!s1m[i], next);
while(!s2m[k], k++);
if(vt1[i]!=vt2[k], tr++);
k++
);\\fend i
d=(mc/n1+mc/n2+(mc-tr/2)/mc)/3.0;
ds=Strprintf("%.5f",d);
print(" *** Jaro distance is: ",ds," for strings: ",s1,", ",s2);
return(d);
}
 
{ \\ Testing:
jaroDist("MARTHA","MARHTA");
jaroDist("DIXON","DICKSONX");
jaroDist("JELLYFISH","SMELLYFISH");
jaroDist("DWAYNE","DUANE");
}
 
Output:
 *** Jaro distance is: 0.94444 for strings: MARTHA, MARHTA
 *** Jaro distance is: 0.76667 for strings: DIXON, DICKSONX
 *** Jaro distance is: 0.89630 for strings: JELLYFISH, SMELLYFISH
 *** Jaro distance is: 0.82222 for strings: DWAYNE, DUANE

Perl[edit]

use List::Util qw(min max);
 
sub jaro {
my ($s, $t) = @_;
 
my $s_len = length($s);
my $t_len = length($t);
 
return 1 if $s_len == 0 and $t_len == 0;
 
my $match_distance = int(max($s_len, $t_len) / 2) - 1;
 
my @s_matches;
my @t_matches;
 
my @s = split(//, $s);
my @t = split(//, $t);
 
my $matches = 0;
foreach my $i (0 .. $#s) {
 
my $start = max(0, $i - $match_distance);
my $end = min($i + $match_distance + 1, $t_len);
 
foreach my $j ($start .. $end - 1) {
$t_matches[$j] and next;
$s[$i] eq $t[$j] or next;
$s_matches[$i] = 1;
$t_matches[$j] = 1;
$matches++;
last;
}
}
 
return 0 if $matches == 0;
 
my $k = 0;
my $transpositions = 0;
 
foreach my $i (0 .. $#s) {
$s_matches[$i] or next;
until ($t_matches[$k]) { ++$k }
$s[$i] eq $t[$k] or ++$transpositions;
++$k;
}
 
(($matches / $s_len) + ($matches / $t_len) +
(($matches - $transpositions / 2) / $matches)) / 3;
}
 
printf("%f\n", jaro("MARTHA", "MARHTA"));
printf("%f\n", jaro("DIXON", "DICKSONX"));
printf("%f\n", jaro("JELLYFISH", "SMELLYFISH"));
Output:
0.944444
0.766667
0.896296

Perl 6[edit]

Translation of: Perl
sub jaro ($s, $t) {
 
return 1 if $s eq $t;
 
my $s_len = + my @s = $s.comb;
my $t_len = + my @t = $t.comb;
 
my $match_distance = ($s_len max $t_len) div 2 - 1;
 
my @s_matches;
my @t_matches;
my $matches = 0;
 
for ^@s -> $i {
 
my $start = 0 max $i - $match_distance;
my $end = $i + $match_distance min $t_len;
 
for $start .. $end -> $j {
@t_matches[$j] and next;
@s[$i] eq @t[$j] or next;
@s_matches[$i] = 1;
@t_matches[$j] = 1;
$matches++;
last;
}
}
 
return 0 if $matches == 0;
 
my $k = 0;
my $transpositions = 0;
 
for ^@s -> $i {
@s_matches[$i] or next;
until @t_matches[$k] { ++$k }
@s[$i] eq @t[$k] or ++$transpositions;
++$k;
}
 
($matches / $s_len + $matches / $t_len +
(($matches - $transpositions / 2) / $matches)) / 3;
}
 
printf("%f\n", jaro("MARTHA", "MARHTA"));
printf("%f\n", jaro("DIXON", "DICKSONX"));
printf("%f\n", jaro("JELLYFISH", "SMELLYFISH"));
Output:
0.944444
0.766667
0.896296

Python[edit]

from __future__ import division
 
def jaro(s, t):
s_len = len(s)
t_len = len(t)
 
if s_len == 0 and t_len == 0:
return 1
 
match_distance = (max(s_len, t_len) // 2) - 1
 
s_matches = [False] * s_len
t_matches = [False] * t_len
 
matches = 0
transpositions = 0
 
for i in range(s_len):
start = max(0, i-match_distance)
end = min(i+match_distance+1, t_len)
 
for j in range(start, end):
if t_matches[j]:
continue
if s[i] != t[j]:
continue
s_matches[i] = True
t_matches[j] = True
matches += 1
break
 
if matches == 0:
return 0
 
k = 0
for i in range(s_len):
if not s_matches[i]:
continue
while not t_matches[k]:
k += 1
if s[i] != t[k]:
transpositions += 1
k += 1
 
return ((matches / s_len) +
(matches / t_len) +
((matches - transpositions/2) / matches)) / 3
 
for s,t in [( 'MARTHA', 'MARHTA'),
( 'DIXON', 'DICKSONX'),
('JELLYFISH', 'SMELLYFISH')]:
print("jaro(%r, %r) = %.10f" % (s, t, jaro(s, t)))
Output:
jaro('MARTHA', 'MARHTA') = 0.9444444444
jaro('DIXON', 'DICKSONX') = 0.7666666667
jaro('JELLYFISH', 'SMELLYFISH') = 0.8962962963

Racket[edit]

Translation of: C

(kinda)

Returns an exact value for the Jaro distance.

#lang racket/base
;; {{trans|C}}
(require data/bit-vector)
 
(define (jaro-distance str1 str2)
(define str1-len (string-length str1))
(define str2-len (string-length str2))
(cond
[(and (zero? str1-len) (zero? str2-len)) 0]
[(or (zero? str1-len) (zero? str2-len)) 1]
[else
 ;; vectors of bools that signify if that char in the matching string has a match
(define str1-matches (make-bit-vector str1-len))
(define str2-matches (make-bit-vector str2-len))
(define matches
 ;; max distance between two chars to be considered matching
(let ((match-distance (sub1 (quotient (max str1-len str2-len) 2))))
(for/fold ((matches 0))
((i (in-range 0 str1-len))
(c1 (in-string str1)))
(define start (max 0 (- i match-distance)))
(define end (min (+ i match-distance 1) str2-len))
(for/fold ((matches matches))
((k (in-range start end))
(c2 (in-string str2 start))
#:unless (bit-vector-ref str2-matches k) ; if str2 already has a match continue
#:when (char=? c1 c2) ; if str1 and str2 are not
#:final #t)
 ;; otherwise assume there is a match
(bit-vector-set! str1-matches i #t)
(bit-vector-set! str2-matches k #t)
(add1 matches)))))
(cond
[(zero? matches) 0]
[else
(define-values (transpositions*2 k+)
(for/fold ((transpositions 0) (k 0))
((i (in-range 0 str1-len))
(c1 (in-string str1))
(b1 (in-bit-vector str1-matches))
 ;; if there are no matches in str1 continue
#:when b1)
(define k+ (for/first ((k+ (in-range k str2-len))
(b2 (in-bit-vector str2-matches k))
#:when b2)
k+))
(values
(+ transpositions (if (char=? c1 (string-ref str2 k+)) 0 1)) ; increment transpositions
(add1 k+)))) ;; while there is no match in str2 increment k
 
 ;; divide the number of transpositions by two as per the algorithm specs
 ;; this division is valid because the counted transpositions include both
 ;; instances of the transposed characters.
(define transpositions (quotient transpositions*2 2))
 
 ;; return the Jaro distance
(/ (+ (/ matches str1-len)
(/ matches str2-len)
(/ (- matches transpositions) matches))
3)])]))
 
(module+ test
(jaro-distance "MARTHA" "MARHTA"); 0.944444
(exact->inexact (jaro-distance "MARTHA" "MARHTA")); 0.944444
(jaro-distance "DIXON" "DICKSONX"); 0.766667
(exact->inexact (jaro-distance "DIXON" "DICKSONX")); 0.766667
(jaro-distance "JELLYFISH" "SMELLYFISH"); 0.896296
(exact->inexact (jaro-distance "JELLYFISH" "SMELLYFISH"))); 0.896296
Output:

The exact->inexact calls in the tests give an inexact, floating point version of the rational values.

17/18
0.9444444444444444
23/30
0.7666666666666667
121/135
0.8962962962962963

REXX[edit]

/*REXX program computes the  Jaro distance  between two strings  (or a list of strings).*/
@.= /*define a default for the @. array. */
parse arg @.1 /*obtain an optional character string. */
if @.1='' then do /*nothing specified? Use the defaults.*/
@.1= 'MARTHA MARHTA'
@.2= 'DIXON DICKSONX'
@.3= 'JELLYFISH SMELLYFISH'
@.4= 'DWAYNE DUANE'
end
 
do j=1 while @.j\=='' /*process all the strings in the list. */
d=jaroDist(@.j)
say 'Jaro distance is ' format(d, , 5) " for strings: " @.j
end /*j*/ /* └──── digits past the decimal point.*/
exit /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
jaroDist: procedure; arg s.1 s.2 .; L1=length(s.1); L2=length(s.2); m=0
if L1==0 | L2==0 then return 0 /*check if any string is a null string.*/
f=max(L1,L2)%2 - 1 /*calculate furthest distanced allowed.*/
r.=0 /* [↓] see if the char is near enough.*/
do k=1 for L1; p=pos(substr(s.1, k, 1), s.2, max(1, k-f)); r.k=p
if p\==0 & abs(p-k)<=f then m=m+1 /*if near enough, count it as a match. */
else r.k=0 /* ··· otherwise, don't count it.*/
end /*k*/
t=0
do o=1 for L1; om=o-1
if pos( substr(s.1, o, 1), s.2)==0 | r.o==0 | r.om==0 then iterate
if r.o<r.om then t=t+1
end /*o*/ /* [↑] count number of transpositions.*/
 
if m==0 then return 0
return (m/L1 + m/L2 + (m-t)/m) / 3

output   when using the default inputs:

Jaro distance is   0.94444  for strings:  MARTHA    MARHTA
Jaro distance is   0.76667  for strings:  DIXON     DICKSONX
Jaro distance is   0.89630  for strings:  JELLYFISH SMELLYFISH
Jaro distance is   0.82222  for strings:  DWAYNE    DUANE

Ruby[edit]

def jaro(s, t)
return 1.0 if s == t
 
s_len = s.size
t_len = t.size
match_distance = ([s_len, t_len].max / 2) - 1
 
s_matches = []
t_matches = []
matches = 0.0
 
s_len.times do |i|
j_start = [0, i-match_distance].max
j_end = [i+match_distance, t_len-1].min
 
(j_start..j_end).each do |j|
t_matches[j] && next
s[i] == t[j] || next
s_matches[i] = true
t_matches[j] = true
matches += 1.0
break
end
end
 
return 0.0 if matches == 0.0
 
k = 0
transpositions = 0.0
s_len.times do |i|
s_matches[i] || next
k += 1 until t_matches[k]
s[i] == t[k] || (transpositions += 1.0)
k += 1
end
 
((matches / s_len) +
(matches / t_len) +
((matches - transpositions/2.0) / matches)) / 3.0
end
 
%w(
MARTHA MARHTA
DIXON DICKSONX
JELLYFISH SMELLYFISH
).each_slice(2) do |s,t|
puts "jaro(#{s.inspect}, #{t.inspect}) = #{'%.10f' % jaro(s, t)}"
end
Output:
jaro("MARTHA", "MARHTA") = 0.9444444444
jaro("DIXON", "DICKSONX") = 0.7666666667
jaro("JELLYFISH", "SMELLYFISH") = 0.8962962963

Rust[edit]

Translation of: C++
use std::cmp;
 
pub fn jaro(s1: &str, s2: &str) -> f64 {
let s1_len = s1.len();
let s2_len = s2.len();
if s1_len == 0 && s2_len == 0 { return 1.0; }
let match_distance = cmp::max(s1_len, s2_len) / 2 - 1;
let mut s1_matches = vec![false; s1_len];
let mut s2_matches = vec![false; s2_len];
let mut m: isize = 0;
for i in 0..s1_len {
let start = cmp::max(0, i as isize - match_distance as isize) as usize;
let end = cmp::min(i + match_distance + 1, s2_len);
for j in start..end {
if !s2_matches[j] && s1.as_bytes()[i] == s2.as_bytes()[j] {
s1_matches[i] = true;
s2_matches[j] = true;
m += 1;
break;
}
}
}
if m == 0 { return 0.0; }
let mut t = 0.0;
let mut k = 0;
for i in 0..s1_len {
if s1_matches[i] {
while !s2_matches[k] { k += 1; }
if s1.as_bytes()[i] != s2.as_bytes()[k] { t += 0.5; }
k += 1;
}
}
 
let m = m as f64;
(m / s1_len as f64 + m / s2_len as f64 + (m - t) / m) / 3.0
}
 
fn main() {
let pairs = [("MARTHA", "MARHTA"), ("DIXON", "DICKSONX"), ("JELLYFISH", "SMELLYFISH")];
for p in pairs.iter() { println!("{}/{} = {}", p.0, p.1, jaro(p.0, p.1)); }
}
Output:
MARTHA/MARHTA = 0.9444444444444445
DIXON/DICKSONX = 0.7666666666666666
JELLYFISH/SMELLYFISH = 0.8962962962962964

Scala[edit]

Translation of: Java
object Jaro extends App {
 
def distance(s1: String, s2: String): Double = {
val s1_len = s1.length
val s2_len = s2.length
if (s1_len == 0 && s2_len == 0) return 1.0
val match_distance = Math.max(s1_len, s2_len) / 2 - 1
val s1_matches = Array.ofDim[Boolean](s1_len)
val s2_matches = Array.ofDim[Boolean](s2_len)
var matches = 0
for (i <- 0 until s1_len) {
val start = Math.max(0, i - match_distance)
val end = Math.min(i + match_distance + 1, s2_len)
start until end find { j => !s2_matches(j) && s1(i) == s2(j) } match {
case Some(j) =>
s1_matches(i) = true
s2_matches(j) = true
matches += 1
case None =>
}
}
if (matches == 0) return 0.0
var t = 0.0
var k = 0
0 until s1_len filter s1_matches foreach { i =>
while (!s2_matches(k)) k += 1
if (s1(i) != s2(k)) t += 0.5
k += 1
}
 
val m = matches.toDouble
(m / s1_len + m / s2_len + (m - t) / m) / 3.0
}
 
val strings = List(("MARTHA", "MARHTA"), ("DIXON", "DICKSONX"), ("JELLYFISH", "SMELLYFISH"))
strings.foreach { s => println(distance(s._1, s._2)) }
}

Sidef[edit]

func jaro(s, t) {
 
return 1 if (s == t)
 
var s_len = s.len
var t_len = t.len
 
var match_distance = ((::max(s_len, t_len) // 2) - 1)
 
var s_matches = []
var t_matches = []
 
var matches = 0
var transpositions = 0
 
for i in ^s_len {
var start = ::max(0, i-match_distance)
var end = ::min(i+match_distance, t_len-1)
 
for k in (start .. end) {
t_matches[k] && next
s[i] == t[k] || next
s_matches[i] = true
t_matches[k] = true
matches++
break
}
}
 
return 0 if (matches == 0)
 
var k = 0
for i in ^s_len {
s_matches[i] || next
while (!t_matches[k]) { ++k }
s[i] == t[k] || ++transpositions
++k
}
 
((matches / s_len) +
(matches / t_len) +
((matches - transpositions/2) / matches)) / 3
}
 
for pair in [
[%c"MARTHA", %c"MARHTA"],
[%c"DIXON", %c"DICKSONX"],
[%c"JELLYFISH", %c"SMELLYFISH"],
] {
say "jaro(#{pair.map{.join.dump}.join(', ')}) = #{'%.10f' % jaro(pair...)}"
}
Output:
jaro("MARTHA", "MARHTA") = 0.9444444444
jaro("DIXON", "DICKSONX") = 0.7666666667
jaro("JELLYFISH", "SMELLYFISH") = 0.8962962963

zkl[edit]

   //-->String of matched characters, ordered
fcn _jaro(str1,str2, matchDistance){
cs:=Sink(String);
foreach i,c in ([0..].zip(str1)){
str2.find(c,(0).max(i - matchDistance),i + matchDistance) :
if(Void!=_) cs.write(c);
}
cs.close()
}
 
fcn jaro(str1,str2){
s1Len,s2Len,matchDistance := str1.len(), str2.len(), s1Len.max(s2Len)/2 - 1;
cs12,cs21 := _jaro(str1,str2, matchDistance), _jaro(str2,str1, matchDistance);
 
matches:=cs12.len().toFloat();
if(not matches) return(0.0);
transpositions:=cs12.walker().zipWith('!=,cs21).filter().sum(0)/2;
 
( matches/s1Len + matches/s2Len +
((matches - transpositions)/matches) ) / 3.0
}
foreach s,t in (T( 
T("MARTHA","MARHTA"), T("DIXON","DICKSONX"), T("JELLYFISH","SMELLYFISH"))){
println(0'|jaro("%s","%s") = %.10f|.fmt(s,t,jaro(s,t)));
}
Output:
jaro("MARTHA","MARHTA") = 0.9444444444
jaro("DIXON","DICKSONX") = 0.7666666667
jaro("JELLYFISH","SMELLYFISH") = 0.8962962963