Jaro-Winkler distance: Difference between revisions

no edit summary
(J)
No edit summary
Line 2,491:
witches | 0.1143
 
</pre>
 
=={{header|Vlang}}==
{{trans|go}}
<lang vlang>import os
 
// jaro_sim returns the Jaro similarity of str1 and str2.
//
// Two empty strings yield 1; exactly one empty string, or no matching
// bytes at all, yields 0. Otherwise the result is the mean of
//   matches/len(str1), matches/len(str2), (matches - transpositions)/matches.
// Strings are compared byte by byte via indexing.
fn jaro_sim(str1 string, str2 string) f64 {
	if str1.len == 0 && str2.len == 0 {
		return 1
	}
	if str1.len == 0 || str2.len == 0 {
		return 0
	}
	// Bytes may only match if they lie within this many positions of each other.
	mut match_distance := str1.len
	if str2.len > match_distance {
		match_distance = str2.len
	}
	match_distance = match_distance / 2 - 1
	// When the longer string is a single byte the formula gives -1, which
	// would make the scan window empty and report 0 for identical one-byte
	// strings. Clamp so that position i can always be compared with position i.
	if match_distance < 0 {
		match_distance = 0
	}
	mut str1_matches := []bool{len: str1.len}
	mut str2_matches := []bool{len: str2.len}
	mut matches := 0.0
	mut transpositions := 0.0
	// Pass 1: pair each byte of str1 with the first unused equal byte of str2
	// inside the window [i - match_distance, i + match_distance].
	for i in 0 .. str1.len {
		mut start := i - match_distance
		if start < 0 {
			start = 0
		}
		mut end := i + match_distance + 1
		if end > str2.len {
			end = str2.len
		}
		for k in start .. end {
			if str2_matches[k] {
				continue
			}
			if str1[i] != str2[k] {
				continue
			}
			str1_matches[i] = true
			str2_matches[k] = true
			matches++
			break
		}
	}
	if matches == 0 {
		return 0
	}
	// Pass 2: walk the matched bytes of both strings in order; every position
	// where they differ is half of a transposition.
	mut k := 0
	for i in 0 .. str1.len {
		if !str1_matches[i] {
			continue
		}
		// Both flag arrays hold the same number of true entries, so this
		// advance always stops on a matched byte of str2.
		for !str2_matches[k] {
			k++
		}
		if str1[i] != str2[k] {
			transpositions++
		}
		k++
	}
	transpositions /= 2
	return (matches / f64(str1.len) + matches / f64(str2.len) +
		(matches - transpositions) / matches) / 3
}
// jaro_winkler_dist returns 1 minus the Winkler-adjusted Jaro similarity of
// s and t, so smaller values mean closer strings.
//
// The adjustment adds same * 0.1 * (1 - jaro) to the Jaro similarity, where
// `same` is the number of positions among the first min(s.len, t.len, 4)
// bytes at which both strings hold the same byte. Note that the count does
// not stop at the first mismatch.
fn jaro_winkler_dist(s string, t string) f64 {
	// Inspect at most four leading positions, fewer for short strings.
	mut limit := if s.len < t.len { s.len } else { t.len }
	if limit > 4 {
		limit = 4
	}
	mut same := 0
	for idx in 0 .. limit {
		if s[idx] == t[idx] {
			same++
		}
	}
	sim := jaro_sim(s, t)
	scaling := 0.1
	boosted := sim + f64(same) * scaling * (1 - sim)
	return 1 - boosted
}
// Wd pairs a candidate dictionary word with its distance from the
// misspelt word currently being checked in main.
struct Wd {
// word is the candidate taken from the word list.
word string
// dist is the jaro_winkler_dist value computed for that candidate.
dist f64
}
// main loads the word list from unixdict.txt and, for each misspelt word,
// prints up to six dictionary words whose Jaro-Winkler distance to it is
// below 0.15, ordered from closest to farthest.
fn main() {
	misspelt := [
		'accomodate', 'definately', 'goverment', 'occured', 'publically',
		'recieve', 'seperate', 'untill', 'wich',
	]
	// The trailing `?` unwraps the result; a read error ends the program here.
	words := os.read_lines('unixdict.txt')?
	for ms in misspelt {
		mut closest := []Wd{}
		for word in words {
			// Skip blank lines in the word list.
			if word == '' {
				continue
			}
			jwd := jaro_winkler_dist(ms, word)
			if jwd < 0.15 {
				closest << Wd{word, jwd}
			}
		}
		// Space before the colon matches the heading shown in the sample output.
		println('Misspelt word: $ms :')
		closest.sort(a.dist < b.dist)
		for i, c in closest {
			println('${c.dist:.4f} ${c.word}')
			// Stop after the sixth entry (index 5).
			if i == 5 {
				break
			}
		}
		println('')
	}
}</lang>
 
{{out}}
<pre>
Misspelt word: accomodate :
0.0182 accommodate
0.1044 accordant
0.1136 accolade
0.1219 acclimate
0.1327 accompanist
0.1333 accord
 
Misspelt word: definately :
0.0800 define
0.0850 definite
0.0886 defiant
0.1200 definitive
0.1219 designate
0.1267 deflate
 
Misspelt word: goverment :
0.0667 govern
0.1167 governor
0.1175 governess
0.1330 governance
0.1361 coverlet
0.1367 sovereignty
 
Misspelt word: occured :
0.0250 occurred
0.0571 occur
0.0952 occurrent
0.1056 occlude
0.1217 concurred
0.1429 cure
 
Misspelt word: publically :
0.0800 public
0.1327 publication
0.1400 pull
0.1492 pullback
 
Misspelt word: recieve :
0.0333 receive
0.0667 relieve
0.0762 reeve
0.0852 receptive
0.0852 recessive
0.0905 recife
 
Misspelt word: seperate :
0.0708 desperate
0.0917 separate
0.1042 temperate
0.1167 selenate
0.1167 sewerage
0.1167 sept
 
Misspelt word: untill :
0.0333 until
0.1111 till
0.1333 huntsville
0.1357 instill
0.1422 unital
 
Misspelt word: wich :
0.0533 winch
0.0533 witch
0.0600 which
0.0857 wichita
0.1111 switch
0.1111 twitch
</pre>
 
338

edits