Jaro-Winkler distance: Difference between revisions

Content added Content deleted
(Added 11l)
Line 1,088: Line 1,088:
wick | 0.11664
wick | 0.11664
</pre>
</pre>

=={{header|Nim}}==
{{trans|Go}}
<lang Nim>import lenientops

func jaroSim(s1, s2: string): float =

if s1.len == 0 and s2.len == 0: return 1
if s1.len == 0 or s2.len == 0: return 0

let matchDistance = max(s1.len, s2.len) div 2 - 1
var s1Matches = newSeq[bool](s1.len)
var s2Matches = newSeq[bool](s2.len)
var matches = 0
for i in 0..s1.high:
for j in max(0, i - matchDistance)..min(i + matchDistance, s2.high):
if not s2Matches[j] and s1[i] == s2[j]:
s1Matches[i] = true
s2Matches[j] = true
inc matches
break
if matches == 0: return 0

var transpositions = 0.0
var k = 0
for i in ..s1.high:
if not s1Matches[i]: continue
while not s2Matches[k]: inc k
if s1[i] != s2[k]: transpositions += 0.5
inc k

result = (matches / s1.len + matches / s2.len + (matches - transpositions) / matches) / 3


func jaroWinklerDist(s, t: string): float =
let ls = s.len
let lt = t.len
var lmax = if ls < lt: ls else: lt
if lmax > 4: lmax = 4
var l = 0
for i in 0..<lmax:
if s[i] == t[i]: inc l
let js = jaroSim(s, t)
let p = 0.1
let ws = js + float(l) * p * (1 - js)
result = 1 - ws


when isMainModule:

import algorithm, sequtils, strformat

type Wd = tuple[word: string; dist: float]

func `<`(w1, w2: Wd): bool =
if w1.dist < w2.dist: true
elif w1.dist == w2.dist: w1.word < w2.word
else: false

const Misspelt = ["accomodate", "definately", "goverment", "occured",
"publically", "recieve", "seperate", "untill", "wich"]

let words = toSeq("unixdict.txt".lines)
for ms in Misspelt:
var closest: seq[Wd]
for word in words:
if word.len == 0: continue
let jwd = jaroWinklerDist(ms, word)
if jwd < 0.15:
closest.add (word, jwd)
echo "Misspelt word: ", ms, ":"
closest.sort()
for i, c in closest:
echo &"{c.dist:0.4f} {c.word}"
if i == 5: break
echo()</lang>

{{out}}
<pre>Misspelt word: accomodate:
0.0182 accommodate
0.1044 accordant
0.1136 accolade
0.1219 acclimate
0.1327 accompanist
0.1333 accord

Misspelt word: definately:
0.0800 define
0.0850 definite
0.0886 defiant
0.1200 definitive
0.1219 designate
0.1267 deflate

Misspelt word: goverment:
0.0667 govern
0.1167 governor
0.1175 governess
0.1330 governance
0.1361 coverlet
0.1367 sovereignty

Misspelt word: occured:
0.0250 occurred
0.0571 occur
0.0952 occurrent
0.1056 occlude
0.1217 concurred
0.1429 cure

Misspelt word: publically:
0.0800 public
0.1327 publication
0.1400 pull
0.1492 pullback

Misspelt word: recieve:
0.0333 receive
0.0667 relieve
0.0762 reeve
0.0852 receptive
0.0852 recessive
0.0905 recife

Misspelt word: seperate:
0.0708 desperate
0.0917 separate
0.1042 temperate
0.1167 selenate
0.1167 sept
0.1167 sewerage

Misspelt word: untill:
0.0333 until
0.1111 till
0.1333 huntsville
0.1357 instill
0.1422 unital

Misspelt word: wich:
0.0533 winch
0.0533 witch
0.0600 which
0.0857 wichita
0.1111 switch
0.1111 twitch</pre>


=={{header|Perl}}==
=={{header|Perl}}==