Jaro-Winkler distance: Difference between revisions
Content added Content deleted
Alextretyak (talk | contribs) (Added 11l) |
|||
Line 1,088: | Line 1,088: | ||
wick | 0.11664 |
wick | 0.11664 |
||
</pre> |
</pre> |
||
=={{header|Nim}}== |
|||
{{trans|Go}} |
|||
<lang Nim>import lenientops |
|||
func jaroSim(s1, s2: string): float = |
|||
if s1.len == 0 and s2.len == 0: return 1 |
|||
if s1.len == 0 or s2.len == 0: return 0 |
|||
let matchDistance = max(s1.len, s2.len) div 2 - 1 |
|||
var s1Matches = newSeq[bool](s1.len) |
|||
var s2Matches = newSeq[bool](s2.len) |
|||
var matches = 0 |
|||
for i in 0..s1.high: |
|||
for j in max(0, i - matchDistance)..min(i + matchDistance, s2.high): |
|||
if not s2Matches[j] and s1[i] == s2[j]: |
|||
s1Matches[i] = true |
|||
s2Matches[j] = true |
|||
inc matches |
|||
break |
|||
if matches == 0: return 0 |
|||
var transpositions = 0.0 |
|||
var k = 0 |
|||
for i in ..s1.high: |
|||
if not s1Matches[i]: continue |
|||
while not s2Matches[k]: inc k |
|||
if s1[i] != s2[k]: transpositions += 0.5 |
|||
inc k |
|||
result = (matches / s1.len + matches / s2.len + (matches - transpositions) / matches) / 3 |
|||
func jaroWinklerDist(s, t: string): float = |
|||
let ls = s.len |
|||
let lt = t.len |
|||
var lmax = if ls < lt: ls else: lt |
|||
if lmax > 4: lmax = 4 |
|||
var l = 0 |
|||
for i in 0..<lmax: |
|||
if s[i] == t[i]: inc l |
|||
let js = jaroSim(s, t) |
|||
let p = 0.1 |
|||
let ws = js + float(l) * p * (1 - js) |
|||
result = 1 - ws |
|||
when isMainModule: |
|||
import algorithm, sequtils, strformat |
|||
type Wd = tuple[word: string; dist: float] |
|||
func `<`(w1, w2: Wd): bool = |
|||
if w1.dist < w2.dist: true |
|||
elif w1.dist == w2.dist: w1.word < w2.word |
|||
else: false |
|||
const Misspelt = ["accomodate", "definately", "goverment", "occured", |
|||
"publically", "recieve", "seperate", "untill", "wich"] |
|||
let words = toSeq("unixdict.txt".lines) |
|||
for ms in Misspelt: |
|||
var closest: seq[Wd] |
|||
for word in words: |
|||
if word.len == 0: continue |
|||
let jwd = jaroWinklerDist(ms, word) |
|||
if jwd < 0.15: |
|||
closest.add (word, jwd) |
|||
echo "Misspelt word: ", ms, ":" |
|||
closest.sort() |
|||
for i, c in closest: |
|||
echo &"{c.dist:0.4f} {c.word}" |
|||
if i == 5: break |
|||
echo()</lang> |
|||
{{out}} |
|||
<pre>Misspelt word: accomodate: |
|||
0.0182 accommodate |
|||
0.1044 accordant |
|||
0.1136 accolade |
|||
0.1219 acclimate |
|||
0.1327 accompanist |
|||
0.1333 accord |
|||
Misspelt word: definately: |
|||
0.0800 define |
|||
0.0850 definite |
|||
0.0886 defiant |
|||
0.1200 definitive |
|||
0.1219 designate |
|||
0.1267 deflate |
|||
Misspelt word: goverment: |
|||
0.0667 govern |
|||
0.1167 governor |
|||
0.1175 governess |
|||
0.1330 governance |
|||
0.1361 coverlet |
|||
0.1367 sovereignty |
|||
Misspelt word: occured: |
|||
0.0250 occurred |
|||
0.0571 occur |
|||
0.0952 occurrent |
|||
0.1056 occlude |
|||
0.1217 concurred |
|||
0.1429 cure |
|||
Misspelt word: publically: |
|||
0.0800 public |
|||
0.1327 publication |
|||
0.1400 pull |
|||
0.1492 pullback |
|||
Misspelt word: recieve: |
|||
0.0333 receive |
|||
0.0667 relieve |
|||
0.0762 reeve |
|||
0.0852 receptive |
|||
0.0852 recessive |
|||
0.0905 recife |
|||
Misspelt word: seperate: |
|||
0.0708 desperate |
|||
0.0917 separate |
|||
0.1042 temperate |
|||
0.1167 selenate |
|||
0.1167 sept |
|||
0.1167 sewerage |
|||
Misspelt word: untill: |
|||
0.0333 until |
|||
0.1111 till |
|||
0.1333 huntsville |
|||
0.1357 instill |
|||
0.1422 unital |
|||
Misspelt word: wich: |
|||
0.0533 winch |
|||
0.0533 witch |
|||
0.0600 which |
|||
0.0857 wichita |
|||
0.1111 switch |
|||
0.1111 twitch</pre> |
|||
=={{header|Perl}}== |
=={{header|Perl}}== |