Jaro-Winkler distance: Difference between revisions

Added Swift solution
m (Removed duplicate header)
(Added Swift solution)
Line 791:
Err(error) => eprintln!("{}", error),
}
}</lang>
 
{{out}}
<pre>
Close dictionary words (distance < 0.15 using Jaro-Winkler distance) to 'accomodate' are:
Word | Distance
accommodate | 0.0182
accommodated | 0.0333
accommodates | 0.0333
accommodating | 0.0815
accommodation | 0.0815
 
Close dictionary words (distance < 0.15 using Jaro-Winkler distance) to 'definately' are:
Word | Distance
definitely | 0.0400
defiantly | 0.0422
define | 0.0800
definite | 0.0850
definable | 0.0872
 
Close dictionary words (distance < 0.15 using Jaro-Winkler distance) to 'goverment' are:
Word | Distance
government | 0.0533
govern | 0.0667
governments | 0.0697
movement | 0.0810
governmental | 0.0833
 
Close dictionary words (distance < 0.15 using Jaro-Winkler distance) to 'occured' are:
Word | Distance
occurred | 0.0250
occur | 0.0571
occupied | 0.0786
occurs | 0.0905
accursed | 0.0917
 
Close dictionary words (distance < 0.15 using Jaro-Winkler distance) to 'publically' are:
Word | Distance
publicly | 0.0400
public | 0.0800
publicity | 0.1044
publication | 0.1327
biblically | 0.1400
 
Close dictionary words (distance < 0.15 using Jaro-Winkler distance) to 'recieve' are:
Word | Distance
receive | 0.0333
received | 0.0625
receiver | 0.0625
receives | 0.0625
relieve | 0.0667
 
Close dictionary words (distance < 0.15 using Jaro-Winkler distance) to 'seperate' are:
Word | Distance
desperate | 0.0708
separate | 0.0917
temperate | 0.1042
separated | 0.1144
separates | 0.1144
 
Close dictionary words (distance < 0.15 using Jaro-Winkler distance) to 'untill' are:
Word | Distance
until | 0.0333
untie | 0.1067
untimely | 0.1083
till | 0.1111
Antilles | 0.1264
 
Close dictionary words (distance < 0.15 using Jaro-Winkler distance) to 'wich' are:
Word | Distance
witch | 0.0533
which | 0.0600
switch | 0.1111
twitch | 0.1111
witches | 0.1143
 
</pre>
 
=={{header|Swift}}==
{{trans|Rust}}
<lang swift>import Foundation
 
/// Reads the word list stored at `path` and returns one entry per non-empty line.
///
/// Lines are split on any newline character, so both LF and CRLF files
/// produce clean words. Empty lines (including the one implied by a trailing
/// newline at the end of the file) are dropped rather than returned as `""`.
///
/// - Parameter path: File-system path of a plain-text word list, one word per line.
/// - Returns: The words in file order.
/// - Throws: Whatever `String(contentsOfFile:encoding:)` throws when the file
///   cannot be read or decoded as ASCII.
func loadDictionary(_ path: String) throws -> [String] {
    let contents = try String(contentsOfFile: path, encoding: String.Encoding.ascii)
    // Splitting on the newline *set* turns "\r\n" into an empty fragment between
    // the two separators; the filter removes those along with blank lines.
    return contents
        .components(separatedBy: CharacterSet.newlines)
        .filter { !$0.isEmpty }
}
 
/// Computes the Jaro-Winkler distance between two strings.
///
/// The result is `0.0` for identical strings (including two empty strings) and
/// `1.0` when no characters match at all or exactly one string is empty.
///
/// The Winkler bonus used here counts the positions among the first four
/// (bounded by the shorter string's length) at which both strings hold the
/// same character; counting does not stop at the first mismatch.
///
/// - Parameters:
///   - string1: First string to compare.
///   - string2: Second string to compare.
/// - Returns: `1 - similarity`, where similarity is the Jaro score plus the
///   Winkler bonus with a scaling factor of 0.1.
func jaroWinklerDistance(string1: String, string2: String) -> Double {
    let first = Array(string1)
    let second = Array(string2)
    // Arrange the inputs so that `longer` has at least as many characters as
    // `shorter`; on equal lengths `string1` plays the role of `longer`.
    let (longer, shorter) = first.count < second.count ? (second, first) : (first, second)

    guard !shorter.isEmpty else {
        return longer.isEmpty ? 0.0 : 1.0
    }

    // Two equal characters only count as a match when their positions are at
    // most `window` apart.
    let window = max(1, longer.count / 2) - 1
    var used = [Bool](repeating: false, count: shorter.count)
    var matchedFromLonger: [Character] = []
    matchedFromLonger.reserveCapacity(longer.count)

    for (i, character) in longer.enumerated() {
        // Claim the first still-unused, equal character of `shorter` inside the window.
        let hit = shorter.indices.first { j in
            j <= i + window && j + window >= i && character == shorter[j] && !used[j]
        }
        if let j = hit {
            used[j] = true
            matchedFromLonger.append(character)
        }
    }

    guard !matchedFromLonger.isEmpty else {
        return 1.0
    }

    // Matched characters of `shorter`, in their own order. Each position where
    // the two matched sequences disagree counts as one (half-)transposition.
    let matchedFromShorter = zip(shorter, used).filter { $0.1 }.map { $0.0 }
    let transpositions = zip(matchedFromShorter, matchedFromLonger)
        .filter { $0.0 != $0.1 }
        .count

    let m = Double(matchedFromLonger.count)
    let shareOfLonger = m / Double(longer.count)
    let shareOfShorter = m / Double(shorter.count)
    let orderAgreement = (m - Double(transpositions) / 2.0) / m
    let jaro = (shareOfLonger + shareOfShorter + orderAgreement) / 3.0

    // `zip` stops at the shorter string, so at most min(4, shorter.count) slots are examined.
    let commonPrefix = zip(longer, shorter)
        .prefix(4)
        .filter { $0.0 == $0.1 }
        .count

    return 1.0 - (jaro + Double(commonPrefix) * 0.1 * (1.0 - jaro))
}
 
/// Finds the entries of `words` closest to `string` by Jaro-Winkler distance.
///
/// - Parameters:
///   - words: Candidate words to score.
///   - maxDistance: Inclusive upper bound; candidates with a larger distance are discarded.
///   - string: The word every candidate is compared against.
///   - maxToReturn: Maximum number of results to return.
/// - Returns: Up to `maxToReturn` `(word, distance)` pairs ordered by ascending distance.
func withinDistance(words: [String], maxDistance: Double, string: String,
                    maxToReturn: Int) -> [(String, Double)] {
    var candidates: [(String, Double)] = []
    for candidate in words {
        let distance = jaroWinklerDistance(string1: string, string2: candidate)
        if distance <= maxDistance {
            candidates.append((candidate, distance))
        }
    }
    candidates.sort { lhs, rhs in lhs.1 < rhs.1 }
    let keep = min(maxToReturn, candidates.count)
    return Array(candidates[0..<keep])
}
 
/// Right-aligns `string` in a field of `width` characters by prepending spaces.
///
/// - Parameters:
///   - string: Text to align.
///   - width: Desired minimum character count of the result.
/// - Returns: `string` unchanged when it already has `width` or more
///   characters; otherwise `string` preceded by enough spaces to reach `width`.
func pad(string: String, width: Int) -> String {
    let missing = width - string.count
    guard missing > 0 else {
        return string
    }
    return String(repeating: " ", count: missing) + string
}
 
// Driver: loads the word list and, for each misspelled word, prints up to five
// dictionary entries whose Jaro-Winkler distance is at most 0.15.
do {
    let misspellings = ["accomodate", "definately", "goverment", "occured",
                        "publically", "recieve", "seperate", "untill", "wich"]
    let dictionary = try loadDictionary("linuxwords.txt")
    for misspelling in misspellings {
        print("Close dictionary words (distance < 0.15 using Jaro-Winkler distance) to '\(misspelling)' are:")
        print(" Word | Distance")
        let suggestions = withinDistance(words: dictionary, maxDistance: 0.15,
                                         string: misspelling, maxToReturn: 5)
        for suggestion in suggestions {
            let wordColumn = pad(string: suggestion.0, width: 14)
            let distanceColumn = String(format: "%6.4f", suggestion.1)
            print("\(wordColumn) | \(distanceColumn)")
        }
        // Blank line between result tables.
        print()
    }
} catch {
    // Reached when the dictionary file cannot be loaded.
    print(error.localizedDescription)
}
 
1,777

edits