Jaro-Winkler distance: Difference between revisions

m
syntax highlighting fixup automation
m (changed matches == 0 to !matches)
m (syntax highlighting fixup automation)
Line 76:
{{trans|Python}}
 
<langsyntaxhighlight lang="11l">V WORDS = File(‘linuxwords.txt’).read_lines()
V MISSPELLINGS = [‘accomodate’,
‘definately’,
Line 123:
print("\nClose dictionary words ( distance < 0.15 using Jaro-Winkler distance) to \" "STR" \" are:\n Word | Distance")
L(w) within_distance(0.15, STR, 5)
print(‘#14 | #.4’.format(w, jaro_winkler_distance(STR, w)))</langsyntaxhighlight>
 
{{out}}
Line 155:
=={{header|Elm}}==
Author: zh5
<langsyntaxhighlight Elmlang="elm">module JaroWinkler exposing (similarity)
 
 
Line 317:
else
result
</syntaxhighlight>
</lang>
 
=={{header|ALGOL 68}}==
Line 326:
<br>
Prints the 6 closest matches regarddless of their distance (i.e. we don't restrict it to matches closer that 0.15).
<langsyntaxhighlight lang="algol68">PROC jaro sim = ( STRING sp1, sp2 )REAL:
IF STRING s1 = sp1[ AT 0 ];
STRING s2 = sp2[ AT 0 ];
Line 457:
print( ( newline ) )
OD
FI</langsyntaxhighlight>
{{out}}
<pre>
Line 535:
=={{header|C++}}==
{{trans|Swift}}
<langsyntaxhighlight lang="cpp">#include <algorithm>
#include <cstdlib>
#include <fstream>
Line 638:
}
return EXIT_SUCCESS;
}</langsyntaxhighlight>
 
{{out}}
Line 718:
=={{header|F_Sharp|F#}}==
This task uses [http://www.rosettacode.org/wiki/Jaro_distance#F.23 Jaro Distance (F#)]
<langsyntaxhighlight lang="fsharp">
// Calculate Jaro-Winkler Similarity of 2 Strings. Nigel Galloway: August 7th., 2020
let Jw P n g=let L=float(let i=Seq.map2(fun n g->n=g) n g in (if Seq.length i>4 then i|>Seq.take 4 else i)|>Seq.takeWhile id|>Seq.length)
Line 727:
["accomodate";"definately";"goverment";"occured";"publically";"recieve";"seperate";"untill";"wich"]|>
List.iter(fun n->printfn "%s" n;fN n|>Array.take 5|>Array.iter(fun n->printf "%A" n);printfn "\n")
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 760:
=={{header|Go}}==
This uses unixdict and borrows code from the [[Jaro_distance#Go]] task. Otherwise it is a translation of the Wren entry.
<langsyntaxhighlight lang="go">package main
 
import (
Line 889:
fmt.Println()
}
}</langsyntaxhighlight>
 
{{out}}
Line 967:
Implementation:
 
<langsyntaxhighlight Jlang="j">jaro=: {{
Eq=. (x=/y)*(<.<:-:x>.&#y)>:|x -/&i.&# y
xM=. (+./"1 Eq)#x
Line 981:
simj=. x jaro y
-.simj + l*p*-.simj
}}</langsyntaxhighlight>
 
Task example:
 
<langsyntaxhighlight Jlang="j">task=: {{
words=. <;._2 fread '/usr/share/dict/words'
for_word. ;:'accomodate definately goverment occured publically recieve seperate untill wich' do.
Line 1,040:
0.0533333 winch
0.0533333 witch
</syntaxhighlight>
</lang>
=={{header|Java}}==
{{trans|C++}}
<langsyntaxhighlight lang="java">import java.io.*;
import java.util.*;
 
Line 1,152:
}
}
}</langsyntaxhighlight>
 
{{out}}
Line 1,236:
This entry, which uses unixdict.txt, borrows the implementation in jq of the Jaro similarity measure as defined at
[[Jaro_similarity#jq]]; since it is quite long, it is not repeated here.
<langsyntaxhighlight lang="jq"># See [[Jaro_similarity#jq]] for the implementation of jaro/2
 
def length_of_common_prefix($s1; $s2):
Line 1,267:
(.[] | "\(.[0] | lpad(21)) : \(.[-1] * 1000 | round / 1000)") ;
 
task</langsyntaxhighlight>
{{out}}
Invocation: jq -rRn -f program.jq unixdict.txt
Line 1,322:
 
=={{header|Julia}}==
<langsyntaxhighlight lang="julia"># download("http://users.cs.duke.edu/~ola/ap/linuxwords", "linuxwords.txt")
const words = read("linuxwords.txt", String) |> split .|> strip
 
Line 1,372:
end
end
</langsyntaxhighlight>{{out}}
<pre>
Close dictionary words ( distance < 0.15 using Jaro-Winkler distance) to 'accomodate' are:
Line 1,448:
 
=={{header|Mathematica}}/{{header|Wolfram Language}}==
<langsyntaxhighlight Mathematicalang="mathematica">ClearAll[JWD]
JWD[a_][b_]:=Experimental`JaroWinklerDistance[a,b]
dict=DictionaryLookup[];
Line 1,459:
TakeSmallestBy[dict->{"Element","Value"},JWD["seperate"],5]//Grid
TakeSmallestBy[dict->{"Element","Value"},JWD["untill"],5]//Grid
TakeSmallestBy[dict->{"Element","Value"},JWD["wich"],5]//Grid</langsyntaxhighlight>
{{out}}
<pre>accommodate 0.0181818
Line 1,517:
=={{header|Nim}}==
{{trans|Go}}
<langsyntaxhighlight Nimlang="nim">import lenientops
 
func jaroSim(s1, s2: string): float =
Line 1,589:
echo &"{c.dist:0.4f} {c.word}"
if i == 5: break
echo()</langsyntaxhighlight>
 
{{out}}
Line 1,662:
 
=={{header|Perl}}==
<langsyntaxhighlight lang="perl">use strict;
use warnings;
use List::Util qw(min max head);
Line 1,714:
printf "%15s : %0.4f\n", $_, $J{$_}
for head 5, sort { $J{$a} <=> $J{$b} or $a cmp $b } grep { $J{$_} < 0.15 } keys %J;
}</langsyntaxhighlight>
{{out}}
<pre style="height:40ex">Closest 5 dictionary words with a Jaro-Winkler distance < .15 from 'accomodate':
Line 1,781:
=={{header|Phix}}==
Uses jaro() from [[Jaro_distance#Phix]] (reproduced below for your convenience) and the standard unix_dict()
<!--<langsyntaxhighlight Phixlang="phix">(phixonline)-->
<span style="color: #008080;">function</span> <span style="color: #000000;">jaro</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">str1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">str2</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">str1</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">trim</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">upper</span><span style="color: #0000FF;">(</span><span style="color: #000000;">str1</span><span style="color: #0000FF;">))</span>
Line 1,858:
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<!--</langsyntaxhighlight>-->
Output identical to <del>Go/Wren</del> Algol68
 
=={{header|Python}}==
<langsyntaxhighlight lang="python">"""
Test Jaro-Winkler distance metric.
linuxwords.txt is from http://users.cs.duke.edu/~ola/ap/linuxwords
Line 1,928:
for w in within_distance(0.15, STR, 5):
print('{:>14} | {:6.4f}'.format(w, jaro_winkler_distance(STR, w)))
</langsyntaxhighlight>{{out}}
<pre>
Close dictionary words ( distance < 0.15 using Jaro-Winkler distance) to " accomodate " are:
Line 2,009:
using the unixdict.txt file from www.puzzlers.org
 
<syntaxhighlight lang="raku" perl6line>sub jaro-winkler ($s, $t) {
 
return 0 if $s eq $t;
Line 2,070:
 
printf "%15s : %0.4f\n", .key, .value for %result.grep({ .value < .15 }).sort({+.value, ~.key}).head(5);
}</langsyntaxhighlight>
{{out}}
<pre>Closest 5 dictionary words with a Jaro-Winkler distance < .15 from accomodate:
Line 2,136:
=={{header|Rust}}==
{{trans|Python}}
<langsyntaxhighlight lang="rust">use std::fs::File;
use std::io::{self, BufRead};
 
Line 2,246:
Err(error) => eprintln!("{}", error),
}
}</langsyntaxhighlight>
 
{{out}}
Line 2,326:
=={{header|Swift}}==
{{trans|Rust}}
<langsyntaxhighlight lang="swift">import Foundation
 
func loadDictionary(_ path: String) throws -> [String] {
Line 2,415:
} catch {
print(error.localizedDescription)
}</langsyntaxhighlight>
 
{{out}}
Line 2,495:
=={{header|Typescript}}==
{{trans|Java}}
<langsyntaxhighlight lang="typescript">
var fs = require('fs')
 
Line 2,620:
}
main();
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 2,689:
=={{header|Vlang}}==
{{trans|Go}}
<langsyntaxhighlight lang="vlang">import os
 
fn jaro_sim(str1 string, str2 string) f64 {
Line 2,805:
println('')
}
}</langsyntaxhighlight>
 
{{out}}
Line 2,883:
{{libheader|Wren-sort}}
This uses unixdict and borrows code from the [[Jaro_distance#Wren]] task.
<langsyntaxhighlight lang="ecmascript">import "io" for File
import "/fmt" for Fmt
import "/sort" for Sort
Line 2,955:
for (c in closest.take(6)) Fmt.print("$0.4f $s", c[1], c[0])
System.print()
}</langsyntaxhighlight>
 
{{out}}
10,327

edits