Jaro similarity: Difference between revisions
Content added Content deleted
mNo edit summary |
m (→{{header|Raku}}: Raku style prefers identifiers with - instead of _) |
||
Line 2,832: | Line 2,832: | ||
return 1 if $s eq $t; |
return 1 if $s eq $t; |
||
my $s_len = + my @s = $s.comb; |
my $s-len = + my @s = $s.comb; |
||
my $t_len = + my @t = $t.comb; |
my $t-len = + my @t = $t.comb; |
||
my $match_distance = ($s_len max $t_len) div 2 - 1; |
my $match-distance = ($s-len max $t-len) div 2 - 1; |
||
my ($matches, @s_matches, @t_matches); |
my ($matches, @s-matches, @t-matches); |
||
for ^@s -> $i { |
for ^@s -> $i { |
||
my $start = 0 max $i - $match_distance; |
my $start = 0 max $i - $match-distance; |
||
my $end = $i + $match_distance min ($t_len - 1); |
my $end = $i + $match-distance min ($t-len - 1); |
||
for $start .. $end -> $j { |
for $start .. $end -> $j { |
||
next if @t_matches[$j] or @s[$i] ne @t[$j]; |
next if @t-matches[$j] or @s[$i] ne @t[$j]; |
||
(@s_matches[$i], @t_matches[$j]) = (1, 1); |
(@s-matches[$i], @t-matches[$j]) = (1, 1); |
||
$matches++ and last; |
$matches++ and last; |
||
} |
} |
||
Line 2,851: | Line 2,851: | ||
my ($k, $transpositions) = (0, 0); |
my ($k, $transpositions) = (0, 0); |
||
for ^@s -> $i { |
for ^@s -> $i { |
||
next unless @s_matches[$i]; |
next unless @s-matches[$i]; |
||
$k++ until @t_matches[$k]; |
$k++ until @t-matches[$k]; |
||
$transpositions++ if @s[$i] ne @t[$k]; |
$transpositions++ if @s[$i] ne @t[$k]; |
||
$k++; |
$k++; |
||
} |
} |
||
( $matches/$s_len + $matches/$t_len + (($matches - $transpositions/2) / $matches) ) / 3 |
( $matches/$s-len + $matches/$t-len + (($matches - $transpositions/2) / $matches) ) / 3 |
||
} |
} |
||
say jaro(.key, .value).fmt: '%.3f' for |
say jaro(.key, .value).fmt: '%.3f' for |
||
'MARTHA' => 'MARHTA', 'DIXON' => 'DICKSONX', 'JELLYFISH' => 'SMELLYFISH', |
'MARTHA' => 'MARHTA', 'DIXON' => 'DICKSONX', 'JELLYFISH' => 'SMELLYFISH', |
||
'I repeat myself' => 'I repeat myself', '' => ''; |
'I repeat myself' => 'I repeat myself', '' => ''; |
||
</syntaxhighlight> |
|||
{{out}} |
{{out}} |
||
<pre>0.944 |
<pre>0.944 |