Jaro similarity: Difference between revisions

Content added / Content deleted
m No edit summary
m (→‎{{header|Raku}}: Raku style prefers identifiers with - instead of _)
Line 2,832: Line 2,832:
return 1 if $s eq $t;
return 1 if $s eq $t;


my $s_len = + my @s = $s.comb;
my $s-len = + my @s = $s.comb;
my $t_len = + my @t = $t.comb;
my $t-len = + my @t = $t.comb;
my $match_distance = ($s_len max $t_len) div 2 - 1;
my $match-distance = ($s-len max $t-len) div 2 - 1;


my ($matches, @s_matches, @t_matches);
my ($matches, @s-matches, @t-matches);
for ^@s -> $i {
for ^@s -> $i {
my $start = 0 max $i - $match_distance;
my $start = 0 max $i - $match-distance;
my $end = $i + $match_distance min ($t_len - 1);
my $end = $i + $match-distance min ($t-len - 1);


for $start .. $end -> $j {
for $start .. $end -> $j {
next if @t_matches[$j] or @s[$i] ne @t[$j];
next if @t-matches[$j] or @s[$i] ne @t[$j];
(@s_matches[$i], @t_matches[$j]) = (1, 1);
(@s-matches[$i], @t-matches[$j]) = (1, 1);
$matches++ and last;
$matches++ and last;
}
}
Line 2,851: Line 2,851:
my ($k, $transpositions) = (0, 0);
my ($k, $transpositions) = (0, 0);
for ^@s -> $i {
for ^@s -> $i {
next unless @s_matches[$i];
next unless @s-matches[$i];
$k++ until @t_matches[$k];
$k++ until @t-matches[$k];
$transpositions++ if @s[$i] ne @t[$k];
$transpositions++ if @s[$i] ne @t[$k];
$k++;
$k++;
}
}


( $matches/$s_len + $matches/$t_len + (($matches - $transpositions/2) / $matches) ) / 3
( $matches/$s-len + $matches/$t-len + (($matches - $transpositions/2) / $matches) ) / 3
}
}


say jaro(.key, .value).fmt: '%.3f' for
say jaro(.key, .value).fmt: '%.3f' for
'MARTHA' => 'MARHTA', 'DIXON' => 'DICKSONX', 'JELLYFISH' => 'SMELLYFISH',
'MARTHA' => 'MARHTA', 'DIXON' => 'DICKSONX', 'JELLYFISH' => 'SMELLYFISH',
'I repeat myself' => 'I repeat myself', '' => '';</syntaxhighlight>
'I repeat myself' => 'I repeat myself', '' => '';
</syntaxhighlight>
{{out}}
{{out}}
<pre>0.944
<pre>0.944