Boyer-Moore string search: Difference between revisions

Content added Content deleted
m (Automated syntax highlighting fixup (second round - minor fixes))
(Added Perl)
Line 479: Line 479:
(occurrences, num_alignments, num_character_comparisons) = ([33, 87], 20, 42)
(occurrences, num_alignments, num_character_comparisons) = ([33, 87], 20, 42)
</pre>
</pre>
=={{header|Perl}}==
{{trans|Raku}}
<syntaxhighlight lang="perl" line>use v5.36;
use List::AllUtils <min max zip_by>;

sub suffixes ($m,@pat) {
my($f,$g) = (0, $m-1);
my @suff = (0) x $m-1; push @suff, $m;
for (my $i = $m-2; $i >= 0; --$i) {
if ($i > $g and $suff[$i + $m - 1 - $f] < $i - $g) {
$suff[$i] = $suff[$i + $m - 1 - $f]
} else {
$g = $i if $i < $g;
$f = $i;
while ($g >= 0 and $pat[$g] eq $pat[$g + $m - 1 - $f]) { $g-- }
$suff[$i] = $f - $g
}
}
@suff
}

sub preBmGs ($m,@pat) {
my @suff = suffixes($m,@pat);
my @bmGs = ($m) x $m;

for my $k (reverse 0..$m-1) {
if ($suff[$k] == $k + 1) {
for (my $j=0; $j < $m-1-$k; $j++) { $bmGs[$k]=$m-1-$k if $bmGs[$j] == $m }
}
}
for (0..$m-2) { $bmGs[$m - 1 - $suff[$_]] = $m - 1 - $_ }
@bmGs
}

sub BM ($txt,$pat) {
my @txt = split '', $txt;
my @pat = split '', $pat;
my ($m, $n, $j) = (length($pat), length($txt), 0);
my @bmGs = preBmGs($m,@pat);

my $x = min $m-1, $#pat;
my %bmBc = zip_by { @_ } [@pat[0..$x-1]], [reverse 1..$x];

my @I;
while ($j <= $n - $m) {
my $i;
for ($i = $m - 1; $i >= 0 and $pat[$i] eq $txt[$i + $j]; ) { $i-- }
if ($i < 0) {
push @I, $j;
$j += $bmGs[0]
} else {
$j += max $bmGs[$i], ($bmBc{$txt[$i + $j]} // $m)-$m+$i
}
}
@I
}

my @texts = (
"GCTAGCTCTACGAGTCTA",
"GGCTATAATGCGTA",
"there would have been a time for such a word",
"needle need noodle needle", "InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented",
"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.",
);
my @pats = <TCTA TAATAAA word needle put and alfalfa>;

say "text$_ = $texts[$_]" for 0..$#texts;
say '';

for (0.. $#pats) {
my $j = $_ < 5 ? $_ : $_-1 ; # for searching text4 twice
say "Found '$pats[$_]' in 'text$j' at indices " . join ', ', BM($texts[$j],$pats[$_]);
}</syntaxhighlight>
{{out}}
<pre>text0 = GCTAGCTCTACGAGTCTA
text1 = GGCTATAATGCGTA
text2 = there would have been a time for such a word
text3 = needle need noodle needle
text4 = InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented
text5 = Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.

Found 'TCTA' in 'text0' at indices 6, 14
Found 'TAATAAA' in 'text1' at indices
Found 'word' in 'text2' at indices 40
Found 'needle' in 'text3' at indices 0, 19
Found 'put' in 'text4' at indices 26, 90
Found 'and' in 'text4' at indices 101, 128, 171
Found 'alfalfa' in 'text5' at indices 33, 87</pre>

=={{header|Phix}}==
=={{header|Phix}}==
<!--<syntaxhighlight lang="phix">(phixonline)-->
<!--<syntaxhighlight lang="phix">(phixonline)-->
Line 600: Line 689:
Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (42 character comparisons)
Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (42 character comparisons)
</pre>
</pre>

=={{header|Python}}==
=={{header|Python}}==
Slightly modified from en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm#Python_implementation.
Slightly modified from en.wikipedia.org/wiki/Boyer%E2%80%93Moore_string-search_algorithm#Python_implementation.