Boyer-Moore string search: Difference between revisions

added Pascal example
m (remove draft label)
(added Pascal example)
Line 516:
(occurrences, num_alignments, num_character_comparisons) = ([33, 87], 20, 42)
</pre>
 
=={{header|Pascal}}==
Works with FPC (currently only version 3.3.1).
<syntaxhighlight lang="pascal">
program BMTest;
{$mode objfpc}{$h+}
{$modeswitch functionreferences}
{$modeswitch anonymousfunctions}
uses
SysUtils, Math;
 
type
TIntArray = array of SizeInt;
TSearchFun = reference to function(const s: rawbytestring): TIntArray;
 
{ returns a function that performs a case-sensitive search for all occurrences(1-based)
of the specified pattern using the Boyer-Moore algorithm with Galil optimization }
function BmgCreate(const aPattern: rawbytestring): TSearchFun;
var
BcTable: array[Byte] of Integer; //bad character shifts
p: PByte absolute aPattern;
procedure FillBcTable;
var
I: Integer;
begin
FillDWord(BcTable, Succ(High(Byte)), DWord(Length(aPattern)));
for I := 0 to Length(aPattern) - 2 do
BcTable[p[I]] := Pred(Length(aPattern) - I);
end;
var
GsTable: array of Integer = nil; //good suffix shifts
procedure MakeGsTable;
function IsPrefix(aPos: Integer): Boolean;
var
I, SuffixLen: Integer;
begin
SuffixLen := Length(aPattern) - aPos;
for I := 0 to Pred(SuffixLen) do
if (p[I] <> p[aPos + I]) then exit(False);
Result := True;
end;
function GetSuffixLen(aPos: Integer): Integer;
begin
Result := 0;
while(Result < aPos)and(p[aPos - Result] = p[Pred(Length(aPattern) - Result)])do
Inc(Result);
end;
var
I, LastPrefix, SuffixLen: Integer;
begin
SetLength(GsTable, Length(aPattern));
LastPrefix := Pred(Length(aPattern));
for I := Pred(Length(aPattern)) downto 0 do begin
if IsPrefix(Succ(I)) then
LastPrefix := Succ(I);
GsTable[I] := LastPrefix + Length(aPattern) - Succ(I);
end;
for I := 0 to Length(aPattern) - 2 do begin
SuffixLen := GetSuffixLen(I);
if p[I - SuffixLen] <> p[Pred(Length(aPattern) - SuffixLen)] then
GsTable[Pred(Length(aPattern) - SuffixLen)] := Pred(Length(aPattern) + SuffixLen - I);
end;
end;
var
Needle: rawbytestring;
const
MatchInitLen = 4;
begin
if aPattern <> '' then begin
Needle := aPattern;
FillBcTable;
MakeGsTable;
end else
Needle := '';
{ returns an empty array if there are no matches or the pattern is empty }
Result := function(const aHaystack: rawbytestring): TIntArray
var
Matches: TIntArray;
pNeedle: PByte absolute Needle;
pHaystack: PByte absolute aHaystack;
I, J, NeedleLast, MatchPos, OldPfxEnd: SizeInt;
begin
Matches := nil;
if (Needle = '') or (Length(aHaystack) < Length(Needle)) then exit(Matches);
SetLength(Matches, MatchInitLen);
MatchPos := 0;
NeedleLast := Pred(Length(Needle));
I := NeedleLast;
OldPfxEnd := 0;
while I < Length(aHaystack) do begin
J := NeedleLast;
while (J >= OldPfxEnd) and (pNeedle[J] = pHaystack[I]) do begin
Dec(J); Dec(I);
end;
if J < OldPfxEnd then begin
if MatchPos = Length(Matches) then SetLength(Matches, MatchPos * 2);
Matches[MatchPos] := I - OldPfxEnd + 2;
Inc(MatchPos);
Inc(I, Succ(GsTable[0] - OldPfxEnd));
OldPfxEnd := Length(Needle)*2 - GsTable[0];
end else begin
Inc(I, Max(BcTable[pHaystack[I]], GsTable[J]));
OldPfxEnd := 0;
end;
end;
SetLength(Matches, MatchPos);
Result := Matches;
end;
end;
 
procedure WriteArray(const a: array of SizeInt);
var
I: SizeInt;
begin
Write('[');
for I := 0 to High(a) do
if I < High(a) then Write(a[I], ', ')
else Write(a[I]);
WriteLn(']');
end;
 
const
Text1 = 'Nearby farms grew a half acre of alfalfa on the dairy''s behalf, with bales of all that alfalfa exchanged for milk';
Text2 = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
Text3 = 'CAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGT';
Text4 = 'AGGTGTGGAAACAAGCACCTAGATGTGCTGAACCCGGGGCACACGTTCAGTCAGCGACTC';
var
BmgSearch: TSearchFun;
begin
WriteArray(BmgCreate('alfalfa')(Text1));
WriteArray(BmgCreate('aaaaaaaaaaaaaaaaaaaa')(Text2));
BmgSearch := BmgCreate('CAGTCAG');
WriteArray(BmgSearch(Text3));
WriteArray(BmgSearch(Text4));
ReadLn;
end.
</syntaxhighlight>
{{out}}
<pre>
[34, 88]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
[1, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53]
[48]
</pre>
 
=={{header|Perl}}==
{{trans|Raku}}
73

edits