Boyer-Moore string search: Difference between revisions

(→‎{{header|Wren}}: Now includes Julia examples.)
Line 394:
(occurrences, num_alignments, num_character_comparisons) = ([101, 128, 171], 71, 79)
(occurrences, num_alignments, num_character_comparisons) = ([33, 87], 20, 42)
</pre>
 
=={{header|Phix}}==
<!--<lang Phix>(phixonline)-->
<span style="color: #000080;font-style:italic;">-- based on https://www-igm.univ-mlv.fr/~lecroq/string/node14.html</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">preBmBc</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">bmBc</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">m</span><span style="color: #0000FF;">,</span><span style="color: #000000;">255</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">m</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">bmBc</span><span style="color: #0000FF;">[</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">i</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">bmBc</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">suffixes</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">),</span> <span style="color: #000000;">g</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">m</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">f</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">suff</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">m</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">suff</span><span style="color: #0000FF;">[</span><span style="color: #000000;">m</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">m</span><span style="color: #0000FF;">;</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">m</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">1</span> <span style="color: #008080;">by</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">></span> <span style="color: #000000;">g</span> <span style="color: #008080;">and</span> <span style="color: #000000;">suff</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">f</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;"><</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">g</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">suff</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">suff</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">f</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">else</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;"><</span> <span style="color: #000000;">g</span> <span style="color: #008080;">then</span> <span style="color: #000000;">g</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">f</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">g</span> <span style="color: #0000FF;">>=</span> <span style="color: #000000;">1</span> <span style="color: #008080;">and</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">g</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">==</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">g</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">f</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">g</span> <span style="color: #0000FF;">-=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">;</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #000000;">suff</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">f</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">g</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">suff</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">preBmGs</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">),</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">suff</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">suffixes</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">bmGs</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #000000;">m</span><span style="color: #0000FF;">,</span><span style="color: #000000;">m</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">m</span> <span style="color: #008080;">to</span> <span style="color: #000000;">1</span> <span style="color: #008080;">by</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">if</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">suff</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">==</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;"><</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">i</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">if</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">bmGs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">==</span> <span style="color: #000000;">m</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">bmGs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">;</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">j</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">m</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">bmGs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">m</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">suff</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">;</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">bmGs</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">BM</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">bool</span> <span style="color: #000000;">case_insensitive</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">pins</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"`%s` in `%s`"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">shorten</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"characters"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">10</span><span style="color: #0000FF;">)})</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">case_insensitive</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">pat</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">lower</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">s</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">lower</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000080;font-style:italic;">/* Preprocessing */</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">bmGs</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">preBmGs</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">bmBc</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">preBmBc</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">/* Searching */</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span>
<span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">m</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">cc</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;"><=</span> <span style="color: #000000;">n</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">m</span> <span style="color: #008080;">do</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">m</span> <span style="color: #008080;">to</span> <span style="color: #000000;">1</span> <span style="color: #008080;">by</span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">cc</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]!=</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span><span style="color: #0000FF;"><</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span>
<span style="color: #000000;">j</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">bmGs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">];</span>
<span style="color: #008080;">else</span>
<span style="color: #000000;">j</span> <span style="color: #0000FF;">+=</span> <span style="color: #7060A8;">max</span><span style="color: #0000FF;">(</span><span style="color: #000000;">bmGs</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">bmBc</span><span style="color: #0000FF;">[</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">]]</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">);</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">ccs</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"(%d character comparisons)"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">cc</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">many</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">ordinant</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">))</span> <span style="color: #000080;font-style:italic;">-- never, once, twice, %,d times</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Found %s %s at %v %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">pins</span><span style="color: #0000FF;">,</span><span style="color: #000000;">many</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ccs</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">else</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"No %s %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">pins</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ccs</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #000000;">BM</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"GCAGAGAG"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"GCATCGCAGAGAGTATACAGTACG"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">BM</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"TCTA"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"GCTAGCTCTACGAGTCTA"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">BM</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"TAATAAA"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"GGCTATAATGCGTA"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">BM</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"word"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"there would have been a time for such a word"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">BM</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"needle"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"needle need noodle needle"</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">book</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesley"</span><span style="color: #0000FF;">&</span>
<span style="color: #008000;">"DKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeand"</span><span style="color: #0000FF;">&</span>
<span style="color: #008000;">"assemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented"</span>
<span style="color: #000000;">BM</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"put"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">book</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">BM</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"and"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">book</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">farm</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with "</span><span style="color: #0000FF;">&</span>
<span style="color: #008000;">"bales of all that alfalfa exchanged for milk."</span>
<span style="color: #000000;">BM</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"alfalfa"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">farm</span><span style="color: #0000FF;">)</span>
<span style="color: #000080;font-style:italic;">--BM("aLfAlfa",farm) -- none
--BM("aLfAlfa",farm,true) -- as -2</span>
<!--</lang>-->
{{out}}
<pre>
Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (17 character comparisons)
Found `TCTA` in `GCTAGCTCTACGAGTCTA` twice at {7,15} (14 character comparisons)
No `TAATAAA` in `GGCTATAATGCGTA` (4 character comparisons)
Found `word` in `there woul...uch a word (44 characters)` once at {41} (15 character comparisons)
Found `needle` in `needle need noodle needle` twice at {1,20} (18 character comparisons)
Found `put` in `Inhisbooks...epresented (202 characters)` twice at {27,91} (78 character comparisons)
Found `and` in `Inhisbooks...epresented (202 characters)` 3 times at {102,129,172} (79 character comparisons)
Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (42 character comparisons)
</pre>
 
7,806

edits