Knuth-Morris-Pratt string search: Difference between revisions
(Added Wren) |
|||
Line 129: | Line 129: | ||
Found <and> at: (one-based indices) [102, 129, 172] |
Found <and> at: (one-based indices) [102, 129, 172] |
||
Found <alfalfa> at: (one-based indices) [34, 88] |
Found <alfalfa> at: (one-based indices) [34, 88] |
||
</pre> |
|||
=={{header|Phix}}== |
|||
<!--<lang Phix>(phixonline)--> |
|||
<span style="color: #000080;font-style:italic;">-- |
|||
-- demo\rosetta\KnuthMorrisPratt.exw |
|||
-- ================================= |
|||
-- |
|||
-- based on https://www-igm.univ-mlv.fr/~lecroq/string/node8.html#SECTION0080 |
|||
--</span> |
|||
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">preKmp</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">m</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">),</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">kmpNext</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">m</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">pat</span> <span style="color: #0000FF;">&=</span> <span style="color: #008000;">'\0'</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;"><=</span> <span style="color: #000000;">m</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">></span> <span style="color: #000000;">0</span> <span style="color: #008080;">and</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">!=</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">kmpNext</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]+</span><span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #000000;">j</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">==</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000000;">kmpNext</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">kmpNext</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">else</span> |
|||
<span style="color: #000000;">kmpNext</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">kmpNext</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
<span style="color: #008080;">procedure</span> <span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">bool</span> <span style="color: #000000;">case_insensitive</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">pins</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"`%s` in `%s`"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">shorten</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"characters"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">10</span><span style="color: #0000FF;">)})</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">case_insensitive</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">lower</span><span style="color: #0000FF;">({</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000080;font-style:italic;">/* Preprocessing */</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">kmpNext</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">preKmp</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000080;font-style:italic;">/* Searching */</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{}</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> |
|||
<span style="color: #000000;">n</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">),</span> |
|||
<span style="color: #000000;">m</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">pat</span><span style="color: #0000FF;">),</span> |
|||
<span style="color: #000000;">cc</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">j</span> <span style="color: #0000FF;"><</span> <span style="color: #000000;">n</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">></span> <span style="color: #0000FF;">-</span><span style="color: #000000;">1</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #000000;">cc</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">pat</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">j</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">kmpNext</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #000000;">j</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span> <span style="color: #0000FF;">>=</span> <span style="color: #000000;">m</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000000;">res</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">j</span><span style="color: #0000FF;">-</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span> |
|||
<span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">kmpNext</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #000080;font-style:italic;">/* Output */</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">ccs</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"(%d character comparisons)"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">cc</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">many</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">ordinant</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">res</span><span style="color: #0000FF;">))</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Found %s %s at %v %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">pins</span><span style="color: #0000FF;">,</span><span style="color: #000000;">many</span><span style="color: #0000FF;">,</span><span style="color: #000000;">res</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ccs</span><span style="color: #0000FF;">})</span> |
|||
<span style="color: #008080;">else</span> |
|||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"No %s %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">pins</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ccs</span><span style="color: #0000FF;">})</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span> |
|||
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"GCAGAGAG"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"GCATCGCAGAGAGTATACAGTACG"</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"TCTA"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"GCTAGCTCTACGAGTCTA"</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"TAATAAA"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"GGCTATAATGCGTA"</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"word"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"there would have been a time for such a word"</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"needle"</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"needle need noodle needle"</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">constant</span> <span style="color: #000000;">book</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesley"</span><span style="color: #0000FF;">&</span> |
|||
<span style="color: #008000;">"DKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeand"</span><span style="color: #0000FF;">&</span> |
|||
<span style="color: #008000;">"assemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented"</span> |
|||
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"put"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">book</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"and"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">book</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">constant</span> <span style="color: #000000;">farm</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with "</span><span style="color: #0000FF;">&</span> |
|||
<span style="color: #008000;">"bales of all that alfalfa exchanged for milk."</span> |
|||
<span style="color: #000000;">KMP</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"alfalfa"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">farm</span><span style="color: #0000FF;">)</span> |
|||
<span style="color: #000080;font-style:italic;">--KMP("aLfAlfa",farm) -- none |
|||
--KMP("aLfAlfa",farm,true) -- as -2</span> |
|||
<!--</lang>--> |
|||
{{out}} |
|||
Significantly higher character comparison counts than [[Boyer-Moore_string_search#Phix]].<br> |
|||
Also higher than those on the above (lecroq) link, probably because this carries on for all matches. |
|||
<pre> |
|||
Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (26 character comparisons) |
|||
Found `TCTA` in `GCTAGCTCTACGAGTCTA` twice at {7,15} (19 character comparisons) |
|||
No `TAATAAA` in `GGCTATAATGCGTA` (16 character comparisons) |
|||
Found `word` in `there woul...uch a word (44 characters)` once at {41} (45 character comparisons) |
|||
Found `needle` in `needle need noodle needle` twice at {1,20} (27 character comparisons) |
|||
Found `put` in `Inhisbooks...epresented (202 characters)` twice at {27,91} (205 character comparisons) |
|||
Found `and` in `Inhisbooks...epresented (202 characters)` three times at {102,129,172} (216 character comparisons) |
|||
Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (125 character comparisons) |
|||
</pre> |
</pre> |
||
Revision as of 15:47, 9 July 2022
< About Knuth-Morris-Pratt String Search Algorithm >
Emacs Lisp
<lang lisp> (defun kmp_compile_pattern (pattern)
"Compile pattern to DFA."
(defun create-2d-array (x y init) (let ((arr1 (make-vector x nil))) (dotimes (i x)
(aset arr1 i (make-vector y init)) )
arr1 ) ) (let* ((patLen (length pattern))
(R 256)
(restartPos 0)
(dfa (create-2d-array R patLen 0)))
(aset (aref dfa (elt pattern 0)) 0 1)
(let ((patPos 0)) (while (progn (setq patPos (1+ patPos)) (< patPos patLen))
(dotimes (c R) (aset (aref dfa c) patPos (aref (aref dfa c) restartPos)) )
(aset (aref dfa (elt pattern patPos)) patPos (1+ patPos)) (setq restartPos (aref (aref dfa (elt pattern patPos)) restartPos) ) )
) dfa ) )
(defun kmp_search (pattern text)
"Pattern search with KMP algorithm." (let ((dfa (kmp_compile_pattern pattern)))
(let ((textPos 0) (patPos 0) (N (length text)) (M (length pattern))) (while (and (< textPos N) (< patPos M))
(setq patPos (aref (aref dfa (elt text textPos)) patPos)) (setq textPos (1+ textPos)) )
(if (= patPos M) (- textPos M) N ) ) ) )
</lang>
Julia
<lang ruby>"""
function kmp_table(W)
input:
an array of characters, W (the word to be analyzed)
output:
an array of integers, T (the table to be filled)
define variables:
an integer, i ← 2 (the current one-based position we are computing in T) an integer, j ← 0 (the additive to index i in W of the next character of the current candidate substring)
""" function kmp_table(W)
len = length(W) T = zeros(Int, len) # start with the second letter of W, looking for patterns in W i = 2 while i < len j = 0 while i + j <= len # avoid overshooting end with index # compute the longest proper prefix if W[i + j] == W[j + 1] T[i + j] = T[i + j - 1] + 1 else T[i + j] = 0 # back to start j += 1 break end j += 1 end # entry in T found, so begin at next starting point along W i += j end return T
end
"""
function kmp_search(S, W)
input:
an array of characters, S (the text to be searched) an array of characters, W (the word sought)
output:
an array of integers, P (positions in S at which W is found)
define variables (one based indexing in Julia differs from the Wikipedia example):
an integer, i ← 1 (the position of the current character in S) an integer, j ← 1 (the position of the current character in W) an array of integers, T (the table, computed elsewhere)
""" function kmp_search(S, W)
lenW, lenS = length(W), length(S) i, P = 1, Int[] T = kmp_table(W) # get pattern table while i <= lenS - lenW + 1 for j in 1:lenW if S[i + j - 1] != W[j] # pattern not found, so skip unnecessary inner loops i += T[j] + 1 @goto next_outer_loop end end # found pattern W in S, so add to output P push!(P, i) i += 1 @label next_outer_loop end return P
end
const text1 = "InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented" const text2 = "Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk." const pat1, pat2, pat3 = "put", "and", "alfalfa"
println("Found <$pat1> at: (one-based indices) ", kmp_search(text1, pat1)) println("Found <$pat2> at: (one-based indices) ", kmp_search(text1, pat2)) println("Found <$pat3> at: (one-based indices) ", kmp_search(text2, pat3))
</lang>
- Output:
Found <put> at: (one-based indices) [27, 91] Found <and> at: (one-based indices) [102, 129, 172] Found <alfalfa> at: (one-based indices) [34, 88]
Phix
-- -- demo\rosetta\KnuthMorrisPratt.exw -- ================================= -- -- based on https://www-igm.univ-mlv.fr/~lecroq/string/node8.html#SECTION0080 -- with javascript_semantics function preKmp(string pat) integer m = length(pat), i = 1, j = 0 sequence kmpNext = repeat(-1,m+1) pat &= '\0' while i <= m do while j > 0 and pat[i] != pat[j] do j = kmpNext[j]+1 end while i += 1 j += 1 if pat[i] == pat[j] then kmpNext[i] = kmpNext[j] else kmpNext[i] = j-1 end if end while return kmpNext end function procedure KMP(string pat, s, bool case_insensitive = false) string pins = sprintf("`%s` in `%s`",{pat,shorten(s,"characters",10)}) if case_insensitive then {pat,s} = lower({pat,s}) end if /* Preprocessing */ sequence kmpNext = preKmp(pat) /* Searching */ sequence res = {} integer i = 0, j = 0, n = length(s), m = length(pat), cc = 0 while j < n do while i > -1 do cc += 1 if pat[i+1] = s[j+1] then exit end if i = kmpNext[i+1] end while i += 1 j += 1 if i >= m then res &= j-i+1 i = kmpNext[i+1] end if end while /* Output */ string ccs = sprintf("(%d character comparisons)",cc) if length(res) then string many = ordinant(length(res)) printf(1,"Found %s %s at %v %s\n",{pins,many,res,ccs}) else printf(1,"No %s %s\n",{pins,ccs}) end if end procedure KMP("GCAGAGAG","GCATCGCAGAGAGTATACAGTACG") KMP("TCTA","GCTAGCTCTACGAGTCTA") KMP("TAATAAA","GGCTATAATGCGTA") KMP("word","there would have been a time for such a word") KMP("needle","needle need noodle needle") constant book = "InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesley"& "DKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeand"& "assemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented" KMP("put",book) KMP("and",book) constant farm = "Nearby farms grew a half acre of alfalfa on the dairy's behalf, with "& "bales of all that alfalfa exchanged for milk." KMP("alfalfa",farm) --KMP("aLfAlfa",farm) -- none --KMP("aLfAlfa",farm,true) -- as -2
- Output:
Significantly higher character comparison counts than Boyer-Moore_string_search#Phix.
Also higher than those on the above (lecroq) link, probably because this carries on for all matches.
Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (26 character comparisons) Found `TCTA` in `GCTAGCTCTACGAGTCTA` twice at {7,15} (19 character comparisons) No `TAATAAA` in `GGCTATAATGCGTA` (16 character comparisons) Found `word` in `there woul...uch a word (44 characters)` once at {41} (45 character comparisons) Found `needle` in `needle need noodle needle` twice at {1,20} (27 character comparisons) Found `put` in `Inhisbooks...epresented (202 characters)` twice at {27,91} (205 character comparisons) Found `and` in `Inhisbooks...epresented (202 characters)` three times at {102,129,172} (216 character comparisons) Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (125 character comparisons)
Wren
This is based on the code here. The examples used are the same as in the Boyer-Moore_string_search#Wren task. <lang ecmascript>class KMP {
static search(haystack, needle) { haystack = haystack.bytes.toList needle = needle.bytes.toList var hc = haystack.count var nc = needle.count var indices = [] var i = 0 // index into haystack var j = 0 // index into needle var t = table_(needle) while (i < hc) { if (needle[j] == haystack[i]) { i = i + 1 j = j + 1 } if (j == nc) { indices.add(i - j) j = t[j-1] } else if (i < hc && needle[j] != haystack[i]) { if (j != 0) { j = t[j-1] } else { i = i + 1 } } } return indices }
static table_(needle) { var nc = needle.count var t = List.filled(nc, 0) var i = 1 // index into table var len = 0 // length of previous longest prefix while (i < nc) { if (needle[i] == needle[len]) { len = len + 1 t[i] = len i = i + 1 } else if (len != 0) { len = t[len-1] } else { t[i] = 0 i = i + 1 } } return t }
}
var texts = [
"GCTAGCTCTACGAGTCTA", "GGCTATAATGCGTA", "there would have been a time for such a word", "needle need noodle needle",
"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented",
"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk."
] var pats = ["TCTA", "TAATAAA", "word", "needle", "put", "and", "alfalfa"] for (i in 0...texts.count) System.print("text%(i+1) = %(texts[i])") System.print() for (i in 0...pats.count) {
var j = (i < 5) ? i : i-1 System.print("Found '%(pats[i])' in 'text%(j+1)' at indices %(KMP.search(texts[j], pats[i]))")
}</lang>
- Output:
text1 = GCTAGCTCTACGAGTCTA text2 = GGCTATAATGCGTA text3 = there would have been a time for such a word text4 = needle need noodle needle text5 = InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented text6 = Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk. Found 'TCTA' in 'text1' at indices [6, 14] Found 'TAATAAA' in 'text2' at indices [] Found 'word' in 'text3' at indices [40] Found 'needle' in 'text4' at indices [0, 19] Found 'put' in 'text5' at indices [26, 90] Found 'and' in 'text5' at indices [101, 128, 171] Found 'alfalfa' in 'text6' at indices [33, 87]