Knuth-Morris-Pratt string search
< About Knuth-Morris-Pratt String Search Algorithm >
Emacs Lisp
<lang lisp> (defun kmp_compile_pattern (pattern)
"Compile pattern to DFA."
(defun create-2d-array (x y init) (let ((arr1 (make-vector x nil))) (dotimes (i x)
(aset arr1 i (make-vector y init)) )
arr1 ) ) (let* ((patLen (length pattern))
(R 256)
(restartPos 0)
(dfa (create-2d-array R patLen 0)))
(aset (aref dfa (elt pattern 0)) 0 1)
(let ((patPos 0)) (while (progn (setq patPos (1+ patPos)) (< patPos patLen))
(dotimes (c R) (aset (aref dfa c) patPos (aref (aref dfa c) restartPos)) )
(aset (aref dfa (elt pattern patPos)) patPos (1+ patPos)) (setq restartPos (aref (aref dfa (elt pattern patPos)) restartPos) ) )
) dfa ) )
(defun kmp_search (pattern text)
"Pattern search with KMP algorithm." (let ((dfa (kmp_compile_pattern pattern)))
(let ((textPos 0) (patPos 0) (N (length text)) (M (length pattern))) (while (and (< textPos N) (< patPos M))
(setq patPos (aref (aref dfa (elt text textPos)) patPos)) (setq textPos (1+ textPos)) )
(if (= patPos M) (- textPos M) N ) ) ) )
</lang>
Julia
<lang ruby>"""
function kmp_table(W)
input:
an array of characters, W (the word to be analyzed)
output:
an array of integers, T (the table to be filled)
define variables:
an integer, i ← 2 (the current one-based position we are computing in T) an integer, j ← 0 (the additive to index i in W of the next character of the current candidate substring)
""" function kmp_table(W)
len = length(W) T = zeros(Int, len) # start with the second letter of W, looking for patterns in W i = 2 while i < len j = 0 while i + j <= len # avoid overshooting end with index # compute the longest proper prefix if W[i + j] == W[j + 1] T[i + j] = T[i + j - 1] + 1 else T[i + j] = 0 # back to start j += 1 break end j += 1 end # entry in T found, so begin at next starting point along W i += j end return T
end
"""
function kmp_search(S, W)
input:
an array of characters, S (the text to be searched) an array of characters, W (the word sought)
output:
an array of integers, P (positions in S at which W is found)
define variables (one based indexing in Julia differs from the Wikipedia example):
an integer, i ← 1 (the position of the current character in S) an integer, j ← 1 (the position of the current character in W) an array of integers, T (the table, computed elsewhere)
""" function kmp_search(S, W)
lenW, lenS = length(W), length(S) i, P = 1, Int[] T = kmp_table(W) # get pattern table while i <= lenS - lenW + 1 for j in 1:lenW if S[i + j - 1] != W[j] # pattern not found, so skip unnecessary inner loops i += T[j] + 1 @goto next_outer_loop end end # found pattern W in S, so add to output P push!(P, i) i += 1 @label next_outer_loop end return P
end
const text1 = "InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented" const text2 = "Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk." const pat1, pat2, pat3 = "put", "and", "alfalfa"
println("Found <$pat1> at: (one-based indices) ", kmp_search(text1, pat1)) println("Found <$pat2> at: (one-based indices) ", kmp_search(text1, pat2)) println("Found <$pat3> at: (one-based indices) ", kmp_search(text2, pat3))
</lang>
- Output:
Found <put> at: (one-based indices) [27, 91] Found <and> at: (one-based indices) [102, 129, 172] Found <alfalfa> at: (one-based indices) [34, 88]