Knuth-Morris-Pratt string search: Difference between revisions
Content added Content deleted
m (→{{header|J}}: trivial optimization) |
(Add Python) |
||
Line 270: | Line 270: | ||
Found `and` in `Inhisbooks...epresented (202 characters)` three times at {102,129,172} (216 character comparisons) |
Found `and` in `Inhisbooks...epresented (202 characters)` three times at {102,129,172} (216 character comparisons) |
||
Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (125 character comparisons) |
Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (125 character comparisons) |
||
</pre> |
|||
=={{header|Python}}== |
|||
Based on pseudocode found in the [https://en.wikipedia.org/wiki/Knuth%E2%80%93Morris%E2%80%93Pratt_algorithm Wikipedia article]. Uses test cases from the [[#Wren]] solution. |
|||
<lang python>"""Knuth-Morris-Pratt string search. Requires Python >= 3.7.""" |
|||
from typing import List |
|||
from typing import Tuple |
|||
def kmp_search(text: str, word: str) -> Tuple[List[int], int]: |
|||
text_position = 0 |
|||
word_position = 0 |
|||
positions: List[int] = [] |
|||
number_of_positions = 0 |
|||
table = kmp_table(word) |
|||
while text_position < len(text): |
|||
if word[word_position] == text[text_position]: |
|||
text_position += 1 |
|||
word_position += 1 |
|||
if word_position == len(word): |
|||
positions.append(text_position - word_position) |
|||
number_of_positions += 1 |
|||
word_position = table[word_position] |
|||
else: |
|||
word_position = table[word_position] |
|||
if word_position < 0: |
|||
text_position += 1 |
|||
word_position += 1 |
|||
return positions, number_of_positions |
|||
def kmp_table(word: str) -> List[int]: |
|||
position = 1 |
|||
candidate = 0 |
|||
table = [0] * (len(word) + 1) |
|||
table[0] = -1 |
|||
while position < len(word): |
|||
if word[position] == word[candidate]: |
|||
table[position] = table[candidate] |
|||
else: |
|||
table[position] = candidate |
|||
while candidate >= 0 and word[position] != word[candidate]: |
|||
candidate = table[candidate] |
|||
position += 1 |
|||
candidate += 1 |
|||
table[position] = candidate |
|||
return table |
|||
TEST_CASES = [ |
|||
("GCTAGCTCTACGAGTCTA", "TCTA"), |
|||
("GGCTATAATGCGTA", "TAATAAA"), |
|||
("there would have been a time for such a word", "word"), |
|||
("needle need noodle needle", "needle"), |
|||
( |
|||
( |
|||
"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesley" |
|||
"DKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeand" |
|||
"assemblylanguagestoillustratetheconceptsandalgorithmsastheyare" |
|||
"presented" |
|||
), |
|||
"put", |
|||
), |
|||
( |
|||
( |
|||
"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesley" |
|||
"DKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeand" |
|||
"assemblylanguagestoillustratetheconceptsandalgorithmsastheyare" |
|||
"presented" |
|||
), |
|||
"and", |
|||
), |
|||
( |
|||
( |
|||
"Nearby farms grew a half acre of alfalfa on the dairy's behalf, " |
|||
"with bales of all that alfalfa exchanged for milk." |
|||
), |
|||
"alfalfa", |
|||
), |
|||
] |
|||
def main(): |
|||
for text, word in TEST_CASES: |
|||
positions, number_of_positions = kmp_search(text, word) |
|||
if number_of_positions == 0: |
|||
print(f"`{word}` not found in `{text[:10]}...`") |
|||
elif number_of_positions == 1: |
|||
print(f"Found `{word}` in `{text[:10]}...` once at {positions}.") |
|||
else: |
|||
print( |
|||
f"Found `{word}` in `{text[:10]}...` {number_of_positions} times at {positions}." |
|||
) |
|||
if __name__ == "__main__": |
|||
main() |
|||
</lang> |
|||
{{out}} |
|||
<pre> |
|||
Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14]. |
|||
`TAATAAA` not found in `GGCTATAATG...` |
|||
Found `word` in `there woul...` once at [40]. |
|||
Found `needle` in `needle nee...` 2 times at [0, 19]. |
|||
Found `put` in `Inhisbooks...` 2 times at [26, 90]. |
|||
Found `and` in `Inhisbooks...` 3 times at [101, 128, 171]. |
|||
Found `alfalfa` in `Nearby far...` 2 times at [33, 87]. |
|||
</pre> |
</pre> |
||