Knuth-Morris-Pratt string search: Difference between revisions
Content added Content deleted
SqrtNegInf (talk | contribs) (Added Perl) |
|||
Line 95: | Line 95: | ||
(J uses index 0 for the first character in a sequence of characters.) |
(J uses index 0 for the first character in a sequence of characters.) |
||
=={{header|jq}}== |
|||
'''Adapted from [[#Wren|Wren]]''' |
|||
{{Works with|jq}} |
|||
The program will also work with gojq once the prefix "KMP::" is |
|||
omitted in the two lines in which it occurs. |
|||
<syntaxhighlight lang=jq> |
|||
# search for the string needle in the string haystack |
|||
def KMP::search(haystack; needle): |
|||
def table_($needle): |
|||
($needle|length) as $nc |
|||
| {t: [range(0; $nc) | 0], |
|||
i: 1, # index into table |
|||
len: 0 # length of previous longest prefix |
|||
} |
|||
| until(.i >= .nc; |
|||
if $needle[.i] == $needle[.len] |
|||
then .len += 1 |
|||
| .t[.i] = .len |
|||
| .i += 1 |
|||
elif .len != 0 |
|||
then .len = .t[.len-1] |
|||
else .t[.i] = 0 |
|||
| .i += 1 |
|||
end ) |
|||
| .t ; |
|||
{ haystack: (haystack|explode), |
|||
needle: (needle|explode), |
|||
hc: (haystack|length), |
|||
nc: (needle|length), |
|||
indices: [], |
|||
i: 0, # index into haystack |
|||
j: 0 # index into needle |
|||
} |
|||
| table_(.needle) as $t |
|||
| until (.i >= .hc; |
|||
if .needle[.j] == .haystack[.i] |
|||
then .i += 1 |
|||
| .j += 1 |
|||
else . |
|||
end |
|||
| if .j == .nc |
|||
then .indices = .indices + [.i - .j] |
|||
| .j = $t[.j-1] |
|||
elif (.i < .hc and .needle[.j] != .haystack[.i]) |
|||
then if .j != 0 then .j = $t[.j-1] else .i += 1 end |
|||
else . |
|||
end ) |
|||
| .indices ; |
|||
# Examples |
|||
def texts: [ |
|||
"GCTAGCTCTACGAGTCTA", |
|||
"GGCTATAATGCGTA", |
|||
"there would have been a time for such a word", |
|||
"needle need noodle needle", |
|||
"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented", |
|||
"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk." |
|||
]; |
|||
def pats: ["TCTA", "TAATAAA", "word", "needle", "put", "and", "alfalfa"]; |
|||
def tasks($texts; $pats): |
|||
range (0; $texts|length) | "text\(.+1) = \($texts[.])", |
|||
"", |
|||
(range (0; $pats|length) as $i |
|||
| (if $i < 5 then $i else $i-1 end) as $j |
|||
| "Found '\($pats[$i])' in text\($j+1) at indices \(KMP::search($texts[$j]; $pats[$i]) )" ) ; |
|||
tasks(texts; pats) |
|||
</syntaxhighlight> |
|||
'''Invocation''': jq -nr -f kmp-string-search.jq |
|||
{{output}} |
|||
<pre> |
|||
text1 = GCTAGCTCTACGAGTCTA |
|||
text2 = GGCTATAATGCGTA |
|||
text3 = there would have been a time for such a word |
|||
text4 = needle need noodle needle |
|||
text5 = InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented |
|||
text6 = Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk. |
|||
Found 'TCTA' in text1 at indices [6, 14] |
|||
Found 'TAATAAA' in text2 at indices [] |
|||
Found 'word' in text3 at indices [40] |
|||
Found 'needle' in text4 at indices [0, 19] |
|||
Found 'put' in text5 at indices [26, 90] |
|||
Found 'and' in text5 at indices [101, 128, 171] |
|||
Found 'alfalfa' in text6 at indices [33, 87] |
|||
</pre> |
|||
=={{header|Julia}}== |
=={{header|Julia}}== |