Knuth-Morris-Pratt string search: Difference between revisions
Content added Content deleted
(Added Sidef) |
(Added FreeBasic) |
||
Line 5: | Line 5: | ||
=={{header|11l}}== |
=={{header|11l}}== |
||
{{trans|Python}} |
{{trans|Python}} |
||
<syntaxhighlight lang="11l">F kmp_table(String word) -> [Int] |
|||
<syntaxhighlight lang="11l"> |
|||
F kmp_table(String word) -> [Int] |
|||
V position = 1 |
V position = 1 |
||
V candidate = 0 |
V candidate = 0 |
||
Line 74: | Line 72: | ||
print(‘Found `’word‘` in `’text[0.<10]‘...` once at ’positions‘.’) |
print(‘Found `’word‘` in `’text[0.<10]‘...` once at ’positions‘.’) |
||
E |
E |
||
print(‘Found `’word‘` in `’text[0.<10]‘...` ’number_of_positions‘ times at ’positions‘.’) |
print(‘Found `’word‘` in `’text[0.<10]‘...` ’number_of_positions‘ times at ’positions‘.’)</syntaxhighlight> |
||
</syntaxhighlight> |
|||
{{out}} |
{{out}} |
||
<pre>Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14]. |
|||
<pre> |
|||
Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14]. |
|||
`TAATAAA` not found in `GGCTATAATG...` |
`TAATAAA` not found in `GGCTATAATG...` |
||
Found `word` in `there woul...` once at [40]. |
Found `word` in `there woul...` once at [40]. |
||
Line 85: | Line 80: | ||
Found `put` in `Inhisbooks...` 2 times at [26, 90]. |
Found `put` in `Inhisbooks...` 2 times at [26, 90]. |
||
Found `and` in `Inhisbooks...` 3 times at [101, 128, 171]. |
Found `and` in `Inhisbooks...` 3 times at [101, 128, 171]. |
||
Found `alfalfa` in `Nearby far...` 2 times at [33, 87]. |
Found `alfalfa` in `Nearby far...` 2 times at [33, 87].</pre> |
||
</pre> |
|||
=={{header|Emacs Lisp}}== |
=={{header|Emacs Lisp}}== |
||
<syntaxhighlight lang="lisp"> |
<syntaxhighlight lang="lisp"> |
||
(defun kmp_compile_pattern (pattern) |
(defun kmp_compile_pattern (pattern) |
||
Line 129: | Line 122: | ||
(setq textPos (1+ textPos)) ) |
(setq textPos (1+ textPos)) ) |
||
(if (= patPos M) (- textPos M) N ) ) ) ) |
(if (= patPos M) (- textPos M) N ) ) ) )</syntaxhighlight> |
||
</syntaxhighlight> |
|||
=={{header|Fortran}}== |
=={{header|Fortran}}== |
||
<syntaxhighlight lang="Fortran"> |
<syntaxhighlight lang="Fortran"> |
||
Line 248: | Line 241: | ||
return |
return |
||
end function kmpsearch |
end function kmpsearch |
||
end module kmp_mod |
end module kmp_mod</syntaxhighlight> |
||
</syntaxhighlight> |
|||
</pre> |
|||
=={{header|J}}== |
|||
=={{header|FreeBASIC}}== |
|||
'''Adapted from [[#Wren|Wren]]''' |
|||
<syntaxhighlight lang="vb">Dim Shared As Integer t() |
|||
Dim Shared As Integer indices() |
|||
Sub Tabla(patron As String) |
|||
Dim As Integer nc = Len(patron) |
|||
Redim As Integer t(nc) |
|||
Dim As Integer idx = 1 ' index into table |
|||
Dim As Integer lon = 0 ' length of previous longest prefix |
|||
While idx < nc |
|||
If patron[idx] = patron[lon] Then |
|||
lon += 1 |
|||
t(idx) = lon |
|||
idx += 1 |
|||
Elseif lon <> 0 Then |
|||
lon = t(lon-1) |
|||
Else |
|||
t(idx) = 0 |
|||
idx += 1 |
|||
End If |
|||
Wend |
|||
End Sub |
|||
Sub BusquedaKMP(texto As String, patron As String) |
|||
Dim As Integer hc = Len(texto) |
|||
Dim As Integer nc = Len(patron) |
|||
Dim As Integer i = 0 ' index into haystack |
|||
Dim As Integer j = 0 ' index into needle |
|||
Dim As Integer cont = 1 |
|||
Tabla(patron) |
|||
While i < hc |
|||
If patron[j] = texto[i] Then |
|||
i += 1 |
|||
j += 1 |
|||
End If |
|||
If j = nc Then |
|||
Redim Preserve indices(cont) |
|||
indices(cont) = (i - j) |
|||
cont += 1 |
|||
j = t(j-1) |
|||
Elseif i < hc And patron[j] <> texto[i] Then |
|||
If j <> 0 Then j = t(j-1) Else i += 1 |
|||
End If |
|||
Wend |
|||
End Sub |
|||
Dim As Integer i, j, k |
|||
Dim As String cadenas(1 To 6) = {"GCTAGCTCTACGAGTCTA", "GGCTATAATGCGTA", _ |
|||
"there would have been a time for such a word", "needle need noodle needle", _ |
|||
"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented", _ |
|||
"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk."} |
|||
Dim As String palabra(1 To 7) = { _ |
|||
"TCTA", "TAATAAA", "word", "needle", "put", "and", "alfalfa"} |
|||
For i = 1 To Ubound(cadenas) |
|||
Print Using "text# = &"; i; cadenas(i) |
|||
Next |
|||
Print |
|||
For i = 1 To Ubound(palabra) |
|||
j = Iif(i < 6, i, i-1) |
|||
Print Using "Found '&' in 'text#' at indices [ "; palabra(i); j; |
|||
BusquedaKMP(cadenas(j), palabra(i)) |
|||
If Ubound(indices) > 0 Then |
|||
For k = 1 To Ubound(indices) |
|||
Print indices(k) & ", "; |
|||
Next |
|||
Else |
|||
Print " "; |
|||
End If |
|||
Print Chr(8) & Chr(8) & " ]" |
|||
Erase indices |
|||
Next |
|||
Sleep</syntaxhighlight> |
|||
{{out}} |
|||
<pre>text1 = GCTAGCTCTACGAGTCTA |
|||
text2 = GGCTATAATGCGTA |
|||
text3 = there would have been a time for such a word |
|||
text4 = needle need noodle needle |
|||
text5 = InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented |
|||
text6 = Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk. |
|||
Found 'TCTA' in 'text1' at indices [ 6, 14 ] |
|||
Found 'TAATAAA' in 'text2' at indices [ ] |
|||
Found 'word' in 'text3' at indices [ 40 ] |
|||
Found 'needle' in 'text4' at indices [ 0, 19 ] |
|||
Found 'put' in 'text5' at indices [ 26, 90 ] |
|||
Found 'and' in 'text5' at indices [ 101, 128, 171 ] |
|||
Found 'alfalfa' in 'text6' at indices [ 33, 87 ]</pre> |
|||
=={{header|J}}== |
|||
Implementation:<syntaxhighlight lang="j">kmp_table=: {{ |
Implementation:<syntaxhighlight lang="j">kmp_table=: {{ |
||
j=. 0 |
j=. 0 |
||
Line 306: | Line 391: | ||
The program will also work with gojq once the prefix "KMP::" is |
The program will also work with gojq once the prefix "KMP::" is |
||
omitted in the two lines in which it occurs. |
omitted in the two lines in which it occurs. |
||
<syntaxhighlight lang=jq> |
<syntaxhighlight lang=jq># search for the string needle in the string haystack |
||
# search for the string needle in the string haystack |
|||
def KMP::search(haystack; needle): |
def KMP::search(haystack; needle): |
||
Line 373: | Line 457: | ||
| "Found '\($pats[$i])' in text\($j+1) at indices \(KMP::search($texts[$j]; $pats[$i]) )" ) ; |
| "Found '\($pats[$i])' in text\($j+1) at indices \(KMP::search($texts[$j]; $pats[$i]) )" ) ; |
||
tasks(texts; pats) |
tasks(texts; pats)</syntaxhighlight> |
||
</syntaxhighlight> |
|||
'''Invocation''': jq -nr -f kmp-string-search.jq |
'''Invocation''': jq -nr -f kmp-string-search.jq |
||
{{output}} |
{{output}} |
||
<pre>text1 = GCTAGCTCTACGAGTCTA |
|||
<pre> |
|||
text1 = GCTAGCTCTACGAGTCTA |
|||
text2 = GGCTATAATGCGTA |
text2 = GGCTATAATGCGTA |
||
text3 = there would have been a time for such a word |
text3 = there would have been a time for such a word |
||
Line 392: | Line 474: | ||
Found 'put' in text5 at indices [26, 90] |
Found 'put' in text5 at indices [26, 90] |
||
Found 'and' in text5 at indices [101, 128, 171] |
Found 'and' in text5 at indices [101, 128, 171] |
||
Found 'alfalfa' in text6 at indices [33, 87] |
Found 'alfalfa' in text6 at indices [33, 87]</pre> |
||
</pre> |
|||
=={{header|Julia}}== |
=={{header|Julia}}== |
||
Line 473: | Line 554: | ||
println("Found <$pat3> at: (one-based indices) ", kmp_search(text2, pat3)) |
println("Found <$pat3> at: (one-based indices) ", kmp_search(text2, pat3)) |
||
</syntaxhighlight>{{out}} |
</syntaxhighlight>{{out}} |
||
<pre>Found <put> at: (one-based indices) [27, 91] |
|||
<pre> |
|||
Found <put> at: (one-based indices) [27, 91] |
|||
Found <and> at: (one-based indices) [102, 129, 172] |
Found <and> at: (one-based indices) [102, 129, 172] |
||
Found <alfalfa> at: (one-based indices) [34, 88] |
Found <alfalfa> at: (one-based indices) [34, 88]</pre> |
||
</pre> |
|||
=={{header|Nim}}== |
=={{header|Nim}}== |
||
Line 544: | Line 623: | ||
echo &"Not found “{pattern}” in “Text{j+1}”." |
echo &"Not found “{pattern}” in “Text{j+1}”." |
||
</syntaxhighlight> |
</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>Text1 = GCTAGCTCTACGAGTCTA |
<pre>Text1 = GCTAGCTCTACGAGTCTA |
||
Line 559: | Line 637: | ||
Found “put” in “Text5” at indices 26, 90. |
Found “put” in “Text5” at indices 26, 90. |
||
Found “and” in “Text5” at indices 101, 128, 171. |
Found “and” in “Text5” at indices 101, 128, 171. |
||
Found “alfalfa” in “Text6” at indices 33, 87. |
Found “alfalfa” in “Text6” at indices 33, 87.</pre> |
||
</pre> |
|||
=={{header|Perl}}== |
=={{header|Perl}}== |
||
Line 714: | Line 791: | ||
Significantly higher character comparison counts than [[Boyer-Moore_string_search#Phix]].<br> |
Significantly higher character comparison counts than [[Boyer-Moore_string_search#Phix]].<br> |
||
Also higher than those on the above (lecroq) link, probably because this carries on for all matches. |
Also higher than those on the above (lecroq) link, probably because this carries on for all matches. |
||
<pre>Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (26 character comparisons) |
|||
<pre> |
|||
Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (26 character comparisons) |
|||
Found `TCTA` in `GCTAGCTCTACGAGTCTA` twice at {7,15} (19 character comparisons) |
Found `TCTA` in `GCTAGCTCTACGAGTCTA` twice at {7,15} (19 character comparisons) |
||
No `TAATAAA` in `GGCTATAATGCGTA` (16 character comparisons) |
No `TAATAAA` in `GGCTATAATGCGTA` (16 character comparisons) |
||
Line 722: | Line 798: | ||
Found `put` in `Inhisbooks...epresented (202 characters)` twice at {27,91} (205 character comparisons) |
Found `put` in `Inhisbooks...epresented (202 characters)` twice at {27,91} (205 character comparisons) |
||
Found `and` in `Inhisbooks...epresented (202 characters)` three times at {102,129,172} (216 character comparisons) |
Found `and` in `Inhisbooks...epresented (202 characters)` three times at {102,129,172} (216 character comparisons) |
||
Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (125 character comparisons) |
Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (125 character comparisons)</pre> |
||
</pre> |
|||
=={{header|Python}}== |
=={{header|Python}}== |
||
Line 828: | Line 903: | ||
main() |
main() |
||
</syntaxhighlight> |
</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14]. |
|||
<pre> |
|||
Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14]. |
|||
`TAATAAA` not found in `GGCTATAATG...` |
`TAATAAA` not found in `GGCTATAATG...` |
||
Found `word` in `there woul...` once at [40]. |
Found `word` in `there woul...` once at [40]. |
||
Line 837: | Line 910: | ||
Found `put` in `Inhisbooks...` 2 times at [26, 90]. |
Found `put` in `Inhisbooks...` 2 times at [26, 90]. |
||
Found `and` in `Inhisbooks...` 3 times at [101, 128, 171]. |
Found `and` in `Inhisbooks...` 3 times at [101, 128, 171]. |
||
Found `alfalfa` in `Nearby far...` 2 times at [33, 87]. |
Found `alfalfa` in `Nearby far...` 2 times at [33, 87].</pre> |
||
</pre> |
|||
=={{header|Raku}}== |
=={{header|Raku}}== |
||
Line 892: | Line 964: | ||
}</syntaxhighlight> |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>text0 = GCTAGCTCTACGAGTCTA |
|||
<pre> |
|||
text0 = GCTAGCTCTACGAGTCTA |
|||
text1 = GGCTATAATGCGTA |
text1 = GGCTATAATGCGTA |
||
text2 = there would have been a time for such a word |
text2 = there would have been a time for such a word |
||
Line 908: | Line 979: | ||
Found 'put' in 'text5' at indices (26 90) |
Found 'put' in 'text5' at indices (26 90) |
||
Found 'and' in 'text5' at indices (101 128 171) |
Found 'and' in 'text5' at indices (101 128 171) |
||
Found 'alfalfa' in 'text6' at indices (33 87) |
Found 'alfalfa' in 'text6' at indices (33 87)</pre> |
||
</pre> |
|||
=={{header|Sidef}}== |
=={{header|Sidef}}== |
||
Line 979: | Line 1,049: | ||
}</syntaxhighlight> |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>text0 = GCTAGCTCTACGAGTCTA |
|||
<pre> |
|||
text0 = GCTAGCTCTACGAGTCTA |
|||
text1 = GGCTATAATGCGTA |
text1 = GGCTATAATGCGTA |
||
text2 = there would have been a time for such a word |
text2 = there would have been a time for such a word |
||
Line 995: | Line 1,064: | ||
Found 'put' in 'text5' at indices [26, 90] |
Found 'put' in 'text5' at indices [26, 90] |
||
Found 'and' in 'text5' at indices [101, 128, 171] |
Found 'and' in 'text5' at indices [101, 128, 171] |
||
Found 'alfalfa' in 'text6' at indices [33, 87] |
Found 'alfalfa' in 'text6' at indices [33, 87]</pre> |
||
</pre> |
|||
=={{header|Wren}}== |
=={{header|Wren}}== |
||
Line 1,065: | Line 1,133: | ||
System.print("Found '%(pats[i])' in 'text%(j+1)' at indices %(KMP.search(texts[j], pats[i]))") |
System.print("Found '%(pats[i])' in 'text%(j+1)' at indices %(KMP.search(texts[j], pats[i]))") |
||
}</syntaxhighlight> |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>text1 = GCTAGCTCTACGAGTCTA |
|||
<pre> |
|||
text1 = GCTAGCTCTACGAGTCTA |
|||
text2 = GGCTATAATGCGTA |
text2 = GGCTATAATGCGTA |
||
text3 = there would have been a time for such a word |
text3 = there would have been a time for such a word |
||
Line 1,081: | Line 1,147: | ||
Found 'put' in 'text5' at indices [26, 90] |
Found 'put' in 'text5' at indices [26, 90] |
||
Found 'and' in 'text5' at indices [101, 128, 171] |
Found 'and' in 'text5' at indices [101, 128, 171] |
||
Found 'alfalfa' in 'text6' at indices [33, 87] |
Found 'alfalfa' in 'text6' at indices [33, 87]</pre> |
||
</pre> |