Knuth-Morris-Pratt string search: Difference between revisions

Content added Content deleted
(Added Sidef)
(Added FreeBasic)
Line 5: Line 5:
=={{header|11l}}==
=={{header|11l}}==
{{trans|Python}}
{{trans|Python}}
<syntaxhighlight lang="11l">F kmp_table(String word) -> [Int]

<syntaxhighlight lang="11l">
F kmp_table(String word) -> [Int]
V position = 1
V position = 1
V candidate = 0
V candidate = 0
Line 74: Line 72:
print(‘Found `’word‘` in `’text[0.<10]‘...` once at ’positions‘.’)
print(‘Found `’word‘` in `’text[0.<10]‘...` once at ’positions‘.’)
E
E
print(‘Found `’word‘` in `’text[0.<10]‘...` ’number_of_positions‘ times at ’positions‘.’)
print(‘Found `’word‘` in `’text[0.<10]‘...` ’number_of_positions‘ times at ’positions‘.’)</syntaxhighlight>
</syntaxhighlight>

{{out}}
{{out}}
<pre>Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].
<pre>
Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].
`TAATAAA` not found in `GGCTATAATG...`
`TAATAAA` not found in `GGCTATAATG...`
Found `word` in `there woul...` once at [40].
Found `word` in `there woul...` once at [40].
Line 85: Line 80:
Found `put` in `Inhisbooks...` 2 times at [26, 90].
Found `put` in `Inhisbooks...` 2 times at [26, 90].
Found `and` in `Inhisbooks...` 3 times at [101, 128, 171].
Found `and` in `Inhisbooks...` 3 times at [101, 128, 171].
Found `alfalfa` in `Nearby far...` 2 times at [33, 87].
Found `alfalfa` in `Nearby far...` 2 times at [33, 87].</pre>
</pre>


=={{header|Emacs Lisp}}==
=={{header|Emacs Lisp}}==

<syntaxhighlight lang="lisp">
<syntaxhighlight lang="lisp">
(defun kmp_compile_pattern (pattern)
(defun kmp_compile_pattern (pattern)
Line 129: Line 122:
(setq textPos (1+ textPos)) )
(setq textPos (1+ textPos)) )
(if (= patPos M) (- textPos M) N ) ) ) )
(if (= patPos M) (- textPos M) N ) ) ) )</syntaxhighlight>

</syntaxhighlight>
=={{header|Fortran}}==
=={{header|Fortran}}==
<syntaxhighlight lang="Fortran">
<syntaxhighlight lang="Fortran">
Line 248: Line 241:
return
return
end function kmpsearch
end function kmpsearch
end module kmp_mod
end module kmp_mod</syntaxhighlight>
</syntaxhighlight>
</pre>
=={{header|J}}==


=={{header|FreeBASIC}}==
'''Adapted from [[#Wren|Wren]]'''
<syntaxhighlight lang="vb">Dim Shared As Integer t()
Dim Shared As Integer indices()

Sub Tabla(patron As String)
Dim As Integer nc = Len(patron)
Redim As Integer t(nc)
Dim As Integer idx = 1 ' index into table
Dim As Integer lon = 0 ' length of previous longest prefix

While idx < nc
If patron[idx] = patron[lon] Then
lon += 1
t(idx) = lon
idx += 1
Elseif lon <> 0 Then
lon = t(lon-1)
Else
t(idx) = 0
idx += 1
End If
Wend
End Sub

Sub BusquedaKMP(texto As String, patron As String)
Dim As Integer hc = Len(texto)
Dim As Integer nc = Len(patron)
Dim As Integer i = 0 ' index into haystack
Dim As Integer j = 0 ' index into needle
Dim As Integer cont = 1
Tabla(patron)
While i < hc
If patron[j] = texto[i] Then
i += 1
j += 1
End If
If j = nc Then
Redim Preserve indices(cont)
indices(cont) = (i - j)
cont += 1
j = t(j-1)
Elseif i < hc And patron[j] <> texto[i] Then
If j <> 0 Then j = t(j-1) Else i += 1
End If
Wend
End Sub

Dim As Integer i, j, k
Dim As String cadenas(1 To 6) = {"GCTAGCTCTACGAGTCTA", "GGCTATAATGCGTA", _
"there would have been a time for such a word", "needle need noodle needle", _
"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented", _
"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk."}
Dim As String palabra(1 To 7) = { _
"TCTA", "TAATAAA", "word", "needle", "put", "and", "alfalfa"}

For i = 1 To Ubound(cadenas)
Print Using "text# = &"; i; cadenas(i)
Next
Print
For i = 1 To Ubound(palabra)
j = Iif(i < 6, i, i-1)
Print Using "Found '&' in 'text#' at indices [ "; palabra(i); j;
BusquedaKMP(cadenas(j), palabra(i))
If Ubound(indices) > 0 Then
For k = 1 To Ubound(indices)
Print indices(k) & ", ";
Next
Else
Print " ";
End If
Print Chr(8) & Chr(8) & " ]"
Erase indices
Next

Sleep</syntaxhighlight>
{{out}}
<pre>text1 = GCTAGCTCTACGAGTCTA
text2 = GGCTATAATGCGTA
text3 = there would have been a time for such a word
text4 = needle need noodle needle
text5 = InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented
text6 = Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.

Found 'TCTA' in 'text1' at indices [ 6, 14 ]
Found 'TAATAAA' in 'text2' at indices [ ]
Found 'word' in 'text3' at indices [ 40 ]
Found 'needle' in 'text4' at indices [ 0, 19 ]
Found 'put' in 'text5' at indices [ 26, 90 ]
Found 'and' in 'text5' at indices [ 101, 128, 171 ]
Found 'alfalfa' in 'text6' at indices [ 33, 87 ]</pre>

=={{header|J}}==
Implementation:<syntaxhighlight lang="j">kmp_table=: {{
Implementation:<syntaxhighlight lang="j">kmp_table=: {{
j=. 0
j=. 0
Line 306: Line 391:
The program will also work with gojq once the prefix "KMP::" is
The program will also work with gojq once the prefix "KMP::" is
omitted in the two lines in which it occurs.
omitted in the two lines in which it occurs.
<syntaxhighlight lang=jq>
<syntaxhighlight lang=jq># search for the string needle in the string haystack
# search for the string needle in the string haystack
def KMP::search(haystack; needle):
def KMP::search(haystack; needle):


Line 373: Line 457:
| "Found '\($pats[$i])' in text\($j+1) at indices \(KMP::search($texts[$j]; $pats[$i]) )" ) ;
| "Found '\($pats[$i])' in text\($j+1) at indices \(KMP::search($texts[$j]; $pats[$i]) )" ) ;


tasks(texts; pats)
tasks(texts; pats)</syntaxhighlight>
</syntaxhighlight>


'''Invocation''': jq -nr -f kmp-string-search.jq
'''Invocation''': jq -nr -f kmp-string-search.jq
{{output}}
{{output}}
<pre>text1 = GCTAGCTCTACGAGTCTA
<pre>
text1 = GCTAGCTCTACGAGTCTA
text2 = GGCTATAATGCGTA
text2 = GGCTATAATGCGTA
text3 = there would have been a time for such a word
text3 = there would have been a time for such a word
Line 392: Line 474:
Found 'put' in text5 at indices [26, 90]
Found 'put' in text5 at indices [26, 90]
Found 'and' in text5 at indices [101, 128, 171]
Found 'and' in text5 at indices [101, 128, 171]
Found 'alfalfa' in text6 at indices [33, 87]
Found 'alfalfa' in text6 at indices [33, 87]</pre>
</pre>


=={{header|Julia}}==
=={{header|Julia}}==
Line 473: Line 554:
println("Found <$pat3> at: (one-based indices) ", kmp_search(text2, pat3))
println("Found <$pat3> at: (one-based indices) ", kmp_search(text2, pat3))
</syntaxhighlight>{{out}}
</syntaxhighlight>{{out}}
<pre>Found <put> at: (one-based indices) [27, 91]
<pre>
Found <put> at: (one-based indices) [27, 91]
Found <and> at: (one-based indices) [102, 129, 172]
Found <and> at: (one-based indices) [102, 129, 172]
Found <alfalfa> at: (one-based indices) [34, 88]
Found <alfalfa> at: (one-based indices) [34, 88]</pre>
</pre>


=={{header|Nim}}==
=={{header|Nim}}==
Line 544: Line 623:
echo &"Not found “{pattern}” in “Text{j+1}”."
echo &"Not found “{pattern}” in “Text{j+1}”."
</syntaxhighlight>
</syntaxhighlight>

{{out}}
{{out}}
<pre>Text1 = GCTAGCTCTACGAGTCTA
<pre>Text1 = GCTAGCTCTACGAGTCTA
Line 559: Line 637:
Found “put” in “Text5” at indices 26, 90.
Found “put” in “Text5” at indices 26, 90.
Found “and” in “Text5” at indices 101, 128, 171.
Found “and” in “Text5” at indices 101, 128, 171.
Found “alfalfa” in “Text6” at indices 33, 87.
Found “alfalfa” in “Text6” at indices 33, 87.</pre>
</pre>


=={{header|Perl}}==
=={{header|Perl}}==
Line 714: Line 791:
Significantly higher character comparison counts than [[Boyer-Moore_string_search#Phix]].<br>
Significantly higher character comparison counts than [[Boyer-Moore_string_search#Phix]].<br>
Also higher than those on the above (lecroq) link, probably because this carries on for all matches.
Also higher than those on the above (lecroq) link, probably because this carries on for all matches.
<pre>Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (26 character comparisons)
<pre>
Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (26 character comparisons)
Found `TCTA` in `GCTAGCTCTACGAGTCTA` twice at {7,15} (19 character comparisons)
Found `TCTA` in `GCTAGCTCTACGAGTCTA` twice at {7,15} (19 character comparisons)
No `TAATAAA` in `GGCTATAATGCGTA` (16 character comparisons)
No `TAATAAA` in `GGCTATAATGCGTA` (16 character comparisons)
Line 722: Line 798:
Found `put` in `Inhisbooks...epresented (202 characters)` twice at {27,91} (205 character comparisons)
Found `put` in `Inhisbooks...epresented (202 characters)` twice at {27,91} (205 character comparisons)
Found `and` in `Inhisbooks...epresented (202 characters)` three times at {102,129,172} (216 character comparisons)
Found `and` in `Inhisbooks...epresented (202 characters)` three times at {102,129,172} (216 character comparisons)
Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (125 character comparisons)
Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (125 character comparisons)</pre>
</pre>


=={{header|Python}}==
=={{header|Python}}==
Line 828: Line 903:
main()
main()
</syntaxhighlight>
</syntaxhighlight>

{{out}}
{{out}}
<pre>Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].
<pre>
Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].
`TAATAAA` not found in `GGCTATAATG...`
`TAATAAA` not found in `GGCTATAATG...`
Found `word` in `there woul...` once at [40].
Found `word` in `there woul...` once at [40].
Line 837: Line 910:
Found `put` in `Inhisbooks...` 2 times at [26, 90].
Found `put` in `Inhisbooks...` 2 times at [26, 90].
Found `and` in `Inhisbooks...` 3 times at [101, 128, 171].
Found `and` in `Inhisbooks...` 3 times at [101, 128, 171].
Found `alfalfa` in `Nearby far...` 2 times at [33, 87].
Found `alfalfa` in `Nearby far...` 2 times at [33, 87].</pre>
</pre>


=={{header|Raku}}==
=={{header|Raku}}==
Line 892: Line 964:
}</syntaxhighlight>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>text0 = GCTAGCTCTACGAGTCTA
<pre>
text0 = GCTAGCTCTACGAGTCTA
text1 = GGCTATAATGCGTA
text1 = GGCTATAATGCGTA
text2 = there would have been a time for such a word
text2 = there would have been a time for such a word
Line 908: Line 979:
Found 'put' in 'text5' at indices (26 90)
Found 'put' in 'text5' at indices (26 90)
Found 'and' in 'text5' at indices (101 128 171)
Found 'and' in 'text5' at indices (101 128 171)
Found 'alfalfa' in 'text6' at indices (33 87)
Found 'alfalfa' in 'text6' at indices (33 87)</pre>
</pre>


=={{header|Sidef}}==
=={{header|Sidef}}==
Line 979: Line 1,049:
}</syntaxhighlight>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>text0 = GCTAGCTCTACGAGTCTA
<pre>
text0 = GCTAGCTCTACGAGTCTA
text1 = GGCTATAATGCGTA
text1 = GGCTATAATGCGTA
text2 = there would have been a time for such a word
text2 = there would have been a time for such a word
Line 995: Line 1,064:
Found 'put' in 'text5' at indices [26, 90]
Found 'put' in 'text5' at indices [26, 90]
Found 'and' in 'text5' at indices [101, 128, 171]
Found 'and' in 'text5' at indices [101, 128, 171]
Found 'alfalfa' in 'text6' at indices [33, 87]
Found 'alfalfa' in 'text6' at indices [33, 87]</pre>
</pre>


=={{header|Wren}}==
=={{header|Wren}}==
Line 1,065: Line 1,133:
System.print("Found '%(pats[i])' in 'text%(j+1)' at indices %(KMP.search(texts[j], pats[i]))")
System.print("Found '%(pats[i])' in 'text%(j+1)' at indices %(KMP.search(texts[j], pats[i]))")
}</syntaxhighlight>
}</syntaxhighlight>

{{out}}
{{out}}
<pre>text1 = GCTAGCTCTACGAGTCTA
<pre>
text1 = GCTAGCTCTACGAGTCTA
text2 = GGCTATAATGCGTA
text2 = GGCTATAATGCGTA
text3 = there would have been a time for such a word
text3 = there would have been a time for such a word
Line 1,081: Line 1,147:
Found 'put' in 'text5' at indices [26, 90]
Found 'put' in 'text5' at indices [26, 90]
Found 'and' in 'text5' at indices [101, 128, 171]
Found 'and' in 'text5' at indices [101, 128, 171]
Found 'alfalfa' in 'text6' at indices [33, 87]
Found 'alfalfa' in 'text6' at indices [33, 87]</pre>
</pre>