Knuth-Morris-Pratt string search: Difference between revisions

Line 5:

=={{header|11l}}==

<syntaxhighlight lang="11l">F kmp_table(String word) -> [Int]

F kmp_table(String word) -> [Int]

V position = 1

V candidate = 0

Line 74:

Line 72:

print(‘Found `’word‘` in `’text[0.<10]‘...` once at ’positions‘.’)

E

print(‘Found `’word‘` in `’text[0.<10]‘...` ’number_of_positions‘ times at ’positions‘.’)

print(‘Found `’word‘` in `’text[0.<10]‘...` ’number_of_positions‘ times at ’positions‘.’)</syntaxhighlight>

</syntaxhighlight>

<pre>Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].

<pre>

Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].

`TAATAAA` not found in `GGCTATAATG...`

Found `word` in `there woul...` once at [40].

Line 85:

Line 80:

Found `put` in `Inhisbooks...` 2 times at [26, 90].

Found `and` in `Inhisbooks...` 3 times at [101, 128, 171].

Found `alfalfa` in `Nearby far...` 2 times at [33, 87].

Found `alfalfa` in `Nearby far...` 2 times at [33, 87].</pre>

</pre>

=={{header|Emacs Lisp}}==

(defun kmp_compile_pattern (pattern)

Line 129:

Line 122:

(setq textPos (1+ textPos)) )

(if (= patPos M) (- textPos M) N ) ) ) )

(if (= patPos M) (- textPos M) N ) ) ) )</syntaxhighlight>

</syntaxhighlight>

=={{header|Fortran}}==

Line 248:

Line 241:

return

end function kmpsearch

end module kmp_mod

end module kmp_mod</syntaxhighlight>

</syntaxhighlight>

</pre>

=={{header|J}}==

=={{header|FreeBASIC}}==

'''Adapted from [[#Wren|Wren]]'''

<syntaxhighlight lang="vb">Dim Shared As Integer t()

Dim Shared As Integer indices()

Sub Tabla(patron As String)

Dim As Integer nc = Len(patron)

Redim As Integer t(nc)

Dim As Integer idx = 1 ' index into table

Dim As Integer lon = 0 ' length of previous longest prefix

While idx < nc

If patron[idx] = patron[lon] Then

lon += 1

t(idx) = lon

idx += 1

Elseif lon <> 0 Then

lon = t(lon-1)

Else

t(idx) = 0

idx += 1

End If

Wend

End Sub

Sub BusquedaKMP(texto As String, patron As String)

Dim As Integer hc = Len(texto)

Dim As Integer nc = Len(patron)

Dim As Integer i = 0 ' index into haystack

Dim As Integer j = 0 ' index into needle

Dim As Integer cont = 1

Tabla(patron)

While i < hc

If patron[j] = texto[i] Then

i += 1

j += 1

End If

If j = nc Then

Redim Preserve indices(cont)

indices(cont) = (i - j)

cont += 1

j = t(j-1)

Elseif i < hc And patron[j] <> texto[i] Then

If j <> 0 Then j = t(j-1) Else i += 1

End If

Wend

End Sub

Dim As Integer i, j, k

Dim As String cadenas(1 To 6) = {"GCTAGCTCTACGAGTCTA", "GGCTATAATGCGTA", _

"there would have been a time for such a word", "needle need noodle needle", _

"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented", _

"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk."}

Dim As String palabra(1 To 7) = { _

"TCTA", "TAATAAA", "word", "needle", "put", "and", "alfalfa"}

For i = 1 To Ubound(cadenas)

Print Using "text# = &"; i; cadenas(i)

For i = 1 To Ubound(palabra)

j = Iif(i < 6, i, i-1)

Print Using "Found '&' in 'text#' at indices [ "; palabra(i); j;

BusquedaKMP(cadenas(j), palabra(i))

If Ubound(indices) > 0 Then

For k = 1 To Ubound(indices)

Print indices(k) & ", ";

Print " ";

End If

Print Chr(8) & Chr(8) & " ]"

Erase indices

<pre>text1 = GCTAGCTCTACGAGTCTA

text2 = GGCTATAATGCGTA

text3 = there would have been a time for such a word

text4 = needle need noodle needle

text5 = InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented

text6 = Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.

Found 'TCTA' in 'text1' at indices [ 6, 14 ]

Found 'TAATAAA' in 'text2' at indices [ ]

Found 'word' in 'text3' at indices [ 40 ]

Found 'needle' in 'text4' at indices [ 0, 19 ]

Found 'put' in 'text5' at indices [ 26, 90 ]

Found 'and' in 'text5' at indices [ 101, 128, 171 ]

Found 'alfalfa' in 'text6' at indices [ 33, 87 ]</pre>

=={{header|J}}==

Implementation:<syntaxhighlight lang="j">kmp_table=: {{

j=. 0

Line 306:

Line 391:

The program will also work with gojq once the prefix "KMP::" is

omitted in the two lines in which it occurs.

<syntaxhighlight lang=jq># search for the string needle in the string haystack

# search for the string needle in the string haystack

def KMP::search(haystack; needle):

Line 373:

Line 457:

| "Found '\($pats[$i])' in text\($j+1) at indices \(KMP::search($texts[$j]; $pats[$i]) )" ) ;

tasks(texts; pats)

tasks(texts; pats)</syntaxhighlight>

</syntaxhighlight>

'''Invocation''': jq -nr -f kmp-string-search.jq

<pre>text1 = GCTAGCTCTACGAGTCTA

<pre>

text1 = GCTAGCTCTACGAGTCTA

text2 = GGCTATAATGCGTA

text3 = there would have been a time for such a word

Line 392:

Line 474:

Found 'put' in text5 at indices [26, 90]

Found 'and' in text5 at indices [101, 128, 171]

Found 'alfalfa' in text6 at indices [33, 87]

Found 'alfalfa' in text6 at indices [33, 87]</pre>

</pre>

=={{header|Julia}}==

Line 473:

Line 554:

println("Found <$pat3> at: (one-based indices) ", kmp_search(text2, pat3))

</syntaxhighlight>{{out}}

<pre>Found <put> at: (one-based indices) [27, 91]

<pre>

Found <put> at: (one-based indices) [27, 91]

Found <and> at: (one-based indices) [102, 129, 172]

Found <alfalfa> at: (one-based indices) [34, 88]

Found <alfalfa> at: (one-based indices) [34, 88]</pre>

</pre>

=={{header|Nim}}==

Line 544:

Line 623:

echo &"Not found “{pattern}” in “Text{j+1}”."

</syntaxhighlight>

<pre>Text1 = GCTAGCTCTACGAGTCTA

Line 559:

Line 637:

Found “put” in “Text5” at indices 26, 90.

Found “and” in “Text5” at indices 101, 128, 171.

Found “alfalfa” in “Text6” at indices 33, 87.

Found “alfalfa” in “Text6” at indices 33, 87.</pre>

</pre>

=={{header|Perl}}==

Line 714:

Line 791:

Significantly higher character comparison counts than [[Boyer-Moore_string_search#Phix]].<br>

Also higher than those on the above (lecroq) link, probably because this carries on for all matches.

<pre>Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (26 character comparisons)

<pre>

Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (26 character comparisons)

Found `TCTA` in `GCTAGCTCTACGAGTCTA` twice at {7,15} (19 character comparisons)

No `TAATAAA` in `GGCTATAATGCGTA` (16 character comparisons)

Line 722:

Line 798:

Found `put` in `Inhisbooks...epresented (202 characters)` twice at {27,91} (205 character comparisons)

Found `and` in `Inhisbooks...epresented (202 characters)` three times at {102,129,172} (216 character comparisons)

Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (125 character comparisons)

Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (125 character comparisons)</pre>

</pre>

=={{header|Python}}==

Line 828:

Line 903:

main()

</syntaxhighlight>

<pre>Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].

<pre>

Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].

`TAATAAA` not found in `GGCTATAATG...`

Found `word` in `there woul...` once at [40].

Line 837:

Line 910:

Found `put` in `Inhisbooks...` 2 times at [26, 90].

Found `and` in `Inhisbooks...` 3 times at [101, 128, 171].

Found `alfalfa` in `Nearby far...` 2 times at [33, 87].

Found `alfalfa` in `Nearby far...` 2 times at [33, 87].</pre>

</pre>

=={{header|Raku}}==

Line 892:

Line 964:

}</syntaxhighlight>

<pre>text0 = GCTAGCTCTACGAGTCTA

<pre>

text0 = GCTAGCTCTACGAGTCTA

text1 = GGCTATAATGCGTA

text2 = there would have been a time for such a word

Line 908:

Line 979:

Found 'put' in 'text5' at indices (26 90)

Found 'and' in 'text5' at indices (101 128 171)

Found 'alfalfa' in 'text6' at indices (33 87)

Found 'alfalfa' in 'text6' at indices (33 87)</pre>

</pre>

=={{header|Sidef}}==

Line 979:

Line 1,049:

}</syntaxhighlight>

<pre>text0 = GCTAGCTCTACGAGTCTA

<pre>

text0 = GCTAGCTCTACGAGTCTA

text1 = GGCTATAATGCGTA

text2 = there would have been a time for such a word

Line 995:

Line 1,064:

Found 'put' in 'text5' at indices [26, 90]

Found 'and' in 'text5' at indices [101, 128, 171]

Found 'alfalfa' in 'text6' at indices [33, 87]

Found 'alfalfa' in 'text6' at indices [33, 87]</pre>

</pre>

=={{header|Wren}}==

Line 1,065:

Line 1,133:

System.print("Found '%(pats[i])' in 'text%(j+1)' at indices %(KMP.search(texts[j], pats[i]))")

}</syntaxhighlight>

<pre>text1 = GCTAGCTCTACGAGTCTA

<pre>

text1 = GCTAGCTCTACGAGTCTA

text2 = GGCTATAATGCGTA

text3 = there would have been a time for such a word

Line 1,081:

Line 1,147:

Found 'put' in 'text5' at indices [26, 90]

Found 'and' in 'text5' at indices [101, 128, 171]

Found 'alfalfa' in 'text6' at indices [33, 87]

Found 'alfalfa' in 'text6' at indices [33, 87]</pre>

</pre>