Knuth-Morris-Pratt string search: Difference between revisions

m
→‎{{header|Wren}}: Changed to Wren S/H
(Added 11l)
m (→‎{{header|Wren}}: Changed to Wren S/H)
 
(6 intermediate revisions by 4 users not shown)
Line 5:
=={{header|11l}}==
{{trans|Python}}
<syntaxhighlight lang="11l">F kmp_table(String word) -> [Int]
 
<syntaxhighlight lang="11l">
F kmp_table(String word) -> [Int]
V position = 1
V candidate = 0
Line 74 ⟶ 72:
print(‘Found `’word‘` in `’text[0.<10]‘...` once at ’positions‘.’)
E
print(‘Found `’word‘` in `’text[0.<10]‘...` ’number_of_positions‘ times at ’positions‘.’)</syntaxhighlight>
</syntaxhighlight>
 
{{out}}
<pre>Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].
<pre>
Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].
`TAATAAA` not found in `GGCTATAATG...`
Found `word` in `there woul...` once at [40].
Line 85 ⟶ 80:
Found `put` in `Inhisbooks...` 2 times at [26, 90].
Found `and` in `Inhisbooks...` 3 times at [101, 128, 171].
Found `alfalfa` in `Nearby far...` 2 times at [33, 87].</pre>
</pre>
 
=={{header|Emacs Lisp}}==
 
<syntaxhighlight lang="lisp">
(defun kmp_compile_pattern (pattern)
Line 129 ⟶ 122:
(setq textPos (1+ textPos)) )
(if (= patPos M) (- textPos M) N ) ) ) )</syntaxhighlight>
</syntaxhighlight>
 
=={{header|JFortran}}==
<syntaxhighlight lang="Fortran">
module kmp_mod
use iso_fortran_env
implicit none
private
public :: kmpsearch
contains
!This subroutine creates the "fail" table used for the KMP algorithm
pure subroutine createtable(pattern,patlen,table)
implicit none
!
! Dummy arguments
!
integer :: patlen
character(*) :: pattern
integer , dimension(:) :: table
intent (in) patlen , pattern
intent (inout) table
!
! Local variables
!
integer :: counter
integer :: i
integer :: j
!*Code
counter=2
i=1
j=0
!Determine the table/fail value for each letter in the pattern
do i=1 , patlen
j=i
do
! If j is equal to zero, then there is no offset/fail value required. Therefore, just set the fail value to 0
if( j==1 )then
table(counter)=1
counter=counter+1
exit
end if
! If a match is made, the fail value can be imcremented by one (based off the fail value of the previous character in the pattern)
if( pattern(table(j):table(j))==pattern(i:i) )then
table(counter)=table(j)+1
counter=counter+1
exit
end if
! If neither if statement is true, restart the fail value counter
j=table(j)
end do
end do
end subroutine createtable
! This subroutine executes the string search using the KMP algorithm (fail table)
pure function kmpsearch(pattern,patlen,line,linelen) &
& result(foundmatch)
implicit none
!
! Dummy arguments
!
character(*) :: line
integer :: linelen
integer :: patlen
character(*) :: pattern
intent (in) line , linelen , patlen , pattern
!
! Local variables
!
integer :: foundmatch
integer :: indice
integer :: match
integer , dimension(256) :: table
!*Code
if( (patlen==0) .or. (linelen==0) .or. (linelen<patlen) )then
foundmatch=-1
return
end if
indice=1
match=0
foundmatch=-1
call createtable(pattern,patlen,table)
!Search the entire string
do while ( indice+match<=linelen )
if( match+1<patlen+1 )then
! If the character matches the character we are expecting (based on where in the pattern we have already matched characters with) increment the match indice
if( line(indice+match:indice+match)==pattern(match+1:match+1) )then
match=match+1
! If the match indice is equal to the length of the pattern, that means we have matched the entire pattern
if( match==patlen )then
foundmatch=indice !-1
exit !Found
end if
!Look at the table to determine what indice to check next
! If no match was made on the first letter of the pattern, then just increment the indice counter by one and check again
else if( match==0 )then
indice=indice+1
! Check how many characters to skip ahead (the point of the KMP algorithm)
else
indice=indice+match+1-table(match+1)
match=table(match+1)-1
end if
! If no match was made after scanning the length of the pattern, need increment the indice counter and check again
else if( match==0 )then
indice=indice+1 !Not even a partial match, try next character
else !Check how many characters to skip ahead (the point of the KMP algorithm)
indice=indice+match+1-table(match+1)
match=table(match+1)-1
end if
end do
! 'No match found'
return
end function kmpsearch
end module kmp_mod</syntaxhighlight>
 
=={{header|FreeBASIC}}==
'''Adapted from [[#Wren|Wren]]'''
<syntaxhighlight lang="vb">Dim Shared As Integer t()
Dim Shared As Integer indices()
 
Sub Tabla(patron As String)
Dim As Integer nc = Len(patron)
Redim As Integer t(nc)
Dim As Integer idx = 1 ' index into table
Dim As Integer lon = 0 ' length of previous longest prefix
 
While idx < nc
If patron[idx] = patron[lon] Then
lon += 1
t(idx) = lon
idx += 1
Elseif lon <> 0 Then
lon = t(lon-1)
Else
t(idx) = 0
idx += 1
End If
Wend
End Sub
 
Sub BusquedaKMP(texto As String, patron As String)
Dim As Integer hc = Len(texto)
Dim As Integer nc = Len(patron)
Dim As Integer i = 0 ' index into haystack
Dim As Integer j = 0 ' index into needle
Dim As Integer cont = 1
Tabla(patron)
While i < hc
If patron[j] = texto[i] Then
i += 1
j += 1
End If
If j = nc Then
Redim Preserve indices(cont)
indices(cont) = (i - j)
cont += 1
j = t(j-1)
Elseif i < hc And patron[j] <> texto[i] Then
If j <> 0 Then j = t(j-1) Else i += 1
End If
Wend
End Sub
 
Dim As Integer i, j, k
Dim As String cadenas(1 To 6) = {"GCTAGCTCTACGAGTCTA", "GGCTATAATGCGTA", _
"there would have been a time for such a word", "needle need noodle needle", _
"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented", _
"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk."}
Dim As String palabra(1 To 7) = { _
"TCTA", "TAATAAA", "word", "needle", "put", "and", "alfalfa"}
 
For i = 1 To Ubound(cadenas)
Print Using "text# = &"; i; cadenas(i)
Next
Print
For i = 1 To Ubound(palabra)
j = Iif(i < 6, i, i-1)
Print Using "Found '&' in 'text#' at indices [ "; palabra(i); j;
BusquedaKMP(cadenas(j), palabra(i))
If Ubound(indices) > 0 Then
For k = 1 To Ubound(indices)
Print indices(k) & ", ";
Next
Else
Print " ";
End If
Print Chr(8) & Chr(8) & " ]"
Erase indices
Next
 
Sleep</syntaxhighlight>
{{out}}
<pre>text1 = GCTAGCTCTACGAGTCTA
text2 = GGCTATAATGCGTA
text3 = there would have been a time for such a word
text4 = needle need noodle needle
text5 = InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented
text6 = Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.
 
Found 'TCTA' in 'text1' at indices [ 6, 14 ]
Found 'TAATAAA' in 'text2' at indices [ ]
Found 'word' in 'text3' at indices [ 40 ]
Found 'needle' in 'text4' at indices [ 0, 19 ]
Found 'put' in 'text5' at indices [ 26, 90 ]
Found 'and' in 'text5' at indices [ 101, 128, 171 ]
Found 'alfalfa' in 'text6' at indices [ 33, 87 ]</pre>
 
=={{header|J}}==
Implementation:<syntaxhighlight lang="j">kmp_table=: {{
j=. 0
Line 187 ⟶ 391:
The program will also work with gojq once the prefix "KMP::" is
omitted in the two lines in which it occurs.
<syntaxhighlight lang=jq># search for the string needle in the string haystack
# search for the string needle in the string haystack
def KMP::search(haystack; needle):
 
Line 254 ⟶ 457:
| "Found '\($pats[$i])' in text\($j+1) at indices \(KMP::search($texts[$j]; $pats[$i]) )" ) ;
 
tasks(texts; pats)</syntaxhighlight>
</syntaxhighlight>
 
'''Invocation''': jq -nr -f kmp-string-search.jq
{{output}}
<pre>text1 = GCTAGCTCTACGAGTCTA
<pre>
text1 = GCTAGCTCTACGAGTCTA
text2 = GGCTATAATGCGTA
text3 = there would have been a time for such a word
Line 273 ⟶ 474:
Found 'put' in text5 at indices [26, 90]
Found 'and' in text5 at indices [101, 128, 171]
Found 'alfalfa' in text6 at indices [33, 87]</pre>
</pre>
 
=={{header|Julia}}==
Line 354 ⟶ 554:
println("Found <$pat3> at: (one-based indices) ", kmp_search(text2, pat3))
</syntaxhighlight>{{out}}
<pre>Found <put> at: (one-based indices) [27, 91]
<pre>
Found <put> at: (one-based indices) [27, 91]
Found <and> at: (one-based indices) [102, 129, 172]
Found <alfalfa> at: (one-based indices) [34, 88]</pre>
 
</pre>
=={{header|Nim}}==
This is a translation of pseudocode in Wikipedia page. We use the examples of Wren solution.
<syntaxhighlight lang="Nim">import std/[strformat, strutils]
 
func kmpTable(w: string): seq[int] =
## Build the partial match table for "w".
result.setLen w.len + 1
var pos = 1
var cnd = 0
result[0] = -1
while pos < w.len:
if w[pos] == w[cnd]:
result[pos] = result[cnd]
else:
result[pos] = cnd
while cnd >= 0 and w[pos] != w[cnd]:
cnd = result[cnd]
inc pos
inc cnd
result[pos] = cnd
 
func kmpSearch(s, w: string): seq[int] =
## Return the positions of "w" ins "s".
var j, k = 0
let t = kmpTable(w)
while j < s.len:
if w[k] == s[j]:
inc j
inc k
if k == w.len:
result.add j - k
k = t[k]
else:
k = t[k]
if k < 0:
inc j
inc k
 
 
const
Texts = ["GCTAGCTCTACGAGTCTA",
"GGCTATAATGCGTA",
"there would have been a time for such a word",
"needle need noodle needle",
"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuth" &
"usesanimaginarycomputertheMIXanditsassociatedmachinecodeandassembly" &
"languagestoillustratetheconceptsandalgorithmsastheyarepresented",
"Nearby farms grew a half acre of alfalfa on the dairy's behalf, " &
"with bales of all that alfalfa exchanged for milk."
]
 
# Couples (pattern, index of text to use).
Patterns = {"TCTA": 0, "TAATAAA": 1, "word": 2, "needle": 3, "put": 4, "and": 4, "alfalfa": 5}
 
for i, text in Texts:
echo &"Text{i+1} = {text}"
echo()
 
for i, (pattern, j) in Patterns:
let indices = Texts[j].kmpSearch(pattern)
if indices.len > 0:
echo &"Found “{pattern}” in “Text{j+1}” at indices {indices.join(\", \")}."
else:
echo &"Not found “{pattern}” in “Text{j+1}”."
</syntaxhighlight>
{{out}}
<pre>Text1 = GCTAGCTCTACGAGTCTA
Text2 = GGCTATAATGCGTA
Text3 = there would have been a time for such a word
Text4 = needle need noodle needle
Text5 = InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented
Text6 = Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.
 
Found “TCTA” in “Text1” at indices 6, 14.
Not found “TAATAAA” in “Text2”.
Found “word” in “Text3” at indices 40.
Found “needle” in “Text4” at indices 0, 19.
Found “put” in “Text5” at indices 26, 90.
Found “and” in “Text5” at indices 101, 128, 171.
Found “alfalfa” in “Text6” at indices 33, 87.</pre>
 
=={{header|Perl}}==
Line 512 ⟶ 791:
Significantly higher character comparison counts than [[Boyer-Moore_string_search#Phix]].<br>
Also higher than those on the above (lecroq) link, probably because this carries on for all matches.
<pre>Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (26 character comparisons)
<pre>
Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (26 character comparisons)
Found `TCTA` in `GCTAGCTCTACGAGTCTA` twice at {7,15} (19 character comparisons)
No `TAATAAA` in `GGCTATAATGCGTA` (16 character comparisons)
Line 520 ⟶ 798:
Found `put` in `Inhisbooks...epresented (202 characters)` twice at {27,91} (205 character comparisons)
Found `and` in `Inhisbooks...epresented (202 characters)` three times at {102,129,172} (216 character comparisons)
Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (125 character comparisons)</pre>
</pre>
 
=={{header|Python}}==
Line 626 ⟶ 903:
main()
</syntaxhighlight>
 
{{out}}
<pre>Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].
<pre>
Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].
`TAATAAA` not found in `GGCTATAATG...`
Found `word` in `there woul...` once at [40].
Line 635 ⟶ 910:
Found `put` in `Inhisbooks...` 2 times at [26, 90].
Found `and` in `Inhisbooks...` 3 times at [101, 128, 171].
Found `alfalfa` in `Nearby far...` 2 times at [33, 87].</pre>
</pre>
 
=={{header|Raku}}==
Line 690 ⟶ 964:
}</syntaxhighlight>
{{out}}
<pre>text0 = GCTAGCTCTACGAGTCTA
<pre>
text0 = GCTAGCTCTACGAGTCTA
text1 = GGCTATAATGCGTA
text2 = there would have been a time for such a word
Line 706 ⟶ 979:
Found 'put' in 'text5' at indices (26 90)
Found 'and' in 'text5' at indices (101 128 171)
Found 'alfalfa' in 'text6' at indices (33 87)</pre>
 
</pre>
=={{header|Sidef}}==
{{trans|Raku}}
<syntaxhighlight lang="ruby">func kmp_search (String S, String W) {
 
S.graphemes!
W.graphemes!
 
func kmp_table {
var (pos, cnd, *T) = (1, 0, -1)
for (nil; pos < W.len; (++pos; ++cnd)) {
if (W[pos] == W[cnd]) {
T[pos] = T[cnd]
}
else {
T[pos] = cnd
while ((cnd >= 0) && (W[pos] != W[cnd])) {
cnd = T[cnd]
}
}
}
T[pos] = cnd
return T
}
 
var (k, T, *I) = (0, kmp_table())
 
for (var j = 0; j < S.len; nil) {
if (W[k] == S[j]) {
++j
++k
if (k == W.len) {
I << (j - k)
k = T[k]
}
}
elsif ((k = T[k]) < 0) {
++j
++k
}
}
 
return I
}
 
var texts = [
"GCTAGCTCTACGAGTCTA",
"GGCTATAATGCGTA",
"there would have been a time for such a word",
"needle need noodle needle",
"BharôtভাৰতBharôtভারতIndiaBhāratભારતBhāratभारतBhārataಭಾರತBhāratभारतBhārat"+
"amഭാരതംBhāratभारतBhāratभारतBharôtôଭାରତBhāratਭਾਰਤBhāratamभारतम्Bārataபாரத"+
"ம்BhāratamഭാരതംBhāratadēsamభారతదేశం",
"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnu"+
"thusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblyl"+
"anguagestoillustratetheconceptsandalgorithmsastheyarepresented",
"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with ba"+
"les of all that alfalfa exchanged for milk.",
]
 
var pats = ["TCTA", "TAATAAA", "word", "needle", "ഭാരതം", "put", "and", "alfalfa"]
 
texts.each_kv {|k,v| say "text#{k} = #{v}" }; say ''
 
pats.each_kv {|k,v|
var j = (k < 6 ? k : k-1)
say ("Found '#{v}' in 'text#{j}' at indices ", kmp_search(texts[j], v))
}</syntaxhighlight>
{{out}}
<pre>text0 = GCTAGCTCTACGAGTCTA
text1 = GGCTATAATGCGTA
text2 = there would have been a time for such a word
text3 = needle need noodle needle
text4 = BharôtভাৰতBharôtভারতIndiaBhāratભારતBhāratभारतBhārataಭಾರತBhāratभारतBhāratamഭാരതംBhāratभारतBhāratभारतBharôtôଭାରତBhāratਭਾਰਤBhāratamभारतम्Bārataபாரதம்BhāratamഭാരതംBhāratadēsamభారతదేశం
text5 = InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented
text6 = Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.
 
Found 'TCTA' in 'text0' at indices [6, 14]
Found 'TAATAAA' in 'text1' at indices []
Found 'word' in 'text2' at indices [40]
Found 'needle' in 'text3' at indices [0, 19]
Found 'ഭാരതം' in 'text4' at indices [68, 138]
Found 'put' in 'text5' at indices [26, 90]
Found 'and' in 'text5' at indices [101, 128, 171]
Found 'alfalfa' in 'text6' at indices [33, 87]</pre>
 
=={{header|Wren}}==
This is based on the code [https://www.geeksforgeeks.org/kmp-algorithm-for-pattern-searching/ here]. The examples used are the same as in the [[Boyer-Moore_string_search#Wren]] task.
<syntaxhighlight lang="ecmascriptwren">class KMP {
static search(haystack, needle) {
haystack = haystack.bytes.toList
Line 776 ⟶ 1,133:
System.print("Found '%(pats[i])' in 'text%(j+1)' at indices %(KMP.search(texts[j], pats[i]))")
}</syntaxhighlight>
 
{{out}}
<pre>text1 = GCTAGCTCTACGAGTCTA
<pre>
text1 = GCTAGCTCTACGAGTCTA
text2 = GGCTATAATGCGTA
text3 = there would have been a time for such a word
Line 792 ⟶ 1,147:
Found 'put' in 'text5' at indices [26, 90]
Found 'and' in 'text5' at indices [101, 128, 171]
Found 'alfalfa' in 'text6' at indices [33, 87]</pre>
</pre>
9,476

edits