Knuth-Morris-Pratt string search: Difference between revisions

m
→‎{{header|Wren}}: Changed to Wren S/H
(Added a missing ‘"’ in the strings.)
m (→‎{{header|Wren}}: Changed to Wren S/H)
 
(3 intermediate revisions by 3 users not shown)
Line 5:
=={{header|11l}}==
{{trans|Python}}
<syntaxhighlight lang="11l">F kmp_table(String word) -> [Int]
 
<syntaxhighlight lang="11l">
F kmp_table(String word) -> [Int]
V position = 1
V candidate = 0
Line 74 ⟶ 72:
print(‘Found `’word‘` in `’text[0.<10]‘...` once at ’positions‘.’)
E
print(‘Found `’word‘` in `’text[0.<10]‘...` ’number_of_positions‘ times at ’positions‘.’)</syntaxhighlight>
</syntaxhighlight>
 
{{out}}
<pre>Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].
<pre>
Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].
`TAATAAA` not found in `GGCTATAATG...`
Found `word` in `there woul...` once at [40].
Line 85 ⟶ 80:
Found `put` in `Inhisbooks...` 2 times at [26, 90].
Found `and` in `Inhisbooks...` 3 times at [101, 128, 171].
Found `alfalfa` in `Nearby far...` 2 times at [33, 87].</pre>
</pre>
 
=={{header|Emacs Lisp}}==
 
<syntaxhighlight lang="lisp">
(defun kmp_compile_pattern (pattern)
Line 129 ⟶ 122:
(setq textPos (1+ textPos)) )
(if (= patPos M) (- textPos M) N ) ) ) )</syntaxhighlight>
</syntaxhighlight>
 
=={{header|JFortran}}==
<syntaxhighlight lang="Fortran">
module kmp_mod
use iso_fortran_env
implicit none
private
public :: kmpsearch
contains
!This subroutine creates the "fail" table used for the KMP algorithm
pure subroutine createtable(pattern,patlen,table)
implicit none
!
! Dummy arguments
!
integer :: patlen
character(*) :: pattern
integer , dimension(:) :: table
intent (in) patlen , pattern
intent (inout) table
!
! Local variables
!
integer :: counter
integer :: i
integer :: j
!*Code
counter=2
i=1
j=0
!Determine the table/fail value for each letter in the pattern
do i=1 , patlen
j=i
do
! If j is equal to zero, then there is no offset/fail value required. Therefore, just set the fail value to 0
if( j==1 )then
table(counter)=1
counter=counter+1
exit
end if
! If a match is made, the fail value can be imcremented by one (based off the fail value of the previous character in the pattern)
if( pattern(table(j):table(j))==pattern(i:i) )then
table(counter)=table(j)+1
counter=counter+1
exit
end if
! If neither if statement is true, restart the fail value counter
j=table(j)
end do
end do
end subroutine createtable
! This subroutine executes the string search using the KMP algorithm (fail table)
pure function kmpsearch(pattern,patlen,line,linelen) &
& result(foundmatch)
implicit none
!
! Dummy arguments
!
character(*) :: line
integer :: linelen
integer :: patlen
character(*) :: pattern
intent (in) line , linelen , patlen , pattern
!
! Local variables
!
integer :: foundmatch
integer :: indice
integer :: match
integer , dimension(256) :: table
!*Code
if( (patlen==0) .or. (linelen==0) .or. (linelen<patlen) )then
foundmatch=-1
return
end if
indice=1
match=0
foundmatch=-1
call createtable(pattern,patlen,table)
!Search the entire string
do while ( indice+match<=linelen )
if( match+1<patlen+1 )then
! If the character matches the character we are expecting (based on where in the pattern we have already matched characters with) increment the match indice
if( line(indice+match:indice+match)==pattern(match+1:match+1) )then
match=match+1
! If the match indice is equal to the length of the pattern, that means we have matched the entire pattern
if( match==patlen )then
foundmatch=indice !-1
exit !Found
end if
!Look at the table to determine what indice to check next
! If no match was made on the first letter of the pattern, then just increment the indice counter by one and check again
else if( match==0 )then
indice=indice+1
! Check how many characters to skip ahead (the point of the KMP algorithm)
else
indice=indice+match+1-table(match+1)
match=table(match+1)-1
end if
! If no match was made after scanning the length of the pattern, need increment the indice counter and check again
else if( match==0 )then
indice=indice+1 !Not even a partial match, try next character
else !Check how many characters to skip ahead (the point of the KMP algorithm)
indice=indice+match+1-table(match+1)
match=table(match+1)-1
end if
end do
! 'No match found'
return
end function kmpsearch
end module kmp_mod</syntaxhighlight>
 
=={{header|FreeBASIC}}==
'''Adapted from [[#Wren|Wren]]'''
<syntaxhighlight lang="vb">Dim Shared As Integer t()
Dim Shared As Integer indices()
 
Sub Tabla(patron As String)
Dim As Integer nc = Len(patron)
Redim As Integer t(nc)
Dim As Integer idx = 1 ' index into table
Dim As Integer lon = 0 ' length of previous longest prefix
 
While idx < nc
If patron[idx] = patron[lon] Then
lon += 1
t(idx) = lon
idx += 1
Elseif lon <> 0 Then
lon = t(lon-1)
Else
t(idx) = 0
idx += 1
End If
Wend
End Sub
 
Sub BusquedaKMP(texto As String, patron As String)
Dim As Integer hc = Len(texto)
Dim As Integer nc = Len(patron)
Dim As Integer i = 0 ' index into haystack
Dim As Integer j = 0 ' index into needle
Dim As Integer cont = 1
Tabla(patron)
While i < hc
If patron[j] = texto[i] Then
i += 1
j += 1
End If
If j = nc Then
Redim Preserve indices(cont)
indices(cont) = (i - j)
cont += 1
j = t(j-1)
Elseif i < hc And patron[j] <> texto[i] Then
If j <> 0 Then j = t(j-1) Else i += 1
End If
Wend
End Sub
 
Dim As Integer i, j, k
Dim As String cadenas(1 To 6) = {"GCTAGCTCTACGAGTCTA", "GGCTATAATGCGTA", _
"there would have been a time for such a word", "needle need noodle needle", _
"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented", _
"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk."}
Dim As String palabra(1 To 7) = { _
"TCTA", "TAATAAA", "word", "needle", "put", "and", "alfalfa"}
 
For i = 1 To Ubound(cadenas)
Print Using "text# = &"; i; cadenas(i)
Next
Print
For i = 1 To Ubound(palabra)
j = Iif(i < 6, i, i-1)
Print Using "Found '&' in 'text#' at indices [ "; palabra(i); j;
BusquedaKMP(cadenas(j), palabra(i))
If Ubound(indices) > 0 Then
For k = 1 To Ubound(indices)
Print indices(k) & ", ";
Next
Else
Print " ";
End If
Print Chr(8) & Chr(8) & " ]"
Erase indices
Next
 
Sleep</syntaxhighlight>
{{out}}
<pre>text1 = GCTAGCTCTACGAGTCTA
text2 = GGCTATAATGCGTA
text3 = there would have been a time for such a word
text4 = needle need noodle needle
text5 = InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented
text6 = Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.
 
Found 'TCTA' in 'text1' at indices [ 6, 14 ]
Found 'TAATAAA' in 'text2' at indices [ ]
Found 'word' in 'text3' at indices [ 40 ]
Found 'needle' in 'text4' at indices [ 0, 19 ]
Found 'put' in 'text5' at indices [ 26, 90 ]
Found 'and' in 'text5' at indices [ 101, 128, 171 ]
Found 'alfalfa' in 'text6' at indices [ 33, 87 ]</pre>
 
=={{header|J}}==
Implementation:<syntaxhighlight lang="j">kmp_table=: {{
j=. 0
Line 187 ⟶ 391:
The program will also work with gojq once the prefix "KMP::" is
omitted in the two lines in which it occurs.
<syntaxhighlight lang=jq># search for the string needle in the string haystack
# search for the string needle in the string haystack
def KMP::search(haystack; needle):
 
Line 254 ⟶ 457:
| "Found '\($pats[$i])' in text\($j+1) at indices \(KMP::search($texts[$j]; $pats[$i]) )" ) ;
 
tasks(texts; pats)</syntaxhighlight>
</syntaxhighlight>
 
'''Invocation''': jq -nr -f kmp-string-search.jq
{{output}}
<pre>text1 = GCTAGCTCTACGAGTCTA
<pre>
text1 = GCTAGCTCTACGAGTCTA
text2 = GGCTATAATGCGTA
text3 = there would have been a time for such a word
Line 273 ⟶ 474:
Found 'put' in text5 at indices [26, 90]
Found 'and' in text5 at indices [101, 128, 171]
Found 'alfalfa' in text6 at indices [33, 87]</pre>
</pre>
 
=={{header|Julia}}==
Line 354 ⟶ 554:
println("Found <$pat3> at: (one-based indices) ", kmp_search(text2, pat3))
</syntaxhighlight>{{out}}
<pre>Found <put> at: (one-based indices) [27, 91]
<pre>
Found <put> at: (one-based indices) [27, 91]
Found <and> at: (one-based indices) [102, 129, 172]
Found <alfalfa> at: (one-based indices) [34, 88]</pre>
</pre>
 
=={{header|Nim}}==
Line 425 ⟶ 623:
echo &"Not found “{pattern}” in “Text{j+1}”."
</syntaxhighlight>
 
{{out}}
<pre>Text1 = GCTAGCTCTACGAGTCTA
Line 440 ⟶ 637:
Found “put” in “Text5” at indices 26, 90.
Found “and” in “Text5” at indices 101, 128, 171.
Found “alfalfa” in “Text6” at indices 33, 87.</pre>
</pre>
 
=={{header|Perl}}==
Line 595 ⟶ 791:
Significantly higher character comparison counts than [[Boyer-Moore_string_search#Phix]].<br>
Also higher than those on the above (lecroq) link, probably because this carries on for all matches.
<pre>Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (26 character comparisons)
<pre>
Found `GCAGAGAG` in `GCATCGCAGAGAGTATACAGTACG` once at {6} (26 character comparisons)
Found `TCTA` in `GCTAGCTCTACGAGTCTA` twice at {7,15} (19 character comparisons)
No `TAATAAA` in `GGCTATAATGCGTA` (16 character comparisons)
Line 603 ⟶ 798:
Found `put` in `Inhisbooks...epresented (202 characters)` twice at {27,91} (205 character comparisons)
Found `and` in `Inhisbooks...epresented (202 characters)` three times at {102,129,172} (216 character comparisons)
Found `alfalfa` in `Nearby far... for milk. (114 characters)` twice at {34,88} (125 character comparisons)</pre>
</pre>
 
=={{header|Python}}==
Line 709 ⟶ 903:
main()
</syntaxhighlight>
 
{{out}}
<pre>Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].
<pre>
Found `TCTA` in `GCTAGCTCTA...` 2 times at [6, 14].
`TAATAAA` not found in `GGCTATAATG...`
Found `word` in `there woul...` once at [40].
Line 718 ⟶ 910:
Found `put` in `Inhisbooks...` 2 times at [26, 90].
Found `and` in `Inhisbooks...` 3 times at [101, 128, 171].
Found `alfalfa` in `Nearby far...` 2 times at [33, 87].</pre>
</pre>
 
=={{header|Raku}}==
Line 773 ⟶ 964:
}</syntaxhighlight>
{{out}}
<pre>text0 = GCTAGCTCTACGAGTCTA
<pre>
text0 = GCTAGCTCTACGAGTCTA
text1 = GGCTATAATGCGTA
text2 = there would have been a time for such a word
Line 789 ⟶ 979:
Found 'put' in 'text5' at indices (26 90)
Found 'and' in 'text5' at indices (101 128 171)
Found 'alfalfa' in 'text6' at indices (33 87)</pre>
 
</pre>
=={{header|Sidef}}==
{{trans|Raku}}
<syntaxhighlight lang="ruby">func kmp_search (String S, String W) {
 
S.graphemes!
W.graphemes!
 
func kmp_table {
var (pos, cnd, *T) = (1, 0, -1)
for (nil; pos < W.len; (++pos; ++cnd)) {
if (W[pos] == W[cnd]) {
T[pos] = T[cnd]
}
else {
T[pos] = cnd
while ((cnd >= 0) && (W[pos] != W[cnd])) {
cnd = T[cnd]
}
}
}
T[pos] = cnd
return T
}
 
var (k, T, *I) = (0, kmp_table())
 
for (var j = 0; j < S.len; nil) {
if (W[k] == S[j]) {
++j
++k
if (k == W.len) {
I << (j - k)
k = T[k]
}
}
elsif ((k = T[k]) < 0) {
++j
++k
}
}
 
return I
}
 
var texts = [
"GCTAGCTCTACGAGTCTA",
"GGCTATAATGCGTA",
"there would have been a time for such a word",
"needle need noodle needle",
"BharôtভাৰতBharôtভারতIndiaBhāratભારતBhāratभारतBhārataಭಾರತBhāratभारतBhārat"+
"amഭാരതംBhāratभारतBhāratभारतBharôtôଭାରତBhāratਭਾਰਤBhāratamभारतम्Bārataபாரத"+
"ம்BhāratamഭാരതംBhāratadēsamభారతదేశం",
"InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnu"+
"thusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblyl"+
"anguagestoillustratetheconceptsandalgorithmsastheyarepresented",
"Nearby farms grew a half acre of alfalfa on the dairy's behalf, with ba"+
"les of all that alfalfa exchanged for milk.",
]
 
var pats = ["TCTA", "TAATAAA", "word", "needle", "ഭാരതം", "put", "and", "alfalfa"]
 
texts.each_kv {|k,v| say "text#{k} = #{v}" }; say ''
 
pats.each_kv {|k,v|
var j = (k < 6 ? k : k-1)
say ("Found '#{v}' in 'text#{j}' at indices ", kmp_search(texts[j], v))
}</syntaxhighlight>
{{out}}
<pre>text0 = GCTAGCTCTACGAGTCTA
text1 = GGCTATAATGCGTA
text2 = there would have been a time for such a word
text3 = needle need noodle needle
text4 = BharôtভাৰতBharôtভারতIndiaBhāratભારતBhāratभारतBhārataಭಾರತBhāratभारतBhāratamഭാരതംBhāratभारतBhāratभारतBharôtôଭାରତBhāratਭਾਰਤBhāratamभारतम्Bārataபாரதம்BhāratamഭാരതംBhāratadēsamభారతదేశం
text5 = InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented
text6 = Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.
 
Found 'TCTA' in 'text0' at indices [6, 14]
Found 'TAATAAA' in 'text1' at indices []
Found 'word' in 'text2' at indices [40]
Found 'needle' in 'text3' at indices [0, 19]
Found 'ഭാരതം' in 'text4' at indices [68, 138]
Found 'put' in 'text5' at indices [26, 90]
Found 'and' in 'text5' at indices [101, 128, 171]
Found 'alfalfa' in 'text6' at indices [33, 87]</pre>
 
=={{header|Wren}}==
This is based on the code [https://www.geeksforgeeks.org/kmp-algorithm-for-pattern-searching/ here]. The examples used are the same as in the [[Boyer-Moore_string_search#Wren]] task.
<syntaxhighlight lang="ecmascriptwren">class KMP {
static search(haystack, needle) {
haystack = haystack.bytes.toList
Line 859 ⟶ 1,133:
System.print("Found '%(pats[i])' in 'text%(j+1)' at indices %(KMP.search(texts[j], pats[i]))")
}</syntaxhighlight>
 
{{out}}
<pre>text1 = GCTAGCTCTACGAGTCTA
<pre>
text1 = GCTAGCTCTACGAGTCTA
text2 = GGCTATAATGCGTA
text3 = there would have been a time for such a word
Line 875 ⟶ 1,147:
Found 'put' in 'text5' at indices [26, 90]
Found 'and' in 'text5' at indices [101, 128, 171]
Found 'alfalfa' in 'text6' at indices [33, 87]</pre>
</pre>
9,476

edits