Bioinformatics/Subsequence: Difference between revisions

From Rosetta Code
Content added Content deleted
(Added 11l)
m (→‎{{header|Wren}}: Minor tidy)
(6 intermediate revisions by 4 users not shown)
Line 5: Line 5:
Write a routine to find all the positions of a randomly generated subsequence   (four letters).
Write a routine to find all the positions of a randomly generated subsequence   (four letters).
<br><br>
<br><br>

=={{header|11l}}==
=={{header|11l}}==
{{trans|Python}}
{{trans|Python}}


<lang 11l>UInt32 seed = 34
<syntaxhighlight lang="11l">UInt32 seed = 34
F nonrandom_choice(lst)
F nonrandom_choice(lst)
:seed = (1664525 * :seed + 1013904223) [&] FFFF'FFFF
:seed = (1664525 * :seed + 1013904223) [&] FFFF'FFFF
Line 28: Line 27:
R r
R r


F dna_findall(String needle, String haystack) -> N
F dna_findall(String needle, haystack) -> N
V pp = positions(haystack, needle)
V pp = positions(haystack, needle)
I pp.empty
I pp.empty
Line 35: Line 34:
print(‘Found ’needle‘ at the following indices:’)
print(‘Found ’needle‘ at the following indices:’)
L(p) pp
L(p) pp
print(String(p)‘:’String(p + needle.len))
print(p‘:(p + needle.len))


V dna_seq = generate_sequence(200)
V dna_seq = generate_sequence(200)
Line 46: Line 45:
print("\nSearch Sample: "sample_seq)
print("\nSearch Sample: "sample_seq)


dna_findall(sample_seq, dna_seq)</lang>
dna_findall(sample_seq, dna_seq)</syntaxhighlight>


{{out}}
{{out}}
Line 66: Line 65:
94:98
94:98
</pre>
</pre>
=={{header|Action!}}==
<syntaxhighlight lang="action!">DEFINE SEQLEN="200"
DEFINE SUBLEN="4"


PROC RandomSeq(CHAR ARRAY s BYTE len)
CHAR ARRAY letters="ACGT"
BYTE i

FOR i=1 TO len
DO
s(i)=letters(Rand(4)+1)
OD
s(0)=len
RETURN

PROC PrintSeq(CHAR ARRAY s)
BYTE i

FOR i=1 TO s(0)
DO
IF i MOD 20=1 THEN
IF i<10 THEN Put(32) FI
IF i<100 THEN Put(32) FI
PrintB(i)
Print(": ")
FI
Put(s(i))
IF i MOD 20=0 THEN
PutE()
FI
OD
RETURN

BYTE FUNC StartsWith(CHAR ARRAY s,prefix BYTE start)
BYTE i

FOR i=1 TO prefix(0)
DO
IF s(start+i-1)#prefix(i) THEN
RETURN (0)
FI
OD
RETURN (1)

PROC Main()
CHAR ARRAY seq(SEQLEN+1),sub(SUBLEN+1)
BYTE i,notfirst

RandomSeq(seq,SEQLEN)
RandomSeq(sub,SUBLEN)

PrintE("Search sequence:")
PrintSeq(seq)
PutE()
PrintF("Subsequence to find: %S%E%E",sub)

PrintE("Found subsequence at positions:")
notfirst=0
FOR i=1 TO SEQLEN-SUBLEN
DO
IF StartsWith(seq,sub,i) THEN
IF notfirst THEN
Print(", ")
FI
notfirst=1
PrintF("%I-%I",i,i+SUBLEN-1)
FI
OD
IF notfirst=0 THEN
PrintE("Not found")
FI
RETURN</syntaxhighlight>
{{out}}
[https://gitlab.com/amarok8bit/action-rosetta-code/-/raw/master/images/Bioinformatics_subsequence.png Screenshot from Atari 8-bit computer]
<pre>
Search sequence:
1: CGACTCAGGAAGGCCACGTG
21: GTAACTTCTTAGTTACCGTA
41: AGGCTAATAGCTAGCGCTGC
61: GTGACCAGGCATAGTAACCG
81: GCACGCACGTTCACCAAGGG
101: GTCCCGATGGGAGGCACGTT
121: ACTACTCCAAGAACTGTAGT
141: AAGTTACCGAAAAGTTCTCA
161: TCCTTGGGTAGTGAGTACTT
181: TGTGCTATGAAAAATAAGGA

Subsequence to find: ACGC

Found subsequence at positions:
83-86
</pre>
=={{header|Ada}}==
=={{header|Ada}}==
<lang Ada>with Ada.Text_Io;
<syntaxhighlight lang="ada">with Ada.Text_Io;
with Ada.Strings.Fixed;
with Ada.Strings.Fixed;
with Ada.Numerics.Discrete_Random;
with Ada.Numerics.Discrete_Random;
Line 132: Line 222:
New_Line;
New_Line;
end loop;
end loop;
end Sub_Sequence;</lang>
end Sub_Sequence;</syntaxhighlight>
{{out}}
{{out}}
<pre>Search sequence:
<pre>Search sequence:
Line 150: Line 240:
Found at position: 371..374
Found at position: 371..374
Found at position: 380..383</pre>
Found at position: 380..383</pre>

=={{header|Arturo}}==
=={{header|Arturo}}==


<lang rebol>bases: [`A` `G` `C` `T`]
<syntaxhighlight lang="rebol">bases: [`A` `G` `C` `T`]
randSeq: join map 1..200 => [sample bases]
randSeq: join map 1..200 => [sample bases]
randSub: join map 1..4 => [sample bases]
randSub: join map 1..4 => [sample bases]
Line 171: Line 260:
print ["Found subsequence at position:" idx]
print ["Found subsequence at position:" idx]
idx: idx + 1
idx: idx + 1
]</lang>
]</syntaxhighlight>


{{out}}
{{out}}
Line 192: Line 281:
Found subsequence at position: 71
Found subsequence at position: 71
Found subsequence at position: 169</pre>
Found subsequence at position: 169</pre>

=={{header|Factor}}==
=={{header|Factor}}==
{{works with|Factor|0.99 2021-02-05}}
{{works with|Factor|0.99 2021-02-05}}
<lang factor>USING: accessors formatting grouping io kernel math
<syntaxhighlight lang="factor">USING: accessors formatting grouping io kernel math
math.functions.integer-logs math.parser random regexp sequences ;
math.functions.integer-logs math.parser random regexp sequences ;


Line 221: Line 309:


80 10 .biosub nl
80 10 .biosub nl
600 39 .biosub nl</lang>
600 39 .biosub nl</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre> 0: ATTCAAGGAC
0: ATTCAAGGAC
10: CACTATTAAC
10: CACTATTAAC
20: CTGCATTGTG
20: CTGCATTGTG
Line 257: Line 344:
145..149
145..149
289..293
289..293
312..316
312..316</pre>

</pre>
=={{header|FreeBASIC}}==
{{trans|Wren}}
<syntaxhighlight lang="vb">Const base_ = "ACGT"

Sub findDnaSubsequence(dnaSize As Integer, chunkSize As Integer)
Dim As String dnaSeq(1 To dnaSize)
Dim As Integer i, chunk
For i = 1 To dnaSize
dnaSeq(i) = Mid(base_, Int(Rnd * 4)+1, 1)
Next
Dim As String dnaStr
For i = 1 To dnaSize
dnaStr += dnaSeq(i)
Next
Dim As String dnaSubseq(1 To 4)
For i = 1 To 4
dnaSubseq(i) = Mid(base_, Int(Rnd * 4)+1, 1)
Next
Dim As String dnaSubstr
For i = 1 To 4
dnaSubstr += dnaSubseq(i)
Next
Print "DNA sequence:"
For chunk = 1 To Len(dnaStr) Step chunkSize
Print Using "###_._.###: &"; chunk; chunk+chunkSize-1; Mid(dnaStr, chunk, chunkSize)
Next
Print !"\nSubsequence to locate: "; dnaSubstr
Dim As Integer idx = Instr(dnaStr, dnaSubstr)
Print Iif(idx <> 0, "Matches found at the following indices:", "No matches found.")
Do While idx > 0
If idx <> 0 Then Print Using "###_._.###"; idx; idx + 3
idx = Instr(idx+4, dnaStr, dnaSubstr)
Loop
End Sub

findDnaSubsequence(200, 20)
Print
findDnaSubsequence(600, 40)

Sleep</syntaxhighlight>
{{out}}
<pre>DNA sequence:
1.. 20: TTATAGTCTTGGAGGCATGT
21.. 40: TAACTTATGCGGAGCAGACA
41.. 60: CGGAGTATGCATTCCTCTTA
61.. 80: CCAAACGGTGCTGCCCGCGC
81..100: ACTCGCTGTATTCCGTATCG
101..120: TCACATTATCTAAACCACGA
121..140: TTTCCAGCGTGCGTGGGAAG
141..160: GCCATGTTTAGTCGGGGGCC
161..180: AAGGTCTTTGGCTTATGCTG
181..200: TTTTTTTTTCTTCGGTTACA

Subsequence to locate: ATTT
Matches found at the following indices:
120..123

DNA sequence:
1.. 40: GTGCGGGCCGTTAGCAGCTACGAGTGCTAGATGGAACTAG
41.. 80: TCCCCGCTCCCAAATGCAAAGCGTCCCAGACCAGTCTTGA
81..120: AGCCCGTTAAATTACACCTGAACCGTTGCAAATGATCGAT
121..160: AGACGGGGTATAATAGCGGAAAACACAGGGGAACTGCATG
161..200: CAAGCTCGAGCCGCTGAAGGATGGCTCCCCCCCGAGTGTA
201..240: AGTGGATCTCGCCCAAATAGCGGGGGAACAAAGAAAGGTA
241..280: AGTCTTACTTCGCACGTCCCCTCTCATACACGCCAGGACT
281..320: AATGGATCATTCATAGGTGACGGGTGACTTGCGGTGTTTC
321..360: TAGTTGGAGTCACCCGTCAGCTTAGATCTAAGTATGAACC
361..400: GTAAGAGTTTGTAACTGCACCTTCCGTCTCTTCCTCTGTA
401..440: GGAACGCTTTTGCTTGTTATCAGATAGTGTCTCCTTATCA
441..480: TAGGACAGGTTCCTTGTGAAGGTCCACAGAGTTTGCCCGG
481..520: GGTTCGAATATACGACGCTTGTGGTTCCGGCACTATAACT
521..560: TCCGCAGTGTTGTCGACGCCCCTAGCTCCCGGGGTCTTTT
561..600: CGCTTCCCTATAGCGCGAAATGAGTGCAAGGGTACCGGCC

Subsequence to locate: GCAC
Matches found at the following indices:
252..255
377..380
510..513</pre>


=={{header|Go}}==
=={{header|Go}}==
{{trans|Wren}}
{{trans|Wren}}
<lang go>package main
<syntaxhighlight lang="go">package main


import (
import (
Line 307: Line 474:
fmt.Println()
fmt.Println()
findDnaSubsequence(600, 40)
findDnaSubsequence(600, 40)
}</lang>
}</syntaxhighlight>


{{out}}
{{out}}
Line 351: Line 518:
388..391
388..391
</pre>
</pre>

=={{header|jq}}==
=={{header|jq}}==
{{works with|jq}}
{{works with|jq}}
Line 364: Line 530:
`jot -r N MIN MAX` but a fourth argument can also be
`jot -r N MIN MAX` but a fourth argument can also be
used to specify a seed. An alternative would be to use `gshuf` along the lines of:
used to specify a seed. An alternative would be to use `gshuf` along the lines of:
<syntaxhighlight lang="sh">
<lang sh>
# For 200 pseudo-random integers in the range 0 to 3 inclusive:
# For 200 pseudo-random integers in the range 0 to 3 inclusive:
gshuf -i 0-3 -r -n 200 --random-source=/dev/random
gshuf -i 0-3 -r -n 200 --random-source=/dev/random
</syntaxhighlight>
</lang>


Note that the indices shown below are offsets (i.e., the index origin is taken to be 0).
Note that the indices shown below are offsets (i.e., the index origin is taken to be 0).
<syntaxhighlight lang="sh">
<lang sh>
#!/bin/bash
#!/bin/bash


Line 386: Line 552:
"Zero-based indices of \($four):",
"Zero-based indices of \($four):",
($strand | indices($four) | join(" "))
($strand | indices($four) | join(" "))
'</lang>
'</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 401: Line 567:
55 141 169
55 141 169
</pre>
</pre>

=={{header|Julia}}==
=={{header|Julia}}==
<lang julia>DNArand(n, bases=['A', 'T', 'C', 'G']) = String(rand(bases, n))
<syntaxhighlight lang="julia">DNArand(n, bases=['A', 'T', 'C', 'G']) = String(rand(bases, n))


DNAsearch(needle, haystack, lap=true) = findall(needle, haystack, overlap=lap)
DNAsearch(needle, haystack, lap=true) = findall(needle, haystack, overlap=lap)
Line 412: Line 577:
println("Search sequence:\n$rand_string\nfor substring $subseq. Found at positions: ")
println("Search sequence:\n$rand_string\nfor substring $subseq. Found at positions: ")
foreach(p -> print(rpad(p[2], 8), p[1] % 10 == 0 ? "\n" : ""), enumerate(DNAsearch(subseq, rand_string)))
foreach(p -> print(rpad(p[2], 8), p[1] % 10 == 0 ? "\n" : ""), enumerate(DNAsearch(subseq, rand_string)))
</lang>{{out}}
</syntaxhighlight>{{out}}
<pre>
<pre>
Search sequence:
Search sequence:
Line 419: Line 584:
21:24 74:77 99:102
21:24 74:77 99:102
</pre>
</pre>

=={{header|Nim}}==
=={{header|Nim}}==
<lang Nim>import random, sequtils, strutils
<syntaxhighlight lang="nim">import random, sequtils, strutils


proc dnaSequence(n: Positive): string =
proc dnaSequence(n: Positive): string =
Line 460: Line 624:
else:
else:
let tail = if pos.len == 1: ": " else: "s: "
let tail = if pos.len == 1: ": " else: "s: "
echo "Subsequence found at position", tail, pos.join(", ")</lang>
echo "Subsequence found at position", tail, pos.join(", ")</syntaxhighlight>


{{out}}
{{out}}
Line 478: Line 642:


Subsequence found at positions: 61, 122, 170</pre>
Subsequence found at positions: 61, 122, 170</pre>

=={{header|Perl}}==
=={{header|Perl}}==
<lang perl>use strict;
<syntaxhighlight lang="perl">use strict;
use warnings;
use warnings;
use feature 'say';
use feature 'say';
Line 492: Line 655:
say "Target: $target";
say "Target: $target";
say 'Matches at these positions:';
say 'Matches at these positions:';
say (($string =~ s/.{1,40}\K/\n/gr) =~ s/($target)/ >$1< /gr);</lang>
say (($string =~ s/.{1,40}\K/\n/gr) =~ s/($target)/ >$1< /gr);</syntaxhighlight>
{{out}}
{{out}}
<pre>Target: CCTG
<pre>Target: CCTG
Line 503: Line 666:
TGCGAG >CCTG< TAGAGCCGGGCCTCAAATTAAACGAAAAAT
TGCGAG >CCTG< TAGAGCCGGGCCTCAAATTAAACGAAAAAT
ATAAGTTTGCTTGGCACGCTGTACTACTTATCC >CCTG< ACT</pre>
ATAAGTTTGCTTGGCACGCTGTACTACTTATCC >CCTG< ACT</pre>

=={{header|Phix}}==
=={{header|Phix}}==
Currently only searches for non-overlapped sequences, but it should be pretty obvious how to change that, in which case the next underline will simply partially overwrite the previous, so you'll get eg "<=<==>".
Currently only searches for non-overlapped sequences, but it should be pretty obvious how to change that, in which case the next underline will simply partially overwrite the previous, so you'll get eg "<=<==>".
<!--<lang Phix>(phixonline)-->
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
Line 561: Line 723:
<span style="color: #004080;">sequence</span> <span style="color: #000000;">idx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">test</span><span style="color: #0000FF;">,</span><span style="color: #000000;">dna</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">idx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">test</span><span style="color: #0000FF;">,</span><span style="color: #000000;">dna</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">show</span><span style="color: #0000FF;">(</span><span style="color: #000000;">dna</span><span style="color: #0000FF;">,</span><span style="color: #000000;">test</span><span style="color: #0000FF;">,</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">show</span><span style="color: #0000FF;">(</span><span style="color: #000000;">dna</span><span style="color: #0000FF;">,</span><span style="color: #000000;">test</span><span style="color: #0000FF;">,</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">)</span>
<!--</lang>-->
<!--</syntaxhighlight>-->
{{out}}
{{out}}
with cheat enabled
with cheat enabled
Line 582: Line 744:
GCTA does not occur
GCTA does not occur
</pre>
</pre>

=={{header|Python}}==
=={{header|Python}}==


Line 588: Line 749:
{{libheader|regex}}
{{libheader|regex}}


<lang python>
<syntaxhighlight lang="python">
from random import choice
from random import choice
import regex as re
import regex as re
Line 615: Line 776:


dna_findall(sample_seq, dna_seq)
dna_findall(sample_seq, dna_seq)
</syntaxhighlight>
</lang>
{{out}}
{{out}}


Line 636: Line 797:
167:171
167:171
</pre>
</pre>

=={{header|Racket}}==
=={{header|Racket}}==


<lang racket>#lang racket
<syntaxhighlight lang="racket">#lang racket


(define (rand-seq n)
(define (rand-seq n)
Line 660: Line 820:
sub (report-sequence full) (subsequence-indices full sub)))
sub (report-sequence full) (subsequence-indices full sub)))


(module+ main (for ((i 4)) (Bioinformatics/Subsequence)))</lang>
(module+ main (for ((i 4)) (Bioinformatics/Subsequence)))</syntaxhighlight>


{{out}}
{{out}}
Line 703: Line 863:
350 : GGCCTCGACCCAATTTAACCTCCCACTCCGTGGGTACAGCTTGAACCCCC
350 : GGCCTCGACCCAATTTAACCTCCCACTCCGTGGGTACAGCTTGAACCCCC
((245 . 248) (250 . 253) (329 . 332) (386 . 389))</pre>
((245 . 248) (250 . 253) (329 . 332) (386 . 389))</pre>

=={{header|Raku}}==
=={{header|Raku}}==
Chances are actually pretty small that a random 4 codon string will show up at all in a random 200 codon sequence. Bump up the sequence size to get a reasonable chance of multiple matches.
Chances are actually pretty small that a random 4 codon string will show up at all in a random 200 codon sequence. Bump up the sequence size to get a reasonable chance of multiple matches.
<lang perl6>use String::Splice:ver<0.0.3>;
<syntaxhighlight lang="raku" line>use String::Splice:ver<0.0.3+>;


my $line = 80;
my $line = 80;
Line 727: Line 886:
}
}


say $disp;</lang>
say $disp;</syntaxhighlight>
{{out}}
{{out}}
Show in custom div to better display highlighting.
Show in custom div to better display highlighting.
Line 756: Line 915:
:* &nbsp; DNA proteins to be searched in the data &nbsp; &nbsp; &nbsp; &nbsp; (the default is four unique random proteins).
:* &nbsp; DNA proteins to be searched in the data &nbsp; &nbsp; &nbsp; &nbsp; (the default is four unique random proteins).
:* &nbsp; the seed for the RANDOM function so runs can be repeated with the same data &nbsp; &nbsp; (no default).
:* &nbsp; the seed for the RANDOM function so runs can be repeated with the same data &nbsp; &nbsp; (no default).
<lang rexx>/*REXX pgm gens random DNA (ACGT) sequence & finds positions of a random 4─protein seq. */
<syntaxhighlight lang="rexx">/*REXX pgm gens random DNA (ACGT) sequence & finds positions of a random 4─protein seq. */
parse arg totLen rndLen basePr oWidth Bevery rndDNA seed .
parse arg totLen rndLen basePr oWidth Bevery rndDNA seed .
if totLen=='' | totLen=="," then totLen= 200 /*Not specified? Then use the default.*/
if totLen=='' | totLen=="," then totLen= 200 /*Not specified? Then use the default.*/
Line 802: Line 961:
if $\=='' then say right(idx, 7)"│" strip($, 'T') /*show residual protein data*/
if $\=='' then say right(idx, 7)"│" strip($, 'T') /*show residual protein data*/
say "───────┴"center('' , oWidth+10, '─')
say "───────┴"center('' , oWidth+10, '─')
say; return</lang>
say; return</syntaxhighlight>
{{out|output|text=&nbsp; when using the default inputs:}}
{{out|output|text=&nbsp; when using the default inputs:}}
<pre>
<pre>
Line 838: Line 997:
the random DNA proteins were found in positions: 5 6 16 69 157 158 159 340 796 797 962 963
the random DNA proteins were found in positions: 5 6 16 69 157 158 159 340 796 797 962 963
</pre>
</pre>

=={{header|Ring}}==
=={{header|Ring}}==
<lang ring>
<syntaxhighlight lang="ring">
/*-----------------------------------
/*-----------------------------------
# Project : DNA subsequences
# Project : DNA subsequences
Line 1,088: Line 1,246:


//-----------------------------------------
//-----------------------------------------
</syntaxhighlight>
</lang>


'''Output:'''
'''Output:'''


[https://i.imgur.com/5hhbRBK.mp4 Bioinformatics/Subsequence - video]
[https://i.imgur.com/5hhbRBK.mp4 Bioinformatics/Subsequence - video]

=={{header|Wren}}==
=={{header|Wren}}==
{{libheader|Wren-pattern}}
{{libheader|Wren-pattern}}
{{libheader|Wren-str}}
{{libheader|Wren-str}}
{{libheader|Wren-fmt}}
{{libheader|Wren-fmt}}
<lang ecmascript>import "random" for Random
<syntaxhighlight lang="wren">import "random" for Random
import "/pattern" for Pattern
import "./pattern" for Pattern
import "/str" for Str
import "./str" for Str
import "/fmt" for Fmt
import "./fmt" for Fmt


var rand = Random.new()
var rand = Random.new()
Line 1,134: Line 1,291:
findDnaSubsequence.call(200, 20)
findDnaSubsequence.call(200, 20)
System.print()
System.print()
findDnaSubsequence.call(600, 40)</lang>
findDnaSubsequence.call(600, 40)</syntaxhighlight>


{{out}}
{{out}}

Revision as of 09:41, 8 November 2023

Bioinformatics/Subsequence is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Task

Randomly generate a string of   200   DNA bases   (represented by   A,  C,  G,  and  T).

Write a routine to find all the positions of a randomly generated subsequence   (four letters).

11l

Translation of: Python
UInt32 seed = 34
F nonrandom_choice(lst)
   :seed = (1664525 * :seed + 1013904223) [&] FFFF'FFFF
   R lst[Int(:seed >> 16) % lst.len]

F generate_sequence(Int n)
   R ((0 .< n).map(_ -> nonrandom_choice([‘A’, ‘C’, ‘G’, ‘T’]))).join(‘’)

F positions(dnaSeq, subSeq)
   [Int] r
   V start = 0
   L
      V? pos = dnaSeq.find(subSeq, start)
      I pos == N
         L.break
      r.append(pos)
      start = pos + 1
   R r

F dna_findall(String needle, haystack) -> N
   V pp = positions(haystack, needle)
   I pp.empty
      print(‘No matches found’)
   E
      print(‘Found ’needle‘ at the following indices:’)
      L(p) pp
         print(p‘:’(p + needle.len))

V dna_seq = generate_sequence(200)
V sample_seq = generate_sequence(4)

V c = 1
L(i) dna_seq
   I c % 20 != 0 {print(i, end' ‘’)} E print(i)
   c++
print("\nSearch Sample: "sample_seq)

dna_findall(sample_seq, dna_seq)
Output:
GAAGTGCTCAAACCCTTTTT
CCTTGCCGTAGGTTGTGCTG
CCGCCGCACACCCGCAACAG
CTTTTAGGCATAAGTATACG
GACCGCGGACGGGGCGTAAC
GGTGAACATTTTGCTAAATT
GGCTCTAGGGATGAGCCCTA
TAGCGCTGGGGACTACGCCC
CGGTAAAGATCGAGGCGACT
CACCGATTGCGCTAGGGACA

Search Sample: CGTA
Found CGTA at the following indices:
26:30
94:98

Action!

DEFINE SEQLEN="200"
DEFINE SUBLEN="4"

PROC RandomSeq(CHAR ARRAY s BYTE len)
  CHAR ARRAY letters="ACGT"
  BYTE i

  FOR i=1 TO len
  DO
    s(i)=letters(Rand(4)+1)
  OD
  s(0)=len
RETURN

PROC PrintSeq(CHAR ARRAY s)
  BYTE i

  FOR i=1 TO s(0)
  DO
    IF i MOD 20=1 THEN
      IF i<10 THEN Put(32) FI
      IF i<100 THEN Put(32) FI
      PrintB(i)
      Print(": ")
    FI
    Put(s(i))
    IF i MOD 20=0 THEN
      PutE()
    FI
  OD  
RETURN

BYTE FUNC StartsWith(CHAR ARRAY s,prefix BYTE start)
  BYTE i

  FOR i=1 TO prefix(0)
  DO
    IF s(start+i-1)#prefix(i) THEN
      RETURN (0)
    FI
  OD
RETURN (1)

PROC Main()
  CHAR ARRAY seq(SEQLEN+1),sub(SUBLEN+1)
  BYTE i,notfirst

  RandomSeq(seq,SEQLEN)
  RandomSeq(sub,SUBLEN)

  PrintE("Search sequence:")
  PrintSeq(seq)
  PutE()
  PrintF("Subsequence to find: %S%E%E",sub)

  PrintE("Found subsequence at positions:")
  notfirst=0
  FOR i=1 TO SEQLEN-SUBLEN
  DO
    IF StartsWith(seq,sub,i) THEN
      IF notfirst THEN
        Print(", ")
      FI
      notfirst=1
      PrintF("%I-%I",i,i+SUBLEN-1)
    FI
  OD
  IF notfirst=0 THEN
    PrintE("Not found")
  FI
RETURN
Output:

Screenshot from Atari 8-bit computer

Search sequence:
  1: CGACTCAGGAAGGCCACGTG
 21: GTAACTTCTTAGTTACCGTA
 41: AGGCTAATAGCTAGCGCTGC
 61: GTGACCAGGCATAGTAACCG
 81: GCACGCACGTTCACCAAGGG
101: GTCCCGATGGGAGGCACGTT
121: ACTACTCCAAGAACTGTAGT
141: AAGTTACCGAAAAGTTCTCA
161: TCCTTGGGTAGTGAGTACTT
181: TGTGCTATGAAAAATAAGGA

Subsequence to find: ACGC

Found subsequence at positions:
83-86

Ada

with Ada.Text_Io;
with Ada.Strings.Fixed;
with Ada.Numerics.Discrete_Random;

procedure Sub_Sequence is

   type Nucleotide is (A, C, G, T);

   function To_Character (N : Nucleotide) return Character
   is (case N is
          when A => 'A', when C => 'C',
          when G => 'G', when T => 'T');

   package Random_Nucleotide is new Ada.Numerics.Discrete_Random (Nucleotide);
   use Random_Nucleotide;

   package Position_Io is new Ada.Text_Io.Integer_Io (Natural);
   use Ada.Text_Io;

   procedure Put_Bases (Seq : String; Width : Positive) is
      First : Natural := Seq'First;
   begin
      while First < Seq'Last loop
         declare
            Last : constant Natural :=
              Natural'Min (First + Width - 1, Seq'Last);
         begin
            Position_Io.Put (First); Put ("..");
            Position_Io.Put (Last);  Put (" ");
            Put (Seq (First .. Last));
            New_Line;
            First := Last + 1;
         end;
      end loop;
   end Put_Bases;

   Gen       : Generator;
   Sequence  : String (1 .. 405);
   Substring : String (1 ..   4);
   Pos       : Natural := 0;
begin
   Position_Io.Default_Width := 3;

   Reset (Gen);

   Sequence  := (others => To_Character (Random (Gen)));
   Substring := (others => To_Character (Random (Gen)));

   Put_Line ("Search sequence:");
   Put_Bases (Sequence, Width => 50);
   New_Line;

   Put ("Substring to search: ");
   Put (Substring);
   New_Line;

   loop
      Pos := Ada.Strings.Fixed.Index (Sequence, Substring, Pos + 1);
      exit when Pos = 0;
      Put ("Found at position: ");
      Position_Io.Put (Pos); Put ("..");
      Position_Io.Put (Pos + Substring'Length - 1);
      New_Line;
   end loop;
end Sub_Sequence;
Output:
Search sequence:
  1.. 50 CCTACGGAAAAGTGATAAGGACAGATACATAATCCTAAAACCCTGGAAAA
 51..100 CTTGTCTCGCCAGAGTAGGGCTCGGCAGGGGGGGCAGTGTTTTAAAACGT
101..150 CAGAGAATAGGCTCTACCTTGTTAGACTGCGAGTACTGGAGCGTAGTTCC
151..200 TATATTGCAAGCTGCTACAGTAAGTATCAAAGTATGCCACACATCCTTCT
201..250 ACAACCGGATTGGTTGCCCAGTAGAAGGCTCGTAGTCACCGGACACGCTG
251..300 TTCTTAAGGTCGGTAAGCTATTACGTCCATGGGAGATTCTCAAGGGTGCG
301..350 TTAGCGGACCCCCGTTACGTCCACGTATCTTCCGTCCAACTACCCCCTAA
351..400 TGTCATTGACATCGCCCGAGTATTTAATTTATTTGAACGGCACCAATTTA
401..405 GAGCT

Substring to search: TATT
Found at position: 153..156
Found at position: 269..272
Found at position: 371..374
Found at position: 380..383

Arturo

bases: [`A` `G` `C` `T`]
randSeq: join map 1..200 => [sample bases]
randSub: join map 1..4 => [sample bases]

idx: 0

print "Random sequence:"
print join.with:"\n" split.every: 20 randSeq
print ""

print "Looking for subsequence:"
print randSub
print ""

while [(size randSeq) > idx + 4][
    if prefix? slice randSeq idx idx+4 randSub ->
        print ["Found subsequence at position:" idx]
    idx: idx + 1
]
Output:
Random sequence:
CACGCGCGTTAACCCTGCAT
CTTTTCTCTAAGATGATGCG
CTACTCTGCCCGATTACTAT
GATGTCACCGGCGGTTCGGC
GACTGGCGCTGGCAGAAAGC
GCATGTCAAATTGCCCCAGT
GTGCAAGTCCAAGTATTAGT
GAGGTGCTCCGCTTCGTCCG
GGGTCGACTCGGTCCCACTT
CATTACATGTTGGTAATAGT

Looking for subsequence:
CGGT

Found subsequence at position: 71 
Found subsequence at position: 169

Factor

Works with: Factor version 0.99 2021-02-05
USING: accessors formatting grouping io kernel math
math.functions.integer-logs math.parser random regexp sequences ;

: new-dna ( n -- str ) [ "ACGT" random ] "" replicate-as ;

: pad ( n d -- str ) [ number>string ] dip 32 pad-head ;

:: .dna ( seq n -- )
    seq length integer-log10 1 + :> d seq n group
    [ n * d pad write ": " write write nl ] each-index ;

: .match ( slice -- ) [ from>> ] [ to>> ] bi "%d..%d\n" printf ;

: .matches ( slices -- )
    "Matches found at the following indices:" print
    [ .match ] each ;

: .locate ( slices -- )
    [ "No matches found." print ] [ .matches ] if-empty ;

: .biosub ( dna-size row-size -- )
    [ new-dna dup ] [ .dna nl ] bi*
    4 new-dna dup "Subsequence to locate: %s\n" printf
    <regexp> all-matching-slices .locate ;

80 10 .biosub nl
600 39 .biosub nl
Output:
 0: ATTCAAGGAC
10: CACTATTAAC
20: CTGCATTGTG
30: AGAACTTGCA
40: GTGTACCGAG
50: AGCGAGTTTA
60: AAGCAACACA
70: TCTTTACCGA

Subsequence to locate: GTAG
No matches found.

  0: GATCTCGTCATGGTCCATCCTAACATTTCGGTTGTGGGC
 39: GCATCCCGATAGGCGAAGTTAAATCTACGTAGTCCTACG
 78: TCACGACGGAACATGATTGCCCACCGAAGTCGTAGGCGA
117: GCTAAAGTCGGTACATACACGATCTGCTATATTCGTTCT
156: CCGACACACGACATGCAATCCGAGAAGCTCTCGAAGTGC
195: GGTCAGATCCTCAGACTCGAACAGAGGAGACCTTAACTG
234: ATACCCACAGTACTTCTCGCATAACCTAAGCACCTATGC
273: TTACACCATCGTCCTGATATTGAGTGAGTCTGGTCGGAG
312: ATATTATCTAGCACCCTCAAGCTCTGTGTGCCACACCAG
351: GATTCCACTTCGCGCTTGCCTAGAGAAAGTAGAGTAGGT
390: GGTGTCATTAGTACACTGTTTGCGATGCACCAACCAAAC
429: CCGACCGCCATGATGACTGCTTTTCGGCCAACGTCAGAT
468: TAAGAGTACTTTTAGTAGCACCGCAAGCCAGCCGGTTTA
507: GCAAGATCCTGCAGCCTCCACGTTATTTCAGGTCTCTAA
546: GCGTTCTTTCCATGGAAGTAGTCACCGCTCCCGTTGCCA
585: ATGGACACAGACGTT

Subsequence to locate: ATAT
Matches found at the following indices:
145..149
289..293
312..316

FreeBASIC

Translation of: Wren
Const base_ = "ACGT"

Sub findDnaSubsequence(dnaSize As Integer, chunkSize As Integer)
    Dim As String dnaSeq(1 To dnaSize)
    Dim As Integer i, chunk
    For i = 1 To dnaSize
        dnaSeq(i) = Mid(base_, Int(Rnd * 4)+1, 1)
    Next
    Dim As String dnaStr
    For i = 1 To dnaSize
        dnaStr += dnaSeq(i)
    Next
    Dim As String dnaSubseq(1 To 4)
    For i = 1 To 4
        dnaSubseq(i) = Mid(base_, Int(Rnd * 4)+1, 1)
    Next
    Dim As String dnaSubstr
    For i = 1 To 4
        dnaSubstr += dnaSubseq(i)
    Next
    Print "DNA sequence:"
    For chunk = 1 To Len(dnaStr) Step chunkSize
        Print Using "###_._.###: &"; chunk; chunk+chunkSize-1; Mid(dnaStr, chunk, chunkSize)
    Next
    
    Print !"\nSubsequence to locate: "; dnaSubstr
    Dim As Integer idx = Instr(dnaStr, dnaSubstr)
    Print Iif(idx <> 0, "Matches found at the following indices:", "No matches found.")
    Do While idx > 0
        If idx <> 0 Then Print Using "###_._.###"; idx; idx + 3
        idx = Instr(idx+4, dnaStr, dnaSubstr)
    Loop
End Sub

findDnaSubsequence(200, 20)
Print
findDnaSubsequence(600, 40)

Sleep
Output:
DNA sequence:
  1.. 20: TTATAGTCTTGGAGGCATGT
 21.. 40: TAACTTATGCGGAGCAGACA
 41.. 60: CGGAGTATGCATTCCTCTTA
 61.. 80: CCAAACGGTGCTGCCCGCGC
 81..100: ACTCGCTGTATTCCGTATCG
101..120: TCACATTATCTAAACCACGA
121..140: TTTCCAGCGTGCGTGGGAAG
141..160: GCCATGTTTAGTCGGGGGCC
161..180: AAGGTCTTTGGCTTATGCTG
181..200: TTTTTTTTTCTTCGGTTACA

Subsequence to locate: ATTT
Matches found at the following indices:
120..123

DNA sequence:
  1.. 40: GTGCGGGCCGTTAGCAGCTACGAGTGCTAGATGGAACTAG
 41.. 80: TCCCCGCTCCCAAATGCAAAGCGTCCCAGACCAGTCTTGA
 81..120: AGCCCGTTAAATTACACCTGAACCGTTGCAAATGATCGAT
121..160: AGACGGGGTATAATAGCGGAAAACACAGGGGAACTGCATG
161..200: CAAGCTCGAGCCGCTGAAGGATGGCTCCCCCCCGAGTGTA
201..240: AGTGGATCTCGCCCAAATAGCGGGGGAACAAAGAAAGGTA
241..280: AGTCTTACTTCGCACGTCCCCTCTCATACACGCCAGGACT
281..320: AATGGATCATTCATAGGTGACGGGTGACTTGCGGTGTTTC
321..360: TAGTTGGAGTCACCCGTCAGCTTAGATCTAAGTATGAACC
361..400: GTAAGAGTTTGTAACTGCACCTTCCGTCTCTTCCTCTGTA
401..440: GGAACGCTTTTGCTTGTTATCAGATAGTGTCTCCTTATCA
441..480: TAGGACAGGTTCCTTGTGAAGGTCCACAGAGTTTGCCCGG
481..520: GGTTCGAATATACGACGCTTGTGGTTCCGGCACTATAACT
521..560: TCCGCAGTGTTGTCGACGCCCCTAGCTCCCGGGGTCTTTT
561..600: CGCTTCCCTATAGCGCGAAATGAGTGCAAGGGTACCGGCC

Subsequence to locate: GCAC
Matches found at the following indices:
252..255
377..380
510..513

Go

Translation of: Wren
package main

import (
    "fmt"
    "math/rand"
    "regexp"
    "time"
)

const base = "ACGT"

func findDnaSubsequence(dnaSize, chunkSize int) {
    dnaSeq := make([]byte, dnaSize)
    for i := 0; i < dnaSize; i++ {
        dnaSeq[i] = base[rand.Intn(4)]
    }
    dnaStr := string(dnaSeq)
    dnaSubseq := make([]byte, 4)
    for i := 0; i < 4; i++ {
        dnaSubseq[i] = base[rand.Intn(4)]
    }
    dnaSubstr := string(dnaSubseq)
    fmt.Println("DNA sequnence:")
    for i := chunkSize; i <= len(dnaStr); i += chunkSize {
        start := i - chunkSize
        fmt.Printf("%3d..%3d: %s\n", start+1, i, dnaStr[start:i])
    }
    fmt.Println("\nSubsequence to locate:", dnaSubstr)
    var r = regexp.MustCompile(dnaSubstr)
    var matches = r.FindAllStringIndex(dnaStr, -1)
    if len(matches) == 0 {
        fmt.Println("No matches found.")
    } else {
        fmt.Println("Matches found at the following indices:")
        for _, m := range matches {
            fmt.Printf("%3d..%-3d\n", m[0]+1, m[1])
        }
    }
}

func main() {
    rand.Seed(time.Now().UnixNano())
    findDnaSubsequence(200, 20)
    fmt.Println()
    findDnaSubsequence(600, 40)
}
Output:

Sample run:

DNA sequnence:
  1.. 20: GTTGCCCACACGTCTTATTG
 21.. 40: TAAAAATCACCGTGCAGCGA
 41.. 60: GGTTAAAAATGGTAGGAAAA
 61.. 80: TATCCTCAGCCAGCGGTGCC
 81..100: GGCCAACAAAAGGGACGTTG
101..120: GATTAAAGTAGGTCTAGGTA
121..140: TCTCGTATCCGGTTGATCCG
141..160: GGATGGTGGACGATATTGGA
161..180: GACCGGAGTGTACATCGGTG
181..200: TTGTCGCTTGCAGCTACGGT

Subsequence to locate: AATA
Matches found at the following indices:
 59..62 

DNA sequnence:
  1.. 40: GTACAGCCACTGTTAGTAGACGGATGCTATTGGGACGCAA
 41.. 80: CACATCAGTACACTGCTTGTTCGTAATCGCGTACCCAGCG
 81..120: CAAAAGGAGGGGAGGAACCTGCTCAGACTGTCGCTAAAAA
121..160: CGAGCACGTGTCCTTACGCAGTGATGGTAGCGGTCCACGA
161..200: CTTCCACTGGCATAAGGAGAATGTTTAGTAACGCCCCTCA
201..240: TAGGTGCAATTCTACAGGTTAAGGGACCGTGGGATGTTTC
241..280: TATAAAAGTTGAAGAGATTACTAATCCGTCCCGTGCGCGT
281..320: GCCGCAATTTAGCGCCCGTTCTTGAGTAAACATACATGCA
321..360: CGCTCTTGAGTTTTCTAAAACCTGATCAAAACGGTCGCCC
361..400: ACATGCAGGAGCGCCGCAGGGTTTCAGAGGTCAACCATCG
401..440: GCAGCACACGTGAACCCTCTGTACTGACCAGGGGCTTGCT
441..480: CCTTGGTAGGAGATGGTGGAGAATGCGTCGATGCACTGAA
481..520: GCAGACCGCTGATAGCATGTACGATGTTTACGGGTTGACG
521..560: ATAGCTTTGCTAGTGATCGAACATATGATGAAAAACGCTT
561..600: CCATTGATAGAGCATCTTAGGAGCTCAGTCCAGTGACCTC

Subsequence to locate: AGGT
Matches found at the following indices:
202..205
216..219
388..391

jq

Works with: jq

Works with gojq, the Go implementation of jq

Neither jq nor gojq currently has any PRNG built-ins so one possibility is to use a jq-coded PRNG function such as can be found at https://rosettacode.org/wiki/Random_numbers#jq

In practice, it's usually more convenient to use a utility such as gshuf or jot to provide the source of randomness. Here we use `jot -r N MIN MAX` but a fourth argument can also be used to specify a seed. An alternative would be to use `gshuf` along the lines of:

# For 200 pseudo-random integers in the range 0 to 3 inclusive:
gshuf -i 0-3 -r -n 200 --random-source=/dev/random

Note that the indices shown below are offsets (i.e., the index origin is taken to be 0).

#!/bin/bash

jot -r 200 0 3 | jq -nr --slurpfile four <(jot -r 4 0 3) '

 # input: an array of integers
 def toDNA: 
   def base: . as $in | "ACGT" | .[$in : $in+1];
   map(base) | join("");

 ([inputs] | toDNA) as $strand
 | ($four  | toDNA) as $four
 | "Strand of length \($strand|length):",
   $strand,
   "Zero-based indices of \($four):",
   ($strand | indices($four) | join(" "))
'
Output:
./bioinformatics-subsequence.sh
Strand of length 200:
TGGGCCCAAGCATTGCCACGTAGCTTTGTCAGTGGGCTTGTAAGGGACGAACACAAACTCACAGACCAGGAATTCTCGAGTTCCAGTCCCCCCACTTGTCGCTATTTAGTTAAGACGTTCAGTTTCGTTGCGAACTGTGTCCCCCAGGCTAACGTGATGGGTGTCAGGAATCAATGGCCAACTTTCAGTTAGACTTGACC
Zero-based indices of CAAC:
178

./bioinformatics-subsequence.sh
Strand of length 200:
TAAGACTGCAGGGTACGAAGAGTGGAAGATTGGCTCGTACTTGTCGACGTCGCGTGACATAATCTCTGTGCTCGCCTCGCAGTAAGGGACTAGGTCCCGTTCGAGCGCCCTGCTAGAAGGAGCATCCTACCATGCTCTGATGACATCCTGTCGGCATTAGAGTTTCTACGACATCTAAAGAGTACGATCGACTTCCCAGT
Zero-based indices of GACA:
55 141 169

Julia

DNArand(n, bases=['A', 'T', 'C', 'G']) = String(rand(bases, n))

DNAsearch(needle, haystack, lap=true) =  findall(needle, haystack, overlap=lap)

const rand_string = DNArand(200)
const subseq = DNArand(4)

println("Search sequence:\n$rand_string\nfor substring $subseq. Found at positions: ")
foreach(p -> print(rpad(p[2], 8), p[1] % 10 == 0 ? "\n" : ""), enumerate(DNAsearch(subseq, rand_string)))
Output:
Search sequence:
CCGAAGCCAGGAGGACTGAGCGCTTGCGTCCCGAGTTCTGCGACGAGTCTCTTCATTATAAGGCCACTGATTGCGCTCATCATGAGTGCCAGAAGCACCGCTAAACATAAGTGTCCTTTCTTCCTGACGCACTTGAAGATTGTGACCATTTGTGCGGGTTGTGAGTTAGGGGCTCTCATTGTACACGATCTATAGTGTGC
for substring CGCT. Found at positions:
21:24   74:77   99:102

Nim

import random, sequtils, strutils

proc dnaSequence(n: Positive): string =
  ## Create a random DNA sequence of length "n".
  newSeqWith(n, sample("ACGT")).join()

proc positions(dnaSeq, subSeq: string): seq[int] =
  ## Return the list of starting positions of a subsequence
  ## "subSeq" in a sequence "dnaSeq". Positions start at 1.
  var start = 0
  while true:
    let pos = dnaSeq.find(subSeq, start)
    if pos < 0: break
    result.add pos + 1
    start = pos + 1


when isMainModule:

  const
    N = 200
    Step = 20

  randomize()

  let dnaSeq = dnaSequence(N)
  echo "DNA sequence:"
  for i in countup(0, N - 1, Step):
    echo ($(i+1)).align(3), ' ', dnaSeq[i..i+(Step-1)]

  let subSeq = dnaSequence(3)
  echo "\nDNA subsequence: ", subSeq

  echo()
  let pos = dnaSeq.positions(subSeq)
  if pos.len == 0:
    echo "Subsequence not found."
  else:
    let tail = if pos.len == 1: ": " else: "s: "
    echo "Subsequence found at position", tail, pos.join(", ")
Output:
DNA sequence:
  1 CACATACGATGAGCTGGGCG
 21 CCTAAGAGGCGGAAAGACAA
 41 CCGTGTGTGTCTAACCCATG
 61 GTTTAATTGCAGATAGTCTC
 81 TAGACTACAAACATTAGAGC
101 AATGCACCGGGGTGCACGTG
121 TGTTTTGACTTCCCATGAAA
141 GCCCTTATCCTAGAGTACAG
161 TCGGCAAATGTTCGCTCCTT
181 GGCCCACTCCATTTGGACGG

DNA subsequence: GTT

Subsequence found at positions: 61, 122, 170

Perl

use strict;
use warnings;
use feature 'say';

my @bases = <A C G T>;
my $basecnt = 160;

my($string,$target);
$string .= $bases[ int rand @bases ] for 1 .. $basecnt;
$target .= $bases[ int rand @bases ] for 1 .. 4;
say "Target: $target";
say 'Matches at these positions:';
say (($string =~ s/.{1,40}\K/\n/gr) =~ s/($target)/ >$1< /gr);
Output:
Target: CCTG
Matches at these positions:
9
90
157
TTGCC >CCTG< CAAAGTTAATAAGTAAACAATTAAGTGAGTG
CTCTAGGGTAAGGTGAGGGCGGGAAGGGGAAAAATACCGA
TGCGAG >CCTG< TAGAGCCGGGCCTCAAATTAAACGAAAAAT
ATAAGTTTGCTTGGCACGCTGTACTACTTATCC >CCTG< ACT

Phix

Currently only searches for non-overlapped sequences, but it should be pretty obvious how to change that, in which case the next underline will simply partially overwrite the previous, so you'll get eg "<=<==>".

with javascript_semantics

constant cheat = false

function grandna(integer len)
    string dna = repeat(' ',len)
    for i=1 to len do dna[i] = "ACGT"[rand(4)] end for
    return dna
end function

procedure show(string dna, test, sequence idx)
    idx = deep_copy(idx) & length(dna)+100 -- (add an otherwise unused sentinel)
    sequence s = split(trim(join_by(split(join_by(dna,1,10,""),"\n"),1,5," ")),"\n")
    integer ii = 1,         -- idx index
            i = idx[ii],    -- current target
            ux = 1,         -- underline index (1..4)
            ldx = 1         -- line index (1, 51, 101, etc)
    for si=1 to length(s) do
        printf(1,"%3d: %s\n",{ldx,s[si]})
        ldx += 50
        if i and i<ldx then
            string ul = repeat(' ',59)
            while i and i<ldx do
                integer up = i-ldx+51       -- underline pos (relative to ldx)
                up += floor((up-1)/10)+5    -- (plus any needed spacing)
                ul[up] = "<==>"[ux]
                ux += 1
                i += 1
                if ux>4 then
                    ux = 1
                    ii += 1
                    i = idx[ii]
                end if
            end while
            printf(1,"%s\n",ul)
        end if
    end for
    if length(idx)>1 then
        string p = iff(length(idx)>1?"s":""),
               t = join(apply(idx[1..$-1],sprint),", ")
        printf(1,"%s occurs at location%s: %s\n",{test,p,t})
    else
        printf(1,"%s does not occur\n",{test})
    end if
end procedure

string dna = grandna(200),
       test = grandna(4)
constant cheats = iff(cheat?{9,13,49,60,64,68}:{})
for i=1 to length(cheats) do
    dna[cheats[i]..cheats[i]+3] = test
end for
sequence idx = match_all(test,dna)
show(dna,test,idx)
Output:

with cheat enabled

  1: GGAGATATCG ACCGACCGAA GTAAAGTCAA AGTCGTCCAA TCCACGGACG
             <= =><==>                                   <=
 51: ACTTCAGCAC GACCGACCGA CCTATTTAAG AGACCACACT TAAGGAATCC
     =>       < ==><==><== >
101: ATGCGAAATA AAAATGGGCG AGTAGCCGTG GGGCGCTAAA GCACCCACCT
151: AGTTTTCGCC GAAGTACTAG ACCACCTTCG GATCGACAAA GCTTTCACCA
                                         <==>
CGAC occurs at locations: 9, 13, 49, 60, 64, 68, 184

with cheat disabled

  1: TGATTTAAAC CGTGGTGCAA TTTATAAACA CTGCGATATG CCTCCTGATG
 51: GCATGGTATT CGACACCAAG ACGCTGGTGG GCACACTGGC TTTCAGAATA
101: GGAGTCACAA TCCCTCTATG ATGTCCTCTA GCGGGTGTGT GTTCAGTGCC
151: AGCGCTTACT TCCGGCGTGG CCGACTCTTT TTAAAGCGTA TAGCTGGGGT
GCTA does not occur

Python

Works with: Python version 3.8
Library: regex
 
from random import choice
import regex as re 
import time

def generate_sequence(n: int ) -> str:
    return "".join([ choice(['A','C','G','T']) for _ in range(n) ])

def dna_findall(needle: str, haystack: str) -> None:

    if sum(1 for _ in re.finditer(needle, haystack, overlapped=True)) == 0:
        print("No matches found")
    else:
        print(f"Found {needle} at the following indices: ")
        for match in re.finditer(needle, haystack, overlapped=True):
            print(f"{match.start()}:{match.end()} ")

dna_seq = generate_sequence(200)
sample_seq = generate_sequence(4)

c = 1
for i in dna_seq:
    print(i, end="") if c % 20 != 0 else print(f"{i}")
    c += 1
print(f"\nSearch Sample: {sample_seq}")

dna_findall(sample_seq, dna_seq)
Output:
TTGCCCCTGTACTGAGCCCA
TAAGCTTGCACTCAAGGTTT
TGCCCCCTCATATTATAACG
CATCCATTATACAAAACCGA
TACCCTTCCGCATATTATGA
AAAGTGGCGAAGTGCCTTGA
TTTGCATTCATAGTACAACG
GTGCAAAAGCATTGTATGTC
TCACATTTACATGGGAAATG
CCTAGTAGGTGCAAGACCTG

Search Sample: TACA
Found TACA at the following indices:
69:73
133:137
167:171

Racket

#lang racket

(define (rand-seq n)
  (build-string n (lambda _ (string-ref "TGAC" (random 4)))))

(define (subsequence-indices full part)
  (let ((part-length (string-length part)) (full-length (string-length full)))
    (for/list ((i (- full-length part-length))
               #:when (for/and ((p part) (f (in-string full i))) (eq? p f)))
      (cons i (+ i part-length -1)))))

(define (report-sequence s (l 50))
  (string-join (for/list ((i (in-range 0 (string-length s) l)))
                 (format "~a: ~a" (~a #:width 4 i)
                         (substring s i (min (string-length s) (+ i l)))))
               "\n"))
  
(define (Bioinformatics/Subsequence (full (rand-seq 400)) (sub (rand-seq 4)))
  (printf "Indices of ~a in~%~a~%~a~%"
          sub (report-sequence full) (subsequence-indices full sub)))

(module+ main (for ((i 4)) (Bioinformatics/Subsequence)))
Output:
Indices of TTAC in
0   : TTATCCTACCGCGTAAGTTCAATGCTCACCGCAGTTTGCTAACCGTTCCT
50  : AAATTCACTTCCTAAGGTATCTTTCGCTTAATTGATGCCGATTGAATTCC
100 : ACGGAGGGCGTAATTGTTTCGGACTTTAGACCTGACATAAGGGCACACTA
150 : GTCCTATTGAATTTGGTGCTATTCGGCGACCTACTAACCTTAGTCAGTGA
200 : AGAGCCATCTCAAAAGTACAGTCATCCTCAAGTGTTACATACGGCACCAT
250 : GACAGTGTATAAGCATGGAGGTTGGCCTATCGTCATATCGAGGCGGCGCC
300 : ATAGACCGGCCAGGTGATGAGATCGACTTTAATGTTGTTGCTTAGCTTGA
350 : CCTCTAGTTTGGATTAAGACGGTCATAGATAGATAGACCGTAAAGTATTC
((234 . 237))
Indices of GTAA in
0   : GTCAGTCCACGCAAGAATAGCAGTTGAGTGGACAATTTATGAGACGGAGA
50  : TAAGTAACCCGCTCCGAGATAAACGTCAGCCGGATTCCGCTGAGTCGGTC
100 : GCCTTCCAAGTGGCAGCTTGTTTGCATTGCTTACAGTGACTTGAACGATC
150 : ACCTACTCGAGGACTCTGCGGGTATTCCAGTTGCCTTGCACTCAGCGATG
200 : CACAAACTTTAAATTATCACAGAAAGAATGTGATTCGGGTGGTCACCCTT
250 : ATCGGTGAAACCAGTCCTTCCATGGGCATATTCTGCGTCGAAATGAGCCC
300 : GCTGTTTACGTTGTACGAACTGGGGACCTAAGGAAACGGGCCGTTCTTAG
350 : GTGATGTCAGCTGCAACGAACTACTGTTAACCTTCTCGATCTGTTGAAAA
((53 . 56))
Indices of AACG in
0   : TTTACAGTACGATTCCGAAGACACAAGAATGCGCCGGCTGTGGGTAGGGG
50  : CGACCCTGCGCGACCTATAAAAGGGGCGACTCAATTTTAGGCCCACCACG
100 : GACCCAGCCCTGTGCACAGAGCGGGGCATTTTTACCTCGCGTGCGCACCA
150 : ACTGCGATCTGCCTTGTCACATAATCCCACATACGAGTTGTATCTCTAAG
200 : AAGGGATGAGGCCAATTTAAATCCGGGTGCATTTCTCGGGGGGAGACACC
250 : AATGAGAGTGGGGCAAGGTGGCGTAGAGAGCTAATCGGGTTTTATGACCG
300 : CGGAAGACCTGGGATACGTCTGGGTGATAACTGAGGGCAGGTCAACGAAC
350 : CCTGATGCGTAGCCACGTCTCAGCTATCGGGCCTGTTTTCATAGTCCATG
((343 . 346))
Indices of CAGC in
0   : TGTGAACCACTATGACACGCTACACGCCTCAAGTTGGCCCCCATATAAGA
50  : ATATCCATCGGTTAATGTGTCTCGCGGCCGTTAGAACAAGCACTAAAGTT
100 : AGAGAAACCAACCATTGGACTAGATCAACATCAACGTCGCTGATAATAAA
150 : TGTATATCTGATGTGGCCGTTCATAAAATCGTTAACTACAGGTATCAACA
200 : TAGTCTCCCAACTTATATAATTGGTTAACTTAGGAGGAGCTTGCACAGCT
250 : CAGCTATATGCTATCTGGCCCTGGGCTTGGTAGGCATCACGTCGTTATGC
300 : TGCGAACATCTCAAAGACAAACGTTGATCCAGCCCCTAGAGAGGTCATTA
350 : GGCCTCGACCCAATTTAACCTCCCACTCCGTGGGTACAGCTTGAACCCCC
((245 . 248) (250 . 253) (329 . 332) (386 . 389))

Raku

Chances are actually pretty small that a random 4 codon string will show up at all in a random 200 codon sequence. Bump up the sequence size to get a reasonable chance of multiple matches.

use String::Splice:ver<0.0.3+>;

my $line = 80;

my $haystack = [~] <A C G T>.roll($line * 8);

say 'Needle: ' ~ my $needle = [~] <A C G T>.roll(4);

my $these = $haystack ~~ m:g/<$needle>/;

my @match = $these.map: { .from, .pos }

printf "From: %3s to %3s\n", |$_ for @match;

my $disp = $haystack.comb.batch($line)».join.join("\n");

for @match.reverse {
    $disp.=&splice(.[1] + .[1] div $line, "\e[0m" );
    $disp.=&splice(.[0] + .[0] div $line, "\e[31m");
}

say $disp;
Output:

Show in custom div to better display highlighting.

Needle: TAGC
From: 159 to 163
From: 262 to 266
From: 315 to 319
From: 505 to 509
From: 632 to 636
CATATGTGACACTGACAGCTCGCGCGAAAATCCGTGTGACGGTCTGAACACTATACTATAGGCCCGGTCGGCATTTGTGG
CTCCCCAGTGGAGAGACCACTCGTCAATTGCTGACGACTTAACACAAATCGAGTCGCCCTTAGTGCCAGACGGGACTCCT
AGCAAAGGGCGGCACGTGGTGACTCCCAATATGTGAGCATGCCATCTAATTGATCTGGGGGGTTTCGCGGGAATACCTAG
GGGCGTTCTGTCCATGGATCTCTAGCCCTGCGAAGAGATACCCGCAGTGAGTTGCACGTGCAAAGAACTTGTAACTAGCG
TATTCTGTATCCGCCGCGCGATATGCTTCTGCGGGATGTACTTCTTGTGACTAAGACTTTGTTATCCAAATTGACCAATA
TTCAACGGTCGACTCTCCGAGGCAGTATCGGTACGCCGAAAAATGGTTACTTCGGCCATACGTAACCTCTCAAGTCACGA
TTACAGCCCACGGGGGCTTACAGCATAGCTCCAAAGACATTCCAATTGAGCTACAACGTGTTCAGTGCGGAGCAGTATCC
AGTACTCGACTGTTATGGTAAAAGGGCATCGTGATCGTTTATATTAATCATTGGGACAGGTGGTTAATGTCATAGCTTAG

REXX

This REXX version allows the user to specify:

  •   length of the (random) DNA data sequence     (default is 200).
  •   length of the (random) DNA sequence             (default is four).
  •   DNA proteins to be used in the sequence         (default is ACGT).
  •   width of the output lines of (random DNA)         (default is 100).
  •   often (if ever) to add a blank to the output       (default is every 10 proteins).
  •   DNA proteins to be searched in the data         (the default is four unique random proteins).
  •   the seed for the RANDOM function so runs can be repeated with the same data     (no default).
/*REXX pgm gens random DNA (ACGT) sequence & finds positions of a random 4─protein seq. */
parse arg totLen rndLen basePr oWidth Bevery rndDNA seed .
if totLen=='' | totLen==","  then totLen=   200  /*Not specified?  Then use the default.*/
if rndLen=='' | rndLen==","  then rndLen=     4  /* "      "         "   "   "     "    */
if basePr=='' | basePr==","  then basePr= 'acgt' /* "      "         "   "   "     "    */
if oWidth=='' | oWidth==","  then oWidth=   100  /* "      "         "   "   "     "    */
if Bevery=='' | Bevery==","  then Bevery=    10  /* "      "         "   "   "     "    */
if rndDNA=='' | rndDNA==","  then rndDNA=  copies(., rndLen)    /*what we're looking for*/
if datatype(seed, 'W')  then call random ,,seed  /*used to generate repeatable random #s*/
call genRnd                                      /*gen  data field of random proteins.  */
call show                                        /*show   "    "    "    "      "       */
say '  base DNA proteins used: '  basePr
say 'random DNA proteins used: '  dna?
call findRnd
if @=='' then do;  say "the random DNA proteins weren't found.";  exit 4;  end
say 'the random DNA proteins were found in positions:'     strip(@)
exit 0                                           /*stick a fork in it,  we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
commas: parse arg ?;  do jc=length(?)-3  to 1  by -3; ?=insert(',', ?, jc); end;  return ?
/*──────────────────────────────────────────────────────────────────────────────────────*/
findRnd: @=;           p=0                       /*@:  list of the found target proteins*/
              do until p==0;    p= pos(dna?, $$, p+1);     if p>0  then @= @ commas(p)
   /*Found one?  Append it to the "Found"s*/
              end   /*p*/;             return
/*──────────────────────────────────────────────────────────────────────────────────────*/
genRnd: dna?=;        use= basePr;     upper use basePr rndDNA;       lenB= length(basePr)
              do k=1  for rndLEN;      x= substr(rndDNA, k, 1)
              if x==.  then  do;  ?= random(1, length(use) );         x= substr(use, ?, 1)
                                  use= delstr(use, ?, 1)   /*elide so no protein repeats*/
                             end
              dna?= dna? || x                              /*build a random protein seq.*/
              end   /*k*/
        return
/*──────────────────────────────────────────────────────────────────────────────────────*/
show: say " index │"center('DNA sequence of ' commas(totLen)  " proteins", oWidth+10)
      say "───────┼"center(''                                            , oWidth+10, '─')
      $=; $$=;                 idx= 1               /*gen data field of random proteins.*/
         do j=1  for totLen;   c= substr( basePr, random(1, lenB), 1)
          $$= $$ || c                                       /*append a random protein.  */
          if Bevery\==0  then if j//Bevery==0  then $= $' ' /*possibly add a blank.     */
          if length( space($ || c, 0) )<oWidth  then do;   $= $  ||  c;   iterate;   end
          say strip( right(idx, 7)'│' $, 'T');  $=          /*display line ──► terminal.*/
          idx= idx + oWidth                                 /*bump the index number.    */
          end   /*j*/
      if $\==''  then say right(idx, 7)"│" strip($, 'T')    /*show residual protein data*/
      say "───────┴"center(''                                            , oWidth+10, '─')
      say;                return
output   when using the default inputs:
 index │                                                 DNA sequence of  200  proteins
───────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────────
      1│ TTTTTAGCG CGTTTTGTAG CGCTCTAAAA ACCGTAGCTA TATTTCTCGA AGTTTCACCC AGCTCTTTTG CCCCAGGGTT GCGCTAAGCC CAGCTTCGAG
    101│ GGGGCACAG GTAAAATACT ACCGTCCGTG GAGGGGGATG AATTGACCCG ACATTTTTTG AAGCATAACT CGTGACTCAA TATTGCATGA TTACACCAGC
───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────

  base DNA proteins used:  ACGT
random DNA proteins used:  GCAT

the random DNA proteins were found in positions: 162 184
output   when using the inputs of:     1000   ,   ,   ,   ,   tttt
 index │                                       DNA sequence of  1,000  proteins
───────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────────
      1│ GTGATTTTT AGCGCGTTTT GTAGCGCTCT AAAAACCGTA GCTATATTTC TCGAAGTTTC ACCCAGCTCT TTTGCCCCAG GGTTGCGCTA AGCCCAGCTT
    101│ GAGGGGGGC ACAGGTAAAA TACTACCGTC CGTGGAGGGG GATGAATTGA CCCGACATTT TTTGAAGCAT AACTCGTGAC TCAATATTGC ATGATTACAC
    201│ AGCTAGGTT AGTGTAAAAA CCCCCCTATC TTCCTGATCA ATGGCGAGTA AAACATGCAA CCAATTTGTG AGCGAGTACT GGAAATTATT GTTTACGGGA
    301│ AGGCACATG CTACGCGCAA CAGATATCTT AGACTGACCC TTTTAGAGTC ATAAGCCCCT GTCGCCTACA TGCTACTAAT ACTCCAACTA GCGGCGCACC
    401│ TCAACCGGA TCATGGCGCC AGGGAAAATG TGGCGTAGCG ACGTGCTCAT CGCTCGCCGG GGAGAGCCTT TCAGAATCTC GAATAAAACC TGGTAATGAC
    501│ TCATCAATC GTAATGGTCG TCTGGGGCAA GAAGCCGATA TTATAGACTC AGGTCAGACG TGTGCACAAC GGCAGAATTT ATAGTAATTC GCGTGAACTA
    601│ GTTTCGGGA TAGGCCTACG ACCAATCATA GGACATTCGA TGCACGGTGT AGAAACAGTT CTCTGATGTT ACTCGGGATA ACACTCGCAA TCCCCTAGGA
    701│ ACCGTGAGC GTCGCTAGTA TCTGAGATAG TCGCGACTGC CCAGCGGTCT TTAAGTTCGC ACACTACGGG ACTCCTAGTT CGCCCATTCA TGGCTATTTT
    801│ CCTATCAGT CCAATCCCAC GGGGAGGGCA CTCGCGCAAT TCATTCAAAG AGGGCCATTT GCCGATATAA GGTCCATCAT CGGGAGGAAT ATGACTCCTG
    901│ TTAGTATTA GAGCAGCCTC GCTGCGTACT ACTGTCAGTG GCCCGTCAGG GAAGGCAAAA CGTTTTTCCT CTAGGAATCC GTCAATTGGA CTTCTAGACT
───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────

  base DNA proteins used:  ACGT
random DNA proteins used:  TTTT

the random DNA proteins were found in positions: 5 6 16 69 157 158 159 340 796 797 962 963

Ring

/*-----------------------------------
# Project : DNA subsequences
# Date    : 2021/03/23
# Author  : Gal Zsolt (~ CalmoSoft ~)
# Email   : <calmosoft@gmail.com>
-----------------------------------*/

//-----------------------------------------

load "stdlibcore.ring"
load "guilib.ring"

start = 0
base = ["A","C","G","T"]
dnaList = []
dnaSeq = []
ColLine = list(21) 

C_Spacing = 2 

C_ButtonDnaStyle  = ' background-color: Red; border-radius: 8px;' 
C_ButtonStyle  = '"background-color:white"; border-radius: 8px;'
Button = newlist(10,20)
LayoutButtonRow = list(10)

//-----------------------------------------

app = new qApp 
{
      win = new qWidget() {
            setWindowTitle('DNA subsequences')
	    setWinIcon(self,AppFile("white.jpg"))
            setStyleSheet('background-color:White')
            setgeometry(560,180,300,300)
            //reSize(400,400)
            winheight = 10 
            fontSize = 8 # + (winheight / 100)

            LayoutButtonMain = new QVBoxLayout()            
            LayoutButtonMain.setSpacing(C_Spacing)
            LayoutButtonMain.setContentsmargins(0,0,0,0)

            LabelInd = new qLabel(win) { settext("    DNA subsequences start positions:")
                                         setAlignment(Qt_AlignHCenter | Qt_AlignVCenter)
                                         setStyleSheet("background-color:yellow") }

            ButtonInd = new QPushButton(win) { setStyleSheet("background-color:yellow") }

            LabelFind = new qLabel(win) { settext("    DNA subsequence to find:")
                                          setStyleSheet("background-color:yellow") }

            ButtonFind = new QPushButton(win)

            DnaSearch = new QPushButton(win) { setclickevent("pstart()")
                                               setStyleSheet("background-color:yellow")
                                               settext("Find")
                                             }
    
            for Col = 1 to 21
                ColLine[Col] = new qLabel(win) {
                                setmaximumheight(20)
                                setAlignment(Qt_AlignHCenter | Qt_AlignVCenter) 
                                setStyleSheet("background-color:darkgray")
                                setText(string(Col-1))
                                } 
            next

            LayoutInd = new QHBoxLayout() { setSpacing(C_Spacing) setContentsMargins(0,0,0,0) }
            LayoutInd.AddWidget(LabelInd) 
            LayoutInd.AddWidget(ButtonInd) 
            LayoutButtonMain.AddLayout(LayoutInd) 
                
            LayoutTitleRow = new QHBoxLayout() { setSpacing(C_Spacing) setContentsMargins(0,0,0,0) }

                for Col = 1 to 21                
                    LayoutTitleRow.AddWidget(ColLine[Col])         
                next
                            
            LayoutButtonMain.AddLayout(LayoutTitleRow)  
            
            RowLine = list(10)  

            for Row = 1 to 10
                Letter = "" + Row*20
                if Row*20 < 100
                   Letter = "  " + Row*20
                ok
                RowLine[Row] = new qLabel(win) { setFont(new qFont("Verdana",fontSize,40,0))
                                                 setAlignment(Qt_AlignHCenter | Qt_AlignVCenter) 
                                                 setStyleSheet("background-color:darkgray")
                                                 setText(Letter)
                                               } 
            next

            for Row = 1 to 10
                LayoutButtonRow[Row] = new QHBoxLayout()    
                {
                    setSpacing(C_Spacing)
                    setContentsmargins(0,0,0,0)
                } 

               LayoutButtonRow[Row].AddWidget(RowLine[Row])
               
               for Col = 1 to 20
                    Button[Row][Col] = new QPushButton(win) {
                                       setmaximumwidth(20) 
                                       }
                    LayoutButtonRow[Row].AddWidget(Button[Row][Col])    
               next
               
               LayoutButtonMain.AddLayout(LayoutButtonRow[Row])         
            next

            LayoutDataRow = new QHBoxLayout() { setSpacing(C_Spacing) setContentsMargins(0,0,0,0) }

            LayoutDataRow.AddWidget(LabelFind)
            LayoutDataRow.AddWidget(ButtonFind)               
            LayoutDataRow.AddWidget(DnaSearch)
            LayoutButtonMain.AddLayout(LayoutDataRow) 
                  
            setLayout(LayoutButtonMain)
            
            pStart()
            show()
   }
   exec()
 }

//-----------------------------------------

func pStart()
     start = start + 1

     dnaList = []
     for row = 1 to 10
         for col = 1 to 20
             Button[row][col].settext("")
         next
     next
     for nr = 1 to 200
         rnd = random(3)+1
         baseStr = base[rnd]
         row = ceil(nr/20)
         col = nr%20
         if col = 0
            col = 20
         ok
         Button[row][col].settext(baseStr)
         add(dnaList,baseStr)
     next

     startDna()

//-----------------------------------------

func startDna()

     strDna = list2str(dnaList)
     strDna = substr(strDna,nl,"")

     while true
           strBase = ""
           for n = 1 to 4
               rnd = random(3)+1
               strBase = strBase + base[rnd]
           next
           ind = substr(strDna,strBase)
           if ind > 0
              exit
           ok
     end

     showDna(dnaList)

//-----------------------------------------

func showDna(dnaList)


     if start > 1
     see nl
     for n = 1 to len(dnaSeq)
         for m = 0 to 3
             ind = dnaSeq[n] + m
             row = ceil(ind/20)
             col = ind%20
             if col = 0
                col = 20
             ok
             Button[row][col].setstylesheet(C_ButtonStyle)
         next
     next
     ok


     dnaSeq = []
     strDna = list2str(dnaList)
     strDna = substr(strDna,nl,"")

     while true
           strBase = ""
           for n = 1 to 4
               rnd = random(3)+1
               strBase = strBase + base[rnd]
           next
           ind = substr(strDna,strBase)
           if ind > 0
              exit
           ok
     end

     ButtonFind.setStyleSheet("background-color:yellow")
     ButtonFind.settext(strBase)

     for n = 1 to 196
         flag = 1
         for m = 0 to 3
             if dnaList[n+m] != strBase[m+1]
                flag = 0
                exit
             ok
         next
         if flag = 1
            add(dnaSeq,n)
         ok
     next
     
     temp = ""
     ButtonInd.settext("")
     for nr = 1 to len(dnaList)
         ind = find(dnaSeq,nr)
         if ind > 0
            temp = temp + string(dnaSeq[ind]) + " "
            ButtonInd.settext(temp)
            for n = nr to nr + 3
                row = ceil(n/20)
                col = n%20
                if col = 0
                   col = 20
                ok
                Button[row][col].setStyleSheet(C_ButtonDnaStyle)
                Button[row][col].settext(dnaList[n])
            next
         ok
      next           

//-----------------------------------------

Output:

Bioinformatics/Subsequence - video

Wren

Library: Wren-pattern
Library: Wren-str
Library: Wren-fmt
import "random" for Random
import "./pattern" for Pattern
import "./str" for Str
import "./fmt" for Fmt

var rand = Random.new()
var base = "ACGT"

var findDnaSubsequence = Fn.new { |dnaSize, chunkSize|
    var dnaSeq = List.filled(dnaSize, null)
    for (i in 0...dnaSize) dnaSeq[i] = base[rand.int(4)]
    var dnaStr = dnaSeq.join()
    var dnaSubseq = List.filled(4, null)
    for (i in 0...4) dnaSubseq[i] = base[rand.int(4)]
    var dnaSubstr = dnaSubseq.join()
    System.print("DNA sequence:")
    var i = chunkSize
    for (chunk in Str.chunks(dnaStr, chunkSize)) {
         Fmt.print("$3d..$3d: $s", i - chunkSize + 1, i, chunk)
         i = i + chunkSize
    }
    System.print("\nSubsequence to locate: %(dnaSubstr)")
    var p = Pattern.new(dnaSubstr)
    var matches = p.findAll(dnaStr)
    if (matches.count == 0) {
        System.print("No matches found.")
    } else {
        System.print("Matches found at the following indices:")
        for (m in matches) {
            Fmt.print("$3d..$3d", m.index + 1, m.index + 4)
        }
    }
}

findDnaSubsequence.call(200, 20)
System.print()
findDnaSubsequence.call(600, 40)
Output:
DNA sequence:
  1.. 20: TATGGGCGCATTATGACAAC
 21.. 40: GGCTACTGAAACGAAAATTC
 41.. 60: ATGCCTTCGGAGGCTAGACC
 61.. 80: ACTCATACATGATTTACAGC
 81..100: TAGTCAGTTGCGTCCGCCAT
101..120: CCCGCATAACTATGTATTAC
121..140: GAGCATGTTCTGGCAACCTT
141..160: TCAGTGACAGTTCCTCAGGC
161..180: GCGTTCGCGTTGAAGGCCTC
181..200: CCCACACCGCACCCCTGCCG

Subsequence to locate: AATT
Matches found at the following indices:
 36.. 39

DNA sequence:
  1.. 40: GCGCTGAGCGCCCCAGTACAGCGGGTTAAACCGAGCCCGC
 41.. 80: TCCGATGAACCAACTCCCATTCCTATAATGGTGCCCCGAC
 81..120: ATATTGAATTCGGCGGGTCCGCTATCGGGCTGAGGATGCC
121..160: AATATCTAGGCGCTACCCTGAAGATCCTCAGTTGTGGTGT
161..200: CGCGGAGTGTCGATCCCAGAGCTCCCAATTGACTCAATTA
201..240: CTTTTTCCGTCCTCTTGCTTACGGATTTATGTTTGTGGCA
241..280: GAGGTTATGCTTCAGGCATCCCCATGTTTCCTGAGATACG
281..320: ACCACTGTCAGGTGGCTTGAATCTACCTTGTATTTCCTCT
321..360: AGTACCAGTCACTGTCATCTACTGGAAGCCATATCAGCGT
361..400: TGAAATGTCTATAATTTACTCTCCGGTTGTACCCAAGCGA
401..440: TAACAGCAACGTGTGGGTCTAAAGAGTTCCGCGTTTCGAC
441..480: ATAACGTGCTCCTATTTATCTACCGAAACACCCTATTTTC
481..520: CATCTAACCGGCACCCAATGCGCAGGTGTACGCGTCCTAC
521..560: TACGTTTGAAACGGTTCCATCTCGCCATGTACAATTGTGG
561..600: GGCTACGATTAAGTGTAGTCGGTAATTCAGGGTGAAGTTG

Subsequence to locate: TTCG
Matches found at the following indices:
 89.. 92
435..438