Bioinformatics/Subsequence: Difference between revisions

From Rosetta Code
Content added Content deleted
(julia example)
m (→‎{{header|Wren}}: Minor tidy)
(44 intermediate revisions by 16 users not shown)
Line 5: Line 5:
Write a routine to find all the positions of a randomly generated subsequence   (four letters).
Write a routine to find all the positions of a randomly generated subsequence   (four letters).
<br><br>
<br><br>
=={{header|11l}}==
{{trans|Python}}


<syntaxhighlight lang="11l">UInt32 seed = 34
// Deterministic replacement for a random choice.
// Advances the global :seed with one linear-congruential step, keeps the
// result within 32 bits via the mask, then uses the seed shifted right by 16
// bits (modulo the list length) as the index of the element to return.
F nonrandom_choice(lst)
:seed = (1664525 * :seed + 1013904223) [&] FFFF'FFFF
R lst[Int(:seed >> 16) % lst.len]

// Builds a string of n nucleotide letters, each one picked from A/C/G/T by
// nonrandom_choice, so the result depends only on the current global seed.
F generate_sequence(Int n)
R ((0 .< n).map(_ -> nonrandom_choice([‘A’, ‘C’, ‘G’, ‘T’]))).join(‘’)

// Returns the list of every index at which subSeq occurs in dnaSeq.
// After each hit the search restarts one character past the hit's start
// (start = pos + 1), so overlapping occurrences are reported as well.
F positions(dnaSeq, subSeq)
[Int] r
V start = 0
L
V? pos = dnaSeq.find(subSeq, start)
// find yields N (null) when there is no further occurrence
I pos == N
L.break
r.append(pos)
start = pos + 1
R r

// Prints where needle occurs in haystack.
// Each hit is shown as "start:end", where end is start plus the needle
// length; a single "No matches found" line is printed when there are no hits.
F dna_findall(String needle, haystack) -> N
V pp = positions(haystack, needle)
I pp.empty
print(‘No matches found’)
E
print(‘Found ’needle‘ at the following indices:’)
L(p) pp
print(p‘:’(p + needle.len))

// Driver: generate a 200-base sequence and a 4-base sample, print the
// sequence, then report every position of the sample.
V dna_seq = generate_sequence(200)
V sample_seq = generate_sequence(4)

// c counts the characters printed so far; every 20th character is printed
// with the default line ending, all others with an empty one.
V c = 1
L(i) dna_seq
I c % 20 != 0 {print(i, end' ‘’)} E print(i)
c++
print("\nSearch Sample: "sample_seq)

dna_findall(sample_seq, dna_seq)</syntaxhighlight>

{{out}}
<pre>
GAAGTGCTCAAACCCTTTTT
CCTTGCCGTAGGTTGTGCTG
CCGCCGCACACCCGCAACAG
CTTTTAGGCATAAGTATACG
GACCGCGGACGGGGCGTAAC
GGTGAACATTTTGCTAAATT
GGCTCTAGGGATGAGCCCTA
TAGCGCTGGGGACTACGCCC
CGGTAAAGATCGAGGCGACT
CACCGATTGCGCTAGGGACA

Search Sample: CGTA
Found CGTA at the following indices:
26:30
94:98
</pre>
=={{header|Action!}}==
<syntaxhighlight lang="action!">DEFINE SEQLEN="200"
DEFINE SUBLEN="4"

; Fills s with len letters drawn from "ACGT" and stores len in s(0),
; the element the other routines in this program read as the length.
PROC RandomSeq(CHAR ARRAY s BYTE len)
CHAR ARRAY letters="ACGT"
BYTE i

FOR i=1 TO len
DO
; NOTE(review): assumes Rand(4) yields 0..3, so +1 selects letters(1..4)
s(i)=letters(Rand(4)+1)
OD
s(0)=len
RETURN

; Prints s in rows of 20 characters. Each row starts with the 1-based
; position of its first character, padded with spaces (character code 32)
; for values below 100 and below 10, followed by ": ".
PROC PrintSeq(CHAR ARRAY s)
BYTE i

FOR i=1 TO s(0)
DO
; first character of a row: emit the position prefix
IF i MOD 20=1 THEN
IF i<10 THEN Put(32) FI
IF i<100 THEN Put(32) FI
PrintB(i)
Print(": ")
FI
Put(s(i))
; last character of a row: end the line
IF i MOD 20=0 THEN
PutE()
FI
OD
RETURN

; Returns 1 when the characters of s beginning at index start equal all of
; prefix (whose length is read from prefix(0)), otherwise 0.
; No bounds check is made here: the caller must make sure that
; start+prefix(0)-1 does not run past the end of s.
BYTE FUNC StartsWith(CHAR ARRAY s,prefix BYTE start)
BYTE i

FOR i=1 TO prefix(0)
DO
; '#' is the inequality test: bail out on the first mismatch
IF s(start+i-1)#prefix(i) THEN
RETURN (0)
FI
OD
RETURN (1)

; Entry point: generates a random search sequence of SEQLEN letters and a
; random subsequence of SUBLEN letters, prints both, then lists every
; 1-based "first-last" range at which the subsequence occurs
; (overlapping occurrences included), or "Not found".
PROC Main()
CHAR ARRAY seq(SEQLEN+1),sub(SUBLEN+1)
BYTE i,notfirst

RandomSeq(seq,SEQLEN)
RandomSeq(sub,SUBLEN)

PrintE("Search sequence:")
PrintSeq(seq)
PutE()
PrintF("Subsequence to find: %S%E%E",sub)

PrintE("Found subsequence at positions:")
; notfirst becomes 1 after the first hit; it drives the ", " separator
; and the final "Not found" message
notfirst=0
; The last window that still fits starts at SEQLEN-SUBLEN+1. The previous
; bound of SEQLEN-SUBLEN never tested a match at the very end.
FOR i=1 TO SEQLEN-SUBLEN+1
DO
IF StartsWith(seq,sub,i) THEN
IF notfirst THEN
Print(", ")
FI
notfirst=1
PrintF("%I-%I",i,i+SUBLEN-1)
FI
OD
IF notfirst=0 THEN
PrintE("Not found")
FI
RETURN</syntaxhighlight>
{{out}}
[https://gitlab.com/amarok8bit/action-rosetta-code/-/raw/master/images/Bioinformatics_subsequence.png Screenshot from Atari 8-bit computer]
<pre>
Search sequence:
1: CGACTCAGGAAGGCCACGTG
21: GTAACTTCTTAGTTACCGTA
41: AGGCTAATAGCTAGCGCTGC
61: GTGACCAGGCATAGTAACCG
81: GCACGCACGTTCACCAAGGG
101: GTCCCGATGGGAGGCACGTT
121: ACTACTCCAAGAACTGTAGT
141: AAGTTACCGAAAAGTTCTCA
161: TCCTTGGGTAGTGAGTACTT
181: TGTGCTATGAAAAATAAGGA

Subsequence to find: ACGC

Found subsequence at positions:
83-86
</pre>
=={{header|Ada}}==
<syntaxhighlight lang="ada">with Ada.Text_Io;
with Ada.Strings.Fixed;
with Ada.Numerics.Discrete_Random;

procedure Sub_Sequence is

type Nucleotide is (A, C, G, T);

function To_Character (N : Nucleotide) return Character
is (case N is
when A => 'A', when C => 'C',
when G => 'G', when T => 'T');

package Random_Nucleotide is new Ada.Numerics.Discrete_Random (Nucleotide);
use Random_Nucleotide;

package Position_Io is new Ada.Text_Io.Integer_Io (Natural);
use Ada.Text_Io;

--  Prints Seq in rows of at most Width characters.  Each row is prefixed
--  with the index range "First..Last" of the characters it shows.
procedure Put_Bases (Seq : String; Width : Positive) is
   First : Natural := Seq'First;
begin
   --  "<=" (not "<") so that a final row holding a single character is
   --  still printed, e.g. a 401-character sequence with Width => 50.
   while First <= Seq'Last loop
      declare
         Last : constant Natural :=
           Natural'Min (First + Width - 1, Seq'Last);
      begin
         Position_Io.Put (First); Put ("..");
         Position_Io.Put (Last);  Put (" ");
         Put (Seq (First .. Last));
         New_Line;
         First := Last + 1;
      end;
   end loop;
end Put_Bases;

--  State of the main procedure: the random generator, the 405-character
--  search sequence, the 4-character substring, and the position of the
--  most recent match (0 means "none yet" / "no more").
Gen : Generator;
Sequence : String (1 .. 405);
Substring : String (1 .. 4);
Pos : Natural := 0;
begin
--  Print positions in a field of three characters.
Position_Io.Default_Width := 3;

Reset (Gen);

--  NOTE(review): relies on the aggregate expression being evaluated once
--  per component, so that every character is drawn independently.
Sequence := (others => To_Character (Random (Gen)));
Substring := (others => To_Character (Random (Gen)));

Put_Line ("Search sequence:");
Put_Bases (Sequence, Width => 50);
New_Line;

Put ("Substring to search: ");
Put (Substring);
New_Line;

--  Repeatedly search from one past the previous match start, so that
--  overlapping occurrences are reported; Index returning 0 ends the loop.
loop
Pos := Ada.Strings.Fixed.Index (Sequence, Substring, Pos + 1);
exit when Pos = 0;
Put ("Found at position: ");
Position_Io.Put (Pos); Put ("..");
Position_Io.Put (Pos + Substring'Length - 1);
New_Line;
end loop;
end Sub_Sequence;</syntaxhighlight>
{{out}}
<pre>Search sequence:
1.. 50 CCTACGGAAAAGTGATAAGGACAGATACATAATCCTAAAACCCTGGAAAA
51..100 CTTGTCTCGCCAGAGTAGGGCTCGGCAGGGGGGGCAGTGTTTTAAAACGT
101..150 CAGAGAATAGGCTCTACCTTGTTAGACTGCGAGTACTGGAGCGTAGTTCC
151..200 TATATTGCAAGCTGCTACAGTAAGTATCAAAGTATGCCACACATCCTTCT
201..250 ACAACCGGATTGGTTGCCCAGTAGAAGGCTCGTAGTCACCGGACACGCTG
251..300 TTCTTAAGGTCGGTAAGCTATTACGTCCATGGGAGATTCTCAAGGGTGCG
301..350 TTAGCGGACCCCCGTTACGTCCACGTATCTTCCGTCCAACTACCCCCTAA
351..400 TGTCATTGACATCGCCCGAGTATTTAATTTATTTGAACGGCACCAATTTA
401..405 GAGCT

Substring to search: TATT
Found at position: 153..156
Found at position: 269..272
Found at position: 371..374
Found at position: 380..383</pre>
=={{header|Arturo}}==

<syntaxhighlight lang="rebol">bases: [`A` `G` `C` `T`]
; a random 200-base sequence and a random 4-base subsequence to look for
randSeq: join map 1..200 => [sample bases]
randSub: join map 1..4 => [sample bases]

; idx is the zero-based start of the window currently being tested
idx: 0

print "Random sequence:"
print join.with:"\n" split.every: 20 randSeq
print ""

print "Looking for subsequence:"
print randSub
print ""

; ">=" keeps the loop running for the last window that still fits
; (idx = size - 4); the previous ">" stopped one position early, so a
; match at the very end of the sequence was never reported.
; The window is taken as "everything from idx onwards" and tested with
; prefix?, which avoids depending on how slice treats its end index.
while [(size randSeq) >= idx + 4][
    if prefix? drop randSeq idx randSub ->
        print ["Found subsequence at position:" idx]
    idx: idx + 1
]</syntaxhighlight>

{{out}}

<pre>Random sequence:
CACGCGCGTTAACCCTGCAT
CTTTTCTCTAAGATGATGCG
CTACTCTGCCCGATTACTAT
GATGTCACCGGCGGTTCGGC
GACTGGCGCTGGCAGAAAGC
GCATGTCAAATTGCCCCAGT
GTGCAAGTCCAAGTATTAGT
GAGGTGCTCCGCTTCGTCCG
GGGTCGACTCGGTCCCACTT
CATTACATGTTGGTAATAGT

Looking for subsequence:
CGGT

Found subsequence at position: 71
Found subsequence at position: 169</pre>
=={{header|Factor}}==
=={{header|Factor}}==
{{works with|Factor|0.99 2021-02-05}}
{{works with|Factor|0.99 2021-02-05}}
<lang factor>USING: accessors formatting grouping io kernel math
<syntaxhighlight lang="factor">USING: accessors formatting grouping io kernel math
math.functions.integer-logs math.parser random regexp sequences ;
math.functions.integer-logs math.parser random regexp sequences ;


Line 34: Line 309:


80 10 .biosub nl
80 10 .biosub nl
600 39 .biosub nl</lang>
600 39 .biosub nl</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre> 0: ATTCAAGGAC
0: ATTCAAGGAC
10: CACTATTAAC
10: CACTATTAAC
20: CTGCATTGTG
20: CTGCATTGTG
Line 70: Line 344:
145..149
145..149
289..293
289..293
312..316
312..316</pre>

=={{header|FreeBASIC}}==
{{trans|Wren}}
<syntaxhighlight lang="vb">Const base_ = "ACGT"

' Generates a random DNA string of dnaSize bases and a random 4-base
' subsequence, prints the DNA in rows of chunkSize bases (each row labelled
' with its 1-based index range), then prints the 1-based "first..last"
' range of every occurrence of the subsequence, overlapping ones included.
Sub findDnaSubsequence(dnaSize As Integer, chunkSize As Integer)
    Dim As Integer i, chunk, lastIdx
    Dim As String dnaStr, dnaSubstr

    ' Same order of Rnd calls as before: the whole sequence first,
    ' then the subsequence.
    For i = 1 To dnaSize
        dnaStr += Mid(base_, Int(Rnd * 4)+1, 1)
    Next
    For i = 1 To 4
        dnaSubstr += Mid(base_, Int(Rnd * 4)+1, 1)
    Next

    Print "DNA sequence:"
    For chunk = 1 To Len(dnaStr) Step chunkSize
        ' Clamp the label so a shorter final row is not labelled past the end.
        lastIdx = chunk + chunkSize - 1
        If lastIdx > Len(dnaStr) Then lastIdx = Len(dnaStr)
        Print Using "###_._.###: &"; chunk; lastIdx; Mid(dnaStr, chunk, chunkSize)
    Next

    Print !"\nSubsequence to locate: "; dnaSubstr
    Dim As Integer idx = Instr(dnaStr, dnaSubstr)
    Print Iif(idx <> 0, "Matches found at the following indices:", "No matches found.")
    Do While idx > 0
        Print Using "###_._.###"; idx; idx + Len(dnaSubstr) - 1
        ' Resume one base after the start of this hit (not after its end),
        ' otherwise overlapping occurrences such as AAAA in AAAAA are skipped.
        idx = Instr(idx+1, dnaStr, dnaSubstr)
    Loop
End Sub

' Seed the generator from the clock; without this Rnd produces the same
' "random" sequences on every run.
Randomize Timer

' Two demonstrations: 200 bases in rows of 20, then 600 bases in rows of 40.
findDnaSubsequence(200, 20)
Print
findDnaSubsequence(600, 40)

' Wait for a key press so the console window stays open.
Sleep</syntaxhighlight>
{{out}}
<pre>DNA sequence:
1.. 20: TTATAGTCTTGGAGGCATGT
21.. 40: TAACTTATGCGGAGCAGACA
41.. 60: CGGAGTATGCATTCCTCTTA
61.. 80: CCAAACGGTGCTGCCCGCGC
81..100: ACTCGCTGTATTCCGTATCG
101..120: TCACATTATCTAAACCACGA
121..140: TTTCCAGCGTGCGTGGGAAG
141..160: GCCATGTTTAGTCGGGGGCC
161..180: AAGGTCTTTGGCTTATGCTG
181..200: TTTTTTTTTCTTCGGTTACA

Subsequence to locate: ATTT
Matches found at the following indices:
120..123

DNA sequence:
1.. 40: GTGCGGGCCGTTAGCAGCTACGAGTGCTAGATGGAACTAG
41.. 80: TCCCCGCTCCCAAATGCAAAGCGTCCCAGACCAGTCTTGA
81..120: AGCCCGTTAAATTACACCTGAACCGTTGCAAATGATCGAT
121..160: AGACGGGGTATAATAGCGGAAAACACAGGGGAACTGCATG
161..200: CAAGCTCGAGCCGCTGAAGGATGGCTCCCCCCCGAGTGTA
201..240: AGTGGATCTCGCCCAAATAGCGGGGGAACAAAGAAAGGTA
241..280: AGTCTTACTTCGCACGTCCCCTCTCATACACGCCAGGACT
281..320: AATGGATCATTCATAGGTGACGGGTGACTTGCGGTGTTTC
321..360: TAGTTGGAGTCACCCGTCAGCTTAGATCTAAGTATGAACC
361..400: GTAAGAGTTTGTAACTGCACCTTCCGTCTCTTCCTCTGTA
401..440: GGAACGCTTTTGCTTGTTATCAGATAGTGTCTCCTTATCA
441..480: TAGGACAGGTTCCTTGTGAAGGTCCACAGAGTTTGCCCGG
481..520: GGTTCGAATATACGACGCTTGTGGTTCCGGCACTATAACT
521..560: TCCGCAGTGTTGTCGACGCCCCTAGCTCCCGGGGTCTTTT
561..600: CGCTTCCCTATAGCGCGAAATGAGTGCAAGGGTACCGGCC

Subsequence to locate: GCAC
Matches found at the following indices:
252..255
377..380
510..513</pre>

=={{header|Go}}==
{{trans|Wren}}
<syntaxhighlight lang="go">package main

import (
"fmt"
"math/rand"
"regexp"
"time"
)

const base = "ACGT"

// findDnaSubsequence generates a random DNA string of dnaSize bases and a
// random 4-base subsequence, prints the DNA in rows of chunkSize bases (each
// labelled with its 1-based index range) and then prints the 1-based
// "first..last" range of every occurrence of the subsequence, including
// occurrences that overlap one another.
func findDnaSubsequence(dnaSize, chunkSize int) {
	dnaSeq := make([]byte, dnaSize)
	for i := range dnaSeq {
		dnaSeq[i] = base[rand.Intn(4)]
	}
	dnaStr := string(dnaSeq)

	dnaSubseq := make([]byte, 4)
	for i := range dnaSubseq {
		dnaSubseq[i] = base[rand.Intn(4)]
	}
	dnaSubstr := string(dnaSubseq)

	fmt.Println("DNA sequnence:")
	// Walk by chunk start so that a shorter final chunk is printed too.
	for start := 0; start < len(dnaStr); start += chunkSize {
		end := start + chunkSize
		if end > len(dnaStr) {
			end = len(dnaStr)
		}
		fmt.Printf("%3d..%3d: %s\n", start+1, end, dnaStr[start:end])
	}

	fmt.Println("\nSubsequence to locate:", dnaSubstr)
	r := regexp.MustCompile(regexp.QuoteMeta(dnaSubstr))

	// FindAllStringIndex only yields non-overlapping matches, so search
	// repeatedly and resume one byte after the start of the previous hit.
	var matches [][2]int
	for from := 0; from < len(dnaStr); {
		loc := r.FindStringIndex(dnaStr[from:])
		if loc == nil {
			break
		}
		matches = append(matches, [2]int{from + loc[0], from + loc[1]})
		from += loc[0] + 1
	}

	if len(matches) == 0 {
		fmt.Println("No matches found.")
		return
	}
	fmt.Println("Matches found at the following indices:")
	for _, m := range matches {
		// m[0] is a zero-based start, m[1] a zero-based exclusive end,
		// which equals the 1-based inclusive end.
		fmt.Printf("%3d..%-3d\n", m[0]+1, m[1])
	}
}

// main seeds the generator from the clock and runs two demonstrations
// (200 bases in rows of 20, then 600 bases in rows of 40), separated by a
// blank line.
func main() {
	rand.Seed(time.Now().UnixNano())
	runs := [][2]int{{200, 20}, {600, 40}}
	for n, cfg := range runs {
		if n > 0 {
			fmt.Println()
		}
		findDnaSubsequence(cfg[0], cfg[1])
	}
}</syntaxhighlight>

{{out}}
Sample run:
<pre>
DNA sequnence:
1.. 20: GTTGCCCACACGTCTTATTG
21.. 40: TAAAAATCACCGTGCAGCGA
41.. 60: GGTTAAAAATGGTAGGAAAA
61.. 80: TATCCTCAGCCAGCGGTGCC
81..100: GGCCAACAAAAGGGACGTTG
101..120: GATTAAAGTAGGTCTAGGTA
121..140: TCTCGTATCCGGTTGATCCG
141..160: GGATGGTGGACGATATTGGA
161..180: GACCGGAGTGTACATCGGTG
181..200: TTGTCGCTTGCAGCTACGGT

Subsequence to locate: AATA
Matches found at the following indices:
59..62

DNA sequnence:
1.. 40: GTACAGCCACTGTTAGTAGACGGATGCTATTGGGACGCAA
41.. 80: CACATCAGTACACTGCTTGTTCGTAATCGCGTACCCAGCG
81..120: CAAAAGGAGGGGAGGAACCTGCTCAGACTGTCGCTAAAAA
121..160: CGAGCACGTGTCCTTACGCAGTGATGGTAGCGGTCCACGA
161..200: CTTCCACTGGCATAAGGAGAATGTTTAGTAACGCCCCTCA
201..240: TAGGTGCAATTCTACAGGTTAAGGGACCGTGGGATGTTTC
241..280: TATAAAAGTTGAAGAGATTACTAATCCGTCCCGTGCGCGT
281..320: GCCGCAATTTAGCGCCCGTTCTTGAGTAAACATACATGCA
321..360: CGCTCTTGAGTTTTCTAAAACCTGATCAAAACGGTCGCCC
361..400: ACATGCAGGAGCGCCGCAGGGTTTCAGAGGTCAACCATCG
401..440: GCAGCACACGTGAACCCTCTGTACTGACCAGGGGCTTGCT
441..480: CCTTGGTAGGAGATGGTGGAGAATGCGTCGATGCACTGAA
481..520: GCAGACCGCTGATAGCATGTACGATGTTTACGGGTTGACG
521..560: ATAGCTTTGCTAGTGATCGAACATATGATGAAAAACGCTT
561..600: CCATTGATAGAGCATCTTAGGAGCTCAGTCCAGTGACCTC

Subsequence to locate: AGGT
Matches found at the following indices:
202..205
216..219
388..391
</pre>
</pre>
=={{header|jq}}==
{{works with|jq}}
'''Works with gojq, the Go implementation of jq'''


Neither jq nor gojq currently has any PRNG built-ins so one
possibility is to use a jq-coded PRNG function such
as can be found at https://rosettacode.org/wiki/Random_numbers#jq

In practice, it's usually more convenient to use a utility such as
gshuf or jot to provide the source of randomness. Here we use
`jot -r N MIN MAX` but a fourth argument can also be
used to specify a seed. An alternative would be to use `gshuf` along the lines of:
<syntaxhighlight lang="sh">
# For 200 pseudo-random integers in the range 0 to 3 inclusive:
gshuf -i 0-3 -r -n 200 --random-source=/dev/random
</syntaxhighlight>

Note that the indices shown below are offsets (i.e., the index origin is taken to be 0).
<syntaxhighlight lang="sh">
#!/bin/bash

# The first jot call supplies 200 integers on stdin (read by jq via `inputs`);
# the second supplies 4 integers through --slurpfile, which binds them to
# $four as an array. Both are requested with bounds 0 and 3.
jot -r 200 0 3 | jq -nr --slurpfile four <(jot -r 4 0 3) '

# input: an array of integers
# output: a string with one letter of "ACGT" per integer, selected by using
# the integer as a zero-based offset into "ACGT"
def toDNA:
def base: . as $in | "ACGT" | .[$in : $in+1];
map(base) | join("");

# $strand is the strand built from stdin; $four is rebound from the array of
# four integers to the corresponding four-letter string.
([inputs] | toDNA) as $strand
| ($four | toDNA) as $four
| "Strand of length \($strand|length):",
$strand,
"Zero-based indices of \($four):",
($strand | indices($four) | join(" "))
'</syntaxhighlight>
{{out}}
<pre>
./bioinformatics-subsequence.sh
Strand of length 200:
TGGGCCCAAGCATTGCCACGTAGCTTTGTCAGTGGGCTTGTAAGGGACGAACACAAACTCACAGACCAGGAATTCTCGAGTTCCAGTCCCCCCACTTGTCGCTATTTAGTTAAGACGTTCAGTTTCGTTGCGAACTGTGTCCCCCAGGCTAACGTGATGGGTGTCAGGAATCAATGGCCAACTTTCAGTTAGACTTGACC
Zero-based indices of CAAC:
178

./bioinformatics-subsequence.sh
Strand of length 200:
TAAGACTGCAGGGTACGAAGAGTGGAAGATTGGCTCGTACTTGTCGACGTCGCGTGACATAATCTCTGTGCTCGCCTCGCAGTAAGGGACTAGGTCCCGTTCGAGCGCCCTGCTAGAAGGAGCATCCTACCATGCTCTGATGACATCCTGTCGGCATTAGAGTTTCTACGACATCTAAAGAGTACGATCGACTTCCCAGT
Zero-based indices of GACA:
55 141 169
</pre>
=={{header|Julia}}==
=={{header|Julia}}==
<lang julia>DNArand(n, bases=['A', 'T', 'C', 'G']) = String(rand(bases, n))
<syntaxhighlight lang="julia">DNArand(n, bases=['A', 'T', 'C', 'G']) = String(rand(bases, n))


DNAsearch(needle, haystack, lap=true) = findall(needle, haystack, overlap=lap)
DNAsearch(needle, haystack, lap=true) = findall(needle, haystack, overlap=lap)
Line 82: Line 576:


println("Search sequence:\n$rand_string\nfor substring $subseq. Found at positions: ")
println("Search sequence:\n$rand_string\nfor substring $subseq. Found at positions: ")
foreach(p -> print(rpad(p[2], 8), p[1] % 10 == 0 ? "\n" : ""), DNAsearch(subseq, rand_string))
foreach(p -> print(rpad(p[2], 8), p[1] % 10 == 0 ? "\n" : ""), enumerate(DNAsearch(subseq, rand_string)))
</lang>{{out}}
</syntaxhighlight>{{out}}
<pre>
<pre>
Search sequence:
Search sequence:
CCGAAGCCAGGAGGACTGAGCGCTTGCGTCCCGAGTTCTGCGACGAGTCTCTTCATTATAAGGCCACTGATTGCGCTCATCATGAGTGCCAGAAGCACCGCTAAACATAAGTGTCCTTTCTTCCTGACGCACTTGAAGATTGTGACCATTTGTGCGGGTTGTGAGTTAGGGGCTCTCATTGTACACGATCTATAGTGTGC
AAGAGTACGTCGCCAGGGAAGCTTCGGAACGTGCCCGGTGCCAAGCGTCACTACGTGGCAGAGTATATTCATGCTGCAAGGAAATTATTAATCGGGTACTGTGCGCAGCTTGTGCCGCGATATTTTGTACTCTCTCGGAATAGGCAACGCTGGATCATGCGTAGACTGTTGATCGGACGAGTTTGTTACGGATAATAAGG
for substring TGTT. Found at positions:
for substring CGCT. Found at positions:
168 185
21:24 74:77 99:102
</pre>
</pre>
=={{header|Nim}}==
<syntaxhighlight lang="nim">import random, sequtils, strutils


proc dnaSequence(n: Positive): string =
## Create a random DNA sequence of length "n".
newSeqWith(n, sample("ACGT")).join()


proc positions(dnaSeq, subSeq: string): seq[int] =
=={{header|Phix}}==
## Return the list of starting positions of a subsequence
Note: match_all() is due to become a builtin in the next release, so the version below may or may not need renaming/deleting before it will run.<br>
## "subSeq" in a sequence "dnaSeq". Positions start at 1.
Currently only searches for non-overlapped sequences, but it should be pretty obvious how to change that, in which case the next underline will simply partially overwrite the previous, so you'll get eg "<=<==>".
var start = 0
<lang Phix>constant cheat = false
while true:
let pos = dnaSeq.find(subSeq, start)
if pos < 0: break
result.add pos + 1
start = pos + 1


function grandna(integer len)
string dna = repeat(' ',len)
for i=1 to len do dna[i] = "ACGT"[rand(4)] end for
return dna
end function


when isMainModule:
procedure show(string dna, sequence idx)
idx &= length(dna)+100 -- (add an otherwise unused sentinel)
sequence s = split(trim(join_by(split(join_by(dna,1,10,""),"\n"),1,5," ")),"\n")
integer ii = 1, -- idx index
i = idx[ii], -- current target
ux = 1, -- underline index (1..4)
ldx = 1 -- line index (1, 51, 101, etc)
for si=1 to length(s) do
printf(1,"%3d: %s\n",{ldx,s[si]})
ldx += 50
if i and i<ldx then
string ul = repeat(' ',59)
while i and i<ldx do
integer up = i-ldx+51 -- underline pos (relative to ldx)
up += floor((up-1)/10)+5 -- (plus any needed spacing)
ul[up] = "<==>"[ux]
ux += 1
i += 1
if ux>4 then
ux = 1
ii += 1
i = idx[ii]
end if
end while
printf(1,"%s\n",ul)
end if
end for
if length(idx) then
string s = iff(length(idx)>1?"s":""),
t = join(apply(idx,sprint),", ")
printf(1,"%s occurs at location%s: %s\n",{test,s,t})
else
printf(1,"%s does not occur\n",{test})
end if
end procedure


const
function match_all(object needle, sequence haystack, bool bOverlap = false)
N = 200
if atom(needle) then return find_all(needle,haystack) end if
integer start = 1
Step = 20

sequence res = {}
randomize()
while 1 do

start = match(needle,haystack,start)
let dnaSeq = dnaSequence(N)
if start=0 then exit end if
echo "DNA sequence:"
res = append(res,start)
for i in countup(0, N - 1, Step):
start += iff(bOverlap?1:length(needle))
echo ($(i+1)).align(3), ' ', dnaSeq[i..i+(Step-1)]
end while

return res
let subSeq = dnaSequence(3)
end function
echo "\nDNA subsequence: ", subSeq

echo()
let pos = dnaSeq.positions(subSeq)
if pos.len == 0:
echo "Subsequence not found."
else:
let tail = if pos.len == 1: ": " else: "s: "
echo "Subsequence found at position", tail, pos.join(", ")</syntaxhighlight>

{{out}}
<pre>DNA sequence:
1 CACATACGATGAGCTGGGCG
21 CCTAAGAGGCGGAAAGACAA
41 CCGTGTGTGTCTAACCCATG
61 GTTTAATTGCAGATAGTCTC
81 TAGACTACAAACATTAGAGC
101 AATGCACCGGGGTGCACGTG
121 TGTTTTGACTTCCCATGAAA
141 GCCCTTATCCTAGAGTACAG
161 TCGGCAAATGTTCGCTCCTT
181 GGCCCACTCCATTTGGACGG

DNA subsequence: GTT

Subsequence found at positions: 61, 122, 170</pre>
=={{header|Perl}}==
<syntaxhighlight lang="perl">use strict;
use warnings;
use feature 'say';

my @bases = <A C G T>;
my $basecnt = 160;

# Build a random strand of $basecnt bases and a random 4-base target.
my($string,$target);
$string .= $bases[ int rand @bases ] for 1 .. $basecnt;
$target .= $bases[ int rand @bases ] for 1 .. 4;
say "Target: $target";
say 'Matches at these positions:';

# List one number per occurrence: the offset just past the hit, which is the
# 1-based position of its last base. The zero-width look-ahead lets the
# search advance one base at a time, so overlapping hits are listed too.
say $-[0] + length $target while $string =~ /(?=\Q$target\E)/g;

# Show the strand wrapped at 40 bases per line with every hit bracketed.
# The strand is wrapped before the hits are marked, so the pattern allows an
# optional newline between the bases of the target; otherwise a hit that
# straddles a line break would not be marked.
my $spanning = join '\n?', map { quotemeta } split //, $target;
say(($string =~ s/.{1,40}\K/\n/gr) =~ s/($spanning)/ >$1< /gr);</syntaxhighlight>
{{out}}
<pre>Target: CCTG
Matches at these positions:
9
90
157
TTGCC >CCTG< CAAAGTTAATAAGTAAACAATTAAGTGAGTG
CTCTAGGGTAAGGTGAGGGCGGGAAGGGGAAAAATACCGA
TGCGAG >CCTG< TAGAGCCGGGCCTCAAATTAAACGAAAAAT
ATAAGTTTGCTTGGCACGCTGTACTACTTATCC >CCTG< ACT</pre>
=={{header|Phix}}==
Currently this only searches for non-overlapping occurrences, but it should be fairly obvious how to change that; in that case each underline would simply partially overwrite the previous one, giving e.g. "<=<==>".
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">cheat</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">grandna</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">len</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">dna</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">,</span><span style="color: #000000;">len</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">len</span> <span style="color: #008080;">do</span> <span style="color: #000000;">dna</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"ACGT"</span><span style="color: #0000FF;">[</span><span style="color: #7060A8;">rand</span><span style="color: #0000FF;">(</span><span style="color: #000000;">4</span><span style="color: #0000FF;">)]</span> <span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">dna</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">show</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">dna</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">test</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">idx</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">idx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">deep_copy</span><span style="color: #0000FF;">(</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">)</span> <span style="color: #0000FF;">&</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">dna</span><span style="color: #0000FF;">)+</span><span style="color: #000000;">100</span> <span style="color: #000080;font-style:italic;">-- (add an otherwise unused sentinel)</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">s</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">split</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">trim</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">join_by</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">split</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">join_by</span><span style="color: #0000FF;">(</span><span style="color: #000000;">dna</span><span style="color: #0000FF;">,</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">10</span><span style="color: #0000FF;">,</span><span style="color: #008000;">""</span><span style="color: #0000FF;">),</span><span style="color: #008000;">"\n"</span><span style="color: #0000FF;">),</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">5</span><span style="color: #0000FF;">,</span><span style="color: #008000;">" "</span><span style="color: #0000FF;">)),</span><span style="color: #008000;">"\n"</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">ii</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000080;font-style:italic;">-- idx index</span>
<span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">idx</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ii</span><span style="color: #0000FF;">],</span> <span style="color: #000080;font-style:italic;">-- current target</span>
<span style="color: #000000;">ux</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000080;font-style:italic;">-- underline index (1..4)</span>
<span style="color: #000000;">ldx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span> <span style="color: #000080;font-style:italic;">-- line index (1, 51, 101, etc)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">si</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">s</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%3d: %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">ldx</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s</span><span style="color: #0000FF;">[</span><span style="color: #000000;">si</span><span style="color: #0000FF;">]})</span>
<span style="color: #000000;">ldx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">50</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span> <span style="color: #008080;">and</span> <span style="color: #000000;">i</span><span style="color: #0000FF;"><</span><span style="color: #000000;">ldx</span> <span style="color: #008080;">then</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">ul</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #008000;">' '</span><span style="color: #0000FF;">,</span><span style="color: #000000;">59</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span> <span style="color: #008080;">and</span> <span style="color: #000000;">i</span><span style="color: #0000FF;"><</span><span style="color: #000000;">ldx</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">up</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">-</span><span style="color: #000000;">ldx</span><span style="color: #0000FF;">+</span><span style="color: #000000;">51</span> <span style="color: #000080;font-style:italic;">-- underline pos (relative to ldx)</span>
<span style="color: #000000;">up</span> <span style="color: #0000FF;">+=</span> <span style="color: #7060A8;">floor</span><span style="color: #0000FF;">((</span><span style="color: #000000;">up</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)/</span><span style="color: #000000;">10</span><span style="color: #0000FF;">)+</span><span style="color: #000000;">5</span> <span style="color: #000080;font-style:italic;">-- (plus any needed spacing)</span>
<span style="color: #000000;">ul</span><span style="color: #0000FF;">[</span><span style="color: #000000;">up</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"&lt;==&gt;"</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ux</span><span style="color: #0000FF;">]</span>
<span style="color: #000000;">ux</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">ux</span><span style="color: #0000FF;">></span><span style="color: #000000;">4</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">ux</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">ii</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">idx</span><span style="color: #0000FF;">[</span><span style="color: #000000;">ii</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">ul</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">)></span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">p</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">)></span><span style="color: #000000;">1</span><span style="color: #0000FF;">?</span><span style="color: #008000;">"s"</span><span style="color: #0000FF;">:</span><span style="color: #008000;">""</span><span style="color: #0000FF;">),</span>
<span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">apply</span><span style="color: #0000FF;">(</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..$-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span><span style="color: #7060A8;">sprint</span><span style="color: #0000FF;">),</span><span style="color: #008000;">", "</span><span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s occurs at location%s: %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">test</span><span style="color: #0000FF;">,</span><span style="color: #000000;">p</span><span style="color: #0000FF;">,</span><span style="color: #000000;">t</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">else</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"%s does not occur\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">test</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">dna</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">grandna</span><span style="color: #0000FF;">(</span><span style="color: #000000;">200</span><span style="color: #0000FF;">),</span>
string dna = grandna(200),
<span style="color: #000000;">test</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">grandna</span><span style="color: #0000FF;">(</span><span style="color: #000000;">4</span><span style="color: #0000FF;">)</span>
test = grandna(4)
<span style="color: #008080;">constant</span> <span style="color: #000000;">cheats</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">cheat</span><span style="color: #0000FF;">?{</span><span style="color: #000000;">9</span><span style="color: #0000FF;">,</span><span style="color: #000000;">13</span><span style="color: #0000FF;">,</span><span style="color: #000000;">49</span><span style="color: #0000FF;">,</span><span style="color: #000000;">60</span><span style="color: #0000FF;">,</span><span style="color: #000000;">64</span><span style="color: #0000FF;">,</span><span style="color: #000000;">68</span><span style="color: #0000FF;">}:{})</span>
constant cheats = iff(cheat?{9,13,49,60,64,68}:{})
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">cheats</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
for i=1 to length(cheats) do
<span style="color: #000000;">dna</span><span style="color: #0000FF;">[</span><span style="color: #000000;">cheats</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]..</span><span style="color: #000000;">cheats</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]+</span><span style="color: #000000;">3</span><span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">test</span>
dna[cheats[i]..cheats[i]+3] = test
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
end for
<span style="color: #004080;">sequence</span> <span style="color: #000000;">idx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">test</span><span style="color: #0000FF;">,</span><span style="color: #000000;">dna</span><span style="color: #0000FF;">)</span>
sequence idx = match_all(test,dna)
<span style="color: #000000;">show</span><span style="color: #0000FF;">(</span><span style="color: #000000;">dna</span><span style="color: #0000FF;">,</span><span style="color: #000000;">test</span><span style="color: #0000FF;">,</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">)</span>
show(dna,idx)</lang>
<!--</syntaxhighlight>-->
{{out}}
{{out}}
with cheat enabled
with cheat enabled
Line 180: Line 744:
GCTA does not occur
GCTA does not occur
</pre>
</pre>
=={{header|Python}}==


{{works with|Python|3.8}}
{{libheader|regex}}

<syntaxhighlight lang="python">
from random import choice
import regex as re
import time

def generate_sequence(n: int) -> str:
    """Return a random DNA string of ``n`` bases.

    Each base is drawn independently and uniformly from A, C, G and T using
    ``random.choice``; ``n == 0`` yields the empty string.
    """
    return "".join(choice("ACGT") for _ in range(n))

def dna_findall(needle: str, haystack: str) -> None:
    """Print every occurrence of ``needle`` in ``haystack``, overlaps included.

    Each hit is printed as ``start:end`` with 0-based, end-exclusive offsets.
    ``No matches found`` is printed when the needle does not occur.
    The needle is matched literally, so no regular-expression engine is needed
    and the haystack is scanned only once.
    """
    starts = []
    pos = haystack.find(needle)
    while pos != -1:
        starts.append(pos)
        # Step by one rather than len(needle) so overlapping hits are kept.
        pos = haystack.find(needle, pos + 1)

    if not starts:
        print("No matches found")
    else:
        print(f"Found {needle} at the following indices: ")
        for start in starts:
            print(f"{start}:{start + len(needle)} ")

# Demo driver: build a 200-base strand and a 4-base probe, show the strand
# in rows of 20 bases, then report where the probe occurs.
dna_seq = generate_sequence(200)
sample_seq = generate_sequence(4)

# End the line after every 20th base so the strand prints as ten rows.
for c, base in enumerate(dna_seq, start=1):
    print(base, end="" if c % 20 != 0 else "\n")
print(f"\nSearch Sample: {sample_seq}")

dna_findall(sample_seq, dna_seq)
</syntaxhighlight>
{{out}}

<pre>
TTGCCCCTGTACTGAGCCCA
TAAGCTTGCACTCAAGGTTT
TGCCCCCTCATATTATAACG
CATCCATTATACAAAACCGA
TACCCTTCCGCATATTATGA
AAAGTGGCGAAGTGCCTTGA
TTTGCATTCATAGTACAACG
GTGCAAAAGCATTGTATGTC
TCACATTTACATGGGAAATG
CCTAGTAGGTGCAAGACCTG

Search Sample: TACA
Found TACA at the following indices:
69:73
133:137
167:171
</pre>
=={{header|Racket}}==

<syntaxhighlight lang="racket">#lang racket

;; rand-seq : exact-nonnegative-integer -> string
;; Builds a string of n bases, each picked at random from "TGAC".
(define (rand-seq n)
  (define bases "TGAC")
  (build-string n (lambda (_) (string-ref bases (random 4)))))

;; subsequence-indices : string string -> (listof (cons start end))
;; Returns every occurrence of `part` in `full`, overlaps included, as a pair
;; of 0-based inclusive indices (start . end), in ascending order of start.
(define (subsequence-indices full part)
  (let ((part-length (string-length part)) (full-length (string-length full)))
    ;; The last admissible start is full-length - part-length, so the range
    ;; must end one past it.  in-range also yields nothing (instead of
    ;; raising) when `part` is longer than `full`.
    (for/list ((i (in-range (add1 (- full-length part-length))))
               #:when (for/and ((p (in-string part)) (f (in-string full i)))
                        (char=? p f)))
      (cons i (+ i part-length -1)))))

;; report-sequence : string [exact-positive-integer] -> string
;; Formats `s` as newline-separated rows of at most `l` characters (default
;; 50); each row is prefixed with the 0-based offset of its first character,
;; rendered by ~a with #:width 4, followed by ": ".
(define (report-sequence s (l 50))
  (define len (string-length s))
  (define (row start)
    (string-append (~a start #:width 4)
                   ": "
                   (substring s start (min len (+ start l)))))
  (string-join (map row (range 0 len l)) "\n"))
;; Bioinformatics/Subsequence : [string] [string] -> void
;; Prints `sub`, then the row-by-row listing of `full` produced by
;; report-sequence, then the list of index pairs returned by
;; subsequence-indices.  When omitted, `full` defaults to a fresh random
;; 400-base sequence and `sub` to a fresh random 4-base sequence.
(define (Bioinformatics/Subsequence (full (rand-seq 400)) (sub (rand-seq 4)))
(printf "Indices of ~a in~%~a~%~a~%"
sub (report-sequence full) (subsequence-indices full sub)))

;; When run as the main module, perform four independent demonstrations,
;; each with freshly generated default sequences.
(module+ main (for ((i 4)) (Bioinformatics/Subsequence)))</syntaxhighlight>

{{out}}
<pre>Indices of TTAC in
0 : TTATCCTACCGCGTAAGTTCAATGCTCACCGCAGTTTGCTAACCGTTCCT
50 : AAATTCACTTCCTAAGGTATCTTTCGCTTAATTGATGCCGATTGAATTCC
100 : ACGGAGGGCGTAATTGTTTCGGACTTTAGACCTGACATAAGGGCACACTA
150 : GTCCTATTGAATTTGGTGCTATTCGGCGACCTACTAACCTTAGTCAGTGA
200 : AGAGCCATCTCAAAAGTACAGTCATCCTCAAGTGTTACATACGGCACCAT
250 : GACAGTGTATAAGCATGGAGGTTGGCCTATCGTCATATCGAGGCGGCGCC
300 : ATAGACCGGCCAGGTGATGAGATCGACTTTAATGTTGTTGCTTAGCTTGA
350 : CCTCTAGTTTGGATTAAGACGGTCATAGATAGATAGACCGTAAAGTATTC
((234 . 237))
Indices of GTAA in
0 : GTCAGTCCACGCAAGAATAGCAGTTGAGTGGACAATTTATGAGACGGAGA
50 : TAAGTAACCCGCTCCGAGATAAACGTCAGCCGGATTCCGCTGAGTCGGTC
100 : GCCTTCCAAGTGGCAGCTTGTTTGCATTGCTTACAGTGACTTGAACGATC
150 : ACCTACTCGAGGACTCTGCGGGTATTCCAGTTGCCTTGCACTCAGCGATG
200 : CACAAACTTTAAATTATCACAGAAAGAATGTGATTCGGGTGGTCACCCTT
250 : ATCGGTGAAACCAGTCCTTCCATGGGCATATTCTGCGTCGAAATGAGCCC
300 : GCTGTTTACGTTGTACGAACTGGGGACCTAAGGAAACGGGCCGTTCTTAG
350 : GTGATGTCAGCTGCAACGAACTACTGTTAACCTTCTCGATCTGTTGAAAA
((53 . 56))
Indices of AACG in
0 : TTTACAGTACGATTCCGAAGACACAAGAATGCGCCGGCTGTGGGTAGGGG
50 : CGACCCTGCGCGACCTATAAAAGGGGCGACTCAATTTTAGGCCCACCACG
100 : GACCCAGCCCTGTGCACAGAGCGGGGCATTTTTACCTCGCGTGCGCACCA
150 : ACTGCGATCTGCCTTGTCACATAATCCCACATACGAGTTGTATCTCTAAG
200 : AAGGGATGAGGCCAATTTAAATCCGGGTGCATTTCTCGGGGGGAGACACC
250 : AATGAGAGTGGGGCAAGGTGGCGTAGAGAGCTAATCGGGTTTTATGACCG
300 : CGGAAGACCTGGGATACGTCTGGGTGATAACTGAGGGCAGGTCAACGAAC
350 : CCTGATGCGTAGCCACGTCTCAGCTATCGGGCCTGTTTTCATAGTCCATG
((343 . 346))
Indices of CAGC in
0 : TGTGAACCACTATGACACGCTACACGCCTCAAGTTGGCCCCCATATAAGA
50 : ATATCCATCGGTTAATGTGTCTCGCGGCCGTTAGAACAAGCACTAAAGTT
100 : AGAGAAACCAACCATTGGACTAGATCAACATCAACGTCGCTGATAATAAA
150 : TGTATATCTGATGTGGCCGTTCATAAAATCGTTAACTACAGGTATCAACA
200 : TAGTCTCCCAACTTATATAATTGGTTAACTTAGGAGGAGCTTGCACAGCT
250 : CAGCTATATGCTATCTGGCCCTGGGCTTGGTAGGCATCACGTCGTTATGC
300 : TGCGAACATCTCAAAGACAAACGTTGATCCAGCCCCTAGAGAGGTCATTA
350 : GGCCTCGACCCAATTTAACCTCCCACTCCGTGGGTACAGCTTGAACCCCC
((245 . 248) (250 . 253) (329 . 332) (386 . 389))</pre>
=={{header|Raku}}==
=={{header|Raku}}==
Chances are actually pretty small that a random 4 codon string will show up at all in a random 200 codon sequence. Bump up the sequence size to get a reasonable chance of multiple matches.
Chances are actually pretty small that a random 4 codon string will show up at all in a random 200 codon sequence. Bump up the sequence size to get a reasonable chance of multiple matches.
<lang perl6>use String::Splice:ver<0.0.3>;
<syntaxhighlight lang="raku" line>use String::Splice:ver<0.0.3+>;


my $line = 80;
my $line = 80;
Line 204: Line 886:
}
}


say $disp;</lang>
say $disp;</syntaxhighlight>
{{out}}
{{out}}
Show in custom div to better display highlighting.
Show in custom div to better display highlighting.
Line 233: Line 915:
:* &nbsp; DNA proteins to be searched in the data &nbsp; &nbsp; &nbsp; &nbsp; (the default is four unique random proteins).
:* &nbsp; DNA proteins to be searched in the data &nbsp; &nbsp; &nbsp; &nbsp; (the default is four unique random proteins).
:* &nbsp; the seed for the RANDOM function so runs can be repeated with the same data &nbsp; &nbsp; (no default).
:* &nbsp; the seed for the RANDOM function so runs can be repeated with the same data &nbsp; &nbsp; (no default).
<lang rexx>/*REXX pgm gens random DNA (ACGT) sequence & finds positions of a random 4─protein seq. */
<syntaxhighlight lang="rexx">/*REXX pgm gens random DNA (ACGT) sequence & finds positions of a random 4─protein seq. */
parse arg totLen rndLen basePr oWidth Bevery rndDNA seed .
parse arg totLen rndLen basePr oWidth Bevery rndDNA seed .
if totLen=='' | totLen=="," then totLen= 200 /*Not specified? Then use the default.*/
if totLen=='' | totLen=="," then totLen= 200 /*Not specified? Then use the default.*/
Line 278: Line 960:
end /*j*/
end /*j*/
if $\=='' then say right(idx, 7)"│" strip($, 'T') /*show residual protein data*/
if $\=='' then say right(idx, 7)"│" strip($, 'T') /*show residual protein data*/
say; return</lang>
say "───────┴"center('' , oWidth+10, '─')
say; return</syntaxhighlight>
{{out|output|text=&nbsp; when using the default inputs:}}
{{out|output|text=&nbsp; when using the default inputs:}}
<pre>
<pre>
Line 285: Line 968:
1│ TTTTTAGCG CGTTTTGTAG CGCTCTAAAA ACCGTAGCTA TATTTCTCGA AGTTTCACCC AGCTCTTTTG CCCCAGGGTT GCGCTAAGCC CAGCTTCGAG
1│ TTTTTAGCG CGTTTTGTAG CGCTCTAAAA ACCGTAGCTA TATTTCTCGA AGTTTCACCC AGCTCTTTTG CCCCAGGGTT GCGCTAAGCC CAGCTTCGAG
101│ GGGGCACAG GTAAAATACT ACCGTCCGTG GAGGGGGATG AATTGACCCG ACATTTTTTG AAGCATAACT CGTGACTCAA TATTGCATGA TTACACCAGC
101│ GGGGCACAG GTAAAATACT ACCGTCCGTG GAGGGGGATG AATTGACCCG ACATTTTTTG AAGCATAACT CGTGACTCAA TATTGCATGA TTACACCAGC
───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────


base DNA proteins used: ACGT
base DNA proteins used: ACGT
Line 306: Line 990:
801│ CCTATCAGT CCAATCCCAC GGGGAGGGCA CTCGCGCAAT TCATTCAAAG AGGGCCATTT GCCGATATAA GGTCCATCAT CGGGAGGAAT ATGACTCCTG
801│ CCTATCAGT CCAATCCCAC GGGGAGGGCA CTCGCGCAAT TCATTCAAAG AGGGCCATTT GCCGATATAA GGTCCATCAT CGGGAGGAAT ATGACTCCTG
901│ TTAGTATTA GAGCAGCCTC GCTGCGTACT ACTGTCAGTG GCCCGTCAGG GAAGGCAAAA CGTTTTTCCT CTAGGAATCC GTCAATTGGA CTTCTAGACT
901│ TTAGTATTA GAGCAGCCTC GCTGCGTACT ACTGTCAGTG GCCCGTCAGG GAAGGCAAAA CGTTTTTCCT CTAGGAATCC GTCAATTGGA CTTCTAGACT
───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────


base DNA proteins used: ACGT
base DNA proteins used: ACGT
Line 312: Line 997:
the random DNA proteins were found in positions: 5 6 16 69 157 158 159 340 796 797 962 963
the random DNA proteins were found in positions: 5 6 16 69 157 158 159 340 796 797 962 963
</pre>
</pre>

=={{header|Ring}}==
=={{header|Ring}}==
<lang ring>
<syntaxhighlight lang="ring">
/*-----------------------------------
load "consolecolors.ring"
# Project : DNA subsequences
# Date : 2021/03/23
# Author : Gal Zsolt (~ CalmoSoft ~)
# Email : <calmosoft@gmail.com>
-----------------------------------*/


//-----------------------------------------
row = 0

load "stdlibcore.ring"
load "guilib.ring"

start = 0
base = ["A","C","G","T"]
dnaList = []
dnaList = []
dnaSeq = []
dnaSeq = []
ColLine = list(21)
base = ["A","C","G","T"]
long = 20
plus = 0
see "DNA sequence:" + nl + nl
see " 12345678901234567890" + nl
see " " + long + ": "


C_Spacing = 2
for nr = 1 to 200
row = row + 1
rnd = random(3)+1
baseStr = base[rnd]
see baseStr
plusLine()
add(dnaList,baseStr)
next
see nl+ " 12345678901234567890" + nl


C_ButtonDnaStyle = ' background-color: Red; border-radius: 8px;'
strDna = list2str(dnaList)
C_ButtonStyle = '"background-color:white"; border-radius: 8px;'
strDna = substr(strDna,nl,"")
Button = newlist(10,20)
LayoutButtonRow = list(10)


//-----------------------------------------
while true
strBase = ""
for n = 1 to 4
rnd = random(3)+1
strBase = strBase + base[rnd]
next
ind = substr(strDna,strBase)
if ind > 0
exit
ok
end


app = new qApp
see nl + "subsequence to search: " + strBase + nl
{
win = new qWidget() {
setWindowTitle('DNA subsequences')
setWinIcon(self,AppFile("white.jpg"))
setStyleSheet('background-color:White')
setgeometry(560,180,300,300)
//reSize(400,400)
winheight = 10
fontSize = 8 # + (winheight / 100)


LayoutButtonMain = new QVBoxLayout()
seqok = 0
LayoutButtonMain.setSpacing(C_Spacing)
see "start positions of subsequence : "
LayoutButtonMain.setContentsmargins(0,0,0,0)


LabelInd = new qLabel(win) { settext(" DNA subsequences start positions:")
for n = 1 to 196
setAlignment(Qt_AlignHCenter | Qt_AlignVCenter)
flag = 1
setStyleSheet("background-color:yellow") }
for m = 0 to 3
if dnaList[n+m] != strBase[m+1]
flag = 0
exit
ok
next
if flag = 1
add(dnaSeq,n)
seqok = 1
see "" + n + " "
ok
next


ButtonInd = new QPushButton(win) { setStyleSheet("background-color:yellow") }
if seqok = 0
see "sequence not found" + nl
ok


LabelFind = new qLabel(win) { settext(" DNA subsequence to find:")
row = 0
setStyleSheet("background-color:yellow") }
showDna(dnaList)


ButtonFind = new QPushButton(win)
func showDna(dnaList)

long = 20
DnaSearch = new QPushButton(win) { setclickevent("pstart()")
see nl + "found subsequences:" + nl + nl
setStyleSheet("background-color:yellow")
see " 12345678901234567890" + nl
settext("Find")
see " " + long + ": "
}
for nr = 1 to len(dnaList)
if plus = 0
row = row + 1
for Col = 1 to 21
ColLine[Col] = new qLabel(win) {
setmaximumheight(20)
setAlignment(Qt_AlignHCenter | Qt_AlignVCenter)
setStyleSheet("background-color:darkgray")
setText(string(Col-1))
}
next

LayoutInd = new QHBoxLayout() { setSpacing(C_Spacing) setContentsMargins(0,0,0,0) }
LayoutInd.AddWidget(LabelInd)
LayoutInd.AddWidget(ButtonInd)
LayoutButtonMain.AddLayout(LayoutInd)
LayoutTitleRow = new QHBoxLayout() { setSpacing(C_Spacing) setContentsMargins(0,0,0,0) }

for Col = 1 to 21
LayoutTitleRow.AddWidget(ColLine[Col])
next
LayoutButtonMain.AddLayout(LayoutTitleRow)
RowLine = list(10)

for Row = 1 to 10
Letter = "" + Row*20
if Row*20 < 100
Letter = " " + Row*20
ok
RowLine[Row] = new qLabel(win) { setFont(new qFont("Verdana",fontSize,40,0))
setAlignment(Qt_AlignHCenter | Qt_AlignVCenter)
setStyleSheet("background-color:darkgray")
setText(Letter)
}
next

for Row = 1 to 10
LayoutButtonRow[Row] = new QHBoxLayout()
{
setSpacing(C_Spacing)
setContentsmargins(0,0,0,0)
}

LayoutButtonRow[Row].AddWidget(RowLine[Row])
for Col = 1 to 20
Button[Row][Col] = new QPushButton(win) {
setmaximumwidth(20)
}
LayoutButtonRow[Row].AddWidget(Button[Row][Col])
next
LayoutButtonMain.AddLayout(LayoutButtonRow[Row])
next

LayoutDataRow = new QHBoxLayout() { setSpacing(C_Spacing) setContentsMargins(0,0,0,0) }

LayoutDataRow.AddWidget(LabelFind)
LayoutDataRow.AddWidget(ButtonFind)
LayoutDataRow.AddWidget(DnaSearch)
LayoutButtonMain.AddLayout(LayoutDataRow)
setLayout(LayoutButtonMain)
pStart()
show()
}
exec()
}

//-----------------------------------------

func pStart()
start = start + 1

dnaList = []
for row = 1 to 10
for col = 1 to 20
Button[row][col].settext("")
next
next
for nr = 1 to 200
rnd = random(3)+1
baseStr = base[rnd]
row = ceil(nr/20)
col = nr%20
if col = 0
col = 20
ok
ok
if plus = 1
Button[row][col].settext(baseStr)
nr = nr + 3
add(dnaList,baseStr)
next
row = row + 1

plusLine()
startDna()

//-----------------------------------------

func startDna()

strDna = list2str(dnaList)
strDna = substr(strDna,nl,"")

while true
strBase = ""
for n = 1 to 4
rnd = random(3)+1
strBase = strBase + base[rnd]
next
ind = substr(strDna,strBase)
if ind > 0
exit
ok
end

showDna(dnaList)

//-----------------------------------------

func showDna(dnaList)


if start > 1
see nl
for n = 1 to len(dnaSeq)
for m = 0 to 3
ind = dnaSeq[n] + m
row = ceil(ind/20)
col = ind%20
if col = 0
col = 20
ok
Button[row][col].setstylesheet(C_ButtonStyle)
next
next
ok


dnaSeq = []
strDna = list2str(dnaList)
strDna = substr(strDna,nl,"")

while true
strBase = ""
for n = 1 to 4
rnd = random(3)+1
strBase = strBase + base[rnd]
next
ind = substr(strDna,strBase)
if ind > 0
exit
ok
end

ButtonFind.setStyleSheet("background-color:yellow")
ButtonFind.settext(strBase)

for n = 1 to 196
flag = 1
for m = 0 to 3
if dnaList[n+m] != strBase[m+1]
flag = 0
exit
ok
next
if flag = 1
add(dnaSeq,n)
ok
ok
next
temp = ""
ButtonInd.settext("")
for nr = 1 to len(dnaList)
ind = find(dnaSeq,nr)
ind = find(dnaSeq,nr)
if ind > 0
if ind > 0
temp = temp + string(dnaSeq[ind]) + " "
ButtonInd.settext(temp)
for n = nr to nr + 3
for n = nr to nr + 3
cc_print(CC_BG_DARK_RED | CC_FG_WHITE,dnaList[n])
row = ceil(n/20)
if n != nr
col = n%20
row = row + 1
if col = 0
ok
col = 20
plusLine()
ok
Button[row][col].setStyleSheet(C_ButtonDnaStyle)
Button[row][col].settext(dnaList[n])
next
next
plus = 1
if (row%20) = 0
row = row + 1
nr = nr + 1
ok
else
plus = 0
see dnaList[nr]
ok
ok
plusLine()
next
next
see nl+ " 12345678901234567890" + nl


//-----------------------------------------
func plusLine()
</syntaxhighlight>
if (row%20) = 0 and long < 200
long = long + 20
see nl
if long < 100
see " " + long + ": "
else
see "" + long + ": "
ok
ok
</lang>
{{out}}
<pre>
DNA sequence:


'''Output:'''
12345678901234567890
20: CAGTAAATAAGGAGAACAGG
40: GATCTATCTGCGCAGTTGTT
60: CAAATCAAGAGGAAAAAGTT
80: AAATCCAACACGGTAGGATG
100: CATTGAAAGGTTGCGTAAGA
120: AAAAAGGAGGGAAATGATCG
140: AAACAAAGTACGTCAATTAG
160: ATGCCAAAGACCGATAAAAG
180: GTATTAGTATTAGAGCAGCG
200: AATGAGGAAGACTTCGAGAA
12345678901234567890

subsequence to search: AAGA
start positions of subsequence : 47 97 147 188
found subsequences:

12345678901234567890
20: CAGTAAATAAGGAGAACAGG
40: GATCTATCTGCGCAGTTGTT
60: CAAATCAAGAGGAAAAAGTT
80: AAATCCAACACGGTAGGATG
100: CATTGAAAGGTTGCGTAAGA
120: AAAAGGAGGGAAATGATCG
140: AAACAAAGTACGTCAATTAG
160: ATGCCAAAGACCGATAAAAG
180: GTATTAGTATTAGAGCAGCG
200: AATGAGGAAGACTTCGAGAA
12345678901234567890
</pre>


[https://i.imgur.com/5hhbRBK.mp4 Bioinformatics/Subsequence - video]
=={{header|Wren}}==
=={{header|Wren}}==
{{libheader|Wren-pattern}}
{{libheader|Wren-pattern}}
{{libheader|Wren-str}}
{{libheader|Wren-str}}
{{libheader|Wren-fmt}}
{{libheader|Wren-fmt}}
<lang ecmascript>import "random" for Random
<syntaxhighlight lang="wren">import "random" for Random
import "/pattern" for Pattern
import "./pattern" for Pattern
import "/str" for Str
import "./str" for Str
import "/fmt" for Fmt
import "./fmt" for Fmt


var rand = Random.new()
var rand = Random.new()
Line 501: Line 1,291:
findDnaSubsequence.call(200, 20)
findDnaSubsequence.call(200, 20)
System.print()
System.print()
findDnaSubsequence.call(600, 40)</lang>
findDnaSubsequence.call(600, 40)</syntaxhighlight>


{{out}}
{{out}}

Revision as of 09:41, 8 November 2023

Bioinformatics/Subsequence is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Task

Randomly generate a string of   200   DNA bases   (represented by   A,  C,  G,  and  T).

Write a routine to find all the positions of a randomly generated subsequence   (four letters).

11l

Translation of: Python
// State of the generator used in place of a real RNG; it starts at 34,
// so every run produces the same sequence.
UInt32 seed = 34
// Advances the generator one step and returns the element of `lst` it selects.
F nonrandom_choice(lst)
   // Linear congruential step (multiplier 1664525, increment 1013904223), masked to 32 bits.
   :seed = (1664525 * :seed + 1013904223) [&] FFFF'FFFF
   // Use the bits above the low 16 as the index, reduced modulo the list length.
   R lst[Int(:seed >> 16) % lst.len]

// Returns a string of `n` letters, each chosen from A, C, G, T by nonrandom_choice.
F generate_sequence(Int n)
   R ((0 .< n).map(_ -> nonrandom_choice([‘A’, ‘C’, ‘G’, ‘T’]))).join(‘’)

// Returns the list of every index at which `subSeq` is found in `dnaSeq`,
// in ascending order.
F positions(dnaSeq, subSeq)
   [Int] r
   V start = 0
   L
      V? pos = dnaSeq.find(subSeq, start)
      // find gives N (null) once there is no further occurrence.
      I pos == N
         L.break
      r.append(pos)
      // Resume one past the last hit, so overlapping occurrences are found too.
      start = pos + 1
   R r

// Prints each occurrence of `needle` in `haystack` as "start:end", where end
// is start plus the needle length, or "No matches found" if there is none.
F dna_findall(String needle, haystack) -> N
   V pp = positions(haystack, needle)
   I pp.empty
      print(‘No matches found’)
   E
      print(‘Found ’needle‘ at the following indices:’)
      L(p) pp
         print(p‘:’(p + needle.len))

// Demo: a 200-letter strand and a 4-letter sample to look for.
V dna_seq = generate_sequence(200)
V sample_seq = generate_sequence(4)

// Print the strand, ending the line after every 20th letter.
V c = 1
L(i) dna_seq
   I c % 20 != 0 {print(i, end' ‘’)} E print(i)
   c++
print("\nSearch Sample: "sample_seq)

dna_findall(sample_seq, dna_seq)
Output:
GAAGTGCTCAAACCCTTTTT
CCTTGCCGTAGGTTGTGCTG
CCGCCGCACACCCGCAACAG
CTTTTAGGCATAAGTATACG
GACCGCGGACGGGGCGTAAC
GGTGAACATTTTGCTAAATT
GGCTCTAGGGATGAGCCCTA
TAGCGCTGGGGACTACGCCC
CGGTAAAGATCGAGGCGACT
CACCGATTGCGCTAGGGACA

Search Sample: CGTA
Found CGTA at the following indices:
26:30
94:98

Action!

DEFINE SEQLEN="200"
DEFINE SUBLEN="4"

; Fills s with len letters picked at random from "ACGT".
; The length is stored in s(0) and the letters in s(1..len).
PROC RandomSeq(CHAR ARRAY s BYTE len)
  CHAR ARRAY letters="ACGT"
  BYTE i

  FOR i=1 TO len
  DO
    ; Rand(4)+1 is used as a 1-based index into letters
    ; (presumably Rand(4) yields 0..3 - confirm).
    s(i)=letters(Rand(4)+1)
  OD
  s(0)=len
RETURN

; Prints s in rows of 20 letters. Each row starts with the 1-based index
; of its first letter, right-aligned to three characters, then ": ".
PROC PrintSeq(CHAR ARRAY s)
  BYTE i

  FOR i=1 TO s(0)
  DO
    ; i MOD 20=1 marks the first letter of a row: print the row label
    IF i MOD 20=1 THEN
      ; pad with spaces (character 32) for one- and two-digit indices
      IF i<10 THEN Put(32) FI
      IF i<100 THEN Put(32) FI
      PrintB(i)
      Print(": ")
    FI
    Put(s(i))
    ; end the row after every 20th letter
    IF i MOD 20=0 THEN
      PutE()
    FI
  OD  
RETURN

; Returns 1 when the letters of s beginning at index start equal all
; prefix(0) letters of prefix, otherwise 0.
; No bounds check is made: the caller must ensure start+prefix(0)-1
; does not exceed the length of s.
BYTE FUNC StartsWith(CHAR ARRAY s,prefix BYTE start)
  BYTE i

  FOR i=1 TO prefix(0)
  DO
    ; # is the inequality operator: bail out on the first mismatch
    IF s(start+i-1)#prefix(i) THEN
      RETURN (0)
    FI
  OD
RETURN (1)

; Generates a random sequence of SEQLEN letters and a random subsequence
; of SUBLEN letters, prints both, then lists every position (as
; first-last, 1-based) at which the subsequence occurs.
PROC Main()
  CHAR ARRAY seq(SEQLEN+1),sub(SUBLEN+1)
  BYTE i,notfirst

  RandomSeq(seq,SEQLEN)
  RandomSeq(sub,SUBLEN)

  PrintE("Search sequence:")
  PrintSeq(seq)
  PutE()
  PrintF("Subsequence to find: %S%E%E",sub)

  PrintE("Found subsequence at positions:")
  ; notfirst doubles as the "anything found" flag and the comma switch
  notfirst=0
  ; the last window that still fits starts at SEQLEN-SUBLEN+1,
  ; so the bound must include it
  FOR i=1 TO SEQLEN-SUBLEN+1
  DO
    IF StartsWith(seq,sub,i) THEN
      IF notfirst THEN
        Print(", ")
      FI
      notfirst=1
      PrintF("%I-%I",i,i+SUBLEN-1)
    FI
  OD
  IF notfirst=0 THEN
    PrintE("Not found")
  FI
RETURN
Output:

Screenshot from Atari 8-bit computer

Search sequence:
  1: CGACTCAGGAAGGCCACGTG
 21: GTAACTTCTTAGTTACCGTA
 41: AGGCTAATAGCTAGCGCTGC
 61: GTGACCAGGCATAGTAACCG
 81: GCACGCACGTTCACCAAGGG
101: GTCCCGATGGGAGGCACGTT
121: ACTACTCCAAGAACTGTAGT
141: AAGTTACCGAAAAGTTCTCA
161: TCCTTGGGTAGTGAGTACTT
181: TGTGCTATGAAAAATAAGGA

Subsequence to find: ACGC

Found subsequence at positions:
83-86

Ada

with Ada.Text_Io;
with Ada.Strings.Fixed;
with Ada.Numerics.Discrete_Random;

--  Generates a random nucleotide sequence and a random four-base substring,
--  prints the sequence in rows of 50 bases and reports every position at
--  which the substring occurs (overlapping occurrences included).
procedure Sub_Sequence is

   type Nucleotide is (A, C, G, T);

   --  Maps a nucleotide to the letter used to display it.
   function To_Character (N : Nucleotide) return Character
   is (case N is
          when A => 'A', when C => 'C',
          when G => 'G', when T => 'T');

   package Random_Nucleotide is new Ada.Numerics.Discrete_Random (Nucleotide);
   use Random_Nucleotide;

   package Position_Io is new Ada.Text_Io.Integer_Io (Natural);
   use Ada.Text_Io;

   --  Prints Seq in rows of at most Width characters, each row prefixed
   --  with the index range "First..Last" it covers.
   procedure Put_Bases (Seq : String; Width : Positive) is
      First : Natural := Seq'First;
   begin
      --  "<=" rather than "<": a final row holding a single character
      --  (First = Seq'Last) must still be printed.
      while First <= Seq'Last loop
         declare
            Last : constant Natural :=
              Natural'Min (First + Width - 1, Seq'Last);
         begin
            Position_Io.Put (First); Put ("..");
            Position_Io.Put (Last);  Put (" ");
            Put (Seq (First .. Last));
            New_Line;
            First := Last + 1;
         end;
      end loop;
   end Put_Bases;

   Gen       : Generator;
   Sequence  : String (1 .. 405);
   Substring : String (1 ..   4);
   Pos       : Natural := 0;
begin
   Position_Io.Default_Width := 3;

   Reset (Gen);

   Sequence  := (others => To_Character (Random (Gen)));
   Substring := (others => To_Character (Random (Gen)));

   Put_Line ("Search sequence:");
   Put_Bases (Sequence, Width => 50);
   New_Line;

   Put ("Substring to search: ");
   Put (Substring);
   New_Line;

   loop
      --  Resume one past the previous hit; Index returns 0 when there is
      --  no further occurrence.
      Pos := Ada.Strings.Fixed.Index (Sequence, Substring, Pos + 1);
      exit when Pos = 0;
      Put ("Found at position: ");
      Position_Io.Put (Pos); Put ("..");
      Position_Io.Put (Pos + Substring'Length - 1);
      New_Line;
   end loop;
end Sub_Sequence;
Output:
Search sequence:
  1.. 50 CCTACGGAAAAGTGATAAGGACAGATACATAATCCTAAAACCCTGGAAAA
 51..100 CTTGTCTCGCCAGAGTAGGGCTCGGCAGGGGGGGCAGTGTTTTAAAACGT
101..150 CAGAGAATAGGCTCTACCTTGTTAGACTGCGAGTACTGGAGCGTAGTTCC
151..200 TATATTGCAAGCTGCTACAGTAAGTATCAAAGTATGCCACACATCCTTCT
201..250 ACAACCGGATTGGTTGCCCAGTAGAAGGCTCGTAGTCACCGGACACGCTG
251..300 TTCTTAAGGTCGGTAAGCTATTACGTCCATGGGAGATTCTCAAGGGTGCG
301..350 TTAGCGGACCCCCGTTACGTCCACGTATCTTCCGTCCAACTACCCCCTAA
351..400 TGTCATTGACATCGCCCGAGTATTTAATTTATTTGAACGGCACCAATTTA
401..405 GAGCT

Substring to search: TATT
Found at position: 153..156
Found at position: 269..272
Found at position: 371..374
Found at position: 380..383

Arturo

; Alphabet the random strings are drawn from.
bases: [`A` `G` `C` `T`]
; A 200-letter sequence and a 4-letter subsequence, one random base per element.
randSeq: join map 1..200 => [sample bases]
randSub: join map 1..4 => [sample bases]

; Current start offset of the window being compared.
idx: 0

print "Random sequence:"
; Show the sequence in rows of 20 letters.
print join.with:"\n" split.every: 20 randSeq
print ""

print "Looking for subsequence:"
print randSub
print ""

; Slide the window one letter at a time and report each offset where the
; window begins with randSub.
; NOTE(review): the loop only runs while idx+4 is below the sequence size, so
; an occurrence occupying the final four letters does not appear to be
; tested - confirm against the end-index semantics of `slice`.
while [(size randSeq) > idx + 4][
    if prefix? slice randSeq idx idx+4 randSub ->
        print ["Found subsequence at position:" idx]
    idx: idx + 1
]
Output:
Random sequence:
CACGCGCGTTAACCCTGCAT
CTTTTCTCTAAGATGATGCG
CTACTCTGCCCGATTACTAT
GATGTCACCGGCGGTTCGGC
GACTGGCGCTGGCAGAAAGC
GCATGTCAAATTGCCCCAGT
GTGCAAGTCCAAGTATTAGT
GAGGTGCTCCGCTTCGTCCG
GGGTCGACTCGGTCCCACTT
CATTACATGTTGGTAATAGT

Looking for subsequence:
CGGT

Found subsequence at position: 71 
Found subsequence at position: 169

Factor

Works with: Factor version 0.99 2021-02-05
USING: accessors formatting grouping io kernel math
math.functions.integer-logs math.parser random regexp sequences ;

! Builds a string of n characters, each picked at random from "ACGT".
: new-dna ( n -- str ) [ "ACGT" random ] "" replicate-as ;

! Converts n to its decimal string and left-pads it with spaces (code 32)
! to width d.
: pad ( n d -- str ) [ number>string ] dip 32 pad-head ;

! Prints seq in rows of n characters. Each row is prefixed with the 0-based
! offset of its first character (row index times n), padded to d columns,
! where d is integer-log10 of the sequence length plus one.
:: .dna ( seq n -- )
    seq length integer-log10 1 + :> d seq n group
    [ n * d pad write ": " write write nl ] each-index ;

! Prints one match slice as "from..to" using its from>> and to>> slots.
! In the sample output "to" is from+4 for a four-base match.
: .match ( slice -- ) [ from>> ] [ to>> ] bi "%d..%d\n" printf ;

! Prints a heading followed by one line per match.
: .matches ( slices -- )
    "Matches found at the following indices:" print
    [ .match ] each ;

! Reports the matches, or a "no matches" message when the list is empty.
: .locate ( slices -- )
    [ "No matches found." print ] [ .matches ] if-empty ;

! Generates a random strand of dna-size bases, prints it in rows of
! row-size, generates and prints a random four-base subsequence, then
! reports where that subsequence (compiled as a regexp) matches the strand.
: .biosub ( dna-size row-size -- )
    [ new-dna dup ] [ .dna nl ] bi*
    4 new-dna dup "Subsequence to locate: %s\n" printf
    <regexp> all-matching-slices .locate ;

! Two demonstrations: 80 bases in rows of 10, then 600 bases in rows of 39.
80 10 .biosub nl
600 39 .biosub nl
Output:
 0: ATTCAAGGAC
10: CACTATTAAC
20: CTGCATTGTG
30: AGAACTTGCA
40: GTGTACCGAG
50: AGCGAGTTTA
60: AAGCAACACA
70: TCTTTACCGA

Subsequence to locate: GTAG
No matches found.

  0: GATCTCGTCATGGTCCATCCTAACATTTCGGTTGTGGGC
 39: GCATCCCGATAGGCGAAGTTAAATCTACGTAGTCCTACG
 78: TCACGACGGAACATGATTGCCCACCGAAGTCGTAGGCGA
117: GCTAAAGTCGGTACATACACGATCTGCTATATTCGTTCT
156: CCGACACACGACATGCAATCCGAGAAGCTCTCGAAGTGC
195: GGTCAGATCCTCAGACTCGAACAGAGGAGACCTTAACTG
234: ATACCCACAGTACTTCTCGCATAACCTAAGCACCTATGC
273: TTACACCATCGTCCTGATATTGAGTGAGTCTGGTCGGAG
312: ATATTATCTAGCACCCTCAAGCTCTGTGTGCCACACCAG
351: GATTCCACTTCGCGCTTGCCTAGAGAAAGTAGAGTAGGT
390: GGTGTCATTAGTACACTGTTTGCGATGCACCAACCAAAC
429: CCGACCGCCATGATGACTGCTTTTCGGCCAACGTCAGAT
468: TAAGAGTACTTTTAGTAGCACCGCAAGCCAGCCGGTTTA
507: GCAAGATCCTGCAGCCTCCACGTTATTTCAGGTCTCTAA
546: GCGTTCTTTCCATGGAAGTAGTCACCGCTCCCGTTGCCA
585: ATGGACACAGACGTT

Subsequence to locate: ATAT
Matches found at the following indices:
145..149
289..293
312..316

FreeBASIC

Translation of: Wren
Const base_ = "ACGT"

' Generates a random DNA strand of dnaSize bases and a random four-base
' subsequence, prints the strand in rows of chunkSize bases labelled with
' their 1-based index range, then lists the 1-based inclusive range of every
' occurrence of the subsequence (overlapping occurrences included).
Sub findDnaSubsequence(dnaSize As Integer, chunkSize As Integer)
    Dim As Integer i, chunk, chunkEnd
    Dim As String dnaStr, dnaSubstr

    ' Build both strings directly, one random base at a time.
    For i = 1 To dnaSize
        dnaStr += Mid(base_, Int(Rnd * 4)+1, 1)
    Next
    For i = 1 To 4
        dnaSubstr += Mid(base_, Int(Rnd * 4)+1, 1)
    Next

    Print "DNA sequence:"
    For chunk = 1 To Len(dnaStr) Step chunkSize
        ' Clamp the label so a short final row is not reported past the end.
        chunkEnd = chunk + chunkSize - 1
        If chunkEnd > Len(dnaStr) Then chunkEnd = Len(dnaStr)
        Print Using "###_._.###: &"; chunk; chunkEnd; Mid(dnaStr, chunk, chunkSize)
    Next

    Print !"\nSubsequence to locate: "; dnaSubstr
    Dim As Integer idx = Instr(dnaStr, dnaSubstr)
    Print Iif(idx <> 0, "Matches found at the following indices:", "No matches found.")
    Do While idx > 0
        Print Using "###_._.###"; idx; idx + 3
        ' Resume one past the previous start (not four) so that overlapping
        ' occurrences, e.g. of AAAA or ACAC, are reported as well.
        idx = Instr(idx+1, dnaStr, dnaSubstr)
    Loop
End Sub

' Seed the generator from the clock; without this Rnd is never seeded and
' the "random" strand would not vary between runs.
Randomize Timer

findDnaSubsequence(200, 20)
Print
findDnaSubsequence(600, 40)

' Keep the console window open until a key is pressed.
Sleep
Output:
DNA sequence:
  1.. 20: TTATAGTCTTGGAGGCATGT
 21.. 40: TAACTTATGCGGAGCAGACA
 41.. 60: CGGAGTATGCATTCCTCTTA
 61.. 80: CCAAACGGTGCTGCCCGCGC
 81..100: ACTCGCTGTATTCCGTATCG
101..120: TCACATTATCTAAACCACGA
121..140: TTTCCAGCGTGCGTGGGAAG
141..160: GCCATGTTTAGTCGGGGGCC
161..180: AAGGTCTTTGGCTTATGCTG
181..200: TTTTTTTTTCTTCGGTTACA

Subsequence to locate: ATTT
Matches found at the following indices:
120..123

DNA sequence:
  1.. 40: GTGCGGGCCGTTAGCAGCTACGAGTGCTAGATGGAACTAG
 41.. 80: TCCCCGCTCCCAAATGCAAAGCGTCCCAGACCAGTCTTGA
 81..120: AGCCCGTTAAATTACACCTGAACCGTTGCAAATGATCGAT
121..160: AGACGGGGTATAATAGCGGAAAACACAGGGGAACTGCATG
161..200: CAAGCTCGAGCCGCTGAAGGATGGCTCCCCCCCGAGTGTA
201..240: AGTGGATCTCGCCCAAATAGCGGGGGAACAAAGAAAGGTA
241..280: AGTCTTACTTCGCACGTCCCCTCTCATACACGCCAGGACT
281..320: AATGGATCATTCATAGGTGACGGGTGACTTGCGGTGTTTC
321..360: TAGTTGGAGTCACCCGTCAGCTTAGATCTAAGTATGAACC
361..400: GTAAGAGTTTGTAACTGCACCTTCCGTCTCTTCCTCTGTA
401..440: GGAACGCTTTTGCTTGTTATCAGATAGTGTCTCCTTATCA
441..480: TAGGACAGGTTCCTTGTGAAGGTCCACAGAGTTTGCCCGG
481..520: GGTTCGAATATACGACGCTTGTGGTTCCGGCACTATAACT
521..560: TCCGCAGTGTTGTCGACGCCCCTAGCTCCCGGGGTCTTTT
561..600: CGCTTCCCTATAGCGCGAAATGAGTGCAAGGGTACCGGCC

Subsequence to locate: GCAC
Matches found at the following indices:
252..255
377..380
510..513

Go

Translation of: Wren
package main

import (
    "fmt"
    "math/rand"
    "regexp"
    "time"
)

const base = "ACGT"

// findDnaSubsequence generates a random strand of dnaSize bases and a random
// four-base subsequence, prints the strand in rows of chunkSize bases
// labelled with 1-based positions, and then lists the 1-based inclusive range
// of every occurrence of the subsequence, overlapping occurrences included.
func findDnaSubsequence(dnaSize, chunkSize int) {
    dnaSeq := make([]byte, dnaSize)
    for i := range dnaSeq {
        dnaSeq[i] = base[rand.Intn(4)]
    }
    dnaStr := string(dnaSeq)
    dnaSubseq := make([]byte, 4)
    for i := range dnaSubseq {
        dnaSubseq[i] = base[rand.Intn(4)]
    }
    dnaSubstr := string(dnaSubseq)
    fmt.Println("DNA sequnence:")
    // Walk by row start so that a final row shorter than chunkSize is still
    // printed instead of being silently dropped.
    for start := 0; start < len(dnaStr); start += chunkSize {
        end := start + chunkSize
        if end > len(dnaStr) {
            end = len(dnaStr)
        }
        fmt.Printf("%3d..%3d: %s\n", start+1, end, dnaStr[start:end])
    }
    fmt.Println("\nSubsequence to locate:", dnaSubstr)
    // QuoteMeta changes nothing for A/C/G/T but keeps the pattern literal.
    var r = regexp.MustCompile(regexp.QuoteMeta(dnaSubstr))
    // FindAllStringIndex only returns non-overlapping matches, so search
    // again from one byte past each match start to pick up overlaps.
    var matches [][]int
    for from := 0; from < len(dnaStr); {
        loc := r.FindStringIndex(dnaStr[from:])
        if loc == nil {
            break
        }
        matches = append(matches, []int{from + loc[0], from + loc[1]})
        from += loc[0] + 1
    }
    if len(matches) == 0 {
        fmt.Println("No matches found.")
    } else {
        fmt.Println("Matches found at the following indices:")
        for _, m := range matches {
            // m[0] is a 0-based start and m[1] an exclusive end, so
            // m[0]+1..m[1] is the 1-based inclusive range.
            fmt.Printf("%3d..%-3d\n", m[0]+1, m[1])
        }
    }
}

// main seeds the math/rand generator from the current time and runs two
// demonstrations: 200 bases in rows of 20, then 600 bases in rows of 40.
func main() {
    // NOTE(review): rand.Seed is reported as deprecated in newer Go releases
    // (1.20+) - confirm against the targeted Go version.
    rand.Seed(time.Now().UnixNano())
    findDnaSubsequence(200, 20)
    fmt.Println()
    findDnaSubsequence(600, 40)
}
Output:

Sample run:

DNA sequnence:
  1.. 20: GTTGCCCACACGTCTTATTG
 21.. 40: TAAAAATCACCGTGCAGCGA
 41.. 60: GGTTAAAAATGGTAGGAAAA
 61.. 80: TATCCTCAGCCAGCGGTGCC
 81..100: GGCCAACAAAAGGGACGTTG
101..120: GATTAAAGTAGGTCTAGGTA
121..140: TCTCGTATCCGGTTGATCCG
141..160: GGATGGTGGACGATATTGGA
161..180: GACCGGAGTGTACATCGGTG
181..200: TTGTCGCTTGCAGCTACGGT

Subsequence to locate: AATA
Matches found at the following indices:
 59..62 

DNA sequnence:
  1.. 40: GTACAGCCACTGTTAGTAGACGGATGCTATTGGGACGCAA
 41.. 80: CACATCAGTACACTGCTTGTTCGTAATCGCGTACCCAGCG
 81..120: CAAAAGGAGGGGAGGAACCTGCTCAGACTGTCGCTAAAAA
121..160: CGAGCACGTGTCCTTACGCAGTGATGGTAGCGGTCCACGA
161..200: CTTCCACTGGCATAAGGAGAATGTTTAGTAACGCCCCTCA
201..240: TAGGTGCAATTCTACAGGTTAAGGGACCGTGGGATGTTTC
241..280: TATAAAAGTTGAAGAGATTACTAATCCGTCCCGTGCGCGT
281..320: GCCGCAATTTAGCGCCCGTTCTTGAGTAAACATACATGCA
321..360: CGCTCTTGAGTTTTCTAAAACCTGATCAAAACGGTCGCCC
361..400: ACATGCAGGAGCGCCGCAGGGTTTCAGAGGTCAACCATCG
401..440: GCAGCACACGTGAACCCTCTGTACTGACCAGGGGCTTGCT
441..480: CCTTGGTAGGAGATGGTGGAGAATGCGTCGATGCACTGAA
481..520: GCAGACCGCTGATAGCATGTACGATGTTTACGGGTTGACG
521..560: ATAGCTTTGCTAGTGATCGAACATATGATGAAAAACGCTT
561..600: CCATTGATAGAGCATCTTAGGAGCTCAGTCCAGTGACCTC

Subsequence to locate: AGGT
Matches found at the following indices:
202..205
216..219
388..391

jq

Works with: jq

Works with gojq, the Go implementation of jq

Neither jq nor gojq currently has any PRNG built-ins so one possibility is to use a jq-coded PRNG function such as can be found at https://rosettacode.org/wiki/Random_numbers#jq

In practice, it's usually more convenient to use a utility such as gshuf or jot to provide the source of randomness. Here we use `jot -r N MIN MAX` but a fourth argument can also be used to specify a seed. An alternative would be to use `gshuf` along the lines of:

# For 200 pseudo-random integers in the range 0 to 3 inclusive:
gshuf -i 0-3 -r -n 200 --random-source=/dev/random

Note that the indices shown below are offsets (i.e., the index origin is taken to be 0).

#!/bin/bash

# Pipe 200 random integers in the range 0..3 into jq on stdin, and supply 4
# more through --slurpfile, which binds them (as an array) to $four.
jot -r 200 0 3 | jq -nr --slurpfile four <(jot -r 4 0 3) '

 # input: an array of integers
 # output: the string obtained by replacing each integer i with the
 #         one-letter slice of "ACGT" starting at offset i
 def toDNA: 
   def base: . as $in | "ACGT" | .[$in : $in+1];
   map(base) | join("");

 # [inputs] gathers every integer from stdin into a single array.
 ([inputs] | toDNA) as $strand
 | ($four  | toDNA) as $four
 | "Strand of length \($strand|length):",
   $strand,
   "Zero-based indices of \($four):",
   ($strand | indices($four) | join(" "))
'
Output:
./bioinformatics-subsequence.sh
Strand of length 200:
TGGGCCCAAGCATTGCCACGTAGCTTTGTCAGTGGGCTTGTAAGGGACGAACACAAACTCACAGACCAGGAATTCTCGAGTTCCAGTCCCCCCACTTGTCGCTATTTAGTTAAGACGTTCAGTTTCGTTGCGAACTGTGTCCCCCAGGCTAACGTGATGGGTGTCAGGAATCAATGGCCAACTTTCAGTTAGACTTGACC
Zero-based indices of CAAC:
178

./bioinformatics-subsequence.sh
Strand of length 200:
TAAGACTGCAGGGTACGAAGAGTGGAAGATTGGCTCGTACTTGTCGACGTCGCGTGACATAATCTCTGTGCTCGCCTCGCAGTAAGGGACTAGGTCCCGTTCGAGCGCCCTGCTAGAAGGAGCATCCTACCATGCTCTGATGACATCCTGTCGGCATTAGAGTTTCTACGACATCTAAAGAGTACGATCGACTTCCCAGT
Zero-based indices of GACA:
55 141 169

Julia

# Return a random string of `n` characters, each drawn from `bases`
# (the four DNA letters by default).
function DNArand(n, bases=['A', 'T', 'C', 'G'])
    picked = rand(bases, n)
    return join(picked)
end

# Return the index ranges of every occurrence of `needle` in `haystack`;
# overlapping occurrences are included unless `lap` is false.
function DNAsearch(needle, haystack, lap=true)
    return findall(needle, haystack; overlap=lap)
end

# A random 200-base strand and a random 4-base subsequence to look for.
const rand_string = DNArand(200)
const subseq = DNArand(4)

println("Search sequence:\n$rand_string\nfor substring $subseq. Found at positions: ")
# Print each match range padded to 8 columns, starting a new line after
# every 10th match.
foreach(p -> print(rpad(p[2], 8), p[1] % 10 == 0 ? "\n" : ""), enumerate(DNAsearch(subseq, rand_string)))
Output:
Search sequence:
CCGAAGCCAGGAGGACTGAGCGCTTGCGTCCCGAGTTCTGCGACGAGTCTCTTCATTATAAGGCCACTGATTGCGCTCATCATGAGTGCCAGAAGCACCGCTAAACATAAGTGTCCTTTCTTCCTGACGCACTTGAAGATTGTGACCATTTGTGCGGGTTGTGAGTTAGGGGCTCTCATTGTACACGATCTATAGTGTGC
for substring CGCT. Found at positions:
21:24   74:77   99:102

Nim

import random, sequtils, strutils

proc dnaSequence(n: Positive): string =
  ## Build a DNA sequence of "n" bases, each sampled at random from "ACGT".
  result = newStringOfCap(n)
  for _ in 1..n:
    result.add sample("ACGT")

proc positions(dnaSeq, subSeq: string): seq[int] =
  ## 1-based starting positions of every occurrence of "subSeq" within
  ## "dnaSeq", in ascending order; overlapping occurrences are included.
  var hit = dnaSeq.find(subSeq, 0)
  while hit >= 0:
    result.add hit + 1
    # Continue one past the previous hit so overlaps are not skipped.
    hit = dnaSeq.find(subSeq, hit + 1)


when isMainModule:

  const
    N = 200       # length of the random DNA sequence
    Step = 20     # bases shown per row
    SubLen = 4    # the task asks for a four-letter subsequence

  randomize()

  let dnaSeq = dnaSequence(N)
  echo "DNA sequence:"
  for i in countup(0, N - 1, Step):
    # Clamp the row end so that N need not be a multiple of Step.
    echo ($(i+1)).align(3), ' ', dnaSeq[i .. min(i + Step, N) - 1]

  let subSeq = dnaSequence(SubLen)
  echo "\nDNA subsequence: ", subSeq

  echo()
  let pos = dnaSeq.positions(subSeq)
  if pos.len == 0:
    echo "Subsequence not found."
  else:
    # Singular or plural wording depending on the number of hits.
    let tail = if pos.len == 1: ": " else: "s: "
    echo "Subsequence found at position", tail, pos.join(", ")
Output:
DNA sequence:
  1 CACATACGATGAGCTGGGCG
 21 CCTAAGAGGCGGAAAGACAA
 41 CCGTGTGTGTCTAACCCATG
 61 GTTTAATTGCAGATAGTCTC
 81 TAGACTACAAACATTAGAGC
101 AATGCACCGGGGTGCACGTG
121 TGTTTTGACTTCCCATGAAA
141 GCCCTTATCCTAGAGTACAG
161 TCGGCAAATGTTCGCTCCTT
181 GGCCCACTCCATTTGGACGG

DNA subsequence: GTT

Subsequence found at positions: 61, 122, 170

Perl

use strict;
use warnings;
use feature 'say';

# The four DNA bases.  <A C G T> is a glob that simply yields the four words.
my @bases = <A C G T>;
my $basecnt = 160;    # length of the random sequence to search

my($string,$target);
$string .= $bases[ int rand @bases ] for 1 .. $basecnt;    # random sequence
$target .= $bases[ int rand @bases ] for 1 .. 4;           # random 4-base target
say "Target: $target";
say 'Matches at these positions:';
# List every (non-overlapping) match.  pos() is the offset just past the match,
# i.e. the 1-based index of the match's last base.
say pos($string) while $string =~ /$target/g;
# Show the sequence wrapped at 40 bases per line with each match bracketed as " >...< ".
say (($string =~ s/.{1,40}\K/\n/gr) =~ s/($target)/ >$1< /gr);
Output:
Target: CCTG
Matches at these positions:
9
90
157
TTGCC >CCTG< CAAAGTTAATAAGTAAACAATTAAGTGAGTG
CTCTAGGGTAAGGTGAGGGCGGGAAGGGGAAAAATACCGA
TGCGAG >CCTG< TAGAGCCGGGCCTCAAATTAAACGAAAAAT
ATAAGTTTGCTTGGCACGCTGTACTACTTATCC >CCTG< ACT

Phix

Currently only searches for non-overlapped sequences, but it should be pretty obvious how to change that, in which case the next underline will simply partially overwrite the previous, so you'll get eg "<=<==>".

with javascript_semantics

-- When true, the main script below overwrites the random dna with copies of
-- the search string at fixed positions, so that matches are guaranteed.
constant cheat = false

function grandna(integer len)
    -- Return a string of len characters, each chosen at random from "ACGT".
    string dna = ""
    for i=1 to len do
        dna &= "ACGT"[rand(4)]
    end for
    return dna
end function

procedure show(string dna, test, sequence idx)
    -- Print dna fifty bases per line (each line prefixed with the index of its
    -- first base), underline every match listed in idx with "<==>", and finish
    -- with a summary line.
    --  dna:  the sequence that was searched
    --  test: the 4-character string that was searched for
    --  idx:  the 1-based start positions of the matches, in ascending order
    idx = deep_copy(idx) & length(dna)+100 -- (add an otherwise unused sentinel)
    sequence s = split(trim(join_by(split(join_by(dna,1,10,""),"\n"),1,5," ")),"\n")
    integer ii = 1,         -- idx index
            i = idx[ii],    -- current target
            ux = 1,         -- underline index (1..4)
            ldx = 1         -- line index (1, 51, 101, etc)
    for si=1 to length(s) do
        printf(1,"%3d: %s\n",{ldx,s[si]})
        ldx += 50
        -- (ldx is now the index of the first base of the *next* line, so i<ldx
        --  means the current target position lies on the line just printed)
        if i and i<ldx then
            string ul = repeat(' ',59)
            while i and i<ldx do
                integer up = i-ldx+51       -- underline pos (relative to ldx)
                up += floor((up-1)/10)+5    -- (plus any needed spacing)
                ul[up] = "<==>"[ux]
                ux += 1
                i += 1
                -- (ux and i persist across lines, so a match that straddles a
                --  line break continues its underline on the next line)
                if ux>4 then
                    ux = 1
                    ii += 1
                    i = idx[ii]
                end if
            end while
            printf(1,"%s\n",ul)
        end if
    end for
    if length(idx)>1 then
        -- idx includes the sentinel, so there is more than one real match
        -- (and hence a plural) only when length(idx)>2.
        string p = iff(length(idx)>2?"s":""),
               t = join(apply(idx[1..$-1],sprint),", ")
        printf(1,"%s occurs at location%s: %s\n",{test,p,t})
    else
        printf(1,"%s does not occur\n",{test})
    end if
end procedure

-- Generate the random sequence and the random 4-base string to look for.
string dna = grandna(200)
string test = grandna(4)
-- With cheat enabled, plant copies of test at known positions.
constant cheats = iff(cheat ? {9,13,49,60,64,68} : {})
for k=1 to length(cheats) do
    dna[cheats[k]..cheats[k]+3] = test
end for
-- Locate all matches and display them.
show(dna,test,match_all(test,dna))
Output:

with cheat enabled

  1: GGAGATATCG ACCGACCGAA GTAAAGTCAA AGTCGTCCAA TCCACGGACG
             <= =><==>                                   <=
 51: ACTTCAGCAC GACCGACCGA CCTATTTAAG AGACCACACT TAAGGAATCC
     =>       < ==><==><== >
101: ATGCGAAATA AAAATGGGCG AGTAGCCGTG GGGCGCTAAA GCACCCACCT
151: AGTTTTCGCC GAAGTACTAG ACCACCTTCG GATCGACAAA GCTTTCACCA
                                         <==>
CGAC occurs at locations: 9, 13, 49, 60, 64, 68, 184

with cheat disabled

  1: TGATTTAAAC CGTGGTGCAA TTTATAAACA CTGCGATATG CCTCCTGATG
 51: GCATGGTATT CGACACCAAG ACGCTGGTGG GCACACTGGC TTTCAGAATA
101: GGAGTCACAA TCCCTCTATG ATGTCCTCTA GCGGGTGTGT GTTCAGTGCC
151: AGCGCTTACT TCCGGCGTGG CCGACTCTTT TTAAAGCGTA TAGCTGGGGT
GCTA does not occur

Python

Works with: Python version 3.8
Library: regex
 
from random import choice
import regex as re 
import time

def generate_sequence(n: int ) -> str:
    """Return a random DNA string of ``n`` bases drawn from A, C, G and T."""
    bases = "ACGT"
    return "".join(choice(bases) for _ in range(n))

def dna_findall(needle: str, haystack: str) -> None:
    """Print the ``start:end`` indices of every occurrence of ``needle`` in ``haystack``.

    Overlapping occurrences are included, via the ``overlapped=True`` option of
    the third-party ``regex`` module (imported in this file as ``re``).
    Prints "No matches found" when there is no occurrence.

    Args:
        needle: the subsequence to look for (used as the search pattern).
        haystack: the sequence to search.
    """
    # Scan the haystack once and keep the matches, rather than searching a
    # second time just to print them.
    matches = list(re.finditer(needle, haystack, overlapped=True))

    if not matches:
        print("No matches found")
    else:
        print(f"Found {needle} at the following indices: ")
        for match in matches:
            print(f"{match.start()}:{match.end()} ")

dna_seq = generate_sequence(200)
sample_seq = generate_sequence(4)

# Show the sequence 20 bases per line.  Only a complete row is terminated
# with a newline; a shorter final row is left open.
for offset in range(0, len(dna_seq), 20):
    row = dna_seq[offset:offset + 20]
    print(row, end="\n" if len(row) == 20 else "")
print(f"\nSearch Sample: {sample_seq}")

dna_findall(sample_seq, dna_seq)
Output:
TTGCCCCTGTACTGAGCCCA
TAAGCTTGCACTCAAGGTTT
TGCCCCCTCATATTATAACG
CATCCATTATACAAAACCGA
TACCCTTCCGCATATTATGA
AAAGTGGCGAAGTGCCTTGA
TTTGCATTCATAGTACAACG
GTGCAAAAGCATTGTATGTC
TCACATTTACATGGGAAATG
CCTAGTAGGTGCAAGACCTG

Search Sample: TACA
Found TACA at the following indices:
69:73
133:137
167:171

Racket

#lang racket

;; rand-seq : natural -> string
;; A string of n characters, each drawn at random from "TGAC".
(define (rand-seq n)
  (list->string
   (for/list ([k (in-range n)])
     (string-ref "TGAC" (random 4)))))

;; subsequence-indices : string string -> (listof (cons natural natural))
;; Every occurrence of `part` inside `full`, overlapping ones included, as a
;; pair (first-index . last-index) of 0-based, inclusive indices.
;; Returns '() when `part` is longer than `full`.
(define (subsequence-indices full part)
  (let ((part-length (string-length part)) (full-length (string-length full)))
    ;; Valid start indices run from 0 through (full-length - part-length)
    ;; INCLUSIVE, hence the add1: without it a match at the very end of `full`
    ;; is never tested.  in-range yields nothing for a non-positive bound, so
    ;; an over-long `part` gives an empty result instead of an error.
    (for/list ((i (in-range (add1 (- full-length part-length))))
               #:when (for/and ((p (in-string part)) (f (in-string full i))) (char=? p f)))
      (cons i (+ i part-length -1)))))

;; report-sequence : string [natural] -> string
;; Format s as rows of l characters (50 by default).  Each row is prefixed
;; with the 0-based index of its first character, padded to 4 columns; rows
;; are separated by newlines.
(define (report-sequence s (l 50))
  (define total (string-length s))
  (define (row start)
    (format "~a: ~a"
            (~a start #:width 4)
            (substring s start (min total (+ start l)))))
  (string-join (map row (range 0 total l)) "\n"))
  
;; Bioinformatics/Subsequence : [string] [string] -> void
;; Print the searched-for string, the formatted sequence and the list of index
;; pairs at which the string occurs.  By default both are random: a
;; 400-character sequence and a 4-character string to search for.
(define (Bioinformatics/Subsequence [full (rand-seq 400)] [sub (rand-seq 4)])
  (define listing (report-sequence full))
  (define hits (subsequence-indices full sub))
  (printf "Indices of ~a in~%~a~%~a~%" sub listing hits))

;; When run as the main module: perform four independent random searches.
(module+ main (for ((i 4)) (Bioinformatics/Subsequence)))
Output:
Indices of TTAC in
0   : TTATCCTACCGCGTAAGTTCAATGCTCACCGCAGTTTGCTAACCGTTCCT
50  : AAATTCACTTCCTAAGGTATCTTTCGCTTAATTGATGCCGATTGAATTCC
100 : ACGGAGGGCGTAATTGTTTCGGACTTTAGACCTGACATAAGGGCACACTA
150 : GTCCTATTGAATTTGGTGCTATTCGGCGACCTACTAACCTTAGTCAGTGA
200 : AGAGCCATCTCAAAAGTACAGTCATCCTCAAGTGTTACATACGGCACCAT
250 : GACAGTGTATAAGCATGGAGGTTGGCCTATCGTCATATCGAGGCGGCGCC
300 : ATAGACCGGCCAGGTGATGAGATCGACTTTAATGTTGTTGCTTAGCTTGA
350 : CCTCTAGTTTGGATTAAGACGGTCATAGATAGATAGACCGTAAAGTATTC
((234 . 237))
Indices of GTAA in
0   : GTCAGTCCACGCAAGAATAGCAGTTGAGTGGACAATTTATGAGACGGAGA
50  : TAAGTAACCCGCTCCGAGATAAACGTCAGCCGGATTCCGCTGAGTCGGTC
100 : GCCTTCCAAGTGGCAGCTTGTTTGCATTGCTTACAGTGACTTGAACGATC
150 : ACCTACTCGAGGACTCTGCGGGTATTCCAGTTGCCTTGCACTCAGCGATG
200 : CACAAACTTTAAATTATCACAGAAAGAATGTGATTCGGGTGGTCACCCTT
250 : ATCGGTGAAACCAGTCCTTCCATGGGCATATTCTGCGTCGAAATGAGCCC
300 : GCTGTTTACGTTGTACGAACTGGGGACCTAAGGAAACGGGCCGTTCTTAG
350 : GTGATGTCAGCTGCAACGAACTACTGTTAACCTTCTCGATCTGTTGAAAA
((53 . 56))
Indices of AACG in
0   : TTTACAGTACGATTCCGAAGACACAAGAATGCGCCGGCTGTGGGTAGGGG
50  : CGACCCTGCGCGACCTATAAAAGGGGCGACTCAATTTTAGGCCCACCACG
100 : GACCCAGCCCTGTGCACAGAGCGGGGCATTTTTACCTCGCGTGCGCACCA
150 : ACTGCGATCTGCCTTGTCACATAATCCCACATACGAGTTGTATCTCTAAG
200 : AAGGGATGAGGCCAATTTAAATCCGGGTGCATTTCTCGGGGGGAGACACC
250 : AATGAGAGTGGGGCAAGGTGGCGTAGAGAGCTAATCGGGTTTTATGACCG
300 : CGGAAGACCTGGGATACGTCTGGGTGATAACTGAGGGCAGGTCAACGAAC
350 : CCTGATGCGTAGCCACGTCTCAGCTATCGGGCCTGTTTTCATAGTCCATG
((343 . 346))
Indices of CAGC in
0   : TGTGAACCACTATGACACGCTACACGCCTCAAGTTGGCCCCCATATAAGA
50  : ATATCCATCGGTTAATGTGTCTCGCGGCCGTTAGAACAAGCACTAAAGTT
100 : AGAGAAACCAACCATTGGACTAGATCAACATCAACGTCGCTGATAATAAA
150 : TGTATATCTGATGTGGCCGTTCATAAAATCGTTAACTACAGGTATCAACA
200 : TAGTCTCCCAACTTATATAATTGGTTAACTTAGGAGGAGCTTGCACAGCT
250 : CAGCTATATGCTATCTGGCCCTGGGCTTGGTAGGCATCACGTCGTTATGC
300 : TGCGAACATCTCAAAGACAAACGTTGATCCAGCCCCTAGAGAGGTCATTA
350 : GGCCTCGACCCAATTTAACCTCCCACTCCGTGGGTACAGCTTGAACCCCC
((245 . 248) (250 . 253) (329 . 332) (386 . 389))

Raku

Chances are actually fairly small that a random 4-base string will show up more than once, if at all, in a random 200-base sequence. Bump up the sequence size to get a reasonable chance of multiple matches.

# Third-party module; presumably supplies the &splice routine used on strings below -- confirm.
use String::Splice:ver<0.0.3+>;

# Number of bases shown per display line.
my $line = 80;

# Random sequence of 8 display lines' worth of bases, concatenated into one string.
my $haystack = [~] <A C G T>.roll($line * 8);

# Random 4-base search string; declared and printed in the same statement.
say 'Needle: ' ~ my $needle = [~] <A C G T>.roll(4);

# All matches of the needle in the haystack (:g = global).
my $these = $haystack ~~ m:g/<$needle>/;

# For every match: its start offset and the offset just past its end.
my @match = $these.map: { .from, .pos }

printf "From: %3s to %3s\n", |$_ for @match;

# The haystack broken into lines of $line characters for display.
my $disp = $haystack.comb.batch($line)».join.join("\n");

# Wrap each match in ANSI colour codes ("\e[31m" = red, "\e[0m" = reset).
# Offsets are shifted by one per preceding display line (offset div $line) to
# allow for the newlines added to $disp.  Matches are processed last-to-first,
# and the end marker before the start marker, so that an insertion never moves
# an offset that is still to be used.
for @match.reverse {
    $disp.=&splice(.[1] + .[1] div $line, "\e[0m" );
    $disp.=&splice(.[0] + .[0] div $line, "\e[31m");
}

say $disp;
Output:

Show in custom div to better display highlighting.

Needle: TAGC
From: 159 to 163
From: 262 to 266
From: 315 to 319
From: 505 to 509
From: 632 to 636
CATATGTGACACTGACAGCTCGCGCGAAAATCCGTGTGACGGTCTGAACACTATACTATAGGCCCGGTCGGCATTTGTGG
CTCCCCAGTGGAGAGACCACTCGTCAATTGCTGACGACTTAACACAAATCGAGTCGCCCTTAGTGCCAGACGGGACTCCT
AGCAAAGGGCGGCACGTGGTGACTCCCAATATGTGAGCATGCCATCTAATTGATCTGGGGGGTTTCGCGGGAATACCTAG
GGGCGTTCTGTCCATGGATCTCTAGCCCTGCGAAGAGATACCCGCAGTGAGTTGCACGTGCAAAGAACTTGTAACTAGCG
TATTCTGTATCCGCCGCGCGATATGCTTCTGCGGGATGTACTTCTTGTGACTAAGACTTTGTTATCCAAATTGACCAATA
TTCAACGGTCGACTCTCCGAGGCAGTATCGGTACGCCGAAAAATGGTTACTTCGGCCATACGTAACCTCTCAAGTCACGA
TTACAGCCCACGGGGGCTTACAGCATAGCTCCAAAGACATTCCAATTGAGCTACAACGTGTTCAGTGCGGAGCAGTATCC
AGTACTCGACTGTTATGGTAAAAGGGCATCGTGATCGTTTATATTAATCATTGGGACAGGTGGTTAATGTCATAGCTTAG

REXX

This REXX version allows the user to specify:

  •   length of the (random) DNA data sequence     (default is 200).
  •   length of the (random) DNA subsequence to search for     (default is four).
  •   DNA proteins to be used in the sequence         (default is ACGT).
  •   width of the output lines of (random DNA)         (default is 100).
  •   how often (if ever) to add a blank to the output       (default is every 10 proteins).
  •   DNA proteins to be searched in the data         (the default is four unique random proteins).
  •   the seed for the RANDOM function so runs can be repeated with the same data     (no default).
/*REXX pgm gens random DNA (ACGT) sequence & finds positions of a random 4─protein seq. */
/*Arguments (all optional;  an omitted argument or a comma selects the default):        */
/*   totLen  length of the generated data;       rndLen  length of the search target;   */
/*   basePr  the letters to generate from;       oWidth  letters shown per output line; */
/*   Bevery  a blank is added every this many letters (0 = never);                      */
/*   rndDNA  the search target (a "." position is chosen at random by  genRnd);         */
/*   seed    a whole number used to seed  RANDOM  so that a run can be repeated.        */
parse arg totLen rndLen basePr oWidth Bevery rndDNA seed .
if totLen=='' | totLen==","  then totLen=   200  /*Not specified?  Then use the default.*/
if rndLen=='' | rndLen==","  then rndLen=     4  /*default length of the search target. */
if basePr=='' | basePr==","  then basePr= 'acgt' /*default letters (uppercased later).  */
if oWidth=='' | oWidth==","  then oWidth=   100  /*default width of an output line.     */
if Bevery=='' | Bevery==","  then Bevery=    10  /*default: a blank every 10 letters.   */
if rndDNA=='' | rndDNA==","  then rndDNA=  copies(., rndLen)    /*all dots: all random. */
if datatype(seed, 'W')  then call random ,,seed  /*used to generate repeatable random #s*/
call genRnd                                      /*build the search target  (dna?).     */
call show                                        /*generate and display the data  ($$). */
say '  base DNA proteins used: '  basePr
say 'random DNA proteins used: '  dna?
call findRnd                                     /*collect the found positions in   @.  */
if @=='' then do;  say "the random DNA proteins weren't found.";  exit 4;  end
say 'the random DNA proteins were found in positions:'     strip(@)
exit 0                                           /*stick a fork in it,  we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
commas: ?= arg(1)                                /*obtain the number to be formatted.   */
               do jc=length(?)-3  to 1  by -3    /*step right─to─left, 3 digits a time. */
               ?= insert(',', ?, jc)             /*insert a comma after position  JC.   */
               end   /*jc*/
        return ?                                 /*return the number with its commas.   */
/*──────────────────────────────────────────────────────────────────────────────────────*/
findRnd: @=                                      /*@:  list of the found target proteins*/
         p= pos(dna?, $$)                        /*look for the first occurrence.       */
              do  while p>0                      /*keep going while one was found.      */
              @= @ commas(p)                     /*append the position to the "Found"s. */
              p= pos(dna?, $$, p+1)              /*resume one past the previous find.   */
              end   /*while*/
         return
/*──────────────────────────────────────────────────────────────────────────────────────*/
/*genRnd:  builds the search target  dna?  from the template  rndDNA.   A template      */
/*         character other than "." is copied as is;  a "." is replaced by a letter     */
/*         chosen at random from  USE,  the pool of letters that haven't been chosen    */
/*         yet.   Also uppercases  basePr  and  rndDNA,  and sets  lenB.                */
/*         NOTE(review): if the template has more "." than  basePr  has letters,  USE   */
/*         runs empty and  random(1,0)  is invoked;  confirm that this can't happen.    */
genRnd: dna?=;        use= basePr;     upper use basePr rndDNA;       lenB= length(basePr)
              do k=1  for rndLEN;      x= substr(rndDNA, k, 1)
              if x==.  then  do;  ?= random(1, length(use) );         x= substr(use, ?, 1)
                                  use= delstr(use, ?, 1)   /*elide so no protein repeats*/
                             end
              dna?= dna? || x                              /*build a random protein seq.*/
              end   /*k*/
        return
/*──────────────────────────────────────────────────────────────────────────────────────*/
/*show:  generates  totLen  random letters from  basePr  into  $$  (the data that is    */
/*       searched later)  and displays them,  oWidth  letters per line,  each line      */
/*       prefixed by the index of its first letter,  with a blank after every           */
/*       Bevery-th  letter  (none when  Bevery  is zero).                               */
show: say " index │"center('DNA sequence of ' commas(totLen)  " proteins", oWidth+10)
      say "───────┼"center(''                                            , oWidth+10, '─')
      $=; $$=;                 idx= 1               /*gen data field of random proteins.*/
         do j=1  for totLen;   c= substr( basePr, random(1, lenB), 1)
          $$= $$ || c                                       /*append a random protein.  */
          $= $ || c                                         /*also show it:  every      */
                                                            /*protein in  $$  must be   */
                                                            /*displayed, including the  */
                                                            /*one that fills the line.  */
          if length( space($, 0) )<oWidth  then do          /*is the line not full yet? */
               if Bevery\==0  then if j//Bevery==0  then $= $' ' /*a blank after group. */
               iterate
               end
          say strip( right(idx, 7)'│' $, 'T');  $=          /*display line ──► terminal.*/
          idx= idx + oWidth                                 /*bump the index number.    */
          end   /*j*/
      if $\==''  then say right(idx, 7)"│" strip($, 'T')    /*show residual protein data*/
      say "───────┴"center(''                                            , oWidth+10, '─')
      say;                return
output   when using the default inputs:
 index │                                                 DNA sequence of  200  proteins
───────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────────
      1│ TTTTTAGCG CGTTTTGTAG CGCTCTAAAA ACCGTAGCTA TATTTCTCGA AGTTTCACCC AGCTCTTTTG CCCCAGGGTT GCGCTAAGCC CAGCTTCGAG
    101│ GGGGCACAG GTAAAATACT ACCGTCCGTG GAGGGGGATG AATTGACCCG ACATTTTTTG AAGCATAACT CGTGACTCAA TATTGCATGA TTACACCAGC
───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────

  base DNA proteins used:  ACGT
random DNA proteins used:  GCAT

the random DNA proteins were found in positions: 162 184
output   when using the inputs of:     1000   ,   ,   ,   ,   tttt
 index │                                       DNA sequence of  1,000  proteins
───────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────────
      1│ GTGATTTTT AGCGCGTTTT GTAGCGCTCT AAAAACCGTA GCTATATTTC TCGAAGTTTC ACCCAGCTCT TTTGCCCCAG GGTTGCGCTA AGCCCAGCTT
    101│ GAGGGGGGC ACAGGTAAAA TACTACCGTC CGTGGAGGGG GATGAATTGA CCCGACATTT TTTGAAGCAT AACTCGTGAC TCAATATTGC ATGATTACAC
    201│ AGCTAGGTT AGTGTAAAAA CCCCCCTATC TTCCTGATCA ATGGCGAGTA AAACATGCAA CCAATTTGTG AGCGAGTACT GGAAATTATT GTTTACGGGA
    301│ AGGCACATG CTACGCGCAA CAGATATCTT AGACTGACCC TTTTAGAGTC ATAAGCCCCT GTCGCCTACA TGCTACTAAT ACTCCAACTA GCGGCGCACC
    401│ TCAACCGGA TCATGGCGCC AGGGAAAATG TGGCGTAGCG ACGTGCTCAT CGCTCGCCGG GGAGAGCCTT TCAGAATCTC GAATAAAACC TGGTAATGAC
    501│ TCATCAATC GTAATGGTCG TCTGGGGCAA GAAGCCGATA TTATAGACTC AGGTCAGACG TGTGCACAAC GGCAGAATTT ATAGTAATTC GCGTGAACTA
    601│ GTTTCGGGA TAGGCCTACG ACCAATCATA GGACATTCGA TGCACGGTGT AGAAACAGTT CTCTGATGTT ACTCGGGATA ACACTCGCAA TCCCCTAGGA
    701│ ACCGTGAGC GTCGCTAGTA TCTGAGATAG TCGCGACTGC CCAGCGGTCT TTAAGTTCGC ACACTACGGG ACTCCTAGTT CGCCCATTCA TGGCTATTTT
    801│ CCTATCAGT CCAATCCCAC GGGGAGGGCA CTCGCGCAAT TCATTCAAAG AGGGCCATTT GCCGATATAA GGTCCATCAT CGGGAGGAAT ATGACTCCTG
    901│ TTAGTATTA GAGCAGCCTC GCTGCGTACT ACTGTCAGTG GCCCGTCAGG GAAGGCAAAA CGTTTTTCCT CTAGGAATCC GTCAATTGGA CTTCTAGACT
───────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────

  base DNA proteins used:  ACGT
random DNA proteins used:  TTTT

the random DNA proteins were found in positions: 5 6 16 69 157 158 159 340 796 797 962 963

Ring

/*-----------------------------------
# Project : DNA subsequences
# Date    : 2021/03/23
# Author  : Gal Zsolt (~ CalmoSoft ~)
# Email   : <calmosoft@gmail.com>
-----------------------------------*/

//-----------------------------------------

load "stdlibcore.ring"
load "guilib.ring"

# Number of times pStart() has run; showDna() only clears old highlighting when it is > 1.
start = 0
# The four letters a sequence is built from.
base = ["A","C","G","T"]
# The current random sequence, one letter per item (filled by pStart()).
dnaList = []
# Start positions of the matches found by the latest search (filled by showDna()).
dnaSeq = []
# The 21 column-header labels (created in the window set-up below).
ColLine = list(21) 

# Spacing used by all layouts.
C_Spacing = 2 

# Style of a cell that belongs to a match.
C_ButtonDnaStyle  = ' background-color: Red; border-radius: 8px;' 
# Style applied to un-highlight a cell.
# NOTE(review): the inner double quotes look unintended for a style sheet - confirm.
C_ButtonStyle  = '"background-color:white"; border-radius: 8px;'
# The 10 x 20 grid of cells showing the sequence, and the layout of each grid row.
Button = newlist(10,20)
LayoutButtonRow = list(10)

//-----------------------------------------

# Build the window: a row with the found start positions, a row of column
# headers, ten rows of twenty cells for the sequence, and a bottom row with the
# searched subsequence and the "Find" button. Then run the first search and
# start the event loop.
app = new qApp 
{
      win = new qWidget() {
            setWindowTitle('DNA subsequences')
	    setWinIcon(self,AppFile("white.jpg"))
            setStyleSheet('background-color:White')
            setgeometry(560,180,300,300)
            //reSize(400,400)
            winheight = 10 
            fontSize = 8 # + (winheight / 100)

            # Top-level vertical layout holding every row of the window.
            LayoutButtonMain = new QVBoxLayout()            
            LayoutButtonMain.setSpacing(C_Spacing)
            LayoutButtonMain.setContentsmargins(0,0,0,0)

            LabelInd = new qLabel(win) { settext("    DNA subsequences start positions:")
                                         setAlignment(Qt_AlignHCenter | Qt_AlignVCenter)
                                         setStyleSheet("background-color:yellow") }

            # Shows the list of start positions (its text is set in showDna()).
            ButtonInd = new QPushButton(win) { setStyleSheet("background-color:yellow") }

            LabelFind = new qLabel(win) { settext("    DNA subsequence to find:")
                                          setStyleSheet("background-color:yellow") }

            # Shows the subsequence being searched for (its text is set in showDna()).
            ButtonFind = new QPushButton(win)

            # Clicking "Find" generates a new sequence and runs a new search.
            DnaSearch = new QPushButton(win) { setclickevent("pstart()")
                                               setStyleSheet("background-color:yellow")
                                               settext("Find")
                                             }
    
            # Column headers, labelled 0..20.
            for Col = 1 to 21
                ColLine[Col] = new qLabel(win) {
                                setmaximumheight(20)
                                setAlignment(Qt_AlignHCenter | Qt_AlignVCenter) 
                                setStyleSheet("background-color:darkgray")
                                setText(string(Col-1))
                                } 
            next

            LayoutInd = new QHBoxLayout() { setSpacing(C_Spacing) setContentsMargins(0,0,0,0) }
            LayoutInd.AddWidget(LabelInd) 
            LayoutInd.AddWidget(ButtonInd) 
            LayoutButtonMain.AddLayout(LayoutInd) 
                
            LayoutTitleRow = new QHBoxLayout() { setSpacing(C_Spacing) setContentsMargins(0,0,0,0) }

                for Col = 1 to 21                
                    LayoutTitleRow.AddWidget(ColLine[Col])         
                next
                            
            LayoutButtonMain.AddLayout(LayoutTitleRow)  
            
            RowLine = list(10)  

            # Row headers: 20, 40, ... 200 (two leading blanks are added below 100).
            for Row = 1 to 10
                Letter = "" + Row*20
                if Row*20 < 100
                   Letter = "  " + Row*20
                ok
                RowLine[Row] = new qLabel(win) { setFont(new qFont("Verdana",fontSize,40,0))
                                                 setAlignment(Qt_AlignHCenter | Qt_AlignVCenter) 
                                                 setStyleSheet("background-color:darkgray")
                                                 setText(Letter)
                                               } 
            next

            # The grid itself: each row is its header followed by twenty cells.
            for Row = 1 to 10
                LayoutButtonRow[Row] = new QHBoxLayout()    
                {
                    setSpacing(C_Spacing)
                    setContentsmargins(0,0,0,0)
                } 

               LayoutButtonRow[Row].AddWidget(RowLine[Row])
               
               for Col = 1 to 20
                    Button[Row][Col] = new QPushButton(win) {
                                       setmaximumwidth(20) 
                                       }
                    LayoutButtonRow[Row].AddWidget(Button[Row][Col])    
               next
               
               LayoutButtonMain.AddLayout(LayoutButtonRow[Row])         
            next

            LayoutDataRow = new QHBoxLayout() { setSpacing(C_Spacing) setContentsMargins(0,0,0,0) }

            LayoutDataRow.AddWidget(LabelFind)
            LayoutDataRow.AddWidget(ButtonFind)               
            LayoutDataRow.AddWidget(DnaSearch)
            LayoutButtonMain.AddLayout(LayoutDataRow) 
                  
            setLayout(LayoutButtonMain)
            
            # Generate and search a first sequence before the window is shown.
            pStart()
            show()
   }
   exec()
 }

//-----------------------------------------

func pStart()
     # Count this run (showDna() clears old highlighting from the second run on).
     start += 1

     # Start from an empty sequence and blank every cell of the grid.
     dnaList = []
     for row = 1 to 10
         for col = 1 to 20
             Button[row][col].settext("")
         next
     next

     # Draw 200 random letters, filling the grid row by row, left to right,
     # and recording each letter in dnaList in the same order.
     for row = 1 to 10
         for col = 1 to 20
             baseStr = base[random(3)+1]
             Button[row][col].settext(baseStr)
             add(dnaList,baseStr)
         next
     next

     startDna()

//-----------------------------------------

func startDna()

     # Search and display the current sequence.
     # showDna() picks the random subsequence itself, so nothing needs to be
     # prepared here (the candidate loop that used to be here computed values
     # that were never used).
     showDna(dnaList)

//-----------------------------------------

func showDna(dnaList)

     # dnaList: the sequence to search, one letter per item.
     # Picks a random 4-letter subsequence that occurs in the sequence, records
     # all its start positions in dnaSeq, lists them in ButtonInd and
     # highlights the matching cells of the grid.

     # From the second run on, restore the plain style on the cells highlighted
     # by the previous search (dnaSeq still holds its start positions).
     if start > 1
     see nl
     for n = 1 to len(dnaSeq)
         for m = 0 to 3
             ind = dnaSeq[n] + m
             row = ceil(ind/20)
             col = ind%20
             if col = 0
                col = 20
             ok
             Button[row][col].setstylesheet(C_ButtonStyle)
         next
     next
     ok


     dnaSeq = []
     # The sequence as one string (list2str() separates the items with newlines).
     strDna = list2str(dnaList)
     strDna = substr(strDna,nl,"")

     # Draw random 4-letter candidates until one occurs in the sequence.
     while true
           strBase = ""
           for n = 1 to 4
               rnd = random(3)+1
               strBase = strBase + base[rnd]
           next
           ind = substr(strDna,strBase)
           if ind > 0
              exit
           ok
     end

     ButtonFind.setStyleSheet("background-color:yellow")
     ButtonFind.settext(strBase)

     # Record every start position of the subsequence. The last position at
     # which it still fits is len(dnaList)-len(strBase)+1 (197 for 200 letters
     # and a 4-letter subsequence); stopping any earlier would miss a match at
     # the very end of the sequence.
     lastStart = len(dnaList) - len(strBase) + 1
     for n = 1 to lastStart
         flag = 1
         for m = 0 to 3
             if dnaList[n+m] != strBase[m+1]
                flag = 0
                exit
             ok
         next
         if flag = 1
            add(dnaSeq,n)
         ok
     next
     
     # List the start positions and highlight the four cells of every match.
     temp = ""
     ButtonInd.settext("")
     for nr = 1 to len(dnaList)
         ind = find(dnaSeq,nr)
         if ind > 0
            temp = temp + string(dnaSeq[ind]) + " "
            ButtonInd.settext(temp)
            for n = nr to nr + 3
                row = ceil(n/20)
                col = n%20
                if col = 0
                   col = 20
                ok
                Button[row][col].setStyleSheet(C_ButtonDnaStyle)
                Button[row][col].settext(dnaList[n])
            next
         ok
      next           

//-----------------------------------------

Output:

Bioinformatics/Subsequence - video

Wren

Library: Wren-pattern
Library: Wren-str
Library: Wren-fmt
import "random" for Random
import "./pattern" for Pattern
import "./str" for Str
import "./fmt" for Fmt

// Random number generator shared by every call of findDnaSubsequence.
var rand = Random.new()
// The four letters a sequence is built from.
var base = "ACGT"

// Generates a random DNA sequence of dnaSize bases and a random 4-base
// subsequence, prints the sequence in rows of chunkSize bases (each labelled
// with its 1-based index range) and then prints the 1-based index range of
// every match of the subsequence.
var findDnaSubsequence = Fn.new { |dnaSize, chunkSize|
    // Random sequence, built one base at a time.
    var dnaStr = ""
    for (k in 0...dnaSize) dnaStr = dnaStr + base[rand.int(4)]
    // Random four-base subsequence to look for.
    var dnaSubstr = ""
    for (k in 0...4) dnaSubstr = dnaSubstr + base[rand.int(4)]
    System.print("DNA sequence:")
    var first = 1   // 1-based index of the first base of the current row
    for (chunk in Str.chunks(dnaStr, chunkSize)) {
         Fmt.print("$3d..$3d: $s", first, first + chunkSize - 1, chunk)
         first = first + chunkSize
    }
    System.print("\nSubsequence to locate: %(dnaSubstr)")
    var matches = Pattern.new(dnaSubstr).findAll(dnaStr)
    if (matches.count > 0) {
        System.print("Matches found at the following indices:")
        for (m in matches) {
            var start = m.index + 1   // 0-based match index -> 1-based position
            Fmt.print("$3d..$3d", start, start + 3)
        }
    } else {
        System.print("No matches found.")
    }
}

// Two runs: 200 bases shown 20 per row, then 600 bases shown 40 per row.
findDnaSubsequence.call(200, 20)
System.print()
findDnaSubsequence.call(600, 40)
Output:
DNA sequence:
  1.. 20: TATGGGCGCATTATGACAAC
 21.. 40: GGCTACTGAAACGAAAATTC
 41.. 60: ATGCCTTCGGAGGCTAGACC
 61.. 80: ACTCATACATGATTTACAGC
 81..100: TAGTCAGTTGCGTCCGCCAT
101..120: CCCGCATAACTATGTATTAC
121..140: GAGCATGTTCTGGCAACCTT
141..160: TCAGTGACAGTTCCTCAGGC
161..180: GCGTTCGCGTTGAAGGCCTC
181..200: CCCACACCGCACCCCTGCCG

Subsequence to locate: AATT
Matches found at the following indices:
 36.. 39

DNA sequence:
  1.. 40: GCGCTGAGCGCCCCAGTACAGCGGGTTAAACCGAGCCCGC
 41.. 80: TCCGATGAACCAACTCCCATTCCTATAATGGTGCCCCGAC
 81..120: ATATTGAATTCGGCGGGTCCGCTATCGGGCTGAGGATGCC
121..160: AATATCTAGGCGCTACCCTGAAGATCCTCAGTTGTGGTGT
161..200: CGCGGAGTGTCGATCCCAGAGCTCCCAATTGACTCAATTA
201..240: CTTTTTCCGTCCTCTTGCTTACGGATTTATGTTTGTGGCA
241..280: GAGGTTATGCTTCAGGCATCCCCATGTTTCCTGAGATACG
281..320: ACCACTGTCAGGTGGCTTGAATCTACCTTGTATTTCCTCT
321..360: AGTACCAGTCACTGTCATCTACTGGAAGCCATATCAGCGT
361..400: TGAAATGTCTATAATTTACTCTCCGGTTGTACCCAAGCGA
401..440: TAACAGCAACGTGTGGGTCTAAAGAGTTCCGCGTTTCGAC
441..480: ATAACGTGCTCCTATTTATCTACCGAAACACCCTATTTTC
481..520: CATCTAACCGGCACCCAATGCGCAGGTGTACGCGTCCTAC
521..560: TACGTTTGAAACGGTTCCATCTCGCCATGTACAATTGTGG
561..600: GGCTACGATTAAGTGTAGTCGGTAATTCAGGGTGAAGTTG

Subsequence to locate: TTCG
Matches found at the following indices:
 89.. 92
435..438