Bioinformatics/Subsequence: Difference between revisions
m (→{{header|Phix}}: added syntax colouring the hard way) |
|||
Line 94: | Line 94: | ||
Note: match_all() is due to become a builtin in the next release, so the version below may or may not need renaming/deleting before it will run.<br> |
Note: match_all() is due to become a builtin in the next release, so the version below may or may not need renaming/deleting before it will run.<br> |
||
Currently only searches for non-overlapped sequences, but it should be pretty obvious how to change that, in which case the next underline will simply partially overwrite the previous, so you'll get eg "<=<==>". |
Currently only searches for non-overlapped sequences, but it should be pretty obvious how to change that, in which case the next underline will simply partially overwrite the previous, so you'll get eg "<=<==>". |
||
<lang Phix> |
<!--lang Phix--> |
||
<span style="color: #008080;">constant</span> <span style="color: #000000;">cheat</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span> |
|||
function grandna(integer len) |
|||
string dna = repeat(' ',len) |
|||
for i=1 to len do dna[i] = "ACGT"[rand(4)] end for |
|||
return dna |
|||
end function |
|||
procedure show(string dna, sequence idx) |
|||
idx &= length(dna)+100 -- (add an otherwise unused sentinel) |
|||
sequence s = split(trim(join_by(split(join_by(dna,1,10,""),"\n"),1,5," ")),"\n") |
|||
integer ii = 1, -- idx index |
|||
i = idx[ii], -- current target |
|||
ux = 1, -- underline index (1..4) |
|||
ldx = 1 -- line index (1, 51, 101, etc) |
|||
for si=1 to length(s) do |
|||
printf(1,"%3d: %s\n",{ldx,s[si]}) |
|||
ldx += 50 |
|||
if i and i<ldx then |
|||
string ul = repeat(' ',59) |
|||
while i and i<ldx do |
|||
integer up = i-ldx+51 -- underline pos (relative to ldx) |
|||
up += floor((up-1)/10)+5 -- (plus any needed spacing) |
|||
ul[up] = "<==>"[ux] |
|||
ux += 1 |
|||
i += 1 |
|||
if ux>4 then |
|||
ux = 1 |
|||
ii += 1 |
|||
i = idx[ii] |
|||
end if |
|||
end while |
|||
printf(1,"%s\n",ul) |
|||
end if |
|||
end for |
|||
if length(idx) then |
|||
string s = iff(length(idx)>1?"s":""), |
|||
t = join(apply(idx,sprint),", ") |
|||
printf(1,"%s occurs at location%s: %s\n",{test,s,t}) |
|||
else |
|||
printf(1,"%s does not occur\n",{test}) |
|||
end if |
|||
end procedure |
|||
function match_all(object needle, sequence haystack, bool bOverlap = false) |
|||
if atom(needle) then return find_all(needle,haystack) end if |
|||
integer start = 1 |
|||
sequence res = {} |
|||
while 1 do |
|||
start = match(needle,haystack,start) |
|||
if start=0 then exit end if |
|||
res = append(res,start) |
|||
start += iff(bOverlap?1:length(needle)) |
|||
end while |
|||
return res |
|||
end function |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">grandna<span style="color: #0000FF;">(<span style="color: #004080;">integer</span> <span style="color: #000000;">len<span style="color: #0000FF;">)</span> |
|||
string dna = grandna(200), |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">dna</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat<span style="color: #0000FF;">(<span style="color: #008000;">' '<span style="color: #0000FF;">,<span style="color: #000000;">len<span style="color: #0000FF;">)</span> |
|||
test = grandna(4) |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i<span style="color: #0000FF;">=<span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">len</span> <span style="color: #008080;">do</span> <span style="color: #000000;">dna<span style="color: #0000FF;">[<span style="color: #000000;">i<span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"ACGT"<span style="color: #0000FF;">[<span style="color: #7060A8;">rand<span style="color: #0000FF;">(<span style="color: #000000;">4<span style="color: #0000FF;">)<span style="color: #0000FF;">]</span> <span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
constant cheats = iff(cheat?{9,13,49,60,64,68}:{}) |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">dna</span> |
|||
for i=1 to length(cheats) do |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
dna[cheats[i]..cheats[i]+3] = test |
|||
end for |
|||
<span style="color: #008080;">procedure</span> <span style="color: #000000;">show<span style="color: #0000FF;">(<span style="color: #004080;">string</span> <span style="color: #000000;">dna<span style="color: #0000FF;">,</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">idx<span style="color: #0000FF;">)</span> |
|||
sequence idx = match_all(test,dna) |
|||
<span style="color: #000000;">idx</span> <span style="color: #0000FF;">&=</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">dna<span style="color: #0000FF;">)<span style="color: #0000FF;">+<span style="color: #000000;">100</span> <span style="color: #000080;font-style:italic;">-- (add an otherwise unused sentinel)</span> |
|||
show(dna,idx)</lang> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">s</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">split<span style="color: #0000FF;">(<span style="color: #7060A8;">trim<span style="color: #0000FF;">(<span style="color: #7060A8;">join_by<span style="color: #0000FF;">(<span style="color: #7060A8;">split<span style="color: #0000FF;">(<span style="color: #7060A8;">join_by<span style="color: #0000FF;">(<span style="color: #000000;">dna<span style="color: #0000FF;">,<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #000000;">10<span style="color: #0000FF;">,<span style="color: #008000;">""<span style="color: #0000FF;">)<span style="color: #0000FF;">,<span style="color: #008000;">"\n"<span style="color: #0000FF;">)<span style="color: #0000FF;">,<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #000000;">5<span style="color: #0000FF;">,<span style="color: #008000;">" "<span style="color: #0000FF;">)<span style="color: #0000FF;">)<span style="color: #0000FF;">,<span style="color: #008000;">"\n"<span style="color: #0000FF;">)</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">ii</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1<span style="color: #0000FF;">,</span> <span style="color: #000080;font-style:italic;">-- idx index</span> |
|||
<span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">idx<span style="color: #0000FF;">[<span style="color: #000000;">ii<span style="color: #0000FF;">]<span style="color: #0000FF;">,</span> <span style="color: #000080;font-style:italic;">-- current target</span> |
|||
<span style="color: #000000;">ux</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1<span style="color: #0000FF;">,</span> <span style="color: #000080;font-style:italic;">-- underline index (1..4)</span> |
|||
<span style="color: #000000;">ldx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span> <span style="color: #000080;font-style:italic;">-- line index (1, 51, 101, etc)</span> |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">si<span style="color: #0000FF;">=<span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">s<span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #7060A8;">printf<span style="color: #0000FF;">(<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #008000;">"%3d: %s\n"<span style="color: #0000FF;">,<span style="color: #0000FF;">{<span style="color: #000000;">ldx<span style="color: #0000FF;">,<span style="color: #000000;">s<span style="color: #0000FF;">[<span style="color: #000000;">si<span style="color: #0000FF;">]<span style="color: #0000FF;">}<span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">ldx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">50</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span> <span style="color: #008080;">and</span> <span style="color: #000000;">i<span style="color: #0000FF;"><<span style="color: #000000;">ldx</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">ul</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat<span style="color: #0000FF;">(<span style="color: #008000;">' '<span style="color: #0000FF;">,<span style="color: #000000;">59<span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span> <span style="color: #008080;">and</span> <span style="color: #000000;">i<span style="color: #0000FF;"><<span style="color: #000000;">ldx</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">up</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i<span style="color: #0000FF;">-<span style="color: #000000;">ldx<span style="color: #0000FF;">+<span style="color: #000000;">51</span> <span style="color: #000080;font-style:italic;">-- underline pos (relative to ldx)</span> |
|||
<span style="color: #000000;">up</span> <span style="color: #0000FF;">+=</span> <span style="color: #7060A8;">floor<span style="color: #0000FF;">(<span style="color: #0000FF;">(<span style="color: #000000;">up<span style="color: #0000FF;">-<span style="color: #000000;">1<span style="color: #0000FF;">)<span style="color: #0000FF;">/<span style="color: #000000;">10<span style="color: #0000FF;">)<span style="color: #0000FF;">+<span style="color: #000000;">5</span> <span style="color: #000080;font-style:italic;">-- (plus any needed spacing)</span> |
|||
<span style="color: #000000;">ul<span style="color: #0000FF;">[<span style="color: #000000;">up<span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"<==>"<span style="color: #0000FF;">[<span style="color: #000000;">ux<span style="color: #0000FF;">]</span> |
|||
<span style="color: #000000;">ux</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">ux<span style="color: #0000FF;">><span style="color: #000000;">4</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #000000;">ux</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #000000;">ii</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">idx<span style="color: #0000FF;">[<span style="color: #000000;">ii<span style="color: #0000FF;">]</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #7060A8;">printf<span style="color: #0000FF;">(<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #008000;">"%s\n"<span style="color: #0000FF;">,<span style="color: #000000;">ul<span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">idx<span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">s</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff<span style="color: #0000FF;">(<span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">idx<span style="color: #0000FF;">)<span style="color: #0000FF;">><span style="color: #000000;">1<span style="color: #0000FF;">?<span style="color: #008000;">"s"<span style="color: #0000FF;">:<span style="color: #008000;">""<span style="color: #0000FF;">)<span style="color: #0000FF;">,</span> |
|||
<span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">join<span style="color: #0000FF;">(<span style="color: #7060A8;">apply<span style="color: #0000FF;">(<span style="color: #000000;">idx<span style="color: #0000FF;">,<span style="color: #7060A8;">sprint<span style="color: #0000FF;">)<span style="color: #0000FF;">,<span style="color: #008000;">", "<span style="color: #0000FF;">)</span> |
|||
<span style="color: #7060A8;">printf<span style="color: #0000FF;">(<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #008000;">"%s occurs at location%s: %s\n"<span style="color: #0000FF;">,<span style="color: #0000FF;">{<span style="color: #000000;">test<span style="color: #0000FF;">,<span style="color: #000000;">s<span style="color: #0000FF;">,<span style="color: #000000;">t<span style="color: #0000FF;">}<span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">else</span> |
|||
<span style="color: #7060A8;">printf<span style="color: #0000FF;">(<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #008000;">"%s does not occur\n"<span style="color: #0000FF;">,<span style="color: #0000FF;">{<span style="color: #000000;">test<span style="color: #0000FF;">}<span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span> |
|||
<span style="color: #008080;">function</span> <span style="color: #000000;">match_all<span style="color: #0000FF;">(<span style="color: #004080;">object</span> <span style="color: #000000;">needle<span style="color: #0000FF;">,</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">haystack<span style="color: #0000FF;">,</span> <span style="color: #004080;">bool</span> <span style="color: #000000;">bOverlap</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false<span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #004080;">atom<span style="color: #0000FF;">(<span style="color: #000000;">needle<span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #7060A8;">find_all<span style="color: #0000FF;">(<span style="color: #000000;">needle<span style="color: #0000FF;">,<span style="color: #000000;">haystack<span style="color: #0000FF;">)</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #004080;">integer</span> <span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{<span style="color: #0000FF;">}</span> |
|||
<span style="color: #008080;">while</span> <span style="color: #000000;">1</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match<span style="color: #0000FF;">(<span style="color: #000000;">needle<span style="color: #0000FF;">,<span style="color: #000000;">haystack<span style="color: #0000FF;">,<span style="color: #000000;">start<span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">if</span> <span style="color: #000000;">start<span style="color: #0000FF;">=<span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span> |
|||
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append<span style="color: #0000FF;">(<span style="color: #000000;">res<span style="color: #0000FF;">,<span style="color: #000000;">start<span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">start</span> <span style="color: #0000FF;">+=</span> <span style="color: #008080;">iff<span style="color: #0000FF;">(<span style="color: #000000;">bOverlap<span style="color: #0000FF;">?<span style="color: #000000;">1<span style="color: #0000FF;">:<span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">needle<span style="color: #0000FF;">)<span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span> |
|||
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span> |
|||
<span style="color: #004080;">string</span> <span style="color: #000000;">dna</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">grandna<span style="color: #0000FF;">(<span style="color: #000000;">200<span style="color: #0000FF;">)<span style="color: #0000FF;">,</span> |
|||
<span style="color: #000000;">test</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">grandna<span style="color: #0000FF;">(<span style="color: #000000;">4<span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">constant</span> <span style="color: #000000;">cheats</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff<span style="color: #0000FF;">(<span style="color: #000000;">cheat<span style="color: #0000FF;">?<span style="color: #0000FF;">{<span style="color: #000000;">9<span style="color: #0000FF;">,<span style="color: #000000;">13<span style="color: #0000FF;">,<span style="color: #000000;">49<span style="color: #0000FF;">,<span style="color: #000000;">60<span style="color: #0000FF;">,<span style="color: #000000;">64<span style="color: #0000FF;">,<span style="color: #000000;">68<span style="color: #0000FF;">}<span style="color: #0000FF;">:<span style="color: #0000FF;">{<span style="color: #0000FF;">}<span style="color: #0000FF;">)</span> |
|||
<span style="color: #008080;">for</span> <span style="color: #000000;">i<span style="color: #0000FF;">=<span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">cheats<span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span> |
|||
<span style="color: #000000;">dna<span style="color: #0000FF;">[<span style="color: #000000;">cheats<span style="color: #0000FF;">[<span style="color: #000000;">i<span style="color: #0000FF;">]<span style="color: #0000FF;">..<span style="color: #000000;">cheats<span style="color: #0000FF;">[<span style="color: #000000;">i<span style="color: #0000FF;">]<span style="color: #0000FF;">+<span style="color: #000000;">3<span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">test</span> |
|||
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span> |
|||
<span style="color: #004080;">sequence</span> <span style="color: #000000;">idx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">match_all<span style="color: #0000FF;">(<span style="color: #000000;">test<span style="color: #0000FF;">,<span style="color: #000000;">dna<span style="color: #0000FF;">)</span> |
|||
<span style="color: #000000;">show<span style="color: #0000FF;">(<span style="color: #000000;">dna<span style="color: #0000FF;">,<span style="color: #000000;">idx<span style="color: #0000FF;">) |
|||
<!--</lang>--> |
|||
{{out}} |
{{out}} |
||
with cheat enabled |
with cheat enabled |
Revision as of 04:57, 23 March 2021
- Task
Randomly generate a string of 200 DNA bases (represented by A, C, G, and T).
Write a routine to find all the positions of a randomly generated subsequence (four letters).
Factor
<lang factor>USING: accessors formatting grouping io kernel math
math.functions.integer-logs math.parser random regexp sequences ;

! Build a string of n bases, each chosen at random from "ACGT".
: new-dna ( n -- str ) [ "ACGT" random ] "" replicate-as ;

! Render the number n right-aligned (space padded) in a field of d characters.
: pad ( n d -- str ) [ number>string ] dip 32 pad-head ;

! Print seq in rows of n bases, each row prefixed by the 0-based offset of its
! first base. Defined with `::` because the body binds the local `d` with `:>`.
:: .dna ( seq n -- )
    seq length integer-log10 1 + :> d  ! digit count of the length = offset column width
    seq n group [ n * d pad write ": " write write nl ] each-index ;

! Print the start and end indices of one matching slice as "from..to".
: .match ( slice -- ) [ from>> ] [ to>> ] bi "%d..%d\n" printf ;

! Print a heading followed by one line per matching slice.
: .matches ( slices -- )
    "Matches found at the following indices:" print
    [ .match ] each ;

! Report the matches, or say so when there are none.
: .locate ( slices -- )
    [ "No matches found." print ] [ .matches ] if-empty ;

! Generate dna-size random bases, print them row-size per row, then generate
! a random 4-base subsequence and report every place it occurs.
: .biosub ( dna-size row-size -- )
    [ new-dna dup ] [ .dna nl ] bi* 4 new-dna dup
    "Subsequence to locate: %s\n" printf <regexp>
    all-matching-slices .locate ;

80 10 .biosub nl
600 39 .biosub nl</lang>
- Output:
 0: ATTCAAGGAC
10: CACTATTAAC
20: CTGCATTGTG
30: AGAACTTGCA
40: GTGTACCGAG
50: AGCGAGTTTA
60: AAGCAACACA
70: TCTTTACCGA

Subsequence to locate: GTAG
No matches found.

  0: GATCTCGTCATGGTCCATCCTAACATTTCGGTTGTGGGC
 39: GCATCCCGATAGGCGAAGTTAAATCTACGTAGTCCTACG
 78: TCACGACGGAACATGATTGCCCACCGAAGTCGTAGGCGA
117: GCTAAAGTCGGTACATACACGATCTGCTATATTCGTTCT
156: CCGACACACGACATGCAATCCGAGAAGCTCTCGAAGTGC
195: GGTCAGATCCTCAGACTCGAACAGAGGAGACCTTAACTG
234: ATACCCACAGTACTTCTCGCATAACCTAAGCACCTATGC
273: TTACACCATCGTCCTGATATTGAGTGAGTCTGGTCGGAG
312: ATATTATCTAGCACCCTCAAGCTCTGTGTGCCACACCAG
351: GATTCCACTTCGCGCTTGCCTAGAGAAAGTAGAGTAGGT
390: GGTGTCATTAGTACACTGTTTGCGATGCACCAACCAAAC
429: CCGACCGCCATGATGACTGCTTTTCGGCCAACGTCAGAT
468: TAAGAGTACTTTTAGTAGCACCGCAAGCCAGCCGGTTTA
507: GCAAGATCCTGCAGCCTCCACGTTATTTCAGGTCTCTAA
546: GCGTTCTTTCCATGGAAGTAGTCACCGCTCCCGTTGCCA
585: ATGGACACAGACGTT

Subsequence to locate: ATAT
Matches found at the following indices:
145..149
289..293
312..316
Julia
<lang julia># Return a random string of `n` characters drawn from `bases`.
DNArand(n, bases=['A', 'T', 'C', 'G']) = String(rand(bases, n))

# Return the index ranges of every occurrence of `needle` in `haystack`;
# overlapping occurrences are included unless `lap` is false.
DNAsearch(needle, haystack, lap=true) = findall(needle, haystack, overlap=lap)

const rand_string = DNArand(200)
const subseq = DNArand(4)

println("Search sequence:\n$rand_string\nfor substring $subseq. Found at positions: ")
# Print each match range padded to 8 columns, starting a new line after every 10th.
foreach(p -> print(rpad(p[2], 8), p[1] % 10 == 0 ? "\n" : ""), enumerate(DNAsearch(subseq, rand_string)))
</lang>
- Output:
Search sequence:
CCGAAGCCAGGAGGACTGAGCGCTTGCGTCCCGAGTTCTGCGACGAGTCTCTTCATTATAAGGCCACTGATTGCGCTCATCATGAGTGCCAGAAGCACCGCTAAACATAAGTGTCCTTTCTTCCTGACGCACTTGAAGATTGTGACCATTTGTGCGGGTTGTGAGTTAGGGGCTCTCATTGTACACGATCTATAGTGTGC
for substring CGCT. Found at positions:
21:24   74:77   99:102
Phix
Note: match_all() is due to become a builtin in the next release, so the version below may or may not need renaming/deleting before it will run.
Currently this only searches for non-overlapping occurrences. To include overlapping ones, pass true as the third (bOverlap) argument of match_all(); each underline will then simply partially overwrite the previous one, so you'll get e.g. "<=<==>".
constant cheat = false function grandna(integer len) string dna = repeat(' ',len) for i=1 to len do dna[i] = "ACGT"[rand(4)] end for return dna end function procedure show(string dna, sequence idx) idx &= length(dna)+100 -- (add an otherwise unused sentinel) sequence s = split(trim(join_by(split(join_by(dna,1,10,""),"\n"),1,5," ")),"\n") integer ii = 1, -- idx index i = idx[ii], -- current target ux = 1, -- underline index (1..4) ldx = 1 -- line index (1, 51, 101, etc) for si=1 to length(s) do printf(1,"%3d: %s\n",{ldx,s[si]}) ldx += 50 if i and i<ldx then string ul = repeat(' ',59) while i and i<ldx do integer up = i-ldx+51 -- underline pos (relative to ldx) up += floor((up-1)/10)+5 -- (plus any needed spacing) ul[up] = "<==>"[ux] ux += 1 i += 1 if ux>4 then ux = 1 ii += 1 i = idx[ii] end if end while printf(1,"%s\n",ul) end if end for if length(idx) then string s = iff(length(idx)>1?"s":""), t = join(apply(idx,sprint),", ") printf(1,"%s occurs at location%s: %s\n",{test,s,t}) else printf(1,"%s does not occur\n",{test}) end if end procedure function match_all(object needle, sequence haystack, bool bOverlap = false) if atom(needle) then return find_all(needle,haystack) end if integer start = 1 sequence res = {} while 1 do start = match(needle,haystack,start) if start=0 then exit end if res = append(res,start) start += iff(bOverlap?1:length(needle)) end while return res end function string dna = grandna(200), test = grandna(4) constant cheats = iff(cheat?{9,13,49,60,64,68}:{}) for i=1 to length(cheats) do dna[cheats