Bioinformatics/Subsequence: Difference between revisions

m
→‎{{header|Phix}}: added syntax colouring the hard way
m (→‎{{header|Phix}}: added syntax colouring the hard way)
Line 94:
Note: match_all() is due to become a builtin in the next release, so the version below may or may not need renaming/deleting before it will run.<br>
Currently only searches for non-overlapping matches, but it should be pretty obvious how to change that (pass true as the third, bOverlap, argument of match_all()), in which case the next underline will simply partially overwrite the previous one, so you'll get e.g. "<=<==>".
<!--lang Phix-->constant cheat = false
<span style="color: #008080;">constant</span> <span style="color: #000000;">cheat</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false</span>
 
-- Generate a random DNA string.
--  len: number of bases to produce
-- Returns a string of length len in which every character is picked
-- from "ACGT" using rand(4).
function grandna(integer len)
    string bases = "ACGT",
           res = repeat(' ',len)    -- pre-sized, every slot is overwritten below
    for pos=1 to len do
        integer pick = rand(4)
        res[pos] = bases[pick]
    end for
    return res
end function
 
-- Print dna in numbered rows, underline each match with "<==>", then
-- report where the matches are.
--  dna: the string to display
--  idx: start positions of the matches; the underline loop walks them in
--       order, so they are expected to be ascending, and each underline is
--       exactly four characters wide ("<==>")
-- The final report reads the file-level variable test (not a parameter).
procedure show(string dna, sequence idx)
    -- Keep the real match positions aside: idx gets a sentinel appended just
    -- below, and reporting from idx would print that sentinel as a location
    -- and make the "does not occur" branch unreachable.
    sequence hits = idx
    idx &= length(dna)+100 -- (add an otherwise unused sentinel)
    sequence s = split(trim(join_by(split(join_by(dna,1,10,""),"\n"),1,5," ")),"\n")
    integer ii = 1,         -- idx index
            i = idx[ii],    -- current target
            ux = 1,         -- underline index (1..4)
            ldx = 1         -- line index (1, 51, 101, etc)
    for si=1 to length(s) do
        printf(1,"%3d: %s\n",{ldx,s[si]})
        ldx += 50
        if i and i<ldx then
            -- 59 = 5 chars of "%3d: " prefix + 50 bases + 4 group separators
            string ul = repeat(' ',59)
            while i and i<ldx do
                integer up = i-ldx+51       -- underline pos (relative to ldx)
                up += floor((up-1)/10)+5    -- (plus any needed spacing)
                ul[up] = "<==>"[ux]
                ux += 1
                i += 1
                if ux>4 then
                    -- finished one underline, move on to the next match
                    -- (the sentinel guarantees idx[ii] exists)
                    ux = 1
                    ii += 1
                    i = idx[ii]
                end if
            end while
            printf(1,"%s\n",ul)
        end if
    end for
    if length(hits) then
        string plural = iff(length(hits)>1?"s":""),
               locs = join(apply(hits,sprint),", ")
        printf(1,"%s occurs at location%s: %s\n",{test,plural,locs})
    else
        printf(1,"%s does not occur\n",{test})
    end if
end procedure
 
-- Collect every position at which needle occurs in haystack.
--  needle:   an atom (delegated to find_all) or a sequence to search for
--  haystack: the sequence to search in
--  bOverlap: when true the search resumes one element after each hit,
--            otherwise it resumes length(needle) elements after it
-- Returns a sequence of 1-based start positions, {} when there are none.
function match_all(object needle, sequence haystack, bool bOverlap = false)
    if atom(needle) then
        return find_all(needle,haystack)
    end if
    sequence hits = {}
    integer stride = iff(bOverlap?1:length(needle)),    -- how far to skip past a hit
            pos = match(needle,haystack,1)
    while pos!=0 do
        hits = append(hits,pos)
        pos = match(needle,haystack,pos+stride)
    end while
    return hits
end function
<span style="color: #008080;">function</span> <span style="color: #000000;">grandna<span style="color: #0000FF;">(<span style="color: #004080;">integer</span> <span style="color: #000000;">len<span style="color: #0000FF;">)</span>
string dna = grandna(200),
<span style="color: #004080;">string</span> <span style="color: #000000;">dna</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat<span style="color: #0000FF;">(<span style="color: #008000;">' '<span style="color: #0000FF;">,<span style="color: #000000;">len<span style="color: #0000FF;">)</span>
test = grandna(4)
<span style="color: #008080;">for</span> <span style="color: #000000;">i<span style="color: #0000FF;">=<span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #000000;">len</span> <span style="color: #008080;">do</span> <span style="color: #000000;">dna<span style="color: #0000FF;">[<span style="color: #000000;">i<span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"ACGT"<span style="color: #0000FF;">[<span style="color: #7060A8;">rand<span style="color: #0000FF;">(<span style="color: #000000;">4<span style="color: #0000FF;">)<span style="color: #0000FF;">]</span> <span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
constant cheats = iff(cheat?{9,13,49,60,64,68}:{})
<span style="color: #008080;">return</span> <span style="color: #000000;">dna</span>
for i=1 to length(cheats) do
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
dna[cheats[i]..cheats[i]+3] = test
end for
<span style="color: #008080;">procedure</span> <span style="color: #000000;">show<span style="color: #0000FF;">(<span style="color: #004080;">string</span> <span style="color: #000000;">dna<span style="color: #0000FF;">,</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">idx<span style="color: #0000FF;">)</span>
sequence idx = match_all(test,dna)
<span style="color: #000000;">idx</span> <span style="color: #0000FF;">&=</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">dna<span style="color: #0000FF;">)<span style="color: #0000FF;">+<span style="color: #000000;">100</span> <span style="color: #000080;font-style:italic;">-- (add an otherwise unused sentinel)</span>
show(dna,idx)</lang>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">s</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">split<span style="color: #0000FF;">(<span style="color: #7060A8;">trim<span style="color: #0000FF;">(<span style="color: #7060A8;">join_by<span style="color: #0000FF;">(<span style="color: #7060A8;">split<span style="color: #0000FF;">(<span style="color: #7060A8;">join_by<span style="color: #0000FF;">(<span style="color: #000000;">dna<span style="color: #0000FF;">,<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #000000;">10<span style="color: #0000FF;">,<span style="color: #008000;">""<span style="color: #0000FF;">)<span style="color: #0000FF;">,<span style="color: #008000;">"\n"<span style="color: #0000FF;">)<span style="color: #0000FF;">,<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #000000;">5<span style="color: #0000FF;">,<span style="color: #008000;">" "<span style="color: #0000FF;">)<span style="color: #0000FF;">)<span style="color: #0000FF;">,<span style="color: #008000;">"\n"<span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">ii</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1<span style="color: #0000FF;">,</span> <span style="color: #000080;font-style:italic;">-- idx index</span>
<span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">idx<span style="color: #0000FF;">[<span style="color: #000000;">ii<span style="color: #0000FF;">]<span style="color: #0000FF;">,</span> <span style="color: #000080;font-style:italic;">-- current target</span>
<span style="color: #000000;">ux</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1<span style="color: #0000FF;">,</span> <span style="color: #000080;font-style:italic;">-- underline index (1..4)</span>
<span style="color: #000000;">ldx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span> <span style="color: #000080;font-style:italic;">-- line index (1, 51, 101, etc)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">si<span style="color: #0000FF;">=<span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">s<span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #7060A8;">printf<span style="color: #0000FF;">(<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #008000;">"%3d: %s\n"<span style="color: #0000FF;">,<span style="color: #0000FF;">{<span style="color: #000000;">ldx<span style="color: #0000FF;">,<span style="color: #000000;">s<span style="color: #0000FF;">[<span style="color: #000000;">si<span style="color: #0000FF;">]<span style="color: #0000FF;">}<span style="color: #0000FF;">)</span>
<span style="color: #000000;">ldx</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">50</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span> <span style="color: #008080;">and</span> <span style="color: #000000;">i<span style="color: #0000FF;"><<span style="color: #000000;">ldx</span> <span style="color: #008080;">then</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">ul</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">repeat<span style="color: #0000FF;">(<span style="color: #008000;">' '<span style="color: #0000FF;">,<span style="color: #000000;">59<span style="color: #0000FF;">)</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">i</span> <span style="color: #008080;">and</span> <span style="color: #000000;">i<span style="color: #0000FF;"><<span style="color: #000000;">ldx</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">up</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i<span style="color: #0000FF;">-<span style="color: #000000;">ldx<span style="color: #0000FF;">+<span style="color: #000000;">51</span> <span style="color: #000080;font-style:italic;">-- underline pos (relative to ldx)</span>
<span style="color: #000000;">up</span> <span style="color: #0000FF;">+=</span> <span style="color: #7060A8;">floor<span style="color: #0000FF;">(<span style="color: #0000FF;">(<span style="color: #000000;">up<span style="color: #0000FF;">-<span style="color: #000000;">1<span style="color: #0000FF;">)<span style="color: #0000FF;">/<span style="color: #000000;">10<span style="color: #0000FF;">)<span style="color: #0000FF;">+<span style="color: #000000;">5</span> <span style="color: #000080;font-style:italic;">-- (plus any needed spacing)</span>
<span style="color: #000000;">ul<span style="color: #0000FF;">[<span style="color: #000000;">up<span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"<==>"<span style="color: #0000FF;">[<span style="color: #000000;">ux<span style="color: #0000FF;">]</span>
<span style="color: #000000;">ux</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">i</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">ux<span style="color: #0000FF;">><span style="color: #000000;">4</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">ux</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">ii</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
<span style="color: #000000;">i</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">idx<span style="color: #0000FF;">[<span style="color: #000000;">ii<span style="color: #0000FF;">]</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #7060A8;">printf<span style="color: #0000FF;">(<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #008000;">"%s\n"<span style="color: #0000FF;">,<span style="color: #000000;">ul<span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">idx<span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">s</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff<span style="color: #0000FF;">(<span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">idx<span style="color: #0000FF;">)<span style="color: #0000FF;">><span style="color: #000000;">1<span style="color: #0000FF;">?<span style="color: #008000;">"s"<span style="color: #0000FF;">:<span style="color: #008000;">""<span style="color: #0000FF;">)<span style="color: #0000FF;">,</span>
<span style="color: #000000;">t</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">join<span style="color: #0000FF;">(<span style="color: #7060A8;">apply<span style="color: #0000FF;">(<span style="color: #000000;">idx<span style="color: #0000FF;">,<span style="color: #7060A8;">sprint<span style="color: #0000FF;">)<span style="color: #0000FF;">,<span style="color: #008000;">", "<span style="color: #0000FF;">)</span>
<span style="color: #7060A8;">printf<span style="color: #0000FF;">(<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #008000;">"%s occurs at location%s: %s\n"<span style="color: #0000FF;">,<span style="color: #0000FF;">{<span style="color: #000000;">test<span style="color: #0000FF;">,<span style="color: #000000;">s<span style="color: #0000FF;">,<span style="color: #000000;">t<span style="color: #0000FF;">}<span style="color: #0000FF;">)</span>
<span style="color: #008080;">else</span>
<span style="color: #7060A8;">printf<span style="color: #0000FF;">(<span style="color: #000000;">1<span style="color: #0000FF;">,<span style="color: #008000;">"%s does not occur\n"<span style="color: #0000FF;">,<span style="color: #0000FF;">{<span style="color: #000000;">test<span style="color: #0000FF;">}<span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #008080;">function</span> <span style="color: #000000;">match_all<span style="color: #0000FF;">(<span style="color: #004080;">object</span> <span style="color: #000000;">needle<span style="color: #0000FF;">,</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">haystack<span style="color: #0000FF;">,</span> <span style="color: #004080;">bool</span> <span style="color: #000000;">bOverlap</span> <span style="color: #0000FF;">=</span> <span style="color: #004600;">false<span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #004080;">atom<span style="color: #0000FF;">(<span style="color: #000000;">needle<span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span> <span style="color: #008080;">return</span> <span style="color: #7060A8;">find_all<span style="color: #0000FF;">(<span style="color: #000000;">needle<span style="color: #0000FF;">,<span style="color: #000000;">haystack<span style="color: #0000FF;">)</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{<span style="color: #0000FF;">}</span>
<span style="color: #008080;">while</span> <span style="color: #000000;">1</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">start</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match<span style="color: #0000FF;">(<span style="color: #000000;">needle<span style="color: #0000FF;">,<span style="color: #000000;">haystack<span style="color: #0000FF;">,<span style="color: #000000;">start<span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">start<span style="color: #0000FF;">=<span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">res</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">append<span style="color: #0000FF;">(<span style="color: #000000;">res<span style="color: #0000FF;">,<span style="color: #000000;">start<span style="color: #0000FF;">)</span>
<span style="color: #000000;">start</span> <span style="color: #0000FF;">+=</span> <span style="color: #008080;">iff<span style="color: #0000FF;">(<span style="color: #000000;">bOverlap<span style="color: #0000FF;">?<span style="color: #000000;">1<span style="color: #0000FF;">:<span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">needle<span style="color: #0000FF;">)<span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">res</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">dna</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">grandna<span style="color: #0000FF;">(<span style="color: #000000;">200<span style="color: #0000FF;">)<span style="color: #0000FF;">,</span>
<span style="color: #000000;">test</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">grandna<span style="color: #0000FF;">(<span style="color: #000000;">4<span style="color: #0000FF;">)</span>
<span style="color: #008080;">constant</span> <span style="color: #000000;">cheats</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff<span style="color: #0000FF;">(<span style="color: #000000;">cheat<span style="color: #0000FF;">?<span style="color: #0000FF;">{<span style="color: #000000;">9<span style="color: #0000FF;">,<span style="color: #000000;">13<span style="color: #0000FF;">,<span style="color: #000000;">49<span style="color: #0000FF;">,<span style="color: #000000;">60<span style="color: #0000FF;">,<span style="color: #000000;">64<span style="color: #0000FF;">,<span style="color: #000000;">68<span style="color: #0000FF;">}<span style="color: #0000FF;">:<span style="color: #0000FF;">{<span style="color: #0000FF;">}<span style="color: #0000FF;">)</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i<span style="color: #0000FF;">=<span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length<span style="color: #0000FF;">(<span style="color: #000000;">cheats<span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #000000;">dna<span style="color: #0000FF;">[<span style="color: #000000;">cheats<span style="color: #0000FF;">[<span style="color: #000000;">i<span style="color: #0000FF;">]<span style="color: #0000FF;">..<span style="color: #000000;">cheats<span style="color: #0000FF;">[<span style="color: #000000;">i<span style="color: #0000FF;">]<span style="color: #0000FF;">+<span style="color: #000000;">3<span style="color: #0000FF;">]</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">test</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">idx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">match_all<span style="color: #0000FF;">(<span style="color: #000000;">test<span style="color: #0000FF;">,<span style="color: #000000;">dna<span style="color: #0000FF;">)</span>
<span style="color: #000000;">show<span style="color: #0000FF;">(<span style="color: #000000;">dna<span style="color: #0000FF;">,<span style="color: #000000;">idx<span style="color: #0000FF;">)
<!--</lang>-->
{{out}}
with cheat enabled
7,796

edits