Bioinformatics/Subsequence: Difference between revisions

Content added Content deleted
Line 71: Line 71:
289..293
289..293
312..316
312..316
</pre>

=={{header|Phix}}==
Note: match_all() is due to become a builtin in the next release, so the version below may need to be renamed or deleted before this will run.
<lang Phix>constant cheat = false -- when true, copies of test are written into dna at fixed positions before searching

function grandna(integer len)
    -- Build a random DNA string of len bases, each one picked
    -- independently from the four letters A, C, G and T.
    string bases = "ACGT",
           dna = repeat(' ',len)
    for pos=1 to len do
        dna[pos] = bases[rand(4)]
    end for
    return dna
end function

procedure show(string dna, sequence idx)
    -- Print dna in rows of 50 bases (five space-separated groups of 10), each
    -- row prefixed with its 1-based start position, and underline every match
    -- listed in idx with "<==>". A match that straddles a row boundary has its
    -- underline continued on the next row. Ends with a one-line summary.
    --  dna: the sequence to display
    --  idx: start positions of the 4-base matches, expected in ascending
    --       order and non-overlapping (as match_all() produces them)
    -- The summary line names the file-level string test.
    -- NOTE(review): test is declared after this routine in the listing;
    --               confirm the declaration order compiles as intended.
    --
    -- The sentinel goes on a private copy: appending it to idx itself made the
    -- summary report the sentinel as a location and left "does not occur"
    -- unreachable.
    sequence targets = idx
    targets &= length(dna)+100 -- (an otherwise unused sentinel, past any real position)
    sequence rows = split(trim(join_by(split(join_by(dna,1,10,""),"\n"),1,5," ")),"\n")
    integer ti = 1,             -- targets index
            i = targets[ti],    -- current position to underline
            ux = 1,             -- underline index (1..4)
            ldx = 1             -- line index (1, 51, 101, etc)
    for ri=1 to length(rows) do
        printf(1,"%3d: %s\n",{ldx,rows[ri]})
        ldx += 50
        if i and i<ldx then
            string ul = repeat(' ',59)
            while i and i<ldx do
                integer up = i-ldx+51       -- underline pos (relative to ldx)
                up += floor((up-1)/10)+5    -- (plus group gaps and the 5-char row prefix)
                ul[up] = "<==>"[ux]
                ux += 1
                i += 1
                if ux>4 then
                    -- finished one match, move on to the next target
                    ux = 1
                    ti += 1
                    i = targets[ti]
                end if
            end while
            printf(1,"%s\n",ul)
        end if
    end for
    if length(idx) then
        string plural = iff(length(idx)>1?"s":""),
               locations = join(apply(idx,sprint),", ")
        printf(1,"%s occurs at location%s: %s\n",{test,plural,locations})
    else
        printf(1,"%s does not occur\n",{test})
    end if
end procedure

function match_all(object needle, sequence haystack)
    -- Collect the start index of every non-overlapping occurrence of needle
    -- in haystack, scanning left to right. Returns {} when there is none.
    sequence hits = {}
    integer pos = match(needle,haystack,1)
    while pos!=0 do
        hits = append(hits,pos)
        -- resume just past this hit, so reported occurrences never overlap
        pos = match(needle,haystack,pos+length(needle))
    end while
    return hits
end function
-- Driver: make a 200-base strand and a 4-base pattern, optionally plant the
-- pattern at the cheat positions, then locate and display every occurrence.
string dna = grandna(200),
       test = grandna(4)
constant cheats = iff(cheat?{9,13,49,60,64,68}:{})
for c=1 to length(cheats) do
    -- overwrite the slice starting at this cheat position with the pattern
    dna[cheats[c]..cheats[c]+length(test)-1] = test
end for
sequence idx = match_all(test,dna)
show(dna,idx)</lang>
{{out}}
with cheat enabled
<pre>
1: GGAGATATCG ACCGACCGAA GTAAAGTCAA AGTCGTCCAA TCCACGGACG
<= =><==> <=
51: ACTTCAGCAC GACCGACCGA CCTATTTAAG AGACCACACT TAAGGAATCC
=> < ==><==><== >
101: ATGCGAAATA AAAATGGGCG AGTAGCCGTG GGGCGCTAAA GCACCCACCT
151: AGTTTTCGCC GAAGTACTAG ACCACCTTCG GATCGACAAA GCTTTCACCA
<==>
CGAC occurs at locations: 9, 13, 49, 60, 64, 68, 184
</pre>
with cheat disabled
<pre>
1: TGATTTAAAC CGTGGTGCAA TTTATAAACA CTGCGATATG CCTCCTGATG
51: GCATGGTATT CGACACCAAG ACGCTGGTGG GCACACTGGC TTTCAGAATA
101: GGAGTCACAA TCCCTCTATG ATGTCCTCTA GCGGGTGTGT GTTCAGTGCC
151: AGCGCTTACT TCCGGCGTGG CCGACTCTTT TTAAAGCGTA TAGCTGGGGT
GCTA does not occur
</pre>
</pre>