Bioinformatics/Subsequence: Difference between revisions
Content added Content deleted
Line 71: | Line 71: | ||
289..293 |
289..293 |
||
312..316 |
312..316 |
||
</pre> |
|||
=={{header|Phix}}== |
|||
Note: match_all() is due to become a builtin in the next release, so the version below may need to be renamed or deleted before this code will run. |
|||
<lang Phix>constant cheat = false |
|||
function grandna(integer len) |
|||
    -- Build and return a string of len characters, each picked from "ACGT" with rand(4). |
    string strand = "" |
|||
    for n=1 to len do |
|||
        strand &= "ACGT"[rand(4)] |
|||
    end for |
|||
    return strand |
|||
end function |
|||
procedure show(string dna, sequence idx) |
|||
    -- Print dna as numbered rows (the row label advances by 50 per row), draw a |
    -- four-character "<==>" underline beneath every start position listed in idx, |
    -- then print a one-line summary of where the file-level pattern `test` occurs. |
    -- idx is expected to hold ascending 1-based start positions, as match_all returns. |
    -- |
    -- The underline walk needs a sentinel so that "current target" is always defined. |
    -- It is appended to a private copy: appending it to idx itself would make the |
    -- summary below list the sentinel as a location and never report "does not occur". |
    sequence marks = idx & (length(dna)+100) |
|||
    sequence rows = split(trim(join_by(split(join_by(dna,1,10,""),"\n"),1,5," ")),"\n") |
|||
    integer mi = 1, -- marks index |
|||
            i = marks[mi], -- current target |
|||
            ux = 1, -- underline index (1..4) |
|||
            ldx = 1 -- line index (1, 51, 101, etc) |
|||
    for ri=1 to length(rows) do |
|||
        printf(1,"%3d: %s\n",{ldx,rows[ri]}) |
|||
        -- ldx now labels the NEXT row, so i<ldx means "target lies on the row just printed" |
        ldx += 50 |
|||
        if i and i<ldx then |
|||
            -- 59 = 5 (width of the "%3d: " prefix) + 50 bases + 4 group separators |
            string ul = repeat(' ',59) |
|||
            while i and i<ldx do |
|||
                integer up = i-ldx+51 -- underline pos (relative to ldx) |
|||
                up += floor((up-1)/10)+5 -- (plus any needed spacing) |
|||
                ul[up] = "<==>"[ux] |
|||
                ux += 1 |
|||
                i += 1 |
|||
                -- ux is kept across rows, so an underline that straddles a row |
                -- boundary simply continues on the next row |
                if ux>4 then |
|||
                    ux = 1 |
|||
                    mi += 1 |
|||
                    i = marks[mi] |
|||
                end if |
|||
            end while |
|||
            printf(1,"%s\n",ul) |
|||
        end if |
|||
    end for |
|||
    -- idx is untouched, so it holds only the genuine match positions here |
    if length(idx) then |
|||
        string plural = iff(length(idx)>1?"s":""), |
|||
               locs = join(apply(idx,sprint),", ") |
|||
        printf(1,"%s occurs at location%s: %s\n",{test,plural,locs}) |
|||
    else |
|||
        printf(1,"%s does not occur\n",{test}) |
|||
    end if |
|||
end procedure |
|||
function match_all(object needle, sequence haystack) |
|||
    -- Collect the start index of every non-overlapping occurrence of needle in |
    -- haystack, scanning left to right; returns {} when there is none. |
    sequence found = {} |
|||
    integer pos = match(needle,haystack,1) |
|||
    while pos!=0 do |
|||
        found &= pos |
|||
        -- resume just past the hit, so occurrences never overlap |
        pos = match(needle,haystack,pos+length(needle)) |
|||
    end while |
|||
    return found |
|||
end function |
|||
-- Driver: a random 200-character strand to search and a random 4-character pattern to look for. |
string dna = grandna(200), |
|||
test = grandna(4) |
|||
-- Start positions at which copies of test are planted; empty unless cheat is true. |
constant cheats = iff(cheat?{9,13,49,60,64,68}:{}) |
|||
for i=1 to length(cheats) do |
|||
-- overwrite four characters of dna with test so that the pattern occurs there |
dna[cheats[i]..cheats[i]+3] = test |
|||
end for |
|||
-- find every occurrence of test in dna, then display the strand and report the hits |
sequence idx = match_all(test,dna) |
|||
show(dna,idx)</lang> |
|||
{{out}} |
|||
With cheat enabled: |
|||
<pre> |
|||
1: GGAGATATCG ACCGACCGAA GTAAAGTCAA AGTCGTCCAA TCCACGGACG |
|||
<= =><==> <= |
|||
51: ACTTCAGCAC GACCGACCGA CCTATTTAAG AGACCACACT TAAGGAATCC |
|||
=> < ==><==><== > |
|||
101: ATGCGAAATA AAAATGGGCG AGTAGCCGTG GGGCGCTAAA GCACCCACCT |
|||
151: AGTTTTCGCC GAAGTACTAG ACCACCTTCG GATCGACAAA GCTTTCACCA |
|||
<==> |
|||
CGAC occurs at locations: 9, 13, 49, 60, 64, 68, 184 |
|||
</pre> |
|||
With cheat disabled: |
|||
<pre> |
|||
1: TGATTTAAAC CGTGGTGCAA TTTATAAACA CTGCGATATG CCTCCTGATG |
|||
51: GCATGGTATT CGACACCAAG ACGCTGGTGG GCACACTGGC TTTCAGAATA |
|||
101: GGAGTCACAA TCCCTCTATG ATGTCCTCTA GCGGGTGTGT GTTCAGTGCC |
|||
151: AGCGCTTACT TCCGGCGTGG CCGACTCTTT TTAAAGCGTA TAGCTGGGGT |
|||
GCTA does not occur |
|||
</pre> |
</pre> |
||