Anonymous user
Bioinformatics/Subsequence: Difference between revisions
→{{header|REXX}}: added the computer programming language REXX.
m (fixed stub.) |
(→{{header|REXX}}: added the computer programming language REXX.) |
||
Line 48:
=={{header|REXX}}==
This REXX version allows the user to specify:
:* length of the (random) DNA data sequence (default is 200).
:* length of the (random) DNA sub-sequence to be searched for (default is four).
:* DNA proteins to be used in the sequence (default is '''ACGT''').
:* width of the output lines of the (random) DNA data (default is 100).
:* how often (if ever) to add a blank to the output (default is every 10 proteins).
:* DNA proteins to be searched for in the data (the default is four unique random proteins).
:* the seed for the RANDOM function so runs can be repeated with the same data (no default).
<lang rexx>/*REXX pgm gens random DNA (ACGT) sequence & finds positions of a random 4─protein seq. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
/* Main program.  Arguments (all optional, a comma "," means: use the default):         */
/*   totLen   length of the random DNA data sequence               (default 200)        */
/*   rndLen   length of the DNA sub-sequence to be searched for    (default   4)        */
/*   basePr   the proteins used to build the data                  (default acgt)       */
/*   oWidth   number of proteins shown on each output line         (default 100)        */
/*   Bevery   a blank is shown after every  Bevery  proteins, 0 ≡ never  (default 10)   */
/*   rndDNA   the sub-sequence to be searched for,  a "." is a randomly chosen protein  */
/*   seed     seed for the RANDOM BIF so that runs can be repeated (no default)         */
/* Exits with 0 when the sub-sequence was found at least once,  with 4 otherwise.       */
/*──────────────────────────────────────────────────────────────────────────────────────*/
parse arg totLen rndLen basePr oWidth Bevery rndDNA seed .
if totLen=='' | totLen==","  then totLen= 200               /*Not specified?  Then use the default.*/
if rndLen=='' | rndLen==","  then rndLen=   4               /* "      "         "   "   "     "    */
if basePr=='' | basePr==","  then basePr= 'acgt'            /* "      "         "   "   "     "    */
if oWidth=='' | oWidth==","  then oWidth= 100               /* "      "         "   "   "     "    */
if Bevery=='' | Bevery==","  then Bevery=  10               /* "      "         "   "   "     "    */
if rndDNA=='' | rndDNA==","  then rndDNA= copies(., rndLen) /*all dots: every protein is random.   */
if datatype(seed, 'W')  then call random ,,seed             /*used to generate repeatable random #s*/
call genRnd                                                 /*build  dna?  (the sub-sequence that  */
                                                            /*is searched for);  genRnd also upper─*/
                                                            /*cases  basePr  and  sets  lenB.      */
say " index │"center(' DNA sequence', oWidth + 10)          /*oWidth:  output line width.          */
say "───────┼"center(''             , oWidth + 10, '─')
idx= 1                                                      /*index of 1st protein on current line.*/
$=;     $$=                                                 /*$: current output line; $$: all data.*/
    do j=1  for totLen;     c= substr(basePr, random(1, lenB), 1)
    $$= $$ || c                                             /*append a random protein to the data. */
    $ = $  || c                                             /*   "   the same protein to the line. */
    if length( space($, 0) )>=oWidth  then do               /*does the line hold oWidth proteins?  */
                                           say right(idx, 7)'│'  strip($, 'T')
                                           $=               /*start a new (empty) output line.     */
                                           idx= idx + oWidth            /*bump the index number.   */
                                           iterate          /*no trailing blank on a full line.    */
                                           end
    if Bevery\==0  then if j//Bevery==0  then $= $' '       /*blank goes AFTER each Bevery─th one. */
    end   /*j*/
if $\==''  then say right(idx, 7)"│"  strip($, 'T')         /*display residual protein data.       */
say
say '  base DNA proteins used: '     basePr
say 'random DNA proteins used: '     dna?
@=;     p= 0                                                /*@:  list of all found positions.     */
    do  until p==0;         p= pos(dna?, $$, p+1)           /*find all overlapped positions in data*/
    if p>0  then @= @ p                                     /*Found one?  Append it to the "Found"s*/
    end   /*p*/
say
if @==''  then do;  say "the random DNA proteins weren't found.";  exit 4;  end
say 'the random DNA proteins were found in positions:'   strip(@)
exit 0                                                      /*stick a fork in it,  we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
/*genRnd:  builds  dna?,  the DNA sub-sequence that is searched for.                    */
/*  reads:  basePr  (proteins to choose from),  rndDNA  (pattern, a "." ≡ random pick), */
/*          rndLen  (length of the sub-sequence to build).                              */
/*  sets:   dna?  (the sub-sequence),  lenB  (length of basePr),  and uppercases  both  */
/*          basePr  and  rndDNA.      Shares the caller's variables  (no PROCEDURE).    */
genRnd: dna?=;     basePr= translate(basePr);     rndDNA= translate(rndDNA)
        use= basePr;                   lenB= length(basePr)
            do k=1  for rndLen;        x= substr(rndDNA, k, 1, .)    /*short pattern? pad with "."*/
            if x==.  then do
                          if use==''  then use= basePr               /*pool used up?  Refill it,  */
                                                                     /*so RANDOM(1,0) can't occur.*/
                          ?= random(1, length(use) );    x= substr(use, ?, 1)
                          use= delstr(use, ?, 1)                     /*elide so no protein repeats*/
                          end                                        /*  (until pool is refilled).*/
            dna?= dna? || x                                          /*build a random protein seq.*/
            end   /*k*/
return</lang>
{{out|output|text= when using the default inputs:}}
<pre>
index │ DNA sequence
───────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────────
1│ TTTTTAGCG CGTTTTGTAG CGCTCTAAAA ACCGTAGCTA TATTTCTCGA AGTTTCACCC AGCTCTTTTG CCCCAGGGTT GCGCTAAGCC CAGCTTCGAG
101│ GGGGCACAG GTAAAATACT ACCGTCCGTG GAGGGGGATG AATTGACCCG ACATTTTTTG AAGCATAACT CGTGACTCAA TATTGCATGA TTACACCAGC
base DNA proteins used: ACGT
random DNA proteins used: GCAT
the random DNA proteins were found in positions: 162 184
</pre>
{{out|output|text= when using the inputs of: <tt> 1000 , , , , tttt </tt>}}
<pre>
index │ DNA sequence
───────┼──────────────────────────────────────────────────────────────────────────────────────────────────────────────
1│ GTGATTTTT AGCGCGTTTT GTAGCGCTCT AAAAACCGTA GCTATATTTC TCGAAGTTTC ACCCAGCTCT TTTGCCCCAG GGTTGCGCTA AGCCCAGCTT
101│ GAGGGGGGC ACAGGTAAAA TACTACCGTC CGTGGAGGGG GATGAATTGA CCCGACATTT TTTGAAGCAT AACTCGTGAC TCAATATTGC ATGATTACAC
201│ AGCTAGGTT AGTGTAAAAA CCCCCCTATC TTCCTGATCA ATGGCGAGTA AAACATGCAA CCAATTTGTG AGCGAGTACT GGAAATTATT GTTTACGGGA
301│ AGGCACATG CTACGCGCAA CAGATATCTT AGACTGACCC TTTTAGAGTC ATAAGCCCCT GTCGCCTACA TGCTACTAAT ACTCCAACTA GCGGCGCACC
401│ TCAACCGGA TCATGGCGCC AGGGAAAATG TGGCGTAGCG ACGTGCTCAT CGCTCGCCGG GGAGAGCCTT TCAGAATCTC GAATAAAACC TGGTAATGAC
501│ TCATCAATC GTAATGGTCG TCTGGGGCAA GAAGCCGATA TTATAGACTC AGGTCAGACG TGTGCACAAC GGCAGAATTT ATAGTAATTC GCGTGAACTA
601│ GTTTCGGGA TAGGCCTACG ACCAATCATA GGACATTCGA TGCACGGTGT AGAAACAGTT CTCTGATGTT ACTCGGGATA ACACTCGCAA TCCCCTAGGA
701│ ACCGTGAGC GTCGCTAGTA TCTGAGATAG TCGCGACTGC CCAGCGGTCT TTAAGTTCGC ACACTACGGG ACTCCTAGTT CGCCCATTCA TGGCTATTTT
801│ CCTATCAGT CCAATCCCAC GGGGAGGGCA CTCGCGCAAT TCATTCAAAG AGGGCCATTT GCCGATATAA GGTCCATCAT CGGGAGGAAT ATGACTCCTG
901│ TTAGTATTA GAGCAGCCTC GCTGCGTACT ACTGTCAGTG GCCCGTCAGG GAAGGCAAAA CGTTTTTCCT CTAGGAATCC GTCAATTGGA CTTCTAGACT
base DNA proteins used: ACGT
random DNA proteins used: TTTT
the random DNA proteins were found in positions: 5 6 16 69 157 158 159 340 796 797 962 963
</pre>
|