Bioinformatics/base count: Difference between revisions

added AWK
(added AWK)
Line 444:
450 : GACCGGGGAC TTGCATGATG GGAGCAGCTT TGTTAAACTA CGAACGTAAT
Total count => A: 129 T: 155 G: 119 C: 97</pre>
=={{header|AWK}}==
<lang AWK>
# syntax: GAWK -f BIOINFORMATICS_BASE_COUNT.AWK
# converted from FreeBASIC
#
# Pretty-prints a DNA sequence, 50 bases per line in groups of 10, each
# line prefixed with the 1-based position range it covers. Then prints
# how often each distinct character (base) occurs, followed by the total.
#
# sorting:
#   PROCINFO["sorted_in"] is used by GAWK
#   SORTTYPE is used by Thompson Automation's TAWK
#
BEGIN {
    dna = "CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATG" \
          "CTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTG" \
          "AGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGAT" \
          "GGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT" \
          "CGATTCTGCTTATAACACTATGTTCTTATGAAATGGATGTTCTGAGTTGG" \
          "TCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA" \
          "TTTAATTTTTCTATATAGCGATCTGTATTTAAGCAATTCATTTAGGTTAT" \
          "CGCCGCGATGCTCGGTTCGGACCGCCAAGCATCTGGCTCCACTGCTAGTG" \
          "TCCTAAATTTGAATGGCAAACACAAATAAGATTTAGCAATTCGTGTAGAC" \
          "GACCGGGGACTTGCATGATGGGAGCAGCTTTGTTAAACTACGAACGTAAT"
    dna_len = length(dna)       # loop-invariant, so compute it once
    rec = ""                    # output line currently being assembled
    total = 0
    curr = first = 1            # curr: next position to read; first: start of current line
    while (curr <= dna_len) {
        curr_base = substr(dna,curr,1)
        base_arr[curr_base]++
        rec = rec curr_base
        curr++
        # curr now points one past the base just consumed, so "% N == 1"
        # is true exactly when N bases of the current group/line are done
        if (curr % 10 == 1) {
            rec = rec " "
        }
        if (curr % 50 == 1) {
            printf("%3d-%3d: %s\n",first,curr-1,rec)
            rec = ""
            first = curr
        }
    }
    # Flush a trailing partial line: without this, a sequence whose length
    # is not a multiple of 50 would have its last bases counted but not shown
    if (rec != "") {
        printf("%3d-%3d: %s\n",first,curr-1,rec)
    }
    # request ascending key order for the "for (i in ...)" traversal below
    PROCINFO["sorted_in"] = "@ind_str_asc" ; SORTTYPE = 1
    printf("\nBase count\n")
    for (i in base_arr) {
        printf("%s %8d\n",i,base_arr[i])
        total += base_arr[i]
    }
    printf("%10d total\n",total)
    exit(0)
}
</lang>
{{out}}
<pre>
  1- 50: CGTAAAAAAT TACAACGTCC TTTGGCTATC TCTTAAACTC CTGCTAAATG
 51-100: CTCGTGCTTT CCAATTATGT AAGCGTTCCG AGACGGGGTG GTCGATTCTG
101-150: AGGACAAAGG TCAAGATGGA GCGCATCGAA CGCAATAAGG ATCATTTGAT
151-200: GGGACGTTTC GTCGACAAAG TCTTGTTTCG AGAGTAACGG CTACCGTCTT
201-250: CGATTCTGCT TATAACACTA TGTTCTTATG AAATGGATGT TCTGAGTTGG
251-300: TCAGTCCCAA TGTGCGGGGT TTCTTTTAGT ACGTCGGGAG TGGTATTATA
301-350: TTTAATTTTT CTATATAGCG ATCTGTATTT AAGCAATTCA TTTAGGTTAT
351-400: CGCCGCGATG CTCGGTTCGG ACCGCCAAGC ATCTGGCTCC ACTGCTAGTG
401-450: TCCTAAATTT GAATGGCAAA CACAAATAAG ATTTAGCAAT TCGTGTAGAC
451-500: GACCGGGGAC TTGCATGATG GGAGCAGCTT TGTTAAACTA CGAACGTAAT
 
Base count
A      129
C       97
G      119
T      155
       500 total
</pre>
 
=={{header|C}}==
477

edits