Bioinformatics/base count: Difference between revisions

added Haskell
(added Haskell)
Line 288:
======
</pre>
=={{header|Haskell}}==
<lang haskell>import Data.List (group, sort)
import Data.List.Split (chunksOf)
import Text.Printf (printf, IsChar(..), PrintfArg(..), fmtChar, fmtPrecision, formatString)
 
-- | One of the four DNA bases. The derived 'Ord' instance follows the
-- constructor order (A < C < G < T); the derived 'Show' and 'Read' instances
-- use the one-letter constructor names.
data DNABase = A | C | G | T deriving (Show, Read, Eq, Ord)
-- | A DNA sequence, represented as a plain list of bases.
type DNASequence = [DNABase]
 
-- | Character view of a base: each base corresponds to the one-letter name
-- of its constructor. Together with base's @IsChar c => PrintfArg [c]@
-- instance this allows a list of bases to be passed to 'printf' with @%s@.
instance IsChar DNABase where
  -- Explicit, total mapping (no reliance on 'head' of the 'show' output).
  toChar A = 'A'
  toChar C = 'C'
  toChar G = 'G'
  toChar T = 'T'

  -- Only the four upper-case base letters are accepted. Any other character
  -- is a caller error and is reported with the offending character instead
  -- of the opaque "Prelude.read: no parse".
  fromChar 'A' = A
  fromChar 'C' = C
  fromChar 'G' = G
  fromChar 'T' = T
  fromChar c = error ("fromChar: not a DNA base: " ++ show c)
 
-- | Formats a single base for 'printf' by rendering its 'show' text as a
-- string argument.
instance PrintfArg DNABase where
  formatArg base spec = formatString (show base) stringSpec
    where
      -- Always format as a string: the conversion character is overridden
      -- with 's' and any precision from the format is discarded.
      stringSpec = spec { fmtChar = 's', fmtPrecision = Nothing }
 
-- | Sample sequence used by 'main': ten rows of 50 base letters each,
-- parsed character by character through the derived 'Read' instance.
test :: DNASequence
test = concatMap (map parseBase) rows
  where
    -- Read a single base letter; a letter other than A, C, G or T makes
    -- 'read' fail at runtime.
    parseBase :: Char -> DNABase
    parseBase letter = read [letter]

    rows :: [String]
    rows =
      [ "CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATG"
      , "CTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTG"
      , "AGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGAT"
      , "GGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT"
      , "CGATTCTGCTTATAACACTATGTTCTTATGAAATGGATGTTCTGAGTTGG"
      , "TCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA"
      , "TTTAATTTTTCTATATAGCGATCTGTATTTAAGCAATTCATTTAGGTTAT"
      , "CGCCGCGATGCTCGGTTCGGACCGCCAAGCATCTGGCTCCACTGCTAGTG"
      , "TCCTAAATTTGAATGGCAAACACAAATAAGATTTAGCAATTCGTGTAGAC"
      , "GACCGGGGACTTGCATGATGGGAGCAGCTTTGTTAAACTACGAACGTAAT"
      ]
 
-- | Split a sequence into rows of at most 50 bases, pairing each row with
-- the number of bases consumed up to and including that row.
--
-- The label is the running sum of the actual row lengths, so a short final
-- row is labelled with the true end position rather than the next multiple
-- of 50. For sequences whose length is a multiple of 50 the labels are
-- 50, 100, 150, ... exactly as before.
chunkedDNASequence :: DNASequence -> [(Int, [DNABase])]
chunkedDNASequence dna = zip endPositions rows
  where
    rows = chunksOf 50 dna
    -- Cumulative base count at the end of each row.
    endPositions = scanl1 (+) (map length rows)
 
-- | Tally the occurrences of each base in a sequence.
--
-- The result is in ascending base order (the sequence is sorted first) and
-- contains one entry per base that actually occurs; bases absent from the
-- input do not appear.
baseCounts :: DNASequence -> [(DNABase, Int)]
baseCounts dna =
  -- 'group' only yields non-empty runs, so the pattern always matches.
  [ (base, length run) | run@(base : _) <- group (sort dna) ]
 
-- | Print the sample sequence in labelled rows, then the count of each base
-- and the total number of bases.
main :: IO ()
main = do
  putStrLn "Sequence:"
  mapM_ printRow (chunkedDNASequence test)
  putStrLn "\nBase Counts:"
  mapM_ printCount (baseCounts test)
  putStrLn (replicate 8 '-')
  printf " Σ: %d\n\n" (length test)
  where
    -- One row of the sequence, prefixed by its position label.
    printRow :: (Int, [DNABase]) -> IO ()
    printRow (label, bases) = printf "%3d: %s\n" label bases

    -- One base together with its number of occurrences.
    printCount :: (DNABase, Int) -> IO ()
    printCount (base, count) = printf "%2s: %2d\n" base count</lang>
{{out}}
<pre>Sequence:
 50: CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATG
100: CTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTG
150: AGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGAT
200: GGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT
250: CGATTCTGCTTATAACACTATGTTCTTATGAAATGGATGTTCTGAGTTGG
300: TCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA
350: TTTAATTTTTCTATATAGCGATCTGTATTTAAGCAATTCATTTAGGTTAT
400: CGCCGCGATGCTCGGTTCGGACCGCCAAGCATCTGGCTCCACTGCTAGTG
450: TCCTAAATTTGAATGGCAAACACAAATAAGATTTAGCAATTCGTGTAGAC
500: GACCGGGGACTTGCATGATGGGAGCAGCTTTGTTAAACTACGAACGTAAT
 
Base Counts:
 A: 129
 C: 97
 G: 119
 T: 155
--------
 Σ: 500</pre>
=={{header|Julia}}==
<lang julia>const sequence =
Anonymous user