Bioinformatics/Sequence mutation: Difference between revisions

added Haskell
(Added Java solution)
(added Haskell)
Line 901:
======
</pre>
=={{header|Haskell}}==
<lang haskell>import Data.List (group, sort)
import Data.List.Split (chunksOf)
import System.Random (Random, randomR, random, newStdGen, randoms, getStdRandom)
import Text.Printf (PrintfArg(..), fmtChar, fmtPrecision, formatString, IsChar(..), printf)
 
-- | The kinds of point mutation that can be applied to a sequence.
-- Constructor order matters: the derived 'Enum' and 'Bounded' instances
-- depend on it.
data Mutation
  = Swap
  | Delete
  | Insert
  deriving (Show, Eq, Ord, Enum, Bounded)

-- | A single nucleotide base.
data DNABase
  = A
  | C
  | G
  | T
  deriving (Show, Read, Eq, Ord, Enum, Bounded)

-- | A DNA strand, represented as a plain list of bases.
type DNASequence = [DNABase]
 
-- | Random bases, obtained by drawing an 'Int' between the 'fromEnum'
-- values of the two bounds and converting it back with 'toEnum'.
instance Random DNABase where
  randomR (lo, hi) gen = (toEnum n, gen')
    where
      (n, gen') = randomR (fromEnum lo, fromEnum hi) gen

  -- Draws from the full range of constructors.
  random = randomR (minBound, maxBound)
 
-- | Random mutation kinds, obtained by drawing an 'Int' between the
-- 'fromEnum' values of the two bounds and converting it back with 'toEnum'.
instance Random Mutation where
  randomR (lo, hi) gen = (toEnum n, gen')
    where
      (n, gen') = randomR (fromEnum lo, fromEnum hi) gen

  -- Draws from the full range of constructors.
  random = randomR (minBound, maxBound)
 
-- | Lets a 'DNABase' be passed to 'printf'. The value is rendered with
-- 'show' and always formatted as a string: the directive character is
-- forced to @s@ and any precision is cleared.
instance PrintfArg DNABase where
  formatArg base spec = formatString (show base) asString
    where
      asString = spec { fmtPrecision = Nothing, fmtChar = 's' }
 
-- | Lets a 'Mutation' be passed to 'printf'. The value is rendered with
-- 'show' and always formatted as a string: the directive character is
-- forced to @s@ and any precision is cleared.
instance PrintfArg Mutation where
  formatArg kind spec = formatString (show kind) asString
    where
      asString = spec { fmtPrecision = Nothing, fmtChar = 's' }
 
-- | Character view of a base, which is what allows a @['DNABase']@ to be
-- printed with a @%s@ directive.
--
-- The mapping is spelled out explicitly instead of going through
-- @head . show@ and 'read', so 'toChar' is total and 'fromChar' fails with
-- a descriptive message on a character that is not a base letter.
instance IsChar DNABase where
  toChar A = 'A'
  toChar C = 'C'
  toChar G = 'G'
  toChar T = 'T'

  fromChar 'A' = A
  fromChar 'C' = C
  fromChar 'G' = G
  fromChar 'T' = T
  -- The class signature leaves no room for a 'Maybe', so an invalid
  -- character is still an error, as it was with 'read'.
  fromChar c = error ("fromChar: not a DNA base: " ++ show c)
 
-- | Removes the element at 1-based position @i@.
-- If @i@ is outside @1 .. length xs@ the list is returned unchanged.
dropIndex :: Int -> [a] -> [a]
dropIndex i xs = [x | (k, x) <- zip [1 ..] xs, k /= i]
 
-- | Inserts @e@ immediately after the first @i@ elements of the list.
-- An @i@ of zero or less puts @e@ at the front; an @i@ beyond the end of
-- the list appends it.
insertIndex :: Int -> a -> [a] -> [a]
insertIndex i e xs =
  let (front, back) = splitAt i xs
   in front ++ e : back
 
-- | Replaces the element at 1-based position @i@ with @a@.
-- When @i@ is outside @1 .. length xs@ nothing is removed: @a@ ends up at
-- the front (for @i <= 0@) or at the end (for @i > length xs@).
swapIndex :: Int -> a -> [a] -> [a]
swapIndex i a xs = before ++ a : after
  where
    before = take (pred i) xs
    after = drop i xs
 
-- | Splits a sequence into chunks of 50 bases (via 'chunksOf') and pairs
-- each chunk with a running label 50, 100, 150, ... The label advances by
-- 50 per chunk regardless of the chunk's actual length.
chunkedDNASequence :: DNASequence -> [(Int, [DNABase])]
chunkedDNASequence dna = zip labels chunks
  where
    labels = [50, 100 ..]
    chunks = chunksOf 50 dna
 
-- | Counts how many times each base occurs in the sequence.
--
-- The result has one @(base, count)@ pair per base that occurs at least
-- once, in ascending base order. Bases that do not occur are omitted.
baseCounts :: DNASequence -> [(DNABase, Int)]
baseCounts dna =
  -- Matching on the head of each run keeps this total without 'head';
  -- 'group' only ever yields non-empty runs, so no run is skipped.
  [(base, length run) | run@(base : _) <- group (sort dna)]
 
-- | Generates a random sequence of @n@ bases, using a generator obtained
-- from 'newStdGen'.
newSequence :: Int -> IO DNASequence
newSequence n = do
  gen <- newStdGen
  pure (take n (randoms gen))
 
-- | Picks a random kind of mutation and applies it to the sequence.
-- Returns the mutation with the position it was applied at, together with
-- the mutated sequence.
mutateSequence :: DNASequence -> IO ((Mutation, Int), DNASequence)
mutateSequence xs = do
  kind <- randomMutation
  mutate kind xs
 
-- | Applies the given kind of mutation at a random 1-based position.
--
-- The position is drawn first, from @1 .. length xs@. 'Swap' and 'Insert'
-- then draw a random base as well; 'Delete' needs none. The result pairs
-- the mutation and its position with the mutated sequence.
mutate :: Mutation -> DNASequence -> IO ((Mutation, Int), DNASequence)
mutate m xs = do
  pos <- randomIndex (length xs)
  mutated <- case m of
    Swap -> (\base -> swapIndex pos base xs) <$> randomDNA
    Insert -> (\base -> insertIndex pos base xs) <$> randomDNA
    Delete -> pure (dropIndex pos xs)
  pure ((m, pos), mutated)
 
-- | Draws a random index between 1 and the given upper bound, using the
-- global standard generator.
randomIndex :: Int -> IO Int
randomIndex upper = getStdRandom pick
  where
    pick = randomR (1, upper)
 
-- | Draws one random base from the global standard generator.
--
-- Uses 'getStdRandom' with 'random' directly, which avoids building an
-- infinite list just to take its 'head'.
randomDNA :: IO DNABase
randomDNA = getStdRandom random
 
-- | Draws one random kind of mutation from the global standard generator.
--
-- Uses 'getStdRandom' with 'random' directly, which avoids building an
-- infinite list just to take its 'head'.
randomMutation :: IO Mutation
randomMutation = getStdRandom random
 
-- | Applies @n@ random mutations in succession, printing each mutation
-- and its position as it is applied, and returns the final sequence.
--
-- A count of zero or less applies no mutations and returns the sequence
-- unchanged. The guard is on @n <= 0@ because a negative count would
-- otherwise never reach a base case.
mutations :: Int -> DNASequence -> IO DNASequence
mutations n s
  | n <= 0 = pure s
  | otherwise = do
      ((kind, pos), mutated) <- mutateSequence s
      printf "%6s @ %d\n" kind pos
      mutations (n - 1) mutated
 
-- | Generates a random 200-base sequence, prints it with its base counts
-- and total length, applies 10 random mutations (each one is printed),
-- then prints the mutated sequence with its base counts and total length.
main :: IO ()
main = do
  initial <- newSequence 200
  report "\nInitial Sequence:" initial
  printf "Total: %d\n\n" (length initial)
  mutated <- mutations 10 initial
  report "\nMutated Sequence:" mutated
  printf "Total: %d\n" (length mutated)
  where
    -- Prints a title, the chunked sequence, then the per-base counts.
    report :: String -> DNASequence -> IO ()
    report title dna = do
      putStrLn title
      showSequence dna
      putStrLn "\nBase Counts:"
      showBaseCounts dna

    -- One line per chunk: the chunk label followed by its bases.
    showSequence :: DNASequence -> IO ()
    showSequence dna =
      mapM_ (\(label, chunk) -> printf "%3d: %s\n" label chunk) (chunkedDNASequence dna)

    -- One line per base: the base followed by its count.
    showBaseCounts :: DNASequence -> IO ()
    showBaseCounts dna =
      mapM_ (\(base, count) -> printf "%s: %3d\n" base count) (baseCounts dna)</lang>
{{out}}
<pre>Initial Sequence:
50: ACAGAGAGACCCACAATGGGGGGTCCGACATAGGCAGATACAGTAGACGA
100: AGTAGCGTTCTCACATTCGCCGTCTTCCACACGTTTGCCTCCCGGGTTGA
150: CCCCGTGTAATGGAACCCAAGCGAATGGCGGCGTAGGCAAACTTAACATG
200: GAATCGGTGGCATAAATGACGGTTCTCCGCCGACAGCGCATGGATTCTTG
 
Base Counts:
A: 51
C: 53
G: 56
T: 40
Total: 200
 
Swap @ 82
Insert @ 15
Swap @ 11
Swap @ 159
Swap @ 121
Swap @ 184
Swap @ 126
Delete @ 134
Swap @ 78
Insert @ 69
 
Mutated Sequence:
50: ACAGAGAGACGCACATATGGGGGGTCCGACATAGGCAGATACAGTAGACG
100: AAGTAGCGTTCTCACATTCGGCCGTCTTTCACATGTTTGCCTCCCGGGTT
150: GACCCCGTGTAATGGAACCCAAGCGATTGGCGGCTAGGCAAACTTAACAT
200: GGAATCGGGGGCATAAATGACGGTTCTCCGCCGCCAGCGCATGGATTCTT
250: G
 
Base Counts:
A: 49
C: 51
G: 58
T: 43
Total: 201
</pre>
=={{header|J}}==
<lang J>ACGT=: 'ACGT'
Anonymous user