Bioinformatics/Sequence mutation: Difference between revisions
Content added Content deleted
(Added Java solution) |
(added Haskell) |
||
Line 901: | Line 901: | ||
====== |
====== |
||
</pre> |
</pre> |
||
=={{header|Haskell}}== |
|||
<lang haskell>import Data.List (group, sort) |
|||
import Data.List.Split (chunksOf) |
|||
import System.Random (Random, randomR, random, newStdGen, randoms, getStdRandom) |
|||
import Text.Printf (PrintfArg(..), fmtChar, fmtPrecision, formatString, IsChar(..), printf) |
|||
-- | The kinds of point mutation that can be applied to a sequence.
--
-- The constructor order fixes the 'Enum' numbering, which the 'Random'
-- instance relies on when it converts a drawn 'Int' back with 'toEnum'.
data Mutation
  = Swap
  | Delete
  | Insert
  deriving (Show, Eq, Ord, Enum, Bounded)
|||
-- | One of the four DNA bases.
--
-- The constructor order fixes the 'Enum' numbering (used by the 'Random'
-- instance) and the 'Ord' ordering (used when counting bases after sorting).
data DNABase
  = A
  | C
  | G
  | T
  deriving (Show, Read, Eq, Ord, Enum, Bounded)
|||
-- A DNA sequence is represented as a plain list of bases.
type DNASequence = [DNABase] |
|||
-- | Random bases are drawn by mapping the bounds to their 'Enum' indices,
-- sampling an 'Int' between them with 'randomR', and converting the result
-- back with 'toEnum'. 'random' samples across the whole type.
instance Random DNABase where
  randomR (lo, hi) gen =
    let (ix, gen') = randomR (fromEnum lo, fromEnum hi) gen
     in (toEnum ix, gen')
  random = randomR (minBound, maxBound)
|||
-- | Random mutation kinds are drawn by mapping the bounds to their 'Enum'
-- indices, sampling an 'Int' between them with 'randomR', and converting the
-- result back with 'toEnum'. 'random' samples across the whole type.
instance Random Mutation where
  randomR (lo, hi) gen =
    let (ix, gen') = randomR (fromEnum lo, fromEnum hi) gen
     in (toEnum ix, gen')
  random = randomR (minBound, maxBound)
|||
-- | Lets a single base be passed to 'printf': the base is rendered with
-- 'show' and formatted as a string, with the conversion character forced to
-- @s@ and any precision cleared from the format spec.
instance PrintfArg DNABase where
  formatArg base spec = formatString (show base) stringSpec
    where
      stringSpec = spec { fmtChar = 's', fmtPrecision = Nothing }
|||
-- | Lets a mutation kind be passed to 'printf': the constructor is rendered
-- with 'show' and formatted as a string, with the conversion character forced
-- to @s@ and any precision cleared from the format spec.
instance PrintfArg Mutation where
  formatArg kind spec = formatString (show kind) stringSpec
    where
      stringSpec = spec { fmtChar = 's', fmtPrecision = Nothing }
|||
-- | Character conversions for bases, which let a list of bases be handed to
-- 'printf' as a string argument (Text.Printf provides 'PrintfArg' for lists
-- of any 'IsChar' type).
--
-- Both directions are written as explicit pattern matches instead of going
-- through 'head' / 'read', so a character that is not a base fails with a
-- message naming the offending character rather than "no parse".
instance IsChar DNABase where
  toChar base = case base of
    A -> 'A'
    C -> 'C'
    G -> 'G'
    T -> 'T'
  fromChar c = case c of
    'A' -> A
    'C' -> C
    'G' -> G
    'T' -> T
    _ -> error ("fromChar: not a DNA base: " ++ show c)
|||
-- | Remove the element at 1-based position @i@: keep the first @i - 1@
-- elements and everything after the first @i@.
dropIndex :: Int -> [a] -> [a]
dropIndex i xs = before ++ after
  where
    before = take (pred i) xs
    after = drop i xs
|||
-- | Insert @e@ after the first @i@ elements of the list (so @i = 0@ puts it
-- at the front and @i = length xs@ appends it).
insertIndex :: Int -> a -> [a] -> [a]
insertIndex i e xs = front ++ e : back
  where
    (front, back) = splitAt i xs
|||
-- | Replace the element at 1-based position @i@ with @a@: keep the first
-- @i - 1@ elements, then @a@, then everything after the first @i@.
swapIndex :: Int -> a -> [a] -> [a]
swapIndex i a xs = before ++ a : after
  where
    before = take (pred i) xs
    after = drop i xs
|||
-- | Split a sequence into chunks of 50 bases and pair each chunk with a line
-- label: 50 for the first chunk, 100 for the second, and so on.
--
-- The label always advances by 50, even when the final chunk is shorter.
chunkedDNASequence :: DNASequence -> [(Int, [DNABase])]
chunkedDNASequence dna = zip lineLabels (chunksOf 50 dna)
  where
    lineLabels = [50, 100 ..]
|||
-- | Count how often each base occurs in the sequence.
--
-- The sequence is sorted and grouped into runs of equal bases; each run
-- yields its base together with the run length, so the result is ordered by
-- base and only mentions bases that actually occur.
baseCounts :: DNASequence -> [(DNABase, Int)]
baseCounts dna = [(base, length run) | run@(base : _) <- group (sort dna)]
|||
-- | Generate a random sequence of @n@ bases, taken from the stream produced
-- by a generator obtained with 'newStdGen'.
newSequence :: Int -> IO DNASequence
newSequence n = do
  gen <- newStdGen
  pure (take n (randoms gen))
|||
-- | Pick a random kind of mutation and apply it to the sequence, returning
-- what 'mutate' reports: the mutation with its index, and the new sequence.
mutateSequence :: DNASequence -> IO ((Mutation, Int), DNASequence)
mutateSequence xs = do
  kind <- randomMutation
  mutate kind xs
|||
-- | Apply the given kind of mutation at a random position of the sequence.
--
-- The position @i@ comes from 'randomIndex' called with the sequence length,
-- and is treated as 1-based by all three mutations:
--
-- * 'Swap' replaces the base at position @i@ with a random base.
-- * 'Delete' removes the base at position @i@.
-- * 'Insert' places a random base so that it ends up at position @i@.
--
-- Returns the mutation together with the chosen position, and the mutated
-- sequence.
mutate :: Mutation -> DNASequence -> IO ((Mutation, Int), DNASequence)
mutate m xs = do
  i <- randomIndex (length xs)
  mutated <- case m of
    Swap -> (\d -> swapIndex i d xs) <$> randomDNA
    -- 'insertIndex' counts how many elements to keep in front of the new
    -- one, whereas 'swapIndex' and 'dropIndex' take a 1-based position.
    -- Passing @pred i@ makes the new base land at the reported position @i@
    -- (and makes insertion at the very front possible).
    Insert -> (\d -> insertIndex (pred i) d xs) <$> randomDNA
    Delete -> pure (dropIndex i xs)
  pure ((m, i), mutated)
|||
-- | Draw a random index with 'randomR' over the bounds @(1, upper)@, using
-- (and updating) the global generator via 'getStdRandom'.
randomIndex :: Int -> IO Int
randomIndex upper = getStdRandom $ \gen -> randomR (1, upper) gen
|||
-- | Draw one random base: the first value produced by a generator obtained
-- with 'newStdGen'.
randomDNA :: IO DNABase
randomDNA = do
  gen <- newStdGen
  pure (fst (random gen))
|||
-- | Draw one random kind of mutation: the first value produced by a
-- generator obtained with 'newStdGen'.
randomMutation :: IO Mutation
randomMutation = do
  gen <- newStdGen
  pure (fst (random gen))
|||
-- | Apply @n@ random mutations in succession, printing one line per mutation
-- (its kind and the index it was applied at), and return the final sequence.
--
-- A count of zero or less leaves the sequence untouched; guarding on
-- @n <= 0@ rather than matching only @0@ keeps a negative count from
-- recursing forever.
mutations :: Int -> DNASequence -> IO DNASequence
mutations n s
  | n <= 0 = pure s
  | otherwise = do
      ((kind, position), mutated) <- mutateSequence s
      printf "%6s @ %d\n" kind position
      mutations (pred n) mutated
|||
-- | Demo driver: build a random sequence of 200 bases, print it with its
-- base counts and total length, apply 10 random mutations (each is logged by
-- 'mutations'), then print the mutated sequence with its counts and length.
main :: IO ()
main = do
  original <- newSequence 200
  putStrLn "\nInitial Sequence:"
  showSequence original
  putStrLn "\nBase Counts:"
  showBaseCounts original
  printf "Total: %d\n\n" (length original)
  mutated <- mutations 10 original
  putStrLn "\nMutated Sequence:"
  showSequence mutated
  putStrLn "\nBase Counts:"
  showBaseCounts mutated
  printf "Total: %d\n" (length mutated)
  where
    -- Print one labelled line per chunk of the sequence.
    showSequence :: DNASequence -> IO ()
    showSequence dna =
      mapM_ (\(label, chunk) -> printf "%3d: %s\n" label chunk) (chunkedDNASequence dna)
    -- Print one line per base that occurs, with its count.
    showBaseCounts :: DNASequence -> IO ()
    showBaseCounts dna =
      mapM_ (\(base, count) -> printf "%s: %3d\n" base count) (baseCounts dna)</lang>
|||
{{out}} |
|||
<pre>Initial Sequence: |
|||
50: ACAGAGAGACCCACAATGGGGGGTCCGACATAGGCAGATACAGTAGACGA |
|||
100: AGTAGCGTTCTCACATTCGCCGTCTTCCACACGTTTGCCTCCCGGGTTGA |
|||
150: CCCCGTGTAATGGAACCCAAGCGAATGGCGGCGTAGGCAAACTTAACATG |
|||
200: GAATCGGTGGCATAAATGACGGTTCTCCGCCGACAGCGCATGGATTCTTG |
|||
Base Counts: |
|||
A: 51 |
|||
C: 53 |
|||
G: 56 |
|||
T: 40 |
|||
Total: 200 |
|||
Swap @ 82 |
|||
Insert @ 15 |
|||
Swap @ 11 |
|||
Swap @ 159 |
|||
Swap @ 121 |
|||
Swap @ 184 |
|||
Swap @ 126 |
|||
Delete @ 134 |
|||
Swap @ 78 |
|||
Insert @ 69 |
|||
Mutated Sequence: |
|||
50: ACAGAGAGACGCACATATGGGGGGTCCGACATAGGCAGATACAGTAGACG |
|||
100: AAGTAGCGTTCTCACATTCGGCCGTCTTTCACATGTTTGCCTCCCGGGTT |
|||
150: GACCCCGTGTAATGGAACCCAAGCGATTGGCGGCTAGGCAAACTTAACAT |
|||
200: GGAATCGGGGGCATAAATGACGGTTCTCCGCCGCCAGCGCATGGATTCTT |
|||
250: G |
|||
Base Counts: |
|||
A: 49 |
|||
C: 51 |
|||
G: 58 |
|||
T: 43 |
|||
Total: 201 |
|||
</pre> |
|||
=={{header|J}}== |
=={{header|J}}== |
||
<lang J>ACGT=: 'ACGT' |
<lang J>ACGT=: 'ACGT' |