Bioinformatics/Sequence mutation: Difference between revisions

Content added Content deleted
Line 910: Line 910:
data DNABase = A | C | G | T deriving (Show, Read, Eq, Ord, Enum, Bounded)
data DNABase = A | C | G | T deriving (Show, Read, Eq, Ord, Enum, Bounded)
type DNASequence = [DNABase]
type DNASequence = [DNABase]

data Result = Result Mutation Int [DNABase]


instance Random DNABase where
instance Random DNABase where
Line 938: Line 940:
newSequence n = take n . randoms <$> newStdGen
newSequence n = take n . randoms <$> newStdGen


mutateSequence :: DNASequence -> IO ((Mutation, Int), DNASequence)
mutateSequence :: DNASequence -> IO (Result, DNASequence)
mutateSequence [] = fail "empty sequence"
mutateSequence [] = fail "empty dna sequence"
mutateSequence ds = mutate ds =<< randomMutation
mutateSequence ds = mutate ds =<< randomMutation
where
where
Line 945: Line 947:
mutate xs m = do
mutate xs m = do
i <- randomIndex (length xs)
i <- randomIndex (length xs)
case m of
case m of
Swap -> randomDNA >>= \d -> pure ((Swap, i), swapElement i d xs)
Swap -> randomDNA >>= \d -> pure (Result Swap i [xs !! pred i, d], swapElement i d xs)
Insert -> randomDNA >>= \d -> pure ((Insert, i), insertElement i d xs)
Insert -> randomDNA >>= \d -> pure (Result Insert i [d], insertElement i d xs)
Delete -> pure ((Delete, i), dropElement i xs)
Delete -> pure (Result Delete i [xs !! pred i], dropElement i xs)
where
where
dropElement i xs = take (pred i) xs <> drop i xs
dropElement i xs = take (pred i) xs <> drop i xs
Line 959: Line 961:
mutate 0 s = pure s
mutate 0 s = pure s
mutate n s = do
mutate n s = do
(m, ms) <- mutateSequence s
(Result m i b, ms) <- mutateSequence s
case m of
uncurry (printf "%6s @ %d\n") m
Swap -> printf "%6s @ %-3d : %s -> %s \n" m i (head b) (last b)
_ -> printf "%6s @ %-3d : %s\n" m i (head b)
mutate (pred n) ms
mutate (pred n) ms


Line 968: Line 972:
putStrLn "\nInitial Sequence:" >> showSequence ds
putStrLn "\nInitial Sequence:" >> showSequence ds
putStrLn "\nBase Counts:" >> showBaseCounts ds
putStrLn "\nBase Counts:" >> showBaseCounts ds
showSumBaseCounts ds
printf "Total: %d\n\n" $ length ds
ms <- mutate 10 ds
ms <- mutate 10 ds
putStrLn "\nMutated Sequence:" >> showSequence ms
putStrLn "\nMutated Sequence:" >> showSequence ms
putStrLn "\nBase Counts:" >> showBaseCounts ms
putStrLn "\nBase Counts:" >> showBaseCounts ms
showSumBaseCounts ms
printf "Total: %d\n" $ length ms
where
where
showSequence = mapM_ (uncurry (printf "%3d: %s\n")) . chunkedDNASequence
showSequence = mapM_ (uncurry (printf "%3d: %s\n")) . chunkedDNASequence
showBaseCounts = mapM_ (uncurry (printf "%s: %3d\n")) . baseCounts</lang>
showBaseCounts = mapM_ (uncurry (printf "%s: %3d\n")) . baseCounts
showSumBaseCounts xs = putStrLn (replicate 6 '-') >> printf "Σ: %d\n\n" (length xs)</lang>
{{out}}
{{out}}
<pre>Initial Sequence:
<pre>Initial Sequence:
50: CCGGCGAACTGGTAGGTCTTTAATTATGCGGCCGCGATCGCGACACAGGT
50: ACAGAGAGACCCACAATGGGGGGTCCGACATAGGCAGATACAGTAGACGA
100: GCAGGAGGAAAATAGGCCCCCGTTCTGGGCAGCCTGATTGCACACTCCCG
100: AGTAGCGTTCTCACATTCGCCGTCTTCCACACGTTTGCCTCCCGGGTTGA
150: ATACCAGACGTGTGGCGGCTTTTTCGCAAGATCTTACCAAACATTAAGAT
150: CCCCGTGTAATGGAACCCAAGCGAATGGCGGCGTAGGCAAACTTAACATG
200: TCGAAATACCAACTGTCGAAAGCAGAACGTGAATGTACCACCCGGATGCG
200: GAATCGGTGGCATAAATGACGGTTCTCCGCCGACAGCGCATGGATTCTTG


Base Counts:
Base Counts:
A: 51
A: 53
C: 53
C: 53
G: 56
G: 53
T: 40
T: 41
------
Total: 200
Σ: 200


Swap @ 82
Insert @ 104 : C
Delete @ 133 : T
Insert @ 15
Swap @ 11
Insert @ 60 : A
Swap @ 159
Insert @ 42 : G
Swap @ 121
Swap @ 14 : A -> C
Swap @ 184
Insert @ 88 : A
Delete @ 9 : C
Swap @ 126
Swap @ 185 : A -> G
Delete @ 134
Swap @ 78
Insert @ 27 : G
Swap @ 102 : C -> T
Insert @ 69


Mutated Sequence:
Mutated Sequence:
50: CCGGCGAATGGTCGGTCTTTAATTATGGCGGCCGCGATCGCGGACACAGG
50: ACAGAGAGACGCACATATGGGGGGTCCGACATAGGCAGATACAGTAGACG
100: TGCAGGAGGAAAAATAGGCCCCCGTTCTGGGCAGCCTGAATTGCACACTC
100: AAGTAGCGTTCTCACATTCGGCCGTCTTTCACATGTTTGCCTCCCGGGTT
150: CTGATACCCAGACGTGTGGCGGCTTTTTCGCAAGACTTACCAAACATTAA
150: GACCCCGTGTAATGGAACCCAAGCGATTGGCGGCTAGGCAAACTTAACAT
200: GATTCGAAATACCAACTGTCGAAAGCAGAACGTGAGTGTACCACCCGGAT
200: GGAATCGGGGGCATAAATGACGGTTCTCCGCCGCCAGCGCATGGATTCTT
250: G
250: GCG


Base Counts:
Base Counts:
A: 49
A: 53
C: 51
C: 53
G: 58
G: 56
T: 43
T: 41
------
Total: 201
</pre>
Σ: 203</pre>


=={{header|J}}==
=={{header|J}}==