Bioinformatics/Sequence mutation: Difference between revisions

m
Line 910:
-- | One of the four nucleotide bases a DNA sequence is built from.
data DNABase
  = A
  | C
  | G
  | T
  deriving (Show, Read, Eq, Ord, Enum, Bounded)

-- | A DNA sequence, held as a plain list of bases.
type DNASequence = [DNABase]

-- | Description of one applied mutation: the kind of mutation, the position
-- it was applied at, and the bases involved (two for a swap, one for an
-- insertion or a deletion).
data Result = Result Mutation Int [DNABase]
 
instance Random DNABase where
Line 938 ⟶ 940:
-- Build a random sequence of n elements from a generator obtained via
-- newStdGen. The element type is fixed by the signature, which lies outside
-- this excerpt.
newSequence n = fmap (take n . randoms) newStdGen
 
-- | Apply one randomly chosen mutation to a DNA sequence.
--
-- Yields a 'Result' describing the mutation, paired with the mutated
-- sequence. An empty sequence has no position to mutate, so that case fails
-- in 'IO' with the message @"empty dna sequence"@.
mutateSequence :: DNASequence -> IO (Result, DNASequence)
mutateSequence [] = fail "empty dna sequence"
mutateSequence ds = mutate ds =<< randomMutation
where
Line 945 ⟶ 947:
-- Pick a random position in xs and apply mutation m there. Every branch
-- pairs a Result (mutation kind, position, bases involved) with the new
-- sequence.
mutate xs m = do
  i <- randomIndex (length xs)
  -- NOTE(review): xs !! pred i reads the base at position i, which assumes
  -- randomIndex yields a 1-based position -- confirm against its definition.
  case m of
    Swap   -> randomDNA >>= \d -> pure (Result Swap i [xs !! pred i, d], swapElement i d xs)
    Insert -> randomDNA >>= \d -> pure (Result Insert i [d], insertElement i d xs)
    Delete -> pure (Result Delete i [xs !! pred i], dropElement i xs)
where
dropElement i xs = take (pred i) xs <> drop i xs
Line 959 ⟶ 961:
-- Apply n successive random mutations to s, printing one log line per
-- mutation, and yield the final sequence.
mutate 0 s = pure s
mutate n s = do
  (Result m i b, ms) <- mutateSequence s
  -- A swap is logged as "old -> new" (first and last base of b); every other
  -- mutation logs the single base it carries. The visible branches of
  -- mutateSequence always build a non-empty list, so head/last do not fail.
  case m of
    Swap -> printf "%6s @ %-3d : %s -> %s \n" m i (head b) (last b)
    _    -> printf "%6s @ %-3d : %s\n" m i (head b)
  mutate (pred n) ms
 
Line 968 ⟶ 972:
-- Report the initial sequence with its base counts and total, apply ten
-- mutations, then report the mutated sequence the same way.
putStrLn "\nInitial Sequence:" >> showSequence ds
putStrLn "\nBase Counts:" >> showBaseCounts ds
showSumBaseCounts ds
ms <- mutate 10 ds
putStrLn "\nMutated Sequence:" >> showSequence ms
putStrLn "\nBase Counts:" >> showBaseCounts ms
showSumBaseCounts ms
where
-- Print every (number, text) pair produced by chunkedDNASequence as
-- "<number>: <text>", one per line.
showSequence = mapM_ (uncurry (printf "%3d: %s\n")) . chunkedDNASequence
-- Print every pair produced by baseCounts as "<base>: <count>", one per line.
showBaseCounts = mapM_ (uncurry (printf "%s: %3d\n")) . baseCounts
-- Print a six-dash rule followed by the total length of the sequence.
showSumBaseCounts xs = putStrLn (replicate 6 '-') >> printf "Σ: %d\n\n" (length xs)</lang>
{{out}}
<pre>Initial Sequence:
50: CCGGCGAACTGGTAGGTCTTTAATTATGCGGCCGCGATCGCGACACAGGT
50: ACAGAGAGACCCACAATGGGGGGTCCGACATAGGCAGATACAGTAGACGA
100: GCAGGAGGAAAATAGGCCCCCGTTCTGGGCAGCCTGATTGCACACTCCCG
100: AGTAGCGTTCTCACATTCGCCGTCTTCCACACGTTTGCCTCCCGGGTTGA
150: ATACCAGACGTGTGGCGGCTTTTTCGCAAGATCTTACCAAACATTAAGAT
150: CCCCGTGTAATGGAACCCAAGCGAATGGCGGCGTAGGCAAACTTAACATG
200: TCGAAATACCAACTGTCGAAAGCAGAACGTGAATGTACCACCCGGATGCG
200: GAATCGGTGGCATAAATGACGGTTCTCCGCCGACAGCGCATGGATTCTTG
 
Base Counts:
A:  53
C:  53
G:  53
T:  41
------
Σ: 200
 
Insert @ Swap104 @: 82C
Delete @ 134133 : T
Insert @ 15
Insert @ Swap60 @ 11: A
Insert @ Swap42 @ 159: G
Swap @ 12114 : A -> C
Insert @ Swap88 @ 184: A
Delete @ 9 : C
Swap @ 126185 : A -> G
Delete @ 134
Insert @ Swap27 @ 78: G
Swap @ 102 : C -> T
Insert @ 69
 
Mutated Sequence:
50: CCGGCGAATGGTCGGTCTTTAATTATGGCGGCCGCGATCGCGGACACAGG
50: ACAGAGAGACGCACATATGGGGGGTCCGACATAGGCAGATACAGTAGACG
100: TGCAGGAGGAAAAATAGGCCCCCGTTCTGGGCAGCCTGAATTGCACACTC
100: AAGTAGCGTTCTCACATTCGGCCGTCTTTCACATGTTTGCCTCCCGGGTT
150: CTGATACCCAGACGTGTGGCGGCTTTTTCGCAAGACTTACCAAACATTAA
150: GACCCCGTGTAATGGAACCCAAGCGATTGGCGGCTAGGCAAACTTAACAT
200: GATTCGAAATACCAACTGTCGAAAGCAGAACGTGAGTGTACCACCCGGAT
200: GGAATCGGGGGCATAAATGACGGTTCTCCGCCGCCAGCGCATGGATTCTT
250: GGCG
 
Base Counts:
A:  53
C:  53
G:  56
T:  41
------
Σ: 203</pre>
 
=={{header|J}}==
Anonymous user