Bioinformatics/Sequence mutation: Difference between revisions
Content added Content deleted
Line 910: | Line 910: | ||
data DNABase = A | C | G | T deriving (Show, Read, Eq, Ord, Enum, Bounded) |
data DNABase = A | C | G | T deriving (Show, Read, Eq, Ord, Enum, Bounded) |
||
type DNASequence = [DNABase] |
type DNASequence = [DNABase] |
||
data Result = Result Mutation Int [DNABase] |
|||
instance Random DNABase where |
instance Random DNABase where |
||
Line 938: | Line 940: | ||
newSequence n = take n . randoms <$> newStdGen |
newSequence n = take n . randoms <$> newStdGen |
||
mutateSequence :: DNASequence -> IO ( |
mutateSequence :: DNASequence -> IO (Result, DNASequence) |
||
mutateSequence [] = fail "empty sequence" |
mutateSequence [] = fail "empty dna sequence" |
||
mutateSequence ds = mutate ds =<< randomMutation |
mutateSequence ds = mutate ds =<< randomMutation |
||
where |
where |
||
Line 945: | Line 947: | ||
mutate xs m = do |
mutate xs m = do |
||
i <- randomIndex (length xs) |
i <- randomIndex (length xs) |
||
case m of |
case m of |
||
Swap -> randomDNA >>= \d -> pure ( |
Swap -> randomDNA >>= \d -> pure (Result Swap i [xs !! pred i, d], swapElement i d xs) |
||
Insert -> randomDNA >>= \d -> pure ( |
Insert -> randomDNA >>= \d -> pure (Result Insert i [d], insertElement i d xs) |
||
Delete -> pure ( |
Delete -> pure (Result Delete i [xs !! pred i], dropElement i xs) |
||
where |
where |
||
dropElement i xs = take (pred i) xs <> drop i xs |
dropElement i xs = take (pred i) xs <> drop i xs |
||
Line 959: | Line 961: | ||
mutate 0 s = pure s |
mutate 0 s = pure s |
||
mutate n s = do |
mutate n s = do |
||
(m, ms) <- mutateSequence s |
(Result m i b, ms) <- mutateSequence s |
||
case m of |
|||
uncurry (printf "%6s @ %d\n") m |
|||
Swap -> printf "%6s @ %-3d : %s -> %s \n" m i (head b) (last b) |
|||
_ -> printf "%6s @ %-3d : %s\n" m i (head b) |
|||
mutate (pred n) ms |
mutate (pred n) ms |
||
Line 968: | Line 972: | ||
putStrLn "\nInitial Sequence:" >> showSequence ds |
putStrLn "\nInitial Sequence:" >> showSequence ds |
||
putStrLn "\nBase Counts:" >> showBaseCounts ds |
putStrLn "\nBase Counts:" >> showBaseCounts ds |
||
showSumBaseCounts ds |
|||
printf "Total: %d\n\n" $ length ds |
|||
ms <- mutate 10 ds |
ms <- mutate 10 ds |
||
putStrLn "\nMutated Sequence:" >> showSequence ms |
putStrLn "\nMutated Sequence:" >> showSequence ms |
||
putStrLn "\nBase Counts:" >> showBaseCounts ms |
putStrLn "\nBase Counts:" >> showBaseCounts ms |
||
showSumBaseCounts ms |
|||
printf "Total: %d\n" $ length ms |
|||
where |
where |
||
showSequence = mapM_ (uncurry (printf "%3d: %s\n")) . chunkedDNASequence |
showSequence = mapM_ (uncurry (printf "%3d: %s\n")) . chunkedDNASequence |
||
showBaseCounts = mapM_ (uncurry (printf "%s: %3d\n")) . baseCounts |
showBaseCounts = mapM_ (uncurry (printf "%s: %3d\n")) . baseCounts |
||
showSumBaseCounts xs = putStrLn (replicate 6 '-') >> printf "Σ: %d\n\n" (length xs)</lang> |
|||
{{out}} |
{{out}} |
||
<pre>Initial Sequence: |
<pre>Initial Sequence: |
||
50: CCGGCGAACTGGTAGGTCTTTAATTATGCGGCCGCGATCGCGACACAGGT |
|||
50: ACAGAGAGACCCACAATGGGGGGTCCGACATAGGCAGATACAGTAGACGA |
|||
100: GCAGGAGGAAAATAGGCCCCCGTTCTGGGCAGCCTGATTGCACACTCCCG |
|||
100: AGTAGCGTTCTCACATTCGCCGTCTTCCACACGTTTGCCTCCCGGGTTGA |
|||
150: ATACCAGACGTGTGGCGGCTTTTTCGCAAGATCTTACCAAACATTAAGAT |
|||
150: CCCCGTGTAATGGAACCCAAGCGAATGGCGGCGTAGGCAAACTTAACATG |
|||
200: TCGAAATACCAACTGTCGAAAGCAGAACGTGAATGTACCACCCGGATGCG |
|||
200: GAATCGGTGGCATAAATGACGGTTCTCCGCCGACAGCGCATGGATTCTTG |
|||
Base Counts: |
Base Counts: |
||
A: |
A: 53 |
||
C: 53 |
C: 53 |
||
G: |
G: 53 |
||
T: |
T: 41 |
||
------ |
|||
Σ: 200 |
|||
Insert @ 104 : C |
|||
⚫ | |||
Insert @ 15 |
|||
Insert @ 60 : A |
|||
Insert @ 42 : G |
|||
Swap @ |
Swap @ 14 : A -> C |
||
Insert @ 88 : A |
|||
Delete @ 9 : C |
|||
Swap @ |
Swap @ 185 : A -> G |
||
⚫ | |||
Insert @ 27 : G |
|||
Swap @ 102 : C -> T |
|||
Insert @ 69 |
|||
Mutated Sequence: |
Mutated Sequence: |
||
50: CCGGCGAATGGTCGGTCTTTAATTATGGCGGCCGCGATCGCGGACACAGG |
|||
50: ACAGAGAGACGCACATATGGGGGGTCCGACATAGGCAGATACAGTAGACG |
|||
100: TGCAGGAGGAAAAATAGGCCCCCGTTCTGGGCAGCCTGAATTGCACACTC |
|||
100: AAGTAGCGTTCTCACATTCGGCCGTCTTTCACATGTTTGCCTCCCGGGTT |
|||
150: CTGATACCCAGACGTGTGGCGGCTTTTTCGCAAGACTTACCAAACATTAA |
|||
150: GACCCCGTGTAATGGAACCCAAGCGATTGGCGGCTAGGCAAACTTAACAT |
|||
200: GATTCGAAATACCAACTGTCGAAAGCAGAACGTGAGTGTACCACCCGGAT |
|||
200: GGAATCGGGGGCATAAATGACGGTTCTCCGCCGCCAGCGCATGGATTCTT |
|||
250: |
250: GCG |
||
Base Counts: |
Base Counts: |
||
A: |
A: 53 |
||
C: |
C: 53 |
||
G: |
G: 56 |
||
T: |
T: 41 |
||
------ |
|||
Total: 201 |
|||
</pre> |
Σ: 203</pre> |
||
=={{header|J}}== |
=={{header|J}}== |