Bioinformatics/Sequence mutation: Difference between revisions

no edit summary
(Added Arturo implementation)
No edit summary
Line 2,791:
Base Counts: 191 : A(49) C(45) G(57) T(40)
</pre>
 
=={{header|Rust}}==
<lang Rust>
use rand::prelude::*;
use std::collections::HashMap;
use std::fmt::{Display, Formatter, Error};
 
pub struct Seq<'a> {
alphabet: Vec<&'a str>,
distr: rand::distributions::Uniform<usize>,
pos_distr: rand::distributions::Uniform<usize>,
seq: Vec<&'a str>,
}
 
impl Display for Seq<'_> {
fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
 
let pretty: String = self.seq
.iter()
.enumerate()
.map(|(i, nt)| if (i + 1) % 60 == 0 { format!("{}\n", nt) } else { nt.to_string() })
.collect();
let counts_hm = self.seq
.iter()
.fold(HashMap::<&str, usize>::new(), |mut m, nt| {
*m.entry(nt).or_default() += 1;
m
});
 
let mut counts_vec: Vec<(&str, usize)> = counts_hm.into_iter().collect();
counts_vec.sort_by(|a, b| a.0.cmp(&b.0));
let counts_string = counts_vec
.iter()
.fold(String::new(), |mut counts_string, (nt, count)| {
counts_string += &format!("{} = {}\n", nt, count);
counts_string
});
 
write!(f, "Seq:\n{}\n\nLength: {}\n\nCounts:\n{}", pretty, self.seq.len(), counts_string)
}
}
 
impl Seq<'_> {
pub fn new(alphabet: Vec<&str>, len: usize) -> Seq {
let distr = rand::distributions::Uniform::new_inclusive(0, alphabet.len() - 1);
let pos_distr = rand::distributions::Uniform::new_inclusive(0, len - 1);
 
let seq: Vec<&str> = (0..len)
.map(|_| {
alphabet[thread_rng().sample(distr)]
})
.collect();
Seq { alphabet, distr, pos_distr, seq }
}
 
pub fn insert(&mut self) {
let pos = thread_rng().sample(self.pos_distr);
let nt = self.alphabet[thread_rng().sample(self.distr)];
println!("Inserting {} at position {}", nt, pos);
self.seq.insert(pos, nt);
}
 
pub fn delete(&mut self) {
let pos = thread_rng().sample(self.pos_distr);
println!("Deleting {} at position {}", self.seq[pos], pos);
self.seq.remove(pos);
}
 
pub fn swap(&mut self) {
let pos = thread_rng().sample(self.pos_distr);
let cur_nt = self.seq[pos];
let new_nt = self.alphabet[thread_rng().sample(self.distr)];
println!("Replacing {} at position {} with {}", cur_nt, pos, new_nt);
self.seq[pos] = new_nt;
}
}
 
fn main() {
 
let mut seq = Seq::new(vec!["A", "C", "T", "G"], 200);
println!("Initial sequnce:\n{}", seq);
 
let mut_distr = rand::distributions::Uniform::new_inclusive(0, 2);
 
for _ in 0..10 {
let mutation = thread_rng().sample(mut_distr);
 
if mutation == 0 {
seq.insert()
} else if mutation == 1 {
seq.delete()
} else {
seq.swap()
}
}
 
println!("\nMutated sequence:\n{}", seq);
}
</lang>
{{out}}
<pre>
Initial sequnce:
Seq:
TAAGTTTAGTCTGTTTACGAGATCTAGAGGAGGACACCGTGTAGAGGGGATTTGTCAGGA
CACATGCATGGCACCCTAGTCAAATAGTGCCGAGAACAGGCTCTCCTGAGAAAGTTAGGT
CTGCCGAAGTGACGAAGTGCACGTTATAGCTCTATTAAGTATGTTCGTTAACAGGTATTA
ATGCTCTTAGCCAAGACCGT
 
Length: 200
 
Counts:
A = 56
C = 38
G = 53
T = 53
 
Deleting C at position 197
Inserting T at position 157
Replacing C at position 149 with G
Replacing A at position 171 with G
Replacing T at position 182 with G
Deleting C at position 124
Inserting T at position 128
Replacing G at position 175 with C
Deleting A at position 35
Replacing A at position 193 with G
 
Mutated sequence:
Seq:
TAAGTTTAGTCTGTTTACGAGATCTAGAGGAGGACCCGTGTAGAGGGGATTTGTCAGGAC
ACATGCATGGCACCCTAGTCAAATAGTGCCGAGAACAGGCTCTCCTGAGAAAGTTAGGTC
TGCGAAGTTGACGAAGTGCACGTTATAGGTCTATTATAGTATGTTCGTTAGCAGCTATTA
AGGCTCTTAGCCAGGACGT
 
Length: 199
 
Counts:
A = 53
C = 36
G = 56
T = 54</pre>