Jump to content

Bioinformatics/Sequence mutation: Difference between revisions

julia example
(Added Perl example)
(julia example)
Line 472:
======
</pre>
 
=={{header|Julia}}==
<lang julia>dnabases = ['A', 'C', 'G', 'T']
randpos(seq) = rand(1:length*seq) # 1
mutateat(pos, seq) = (s = seq[:]; s[pos] = rand(dnabases); s) # 2-1
deleteat(pos, seq) = [seq[1:pos-1]; seq[pos+1:end]] # 2-2
randinsertat(pos, seq) = [seq[1:pos]; rand(dnabases); seq[pos+1:end]] # 2-3
 
function weightedmutation(seq, pos, weights=[1, 1, 1], verbose=true) # Extra credit
p, r = weights ./ sum(weights), rand()
f = (r <= p[1]) ? mutateat : (r < p[1] + p[2]) ? deleteat : randinsertat
verbose && print("Mutate by ", f == mutateat ? "swap" :
f == deleteat ? "delete" : "insert")
return f(pos, seq)
end
 
function weightedrandomsitemutation(seq, weights=[1, 1, 1], verbose=true)
position = rand(1:length(seq))
newseq = weightedmutation(seq, position, weights, verbose)
verbose && println(" at position $position")
return newseq
end
 
randdnasequence(n) = rand(dnabases, n) # 3
 
function dnasequenceprettyprint(seq, colsize=50) # 4
println(length(seq), "nt DNA sequence:\n")
rows = [seq[i:min(length(seq), i + colsize - 1)] for i in 1:colsize:length(seq)]
for (i, r) in enumerate(rows)
println(lpad(colsize * (i - 1), 5), " ", String(r))
end
bases = [[c, 0] for c in dnabases]
for c in seq, base in bases
if c == base[1]
base[2] += 1
end
end
println("\nNucleotide counts:\n")
for base in bases
println(lpad(base[1], 10), lpad(string(base[2]), 12))
end
println(lpad("Other", 10), lpad(string(length(seq) - sum(x[2] for x in bases)), 12))
println(" _________________\n", lpad("Total", 10), lpad(string(length(seq)), 12))
end
 
function testbioseq()
sequence = randdnasequence(500)
dnasequenceprettyprint(sequence)
for _ in 1:10 # 5
sequence = weightedrandomsitemutation(sequence)
end
println("\n Mutated:"); dnasequenceprettyprint(sequence) # 6
end
 
testbioseq()
</lang>{{out}}
<pre>
500nt DNA sequence:
 
0 TCACGTAACAGAGGTATAGTTGATCTTGAGCGGGCTGGCTCCCGGGTTTC
50 TAGCAAGAAAAGGGGAGGGAAGTGCGCCTGCTTTTGCCCCGGGCACCCCA
100 ATCGAAGACAGCTCCGGGGTCGCACATTTTTATGGCCACATAATGAGGGA
150 ATGCACGCATCACTCCTATCACTAACTGCGAACTCATGTGACTGTGCAAA
200 ACACCTTTAACACTGCGATGCCGTGGGGACGGGCCCCCCCAGCGGTATAG
250 CGCGCACACGCGACAGATGTTAACTCGAATGGTCGCGCCGGGGAGTGCAC
300 CCCTTGACATATACTCCAGATGCAATGCGCTATACTTTATGAACTTGCAT
350 AAGCTGCGCAGGGGGGATTGACTTATACTACATATTAACTACCGATATCG
400 ACGCAAATATTCGGGCGGTCTAAAGTGTGTCAGAACGGACATGCCGCCCA
450 GAATCACGGCTACTGAGGACAAATACGCATTCCCGGTGCTGCATTCATTC
 
Nucleotide counts:
 
A 127
C 133
G 131
T 109
Other 0
_________________
Total 500
Mutate by swap at position 253
Mutate by swap at position 448
Mutate by insert at position 379
Mutate by delete at position 311
Mutate by delete at position 335
Mutate by insert at position 132
Mutate by swap at position 191
Mutate by swap at position 481
Mutate by insert at position 189
Mutate by insert at position 423
 
Mutated:
502nt DNA sequence:
 
0 TCACGTAACAGAGGTATAGTTGATCTTGAGCGGGCTGGCTCCCGGGTTTC
50 TAGCAAGAAAAGGGGAGGGAAGTGCGCCTGCTTTTGCCCCGGGCACCCCA
100 ATCGAAGACAGCTCCGGGGTCGCACATTTTTACTGGCCACATAATGAGGG
150 AATGCACGCATCACTCCTATCACTAACTGCGAACTCATGATCACTGTGCA
200 AAACACCTTTAACACTGCGATGCCGTGGGGACGGGCCCCCCCAGCGGTAT
250 AGCGAGCACACGCGACAGATGTTAACTCGAATGGTCGCGCCGGGGAGTGC
300 ACCCCTTGACATTACTCCAGATGCAATGCGCTATACTTATGAACTTGCAT
350 AAGCTGCGCAGGGGGGATTGACTTATACTGACATATTAACTACCGATATC
400 GACGCAAATATTCGGGCGGTCTAGAAGTGTGTCAGAACGGACATGCCGCT
450 CAGAATCACGGCTACTGAGGACAAATACGCATTCCCGGTGCTGCATTCAT
500 TC
 
Nucleotide counts:
 
A 128
C 133
G 132
T 109
Other 0
_________________
Total 502
</pre>
 
 
=={{header|Perl}}==
4,108

edits

Cookies help us deliver our services. By using our services, you agree to our use of cookies.