Bioinformatics/Sequence mutation: Difference between revisions

Content added Content deleted
m (Replaced the ugly "ord(Base.other) - 1" by "MaxBaseVal".)
Line 1,296: Line 1,296:
Base counts:
Base counts:
A: 71, C: 62, G: 58, T: 61, Total: 252
A: 71, C: 62, G: 58, T: 61, Total: 252
</pre>


=={{header|JavaScript}}==
<lang javascript>// Basic set-up
// Length of the randomly generated starting sequence.
const numBases = 250;
// How many random mutations are applied to that sequence.
const numMutations = 30;
// The alphabet every base is drawn from.
const bases = Array.from('ACGT');

// Utility functions
/**
 * Make a shallow copy of an iterable as a new array.
 *
 * @param {Iterable<*>} arr - Values to duplicate.
 * @returns {Array<*>} A fresh array holding the same elements in order.
 */
const copy = arr => {
  const duplicate = [];
  for (const item of arr) {
    duplicate.push(item);
  }
  return duplicate;
};

/**
 * Build a generator of random integers in the half-open range [0, max).
 *
 * @param {number} max - Exclusive upper bound.
 * @returns {function(): number} Function yielding a new random integer on each call.
 */
const randTo = max => {
  const draw = () => {
    const scaled = Math.random() * max;
    // Bitwise OR with zero truncates the fractional part.
    return scaled | 0;
  };
  return draw;
};

// Random index into the `bases` alphabet.
const randBasePos = randTo(bases.length);

/**
 * Pick one base at random from `bases`.
 *
 * @returns {string} A randomly chosen entry of `bases`.
 */
const randBase = () => {
  const pos = randBasePos();
  return bases[pos];
};

/**
 * Build a formatter that left-pads values with spaces to a fixed width.
 *
 * The width is measured in code points (the value is spread into an array
 * of characters), so a label such as 'Σ' counts as one column.
 *
 * Values already at least `n` characters wide are returned unpadded. The
 * padding is clamped at zero so an over-long value never produces a
 * negative array/repeat length (which would throw a RangeError).
 *
 * @param {number} n - Target width in characters.
 * @returns {function(*): string} Formatter turning a value into its padded string form.
 */
const leftPadN = n => v => {
  const chars = [...String(v)];
  const short = Math.max(0, n - chars.length);
  return ' '.repeat(short) + chars.join('');
};

/**
 * Build a counter over a fixed array.
 *
 * @param {Array<*>} arr - Array to search.
 * @returns {function(*): number} Function returning how many elements of
 *   `arr` are strictly equal to its argument.
 */
const filterCount = arr => s =>
  arr.reduce((matches, e) => (e === s ? matches + 1 : matches), 0);

// Pretty Print functions
/**
 * Log a sequence in rows of 50 characters, each row prefixed with the
 * offset of its first element (padded to four columns) and a tab.
 *
 * A final, shorter row is logged for whatever is left over; nothing is
 * logged for an empty sequence.
 *
 * @param {string[]} seq - Sequence of bases to print.
 * @returns {void}
 */
const prettyPrint = seq => {
  const pad = leftPadN(4);
  const emit = (offset, chunk) => console.log(`${pad(offset)}:\t${chunk}`);

  let offset = 0;
  let line = '';
  seq.forEach(base => {
    line = line + base;
    // A full row: flush it and start collecting the next one.
    if (line.length === 50) {
      emit(offset, line);
      offset += 50;
      line = '';
    }
  });

  if (line !== '') {
    emit(offset, line);
  }
};

/**
 * Log how often each entry of `bases` occurs in a sequence, followed by
 * the sum of those counts on a line labelled 'Σ'.
 *
 * @param {string[]} seq - Sequence of bases to tally.
 * @returns {void}
 */
const printBases = seq => {
  const countOf = filterCount(seq);
  const pad = leftPadN(4);
  const report = (label, count) => console.log(`${pad(label)}:\t${count}`);

  let total = 0;
  for (const base of bases) {
    const count = countOf(base);
    report(base, count);
    total += count;
  }
  report('Σ', total);
};

/**
 * Choose a random position in a sequence together with a random new base.
 *
 * @param {string[]} seq - Sequence to pick a position from.
 * @returns {Array} Triple `[position, base currently at position, random base]`.
 */
const select = seq => {
  const pickPos = randTo(seq.length);
  const randPos = pickPos();
  // Elements are evaluated left to right, so the position is drawn before the base.
  return [randPos, seq[randPos], randBase()];
};

// Mutations definitions
/**
 * Mutation: replace the base at a random position with a random base.
 *
 * The incoming sequence is copied, not modified.
 *
 * @param {Array} state - Pair `[hist, seq]`: the log so far and the current sequence.
 * @returns {Array} New pair `[hist, seq]` with one more log entry and the mutated copy.
 */
const swap = ([hist, seq]) => {
  const mutated = copy(seq);
  const [pos, oldBase, newBase] = select(mutated);
  const entry = `Swapped ${oldBase} for ${newBase} at ${pos}`;
  mutated.splice(pos, 1, newBase);
  const log = [...hist, entry];
  return [log, mutated];
};

/**
 * Mutation: remove the base at a random position.
 *
 * The incoming sequence is copied, not modified.
 *
 * @param {Array} state - Pair `[hist, seq]`: the log so far and the current sequence.
 * @returns {Array} New pair `[hist, seq]` with one more log entry and the mutated copy.
 */
const del = ([hist, seq]) => {
  const mutated = copy(seq);
  // The random replacement base that `select` also returns is not needed here.
  const [pos, removed] = select(mutated);
  mutated.splice(pos, 1);
  const log = [...hist, `Deleted ${removed} at ${pos}`];
  return [log, mutated];
};

/**
 * Mutation: insert a random base in front of a random position.
 *
 * The incoming sequence is copied, not modified.
 *
 * @param {Array} state - Pair `[hist, seq]`: the log so far and the current sequence.
 * @returns {Array} New pair `[hist, seq]` with one more log entry and the mutated copy.
 */
const insert = ([hist, seq]) => {
  const mutated = copy(seq);
  // Only the position and the new base matter; the existing base is skipped.
  const [pos, , added] = select(mutated);
  mutated.splice(pos, 0, added);
  const log = [...hist, `Inserted ${added} at ${pos}`];
  return [log, mutated];
};

// Build the starting sequence: `numBases` independently drawn random bases.
const seq = Array.from({ length: numBases }, () => randBase());

// Choose `numMutations` operations, each picked at random from the three kinds.
const operations = [swap, del, insert];
const randMutate = randTo(operations.length);
const randomMutation = () => operations[randMutate()];
const mutations = Array.from({ length: numMutations }, () => randomMutation());

// Apply the operations in order, threading the [log, sequence] pair through each.
let state = [[], seq];
for (const mutate of mutations) {
  state = mutate(state);
}
const [hist, mut] = state;

// Report: each section is a heading followed by the routine that prints its body.
for (const [heading, render] of [
  ['ORIGINAL SEQUENCE:', () => prettyPrint(seq)],
  ['\nBASE COUNTS:', () => printBases(seq)],
  ['\nMUTATION LOG:', () => hist.forEach((e, i) => console.log(`${i}:\t${e}`))],
  ['\nMUTATED SEQUENCE:', () => prettyPrint(mut)],
  ['\nMUTATED BASE COUNTS:', () => printBases(mut)],
]) {
  console.log(heading);
  render();
}
</lang>
{{out}}
<pre>
ORIGINAL SEQUENCE:
0: CATTGTGAACCGGAATTATAGTTATACCGAAGATGCCAGAGTCCGGGGGA
50: GCGGATGCAGGGTATGTTCCTCAGAACCCCGTCATGCACGGCCTATACGG
100: AAAGGAAGGGCCTGCGCAGACTTAAGACAAGGAGGACTGTATGAAAGGGA
150: TGGCTTGTGTGGGAAGAGCACCGCGCGCGAAGCCTGTAATCGACTACAGC
200: AAATAATTATGTTGATACGGCAAATGGTCATAATCGCCCCCACCCGCGGT

BASE COUNTS:
A: 70
C: 57
G: 75
T: 48
Σ: 250

MUTATION LOG:
0: Swapped G for T at 53
1: Swapped G for A at 174
2: Inserted A at 105
3: Inserted G at 136
4: Swapped G for C at 183
5: Swapped T for A at 188
6: Deleted C at 240
7: Inserted G at 62
8: Deleted A at 166
9: Deleted G at 215
10: Inserted A at 130
11: Inserted G at 174
12: Deleted G at 162
13: Swapped A for A at 1
14: Swapped G for G at 117
15: Deleted C at 140
16: Inserted C at 217
17: Inserted C at 220
18: Inserted A at 150
19: Swapped T for T at 206
20: Inserted G at 153
21: Deleted T at 33
22: Deleted A at 190
23: Deleted A at 120
24: Deleted A at 141
25: Deleted T at 206
26: Inserted C at 216
27: Deleted G at 110
28: Inserted G at 230
29: Deleted G at 43

MUTATED SEQUENCE:
0: CATTGTGAACCGGAATTATAGTTATACCGAAGAGCCAGAGTCCGGGGAGC
50: GTATGCAGGGGTATGTTCCTCAGAACCCCGTCATGCACGGCCTATACGGA
100: AAGGAAAGGCCTGCGCAGCTTAAGACAAAGGAGGGATGTTGAAAGAGGGA
150: TGGCTTGTTGGGAGAGCACGCGCACGCGAACCCTGAATCGACTACAGCAA
200: ATAATATGTTATCACCCGGCAAATGGTCAGTAATCGCCCCACCCGCGGT

MUTATED BASE COUNTS:
A: 71
C: 59
G: 73
T: 46
Σ: 249
</pre>
</pre>