Jump to content

Bioinformatics/Sequence mutation: Difference between revisions

Added C implementation
(→‎{{header|zkl}}: added code)
(Added C implementation)
Line 15:
* Give more information on the individual mutations applied.
* Allow mutations to be weighted and/or chosen.
 
=={{header|C}}==
In a swap mutation, Adenine (A) is always exchanged for Thymine (T) and vice versa; likewise, Cytosine (C) is always exchanged for Guanine (G) and vice versa.
<lang C>
#include<errno.h>
#include<limits.h>
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
 
/*
 * One base of the strand, kept as a node of a singly linked list.
 */
struct genome{
    char base;              /* base letter stored in this node: 'A', 'T', 'C' or 'G' */
    struct genome *next;    /* following node, or NULL for the last node */
};

typedef struct genome genome;
 
/*
 * Describes a single mutation to apply to the strand.
 */
typedef struct{
    char mutation;  /* kind of change: 'S' (swap), 'I' (insert) or 'D' (delete) */
    int position;   /* zero-based index of the base the change applies to */
}genomeChange;
 
/*
 * Running tally of how many times each base occurs.
 */
typedef struct{
    int adenineCount;   /* number of 'A' bases */
    int thymineCount;   /* number of 'T' bases */
    int cytosineCount;  /* number of 'C' bases */
    int guanineCount;   /* number of 'G' bases */
}baseCounts;
 
/* Head of the linked list holding the current strand. */
genome *strand;

/* Per-base tallies reported by printGenome. */
baseCounts baseData;

/* Number of bases in the strand; main overrides the default from argv[1]. */
int genomeLength = 100;

/* Maximum number of bases printGenome prints per row. */
int lineLength = 50;
 
/*
 * Returns the number of decimal digits needed to print num.
 *
 * num : value to measure.
 *
 * Any value below 10 (including zero and negative numbers) yields 1; the
 * sign of a negative number is not counted.
 */
int numDigits(int num){
    int len = 1;

    /* ">=" rather than ">" so that exact powers of ten (10, 100, ...)
       are counted with their full number of digits. */
    while(num >= 10){
        num /= 10;
        len++;
    }
    return len;
}
 
/*
 * Builds a fresh random strand of genomeLength bases.
 *
 * Resets every counter in baseData, then creates one list node per base,
 * choosing each base uniformly with rand() and tallying it. The head of the
 * new list is stored in the global strand; if genomeLength is not positive
 * the strand is left empty (NULL).
 *
 * Any list previously referenced by strand is not freed here.
 * The program is terminated with EXIT_FAILURE if memory runs out.
 */
void generateStrand(){
    static const char bases[4] = {'A','T','C','G'};
    genome **link = &strand;    /* slot that receives the next node */
    genome *node;
    int baseChoice, i;

    baseData.adenineCount = 0;
    baseData.thymineCount = 0;
    baseData.cytosineCount = 0;
    baseData.guanineCount = 0;
    strand = NULL;

    for(i=0;i<genomeLength;i++){
        baseChoice = rand()%4;

        node = malloc(sizeof *node);
        if(node==NULL){
            fprintf(stderr,"\nOut of memory while generating the strand\n");
            exit(EXIT_FAILURE);
        }
        node->base = bases[baseChoice];
        node->next = NULL;

        switch(baseChoice){
        case 0: baseData.adenineCount++; break;
        case 1: baseData.thymineCount++; break;
        case 2: baseData.cytosineCount++; break;
        default: baseData.guanineCount++; break;
        }

        /* Append through the link slot so the head needs no special case. */
        *link = node;
        link = &node->next;
    }
}
 
/*
 * Picks a random mutation kind and a random position in the strand.
 *
 * swapWeight, insertionWeight, deletionWeight : relative likelihood of a
 *     swap ('S'), an insertion ('I') and a deletion ('D'). Negative weights
 *     are treated as zero; if no weight is positive, all three kinds are
 *     considered equally likely.
 *
 * Returns a genomeChange whose position lies in [0, genomeLength), or is 0
 * when genomeLength is not positive.
 */
genomeChange generateMutation(int swapWeight, int insertionWeight, int deletionWeight){
    genomeChange mutationCommand;
    int totalWeight, mutationChoice;

    if(swapWeight<0)
        swapWeight = 0;
    if(insertionWeight<0)
        insertionWeight = 0;
    if(deletionWeight<0)
        deletionWeight = 0;

    totalWeight = swapWeight + insertionWeight + deletionWeight;

    /* rand()%0 is undefined, so fall back to a uniform choice. */
    if(totalWeight<=0){
        swapWeight = insertionWeight = deletionWeight = 1;
        totalWeight = 3;
    }

    mutationChoice = rand()%totalWeight;

    if(mutationChoice<swapWeight)
        mutationCommand.mutation = 'S';
    else if(mutationChoice<swapWeight+insertionWeight)
        mutationCommand.mutation = 'I';
    else
        mutationCommand.mutation = 'D';

    /* Same guard for the position: never take a remainder by zero. */
    mutationCommand.position = genomeLength>0 ? rand()%genomeLength : 0;

    return mutationCommand;
}
 
/*
 * Prints the strand followed by the per-base counts and their total.
 *
 * The strand is written in rows of at most lineLength bases, each row
 * prefixed with the zero-based index of its first base. An empty strand
 * prints no rows. The global lineLength is only read, never modified, so
 * the row width stays the same from one call to the next.
 */
void printGenome(){
    int width = numDigits(genomeLength);
    int perLine = lineLength<1 ? 1 : lineLength;   /* never allow a zero-width row */
    int offset = 0, column;
    genome *strandIterator = strand;

    printf("\n\nGenome : \n--------\n");

    /* Driven by the list itself, so no division by the row width is needed. */
    while(strandIterator!=NULL){
        printf("\n%*d%3s",width,offset,":");

        for(column=0;column<perLine && strandIterator!=NULL;column++){
            printf("%c",strandIterator->base);
            strandIterator = strandIterator->next;
        }
        offset += perLine;
    }

    printf("\n\nBase Counts\n-----------");

    printf("\n%*c%3s%*d",width,'A',":",width,baseData.adenineCount);
    printf("\n%*c%3s%*d",width,'T',":",width,baseData.thymineCount);
    printf("\n%*c%3s%*d",width,'C',":",width,baseData.cytosineCount);
    printf("\n%*c%3s%*d",width,'G',":",width,baseData.guanineCount);
    printf("\n\nTotal:%*d",width,baseData.adenineCount + baseData.thymineCount + baseData.cytosineCount + baseData.guanineCount);

    printf("\n");
}
 
/* Adds delta to the baseData counter that tracks the given base letter. */
static void adjustBaseCount(char base, int delta){
    switch(base){
    case 'A': baseData.adenineCount += delta; break;
    case 'T': baseData.thymineCount += delta; break;
    case 'C': baseData.cytosineCount += delta; break;
    case 'G': baseData.guanineCount += delta; break;
    default: break;
    }
}

/* Returns the base a swap turns the given base into (A<->T, C<->G). */
static char swapPartner(char base){
    switch(base){
    case 'A': return 'T';
    case 'T': return 'A';
    case 'C': return 'G';
    default: return 'C';
    }
}

/*
 * Applies numMutations random mutations to the global strand, printing a
 * line describing each one.
 *
 * numMutations : how many mutations to apply.
 * swapWeight, insertionWeight, deletionWeight : relative weights passed on
 *     to generateMutation to choose the kind of each mutation.
 *
 * A swap replaces a base with its partner, an insertion places a random base
 * in front of the chosen position, and a deletion removes the chosen base.
 * baseData and genomeLength are kept in step with every change. If the
 * strand becomes empty the remaining mutations are skipped.
 * The program is terminated with EXIT_FAILURE if memory runs out.
 */
void mutateStrand(int numMutations, int swapWeight, int insertionWeight, int deletionWeight){
    int i,j,width,baseChoice;
    char oldBase,newBase;
    genomeChange newMutation;
    genome *strandIterator, *strandFollower, *newStrand;

    for(i=0;i<numMutations;i++){
        /* Nothing to pick a position in once every base has been deleted. */
        if(strand==NULL || genomeLength<=0){
            printf("\nGenome is empty, skipping the remaining %d mutation(s)",numMutations-i);
            break;
        }

        newMutation = generateMutation(swapWeight,insertionWeight,deletionWeight);
        width = numDigits(genomeLength);

        /* strandFollower stays NULL while strandIterator is the head, which
           is how the head cases are recognised below. */
        strandFollower = NULL;
        strandIterator = strand;

        for(j=0;j<newMutation.position && strandIterator->next!=NULL;j++){
            strandFollower = strandIterator;
            strandIterator = strandIterator->next;
        }

        if(newMutation.mutation=='S'){
            oldBase = strandIterator->base;
            newBase = swapPartner(oldBase);
            strandIterator->base = newBase;
            adjustBaseCount(oldBase,-1);
            adjustBaseCount(newBase,1);
            printf("\nSwapping %c at position : %*d with %c",oldBase,width,newMutation.position,newBase);
        }

        else if(newMutation.mutation=='I'){
            baseChoice = rand()%4;

            newStrand = malloc(sizeof *newStrand);
            if(newStrand==NULL){
                fprintf(stderr,"\nOut of memory while inserting a base\n");
                exit(EXIT_FAILURE);
            }
            newStrand->base = "ATCG"[baseChoice];
            printf("\nInserting %c at position : %*d",newStrand->base,width,newMutation.position);
            adjustBaseCount(newStrand->base,1);
            newStrand->next = strandIterator;

            /* Inserting in front of the head must move the head itself,
               otherwise the list would loop back on itself. */
            if(strandFollower==NULL)
                strand = newStrand;
            else
                strandFollower->next = newStrand;
            genomeLength++;
        }

        else{
            /* Deleting the head must advance the head before freeing it. */
            if(strandFollower==NULL)
                strand = strandIterator->next;
            else
                strandFollower->next = strandIterator->next;
            strandIterator->next = NULL;
            printf("\nDeleting %c at position : %*d",strandIterator->base,width,newMutation.position);
            adjustBaseCount(strandIterator->base,-1);
            free(strandIterator);
            genomeLength--;
        }
    }
}
 
/*
 * Parses text as a base-10 integer that is at least minimum and fits in an
 * int. On success stores the value in *out and returns 1; otherwise leaves
 * *out untouched and returns 0.
 */
static int parseArgument(const char *text, int minimum, int *out){
    char *end;
    long value;

    errno = 0;
    value = strtol(text,&end,10);

    if(end==text || *end!='\0' || errno==ERANGE || value<minimum || value>INT_MAX)
        return 0;

    *out = (int)value;
    return 1;
}

/*
 * Entry point: generates a random strand, prints it, mutates it and prints
 * the result.
 *
 * Command line: <Genome Length> [mutations] [swap weight] [insertion weight]
 * [deletion weight]. Omitted optional values default to 10.
 *
 * Returns 0 after a normal run or after printing the usage text, and
 * EXIT_FAILURE when an argument is not a valid number in range.
 */
int main(int argc,char* argv[])
{
    int numMutations = 10, swapWeight = 10, insertWeight = 10, deleteWeight = 10;
    int valid = 1;
    long long totalWeight;

    if(argc==1||argc>6){
        printf("Usage : %s <Genome Length> <Optional number of mutations> <Optional Swapping weight> <Optional Insertion weight> <Optional Deletion weight>\n",argv[0]);
        return 0;
    }

    /* Every case falls through, so each supplied argument is parsed exactly
       once no matter how many were given. */
    switch(argc){
    case 6: valid &= parseArgument(argv[5],0,&deleteWeight);
            /* fallthrough */
    case 5: valid &= parseArgument(argv[4],0,&insertWeight);
            /* fallthrough */
    case 4: valid &= parseArgument(argv[3],0,&swapWeight);
            /* fallthrough */
    case 3: valid &= parseArgument(argv[2],0,&numMutations);
            /* fallthrough */
    case 2: valid &= parseArgument(argv[1],1,&genomeLength);
            break;
    default: break;
    }

    /* The weights are summed as int later on and used as a divisor, so the
       total must be positive and must not overflow. */
    totalWeight = (long long)swapWeight + insertWeight + deleteWeight;

    if(!valid || totalWeight<1 || totalWeight>INT_MAX){
        fprintf(stderr,"Invalid arguments : genome length must be at least 1, the other values must be non-negative integers and at least one weight must be positive\n");
        return EXIT_FAILURE;
    }

    srand(time(NULL));
    generateStrand();
    printf("\nOriginal:");
    printGenome();
    mutateStrand(numMutations,swapWeight,insertWeight,deleteWeight);
    printf("\n\nMutated:");
    printGenome();

    return 0;
}
</lang>
Sample run :
<pre>
Original:
 
Genome :
--------
 
0 :CGATGAGTTTCCTCCAAGGAGCAGGGCGTGACGGAAGGGAGGCTTAGGTC
50 :CGCATGCTCGTCGGCAGCCGGCTGGTGCCGTCGTAACCTTCACATTATTC
100 :TAGAATTTCGATGCACCTGATGACTCATACCCAGATGTAGGGGTACGCGA
150 :TGCAGATGCGGGCACGAGGAATTGTGGGCAAGCCGGCAGGTCTTTTGTAA
200 :GTTGTCACTAACTAAATAGAGGGATGGATGTTATAGCACACTACTGTCGA
250 :TTACGGACAGCGTCCCGATTCGTCATACGACCAGGATATATACTCGACGT
300 :CCAACAGGAGATTCACGTAGTGAACGCAGTTGACAGCCTGCTCGTATCTC
350 :CAGGGGTGGACTGCACCGTTCGTTAACTGCTGCCACATTAAACAGCTTCC
400 :CACTCCTTGACGCCAGACTCGGTACCACAGACCGTCAAGCTCCTATTTCC
450 :TTTGCAGTTAAAAAACACTATGGTGAAGGTCGGAGAGATGACCTCATCTA
 
Base Counts
-----------
A :124
T :118
C :126
G :132
 
Total:500
 
Inserting G at position : 205
Inserting G at position : 144
Inserting C at position : 171
Swapping A at position : 335 with T
Inserting A at position : 101
Swapping C at position : 109 with G
Swapping A at position : 306 with T
Inserting G at position : 51
Swapping G at position : 1 with C
Deleting G at position : 60
Swapping G at position : 66 with C
Inserting C at position : 41
Inserting C at position : 425
Swapping C at position : 173 with G
Inserting A at position : 319
Swapping G at position : 460 with C
Deleting T at position : 61
Swapping C at position : 160 with G
Inserting C at position : 251
Swapping G at position : 337 with C
Inserting G at position : 43
Inserting T at position : 146
Inserting T at position : 181
Deleting G at position : 53
Deleting A at position : 464
Swapping G at position : 362 with C
Swapping G at position : 190 with C
Swapping C at position : 280 with G
Inserting T at position : 479
Deleting C at position : 400
 
Mutated:
 
Genome :
--------
 
0 :CCATGAGTTTCCTCCAAGGAGCAGGGCGTGACGGAAGGGAGCGGCTTAGG
50 :TCCGCATGCTCCGGCACCCGGCTGGTGCCGTCGTAACCTTCACATTATTC
100 :TAAGAATTTGGATGCACCTGATGACTCATACCCAGATGTAGGGGTTGACG
150 :CGATGCAGATGGGGGCACGAGGAGATTGTGTGGCAAGCCGCCAGGTCTTT
200 :TGTAAGTTGTGCACTAACTAAATAGAGGGATGGATGTTATAGCACACTAC
250 :TGTCCGATTACGGACAGCGTCCCGATTCGTGATACGACCAGGATATATAC
300 :TCGACGTCCTACAGGAGATTCAACGTAGTGAACGCAGTTCTCAGCCTGCT
350 :CGTATCTCCAGGCGTGGACTGCACCGTTCGTTAACTGCTGCCACATTAAA
400 :AGCTTCCCACTCCTTGACGCCAGACTCCGGTACCACAGACCGTCAAGCTC
450 :CTATTTCCTTTCCGTTAAAAAACACTATTGGTGAAGGTCGGAGAGATGAC
500 :CTCATCTA
 
Base Counts
-----------
A :126
T :121
C :130
G :136
 
Total:513
</pre>
 
=={{header|Go}}==
503

edits

Cookies help us deliver our services. By using our services, you agree to our use of cookies.