Bioinformatics/Sequence mutation

From Rosetta Code
Task
Bioinformatics/Sequence mutation
You are encouraged to solve this task according to the task description, using any language you may know.
Task

Given a string of characters A, C, G, and T representing a DNA sequence write a routine to mutate the sequence, (string) by:

  1. Choosing a random base position in the sequence.
  2. Mutate the sequence by doing one of either:
    1. Swap the base at that position by changing it to one of A, C, G, or T. (which has a chance of swapping the base for the same base)
    2. Delete the chosen base at the position.
    3. Insert another base randomly chosen from A,C, G, or T into the sequence at that position.
  3. Randomly generate a test DNA sequence of at least 200 bases
  4. "Pretty print" the sequence and a count of its size, and the count of each base in the sequence
  5. Mutate the sequence ten times.
  6. "Pretty print" the sequence after all mutations, and a count of its size, and the count of each base in the sequence.
Extra credit
  • Give more information on the individual mutations applied.
  • Allow mutations to be weighted and/or chosen.

11l[edit]

Translation of: Python
UInt32 seed = 0
F nonrandom(n)
   :seed = 1664525 * :seed + 1013904223
   R Int(:seed >> 16) % n
F nonrandom_choice(lst)
   R lst[nonrandom(lst.len)]

F basecount(dna)
   DefaultDict[Char, Int] d
   L(c) dna
      d[c]++
   R sorted(d.items())

F seq_split(dna, n = 50)
   R (0 .< dna.len).step(n).map(i -> @dna[i .< i + @n])

F seq_pp(dna, n = 50)
   L(part) seq_split(dna, n)
      print(‘#5: #.’.format(L.index * n, part))
   print("\n  BASECOUNT:")
   V tot = 0
   L(base, count) basecount(dna)
      print(‘    #3: #.’.format(base, count))
      tot += count
   V (base, count) = (‘TOT’, tot)
   print(‘    #3= #.’.format(base, count))

F seq_mutate(String =dna; count = 1, kinds = ‘IDSSSS’, choice = ‘ATCG’)
   [(String, Int)] mutation
   V k2txt = [‘I’ = ‘Insert’, ‘D’ = ‘Delete’, ‘S’ = ‘Substitute’]
   L 0 .< count
      V kind = nonrandom_choice(kinds)
      V index = nonrandom(dna.len + 1)
      I kind == ‘I’
         dna = dna[0 .< index]‘’nonrandom_choice(choice)‘’dna[index..]
      E I kind == ‘D’ & !dna.empty
         dna = dna[0 .< index]‘’dna[index+1..]
      E I kind == ‘S’ & !dna.empty
         dna = dna[0 .< index]‘’nonrandom_choice(choice)‘’dna[index+1..]
      mutation.append((k2txt[kind], index))
   R (dna, mutation)

print(‘SEQUENCE:’)
V sequence = ‘TCAATCATTAATCGATTAATACATTCAATTTGAACATCTCCAGGAGAAGGCAGGGTAATCTCGTGTAGCCGTGCTTGGGGCCTCCGATATGGCCGGGGAATTTCAAAGTATAGTGTGCATCCCCTCATAATACATAGATCTATAGGTAAGTATATGGGTTGACGTTGTTAGATGCGATACACGTGCACACTTTATGAATTTTACGTTCCTCTGCCTAGAGTGCCAAGTTTCAATTTGCTACGGTTCCTCA’
seq_pp(sequence)
print("\n\nMUTATIONS:")
V (mseq, m) = seq_mutate(sequence, 10)
L(kind, index) m
   print(‘ #10 @#.’.format(kind, index))
print()
seq_pp(mseq)
Output:
SEQUENCE:
    0: TCAATCATTAATCGATTAATACATTCAATTTGAACATCTCCAGGAGAAGG
   50: CAGGGTAATCTCGTGTAGCCGTGCTTGGGGCCTCCGATATGGCCGGGGAA
  100: TTTCAAAGTATAGTGTGCATCCCCTCATAATACATAGATCTATAGGTAAG
  150: TATATGGGTTGACGTTGTTAGATGCGATACACGTGCACACTTTATGAATT
  200: TTACGTTCCTCTGCCTAGAGTGCCAAGTTTCAATTTGCTACGGTTCCTCA

  BASECOUNT:
      A: 66
      C: 51
      G: 55
      T: 78
    TOT= 250


MUTATIONS:
 Substitute @184
 Substitute @70
 Substitute @28
 Substitute @6
 Substitute @25
 Substitute @197
 Substitute @81
 Substitute @130
 Substitute @76
     Delete @76

    0: TCAATCTTTAATCGATTAATACATTCAATTTGAACATCTCCAGGAGAAGG
   50: CAGGGTAATCTCGTGTAGCCCTGCTTGGGCATCCGATATGGCCGGGGAAT
  100: TTCAAAGTATAGTGTGCATCCCCTCATAACACATAGATCTATAGGTAAGT
  150: ATATGGGTTGACGTTGTTAGATGCGATACACGTACACACTTTATGATTTT
  200: TACGTTCCTCTGCCTAGAGTGCCAAGTTTCAATTTGCTACGGTTCCTCA

  BASECOUNT:
      A: 66
      C: 52
      G: 52
      T: 79
    TOT= 249

Ada[edit]

with Ada.Containers.Vectors;
with Ada.Numerics.Discrete_Random;
with Ada.Text_Io;

procedure Mutations is

   Width : constant := 60;

   type Nucleotide_Type is (A, C, G, T);
   type Operation_Type is (Delete, Insert, Swap);
   type Position_Type is new Natural;

   package Position_Io   is new Ada.Text_Io.Integer_Io (Position_Type);
   package Nucleotide_Io is new Ada.Text_Io.Enumeration_Io (Nucleotide_Type);
   package Operation_Io  is new Ada.Text_Io.Enumeration_Io (Operation_Type);

   use Ada.Text_Io, Position_Io, Nucleotide_Io, Operation_Io;

   package Sequence_Vectors is new Ada.Containers.Vectors (Index_Type   => Position_Type,
                                                           Element_Type => Nucleotide_Type);
   package Nucleotide_Generators is new Ada.Numerics.Discrete_Random (Result_Subtype => Nucleotide_Type);
   package Operation_Generators  is new Ada.Numerics.Discrete_Random (Result_Subtype => Operation_Type);

   procedure Pretty_Print (Sequence : Sequence_Vectors.Vector) is
      First : Position_Type := Sequence.First_Index;
      Last  : Position_Type;
      Count : array (Nucleotide_Type) of Natural := (others  => 0);
   begin
      Last := Position_Type'Min (First + Width - 1,
                                 Sequence.Last_Index);
      loop
         Position_Io.Put (First, Width => 4);
         Put (": ");
         for N in First .. Last loop
            declare
               Nucleotide : Nucleotide_Type renames Sequence (N);
            begin
               Put (Nucleotide);
               Count (Nucleotide) := Count (Nucleotide) + 1;
            end;
         end loop;
         New_Line;
         exit when Last = Sequence.Last_Index;
         First := Last + 1;
         Last  := Position_Type'Min (First + Width - 1,
                                     Sequence.Last_Index);
      end loop;

      for N in Count'Range loop
         Put ("Count of "); Put (N); Put (" is "); Put (Natural'Image (Count (N))); New_Line;
      end loop;

   end Pretty_Print;

   function Random_Position (First, Last : Position_Type) return Position_Type is
      subtype Position_Range is Position_Type range First .. Last;
      package Position_Generators is new Ada.Numerics.Discrete_Random (Result_Subtype => Position_Range);
      Generator : Position_Generators.Generator;
   begin
      Position_Generators.Reset (Generator);
      return Position_Generators.Random (Generator);
   end Random_Position;

   Nucleotide_Generator : Nucleotide_Generators.Generator;
   Operation_Generator  : Operation_Generators.Generator;

   Sequence   : Sequence_Vectors.Vector;
   Position   : Position_Type;
   Nucleotide : Nucleotide_Type;
   Operation  : Operation_Type;
begin
   Nucleotide_Generators.Reset (Nucleotide_Generator);
   Operation_Generators.Reset (Operation_Generator);

   for A in 1 .. 200 loop
      Sequence.Append (Nucleotide_Generators.Random (Nucleotide_Generator));
   end loop;

   Put_Line ("Initial sequence:");
   Pretty_Print (Sequence);
   New_Line;

   Put_Line ("Mutations:");
   for Mutate in 1 .. 10 loop

      Operation := Operation_Generators.Random (Operation_Generator);
      case Operation is

         when Delete =>
            Position := Random_Position (Sequence.First_Index, Sequence.Last_Index);
            Sequence.Delete (Index => Position);
            Put (Operation); Put (" at position "); Put (Position, Width => 0); New_Line;

         when Insert =>
            Position   := Random_Position (Sequence.First_Index, Sequence.Last_Index + 1);
            Nucleotide := Nucleotide_Generators.Random (Nucleotide_Generator);
            Sequence.Insert (Before   => Position,
                             New_Item => Nucleotide);
            Put (Operation); Put (" "); Put (Nucleotide); Put (" at position ");
            Put (Position, Width => 0); New_Line;

         when Swap =>
            Position   := Random_Position (Sequence.First_Index, Sequence.Last_Index);
            Nucleotide := Nucleotide_Generators.Random (Nucleotide_Generator);
            Sequence.Replace_Element (Index    => Position,
                                      New_Item => Nucleotide);
            Put (Operation); Put (" at position "); Put (Position, Width => 0);
            Put (" to "); Put (Nucleotide); New_Line;

      end case;
   end loop;

   New_Line;
   Put_Line ("Mutated sequence:");
   Pretty_Print (Sequence);

end Mutations;
Output:
Initial sequence:
   0: GCTGAGTCCGAATTAGTATTCATGAGATACGCATGTCAGTACGGCGACGACACGGGAAGA
  60: GCAGATGAAAACTACTGGGGAGCTACCGAGCTGCCGTCGATTGTACGGATGTTATATTTC
 120: CCATAGAACTACGAAGTTTTAGGATCCTTTCGGCGATGTGATAAGCAGGTATCAGTAGTA
 180: AGCGAAGCGTTGACGTTTTT
Count of A is  55
Count of C is  37
Count of G is  56
Count of T is  52

Mutations:
DELETE at position 129
SWAP at position 172 to T
SWAP at position 28 to T
INSERT A at position 193
DELETE at position 164
SWAP at position 165 to G
DELETE at position 91
INSERT A at position 169
INSERT C at position 72
DELETE at position 146

Mutated sequence:
   0: GCTGAGTCCGAATTAGTATTCATGAGATTCGCATGTCAGTACGGCGACGACACGGGAAGA
  60: GCAGATGAAAACCTACTGGGGAGCTACCGAGCGCCGTCGATTGTACGGATGTTATATTTC
 120: CCATAGAACACGAAGTTTTAGGATCCTTCGGCGATGTGATAAGAGGTATACTGTAGTAAG
 180: CGAAGCGTTGACAGTTTTT
Count of A is  55
Count of C is  37
Count of G is  56
Count of T is  51

Arturo[edit]

bases: ["A" "T" "G" "C"]
dna: map 1..200 => [sample bases]

prettyPrint: function [in][
    count: #[ A: 0, T: 0, G: 0, C: 0 ]

    loop.with:'i split.every:50 in 'line [
        prints [pad to :string i*50 3 ":"]
        print map split.every:10 line => join

        loop split line 'ch [
            case [ch=]
                when? -> "A" -> count\A: count\A + 1
                when? -> "T" -> count\T: count\T + 1
                when? -> "G" -> count\G: count\G + 1
                when? -> "C" -> count\C: count\C + 1
                else []
        ]
    ]
    print ["Total count => A:" count\A, "T:" count\T "G:" count\G "C:" count\C]
]

performRandomModifications: function [seq,times][
    result: new seq

    loop times [x][
        what: random 1 3
        case [what=]
            when? -> 1 [
                ind: random 0 (size result)
                previous: get result ind
                next: sample bases
                set result ind next
                print ["changing base at position" ind "from" previous "to" next]
            ]
            when? -> 2 [
                ind: random 0 (size result)
                next: sample bases
                result: insert result ind next
                print ["inserting base" next "at position" ind]
            ]
            else [
                ind: random 0 (size result)
                previous: get result ind
                result: remove result .index ind
                print ["deleting base" previous "at position" ind]
            ]
    ]
    return result
]

print "------------------------------"
print " Initial sequence"
print "------------------------------"
prettyPrint dna
print ""

print "------------------------------"
print " Modifying sequence"
print "------------------------------"
dna: performRandomModifications dna 10
print ""

print "------------------------------"
print " Final sequence"
print "------------------------------"
prettyPrint dna
print ""
Output:
------------------------------
 Initial sequence
------------------------------
  0 : GGCCGAAGGC GGCATCAGTG GACTGGGTTG TGGAGCAAAA CGAACACGCC 
 50 : GAGAGCCGGA GGGTTCGGAA GATTTATTTA GGACGAAATC CCGGACATGT 
100 : CGCCCTAGAT TGGCCTCCCT ACACCTAGTA TTATTACTCC TACGCGTTTG 
150 : CCTACTGGGT GTCATCTCGT GTTAATCGCA AAATCACCTA CGAATTGCCC 
Total count => A: 48 T: 47 G: 53 C: 52 

------------------------------
 Modifying sequence
------------------------------
deleting base A at position 180 
changing base at position 110 from T to G 
inserting base T at position 104 
inserting base C at position 180 
changing base at position 183 from A to A 
deleting base C at position 90 
changing base at position 6 from A to T 
inserting base C at position 146 
inserting base G at position 4 
changing base at position 150 from T to C 

------------------------------
 Final sequence
------------------------------
  0 : GGCCGGATGG CGGCATCAGT GGACTGGGTT GTGGAGCAAA ACGAACACGC 
 50 : CGAGAGCCGG AGGGTTCGGA AGATTTATTT AGGACGAAAT CCGGACATGT 
100 : CGCCTCTAGA TGGGCCTCCC TACACCTAGT ATTATTACTC CTACGCGCTT 
150 : CGCCTACTGG GTGTCATCTC GTGTTAATCG CCAAATCACC TACGAATTGC 
200 : CC 
Total count => A: 46 T: 47 G: 55 C: 54

C[edit]

Adenine ( A ) is always swapped for Thymine ( T ) and vice versa. Similarly with Cytosine ( C ) and Guanine ( G ).

#include<stdlib.h>
#include<stdio.h>
#include<time.h>

typedef struct genome{
    char base;
    struct genome *next;
}genome;

typedef struct{
    char mutation;
    int position;
}genomeChange;

typedef struct{
    int adenineCount,thymineCount,cytosineCount,guanineCount;
}baseCounts;

genome *strand;
baseCounts baseData;
int genomeLength = 100, lineLength = 50;

int numDigits(int num){
    int len = 1;

    while(num>10){
        num /= 10;
        len++;
    }
    return len;
}

void generateStrand(){

    int baseChoice = rand()%4, i;
    genome *strandIterator, *newStrand;

    baseData.adenineCount = 0;
    baseData.thymineCount = 0;
    baseData.cytosineCount = 0;
    baseData.guanineCount = 0;
    
    strand = (genome*)malloc(sizeof(genome));
    strand->base = baseChoice==0?'A':(baseChoice==1?'T':(baseChoice==2?'C':'G'));
    baseChoice==0?baseData.adenineCount++:(baseChoice==1?baseData.thymineCount++:(baseChoice==2?baseData.cytosineCount++:baseData.guanineCount++));
    strand->next = NULL;

    strandIterator = strand;

    for(i=1;i<genomeLength;i++){
        baseChoice = rand()%4;

        newStrand = (genome*)malloc(sizeof(genome));
        newStrand->base = baseChoice==0?'A':(baseChoice==1?'T':(baseChoice==2?'C':'G'));
        baseChoice==0?baseData.adenineCount++:(baseChoice==1?baseData.thymineCount++:(baseChoice==2?baseData.cytosineCount++:baseData.guanineCount++));
        newStrand->next = NULL;

        strandIterator->next = newStrand;
        strandIterator = newStrand;
    }
}

genomeChange generateMutation(int swapWeight, int insertionWeight, int deletionWeight){
    int mutationChoice = rand()%(swapWeight + insertionWeight + deletionWeight);
    
    genomeChange mutationCommand;

    mutationCommand.mutation = mutationChoice<swapWeight?'S':((mutationChoice>=swapWeight && mutationChoice<swapWeight+insertionWeight)?'I':'D');
    mutationCommand.position = rand()%genomeLength;

    return mutationCommand;
}

void printGenome(){
    int rows, width = numDigits(genomeLength), len = 0,i,j;
	lineLength = (genomeLength<lineLength)?genomeLength:lineLength;
	
	rows = genomeLength/lineLength + (genomeLength%lineLength!=0);
	
    genome* strandIterator = strand;

    printf("\n\nGenome : \n--------\n");

    for(i=0;i<rows;i++){
        printf("\n%*d%3s",width,len,":");

        for(j=0;j<lineLength && strandIterator!=NULL;j++){
                printf("%c",strandIterator->base);
                strandIterator = strandIterator->next;
        }
        len += lineLength;
    }

    while(strandIterator!=NULL){
            printf("%c",strandIterator->base);
            strandIterator = strandIterator->next;
    }

    printf("\n\nBase Counts\n-----------");

    printf("\n%*c%3s%*d",width,'A',":",width,baseData.adenineCount);
    printf("\n%*c%3s%*d",width,'T',":",width,baseData.thymineCount);
    printf("\n%*c%3s%*d",width,'C',":",width,baseData.cytosineCount);
    printf("\n%*c%3s%*d",width,'G',":",width,baseData.guanineCount);
	
	printf("\n\nTotal:%*d",width,baseData.adenineCount + baseData.thymineCount + baseData.cytosineCount + baseData.guanineCount);

    printf("\n");
}

void mutateStrand(int numMutations, int swapWeight, int insertionWeight, int deletionWeight){
    int i,j,width,baseChoice;
    genomeChange newMutation;
    genome *strandIterator, *strandFollower, *newStrand;

    for(i=0;i<numMutations;i++){
        strandIterator = strand;
        strandFollower = strand;
        newMutation = generateMutation(swapWeight,insertionWeight,deletionWeight);
        width = numDigits(genomeLength);

        for(j=0;j<newMutation.position;j++){
            strandFollower = strandIterator;
            strandIterator = strandIterator->next;
        }
            
        if(newMutation.mutation=='S'){
            if(strandIterator->base=='A'){
                strandIterator->base='T';
                printf("\nSwapping A at position : %*d with T",width,newMutation.position);
            }
            else if(strandIterator->base=='A'){
                strandIterator->base='T';
                printf("\nSwapping A at position : %*d with T",width,newMutation.position);
            }
            else if(strandIterator->base=='C'){
                strandIterator->base='G';
                printf("\nSwapping C at position : %*d with G",width,newMutation.position);
            }
            else{
                strandIterator->base='C';
                printf("\nSwapping G at position : %*d with C",width,newMutation.position);
            }
        }

        else if(newMutation.mutation=='I'){
            baseChoice = rand()%4;

            newStrand = (genome*)malloc(sizeof(genome));
            newStrand->base = baseChoice==0?'A':(baseChoice==1?'T':(baseChoice==2?'C':'G'));
            printf("\nInserting %c at position : %*d",newStrand->base,width,newMutation.position);
            baseChoice==0?baseData.adenineCount++:(baseChoice==1?baseData.thymineCount++:(baseChoice==2?baseData.cytosineCount++:baseData.guanineCount++));
            newStrand->next = strandIterator;
            strandFollower->next = newStrand;
            genomeLength++;
        }

        else{
            strandFollower->next = strandIterator->next;
            strandIterator->next = NULL;
            printf("\nDeleting %c at position : %*d",strandIterator->base,width,newMutation.position);
            free(strandIterator);
            genomeLength--;
        }
    }
}

int main(int argc,char* argv[])
{
    int numMutations = 10, swapWeight = 10, insertWeight = 10, deleteWeight = 10;

    if(argc==1||argc>6){
                printf("Usage : %s <Genome Length> <Optional number of mutations> <Optional Swapping weight> <Optional Insertion weight> <Optional Deletion weight>\n",argv[0]);
                return 0;
    }

    switch(argc){
        case 2: genomeLength = atoi(argv[1]);
                break;
        case 3: genomeLength = atoi(argv[1]);
                numMutations = atoi(argv[2]);
                break;
        case 4: genomeLength = atoi(argv[1]);
                numMutations = atoi(argv[2]);
                swapWeight   = atoi(argv[3]);
                break;    
        case 5: genomeLength = atoi(argv[1]);
                numMutations = atoi(argv[2]);
                swapWeight   = atoi(argv[3]);
                insertWeight = atoi(argv[4]);
                break; 
        case 6: genomeLength = atoi(argv[1]);
                numMutations = atoi(argv[2]);
                swapWeight   = atoi(argv[3]);
                insertWeight = atoi(argv[4]);
                deleteWeight = atoi(argv[5]);
                break; 
    };

    srand(time(NULL));
    generateStrand();
	
	printf("\nOriginal:");
    printGenome();
    mutateStrand(numMutations,swapWeight,insertWeight,deleteWeight);
    
	printf("\n\nMutated:");
	printGenome();

    return 0;
}

Sample run :

C:\My Projects\networks>a 500 30 15 10 5

Original:

Genome :
--------

  0  :CGATGAGTTTCCTCCAAGGAGCAGGGCGTGACGGAAGGGAGGCTTAGGTC
 50  :CGCATGCTCGTCGGCAGCCGGCTGGTGCCGTCGTAACCTTCACATTATTC
100  :TAGAATTTCGATGCACCTGATGACTCATACCCAGATGTAGGGGTACGCGA
150  :TGCAGATGCGGGCACGAGGAATTGTGGGCAAGCCGGCAGGTCTTTTGTAA
200  :GTTGTCACTAACTAAATAGAGGGATGGATGTTATAGCACACTACTGTCGA
250  :TTACGGACAGCGTCCCGATTCGTCATACGACCAGGATATATACTCGACGT
300  :CCAACAGGAGATTCACGTAGTGAACGCAGTTGACAGCCTGCTCGTATCTC
350  :CAGGGGTGGACTGCACCGTTCGTTAACTGCTGCCACATTAAACAGCTTCC
400  :CACTCCTTGACGCCAGACTCGGTACCACAGACCGTCAAGCTCCTATTTCC
450  :TTTGCAGTTAAAAAACACTATGGTGAAGGTCGGAGAGATGACCTCATCTA

Base Counts
-----------
  A  :124
  T  :118
  C  :126
  G  :132

Total:500

Inserting G at position : 205
Inserting G at position : 144
Inserting C at position : 171
Swapping A at position : 335 with T
Inserting A at position : 101
Swapping C at position : 109 with G
Swapping A at position : 306 with T
Inserting G at position :  51
Swapping G at position :   1 with C
Deleting G at position :  60
Swapping G at position :  66 with C
Inserting C at position :  41
Inserting C at position : 425
Swapping C at position : 173 with G
Inserting A at position : 319
Swapping G at position : 460 with C
Deleting T at position :  61
Swapping C at position : 160 with G
Inserting C at position : 251
Swapping G at position : 337 with C
Inserting G at position :  43
Inserting T at position : 146
Inserting T at position : 181
Deleting G at position :  53
Deleting A at position : 464
Swapping G at position : 362 with C
Swapping G at position : 190 with C
Swapping C at position : 280 with G
Inserting T at position : 479
Deleting C at position : 400

Mutated:

Genome :
--------

  0  :CCATGAGTTTCCTCCAAGGAGCAGGGCGTGACGGAAGGGAGCGGCTTAGG
 50  :TCCGCATGCTCCGGCACCCGGCTGGTGCCGTCGTAACCTTCACATTATTC
100  :TAAGAATTTGGATGCACCTGATGACTCATACCCAGATGTAGGGGTTGACG
150  :CGATGCAGATGGGGGCACGAGGAGATTGTGTGGCAAGCCGCCAGGTCTTT
200  :TGTAAGTTGTGCACTAACTAAATAGAGGGATGGATGTTATAGCACACTAC
250  :TGTCCGATTACGGACAGCGTCCCGATTCGTGATACGACCAGGATATATAC
300  :TCGACGTCCTACAGGAGATTCAACGTAGTGAACGCAGTTCTCAGCCTGCT
350  :CGTATCTCCAGGCGTGGACTGCACCGTTCGTTAACTGCTGCCACATTAAA
400  :AGCTTCCCACTCCTTGACGCCAGACTCCGGTACCACAGACCGTCAAGCTC
450  :CTATTTCCTTTCCGTTAAAAAACACTATTGGTGAAGGTCGGAGAGATGAC
500  :CTCATCTA

Base Counts
-----------
  A  :126
  T  :121
  C  :130
  G  :136

Total:513

C++[edit]

#include <array>
#include <iomanip>
#include <iostream>
#include <random>
#include <string>

class sequence_generator {
public:
    sequence_generator();
    std::string generate_sequence(size_t length);
    void mutate_sequence(std::string&);
    static void print_sequence(std::ostream&, const std::string&);
    enum class operation { change, erase, insert };
    void set_weight(operation, unsigned int);
private:
    char get_random_base() {
        return bases_[base_dist_(engine_)];
    }
    operation get_random_operation();
    static const std::array<char, 4> bases_;
    std::mt19937 engine_;
    std::uniform_int_distribution<size_t> base_dist_;
    std::array<unsigned int, 3> operation_weight_;
    unsigned int total_weight_;
};

const std::array<char, 4> sequence_generator::bases_{ 'A', 'C', 'G', 'T' };

sequence_generator::sequence_generator() : engine_(std::random_device()()),
    base_dist_(0, bases_.size() - 1),
    total_weight_(operation_weight_.size()) {
    operation_weight_.fill(1);
}

sequence_generator::operation sequence_generator::get_random_operation() {
    std::uniform_int_distribution<unsigned int> op_dist(0, total_weight_ - 1);
    unsigned int n = op_dist(engine_), op = 0, weight = 0;
    for (; op < operation_weight_.size(); ++op) {
        weight += operation_weight_[op];
        if (n < weight)
            break;
    }
    return static_cast<operation>(op);
}

void sequence_generator::set_weight(operation op, unsigned int weight) {
    total_weight_ -= operation_weight_[static_cast<size_t>(op)];
    operation_weight_[static_cast<size_t>(op)] = weight;
    total_weight_ += weight;
}

std::string sequence_generator::generate_sequence(size_t length) {
    std::string sequence;
    sequence.reserve(length);
    for (size_t i = 0; i < length; ++i)
        sequence += get_random_base();
    return sequence;
}

void sequence_generator::mutate_sequence(std::string& sequence) {
    std::uniform_int_distribution<size_t> dist(0, sequence.length() - 1);
    size_t pos = dist(engine_);
    char b;
    switch (get_random_operation()) {
    case operation::change:
        b = get_random_base();
        std::cout << "Change base at position " << pos << " from "
            << sequence[pos] << " to " << b << '\n';
        sequence[pos] = b;
        break;
    case operation::erase:
        std::cout << "Erase base " << sequence[pos] << " at position "
            << pos << '\n';
        sequence.erase(pos, 1);
        break;
    case operation::insert:
        b = get_random_base();
        std::cout << "Insert base " << b << " at position "
            << pos << '\n';
        sequence.insert(pos, 1, b);
        break;
    }
}

void sequence_generator::print_sequence(std::ostream& out, const std::string& sequence) {
    constexpr size_t base_count = bases_.size();
    std::array<size_t, base_count> count = { 0 };
    for (size_t i = 0, n = sequence.length(); i < n; ++i) {
        if (i % 50 == 0) {
            if (i != 0)
                out << '\n';
            out << std::setw(3) << i << ": ";
        }
        out << sequence[i];
        for (size_t j = 0; j < base_count; ++j) {
            if (bases_[j] == sequence[i]) {
                ++count[j];
                break;
            }
        }
    }
    out << '\n';
    out << "Base counts:\n";
    size_t total = 0;
    for (size_t j = 0; j < base_count; ++j) {
        total += count[j];
        out << bases_[j] << ": " << count[j] << ", ";
    }
    out << "Total: " << total << '\n';
}

int main() {
    sequence_generator gen;
    gen.set_weight(sequence_generator::operation::change, 2);
    std::string sequence = gen.generate_sequence(250);
    std::cout << "Initial sequence:\n";
    sequence_generator::print_sequence(std::cout, sequence);
    constexpr int count = 10;
    for (int i = 0; i < count; ++i)
        gen.mutate_sequence(sequence);
    std::cout << "After " << count << " mutations:\n";
    sequence_generator::print_sequence(std::cout, sequence);
    return 0;
}
Output:
Initial sequence:
  0: CATATCTGCGTAAGGCGTCGAATCCTTAGAGAAAACTCGCCAAACGCGCT
 50: AGCCAAGACTTAATTAAAGGCTGGCCACATAACAGTAGTACTGCAAGGAT
100: GACGTGACTACAACGTGGAATACTCTATCTGATGAGCCCCACGTGGGCCA
150: ACCTTCCAATGCGGCGTCTTGCAGTCTTCGGACTTTGCCTCTACTAGGAG
200: TAGCCATGACGAGTGGTGAGGCGGAGGGACCAATTCCGCACTTCGAATCG
Base counts:
A: 67, C: 65, G: 64, T: 54, Total: 250
Change base at position 39 from C to C
Erase base T at position 194
Insert base T at position 70
Insert base C at position 190
Insert base T at position 45
Erase base A at position 111
Change base at position 96 from A to C
Change base at position 113 from A to C
Change base at position 5 from C to A
Change base at position 44 from C to T
After 10 mutations:
  0: CATATATGCGTAAGGCGTCGAATCCTTAGAGAAAACTCGCCAAATTGCGC
 50: TAGCCAAGACTTAATTAAAGGTCTGGCCACATAACAGTAGTACTGCCAGG
100: ATGACGTGACTCACCGTGGAATACTCTATCTGATGAGCCCCACGTGGGCC
150: AACCTTCCAATGCGGCGTCTTGCAGTCTTCGGACTTTGCCCTCTACAGGA
200: GTAGCCATGACGAGTGGTGAGGCGGAGGGACCAATTCCGCACTTCGAATC
250: G
Base counts:
A: 65, C: 66, G: 64, T: 56, Total: 251

Common Lisp[edit]

Usage :

(mutate (<Genome length> <Number of mutations>
:ins_w <Insertion weight> :swp_w <Swap weight> :del_w <Delete weight>
:genome <Genome Sequence>)

All keys are optional. <Genome length> is discarded when :genome is set.

(defun random_base ()
    (random 4))

(defun basechar (base)
    (char "ACTG" base))

(defun generate_genome (genome_length)
    (let (genome '())
        (loop for i below genome_length do
            (push (random_base) genome))
        (return-from generate_genome genome)))

(defun map_genome (genome)
    (let (seq '())
        (loop for n from (1- (length genome)) downto 0 do
            (push (position (char genome n) "ACTG") seq))
        seq))

(defun output_genome_info (genome &optional (genome_name "ORIGINAL"))
    (let ((ac 0) (tc 0) (cc 0) (gc 0))
        (format t "~%           ---- ~a ----" genome_name)
        (do ((n 0 (1+ n)))
            ((= n (length genome)))
            (when (= 0 (mod n 50)) (format t "~& ~4d: " (1+ n)))
            (case (nth n genome)
                (0 (incf ac))
                (1 (incf tc))
                (2 (incf cc))
                (3 (incf gc)))
            (format t "~c" (basechar (nth n genome))))
            (format t "~2%- Total : ~3d~%A : ~d   C : ~d~%T : ~d   G : ~d~2%" (length genome) ac tc cc gc)))

(defun insert_base (genome)
    (let ((place (random (length genome)))
        (base (random_base)))
        (format t "Insert      +  ~c   at   ~3d~%"
            (basechar base) (+ 1 place))
        (if (= 0 place)
            (push base genome)
            (push base (cdr (nthcdr (1- place) genome))))
        (return-from insert_base genome)))

(defun swap_base (genome)
    (let ((place (random (length genome)))
        (base (random_base)))
        (format t "Swap      ~c -> ~c   at   ~3d~%"
            (basechar (nth place genome)) (basechar base) (+ 1 place))
        (setf (nth place genome) base)
        (return-from swap_base genome)))

(defun delete_base (genome)
    (let ((place (random (length genome))))
        (format t "Delete      -  ~c   at   ~3d~%"
            (basechar (nth place genome)) (+ 1 place))
        (if (= 0 place) (pop genome)
        (pop (cdr (nthcdr (1- place) genome))))
        (return-from delete_base genome)))

(defun mutate (genome_length n_mutations
    &key (ins_w 10) (swp_w 10) (del_w 10)
        (genome (generate_genome genome_length) has_genome))
    (if has_genome (setf genome (map_genome genome)))
    (output_genome_info genome)
    (format t "      ---- MUTATION SEQUENCE ----~%")
    (do ((n 0 (1+ n)))
        ((= n n_mutations))
        (setf mutation_type (random (+ ins_w swp_w del_w)))
        (format t "~3d : " (1+ n))
        (setf genome
            (cond ((< mutation_type ins_w) (insert_base genome))
                ((< mutation_type (+ ins_w swp_w)) (swap_base genome))
                (t (delete_base genome)))))
    (output_genome_info genome "MUTATED"))
Output:
[CLISP]> (mutate 500 30 :ins_w 5 :swp_w 10 :del_w 15)

           ---- ORIGINAL ----
    1: CTGCCATGCGTAAGATAGCAGAAGTGTTCGCGTATATGTTCATTTTGGCT
   51: TCAACCTACGGGCGTATACACATTTCAGCTCGGAGCTCGGGCCCCAGTAC
  101: CTAGTTTTTCTTTCAAGCTGGAACTACGGCGCCTTGTGCCGTAATCCGTC
  151: GGGGTGATAATTCTAACTTGCTAACACGCGCAGATGGTCCGGTCGCGGGT
  201: CAAACATTCGCCAAGGTCAACTTACCCTTAAAAGGCTTGCAACAGGGACC
  251: AGTACTAGGAAGTAGACTTCATGGATCTTGGGCATAGTGGACTAGCTTAT
  301: TTACCAGCGACGTTCTTGCACCCGAGACATTATCATAGTTCGACAGCGTT
  351: GAACTGTGCTTAGGTTAATGCCGCGCTTCCTACCTTGAACTAAACACAGC
  401: ACACAGTGAGAACGTAGCGGCCTCTTTTCCTGCCTGCAATTCGTAAGCCT
  451: GATTTGACGGGTCTGGAGTTTGGCTCGGAGTAGGTCTGCTACTTAAATTC

- Total : 500
A : 116   C : 124
T : 137   G : 123

      ---- MUTATION SEQUENCE ----
  1 : Swap      T -> C   at    74
  2 : Delete      -  T   at   208
  3 : Insert      +  C   at   332
  4 : Insert      +  G   at   287
  5 : Delete      -  T   at   188
  6 : Swap      G -> A   at   263
  7 : Delete      -  G   at   323
  8 : Delete      -  A   at   336
  9 : Swap      T -> A   at   426
 10 : Swap      G -> G   at    38
 11 : Swap      G -> C   at   288
 12 : Delete      -  T   at    11
 13 : Swap      G -> A   at   197
 14 : Insert      +  T   at   476
 15 : Swap      C -> A   at     5
 16 : Swap      A -> T   at   211
 17 : Swap      A -> T   at   248
 18 : Delete      -  C   at   471
 19 : Delete      -  C   at   455
 20 : Swap      T -> T   at   184
 21 : Insert      +  T   at   224
 22 : Delete      -  T   at   224
 23 : Insert      +  T   at   333
 24 : Delete      -  C   at    18
 25 : Delete      -  G   at   139
 26 : Delete      -  T   at   333
 27 : Insert      +  T   at    80
 28 : Insert      +  T   at   480
 29 : Swap      A -> T   at   341
 30 : Swap      T -> C   at    73

           ---- MUTATED ----
    1: CTGCAATGCGAAGATAGAGAAGTGTTCGCGTATATGTTCATTTTGGCTTC
   51: AACCTACGGGCGTATACACATCCCAGCTCTGGAGCTCGGGCCCCAGTACC
  101: TAGTTTTTCTTTCAAGCTGGAACTACGGCGCCTTGTGCCTAATCCGTCGG
  151: GGTGATAATTCTAACTTGCTAACACGCGCAGATGGCCGGTCGCGGATCAA
  201: ACATCGCCATGGTCAACTTACCCTTAAAAGGCTTGCAACAGGGACCTGTA
  251: CTAGGAAGTAAACTTCATGGATCTTGGGCATAGGTCGACTAGCTTATTTA
  301: CCAGCGACGTTCTTGCACCCAGACATTACTCTAGTTCGACTGCGTTGAAC
  351: TGTGCTTAGGTTAATGCCGCGCTTCCTACCTTGAACTAAACACAGCACAC
  401: AGTGAGAACGTAGCGGCCTCTTTACCTGCCTGCAATTCGTAAGCCTGATT
  451: TGAGGGTCTGGAGTTTGGTCGGTAGTAGGTTCTGCTACTTAAATTC

- Total : 496
A : 116   C : 124
T : 137   G : 119

Factor[edit]

USING: assocs combinators.random formatting grouping io kernel
macros math math.statistics namespaces prettyprint quotations
random sequences sorting ;
IN: sequence-mutation

SYMBOL: verbose?  ! Turn on to show mutation details.
                  ! Off by default.

! Return a random base as a character.
: rand-base ( -- n ) "ACGT" random ;

! Generate a random dna sequence of length n.
: <dna> ( n -- seq ) [ rand-base ] "" replicate-as ;

! Prettyprint a dna sequence in blocks of n.
: .dna ( seq n -- )
    "SEQUENCE:" print [ group ] keep
    [ * swap "  %3d: %s\n" printf ] curry each-index ;

! Show a histogram of bases in a dna sequence and their total.
: show-counts ( seq -- )
    "BASE COUNTS:" print histogram >alist [ first ] sort-with
    [ [ "    %c: %3d\n" printf ] assoc-each ]
    [ "TOTAL: " write [ second ] [ + ] map-reduce . ] bi ;

! Prettyprint the overall state of a dna sequence.
: show-dna ( seq -- ) [ 50 .dna nl ] [ show-counts nl ] bi ;

! Call a quotation only if verbose? is on.
: log ( quot -- ) verbose? get [ call ] [ drop ] if ; inline

! Set index n to a random base.
: bswap ( n seq -- seq' )
    [ rand-base ] 2dip 3dup [ nth ] keepd spin
    [ "  index %3d: swapping  %c with %c\n" printf ] 3curry log
    [ set-nth ] keep ;

! Remove the base at index n.
: bdelete ( n seq -- seq' )
    2dup dupd nth [ "  index %3d: deleting  %c\n" printf ]
    2curry log remove-nth ;

! Insert a random base at index n.
: binsert ( n seq -- seq' )
    [ rand-base ] 2dip over reach
    [ "  index %3d: inserting %c\n" printf ] 2curry log
    insert-nth ;

! Allow "passing" probabilities to casep. This is necessary
! because casep is a macro.
MACRO: build-casep-seq ( seq -- quot )
    { [ bswap ] [ bdelete ] [ binsert ] } zip 1quotation ;

! Mutate a dna sequence according to some weights.
! For example,
! "ACGT" { 0.1 0.3 0.6 } mutate
! means swap with 0.1 probability, delete with 0.3 probability,
! and insert with 0.6 probability.
: mutate ( dna-seq weights-seq -- dna-seq' )
    [ [ length random ] keep ] [ build-casep-seq ] bi* casep ;
    inline

! Prettyprint a sequence of weights.
: show-weights ( seq -- )
    "MUTATION PROBABILITIES:" print
    "  swap:   %.2f\n  delete: %.2f\n  insert: %.2f\n\n" vprintf
    ;

: main ( -- )
    verbose? on "ORIGINAL " write 200 <dna> dup show-dna 10
    { 0.2 0.2 0.6 } dup show-weights "MUTATION LOG:" print
    [ mutate ] curry times nl "MUTATED " write show-dna ;

MAIN: main
Output:
ORIGINAL SEQUENCE:
    0: CACAGGCAAGGGTCGTATGCTACTATAGATGTTTCAGAACCGTATTTCGA
   50: CTCCGACGCGGTCATGAAGCAGACACTCCGTCACCGATTGCAAGTGTGCA
  100: GTTGGGAGAATGCATTAAAATTCTGGGTTATGAAACGGGCAGCCTTGATT
  150: GACAGGTGGTCCAGCGACAGTTTAACATACCAAACTCTTTGAGTACGCAG

BASE COUNTS:
    A:  55
    C:  44
    G:  52
    T:  49
TOTAL: 200

MUTATION PROBABILITIES:
  swap:   0.20
  delete: 0.20
  insert: 0.60

MUTATION LOG:
  index  82: deleting  A
  index 161: inserting C
  index  48: deleting  G
  index  10: swapping  G with T
  index 184: swapping  T with C
  index 137: inserting T
  index  60: inserting T
  index 135: inserting C
  index  32: inserting T
  index 201: inserting A

MUTATED SEQUENCE:
    0: CACAGGCAAGTGTCGTATGCTACTATAGATGTTTTCAGAACCGTATTTCA
   50: CTCCGACGCGGTTCATGAAGCAGACACTCCGTCCCGATTGCAAGTGTGCA
  100: GTTGGGAGAATGCATTAAAATTCTGGGTTATGAAACCGGGTCAGCCTTGA
  150: TTGACAGGTGGTCCCAGCGACAGTTTAACATACCAAACCCTTTGAGTACG
  200: CAAG

BASE COUNTS:
    A:  55
    C:  47
    G:  50
    T:  52
TOTAL: 204

Go[edit]

package main

import (
    "fmt"
    "math/rand"
    "sort"
    "time"
)

const bases = "ACGT"

// 'w' contains the weights out of 300 for each
// of swap, delete or insert in that order.
func mutate(dna string, w [3]int) string {
    le := len(dna)
    // get a random position in the dna to mutate
    p := rand.Intn(le)
    // get a random number between 0 and 299 inclusive
    r := rand.Intn(300)
    bytes := []byte(dna)
    switch {
    case r < w[0]: // swap
        base := bases[rand.Intn(4)]
        fmt.Printf("  Change @%3d %q to %q\n", p, bytes[p], base)
        bytes[p] = base
    case r < w[0]+w[1]: // delete
        fmt.Printf("  Delete @%3d %q\n", p, bytes[p])
        copy(bytes[p:], bytes[p+1:])
        bytes = bytes[0 : le-1]
    default: // insert
        base := bases[rand.Intn(4)]
        bytes = append(bytes, 0)
        copy(bytes[p+1:], bytes[p:])
        fmt.Printf("  Insert @%3d %q\n", p, base)
        bytes[p] = base
    }
    return string(bytes)
}

// Generate a random dna sequence of given length.
func generate(le int) string {
    bytes := make([]byte, le)
    for i := 0; i < le; i++ {
        bytes[i] = bases[rand.Intn(4)]
    }
    return string(bytes)
}

// Pretty print dna and stats.
func prettyPrint(dna string, rowLen int) {
    fmt.Println("SEQUENCE:")
    le := len(dna)
    for i := 0; i < le; i += rowLen {
        k := i + rowLen
        if k > le {
            k = le
        }
        fmt.Printf("%5d: %s\n", i, dna[i:k])
    }
    baseMap := make(map[byte]int) // allows for 'any' base
    for i := 0; i < le; i++ {
        baseMap[dna[i]]++
    }
    var bases []byte
    for k := range baseMap {
        bases = append(bases, k)
    }
    sort.Slice(bases, func(i, j int) bool { // get bases into alphabetic order
        return bases[i] < bases[j]
    })

    fmt.Println("\nBASE COUNT:")
    for _, base := range bases {
        fmt.Printf("    %c: %3d\n", base, baseMap[base])
    }
    fmt.Println("    ------")
    fmt.Println("    Σ:", le)
    fmt.Println("    ======\n")
}

// Express weights as a string.
func wstring(w [3]int) string {
    return fmt.Sprintf("  Change: %d\n  Delete: %d\n  Insert: %d\n", w[0], w[1], w[2])
}

func main() {
    rand.Seed(time.Now().UnixNano())
    dna := generate(250)
    prettyPrint(dna, 50)
    muts := 10
    w := [3]int{100, 100, 100} // use e.g. {0, 300, 0} to choose only deletions
    fmt.Printf("WEIGHTS (ex 300):\n%s\n", wstring(w))
    fmt.Printf("MUTATIONS (%d):\n", muts)
    for i := 0; i < muts; i++ {
        dna = mutate(dna, w)
    }
    fmt.Println()
    prettyPrint(dna, 50)
}
Output:

Sample run:

SEQUENCE:
    0: AATCCAGAAGTTGCGGGAACCGTCGAATAGTGTTCATTAAGTGTCCCGCG
   50: GAGTAGCCTCGTAATATAGAATGACCGGGCTTCCCAGCTAGACTTGTCCG
  100: CCACGTTTGTGTAGGGCGCAGCGAGACTGCTCTTGATACTCGTTATGTTC
  150: CTCACCGGATTATTGAATAGAGTCGAGGGGCTGACGTGACTGAACATTGC
  200: CTCCTTTGCGACTAATCTTTCCTTCAATGAACAGGCGCTACCCGTCATCG

BASE COUNT:
    A:  56
    C:  63
    G:  64
    T:  67
    ------
    Σ: 250
    ======

WEIGHTS (ex 300):
  Change: 100
  Delete: 100
  Insert: 100

MUTATIONS (10):
  Change @195 'A' to 'C'
  Insert @ 95 'G'
  Change @137 'T' to 'C'
  Delete @207 'T'
  Insert @148 'C'
  Insert @113 'A'
  Change @ 45 'C' to 'T'
  Delete @ 93 'T'
  Insert @ 51 'C'
  Delete @248 'A'

SEQUENCE:
    0: AATCCAGAAGTTGCGGGAACCGTCGAATAGTGTTCATTAAGTGTCTCGCG
   50: GCAGTAGCCTCGTAATATAGAATGACCGGGCTTCCCAGCTAGACTGGTCC
  100: GCCACGTTTGTGTAAGGGCGCAGCGAGACTGCTCTTGACACTCGTTATGC
  150: TTCCTCACCGGATTATTGAATAGAGTCGAGGGGCTGACGTGACTGAACCT
  200: TGCCTCCTTGCGACTAATCTTTCCTTCAATGAACAGGCGCTACCCGTCTC
  250: G

BASE COUNT:
    A:  55
    C:  66
    G:  65
    T:  65
    ------
    Σ: 251
    ======

Haskell[edit]

import Data.List       (group, sort)
import Data.List.Split (chunksOf)
import System.Random   (Random, randomR, random, newStdGen, randoms, getStdRandom)
import Text.Printf     (PrintfArg(..), fmtChar, fmtPrecision, formatString, IsChar(..), printf)

data Mutation = Swap | Delete | Insert deriving (Show, Eq, Ord, Enum, Bounded)
data DNABase = A | C | G | T deriving (Show, Read, Eq, Ord, Enum, Bounded)
type DNASequence = [DNABase]

data Result = Swapped Mutation Int (DNABase, DNABase)
            | InsertDeleted Mutation Int DNABase

instance Random DNABase where
  randomR (a, b) g = case randomR (fromEnum a, fromEnum b) g of (x, y) -> (toEnum x, y)
  random = randomR (minBound, maxBound)

instance Random Mutation where
  randomR (a, b) g = case randomR (fromEnum a, fromEnum b) g of (x, y) -> (toEnum x, y)
  random = randomR (minBound, maxBound)

instance PrintfArg DNABase where
  formatArg x fmt = formatString (show x) (fmt { fmtChar = 's', fmtPrecision = Nothing })

instance PrintfArg Mutation where
  formatArg x fmt = formatString (show x) (fmt { fmtChar = 's', fmtPrecision = Nothing })

instance IsChar DNABase where
  toChar = head . show
  fromChar = read . pure

chunkedDNASequence :: DNASequence -> [(Int, [DNABase])]
chunkedDNASequence = zip [50,100..] . chunksOf 50

baseCounts :: DNASequence -> [(DNABase, Int)]
baseCounts = fmap ((,) . head <*> length) . group . sort

newSequence :: Int -> IO DNASequence
newSequence n = take n . randoms <$> newStdGen

mutateSequence :: DNASequence -> IO (Result, DNASequence)
mutateSequence [] = fail "empty dna sequence"
mutateSequence ds = randomMutation >>= mutate ds
  where
    randomMutation = head . randoms <$> newStdGen
    mutate xs m = do
      i <- randomIndex (length xs)
      case m of
        Swap   -> randomDNA >>= \d -> pure (Swapped Swap i (xs !! pred i, d), swapElement i d xs)
        Insert -> randomDNA >>= \d -> pure (InsertDeleted Insert i d, insertElement i d xs)
        Delete -> pure (InsertDeleted Delete i (xs !! pred i), dropElement i xs)
      where
        dropElement i xs = take (pred i) xs <> drop i xs
        insertElement i e xs = take i xs <> [e] <> drop i xs
        swapElement i a xs = take (pred i) xs <> [a] <> drop i xs
        randomIndex n = getStdRandom (randomR (1, n))
        randomDNA = head . randoms <$> newStdGen

mutate :: Int -> DNASequence -> IO DNASequence
mutate 0 s = pure s
mutate n s = do
  (r, ms) <- mutateSequence s
  case r of
    Swapped m i (a, b)  -> printf "%6s @ %-3d : %s -> %s \n" m i a b
    InsertDeleted m i a -> printf "%6s @ %-3d : %s\n" m i a
  mutate (pred n) ms

main :: IO ()
main = do
  ds <- newSequence 200
  putStrLn "\nInitial Sequence:" >> showSequence ds
  putStrLn "\nBase Counts:" >> showBaseCounts ds
  showSumBaseCounts ds
  ms <- mutate 10 ds
  putStrLn "\nMutated Sequence:" >> showSequence ms
  putStrLn "\nBase Counts:" >> showBaseCounts ms
  showSumBaseCounts ms
  where
    showSequence   = mapM_ (uncurry (printf "%3d: %s\n")) . chunkedDNASequence
    showBaseCounts = mapM_ (uncurry (printf "%s: %3d\n")) . baseCounts
    showSumBaseCounts xs = putStrLn (replicate 6 '-') >> printf "Σ: %d\n\n" (length xs)
Output:
Initial Sequence:
 50: CCGGCGAACTGGTAGGTCTTTAATTATGCGGCCGCGATCGCGACACAGGT
100: GCAGGAGGAAAATAGGCCCCCGTTCTGGGCAGCCTGATTGCACACTCCCG
150: ATACCAGACGTGTGGCGGCTTTTTCGCAAGATCTTACCAAACATTAAGAT
200: TCGAAATACCAACTGTCGAAAGCAGAACGTGAATGTACCACCCGGATGCG

Base Counts:
A:  53
C:  53
G:  53
T:  41
------
Σ: 200

Insert @ 104 : C
Delete @ 133 : T
Insert @ 60  : A
Insert @ 42  : G
  Swap @ 14  : A -> C 
Insert @ 88  : A
Delete @ 9   : C
  Swap @ 185 : A -> G 
Insert @ 27  : G
  Swap @ 102 : C -> T 

Mutated Sequence:
 50: CCGGCGAATGGTCGGTCTTTAATTATGGCGGCCGCGATCGCGGACACAGG
100: TGCAGGAGGAAAAATAGGCCCCCGTTCTGGGCAGCCTGAATTGCACACTC
150: CTGATACCCAGACGTGTGGCGGCTTTTTCGCAAGACTTACCAAACATTAA
200: GATTCGAAATACCAACTGTCGAAAGCAGAACGTGAGTGTACCACCCGGAT
250: GCG

Base Counts:
A:  53
C:  53
G:  56
T:  41
------
Σ: 203

J[edit]

ACGT=: 'ACGT'
MUTS=: ;: 'del ins mut'

NB. generate sequence of size y of uniformly selected nucleotides.
NB. represent sequences as ints in range i.4 pretty printed. nuc
NB. defined separately to avoid fixing value inside mutation
NB. functions.
nuc=: monad : '?4'
dna=: nuc"0 @ i.

NB. randomly mutate nucleotide at a random index by deletion insertion
NB. or mutation of a nucleotide.
del=: {.,[:}.}.
ins=: {.,nuc@],}.
mut=: {.,nuc@],[:}.}.

NB. pretty print nucleotides in rows of 50 with numbering
seq=: [: (;~ [: (4&":"0) 50*i.@#) _50]\{&ACGT

sim=: monad define
'n k ws'=. y        NB. initial size, mutations, and weights for mutations
ws=. (% +/) ws      NB. normalize weights
A=.0$]D0=.D=. dna n NB. initial dna and history of actions

NB. k times do a random action according to weights and record it
for. i.k do.
  D=.". action=. (":?#D),' ',(":MUTS{::~(+/\ws)I.?0),' D'
  A=. action ; A
end.

echo 'actions';,. A-.a:
echo ('mutation';'probability') , MUTS ,. <"0 ws
('start';'end'),.(seq D0) ,: seq D
)

simulate=: (sim@(1 1 1&; &. |. ))`sim@.(3=#)
Output:
   simulate 200 ; 10
┌─────────┐
│actions  │
├─────────┤
│60 mut D │
├─────────┤
│156 del D│
├─────────┤
│44 mut D │
├─────────┤
│64 mut D │
├─────────┤
│167 mut D│
├─────────┤
│40 ins D │
├─────────┤
│39 mut D │
├─────────┤
│187 del D│
├─────────┤
│186 del D│
├─────────┤
│150 del D│
└─────────┘
┌────────┬───────────┐
│mutation│probability│
├────────┼───────────┤
│del     │0.333333   │
├────────┼───────────┤
│ins     │0.333333   │
├────────┼───────────┤
│mut     │0.333333   │
└────────┴───────────┘
┌─────┬────┬──────────────────────────────────────────────────┐
│start│   0│GGCTGTTGGCCGCCAATCTACAATGATAGCGCGTGAGGAGGGCTAATGTA│
│     │  50│GAGCCAATAATGGATGCTCGCGCTTCTGCTTATGCTGGTTACTGCTGCCC│
│     │ 100│AAAAACGGGGTACATTGAGCGATAAGCCCGCAAGGTTACTGCTCGTGACA│
│     │ 150│GTCCGAACACCACATTCGTGGTTACTCGACTCTGCCACCTCTTAGCGGAT│
├─────┼────┼──────────────────────────────────────────────────┤
│end  │   0│GGCTGTTGGCCGCCAATCTACAATGATAGCGCGTGAGGACCGGCAAATGT│
│     │  50│AGAGCCAATACTGGATGCTCGCGCTTCTGCTTATGCTGGTTACTGCTGCC│
│     │ 100│CAAAAACGGGGTACATTGAGCGATAAGCCCGCAAGGTTACTGCTCGTGAC│
│     │ 150│ATCCGACACCACATTCCTGGTTACTCGACTCTGCCACCTTAGCGGAT   │
└─────┴────┴──────────────────────────────────────────────────┘
   simulate 200 ; 10 ; 1 3 1
┌─────────┐
│actions  │
├─────────┤
│120 ins D│
├─────────┤
│199 ins D│
├─────────┤
│138 mut D│
├─────────┤
│15 ins D │
├─────────┤
│8 del D  │
├─────────┤
│135 ins D│
├─────────┤
│29 ins D │
├─────────┤
│118 del D│
├─────────┤
│111 ins D│
├─────────┤
│10 del D │
└─────────┘
┌────────┬───────────┐
│mutation│probability│
├────────┼───────────┤
│del     │0.2        │
├────────┼───────────┤
│ins     │0.6        │
├────────┼───────────┤
│mut     │0.2        │
└────────┴───────────┘
┌─────┬────┬──────────────────────────────────────────────────┐
│start│   0│GAACATACAATATCGTGTGGGTGGTAAGGTGCGCCGATTTGGCAGTGTAG│
│     │  50│AGCGGCCTCTGGCCGGGCCCATACTGACATATCTTTTATCTCCGTGCTAG│
│     │ 100│CAGAAGAATCAAACGCGTCAAGATGCTGGCGCGGGCTGATATGCGCCCGG│
│     │ 150│CAGTGGAGAACTGCGTTGATACACCTCAAAGATAAGCGGACGATATTAGC│
├─────┼────┼──────────────────────────────────────────────────┤
│end  │   0│GAACATACAATCGTGCTGGGTGGTAAGGTTGCGCCGATTTGGCAGTGTAG│
│     │  50│AGCGGCCTCTGGCCGGGCCCATACTGACATATCTTTTATCTCCGTGCTAG│
│     │ 100│CAGAAGAATCAACACGCGTAGAGATGCTGGCGCGGGACTGATATGCGCCC│
│     │ 150│GGCAGTGGAGAACTGCGTTGATACACCTCAAAGATAAGCGGACGATATTA│
│     │ 200│GGC                                               │
└─────┴────┴──────────────────────────────────────────────────┘

Java[edit]

import java.util.Arrays;
import java.util.Random;

public class SequenceMutation {
    public static void main(String[] args) {
        SequenceMutation sm = new SequenceMutation();
        sm.setWeight(OP_CHANGE, 3);
        String sequence = sm.generateSequence(250);
        System.out.println("Initial sequence:");
        printSequence(sequence);
        int count = 10;
        for (int i = 0; i < count; ++i)
            sequence = sm.mutateSequence(sequence);
        System.out.println("After " + count + " mutations:");
        printSequence(sequence);
    }

    public SequenceMutation() {
        totalWeight_ = OP_COUNT;
        Arrays.fill(operationWeight_, 1);
    }

    public String generateSequence(int length) {
        char[] ch = new char[length];
        for (int i = 0; i < length; ++i)
            ch[i] = getRandomBase();
        return new String(ch);
    }

    public void setWeight(int operation, int weight) {
        totalWeight_ -= operationWeight_[operation];
        operationWeight_[operation] = weight;
        totalWeight_ += weight;
    }

    public String mutateSequence(String sequence) {
        char[] ch = sequence.toCharArray();
        int pos = random_.nextInt(ch.length);
        int operation = getRandomOperation();
        if (operation == OP_CHANGE) {
            char b = getRandomBase();
            System.out.println("Change base at position " + pos + " from "
                               + ch[pos] + " to " + b);
            ch[pos] = b;
        } else if (operation == OP_ERASE) {
            System.out.println("Erase base " + ch[pos] + " at position " + pos);
            char[] newCh = new char[ch.length - 1];
            System.arraycopy(ch, 0, newCh, 0, pos);
            System.arraycopy(ch, pos + 1, newCh, pos, ch.length - pos - 1);
            ch = newCh;
        } else if (operation == OP_INSERT) {
            char b = getRandomBase();
            System.out.println("Insert base " + b + " at position " + pos);
            char[] newCh = new char[ch.length + 1];
            System.arraycopy(ch, 0, newCh, 0, pos);
            System.arraycopy(ch, pos, newCh, pos + 1, ch.length - pos);
            newCh[pos] = b;
            ch = newCh;
        }
        return new String(ch);
    }

    public static void printSequence(String sequence) {
        int[] count = new int[BASES.length];
        for (int i = 0, n = sequence.length(); i < n; ++i) {
            if (i % 50 == 0) {
                if (i != 0)
                    System.out.println();
                System.out.printf("%3d: ", i);
            }
            char ch = sequence.charAt(i);
            System.out.print(ch);
            for (int j = 0; j < BASES.length; ++j) {
                if (BASES[j] == ch) {
                    ++count[j];
                    break;
                }
            }
        }
        System.out.println();
        System.out.println("Base counts:");
        int total = 0;
        for (int j = 0; j < BASES.length; ++j) {
            total += count[j];
            System.out.print(BASES[j] + ": " + count[j] + ", ");
        }
        System.out.println("Total: " + total);
    }

    private char getRandomBase() {
        return BASES[random_.nextInt(BASES.length)];
    }

    private int getRandomOperation() {
        int n = random_.nextInt(totalWeight_), op = 0;
        for (int weight = 0; op < OP_COUNT; ++op) {
            weight += operationWeight_[op];
            if (n < weight)
                break;
        }
        return op;
    }

    private final Random random_ = new Random();
    private int[] operationWeight_ = new int[OP_COUNT];
    private int totalWeight_ = 0;

    private static final int OP_CHANGE = 0;
    private static final int OP_ERASE = 1;
    private static final int OP_INSERT = 2;
    private static final int OP_COUNT = 3;
    private static final char[] BASES = {'A', 'C', 'G', 'T'};
}
Output:
Initial sequence:
  0: TCCCCTCCAGTTAGCAGAAATATTAGCTAACGATACCTCGACACGGAGGG
 50: GTGGGGCCAACTCTTAACACAATTACGAGAACCATCCTTCGAAAGCAAAA
100: AAGTTTATGCCTGTTGTGTCAGGAACCCCCCGCGACGGACAACACAGTAA
150: GCACCTGCGGATACTGTGGTTGCCCTGAAAGACGGAGGATGCCTCCTATG
200: TCATTTAGAACTATCGAACGTACGGTTCTTAAATGGTCGTAGTTAGATAG
Base counts:
A: 73, C: 61, G: 59, T: 57, Total: 250
Insert base T at position 196
Change base at position 19 from A to G
Erase base T at position 204
Change base at position 223 from G to T
Change base at position 183 from G to C
Change base at position 21 from A to T
Insert base T at position 40
Change base at position 20 from T to G
Insert base T at position 69
Change base at position 19 from G to T
After 10 mutations:
  0: TCCCCTCCAGTTAGCAGAATGTTTAGCTAACGATACCTCGTACACGGAGG
 50: GGTGGGGCCAACTCTTAACTACAATTACGAGAACCATCCTTCGAAAGCAA
100: AAAAGTTTATGCCTGTTGTGTCAGGAACCCCCCGCGACGGACAACACAGT
150: AAGCACCTGCGGATACTGTGGTTGCCCTGAAAGACCGAGGATGCCTCCTT
200: ATGTCATTAGAACTATCGAACGTACTGTTCTTAAATGGTCGTAGTTAGAT
250: AG
Base counts:
A: 71, C: 62, G: 58, T: 61, Total: 252

JavaScript[edit]

// Basic set-up
const numBases = 250
const numMutations = 30
const bases = ['A', 'C', 'G', 'T'];

// Utility functions
/**
 * Return a shallow copy of an array
 * @param {Array<*>} arr
 * @returns {*[]}
 */
const copy = arr => [...arr];

/**
 * Get a random int up to but excluding the the given number
 * @param {number} max
 * @returns {number}
 */
const randTo = max => (Math.random() * max) | 0;

/**
 * Given an array return a random element and the index of that element from
 * the array.
 * @param {Array<*>} arr
 * @returns {[*[], number]}
 */
const randSelect = arr => {
  const at = randTo(arr.length);
  return [arr[at], at];
};

/**
 * Given a number or string, return a left padded string
 * @param {string|number} v
 * @returns {string}
 */
const pad = v => ('' + v).padStart(4, ' ');

/**
 * Count the number of elements that match the given value in an array
 * @param {Array<string>} arr
 * @returns {function(string): number}
 */
const filterCount = arr => s => arr.filter(e => e === s).length;

/**
 * Utility logging function
 * @param {string|number} v
 * @param {string|number} n
 */
const print = (v, n) => console.log(`${pad(v)}:\t${n}`)

/**
 * Utility function to randomly select a new base, and an index in the given
 * sequence.
 * @param {Array<string>} seq
 * @param {Array<string>} bases
 * @returns {[string, string, number]}
 */
const getVars = (seq, bases) => {
  const [newBase, _] = randSelect(bases);
  const [extBase, randPos] = randSelect(seq);
  return [newBase, extBase, randPos];
};

// Bias the operations
/**
 * Given a map of function to ratio, return an array of those functions
 * appearing ratio number of times in the array.
 * @param weightMap
 * @returns {Array<function>}
 */
const weightedOps = weightMap => {
  return [...weightMap.entries()].reduce((p, [op, weight]) =>
      [...p, ...(Array(weight).fill(op))], []);
};

// Pretty Print functions
const prettyPrint = seq => {
  let idx = 0;
  const rem = seq.reduce((p, c) => {
    const s = p + c;
    if (s.length === 50) {
      print(idx, s);
      idx = idx + 50;
      return '';
    }
    return s;
  }, '');
  if (rem !== '') {
    print(idx, rem);
  }
}

const printBases = seq => {
  const filterSeq = filterCount(seq);
  let tot = 0;
  [...bases].forEach(e => {
    const cnt = filterSeq(e);
    print(e, cnt);
    tot = tot + cnt;
  })
  print('Σ', tot);
}

// Mutation definitions
const swap = ([hist, seq]) => {
  const arr = copy(seq);
  const [newBase, extBase, randPos] = getVars(arr, bases);
  arr.splice(randPos, 1, newBase);
  return [[...hist, `Swapped ${extBase} for ${newBase} at ${randPos}`], arr];
};

const del = ([hist, seq]) => {
  const arr = copy(seq);
  const [newBase, extBase, randPos] = getVars(arr, bases);
  arr.splice(randPos, 1);
  return [[...hist, `Deleted ${extBase} at ${randPos}`], arr];
}

const insert = ([hist, seq]) => {
  const arr = copy(seq);
  const [newBase, extBase, randPos] = getVars(arr, bases);
  arr.splice(randPos, 0, newBase);
  return [[...hist, `Inserted ${newBase} at ${randPos}`], arr];
}

// Create the starting sequence
const seq = Array(numBases).fill(undefined).map(
    () => randSelect(bases)[0]);

// Create a weighted set of mutations
const weightMap = new Map()
    .set(swap, 1)
    .set(del, 1)
    .set(insert, 1);
const operations = weightedOps(weightMap);
const mutations = Array(numMutations).fill(undefined).map(
    () => randSelect(operations)[0]);

// Mutate the sequence
const [hist, mut] = mutations.reduce((p, c) => c(p), [[], seq]);

console.log('ORIGINAL SEQUENCE:')
prettyPrint(seq);

console.log('\nBASE COUNTS:')
printBases(seq);

console.log('\nMUTATION LOG:')
hist.forEach((e, i) => console.log(`${i}:\t${e}`));

console.log('\nMUTATED SEQUENCE:')
prettyPrint(mut);

console.log('\nMUTATED BASE COUNTS:')
printBases(mut);
Output:
ORIGINAL SEQUENCE:
   0:	GTGATCGTAGCGTATCACACGTGCGGGCAGATTGCGGGGCTTCCGTAAGT
  50:	CTCTCGATTGGCTAAACCTAACGTATGGAGTCGGGCCTGTTCAGAACTGC
 100:	GCCATAACCACAAACGTCGACAGATAAAGTTCGTGAAGGGACAGGATGAG
 150:	ATTTTTTTCCCGGTCTGTGCTCAGGGCTTAAATTAGTGCCTTTCCCGAAT
 200:	GGATACCAACGATTCTGACTGGTTATTTTAATCACCTAATGCCAGTAGTC

BASE COUNTS:
   A:	61
   C:	57
   G:	64
   T:	68
   Σ:	250

MUTATION LOG:
0:	Inserted A at 231
1:	Inserted C at 67
2:	Deleted T at 192
3:	Inserted A at 106
4:	Inserted C at 226
5:	Swapped C for G at 34
6:	Swapped T for A at 165
7:	Inserted G at 11
8:	Inserted G at 75
9:	Inserted C at 219
10:	Swapped A for A at 205
11:	Inserted C at 67
12:	Inserted C at 34
13:	Deleted A at 31
14:	Inserted A at 171
15:	Deleted G at 0
16:	Deleted A at 170
17:	Deleted C at 67
18:	Deleted T at 173
19:	Inserted T at 109
20:	Inserted C at 232
21:	Inserted G at 137
22:	Inserted T at 151
23:	Deleted C at 93
24:	Deleted A at 95
25:	Inserted C at 229
26:	Inserted C at 65
27:	Inserted G at 84
28:	Inserted G at 212
29:	Inserted G at 161

MUTATED SEQUENCE:
   0:	TGATCGTAGCGGTATCACACGTGCGGGCAGTTCGGGGGGCTTCCGTAAGT
  50:	CTCTCGATTGGCTAACACCCTAACGGTATGGAGTGCGGGCCTGTTAGACT
 100:	GCGCCATAATACCACAAACGTCGACAGATAAAGTTCGGTGAAGGGACAGG
 150:	ATTGAGATTTTGTTTCCCGGACTGTGCCAGGGCTTAAATTAGTGCCTTCC
 200:	CGAATGGATACCAGACGATTCTCGACTGGTTACCTTTCTAAATCACCTAA
 250:	TGCCAGTAGTC

MUTATED BASE COUNTS:
   A:	62
   C:	62
   G:	70
   T:	67
   Σ:	261

Julia[edit]

dnabases = ['A', 'C', 'G', 'T']
randpos(seq) = rand(1:length(seq))                                      # 1
mutateat(pos, seq) = (s = seq[:]; s[pos] = rand(dnabases); s)           # 2-1
deleteat(pos, seq) = [seq[1:pos-1]; seq[pos+1:end]]                     # 2-2
randinsertat(pos, seq) = [seq[1:pos]; rand(dnabases); seq[pos+1:end]]   # 2-3

function weightedmutation(seq, pos, weights=[1, 1, 1], verbose=true)    # Extra credit
    p, r = weights ./ sum(weights), rand()
    f = (r <= p[1]) ? mutateat : (r < p[1] + p[2]) ? deleteat : randinsertat
    verbose && print("Mutate by ", f == mutateat ? "swap" :
                                   f == deleteat ? "delete" : "insert")
    return f(pos, seq)
end

function weightedrandomsitemutation(seq, weights=[1, 1, 1], verbose=true)
    position = randpos(seq)
    newseq = weightedmutation(seq, position, weights, verbose)
    verbose && println(" at position $position")
    return newseq
end

randdnasequence(n) = rand(dnabases, n)                                  # 3

function dnasequenceprettyprint(seq, colsize=50)                        # 4
    println(length(seq), "nt DNA sequence:\n")
    rows = [seq[i:min(length(seq), i + colsize - 1)] for i in 1:colsize:length(seq)]
    for (i, r) in enumerate(rows)
        println(lpad(colsize * (i - 1), 5), "   ", String(r))
    end
    bases = [[c, 0] for c in dnabases]
    for c in seq, base in bases
        if c == base[1]
            base[2] += 1
        end
    end
    println("\nNucleotide counts:\n")
    for base in bases
        println(lpad(base[1], 10), lpad(string(base[2]), 12))
    end
    println(lpad("Other", 10), lpad(string(length(seq) - sum(x[2] for x in bases)), 12))
    println("     _________________\n", lpad("Total", 10), lpad(string(length(seq)), 12))
end

function testbioseq()
    sequence = randdnasequence(500)
    dnasequenceprettyprint(sequence)
    for _ in 1:10                                                       # 5
        sequence = weightedrandomsitemutation(sequence)
    end
    println("\n Mutated:"); dnasequenceprettyprint(sequence)            # 6
end

testbioseq()
Output:
500nt DNA sequence:

    0   TCACGTAACAGAGGTATAGTTGATCTTGAGCGGGCTGGCTCCCGGGTTTC
   50   TAGCAAGAAAAGGGGAGGGAAGTGCGCCTGCTTTTGCCCCGGGCACCCCA
  100   ATCGAAGACAGCTCCGGGGTCGCACATTTTTATGGCCACATAATGAGGGA
  150   ATGCACGCATCACTCCTATCACTAACTGCGAACTCATGTGACTGTGCAAA
  200   ACACCTTTAACACTGCGATGCCGTGGGGACGGGCCCCCCCAGCGGTATAG
  250   CGCGCACACGCGACAGATGTTAACTCGAATGGTCGCGCCGGGGAGTGCAC
  300   CCCTTGACATATACTCCAGATGCAATGCGCTATACTTTATGAACTTGCAT
  350   AAGCTGCGCAGGGGGGATTGACTTATACTACATATTAACTACCGATATCG
  400   ACGCAAATATTCGGGCGGTCTAAAGTGTGTCAGAACGGACATGCCGCCCA
  450   GAATCACGGCTACTGAGGACAAATACGCATTCCCGGTGCTGCATTCATTC

Nucleotide counts:

         A         127
         C         133
         G         131
         T         109
     Other           0
     _________________
     Total         500
Mutate by swap at position 253
Mutate by swap at position 448
Mutate by insert at position 379
Mutate by delete at position 311
Mutate by delete at position 335
Mutate by insert at position 132
Mutate by swap at position 191
Mutate by swap at position 481
Mutate by insert at position 189
Mutate by insert at position 423

 Mutated:
502nt DNA sequence:

    0   TCACGTAACAGAGGTATAGTTGATCTTGAGCGGGCTGGCTCCCGGGTTTC
   50   TAGCAAGAAAAGGGGAGGGAAGTGCGCCTGCTTTTGCCCCGGGCACCCCA
  100   ATCGAAGACAGCTCCGGGGTCGCACATTTTTACTGGCCACATAATGAGGG
  150   AATGCACGCATCACTCCTATCACTAACTGCGAACTCATGATCACTGTGCA
  200   AAACACCTTTAACACTGCGATGCCGTGGGGACGGGCCCCCCCAGCGGTAT
  250   AGCGAGCACACGCGACAGATGTTAACTCGAATGGTCGCGCCGGGGAGTGC
  300   ACCCCTTGACATTACTCCAGATGCAATGCGCTATACTTATGAACTTGCAT
  350   AAGCTGCGCAGGGGGGATTGACTTATACTGACATATTAACTACCGATATC
  400   GACGCAAATATTCGGGCGGTCTAGAAGTGTGTCAGAACGGACATGCCGCT
  450   CAGAATCACGGCTACTGAGGACAAATACGCATTCCCGGTGCTGCATTCAT
  500   TC

Nucleotide counts:

         A         128
         C         133
         G         132
         T         109
     Other           0
     _________________
     Total         502

Lua[edit]

Using the prettyprint() function from Bioinformatics/base_count#Lua (not replicated here)

math.randomseed(os.time())
bases = {"A","C","T","G"}
function randbase() return bases[math.random(#bases)] end

function mutate(seq)
  local i,h = math.random(#seq), "%-6s %3s at %3d"
  local old,new = seq:sub(i,i), randbase()
  local ops = {
    function(s) h=h:format("Swap", old..">"..new, i) return s:sub(1,i-1)..new..s:sub(i+1) end,
    function(s) h=h:format("Delete", " -"..old, i) return s:sub(1,i-1)..s:sub(i+1) end,
    function(s) h=h:format("Insert", " +"..new, i) return s:sub(1,i-1)..new..s:sub(i) end,
  }
  local weighted = { 1,1,2,3 }
  local n = weighted[math.random(#weighted)]
  return ops[n](seq), h
end

local seq,hist="",{} for i = 1, 200 do seq=seq..randbase() end
print("ORIGINAL:")
prettyprint(seq)
print()

for i = 1, 10 do seq,h=mutate(seq) hist[#hist+1]=h end
print("MUTATIONS:")
for i,h in ipairs(hist) do print("  "..h) end
print()

print("MUTATED:")
prettyprint(seq)
Output:
ORIGINAL:
LOCUS       AB000000              200 bp    mRNA    linear   HUM 01-JAN-2001
 BASE COUNT           50 a           47 c           51 g           52 t
ORIGIN
        1 atggatccga cgtgattata ttcactatgg ggcaatcgca cattagtttt atctccatca
       61 gcgacacgat ggggatcaat gggctgctac tggagacgtc cgatgcgatg attggtaatt
      121 gcatagagtg gatctccttt aacctagtag aaacgccctt ccggttcagc atggcgagtg
      181 cgtacaacgt cacccagact

MUTATIONS:
  Insert  +A at 190
  Delete  -C at 134
  Swap   A>G at  57
  Delete  -G at  83
  Insert  +T at  81
  Swap   T>T at 164
  Delete  -C at 199
  Swap   T>G at 147
  Swap   C>G at  33
  Swap   C>G at 191

MUTATED:
LOCUS       AB000000              199 bp    mRNA    linear   HUM 01-JAN-2001
 BASE COUNT           50 a           43 c           54 g           52 t
ORIGIN
        1 atggatccga cgtgattata ttcactatgg gggaatcgca cattagtttt atctccgtca
       61 gcgacacgat ggggatcaat tggctgctac tggagacgtc cgatgcgatg attggtaatt
      121 gcatagagtg gattccttta acctaggaga aacgcccttc cggttcagca tggcgagtgc
      181 gtacaacgat gacccagat

Mathematica / Wolfram Language[edit]

BioSequence is a fundamental data type in Mathematica:

SeedRandom[13122345];
seq = BioSequence["DNA", "ATAAACGTACGTTTTTAGGCT"];
randompos = RandomInteger[seq["SequenceLength"]];
seq = StringReplacePart[seq, RandomChoice[{"A", "T", "C", "G"}], {randompos, randompos}];
randompos = RandomInteger[seq["SequenceLength"]];
seq = StringReplacePart[seq, "", {randompos, randompos}];
randompos = RandomInteger[seq["SequenceLength"]];
seq = StringInsert[seq, RandomChoice[{"A", "T", "C", "G"}], randompos];
seq = BioSequence["DNA", StringJoin@RandomChoice[{"A", "T", "C", "G"}, 250]];
size = 50;
parts = StringPartition[seq["SequenceString"], UpTo[size]];
begins = Most[Accumulate[Prepend[StringLength /@ parts, 1]]];
ends = Rest[Accumulate[Prepend[StringLength /@ parts, 0]]];
StringRiffle[MapThread[ToString[#1] <> "-" <> ToString[#2] <> ": " <> #3 &, {begins, ends, parts}], "\n"]
Tally[Characters[seq["SequenceString"]]]
Do[
 type = RandomChoice[{1, 2, 3}];
 Switch[type, 1,
  randompos = RandomInteger[seq["SequenceLength"]];
  seq = StringReplacePart[seq, RandomChoice[{"A", "T", "C", "G"}], {randompos, randompos}];
  , 2,
  randompos = RandomInteger[seq["SequenceLength"]];
  seq = StringReplacePart[seq, "", {randompos, randompos}];
  , 3,
  randompos = RandomInteger[seq["SequenceLength"]];
  seq = StringInsert[seq, RandomChoice[{"A", "T", "C", "G"}], randompos];
  ]
 ,
 {10}
 ]
parts = StringPartition[seq["SequenceString"], UpTo[size]];
begins = Most[Accumulate[Prepend[StringLength /@ parts, 1]]];
ends = Rest[Accumulate[Prepend[StringLength /@ parts, 0]]];
StringRiffle[MapThread[ToString[#1] <> "-" <> ToString[#2] <> ": " <> #3 &, {begins, ends, parts}], "\n"]
Tally[Characters[seq["SequenceString"]]]
Output:
1-50: TAGCAGGGGAATTGTCGACTCCCGGGTTTCAATTGCCAACCAAGCATATT
51-100: GTACGCTCGTTCATTATAGGGGAAATGCGAGGGGCTAGAACGTTAGCTTC
101-150: GAGAGGTCGCGGCAATTTAGGGGGGCACCAAACGGTTTATAATACAGGGA
151-200: CTGATACATTCGCTGGAAAACAATTCTGCCCAGCAGCGACTCCGGACAAC
201-250: GTGACTTTGGTCCAAGATATTAGATTATCAATCCGTATTAATGTAGGCTT
{{"T", 63}, {"A", 69}, {"G", 66}, {"C", 52}}
1-50: TAGCAGGGGAATTGTCGACTCCCGGGTTCAATTGCCAACCAAGATATTGT
51-100: ACGCTCGTTCATTATAGGGGAAATGCGAGGGGCTAGAAACGTTAGTTCGA
101-150: GAGGTCGCGGAAATTTAGGGGGGCACCAACGGTTTATAATACAGGGACTG
151-200: ATACATTCGCTGGAAAACAATTCTGCCCAGCAGCGACTCCGGACAACGTG
201-246: ACTTTGGTCCAAGATAGTTAGATATCAATCCGTATAATGTAGGCTT
{{"T", 60}, {"A", 70}, {"G", 67}, {"C", 49}}

Nim[edit]

import random
import strformat
import strutils

type

  # Enumeration type for bases.
  Base {.pure.} = enum A, C, G, T, Other = "other"

  # Sequence of bases.
  DnaSequence = string

  # Kind of mutation.
  Mutation = enum mutSwap, mutDelete, mutInsert

const MaxBaseVal = ord(Base.high) - 1   # Maximum base value.

#---------------------------------------------------------------------------------------------------

template toChar(base: Base): char = ($base)[0]

#---------------------------------------------------------------------------------------------------

proc newDnaSeq(length: Natural): DnaSequence =
  ## Create a DNA sequence of given length.

  result = newStringOfCap(length)
  for _ in 1..length:
    result.add($Base(rand(MaxBaseVal)))

#---------------------------------------------------------------------------------------------------

proc mutate(dnaSeq: var DnaSequence) =
  ## Mutate a sequence (it is changed in place).

  # Choose randomly the position of mutation.
  let idx = rand(dnaSeq.high)

  # Choose randomly the kind of mutation.
  let mut = Mutation(rand(ord(Mutation.high)))

  # Apply the mutation.
  case mut

  of mutSwap:
    let newBase = Base(rand(MaxBaseVal))
    echo fmt"Changing base at position {idx + 1} from {dnaSeq[idx]} to {newBase}"
    dnaSeq[idx] = newBase.toChar

  of mutDelete:
    echo fmt"Deleting base {dnaSeq[idx]} at position {idx + 1}"
    dnaSeq.delete(idx, idx)

  of mutInsert:
    let newBase = Base(rand(MaxBaseVal))
    echo fmt"Inserting base {newBase} at position {idx + 1}"
    dnaSeq.insert($newBase, idx)

#---------------------------------------------------------------------------------------------------

proc display(dnaSeq: DnaSequence) =
  ## Display a DNA sequence using EMBL format.

  var counts: array[Base, Natural]    # Count of bases.
  for c in dnaSeq:
    inc counts[parseEnum[Base]($c, Other)]  # Use Other as default value.

  # Display the SQ line.
  var sqline = fmt"SQ   {dnaSeq.len} BP; "
  for (base, count) in counts.pairs:
    sqline &= fmt"{count} {base}; "
  echo sqline

  # Display the sequence.
  var idx = 0
  var row = newStringOfCap(80)
  var remaining = dnaSeq.len

  while remaining > 0:
    row.setLen(0)
    row.add("     ")

    # Add groups of 10 bases.
    for group in 1..6:
      let nextIdx = idx + min(10, remaining)
      for i in idx..<nextIdx:
        row.add($dnaSeq[i])
      row.add(' ')
      dec remaining, nextIdx - idx
      idx = nextIdx
      if remaining == 0:
        break

    # Append the number of the last base in the row.
    row.add(spaces(72 - row.len))
    row.add(fmt"{idx:>8}")
    echo row

  # Add termination.
  echo "//"

#———————————————————————————————————————————————————————————————————————————————————————————————————

randomize()
var dnaSeq = newDnaSeq(200)
echo "Initial sequence"
echo "———————————————\n"
dnaSeq.display()

echo "\nMutations"
echo "—————————\n"
for _ in 1..10:
  dnaSeq.mutate()

echo "\nMutated sequence"
echo "————————————————\n"
dnaSeq.display()
Output:
Initial sequence
———————————————

SQ   200 BP; 53 A; 52 C; 45 G; 50 T; 0 other; 
     ATGGATAGAA TGGCACCTCA GTGTCACATG TCCTAGGCAC ATTCTACGTA CTAGTTTCTG        60
     GAGGTCGATA AATACAAGAT GGAATACTCT TATCAACCGC TAGCAATGAA ACTTAGATAG       120
     CCACCCCTCG ATCGCGGTTC GCTATGCGGC ATCGTCAACT GCGTCAAAGC ACTACGTCGT       180
     TTCGTGACTG CCAGTCGAGC                                                   200
//

Mutations
—————————

Inserting base A at position 121
Changing base at position 101 from T to G
Deleting base A at position 115
Changing base at position 126 from C to G
Deleting base T at position 155
Deleting base G at position 198
Deleting base T at position 159
Changing base at position 144 from A to T
Inserting base C at position 34
Changing base at position 127 from G to G

Mutated sequence
————————————————

SQ   198 BP; 52 A; 52 C; 46 G; 48 T; 0 other; 
     ATGGATAGAA TGGCACCTCA GTGTCACATG TCCCTAGGCA CATTCTACGT ACTAGTTTCT        60
     GGAGGTCGAT AAATACAAGA TGGAATACTC TTATCAACCG CGAGCAATGA AACTTGATAG       120
     ACCACCGCTC GATCGCGGTT CGCTTTGCGG CATCGCAACG CGTCAAAGCA CTACGTCGTT       180
     TCGTGACTGC CAGTCGAC                                                     198
//

Perl[edit]

Translation of: Raku
use strict;
use warnings;
use feature 'say';

my @bases = <A C G T>;

my $dna;
$dna .= $bases[int rand 4] for 1..200;

my %cnt;
$cnt{$_}++ for split //, $dna;

sub pretty {
    my($string) = @_;
    my $chunk = 10;
    my $wrap  = 5 * ($chunk+1);
    ($string =~ s/(.{$chunk})/$1 /gr) =~ s/(.{$wrap})/$1\n/gr;
}

sub mutate {
    my($dna,$count) = @_;
    my $orig = $dna;
    substr($dna,rand length $dna,1) = $bases[int rand 4] while $count > diff($orig, $dna) =~ tr/acgt//;
    $dna
}

sub diff {
    my($orig, $repl) = @_;
    for my $i (0 .. -1+length $orig) {
        substr($repl,$i,1, lc substr $repl,$i,1) if substr($orig,$i,1) ne substr($repl,$i,1);
    }
    $repl;
}

say "Original DNA strand:\n" . pretty($dna);
say "Total bases: ". length $dna;
say "$_: $cnt{$_}" for @bases;

my $mutate = mutate($dna, 10);
%cnt = ();
$cnt{$_}++ for split //, $mutate;
say "\nMutated DNA strand:\n" . pretty diff $dna, $mutate;
say "Total bases: ". length $mutate;
say "$_: $cnt{$_}" for @bases;
Output:
Original DNA strand:
TGGAACATGT CCCAACGAGT TCTTCTTGCT AGCAGATTTT TTCAGTTGAT
CGTCACATGC GGTAGACTAC CCAAGGTGTG ACTACTCGCA TGCCTGATCT
AAATGGACAG TCGGCAGGCT AGTGCTAATT ACCGGAAGTA CGAACGAGCC
ATGCTGAGCG ACTCATCATT GTGAAATCGA GCCTATCTGC ATGACCTAAT

Total bases: 200
A: 52
C: 48
G: 47
T: 53

Mutated DNA strand:
TGGAACATGT CCCAACGAGT cCTTCTTGCT AGCcGATTTT TTCAGTTGgT
gGTCACATGC aGTAGACTAC CCgAGGTGTG ACTACTCGCA TGCCTGATCT
AAATGGACAG TCGGCAGGCT AGTGCTAATT ACCGGAAGTA CGAACGAGCt
ATGCaGAGCG ACTCATCgTT GTGAAATCGA GCCTATCTGC AgGACCTAAT

Total bases: 200
A: 50
C: 48
G: 51
T: 51

Phix[edit]

string dna = repeat(' ',200+rand(300))
for i=1 to length(dna) do dna[i] = "ACGT"[rand(4)] end for
 
procedure show()
    sequence acgt = repeat(0,5)
    for i=1 to length(dna) do
        acgt[find(dna[i],"ACGT")] += 1
    end for
    acgt[$] = sum(acgt)
    sequence s = split(trim(join_by(split(join_by(dna,1,10,""),"\n"),1,5," ")),"\n")
    for i=1 to length(s) do
        printf(1,"%3d: %s\n",{(i-1)*50+1,s[i]})
    end for
    printf(1,"\nBase counts: A:%d, C:%d, G:%d, T:%d, total:%d\n",acgt)
end procedure
 
procedure mutate()
    printf(1,"\n")
    for i=1 to 10 do
        integer p = rand(length(dna)),
                sdi = "SDI"[rand(3)],
                rep = "ACGT"[rand(4)],
                was = dna[p]
        switch sdi do
            case 'S':dna[p] = rep           printf(1,"swapped %c at %d for %c\n",{was,p,rep})
            case 'D':dna[p..p] = ""         printf(1,"deleted %c at %d\n",{was,p})
            case 'I':dna[p..p-1] = ""&rep   printf(1,"inserted %c at %d, before %c\n",{rep,p,was})
        end switch
    end for
    printf(1,"\n")
end procedure
 
show()
mutate()
show()
Output:
  1: ATAGACCGAT GTGTAGGTCT CGAACATCCC TGGGGTAGCT CAGCTTGGGG
 51: GTTGACCTGT CTTGCTCCCA TGAACTGAGG GATTTGGAAA TAACGCTTAT
101: AACTGCGGGG GATTGATATG GGACATTGTT GCTGTAGGGC TTCGGCGTGC
151: TTAGAAACAA GAGAACACCA ATTTCGATAG ACCAGGTTTC GTCCCGCTAC
201: GAGTGATAGT AGCGCGTTAG GATTAATAAT CAGGGAGAGC ATTAAACATT
251: CTAAAAACTG ACATTCCCGA GGTGGAACCC GAGTTGATAA CGAGTATGCT
301: CTGAAAAATT AATTGATTGA TCCGCGACAC TATCACACCG TCTTCGCCGT
351: TGTAATGCAT GCTGGCTTAG CATCCGGATG CTCTTCTACC GATCTTAAGG
401: CGCGCACTCC CCAAGGAGCA TAGAAGCATC CCCGGCCCTC GACAGAGTCT
451: CCCAGTGTAA GTGTCTTTAT CCAAAATC

Base counts: A:125, C:111, G:120, T:122, total:478

inserted C at 58, before T
deleted T at 2
deleted C at 169
deleted G at 80
inserted A at 331, before T
swapped C at 27 for T
inserted A at 22, before A
inserted T at 190, before G
swapped C at 195 for C
inserted A at 274, before G

  1: AAGACCGATG TGTAGGTCTC GAAACATTCC TGGGGTAGCT CAGCTTGGGG
 51: GTTGACCCTG TCTTGCTCCC ATGAACTGAG GATTTGGAAA TAACGCTTAT
101: AACTGCGGGG GATTGATATG GGACATTGTT GCTGTAGGGC TTCGGCGTGC
151: TTAGAAACAA GAGAACACAA TTTCGATAGA CCAGGTTTCT GTCCCGCTAC
201: GAGTGATAGT AGCGCGTTAG GATTAATAAT CAGGGAGAGC ATTAAACATT
251: CTAAAAACTG ACATTCCCGA GGTAGGAACC CGAGTTGATA ACGAGTATGC
301: TCTGAAAAAT TAATTGATTG ATCCGCGACA CTAATCACAC CGTCTTCGCC
351: GTTGTAATGC ATGCTGGCTT AGCATCCGGA TGCTCTTCTA CCGATCTTAA
401: GGCGCGCACT CCCCAAGGAG CATAGAAGCA TCCCCGGCCC TCGACAGAGT
451: CTCCCAGTGT AAGTGTCTTT ATCCAAAATC

Base counts: A:128, C:110, G:119, T:123, total:480

PureBasic[edit]

#BASE$="ACGT"
#SEQLEN=200
#PROTOCOL=#True

Global dna.s
Define i.i

Procedure pprint()
  Define p.i, cnt.i, sum.i
  
  For p=1 To Len(dna) Step 50
    Print(RSet(Str(p-1)+": ",5))
    PrintN(Mid(dna,p,50))
  Next  
  PrintN("Base counts:")
  For p=1 To 4
    cnt=CountString(dna,Mid(#BASE$,p,1)) : sum+cnt
    Print(Mid(#BASE$,p,1)+": "+Str(cnt)+", ")
  Next
  PrintN("Total: "+Str(sum))  
EndProcedure

Procedure InsertAtPos(basenr.i,position.i)  
  If #PROTOCOL : PrintN("Insert base "+Mid(#BASE$,basenr,1)+" at position "+Str(position)) : EndIf
  dna=InsertString(dna,Mid(#BASE$,basenr,1),position)
EndProcedure

Procedure EraseAtPos(position.i)
  If #PROTOCOL : PrintN("Erase base "+Mid(dna,position,1)+" at position "+Str(position)) : EndIf
  If position>0 And position<=Len(dna)
    dna=Left(dna,position-1)+Right(dna,Len(dna)-position)
  EndIf  
EndProcedure

Procedure OverwriteAtPos(basenr.i,position.i) 
  If #PROTOCOL : PrintN("Change base at position "+Str(position)+" from "+Mid(dna,position,1)+" to "+Mid(#BASE$,basenr,1)) : EndIf
  If position>0 And position<=Len(dna)
    position-1
    PokeS(@dna+2*position,Mid(#BASE$,basenr,1),-1,#PB_String_NoZero)  
  EndIf  
EndProcedure

If OpenConsole()=0 : End 1 : EndIf
For i=1 To #SEQLEN : dna+Mid(#BASE$,Random(4,1),1) : Next
PrintN("Initial sequence:")
pprint()

For i=1 To 10
  Select Random(2)
    Case 0 : InsertAtPos(Random(4,1),Random(Len(dna),1))
    Case 1 : EraseAtPos(Random(Len(dna),1))
    Case 2 : OverwriteAtPos(Random(4,1),Random(Len(dna),1))
  EndSelect
Next

PrintN("After 10 mutations:")
pprint()
Input()
Output:
Initial sequence:
  0: AAGTTTACGTCGGACTTCATTAATCGGTTTAGTCAGACCCGATCCAAATC
 50: TTGCTTTCACTCCGCATTCTTCTCATGAGTAAAAGGCTGCTCCTGCACTA
100: AAGCGTTCTCAACACCTTGGAGAGCCATCTCGGTACTCCGCGCAAAATAG
150: CCATAGAGGGTATCAGGAAACGCATCGAAGGTTTAGCCGAACTAAGGTCT
Base counts:
A: 54, C: 52, G: 42, T: 52, Total: 200
Change base at position 7 from A to T
Insert base T at position 66
Erase base G at position 198
Insert base C at position 32
Change base at position 80 from A to G
Erase base A at position 2
Insert base C at position 33
Insert base C at position 201
Insert base G at position 70
Erase base T at position 187
After 10 mutations:
  0: AGTTTTCGTCGGACTTCATTAATCGGTTTACGCTCAGACCCGATCCAAAT
 50: CTTGCTTTCACTCCGCTATGTCTTCTCATGGGTAAAAGGCTGCTCCTGCA
100: CTAAAGCGTTCTCAACACCTTGGAGAGCCATCTCGGTACTCCGCGCAAAA
150: TAGCCATAGAGGGTATCAGGAAACGCATCGAAGGTTAGCCGAACTAAGTC
200: CT
Base counts:
A: 51, C: 55, G: 43, T: 53, Total: 202

Python[edit]

In function seq_mutate argument kinds selects between the three kinds of mutation. The characters I, D, and S are chosen from the string to give the kind of mutation to perform, so the more of that character, the more of that type of mutation performed.
Similarly parameter choice is chosen from to give the base for substitution or insertion - the more any base appears, the more likely it is to be chosen in any insertion/substitution.

import random
from collections import Counter

def basecount(dna):
    return sorted(Counter(dna).items())

def seq_split(dna, n=50):
    return [dna[i: i+n] for i in range(0, len(dna), n)]

def seq_pp(dna, n=50):
    for i, part in enumerate(seq_split(dna, n)):
        print(f"{i*n:>5}: {part}")
    print("\n  BASECOUNT:")
    tot = 0
    for base, count in basecount(dna):
        print(f"    {base:>3}: {count}")
        tot += count
    base, count = 'TOT', tot
    print(f"    {base:>3}= {count}")

def seq_mutate(dna, count=1, kinds="IDSSSS", choice="ATCG" ):
    mutation = []
    k2txt = dict(I='Insert', D='Delete', S='Substitute')
    for _ in range(count):
        kind = random.choice(kinds)
        index = random.randint(0, len(dna))
        if kind == 'I':    # Insert
            dna = dna[:index] + random.choice(choice) + dna[index:]
        elif kind == 'D' and dna:  # Delete
            dna = dna[:index] + dna[index+1:]
        elif kind == 'S' and dna:  # Substitute
            dna = dna[:index] + random.choice(choice) + dna[index+1:]
        mutation.append((k2txt[kind], index))
    return dna, mutation

if __name__ == '__main__':
    length = 250
    print("SEQUENCE:")
    sequence = ''.join(random.choices('ACGT', weights=(1, 0.8, .9, 1.1), k=length))
    seq_pp(sequence)
    print("\n\nMUTATIONS:")
    mseq, m = seq_mutate(sequence, 10)
    for kind, index in m:
        print(f" {kind:>10} @{index}")
    print()
    seq_pp(mseq)
Output:
SEQUENCE:
    0: GGAAGATTAGGTCACGGGCCTCATCTTGTGCGAGATAAATAATAACACTC
   50: AGCGATCATTAGAATGTATATTGTACGGGCATGTTTATCTACCATAGGTC
  100: CTGTCAAAAGATGGCTAGCTGCAATTTTTTCTTCTAGATCCCGATTACTG
  150: CGGTATTTTTGTATAACGTGCTAAACGGTGTGTTTTCAGGTCGGCCTGCT
  200: AATCTAACGCCAGTGGACTTGGGATGGACGCCCAACAACTGAGAGCGCGG

  BASECOUNT:
      A: 64
      C: 51
      G: 62
      T: 73
    TOT= 250


MUTATIONS:
 Substitute @138
 Substitute @72
     Insert @103
     Insert @129
     Insert @124
     Delete @52
     Delete @202
 Substitute @200
     Insert @158
     Delete @32

    0: GGAAGATTAGGTCACGGGCCTCATCTTGTGCGGATAAATAATAACACTCA
   50: GGATCATTAGAATGTATATTATACGGGCATGTTTATCTACCATAGGTCCT
  100: GCTCAAAAGATGGCTAGCTGCAGATTTTGTTCTTCTAGAGCCCGATTACT
  150: GCGGTATGTTTTGTATAACGTGCTAAACGGTGTGTTTTCAGGTCGGCCTG
  200: CTATCTAACGCCAGTGGACTTGGGATGGACGCCCAACAACTGAGAGCGCG
  250: G

  BASECOUNT:
      A: 63
      C: 51
      G: 65
      T: 72
    TOT= 251

Quackery[edit]

prettyprint and tallybases are defined at Bioinformatics/base count#Quackery.

  [ $ "ACGT" 4 random peek ]      is randomgene     (   --> c )

  [ $ "" swap times
      [ randomgene join ] ]       is randomsequence ( n --> $ )

  [ dup size random 
    3 random 
    [ table 
      [ pluck drop ]
      [ randomgene unrot stuff ]
      [ randomgene unrot poke ] ] 
    do ]                          is mutate         ( $ --> $ )

   200 randomsequence
   dup prettyprint cr cr dup tallybases
   cr cr say "Mutating..." cr
   10 times mutate
   dup prettyprint cr cr tallybases
Output:
   0 CGCCCGCACC TAGAACCATT AAGCTGTCTG GTGTCGGGAT CGTCACATTA
  50 CGCCCCTTGT TGCGTCGGCG CCGATGCGAA GGCATAATAT GTGGTCTAAT
 100 GTCATGCGTG CCCGGGGAAT CTGGCGCGAC CGTCATGGCA AACCGCATCC
 150 CCTCAGCAAA TTACTAGCTG GTTGATTTTC ATCATAGGCC TGATCATGTG

 adenine 41
 cytosine 56
 guanine 53
 thymine 50

 total 200

Mutating...

   0 AGGCCCGCAC CTAGAACCAT TAAGCTGTCT GGTGTCGGGA TCGTCACATT
  50 ACGCCCCTTG TGCGTCGGCG CCGATGCGAA GGCATAATAT GTGGTCTGAT
 100 GTCATGCGTG CGCGGGGAAT CTGGCGCGAC CGTCATGGCA AACCGCATCC
 150 CCTCAGCAAA ATTCTAGCTG GTTGATTTTA TCTATAGGCC TGACTCATGT
 200 G

 adenine 41
 cytosine 54
 guanine 56
 thymine 50

 total 201

Racket[edit]

#lang racket

(define current-S-weight (make-parameter 1))
(define current-D-weight (make-parameter 1))
(define current-I-weight (make-parameter 1))

(define bases '(#\A #\C #\G #\T))

(define (fold-sequence seq kons #:finalise (finalise (λ x (apply values x))) . k0s)
  (define (recur seq . ks)
    (if (null? seq)
      (call-with-values (λ () (apply finalise ks)) (λ vs (apply values vs)))
      (call-with-values (λ () (apply kons (car seq) ks)) (λ ks+ (apply recur (cdr seq) ks+)))))
  (apply recur (if (string? seq) (string->list (regexp-replace* #px"[^ACGT]" seq "")) seq) k0s))

(define (sequence->pretty-printed-string seq)
  (define (fmt idx cs-rev) (format "~a: ~a" (~a idx #:width 3 #:align 'right) (list->string (reverse cs-rev))))
  (fold-sequence
    seq
    (λ (b n start-idx lns-rev cs-rev)
       (if (zero? (modulo n 50))
         (values (+ n 1) n (if (pair? cs-rev) (cons (fmt start-idx cs-rev) lns-rev) lns-rev) (cons b null))
         (values (+ n 1) start-idx lns-rev (cons b cs-rev))))
    0 0 null null
    #:finalise (λ (n idx lns-rev cs-rev)
                (string-join (reverse (if (null? cs-rev) lns-rev (cons (fmt idx cs-rev) lns-rev))) "\n"))))

(define (count-bases b as cs gs ts n)
  (values (+ as (if (eq? b #\A) 1 0)) (+ cs (if (eq? b #\C) 1 0)) (+ gs (if (eq? b #\T) 1 0)) (+ ts (if (eq? b #\G) 1 0)) (add1 n)))

(define (report-sequence s)
  (define-values (as cs gs ts n) (fold-sequence s count-bases 0 0 0 0 0))
  (printf "SEQUENCE:~%~a~%" (sequence->pretty-printed-string s))
  (printf "BASE COUNT:~%-----------~%~a~%"
          (string-join (map (λ (c n) (format " ~a :~a" c (~a #:width 4 #:align 'right n))) bases (list as ts cs gs)) "\n"))
  (printf "TOTAL: ~a~%" n))


(define (make-random-sequence-string l)
  (list->string (for/list ((_ l)) (list-ref bases (random 4)))))

(define (weighted-random-call weights-and-functions . args)
  (let loop ((r (random)) (wfs weights-and-functions))
    (if (<= r (car wfs)) (apply (cadr wfs) args) (loop (- r (car wfs)) (cddr wfs)))))

(define (mutate-S s)
  (let ((r (random (string-length s))) (i (string (list-ref bases (random 4)))))
    (printf "Mutate at ~a -> ~a~%" r i)
    (string-append (substring s 0 r) i (substring s (add1 r)))))

(define (mutate-D s)
  (let ((r (random (string-length s))))
    (printf "Delete at ~a~%" r)
    (string-append (substring s 0 r) (substring s (add1 r)))))

(define (mutate-I s)
  (let ((r (random (string-length s))) (i (string (list-ref bases (random 4)))))
    (printf "Insert at ~a -> ~a~%" r i)
    (string-append (substring s 0 r) i (substring s r))))

(define (mutate s)
  (define W (+ (current-S-weight) (current-D-weight) (current-I-weight)))
  (weighted-random-call
    (list (/ (current-S-weight) W) mutate-S (/ (current-D-weight) W) mutate-D (/ (current-I-weight) W) mutate-I)
    s))

(module+
  main
  (define initial-sequence (make-random-sequence-string 200))
  (report-sequence initial-sequence)
  (newline)
  (define s+ (for/fold ((s initial-sequence)) ((_ 10)) (mutate s)))
  (newline)
  (report-sequence s+)
  (newline)
  (define s+d (parameterize ((current-D-weight 5)) (for/fold ((s initial-sequence)) ((_ 10)) (mutate s))))
  (newline)
  (report-sequence s+d))
Output:
SEQUENCE:
  0: AATGCTTGTGTCACCGGCCACCCCCTATGTACATACACGGTTCAGCTGGC
 50: CTGGCAGAGCGTCTGACAACGGAGCTCTAAAACTTCCTTCTGACTAGAGC
100: TCATCCTCTCCCGGCTGAACTCCGTGGCTTACAATCAGACCCTACCCGTG
150: TCCAACTACGCAGTCGCCCGATGAGGCTGGGCTCACCACGTTGGGCGAGG
BASE COUNT:
-----------
 A :  41
 C :  49
 G :  67
 T :  43
TOTAL: 200

Insert at 176 -> T
Insert at 99 -> G
Delete at 154
Mutate at 188 -> A
Insert at 14 -> A
Mutate at 62 -> G
Delete at 110
Mutate at 79 -> T
Insert at 109 -> A
Insert at 46 -> C

SEQUENCE:
  0: AATGCTTGTGTCACACGGCCACCCCCTATGTACATACACGGTTCAGCCTG
 50: GCCTGGCAGAGCGGCTGACAACGGAGCTCTTAAACTTCCTTCTGACTAGA
100: GGCTCATCCTACCCCGGCTGAACTCCGTGGCTTACAATCAGACCCTACCC
150: GTGTCCACTACGCAGTCGCCCGATGAGGTCTGGGCTCACCACGTTGGGCG
200: AGG
BASE COUNT:
-----------
 A :  41
 C :  51
 G :  68
 T :  43
TOTAL: 203

Delete at 79
Delete at 105
Delete at 63
Delete at 37
Delete at 166
Mutate at 42 -> C
Delete at 142
Delete at 23
Delete at 91
Insert at 4 -> T

SEQUENCE:
  0: AATGTCTTGTGTCACCGGCCACCCCTATGTACATACAGGTTCCGCTGGCC
 50: TGGCAGAGCGTCGACAACGGAGCTCTAAACTTCCTTCTGACTGAGCTCAT
100: CCCTCCCGGCTGAACTCCGTGGCTTACAATCAGACCCTACCGTGTCCAAC
150: TACGCAGTCGCCCGTGAGGCTGGGCTCACCACGTTGGGCGAGG
BASE COUNT:
-----------
 A :  37
 C :  49
 G :  65
 T :  42
TOTAL: 193

Raku[edit]

(formerly Perl 6)

Works with: Rakudo version 2019.07.1

Unweighted mutations at this point. The mutated DNA strand has a "diff" operation performed on it which (in this specific case) renders the mutated base in lower case so it may be picked out more easily.


my @bases = <A C G T>;

# The DNA strand
my $dna = @bases.roll(200).join;


# The Task
put "ORIGINAL DNA STRAND:";
put pretty $dna;
put "\nTotal bases: ", +my $bases = $dna.comb.Bag;
put $bases.sort( ~*.key ).join: "\n";

put "\nMUTATED DNA STRAND:";
my $mutate = $dna.&mutate(10);
put pretty diff $dna, $mutate;
put "\nTotal bases: ", +my $mutated = $mutate.comb.Bag;
put $mutated.sort( ~*.key ).join: "\n";


# Helper subs
sub pretty ($string, $wrap = 50) {
    $string.comb($wrap).map( { sprintf "%8d: %s", $++ * $wrap, $_ } ).join: "\n"
}

sub mutate ($dna is copy, $count = 1) {
    $dna.substr-rw((^$dna.chars).roll, 1) = @bases.roll for ^$count;
    $dna
}

sub diff ($orig, $repl) {
    ($orig.comb Z $repl.comb).map( -> ($o, $r) { $o eq $r ?? $o !! $r.lc }).join
}
Output:
ORIGINAL DNA STRAND:
       0: ACGGATAGACCGTTCCTGCAAGCTGGTACGGTTCGAATGTTGACCTTATT
      50: CTCCGCAGCGCACTACCCGATCGGGTAACGTACTCTATATGATGCCTATT
     100: TTCCCCGCCTTACATCGGCGATCAATGTTCTTTTACGCTAACTAGGCGCA
     150: CGTCGTGCCTTACCGAGAGCCAGTTCGAAATCGTGCTGAAAATATCTGGA

Total bases: 200
A	45
C	55
G	45
T	55

MUTATED DNA STRAND:
       0: ACGGATAGcCCGTTCCTGCAAGCTGGTACGGTTCGAATGTTGACCTTATT
      50: CTCCGCAGCGCACTACCCGATCGGGTcACtcACTCTATATGAcGCCTAaT
     100: TTCCCCGCCTTACATCGGCGATCAATGTTCTTTTACGCTAACTAGGCGCA
     150: CGTCGTGCCTTACCcAGAGCCAGTTCGAAATCGTGCTGAAAATATCTGGA

Total bases: 200
A	44
C	60
G	43
T	53

Ring[edit]

row = 0
dnaList = []
base = ["A","C","G","T"]
long = 20
see "Initial sequence:" + nl
see "     12345678901234567890" + nl
see " " + long + ": "

for nr = 1 to 200
    row = row + 1
    rnd = random(3)+1
    baseStr = base[rnd]
    see baseStr # + " "
    if (row%20) = 0 and long < 200
        long = long + 20
        see nl
        if long < 100
           see " " + long + ": "
        else
           see "" + long + ": "
        ok
    ok
    add(dnaList,baseStr)
next
see nl+ "     12345678901234567890" + nl

baseCount(dnaList)

for n = 1 to 10
    rnd = random(2)+1
    switch rnd
           on 1
              baseSwap(dnaList)
           on 2
              baseDelete(dnaList)
           on 3
              baseInsert(dnaList)
    off
next
showDna(dnaList)
baseCount(dnaList)

func baseInsert(dnaList)
     rnd1 = random(len(dnaList)-1)+1
     rnd2 = random(len(base)-1)+1
     insert(dnaList,rnd1,base[rnd2])
     see "Insert base " + base[rnd2] + " at position " + rnd1 + nl
     return dnaList

func baseDelete(dnaList)
     rnd = random(len(dnaList)-1)+1
     del(dnaList,rnd)
     see "Erase base " + dnaList[rnd] + " at position " + rnd + nl
     return dnaList

func baseSwap(dnaList)
     rnd1 = random(len(dnaList))
     rnd2 = random(3)+1
     see "Change base at position " + rnd1 + " from " + dnaList[rnd1] + " to " + base[rnd2] + nl
     dnaList[rnd1] = base[rnd2]

func showDna(dnaList)
     long = 20
     see nl + "After 10 mutations:" + nl
     see "     12345678901234567890" + nl
     see " " + long + ": "
     for nr = 1 to len(dnaList)
         row = row + 1
         see dnaList[nr]
         if (row%20) = 0 and long < 200
             long = long + 20
             see nl
             if long < 100
                see " " + long + ": "
             else
                see "" + long + ": "
             ok
         ok
     next
     see nl+ "     12345678901234567890" + nl

func baseCount(dnaList)
     dnaBase = [:A=0, :C=0, :G=0, :T=0]
     lenDna = len(dnaList)
     for n = 1 to lenDna
         dnaStr = dnaList[n]
         switch dnaStr
                on "A"
                   strA = dnaBase["A"]
                   strA++
                   dnaBase["A"] = strA
                on "C"
                   strC = dnaBase["C"]
                   strC++
                   dnaBase["C"] = strC
                on "G"
                   strG = dnaBase["G"]
                   strG++
                   dnaBase["G"] = strG
                on "T"
                   strT = dnaBase["T"]
                   strT++
                   dnaBase["T"] = strT
         off
     next
     see nl
     see "A: " + dnaBase["A"] + ", "
     see "T: " + dnaBase["T"] + ", "
     see "C: " + dnaBase["C"] + ", "
     see "G: " + dnaBase["G"] + ", "
     total = dnaBase["A"] + dnaBase["T"] + dnaBase["C"] + dnaBase["G"]
     see "Total: " + total+ nl + nl
Output:
Initial sequence:
     12345678901234567890
 20: GGAGACACTAACGAAACAAA
 40: CAGGTATATAGGACATGTAG
 60: AAACAATTAATACGTAGCGA
 80: ACTGTGGCGCGAAAGAAGGG
100: ATGGACTCGGGTATTGCCGA
120: GATTCACGCCAACGAAAAAT
140: ATCTCAGATGACCGAAATAG
160: GGTCATCGAAATGAGTCCAA
180: ATAACTAAGTGGACAAAGGT
200: AGACCAAAAAGGACAGAAAA
     12345678901234567890

A: 84, T: 32, C: 34, G: 50, Total: 200

Change base at position 176 from A to A
Change base at position 178 from G to C
Change base at position 180 from T to C
Erase base T at position 28
Insert base A at position 17
Erase base C at position 52
Change base at position 118 from A to A
Insert base T at position 192
Insert base A at position 142
Erase base A at position 18

After 10 mutations:
     12345678901234567890
 20: GGAGACACTAACGAAACAAA
 40: CAGGTATTAGGACATGTAGA
 60: AACAATTAATCGTAGCGAAC
 80: TGTGGCGCGAAAGAAGGGAT
100: GGACTCGGGTATTGCCGAGA
120: TTCACGCCAACGAAAAATAT
140: CTCAGATGACCGAAATAGGG
160: TACATCGAAATGAGTCCAAA
180: TAACTAAGTGGACAAACGCA
200: GACCAAAAAGGATCAGAAAA
     12345678901234567890

A: 83, T: 32, C: 36, G: 49, Total: 200

Ruby[edit]

class DNA_Seq
  attr_accessor :seq
  
  def initialize(bases: %i[A C G T] , size: 0)
    @bases = bases
    @seq   = Array.new(size){ bases.sample }
  end
  
  def mutate(n = 10)
    n.times{|n| method([:s, :d, :i].sample).call}
  end
  
  def to_s(n = 50)
    just_size = @seq.size / n
    (0...@seq.size).step(n).map{|from|  "#{from.to_s.rjust(just_size)} " + @seq[from, n].join}.join("\n") +
    "\nTotal #{seq.size}: #{@seq.tally.sort.to_h.inspect}\n\n"
  end

  def s = @seq[rand_index]= @bases.sample  
  def d = @seq.delete_at(rand_index)
  def i = @seq.insert(rand_index, @bases.sample )
  alias :swap   :s 
  alias :delete :d
  alias :insert :i
    
  private
  def rand_index = rand( @seq.size )
end

puts test = DNA_Seq.new(size: 200)
test.mutate
puts test
test.delete
puts test
Output:
   0 TAAGGTGAGGAGTGTGATGGAGTTCGGTGGCTAGCCACAAATACAACACA
  50 CTCACCCATACTCGCCTCTGAAGCATGTTTTACTTGGATAGGGCCTACAG
 100 CAGTATTCACCCATTCCTCGGCTCCTGACCTGATGTAGGTCTATGTGCGG
 150 GAAAATAGGACAATACTGCCGAAGTCATATCCATTGGAGGGGCCCCAGGC
Total 200: {:A=>51, :C=>50, :G=>52, :T=>47}

   0 TAAGCTGAGGTGTGTGATGGAGTTCGGTGGCTAGCCACAAATACAACACA
  50 CTCACCCATACTCGCCTCTGAAGCATGTTTTAATTGGATAGGGCCTACAG
 100 CAGTATTCACCCTTCCTCCGCTCCTGACCTGATATAGGTCTATGTGCGGG
 150 AAAATAGGACAATACTGCCGAAGTCATATCCATTGGAGGGGCCCCAAGGC
Total 200: {:A=>52, :C=>51, :G=>49, :T=>48}

  0 TAAGCTGAGGTGTGTGATGGAGTTCGGTGGCTAGCCACAAATACAACACA
 50 CTCACCCATACTCGCTCTGAAGCATGTTTTAATTGGATAGGGCCTACAGC
100 AGTATTCACCCTTCCTCCGCTCCTGACCTGATATAGGTCTATGTGCGGGA
150 AAATAGGACAATACTGCCGAAGTCATATCCATTGGAGGGGCCCCAAGGC
Total 199: {:A=>52, :C=>50, :G=>49, :T=>48}

Rust[edit]

use rand::prelude::*;
use std::collections::HashMap;
use std::fmt::{Display, Formatter, Error};

pub struct Seq<'a> {
    alphabet: Vec<&'a str>,
    distr: rand::distributions::Uniform<usize>,
    pos_distr: rand::distributions::Uniform<usize>,
    seq: Vec<&'a str>,
}

impl Display for Seq<'_> {
    fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {

        let pretty: String = self.seq
            .iter()
            .enumerate()
            .map(|(i, nt)| if (i + 1) % 60 == 0 { format!("{}\n", nt) } else { nt.to_string() })
            .collect();
        
        let counts_hm = self.seq
            .iter()
            .fold(HashMap::<&str, usize>::new(), |mut m, nt| {
                *m.entry(nt).or_default() += 1;
                m
            });

        let mut counts_vec: Vec<(&str, usize)> = counts_hm.into_iter().collect();
        counts_vec.sort_by(|a, b| a.0.cmp(&b.0));
        let counts_string = counts_vec
            .iter()
            .fold(String::new(), |mut counts_string, (nt, count)| {
                counts_string += &format!("{} = {}\n", nt, count);
                counts_string
            });

        write!(f, "Seq:\n{}\n\nLength: {}\n\nCounts:\n{}", pretty, self.seq.len(), counts_string)
    }
}

impl Seq<'_> {
    pub fn new(alphabet: Vec<&str>, len: usize) -> Seq {
        let distr = rand::distributions::Uniform::new_inclusive(0, alphabet.len() - 1);
        let pos_distr = rand::distributions::Uniform::new_inclusive(0, len - 1);

        let seq: Vec<&str> = (0..len)
            .map(|_| {
                alphabet[thread_rng().sample(distr)]
            })
            .collect();
        Seq { alphabet, distr, pos_distr, seq }
    }

    pub fn insert(&mut self) {
        let pos = thread_rng().sample(self.pos_distr);
        let nt = self.alphabet[thread_rng().sample(self.distr)];
        println!("Inserting {} at position {}", nt, pos);
        self.seq.insert(pos, nt);
    }

    pub fn delete(&mut self) {
        let pos = thread_rng().sample(self.pos_distr);
        println!("Deleting {} at position {}", self.seq[pos], pos);
        self.seq.remove(pos);
    }

    pub fn swap(&mut self) {
        let pos = thread_rng().sample(self.pos_distr);
        let cur_nt = self.seq[pos];
        let new_nt = self.alphabet[thread_rng().sample(self.distr)];
        println!("Replacing {} at position {} with {}", cur_nt, pos, new_nt);
        self.seq[pos] = new_nt;
    }
}

fn main() {

    let mut seq = Seq::new(vec!["A", "C", "T", "G"], 200);
    println!("Initial sequnce:\n{}", seq);

    let mut_distr = rand::distributions::Uniform::new_inclusive(0, 2);

    for _ in 0..10 {
        let mutation = thread_rng().sample(mut_distr);

        if mutation == 0 {
            seq.insert()
        } else if mutation == 1 {
            seq.delete()
        } else {
            seq.swap()
        }
    }

    println!("\nMutated sequence:\n{}", seq);
}
Output:
Initial sequnce:
Seq:
TAAGTTTAGTCTGTTTACGAGATCTAGAGGAGGACACCGTGTAGAGGGGATTTGTCAGGA
CACATGCATGGCACCCTAGTCAAATAGTGCCGAGAACAGGCTCTCCTGAGAAAGTTAGGT
CTGCCGAAGTGACGAAGTGCACGTTATAGCTCTATTAAGTATGTTCGTTAACAGGTATTA
ATGCTCTTAGCCAAGACCGT

Length: 200

Counts:
A = 56
C = 38
G = 53
T = 53

Deleting C at position 197
Inserting T at position 157
Replacing C at position 149 with G
Replacing A at position 171 with G
Replacing T at position 182 with G
Deleting C at position 124
Inserting T at position 128
Replacing G at position 175 with C
Deleting A at position 35
Replacing A at position 193 with G

Mutated sequence:
Seq:
TAAGTTTAGTCTGTTTACGAGATCTAGAGGAGGACCCGTGTAGAGGGGATTTGTCAGGAC
ACATGCATGGCACCCTAGTCAAATAGTGCCGAGAACAGGCTCTCCTGAGAAAGTTAGGTC
TGCGAAGTTGACGAAGTGCACGTTATAGGTCTATTATAGTATGTTCGTTAGCAGCTATTA
AGGCTCTTAGCCAGGACGT

Length: 199

Counts:
A = 53
C = 36
G = 56
T = 54

Swift[edit]

let bases: [Character] = ["A", "C", "G", "T"]

enum Action: CaseIterable {
  case swap, delete, insert
}

@discardableResult
func mutate(dna: inout String) -> Action {
  guard let i = dna.indices.shuffled().first(where: { $0 != dna.endIndex }) else {
    fatalError()
  }

  let action = Action.allCases.randomElement()!

  switch action {
  case .swap:
    dna.replaceSubrange(i..<i, with: [bases.randomElement()!])
  case .delete:
    dna.remove(at: i)
  case .insert:
    dna.insert(bases.randomElement()!, at: i)
  }

  return action
}

var d = ""

for _ in 0..<200 {
  d.append(bases.randomElement()!)
}

func printSeq(_ dna: String) {
  for startI in stride(from: 0, to: dna.count, by: 50) {
    print("\(startI): \(dna.dropFirst(startI).prefix(50))")
  }

  print()
  print("Size: \(dna.count)")
  print()

  let counts = dna.reduce(into: [:], { $0[$1, default: 0] += 1 })

  for (char, count) in counts.sorted(by: { $0.key < $1.key }) {
    print("\(char): \(count)")
  }
}

printSeq(d)

print()

for _ in 0..<20 {
  mutate(dna: &d)
}

printSeq(d)
Output:
0: CCCTGTTACCCTTAAGTCACAAATCATGATAAGCAGCCTTCGAGCACTTC
50: GTGTCAAGCCTGATTCGAGCGCGCCGGTCATCCTCCGATAGAGCACGGGG
100: ACGCCCGCACTACCCCACTGGCGCTTGGTCGCTGAATAGGGCGCCCTTGG
150: TGGTGGATGGTCTTAAGCTGTCGCAAATCTAGCCCCGACCAAGAGAAGGC

Size: 200

A: 43
C: 62
G: 54
T: 41

0: CCCTGTTACCCTTAAGTCACAAATCATGTATAAAGCAGCCTTCGAGCACT
50: TCGTGTCAAGCCTGATTCGAGCGCGCTAGGGCATCCTCCGTATAAGAGCA
100: CCGGGGACGCCCGCACTTACCACCACTGGCGCTTGGTCGCGAATAGGGGC
150: GCCCTTTGGTGGTGGATTGGTCTTAAGTGTCGCAAATCTAGCCCCCGACC
200: AAGAGAAGGC

Size: 210

A: 47
C: 62
G: 56
T: 45

Vlang[edit]

Translation of: Go
import rand
import rand.seed

const bases = "ACGT"
 
// 'w' contains the weights out of 300 for each
// of swap, delete or insert in that order.
fn mutate(dna string, w [3]int) string {
    le := dna.len
    // get a random position in the dna to mutate
    p := rand.intn(le) or {0}
    // get a random number between 0 and 299 inclusive
    r := rand.intn(300) or {0}
    mut bytes := dna.bytes()
    match true {
		r < w[0] { // swap
			base := bases[rand.intn(4) or {0}]
			println("  Change @${p:3} ${[bytes[p]].bytestr()} to ${[base].bytestr()}")
			bytes[p] = base
		}
		r < w[0]+w[1] { // delete
			println("  Delete @${p:3} ${bytes[p]}")
			bytes.delete(p)
			//copy(bytes[p:], bytes[p+1:])
			bytes = bytes[0..le-1]
		}
		else { // insert
			base := bases[rand.intn(4) or {0}]
			bytes << 0
			bytes.insert(p,bytes[p])
			//copy(bytes[p+1:], bytes[p:])
			println("  Insert @${p:3} $base")
			bytes[p] = base
		}
    }
    return bytes.bytestr()
}
 
// Generate a random dna sequence of given length.
fn generate(le int) string {
    mut bytes := []u8{len:le}
    for i := 0; i < le; i++ {
        bytes[i] = bases[rand.intn(4) or {0}]
    }
    return bytes.bytestr()
}
 
// Pretty print dna and stats.
fn pretty_print(dna string, rowLen int) {
    println("SEQUENCE:")
    le := dna.len
    for i := 0; i < le; i += rowLen {
        mut k := i + rowLen
        if k > le {
            k = le
        }
        println("${i:5}: ${dna[i..k]}")
    }
    mut base_map := map[byte]int{} // allows for 'any' base
    for i in 0..le {
        base_map[dna[i]]++
    }
    mut bb := base_map.keys()
	bb.sort()
 
    println("\nBASE COUNT:")
    for base in bb {
        println("    $base: ${base_map[base]:3}")
    }
    println("    ------")
    println("    Σ: $le")
    println("    ======\n")
}
 
// Express weights as a string.
fn wstring(w [3]int) string {
    return "  Change: ${w[0]}\n  Delete: ${w[1]}\n  Insert: ${w[2]}\n"
}
 
fn main() {
    rand.seed(seed.time_seed_array(2))
    mut dna := generate(250)
    pretty_print(dna, 50)
    muts := 10
    w := [100, 100, 100]! // use e.g. {0, 300, 0} to choose only deletions
    println("WEIGHTS (ex 300):\n${wstring(w)}")
    println("MUTATIONS ($muts):")
    for _ in 0..muts {
        dna = mutate(dna, w)
    }
    println('')
    pretty_print(dna, 50)
}
Output:

Sample run:

SEQUENCE:
    0: CATTGGATTGCTAGTCGTTCAATAGCGAACGAACAGTTTGCATGAATCAG
   50: AGAGAGCCTGAAACCTTGGTTGGTATCGACACAACCTCATAATTCACATT
  100: CACAAACTTATTTTCGGATCCGCGAAAACGCAAGCGCATTAAGAGACACC
  150: CCCAGAGACTCAATTCCGGATTTGCGCTGCTATATACCCACATTGATGAT
  200: ATAGGGCTTAGAACGGCCTTAGCCCCGTCGGCTAGTTTCTGAAGTCTCTT

BASE COUNT:
    A:  71
    C:  62
    G:  52
    T:  65
    ------
    Σ: 250
    ======

WEIGHTS (ex 300):
  Change: 100
  Delete: 100
  Insert: 100

MUTATIONS (10):
  Delete @166 "C"
  Change @185 "C" to "G"
  Insert @230 "T"
  Insert @230 "G"
  Insert @226 "C"
  Change @162 "A" to "C"
  Change @236 "G" to "C"
  Insert @ 25 "C"
  Delete @ 75 "A"
  Change @104 "A" to "T"

SEQUENCE:
    0: CATTGGATTGCTAGTCGTTCAATAGCCGAACGAACAGTTTGCATGAATCA
   50: GAGAGAGCCTGAAACCTTGGTTGGTTCGACACAACCTCATAATTCACATT
  100: CACATACTTATTTTCGGATCCGCGAAAACGCAAGCGCATTAAGAGACACC
  150: CCCAGAGACTCACTTCGGATTTGCGCTGCTATATAGCCACATTGATGATA
  200: TAGGGCTTAGAACGGCCTTAGCCCCGCTCGGGTCTACTTTCTGAAGTCTC
  250: TT

BASE COUNT:
    A:  68
    C:  64
    G:  53
    T:  67
    ------
    Σ: 252
    ======

Wren[edit]

Translation of: Go
Library: Wren-sort
Library: Wren-fmt
import "random" for Random
import "/fmt" for Fmt
import "/sort" for Sort

var rand = Random.new()
var bases = "ACGT"

// 'w' contains the weights out of 300 for each
// of swap, delete or insert in that order.
var mutate = Fn.new { |dna, w|
    var le = dna.count 
    // get a random position in the dna to mutate
    var p = rand.int(le)
    // get a random number between 0 and 299 inclusive
    var r = rand.int(300)
    var chars = dna.toList
    if (r < w[0]) {   // swap
        var base = bases[rand.int(4)]
        Fmt.print("  Change @$3d $q to $q", p, chars[p], base)
        chars[p] = base
    } else if (r < w[0] + w[1]) {  // delete
        Fmt.print("  Delete @$3d $q", p, chars[p])
        chars.removeAt(p)
    } else { // insert
        var base = bases[rand.int(4)]
        Fmt.print("  Insert @$3d $q", p, base)
        chars.insert(p, base)
    }
    return chars.join()
}

// Generate a random dna sequence of given length.
var generate = Fn.new { |le|
    var chars = [""] * le
    for (i in 0...le) chars[i] = bases[rand.int(4)] 
    return chars.join()
}

// Pretty print dna and stats.
var prettyPrint = Fn.new { |dna, rowLen|
    System.print("SEQUENCE:")
    var le = dna.count
    var i = 0
    while (i < le) {
        var k = i + rowLen
        if (k > le) k = le
        Fmt.print("$5d: $s", i, dna[i...k])
        i =  i + rowLen
    }
    var baseMap = {}
    for (i in 0...le) {
        var v = baseMap[dna[i]]
        baseMap[dna[i]] = (v) ? v + 1 : 1
    }
    var bases = []
    for (k in baseMap.keys) bases.add(k)
    Sort.insertion(bases) // get bases into alphabetic order
    System.print("\nBASE COUNT:")
    for (base in bases) Fmt.print("    $s: $3d", base, baseMap[base])
    System.print("    ------")
    System.print("    Σ: %(le)")
    System.print("    ======\n")
}

// Express weights as a string.
var wstring = Fn.new { |w|
    return Fmt.swrite("  Change: $d\n  Delete: $d\n  Insert: $d\n", w[0], w[1], w[2])
}

var dna = generate.call(250)
prettyPrint.call(dna, 50)
var muts = 10
var w = [100, 100, 100] // use e.g. {0, 300, 0} to choose only deletions
Fmt.print("WEIGHTS (ex 300):\n$s", wstring.call(w))
Fmt.print("MUTATIONS ($d):", muts)
for (i in 0...muts) dna = mutate.call(dna, w)
System.print()
prettyPrint.call(dna, 50)
Output:

Sample run:

SEQUENCE:
    0: CATTGGATTGCTAGTCGTTCAATAGCGAACGAACAGTTTGCATGAATCAG
   50: AGAGAGCCTGAAACCTTGGTTGGTATCGACACAACCTCATAATTCACATT
  100: CACAAACTTATTTTCGGATCCGCGAAAACGCAAGCGCATTAAGAGACACC
  150: CCCAGAGACTCAATTCCGGATTTGCGCTGCTATATACCCACATTGATGAT
  200: ATAGGGCTTAGAACGGCCTTAGCCCCGTCGGCTAGTTTCTGAAGTCTCTT

BASE COUNT:
    A:  71
    C:  62
    G:  52
    T:  65
    ------
    Σ: 250
    ======

WEIGHTS (ex 300):
  Change: 100
  Delete: 100
  Insert: 100

MUTATIONS (10):
  Delete @166 "C"
  Change @185 "C" to "G"
  Insert @230 "T"
  Insert @230 "G"
  Insert @226 "C"
  Change @162 "A" to "C"
  Change @236 "G" to "C"
  Insert @ 25 "C"
  Delete @ 75 "A"
  Change @104 "A" to "T"

SEQUENCE:
    0: CATTGGATTGCTAGTCGTTCAATAGCCGAACGAACAGTTTGCATGAATCA
   50: GAGAGAGCCTGAAACCTTGGTTGGTTCGACACAACCTCATAATTCACATT
  100: CACATACTTATTTTCGGATCCGCGAAAACGCAAGCGCATTAAGAGACACC
  150: CCCAGAGACTCACTTCGGATTTGCGCTGCTATATAGCCACATTGATGATA
  200: TAGGGCTTAGAACGGCCTTAGCCCCGCTCGGGTCTACTTTCTGAAGTCTC
  250: TT

BASE COUNT:
    A:  68
    C:  64
    G:  53
    T:  67
    ------
    Σ: 252
    ======

Yabasic[edit]

Translation of: Phix
// Rosetta Code problem: http://rosettacode.org/wiki/Sequence_mutation
// by Galileo, 07/2022

r = int(ran(300))

for i = 1 to 200 + r : dna$ = dna$ + mid$("ACGT", int(ran(4))+1, 1) : next

sub show()
    local acgt(4), i, j, x, total
    
    for i = 1 to len(dna$)
        x = instr("ACGT", mid$(dna$, i, 1))
        acgt(x) = acgt(x) + 1
    next
    
    for i = 1 to 4 : total = total + acgt(i) : next
    
    for i = 1 to len(dna$) step 50
        print i, ":\t";
        for j = 0 to 49 step 10
            print mid$(dna$, i+j, 10), " ";
        next
        print
    next
    print "\nBase counts: A: ", acgt(1), ", C: ", acgt(2), ", G: ", acgt(3), ", T: ", acgt(4), ", total: ", total
end sub


sub mutate()
    local i, p, sdi$, rep$, was$
    
    print
    
    for i = 1 to 10
        p = int(ran(len(dna$))) + 1
        sdi$ = mid$("SDI", int(ran(3)) + 1, 1)
        rep$ = mid$("ACGT", int(ran(4)) + 1, 1)
        was$ = mid$(dna$, p, 1)
        switch sdi$
            case "S": mid$(dna$, p, 1) = rep$ 
                      print "swapped ", was$, " at ", p, " for ", rep$ : break
            case "D": dna$ = left$(dna$, p - 1) + right$(dna$, len(dna$) - p)
                      print "deleted ", was$, " at ", p : break
            case "I": dna$ = left$(dna$, p - 1) + rep$ + right$(dna$, (len(dna$) - p + 1))
                      print "inserted ", rep$, " at ", p, ", before ", was$ : break
        end switch
    next
    print
end sub

show()
mutate()
show()
Output:
1:      TCCATCGTGG GATCGCTCTA GCGGTATGCT ATCATTCCTA TAGCAATTCT
51:     CAGGGGGCCC GACGGCGCCG ATCACATGTG ATCCTTGTGT GATCGCTTCA
101:    TGTCATGGCT TTCTAGACCT TGGATAAGCA TGTACGGTTG GACCAGTCGT
151:    GCGTCGGTAA ACAACGCATC TGTGTTATAT CCGTCGAATA ACCCATATGT
201:    CTCCAGTCTA ATCCCCTAAG CAACTGCTCA AGGTAAAATG CAAATACAGG
251:    TGAGGAGTCC TCGAAGGGGT CGCACCGCAA TATGGGCGTC CCTTATTGGC
301:    CCTCATCAGT AT

Base counts: A: 72, C: 81, G: 76, T: 83, total: 312

inserted G at 89, before G
swapped T at 174 for C
deleted A at 31
deleted G at 89
deleted C at 275
inserted A at 278, before A
inserted C at 200, before C
inserted C at 232, before G
deleted G at 10
swapped A at 124 for C

1:      TCCATCGTGG ATCGCTCTAG CGGTATGCTT CATTCCTATA GCAATTCTCA
51:     GGGGGCCCGA CGGCGCCGAT CACATGTGAT CCTTGTGTGA TCGCTTCATG
101:    TCATGGCTTT CTAGACCTTG GATCAGCATG TACGGTTGGA CCAGTCGTGC
151:    GTCGGTAAAC AACGCATCTG CGTTATATCC GTCGAATAAC CCATATGTCC
201:    TCCAGTCTAA TCCCCTAAGC AACTGCTCAA CGGTAAAATG CAAATACAGG
251:    TGAGGAGTCC TCGAAGGGGT CGCACGCAAA TATGGGCGTC CCTTATTGGC
301:    CCTCATCAGT AT

Base counts: A: 71, C: 84, G: 75, T: 82, total: 312
---Program done, press RETURN---

zkl[edit]

var [const] bases="ACGT", lbases=bases.toLower();
dna:=(190).pump(Data().howza(3),(0).random.fp(0,4),bases.get); // bucket of bytes

foreach s,m in (T("Original","Mutated").zip(T(True,False))){
   println("\n",s," DNA strand:"); dnaPP(dna);
   println("Base Counts: ", dna.len()," : ",
      dna.text.toUpper().counts()	// ("A",5, "C",10, ...)
      .pump(String,Void.Read,"%s(%d) ".fmt));
   if(m) mutate(dna,10,True);
}

fcn mutate(dna,count=1,verbose=False){
   if(verbose) println("Mutating:");
   do(count){ 
      n,rb := (0).random(dna.len()), lbases[(0).random(4)];
      switch( (0).random(3) ){
	 case(0){ if(verbose) println("Change[%d] '%s' to '%s'".fmt(n,dna.charAt(n),rb));
	    dna[n]=rb;
	 }
	 case(1){ if(verbose) println("Delete[%d] '%s'".fmt(n,dna.charAt(n)));
	    dna.del(n);
	 }
	 else{	  if(verbose) println("Insert[%d] '%s'".fmt(n,rb));
	    dna.insert(n,rb);
	 }
      }
   }
}

fcn dnaPP(dna,N=50){
   [0..*,50].zipWith(fcn(n,bases){ println("%6d: %s".fmt(n,bases.concat())) },
      dna.walker().walk.fp(50)).pump(Void);  // .pump forces the iterator
}
Output:
Original DNA strand:
     0: AACGACAGGTTCTCGATGCGTGTCTTCACACATGTGGAGTCGCCAAGGAT
    50: TGTTGATCAATGCGTAAACGTCTCCACGGGATACACGGGCAGCTTGCGGT
   100: GACGAGTGCGGACCACCAAAAAAGGTGGGATCCACGTTGAGGAGCCTCAC
   150: TACCTACGGCGTGATATGGCGGCAGGAGTCAAAAACTGCT
Base Counts: 190 : A(49) C(46) G(57) T(38) 
Mutating:
Insert[123] 'c'
Insert[69] 't'
Delete[5] 'C'
Delete[147] 'T'
Change[167] 'G' to 't'
Change[153] 'C' to 'c'
Insert[151] 't'
Insert[156] 't'
Delete[150] 'T'
Change[102] 'C' to 'g'

Mutated DNA strand:
     0: AACGAAGGTTCTCGATGCGTGTCTTCACACATGTGGAGTCGCCAAGGATT
    50: GTTGATCAATGCGTAAACtGTCTCCACGGGATACACGGGCAGCTTGCGGT
   100: GAgGAGTGCGGACCACCAAAAAAcGGTGGGATCCACGTTGAGGAGCCCAC
   150: tACcTtACGGCGTGATATtGCGGCAGGAGTCAAAAACTGCT
Base Counts: 191 : A(49) C(45) G(57) T(40)