Bioinformatics/Global alignment: Difference between revisions

Content added Content deleted
(Added Go)
(added Raku programming solution)
Line 507: Line 507:
TCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA
TCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA
Base counts: Other:0, A:74, C:57, G:75, T:94, total:300
Base counts: Other:0, A:74, C:57, G:75, T:94, total:300
</pre>

=={{header|Raku}}==
{{trans|Go}}
{{trans|Julia}}
<lang perl6># 20210209 Raku programming solution

# Print a per-character tally of seq, followed by the overall character count.
sub printCounts(\seq) {
    my %tally = seq.comb.Bag ;            # character => number of occurrences
    my \total = [+] %tally.values ;       # sum of all occurrence counts
    say "\nNucleotide counts for ", seq, " :";
    say %tally.kv, " and total length = ", total
}

# Length of the longest suffix of s1 that is also a prefix of s2
# (0 when the two strings do not overlap that way).
sub stringCentipede(\s1, \s2) {
    my \total = s1.chars ;
    # Try suffixes from longest to shortest; the first one that s2 begins
    # with is the longest overlap.
    for ^total -> \start {
        return total - start if s2.starts-with: s1.substr(start) ;
    }
    0
}

# Return the given fragments with exact duplicates removed and with every
# fragment that occurs inside another fragment dropped.
sub deduplicate {
    # Shortest first: after .unique, a fragment can only be contained in a
    # fragment that comes later in this ordering.
    my @sorted = @_.unique.sort: { $^a.chars <=> $^b.chars } ; # by length
    # Loop on the array itself rather than on the truthiness of the shifted
    # element, so a falsy fragment (e.g. the empty string) cannot end the
    # scan early and silently discard the remaining fragments.
    gather while @sorted {
        my $target = @sorted.shift ;
        take $target unless @sorted.grep: { .contains: $target }
    }
}

# Brute-force search for the shortest string that contains every fragment:
# each ordering of the de-duplicated fragments is merged left to right,
# appending every fragment after its overlap with the one before it.
sub shortestCommonSuperstring {
    my @fragments = deduplicate @_ ;
    # The plain concatenation is the starting candidate to beat.
    my $shortest = @fragments.join ;
    for @fragments.permutations -> @order {
        my $candidate = @order[0] ;
        # Walk adjacent pairs (a,b), (b,c), ... of this ordering.
        for @order.rotor(2 => -1) -> ($left, $right) {
            $candidate ~= $right.substr: stringCentipede($left, $right) ;
        }
        $shortest = $candidate if $candidate.chars < $shortest.chars ;
    }
    $shortest
}

# Driver: for each fragment list below, compute its shortest common
# superstring and print that string's character counts.
.&shortestCommonSuperstring.&printCounts for (

# Contains the same fragment (TA) several times.
<TA AAG TA GAA TA>,

# Contains fragments that occur inside another fragment (e.g. ATTAG in CATTAGGG).
<CATTAGGG ATTAG GGG TA>,

# Fragments written with U instead of T.
<AAGAUGGA GGAGCGCAUC AUCGCAAUAAGGA> ,

# Larger set of long fragments, including repeated entries.
<ATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTAT
GGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGT
CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA
TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC
AACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT
GCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTC
CGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCT
TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC
CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGC
GATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATT
TTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC
CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA
TCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGA
>,
)
</lang>

{{out}}
<pre>

Nucleotide counts for TAAGAA :
(T 1 A 4 G 1) and total length = 6

Nucleotide counts for CATTAGGG :
(G 3 A 2 T 2 C 1) and total length = 8

Nucleotide counts for AAGAUGGAGCGCAUCGCAAUAAGGA :
(A 10 U 3 C 4 G 8) and total length = 25

Nucleotide counts for CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA :
(C 57 G 75 A 74 T 94) and total length = 300
</pre>