Bioinformatics/Global alignment: Difference between revisions
Content added Content deleted
(Added Go) |
(added Raku programming solution) |
||
Line 507: | Line 507: | ||
TCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA |
TCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA |
||
Base counts: Other:0, A:74, C:57, G:75, T:94, total:300 |
Base counts: Other:0, A:74, C:57, G:75, T:94, total:300 |
||
</pre> |
|||
=={{header|Raku}}== |
|||
{{trans|Go}} |
|||
{{trans|Julia}} |
|||
<lang perl6># 20210209 Raku programming solution |
|||
# Report how often each character occurs in `seq`, plus the overall length.
# Prints two lines: a heading naming the sequence, then the key/value tally
# followed by the total of all the counts.
sub printCounts(\seq) {
    my %tally = seq.comb.Bag;          # one entry per distinct character
    my $total = %tally.values.sum;     # sum of the counts == number of characters

    say "\nNucleotide counts for ", seq, " :";
    say %tally.kv, " and total length = ", $total
}
|||
# Return the length of the longest suffix of `s1` that is also a prefix of
# `s2`, or 0 when the end of `s1` does not overlap the start of `s2`.
sub stringCentipede(\s1, \s2) {
    my $lead  = s2.substr(0, 1);   # an overlap can only begin where s2's first char occurs
    my $start = 0;                 # position in s1 where the search resumes

    loop {
        # Jump straight to the next occurrence of s2's first character.
        my $hit = s1.index($lead, $start);
        return 0 without $hit;     # no further candidate: the strings do not overlap

        $start = $hit;
        # The tail of s1 from here must match the head of s2 in full.
        return s1.chars - $start if s2.starts-with: s1.substr($start);

        $start++;                  # false start; keep scanning to the right
    }
}
|||
# Remove redundant strings from the argument list: exact duplicates and any
# string that occurs as a substring of another one.  The survivors come back
# (lazily, via gather/take) ordered from shortest to longest.
sub deduplicate {
    # Shortest first, so each string only has to be checked against the
    # equal-or-longer strings that remain after it.
    my @sorted = @_.unique.sort: *.chars ;   # by length

    # Loop on the array being non-empty rather than on the truthiness of the
    # shifted value: an empty string is false and would otherwise stop the
    # loop immediately, silently discarding every remaining string.
    gather while @sorted {
        my $target = @sorted.shift;
        take $target unless @sorted.grep: { .contains: $target }
    }
}
|||
# Brute-force shortest common superstring of the strings passed in.
# The input is first reduced with `deduplicate`; then every ordering of the
# remaining strings is merged left to right, each string being appended
# minus the part that already overlaps its predecessor (as measured by
# `stringCentipede`).  The shortest merged result is returned.
sub shortestCommonSuperstring {
    my @fragments = deduplicate @_;

    # Baseline answer: plain concatenation with no overlaps exploited.
    my $shortest = @fragments.join;

    for @fragments.permutations -> @order {
        my $candidate = @order[0];

        # Walk adjacent pairs (previous, current) of this ordering.
        for 1 ..^ @order.elems -> $i {
            my $overlap = stringCentipede @order[$i - 1], @order[$i];
            $candidate ~= @order[$i].substr($overlap);
        }

        $shortest = $candidate if $candidate.chars < $shortest.chars;
    }

    $shortest
}
|||
# Driver: for each set of fragments, build its shortest common superstring
# and print the character counts of the result.
for (
    <TA AAG TA GAA TA>,
    <CATTAGGG ATTAG GGG TA>,
    <AAGAUGGA GGAGCGCAUC AUCGCAAUAAGGA>,
    <ATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTAT
     GGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGT
     CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA
     TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC
     AACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT
     GCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTC
     CGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCT
     TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC
     CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGC
     GATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATT
     TTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC
     CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA
     TCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGA
    >,
) -> @fragments {
    # Flatten the fragment list into individual arguments for the slurpy sub.
    printCounts(shortestCommonSuperstring(|@fragments))
}
|||
</lang> |
|||
{{out}} |
|||
<pre> |
|||
Nucleotide counts for TAAGAA : |
|||
(T 1 A 4 G 1) and total length = 6 |
|||
Nucleotide counts for CATTAGGG : |
|||
(G 3 A 2 T 2 C 1) and total length = 8 |
|||
Nucleotide counts for AAGAUGGAGCGCAUCGCAAUAAGGA : |
|||
(A 10 U 3 C 4 G 8) and total length = 25 |
|||
Nucleotide counts for CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA : |
|||
(C 57 G 75 A 74 T 94) and total length = 300 |
|||
</pre> |
</pre> |
||