Bioinformatics/Global alignment: Difference between revisions

m
m (Renamed a variable.)
m (→‎{{header|Wren}}: Minor tidy)
 
(4 intermediate revisions by one other user not shown)
Line 182:
Total length 300
</pre>
 
=={{header|C++}}==
<syntaxhighlight lang="c++">
 
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <unordered_map>
#include <unordered_set>
#include <string>
#include <vector>
 
// Print a report of the given string to the standard output device.
void print_report(const std::string& text) {
std::unordered_map<char, int32_t> bases;
for ( const char& ch : text ) {
bases[ch]++;
}
 
const int32_t total = std::accumulate(bases.begin(), bases.end(), 0,
[&](int32_t previous_sum, std::pair<char, int32_t> entry) {
return previous_sum + entry.second;
});
 
std::cout << "Nucleotide counts for: " << ( ( text.length() > 50 ) ? "\n" : "" );
std::cout << text << std::endl;
std::cout << "Bases: A " << bases['A'] << ", C: " << bases['C'] << ", G: " << bases['G'] << ", T: " << bases['T']
<< ", total: " << total << "\n" << std::endl;
}
 
// Return all permutations of the given list of strings.
std::vector<std::vector<std::string>> permutations(std::vector<std::string>& list) {
int32_t indexes[list.size()] = {};
std::vector<std::vector<std::string>> result;
result.push_back(list);
int32_t i = 0;
while ( (uint64_t) i < list.size() ) {
if ( indexes[i] < i ) {
const int j = ( i % 2 == 0 ) ? 0 : indexes[i];
std::swap(list[i], list[j]);
result.push_back(list);
indexes[i]++;
i = 0;
} else {
indexes[i] = 0;
i++;
}
}
return result;
}
 
// Return 'before' concatenated with 'after', removing the longest suffix of 'before' that matches a prefix of 'after'.
std::string concatenate(const std::string& before, const std::string& after) {
for ( uint64_t i = 0; i < before.length(); ++i ) {
if ( after.starts_with(before.substr(i, before.length())) ) {
return before.substr(0, i) + after;
}
}
return before + after;
}
 
// Remove duplicate strings and strings which are substrings of other strings in the given list.
std::vector<std::string> deduplicate(const std::vector<std::string>& list) {
std::vector<std::string> singletons(list);
std::sort(singletons.begin(), singletons.end());
singletons.erase(std::unique(singletons.begin(), singletons.end()), singletons.end());
 
std::vector<std::string> result(singletons);
std::unordered_set<std::string> marked_for_removal;
for ( const std::string& test_word : result ) {
for ( const std::string& word : singletons ) {
if ( word != test_word && word.find(test_word) != std::string::npos ) {
marked_for_removal.emplace(test_word);
}
}
}
 
result.erase(std::remove_if(result.begin(), result.end(),
[&](std::string& word) {
return marked_for_removal.count(word) != 0;
}
), result.end());
 
return result;
}
 
// Return a set containing all of the shortest common superstrings of the given list of strings.
std::unordered_set<std::string> shortest_common_superstrings(const std::vector<std::string>& list) {
std::vector<std::string> deduplicated = deduplicate(list);
 
std::unordered_set<std::string> shortest;
shortest.emplace(std::reduce(list.begin(), list.end(), std::string("")));
 
uint64_t shortest_length;
for ( const std::string& word : list ) {
shortest_length += word.length();
}
 
for ( std::vector<std::string> permutation : permutations(deduplicated) ) {
std::string candidate;
for ( const std::string& word : permutation ) {
candidate = concatenate(candidate, word);
}
 
if ( candidate.length() < shortest_length ) {
shortest.clear();
shortest.emplace(candidate);
shortest_length = candidate.length();
} else if ( candidate.length() == shortest_length ) {
shortest.emplace(candidate);
}
}
return shortest;
}
 
int main() {
const std::vector<std::vector<std::string>> test_sequences = {
{ "TA", "AAG", "TA", "GAA", "TA" },
{ "CATTAGGG", "ATTAG", "GGG", "TA" },
{ "AAGAUGGA", "GGAGCGCAUC", "AUCGCAAUAAGGA" },
{ "ATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTAT",
"GGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGT",
"CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA",
"TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC",
"AACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT",
"GCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTC",
"CGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCT",
"TGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC",
"CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGC",
"GATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATT",
"TTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATC",
"CTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA",
"TCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGA" } };
 
for ( const std::vector<std::string>& test : test_sequences ) {
for ( const std::string& superstring : shortest_common_superstrings(test) ) {
print_report(superstring);
}
}
}
</syntaxhighlight>
<pre>
Nucleotide counts for: TAGAAG
Bases: A 3, C: 0, G: 2, T: 1, total: 6
 
Nucleotide counts for: TAAGAA
Bases: A 4, C: 0, G: 1, T: 1, total: 6
 
Nucleotide counts for: GAAGTA
Bases: A 3, C: 0, G: 2, T: 1, total: 6
 
Nucleotide counts for: CATTAGGG
Bases: A 2, C: 1, G: 3, T: 2, total: 8
 
Nucleotide counts for: AAGAUGGAGCGCAUCGCAAUAAGGA
Bases: A 10, C: 4, G: 8, T: 0, total: 25
 
Nucleotide counts for:
CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA
Bases: A 74, C: 57, G: 75, T: 94, total: 300
</pre>
 
=={{header|Go}}==
{{trans|Julia}}
Line 440 ⟶ 603:
}
// Return a set ofcontaining all of the shortest common superstrings of the given list of strings.
private static Set<String> shortestCommonSuperstrings(List<String> aList) {
List<String> deduplicated = deduplicate(aList);
Line 460 ⟶ 623:
}
 
// Remove duplicate wordsstrings and wordsstrings which are substrings of other wordsstrings in the given list.
private static List<String> deduplicate(List<String> aList) {
List<String> unique = aList.stream().distinct().collect(Collectors.toList());
Line 503 ⟶ 666:
} else {
indexes[i] = 0;
i ++= 1;
}
}
Line 1,752 ⟶ 1,915:
{{libheader|Wren-str}}
{{libheader|Wren-math}}
<syntaxhighlight lang="ecmascriptwren">import "./fmt" for Fmt
import "./seq" for Lst
import "./str" for Str
import "./math" for Int
 
/* Gets all permutations of a list of strings. */
9,479

edits