Bioinformatics/base count: Difference between revisions

→‎{{header|Java}}: Rework java code. Add some comments.
(→‎{{header|Java}}: Rework java code. Add some comments.)
Line 838:
Σ: 500</pre>
=={{header|Java}}==
For counting the bases, we simply use a <code>HashMap</code>, and then use the <code>Map.merge</code>, inserting <code>1</code>, and using <code>Integer::sum</code> as the aggregation function. This effectively creates a <code>Map</code> that keeps a running count for us. Java ''does'' provide the <code>groupingBy</code> and <code>counting</code> collectors, which would ''generally'' make these kinds of operation easier. However, <code>String</code>’s <code>chars()</code> method returns a <code>IntStream</code>, which generally just makes everything more complicated. Or verbose. Or inefficient. Ultimately, doing it by hand is easier and more efficient than with streams. The best tool for this job though would be Guava’s <code>MultiSet</code>, which is a dedicated Key to Count container.
<lang Java>
 
import java.util.*;
Note that Java’s native strings are UCS-2/UTF-16: Each character is 2-byte long. If parsing from a '''very''' large ASCII/UTF8 text file, then <code>String</code> is a poor choice, as opposed to, say <code>byte[]</code>. For the purpose of this exercise though, using <code>byte[]</code> would just add uninteresting casts and bloat to the code, so we stick to <code>String</code>.
 
<lang Java>import java.util.HashMap;
import java.util.Map;
 
public class orderedSequence {
public static void main(String[] args) {
Sequence gene = new Sequence("CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATATTTAATTTTTCTATATAGCGATCTGTATTTAAGCAATTCATTTAGGTTATCGCCGCGATGCTCGGTTCGGACCGCCAAGCATCTGGCTCCACTGCTAGTGTCCTAAATTTGAATGGCAAACACAAATAAGATTTAGCAATTCGTGTAGACGACCGGGGACTTGCATGATGGGAGCAGCTTTGTTAAACTACGAACGTAAT");
public static void main(String[] args) {
gene.runSequence();
Sequence gene = new Sequence("CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATGCTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTGAGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGATGGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTTCGATTCTGCTTATAACACTATGTTCTTATGAAATGGATGTTCTGAGTTGGTCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATATTTAATTTTTCTATATAGCGATCTGTATTTAAGCAATTCATTTAGGTTATCGCCGCGATGCTCGGTTCGGACCGCCAAGCATCTGGCTCCACTGCTAGTGTCCTAAATTTGAATGGCAAACACAAATAAGATTTAGCAATTCGTGTAGACGACCGGGGACTTGCATGATGGGAGCAGCTTTGTTAAACTACGAACGTAAT");
}
gene.runSequence();
}
}
 
//separate** Separate class for defining behaviors */
public class Sequence {
private final String seq;
public Sequence(String sq) {
this.seq = sq;
}
this.seq = sq;
}
/** print the organized structure of the sequence */
public void prettyPrint() {
//turn sequence string into an array of characters
System.out.println("Sequence:");
public char[] getSequence() {
int i = 0;
char[] list = seq.toCharArray();
for ( ; i < seq.length() - 50 ; i += 50) {
return list;
System.out.printf("%5s : %s\n", i + 50, seq.substring(i, i + 50));
}
}
System.out.printf("%5s : %s\n", seq.length(), seq.substring(i));
//print the organized structure of the sequence
}
public void prettyPrint() {
/** display a base vs. frequency chart */
char[] l = this.getSequence();
public void displayCount() {
int i = 0;
Map<Character, Integer> counter = new HashMap<>();
System.out.println("Sequence: \n");
for (int i = 0 ; i < seq.length() ; ++i) {
counter.merge(seq.charAt(i), 1, Integer::sum);
}
 
System.out.println("Base vs. Count:");
while(i < l.length - 1) {
counter.forEach(
System.out.println("" + i + " : " + Arrays.toString(Arrays.copyOfRange(l, i, i + 50)) + "\n");
key, value -> System.out.printf("%5s : %s\n", key, value));
i += 50;
System.out.printf("%5s: %s\n", "SUM", seq.length());
}
}
}
public void runSequence() {
//count the frequency of each of the bases
this.prettyPrint();
public Map countBases() {
this.displayCount();
}
Map<String, Integer> counter = new HashMap<>();
counter.put("Sum", 0);
counter.put("A", 0);
char[] bases = this.getSequence();
for(char c : bases) {
counter.put("Sum", counter.get("Sum") + 1);
if(!counter.containsKey(String.valueOf(c)))
counter.put(String.valueOf(c), 1);
else
counter.put(String.valueOf(c), counter.get(String.valueOf(c)) + 1);
}
return counter;
}
//display a base vs. frequency chart
public void displayCount() {
Map<String, Integer> c = this.countBases();
System.out.println("Base vs. Count: \n");
c.forEach((String,Integer)->System.out.println(String + " : " + Integer + "\n"));
}
public void runSequence() {
this.prettyPrint();
this.displayCount();
}
}
 
</lang>{{out}}
{{out}}
<pre>
Sequence:
50 : CGTAAAAAATTACAACGTCCTTTGGCTATCTCTTAAACTCCTGCTAAATG
 
100 : CTCGTGCTTTCCAATTATGTAAGCGTTCCGAGACGGGGTGGTCGATTCTG
0 : [C, G, T, A, A, A, A, A, A, T, T, A, C, A, A, C, G, T, C, C, T, T, T, G, G, C, T, A, T, C, T, C, T, T, A, A, A, C, T, C, C, T, G, C, T, A, A, A, T, G]
150 : AGGACAAAGGTCAAGATGGAGCGCATCGAACGCAATAAGGATCATTTGAT
 
200 : GGGACGTTTCGTCGACAAAGTCTTGTTTCGAGAGTAACGGCTACCGTCTT
50 : [C, T, C, G, T, G, C, T, T, T, C, C, A, A, T, T, A, T, G, T, A, A, G, C, G, T, T, C, C, G, A, G, A, C, G, G, G, G, T, G, G, T, C, G, A, T, T, C, T, G]
250 : CGATTCTGCTTATAACACTATGTTCTTATGAAATGGATGTTCTGAGTTGG
 
300 : TCAGTCCCAATGTGCGGGGTTTCTTTTAGTACGTCGGGAGTGGTATTATA
100 : [A, G, G, A, C, A, A, A, G, G, T, C, A, A, G, A, T, G, G, A, G, C, G, C, A, T, C, G, A, A, C, G, C, A, A, T, A, A, G, G, A, T, C, A, T, T, T, G, A, T]
350 : TTTAATTTTTCTATATAGCGATCTGTATTTAAGCAATTCATTTAGGTTAT
 
400 : CGCCGCGATGCTCGGTTCGGACCGCCAAGCATCTGGCTCCACTGCTAGTG
150 : [G, G, G, A, C, G, T, T, T, C, G, T, C, G, A, C, A, A, A, G, T, C, T, T, G, T, T, T, C, G, A, G, A, G, T, A, A, C, G, G, C, T, A, C, C, G, T, C, T, T]
450 : TCCTAAATTTGAATGGCAAACACAAATAAGATTTAGCAATTCGTGTAGAC
 
500 : GACCGGGGACTTGCATGATGGGAGCAGCTTTGTTAAACTACGAACGTAAT
200 : [C, G, A, T, T, C, T, G, C, T, T, A, T, A, A, C, A, C, T, A, T, G, T, T, C, T, T, A, T, G, A, A, A, T, G, G, A, T, G, T, T, C, T, G, A, G, T, T, G, G]
Base vs. Count:
 
A : 129
250 : [T, C, A, G, T, C, C, C, A, A, T, G, T, G, C, G, G, G, G, T, T, T, C, T, T, T, T, A, G, T, A, C, G, T, C, G, G, G, A, G, T, G, G, T, A, T, T, A, T, A]
C : 97
 
T : 155
300 : [T, T, T, A, A, T, T, T, T, T, C, T, A, T, A, T, A, G, C, G, A, T, C, T, G, T, A, T, T, T, A, A, G, C, A, A, T, T, C, A, T, T, T, A, G, G, T, T, A, T]
G : 119
 
SUM: 500
350 : [C, G, C, C, G, C, G, A, T, G, C, T, C, G, G, T, T, C, G, G, A, C, C, G, C, C, A, A, G, C, A, T, C, T, G, G, C, T, C, C, A, C, T, G, C, T, A, G, T, G]
 
400 : [T, C, C, T, A, A, A, T, T, T, G, A, A, T, G, G, C, A, A, A, C, A, C, A, A, A, T, A, A, G, A, T, T, T, A, G, C, A, A, T, T, C, G, T, G, T, A, G, A, C]
 
450 : [G, A, C, C, G, G, G, G, A, C, T, T, G, C, A, T, G, A, T, G, G, G, A, G, C, A, G, C, T, T, T, G, T, T, A, A, A, C, T, A, C, G, A, A, C, G, T, A, A, T]
 
Base vs. Count:
 
A : 129
 
C : 97
 
T : 155
 
G : 119
 
Sum : 500
 
</pre>
 
 
=={{header|JavaScript}}==