Bioinformatics/Subsequence: Difference between revisions

m
(Added solution for Action!)
 
(5 intermediate revisions by 3 users not shown)
Line 5:
Write a routine to find all the positions of a randomly generated subsequence (four letters).
<br><br>
 
=={{header|11l}}==
{{trans|Python}}
 
<syntaxhighlight lang="11l">UInt32 seed = 34
F nonrandom_choice(lst)
:seed = (1664525 * :seed + 1013904223) [&] FFFF'FFFF
Line 28 ⟶ 27:
R r
 
F dna_findall(String needle, haystack) -> NVoid
V pp = positions(haystack, needle)
I pp.empty
Line 46 ⟶ 45:
print("\nSearch Sample: "sample_seq)
 
dna_findall(sample_seq, dna_seq)</syntaxhighlight>
 
{{out}}
Line 68 ⟶ 67:
 
=={{header|Action!}}==
<syntaxhighlight lang="action!">DEFINE SEQLEN="200"
DEFINE SUBLEN="4"
 
Line 138 ⟶ 137:
PrintE("Not found")
FI
RETURN</syntaxhighlight>
{{out}}
[https://gitlab.com/amarok8bit/action-rosetta-code/-/raw/master/images/Bioinformatics_subsequence.png Screenshot from Atari 8-bit computer]
Line 159 ⟶ 158:
83-86
</pre>
 
=={{header|Ada}}==
<syntaxhighlight lang="ada">with Ada.Text_Io;
with Ada.Strings.Fixed;
with Ada.Numerics.Discrete_Random;
Line 225 ⟶ 223:
New_Line;
end loop;
end Sub_Sequence;</syntaxhighlight>
{{out}}
<pre>Search sequence:
Line 243 ⟶ 241:
Found at position: 371..374
Found at position: 380..383</pre>
 
=={{header|Arturo}}==
 
<syntaxhighlight lang="rebol">bases: [`A` `G` `C` `T`]
randSeq: join map 1..200 => [sample bases]
randSub: join map 1..4 => [sample bases]
Line 264 ⟶ 261:
print ["Found subsequence at position:" idx]
idx: idx + 1
]</syntaxhighlight>
 
{{out}}
Line 285 ⟶ 282:
Found subsequence at position: 71
Found subsequence at position: 169</pre>
 
=={{header|Factor}}==
{{works with|Factor|0.99 2021-02-05}}
<syntaxhighlight lang="factor">USING: accessors formatting grouping io kernel math
math.functions.integer-logs math.parser random regexp sequences ;
 
Line 314 ⟶ 310:
 
80 10 .biosub nl
600 39 .biosub nl</syntaxhighlight>
{{out}}
<pre> 0: ATTCAAGGAC
0: ATTCAAGGAC
10: CACTATTAAC
20: CTGCATTGTG
Line 350 ⟶ 345:
145..149
289..293
312..316</pre>
 
</pre>
=={{header|FreeBASIC}}==
{{trans|Wren}}
<syntaxhighlight lang="vb">Const base_ = "ACGT" ' Letters from which findDnaSubsequence draws each random base
 
' Generates a random DNA strand of dnaSize bases and a random four-base
' subsequence, prints the strand in rows of chunkSize bases, then prints the
' 1-based start..end range of every occurrence of the subsequence.
'
' Parameters:
'   dnaSize   - number of bases in the generated strand
'   chunkSize - number of bases printed per output row
'
' Occurrences may overlap (e.g. "AAAA" occurs twice in "AAAAA"); each one is
' reported.
Sub findDnaSubsequence(dnaSize As Integer, chunkSize As Integer)
    Const subseqLen = 4
    Dim As Integer i, chunk, chunkEnd
    Dim As String dnaStr, dnaSubstr

    ' Draw the strand first and the subsequence second, one random base at a
    ' time, so the order of Rnd calls matches the original implementation.
    For i = 1 To dnaSize
        dnaStr += Mid(base_, Int(Rnd * Len(base_)) + 1, 1)
    Next
    For i = 1 To subseqLen
        dnaSubstr += Mid(base_, Int(Rnd * Len(base_)) + 1, 1)
    Next

    Print "DNA sequence:"
    For chunk = 1 To Len(dnaStr) Step chunkSize
        ' Clamp the label so a short final row does not claim indices past
        ' the end of the strand.
        chunkEnd = IIf(chunk + chunkSize - 1 < Len(dnaStr), chunk + chunkSize - 1, Len(dnaStr))
        Print Using "###_._.###: &"; chunk; chunkEnd; Mid(dnaStr, chunk, chunkSize)
    Next

    Print !"\nSubsequence to locate: "; dnaSubstr
    Dim As Integer idx = Instr(dnaStr, dnaSubstr)
    Print Iif(idx <> 0, "Matches found at the following indices:", "No matches found.")
    Do While idx > 0
        Print Using "###_._.###"; idx; idx + subseqLen - 1
        ' Resume one position later (not subseqLen later) so that overlapping
        ' occurrences are not skipped.
        idx = Instr(idx + 1, dnaStr, dnaSubstr)
    Loop
End Sub
 
' Seed the random number generator; without an explicit Randomize, Rnd is
' expected to produce the same strand and subsequence on every run.
Randomize

' Demonstrate with a 200-base strand in rows of 20 and a 600-base strand in
' rows of 40.
findDnaSubsequence(200, 20)
Print
findDnaSubsequence(600, 40)

' Keep the console window open until a key is pressed.
Sleep</syntaxhighlight>
{{out}}
<pre>DNA sequence:
1.. 20: TTATAGTCTTGGAGGCATGT
21.. 40: TAACTTATGCGGAGCAGACA
41.. 60: CGGAGTATGCATTCCTCTTA
61.. 80: CCAAACGGTGCTGCCCGCGC
81..100: ACTCGCTGTATTCCGTATCG
101..120: TCACATTATCTAAACCACGA
121..140: TTTCCAGCGTGCGTGGGAAG
141..160: GCCATGTTTAGTCGGGGGCC
161..180: AAGGTCTTTGGCTTATGCTG
181..200: TTTTTTTTTCTTCGGTTACA
 
Subsequence to locate: ATTT
Matches found at the following indices:
120..123
 
DNA sequence:
1.. 40: GTGCGGGCCGTTAGCAGCTACGAGTGCTAGATGGAACTAG
41.. 80: TCCCCGCTCCCAAATGCAAAGCGTCCCAGACCAGTCTTGA
81..120: AGCCCGTTAAATTACACCTGAACCGTTGCAAATGATCGAT
121..160: AGACGGGGTATAATAGCGGAAAACACAGGGGAACTGCATG
161..200: CAAGCTCGAGCCGCTGAAGGATGGCTCCCCCCCGAGTGTA
201..240: AGTGGATCTCGCCCAAATAGCGGGGGAACAAAGAAAGGTA
241..280: AGTCTTACTTCGCACGTCCCCTCTCATACACGCCAGGACT
281..320: AATGGATCATTCATAGGTGACGGGTGACTTGCGGTGTTTC
321..360: TAGTTGGAGTCACCCGTCAGCTTAGATCTAAGTATGAACC
361..400: GTAAGAGTTTGTAACTGCACCTTCCGTCTCTTCCTCTGTA
401..440: GGAACGCTTTTGCTTGTTATCAGATAGTGTCTCCTTATCA
441..480: TAGGACAGGTTCCTTGTGAAGGTCCACAGAGTTTGCCCGG
481..520: GGTTCGAATATACGACGCTTGTGGTTCCGGCACTATAACT
521..560: TCCGCAGTGTTGTCGACGCCCCTAGCTCCCGGGGTCTTTT
561..600: CGCTTCCCTATAGCGCGAAATGAGTGCAAGGGTACCGGCC
 
Subsequence to locate: GCAC
Matches found at the following indices:
252..255
377..380
510..513</pre>
 
=={{header|Go}}==
{{trans|Wren}}
<syntaxhighlight lang="go">package main
 
import (
Line 400 ⟶ 475:
fmt.Println()
findDnaSubsequence(600, 40)
}</syntaxhighlight>
 
{{out}}
Line 444 ⟶ 519:
388..391
</pre>
 
=={{header|jq}}==
{{works with|jq}}
Line 457 ⟶ 531:
`jot -r N MIN MAX` but a fourth argument can also be
used to specify a seed. An alternative would be to use `gshuf` along the lines of:
<syntaxhighlight lang="sh">
# For 200 pseudo-random integers in the range 0 to 3 inclusive:
gshuf -i 0-3 -r -n 200 --random-source=/dev/random
</syntaxhighlight>
 
Note that the indices shown below are offsets (i.e., the index origin is taken to be 0).
<syntaxhighlight lang="sh">
#!/bin/bash
 
Line 479 ⟶ 553:
"Zero-based indices of \($four):",
($strand | indices($four) | join(" "))
'</syntaxhighlight>
{{out}}
<pre>
Line 494 ⟶ 568:
55 141 169
</pre>
 
=={{header|Julia}}==
<syntaxhighlight lang="julia">DNArand(n, bases=['A', 'T', 'C', 'G']) = String(rand(bases, n))
 
DNAsearch(needle, haystack, lap=true) = findall(needle, haystack, overlap=lap)
Line 505 ⟶ 578:
println("Search sequence:\n$rand_string\nfor substring $subseq. Found at positions: ")
foreach(p -> print(rpad(p[2], 8), p[1] % 10 == 0 ? "\n" : ""), enumerate(DNAsearch(subseq, rand_string)))
</syntaxhighlight>{{out}}
<pre>
Search sequence:
Line 512 ⟶ 585:
21:24 74:77 99:102
</pre>
 
=={{header|Nim}}==
<syntaxhighlight lang="nim">import random, sequtils, strutils
 
proc dnaSequence(n: Positive): string =
Line 553 ⟶ 625:
else:
let tail = if pos.len == 1: ": " else: "s: "
echo "Subsequence found at position", tail, pos.join(", ")</syntaxhighlight>
 
{{out}}
Line 571 ⟶ 643:
 
Subsequence found at positions: 61, 122, 170</pre>
 
=={{header|Perl}}==
<syntaxhighlight lang="perl">use strict;
use warnings;
use feature 'say';
Line 585 ⟶ 656:
say "Target: $target";
say 'Matches at these positions:';
say (($string =~ s/.{1,40}\K/\n/gr) =~ s/($target)/ >$1< /gr);</syntaxhighlight>
{{out}}
<pre>Target: CCTG
Line 596 ⟶ 667:
TGCGAG >CCTG< TAGAGCCGGGCCTCAAATTAAACGAAAAAT
ATAAGTTTGCTTGGCACGCTGTACTACTTATCC >CCTG< ACT</pre>
 
=={{header|Phix}}==
Currently this only searches for non-overlapping occurrences, but it should be fairly obvious how to change that; in that case each underline would simply partially overwrite the previous one, so you would get e.g. "<=<==>".
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
Line 654 ⟶ 724:
<span style="color: #004080;">sequence</span> <span style="color: #000000;">idx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match_all</span><span style="color: #0000FF;">(</span><span style="color: #000000;">test</span><span style="color: #0000FF;">,</span><span style="color: #000000;">dna</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">show</span><span style="color: #0000FF;">(</span><span style="color: #000000;">dna</span><span style="color: #0000FF;">,</span><span style="color: #000000;">test</span><span style="color: #0000FF;">,</span><span style="color: #000000;">idx</span><span style="color: #0000FF;">)</span>
<!--</syntaxhighlight>-->
{{out}}
with cheat enabled
Line 675 ⟶ 745:
GCTA does not occur
</pre>
 
=={{header|Python}}==
 
Line 681 ⟶ 750:
{{libheader|regex}}
 
<syntaxhighlight lang="python">
from random import choice
import regex as re
Line 708 ⟶ 777:
 
dna_findall(sample_seq, dna_seq)
</syntaxhighlight>
{{out}}
 
Line 729 ⟶ 798:
167:171
</pre>
 
=={{header|Racket}}==
 
<syntaxhighlight lang="racket">#lang racket
 
(define (rand-seq n)
Line 753 ⟶ 821:
sub (report-sequence full) (subsequence-indices full sub)))
 
(module+ main (for ((i 4)) (Bioinformatics/Subsequence)))</syntaxhighlight>
 
{{out}}
Line 796 ⟶ 864:
350 : GGCCTCGACCCAATTTAACCTCCCACTCCGTGGGTACAGCTTGAACCCCC
((245 . 248) (250 . 253) (329 . 332) (386 . 389))</pre>
 
=={{header|Raku}}==
Chances are actually pretty small that a random 4 codon string will show up at all in a random 200 codon sequence. Bump up the sequence size to get a reasonable chance of multiple matches.
<syntaxhighlight lang="raku" line>use String::Splice:ver<0.0.3+>;
 
my $line = 80;
Line 820 ⟶ 887:
}
 
say $disp;</syntaxhighlight>
{{out}}
Show in custom div to better display highlighting.
Line 849 ⟶ 916:
:* &nbsp; DNA proteins to be searched in the data &nbsp; &nbsp; &nbsp; &nbsp; (the default is four unique random proteins).
:* &nbsp; the seed for the RANDOM function so runs can be repeated with the same data &nbsp; &nbsp; (no default).
<syntaxhighlight lang="rexx">/*REXX pgm gens random DNA (ACGT) sequence & finds positions of a random 4─protein seq. */
parse arg totLen rndLen basePr oWidth Bevery rndDNA seed .
if totLen=='' | totLen=="," then totLen= 200 /*Not specified? Then use the default.*/
Line 895 ⟶ 962:
if $\=='' then say right(idx, 7)"│" strip($, 'T') /*show residual protein data*/
say "───────┴"center('' , oWidth+10, '─')
say; return</syntaxhighlight>
{{out|output|text=&nbsp; when using the default inputs:}}
<pre>
Line 931 ⟶ 998:
the random DNA proteins were found in positions: 5 6 16 69 157 158 159 340 796 797 962 963
</pre>
 
=={{header|Ring}}==
<syntaxhighlight lang="ring">
/*-----------------------------------
# Project : DNA subsequences
Line 1,181 ⟶ 1,247:
 
//-----------------------------------------
</syntaxhighlight>
 
'''Output:'''
 
[https://i.imgur.com/5hhbRBK.mp4 Bioinformatics/Subsequence - video]
 
=={{header|Wren}}==
{{libheader|Wren-pattern}}
{{libheader|Wren-str}}
{{libheader|Wren-fmt}}
<syntaxhighlight lang="wren">import "random" for Random
import "./pattern" for Pattern
import "./str" for Str
import "./fmt" for Fmt
 
var rand = Random.new()
Line 1,227 ⟶ 1,292:
findDnaSubsequence.call(200, 20)
System.print()
findDnaSubsequence.call(600, 40)</syntaxhighlight>
 
{{out}}
1,481

edits