Bioinformatics/Subsequence: Difference between revisions

m
m (syntax highlighting fixup automation)
 
(4 intermediate revisions by 3 users not shown)
Line 5:
Write a routine to find all the positions of a randomly generated subsequence   (four letters).
<br><br>
 
=={{header|11l}}==
{{trans|Python}}
 
<syntaxhighlight lang="11l">UInt32 seed = 34
F nonrandom_choice(lst)
:seed = (1664525 * :seed + 1013904223) [&] FFFF'FFFF
Line 28 ⟶ 27:
R r
 
F dna_findall(String needle, haystack) -> NVoid
V pp = positions(haystack, needle)
I pp.empty
Line 68 ⟶ 67:
 
=={{header|Action!}}==
<syntaxhighlight lang=Action"action!">DEFINE SEQLEN="200"
DEFINE SUBLEN="4"
 
Line 159 ⟶ 158:
83-86
</pre>
 
=={{header|Ada}}==
<syntaxhighlight lang=Ada"ada">with Ada.Text_Io;
with Ada.Strings.Fixed;
with Ada.Numerics.Discrete_Random;
Line 243 ⟶ 241:
Found at position: 371..374
Found at position: 380..383</pre>
 
=={{header|Arturo}}==
 
<syntaxhighlight lang="rebol">bases: [`A` `G` `C` `T`]
randSeq: join map 1..200 => [sample bases]
randSub: join map 1..4 => [sample bases]
Line 285 ⟶ 282:
Found subsequence at position: 71
Found subsequence at position: 169</pre>
 
=={{header|Factor}}==
{{works with|Factor|0.99 2021-02-05}}
<syntaxhighlight lang="factor">USING: accessors formatting grouping io kernel math
math.functions.integer-logs math.parser random regexp sequences ;
 
Line 316 ⟶ 312:
600 39 .biosub nl</syntaxhighlight>
{{out}}
<pre> 0: ATTCAAGGAC
0: ATTCAAGGAC
10: CACTATTAAC
20: CTGCATTGTG
Line 350 ⟶ 345:
145..149
289..293
312..316</pre>
 
</pre>
=={{header|FreeBASIC}}==
{{trans|Wren}}
<syntaxhighlight lang="vb">Const base_ = "ACGT"
 
Sub findDnaSubsequence(dnaSize As Integer, chunkSize As Integer)
Dim As String dnaSeq(1 To dnaSize)
Dim As Integer i, chunk
For i = 1 To dnaSize
dnaSeq(i) = Mid(base_, Int(Rnd * 4)+1, 1)
Next
Dim As String dnaStr
For i = 1 To dnaSize
dnaStr += dnaSeq(i)
Next
Dim As String dnaSubseq(1 To 4)
For i = 1 To 4
dnaSubseq(i) = Mid(base_, Int(Rnd * 4)+1, 1)
Next
Dim As String dnaSubstr
For i = 1 To 4
dnaSubstr += dnaSubseq(i)
Next
Print "DNA sequence:"
For chunk = 1 To Len(dnaStr) Step chunkSize
Print Using "###_._.###: &"; chunk; chunk+chunkSize-1; Mid(dnaStr, chunk, chunkSize)
Next
Print !"\nSubsequence to locate: "; dnaSubstr
Dim As Integer idx = Instr(dnaStr, dnaSubstr)
Print Iif(idx <> 0, "Matches found at the following indices:", "No matches found.")
Do While idx > 0
If idx <> 0 Then Print Using "###_._.###"; idx; idx + 3
idx = Instr(idx+4, dnaStr, dnaSubstr)
Loop
End Sub
 
findDnaSubsequence(200, 20)
Print
findDnaSubsequence(600, 40)
 
Sleep</syntaxhighlight>
{{out}}
<pre>DNA sequence:
1.. 20: TTATAGTCTTGGAGGCATGT
21.. 40: TAACTTATGCGGAGCAGACA
41.. 60: CGGAGTATGCATTCCTCTTA
61.. 80: CCAAACGGTGCTGCCCGCGC
81..100: ACTCGCTGTATTCCGTATCG
101..120: TCACATTATCTAAACCACGA
121..140: TTTCCAGCGTGCGTGGGAAG
141..160: GCCATGTTTAGTCGGGGGCC
161..180: AAGGTCTTTGGCTTATGCTG
181..200: TTTTTTTTTCTTCGGTTACA
 
Subsequence to locate: ATTT
Matches found at the following indices:
120..123
 
DNA sequence:
1.. 40: GTGCGGGCCGTTAGCAGCTACGAGTGCTAGATGGAACTAG
41.. 80: TCCCCGCTCCCAAATGCAAAGCGTCCCAGACCAGTCTTGA
81..120: AGCCCGTTAAATTACACCTGAACCGTTGCAAATGATCGAT
121..160: AGACGGGGTATAATAGCGGAAAACACAGGGGAACTGCATG
161..200: CAAGCTCGAGCCGCTGAAGGATGGCTCCCCCCCGAGTGTA
201..240: AGTGGATCTCGCCCAAATAGCGGGGGAACAAAGAAAGGTA
241..280: AGTCTTACTTCGCACGTCCCCTCTCATACACGCCAGGACT
281..320: AATGGATCATTCATAGGTGACGGGTGACTTGCGGTGTTTC
321..360: TAGTTGGAGTCACCCGTCAGCTTAGATCTAAGTATGAACC
361..400: GTAAGAGTTTGTAACTGCACCTTCCGTCTCTTCCTCTGTA
401..440: GGAACGCTTTTGCTTGTTATCAGATAGTGTCTCCTTATCA
441..480: TAGGACAGGTTCCTTGTGAAGGTCCACAGAGTTTGCCCGG
481..520: GGTTCGAATATACGACGCTTGTGGTTCCGGCACTATAACT
521..560: TCCGCAGTGTTGTCGACGCCCCTAGCTCCCGGGGTCTTTT
561..600: CGCTTCCCTATAGCGCGAAATGAGTGCAAGGGTACCGGCC
 
Subsequence to locate: GCAC
Matches found at the following indices:
252..255
377..380
510..513</pre>
 
=={{header|Go}}==
{{trans|Wren}}
<syntaxhighlight lang="go">package main
 
import (
Line 444 ⟶ 519:
388..391
</pre>
 
=={{header|jq}}==
{{works with|jq}}
Line 457 ⟶ 531:
`jot -r N MIN MAX` but a fourth argument can also be
used to specify a seed. An alternative would be to use `gshuf` along the lines of:
<syntaxhighlight lang="sh">
# For 200 pseudo-random integers in the range 0 to 3 inclusive:
gshuf -i 0-3 -r -n 200 --random-source=/dev/random
Line 463 ⟶ 537:
 
Note that the indices shown below are offsets (i.e., the index origin is taken to be 0).
<syntaxhighlight lang="sh">
#!/bin/bash
 
Line 494 ⟶ 568:
55 141 169
</pre>
 
=={{header|Julia}}==
<syntaxhighlight lang="julia">DNArand(n, bases=['A', 'T', 'C', 'G']) = String(rand(bases, n))
 
DNAsearch(needle, haystack, lap=true) = findall(needle, haystack, overlap=lap)
Line 512 ⟶ 585:
21:24 74:77 99:102
</pre>
 
=={{header|Nim}}==
<syntaxhighlight lang=Nim"nim">import random, sequtils, strutils
 
proc dnaSequence(n: Positive): string =
Line 571 ⟶ 643:
 
Subsequence found at positions: 61, 122, 170</pre>
 
=={{header|Perl}}==
<syntaxhighlight lang="perl">use strict;
use warnings;
use feature 'say';
Line 596 ⟶ 667:
TGCGAG >CCTG< TAGAGCCGGGCCTCAAATTAAACGAAAAAT
ATAAGTTTGCTTGGCACGCTGTACTACTTATCC >CCTG< ACT</pre>
 
=={{header|Phix}}==
Currently only searches for non-overlapped sequences, but it should be pretty obvious how to change that, in which case the next underline will simply partially overwrite the previous, so you'll get eg "<=<==>".
<!--<syntaxhighlight lang=Phix"phix">(phixonline)-->
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
Line 675 ⟶ 745:
GCTA does not occur
</pre>
 
=={{header|Python}}==
 
Line 681 ⟶ 750:
{{libheader|regex}}
 
<syntaxhighlight lang="python">
from random import choice
import regex as re
Line 729 ⟶ 798:
167:171
</pre>
 
=={{header|Racket}}==
 
<syntaxhighlight lang="racket">#lang racket
 
(define (rand-seq n)
Line 796 ⟶ 864:
350 : GGCCTCGACCCAATTTAACCTCCCACTCCGTGGGTACAGCTTGAACCCCC
((245 . 248) (250 . 253) (329 . 332) (386 . 389))</pre>
 
=={{header|Raku}}==
Chances are actually pretty small that a random 4 codon string will show up at all in a random 200 codon sequence. Bump up the sequence size to get a reasonable chance of multiple matches.
<syntaxhighlight lang="raku" line>use String::Splice:ver<0.0.3+>;
 
my $line = 80;
Line 849 ⟶ 916:
:* &nbsp; DNA proteins to be searched in the data &nbsp; &nbsp; &nbsp; &nbsp; (the default is four unique random proteins).
:* &nbsp; the seed for the RANDOM function so runs can be repeated with the same data &nbsp; &nbsp; (no default).
<syntaxhighlight lang="rexx">/*REXX pgm gens random DNA (ACGT) sequence & finds positions of a random 4─protein seq. */
parse arg totLen rndLen basePr oWidth Bevery rndDNA seed .
if totLen=='' | totLen=="," then totLen= 200 /*Not specified? Then use the default.*/
Line 931 ⟶ 998:
the random DNA proteins were found in positions: 5 6 16 69 157 158 159 340 796 797 962 963
</pre>
 
=={{header|Ring}}==
<syntaxhighlight lang="ring">
/*-----------------------------------
# Project : DNA subsequences
Line 1,186 ⟶ 1,252:
 
[https://i.imgur.com/5hhbRBK.mp4 Bioinformatics/Subsequence - video]
 
=={{header|Wren}}==
{{libheader|Wren-pattern}}
{{libheader|Wren-str}}
{{libheader|Wren-fmt}}
<syntaxhighlight lang=ecmascript"wren">import "random" for Random
import "./pattern" for Pattern
import "./str" for Str
import "./fmt" for Fmt
 
var rand = Random.new()
1,481

edits