Burrows–Wheeler transform: Difference between revisions

Content added Content deleted
m (→‎{{header|Phix}}: added syntax colouring, marked p2js compatible)
m (syntax highlighting fixup automation)
Line 18: Line 18:
{{trans|Python}}
{{trans|Python}}


<lang 11l>F bwt(String =s)
<syntaxhighlight lang=11l>F bwt(String =s)
‘Apply Burrows-Wheeler transform to input string.’
‘Apply Burrows-Wheeler transform to input string.’
assert("\002" !C s & "\003" !C s, ‘Input string cannot contain STX and ETX characters’)
assert("\002" !C s & "\003" !C s, ‘Input string cannot contain STX and ETX characters’)
Line 45: Line 45:
print(‘After transformation: ’transformed.replace("\2", ‘^’).replace("\3", ‘|’))
print(‘After transformation: ’transformed.replace("\2", ‘^’).replace("\3", ‘|’))
print(‘After inverse transformation: ’invTransformed)
print(‘After inverse transformation: ’invTransformed)
print()</lang>
print()</syntaxhighlight>


{{out}}
{{out}}
Line 72: Line 72:


=={{header|BQN}}==
=={{header|BQN}}==
<lang BQN>stx←@+2
<syntaxhighlight lang=BQN>stx←@+2
BWT ← { # Burrows-Wheeler Transform and its inverse as an invertible function
BWT ← { # Burrows-Wheeler Transform and its inverse as an invertible function
𝕊: "Input contained STX"!⊑stx¬∘∊𝕩 ⋄ (⍋↓𝕩) ⊏ stx∾𝕩;
𝕊: "Input contained STX"!⊑stx¬∘∊𝕩 ⋄ (⍋↓𝕩) ⊏ stx∾𝕩;
𝕊⁼: 1↓(⊑˜⍟(↕≠𝕩)⟜⊑ ⍋)⊸⊏ 𝕩
𝕊⁼: 1↓(⊑˜⍟(↕≠𝕩)⟜⊑ ⍋)⊸⊏ 𝕩
}
}
</syntaxhighlight>
</lang>
Example use:
Example use:
<lang> BWT "banana"
<syntaxhighlight lang=text> BWT "banana"
"annb␂aa"
"annb␂aa"
BWT⁼ BWT "banana"
BWT⁼ BWT "banana"
Line 106: Line 106:
BWT "␂abc"
BWT "␂abc"
Error: Input contained STX
Error: Input contained STX
</syntaxhighlight>
</lang>


=={{header|C}}==
=={{header|C}}==
{{trans|Python}}
{{trans|Python}}
<lang c>#include <string.h>
<syntaxhighlight lang=c>#include <string.h>
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
Line 208: Line 208:
}
}
return 0;
return 0;
}</lang>
}</syntaxhighlight>


{{out}}
{{out}}
Line 239: Line 239:
=={{header|C sharp|C#}}==
=={{header|C sharp|C#}}==
{{trans|D}}
{{trans|D}}
<lang csharp>using System;
<syntaxhighlight lang=csharp>using System;
using System.Collections.Generic;
using System.Collections.Generic;
using System.Linq;
using System.Linq;
Line 338: Line 338:
}
}
}
}
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>banana
<pre>banana
Line 366: Line 366:
=={{header|C++}}==
=={{header|C++}}==
{{trans|C#}}
{{trans|C#}}
<lang cpp>#include <algorithm>
<syntaxhighlight lang=cpp>#include <algorithm>
#include <iostream>
#include <iostream>
#include <vector>
#include <vector>
Line 464: Line 464:


return 0;
return 0;
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>banana
<pre>banana
Line 492: Line 492:
=={{header|D}}==
=={{header|D}}==
{{trans|Kotlin}}
{{trans|Kotlin}}
<lang d>import std.algorithm.iteration;
<syntaxhighlight lang=d>import std.algorithm.iteration;
import std.algorithm.mutation;
import std.algorithm.mutation;
import std.algorithm.searching;
import std.algorithm.searching;
Line 565: Line 565:
writeln;
writeln;
}
}
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>banana
<pre>banana
Line 593: Line 593:
=={{header|Factor}}==
=={{header|Factor}}==
Factor has a Burrows-Wheeler transform implementation in its standard library. In addition to the transformed sequence, the <code>bwt</code> word also outputs an index for use with the inverse <code>ibwt</code> word.
Factor has a Burrows-Wheeler transform implementation in its standard library. In addition to the transformed sequence, the <code>bwt</code> word also outputs an index for use with the inverse <code>ibwt</code> word.
<lang factor>USING: formatting io kernel math.transforms.bwt sequences ;
<syntaxhighlight lang=factor>USING: formatting io kernel math.transforms.bwt sequences ;
{
{
"banana" "dogwood" "TO BE OR NOT TO BE OR WANT TO BE OR NOT?"
"banana" "dogwood" "TO BE OR NOT TO BE OR WANT TO BE OR NOT?"
Line 601: Line 601:
2dup " bwt-->%3d %u\n" printf
2dup " bwt-->%3d %u\n" printf
ibwt " ibwt-> %u\n" printf nl
ibwt " ibwt-> %u\n" printf nl
] each</lang>
] each</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 623: Line 623:
=={{header|Go}}==
=={{header|Go}}==
{{trans|Python}}
{{trans|Python}}
<lang go>package main
<syntaxhighlight lang=go>package main


import (
import (
Line 697: Line 697:
fmt.Println(" -->", r, "\n")
fmt.Println(" -->", r, "\n")
}
}
}</lang>
}</syntaxhighlight>


{{out}}
{{out}}
Line 728: Line 728:
=={{header|Groovy}}==
=={{header|Groovy}}==
{{trans|Java}}
{{trans|Java}}
<lang groovy>class BWT {
<syntaxhighlight lang=groovy>class BWT {
private static final String STX = "\u0002"
private static final String STX = "\u0002"
private static final String ETX = "\u0003"
private static final String ETX = "\u0003"
Line 801: Line 801:
}
}
}
}
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>banana
<pre>banana
Line 828: Line 828:


=={{header|Haskell}}==
=={{header|Haskell}}==
<lang haskell>-- A straightforward, inefficient implementation of the Burrows–Wheeler
<syntaxhighlight lang=haskell>-- A straightforward, inefficient implementation of the Burrows–Wheeler
-- transform, based on the description in the Wikipedia article.
-- transform, based on the description in the Wikipedia article.
--
--
Line 872: Line 872:
prettyVal (In c) = c
prettyVal (In c) = c
prettyVal Pre = '^'
prettyVal Pre = '^'
prettyVal Post = '|'</lang>
prettyVal Post = '|'</syntaxhighlight>


{{out}}
{{out}}
Line 929: Line 929:
</pre>
</pre>


<lang>
<syntaxhighlight lang=text>
NB. articulate definition
NB. articulate definition
NB. local names (assignment =.) won't pollute the namespace
NB. local names (assignment =.) won't pollute the namespace
Line 968: Line 968:
EMPTY
EMPTY
)
)
</syntaxhighlight>
</lang>


=={{header|Java}}==
=={{header|Java}}==
{{trans|Kotlin}}
{{trans|Kotlin}}
<lang java>import java.util.ArrayList;
<syntaxhighlight lang=java>import java.util.ArrayList;
import java.util.List;
import java.util.List;


Line 1,048: Line 1,048:
}
}
}
}
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>banana
<pre>banana
Line 1,078: Line 1,078:
{{works with|jq}}
{{works with|jq}}
'''Works with gojq, the Go implementation of jq'''
'''Works with gojq, the Go implementation of jq'''
<lang jq># substitute ^ for STX and | for ETX
<syntaxhighlight lang=jq># substitute ^ for STX and | for ETX
def makePrintable:
def makePrintable:
if . == null then null
if . == null then null
Line 1,103: Line 1,103:
| first( .[] | select(endswith("\u0003")))
| first( .[] | select(endswith("\u0003")))
| .[1:-1] ;
| .[1:-1] ;
</syntaxhighlight>
</lang>
'''Tests'''
'''Tests'''
<syntaxhighlight lang=jq>
<lang jq>
def tests:
def tests:
(
(
Line 1,121: Line 1,121:
(if $t then " --> \($t | ibwt)\n" else "" end) ;
(if $t then " --> \($t | ibwt)\n" else "" end) ;


tests</lang>
tests</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 1,149: Line 1,149:


=={{header|Julia}}==
=={{header|Julia}}==
<lang julia>bwsort(vec) = sort(vec, lt = (a, b) -> string(a) < string(b))
<syntaxhighlight lang=julia>bwsort(vec) = sort(vec, lt = (a, b) -> string(a) < string(b))


function burrowswheeler_encode(s)
function burrowswheeler_encode(s)
Line 1,176: Line 1,176:
"\nInverse transformation: ", burrowswheeler_decode(burrowswheeler_encode(s)), "\n")
"\nInverse transformation: ", burrowswheeler_decode(burrowswheeler_encode(s)), "\n")
end
end
</lang>{{out}}
</syntaxhighlight>{{out}}
<pre>
<pre>
Original: BANANA
Original: BANANA
Line 1,199: Line 1,199:
=={{header|Kotlin}}==
=={{header|Kotlin}}==
{{trans|Python}}
{{trans|Python}}
<lang scala>// Version 1.2.60
<syntaxhighlight lang=scala>// Version 1.2.60


const val STX = "\u0002"
const val STX = "\u0002"
Line 1,259: Line 1,259:
println(" --> $r\n")
println(" --> $r\n")
}
}
}</lang>
}</syntaxhighlight>


{{output}}
{{output}}
Line 1,289: Line 1,289:


=={{header|Ksh}}==
=={{header|Ksh}}==
<lang ksh>
<syntaxhighlight lang=ksh>
#!/bin/ksh
#!/bin/ksh


Line 1,396: Line 1,396:
echo
echo
done
done
</syntaxhighlight>
</lang>
{{out}}<pre>
{{out}}<pre>
BANANA
BANANA
Line 1,423: Line 1,423:
=={{header|Lua}}==
=={{header|Lua}}==
{{trans|Java}}
{{trans|Java}}
<lang lua>STX = string.char(tonumber(2,16))
<syntaxhighlight lang=lua>STX = string.char(tonumber(2,16))
ETX = string.char(tonumber(3,16))
ETX = string.char(tonumber(3,16))


Line 1,507: Line 1,507:
end
end


main()</lang>
main()</syntaxhighlight>
{{out}}
{{out}}
<pre>banana
<pre>banana
Line 1,534: Line 1,534:


=={{header|Mathematica}} / {{header|Wolfram Language}}==
=={{header|Mathematica}} / {{header|Wolfram Language}}==
<lang Mathematica>ClearAll[BurrowWheeler, InverseBurrowWheeler]
<syntaxhighlight lang=Mathematica>ClearAll[BurrowWheeler, InverseBurrowWheeler]
BurrowWheeler[sin_String, {bdelim_, edelim_}] := Module[{s},
BurrowWheeler[sin_String, {bdelim_, edelim_}] := Module[{s},
s = Characters[bdelim <> sin <> edelim];
s = Characters[bdelim <> sin <> edelim];
Line 1,555: Line 1,555:
InverseBurrowWheeler[%, {"^", "|"}]
InverseBurrowWheeler[%, {"^", "|"}]
BurrowWheeler["dogwood", {"^", "|"}]
BurrowWheeler["dogwood", {"^", "|"}]
InverseBurrowWheeler[%, {"^", "|"}]</lang>
InverseBurrowWheeler[%, {"^", "|"}]</syntaxhighlight>
{{out}}
{{out}}
<pre>|ANNB^AA
<pre>|ANNB^AA
Line 1,567: Line 1,567:
Translation of the Wikipedia Python algorithm. The program uses characters '¹' and '²' to display STX and ETX.
Translation of the Wikipedia Python algorithm. The program uses characters '¹' and '²' to display STX and ETX.


<lang Nim>import algorithm
<syntaxhighlight lang=Nim>import algorithm
from sequtils import repeat
from sequtils import repeat
import strutils except repeat
import strutils except repeat
Line 1,633: Line 1,633:
echo "Original text: ", text
echo "Original text: ", text
echo "After transformation: ", transformed.displaybleString()
echo "After transformation: ", transformed.displaybleString()
echo "After inverse transformation: ", invTransformed</lang>
echo "After inverse transformation: ", invTransformed</syntaxhighlight>


{{out}}
{{out}}
Line 1,661: Line 1,661:


The first character in a Pascal string has index 1, but it's more convenient to describe the algorithm in terms of zero-based indices. The constant STR_BASE = 1 is used to make it clear where Pascal usage has been taken into account.
The first character in a Pascal string has index 1, but it's more convenient to describe the algorithm in terms of zero-based indices. The constant STR_BASE = 1 is used to make it clear where Pascal usage has been taken into account.
<lang pascal>
<syntaxhighlight lang=pascal>
program BurrowsWheeler;
program BurrowsWheeler;


Line 1,845: Line 1,845:
Test( 'SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES');
Test( 'SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES');
end.
end.
</syntaxhighlight>
</lang>
{{out}}
{{out}}
<pre>
<pre>
Line 1,886: Line 1,886:
=={{header|Perl}}==
=={{header|Perl}}==
{{trans|Raku}}
{{trans|Raku}}
<lang perl>use utf8;
<syntaxhighlight lang=perl>use utf8;
binmode STDOUT, ":utf8";
binmode STDOUT, ":utf8";


Line 1,921: Line 1,921:
}
}


print join "\n", @res;</lang>
print join "\n", @res;</syntaxhighlight>
{{out}}
{{out}}
<pre>Original: BANANA
<pre>Original: BANANA
Line 1,942: Line 1,942:
An efficient implementation, based mainly on http://spencer-carroll.com/an-easy-to-understand-explanation-of-the-burrows-wheeler-transform/ <br>
An efficient implementation, based mainly on http://spencer-carroll.com/an-easy-to-understand-explanation-of-the-burrows-wheeler-transform/ <br>
Takes around about ten seconds to transform and invert a 100K string. Note: requires 0.8.0+
Takes around about ten seconds to transform and invert a 100K string. Note: requires 0.8.0+
<!--<lang Phix>(phixonline)-->
<!--<syntaxhighlight lang=Phix>(phixonline)-->
<span style="color: #000080;font-style:italic;">-- demo\rosetta\burrows_wheeler.exw
<span style="color: #000080;font-style:italic;">-- demo\rosetta\burrows_wheeler.exw
--/*
--/*
Line 2,088: Line 2,088:
<span style="color: #000000;">test</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"TO BE OR NOT TO BE OR WANT TO BE OR NOT?"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">test</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"TO BE OR NOT TO BE OR WANT TO BE OR NOT?"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">test</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES"</span><span style="color: #0000FF;">)</span>
<span style="color: #000000;">test</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES"</span><span style="color: #0000FF;">)</span>
<!--</lang>-->
<!--</syntaxhighlight>-->
{{out}}
{{out}}
<pre>
<pre>
Line 2,108: Line 2,108:
Ref: [https://www.codespeedy.com/burrows-wheeler-transform-in-python/ Burrows Wheeler Transform in Python]
Ref: [https://www.codespeedy.com/burrows-wheeler-transform-in-python/ Burrows Wheeler Transform in Python]


<lang Python>
<syntaxhighlight lang=Python>
def bwt(s):
def bwt(s):
"""Apply Burrows-Wheeler transform to input string."""
"""Apply Burrows-Wheeler transform to input string."""
Line 2,125: Line 2,125:
s = [row for row in table if row.endswith("\003")][0] # Find the correct row (ending in ETX)
s = [row for row in table if row.endswith("\003")][0] # Find the correct row (ending in ETX)
return s.rstrip("\003").strip("\002") # Get rid of start and end markers
return s.rstrip("\003").strip("\002") # Get rid of start and end markers
</syntaxhighlight>
</lang>


=={{header|Raku}}==
=={{header|Raku}}==
Line 2,131: Line 2,131:
{{works with|Rakudo|2018.06}}
{{works with|Rakudo|2018.06}}


<lang perl6># STX can be any character that doesn't appear in the text.
<syntaxhighlight lang=raku line># STX can be any character that doesn't appear in the text.
# Using a visible character here for ease of viewing.
# Using a visible character here for ease of viewing.
Line 2,158: Line 2,158:
say 'Inverse transformed: ', ɯɹoɟsuɐɹʇ transform $phrase;
say 'Inverse transformed: ', ɯɹoɟsuɐɹʇ transform $phrase;
say '';
say '';
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>Original: BANANA
<pre>Original: BANANA
Line 2,182: Line 2,182:
Programming note: &nbsp; a little bit of code was added to support more (legal) characters in the input string for the '''BWT'''
Programming note: &nbsp; a little bit of code was added to support more (legal) characters in the input string for the '''BWT'''
<br>function. &nbsp; The error recovery and error messages are rudimentary when an illegal character in the input is detected.
<br>function. &nbsp; The error recovery and error messages are rudimentary when an illegal character in the input is detected.
<lang rexx>/*REXX program performs a Burrows─Wheeler transform (BWT) on a character string(s). */
<syntaxhighlight lang=rexx>/*REXX program performs a Burrows─Wheeler transform (BWT) on a character string(s). */
$.= /*the default text for (all) the inputs*/
$.= /*the default text for (all) the inputs*/
parse arg $.1 /*obtain optional arguments from the CL*/
parse arg $.1 /*obtain optional arguments from the CL*/
Line 2,239: Line 2,239:
_= @.j; @.j= @.k; @.k= _; ok= 0 /*swap two elements; flag as not done.*/
_= @.j; @.j= @.k; @.k= _; ok= 0 /*swap two elements; flag as not done.*/
end /*j*/
end /*j*/
end /*m*/; return</lang>
end /*m*/; return</syntaxhighlight>
{{out|output|text=&nbsp; when using the default inputs:}}
{{out|output|text=&nbsp; when using the default inputs:}}
<pre>
<pre>
Line 2,272: Line 2,272:
=={{header|Ruby}}==
=={{header|Ruby}}==
{{trans|C#}}
{{trans|C#}}
<lang ruby>STX = "\u0002"
<syntaxhighlight lang=ruby>STX = "\u0002"
ETX = "\u0003"
ETX = "\u0003"


Line 2,341: Line 2,341:
end
end


main()</lang>
main()</syntaxhighlight>
{{out}}
{{out}}
<pre>banana
<pre>banana
Line 2,368: Line 2,368:


=={{header|Rust}}==
=={{header|Rust}}==
<lang rust>
<syntaxhighlight lang=rust>
use core::cmp::Ordering;
use core::cmp::Ordering;


Line 2,466: Line 2,466:
}
}
}
}
</syntaxhighlight>
</lang>
{{out}}
{{out}}
<pre>
<pre>
Line 2,482: Line 2,482:
=={{header|Scala}}==
=={{header|Scala}}==
{{trans|Kotlin}}
{{trans|Kotlin}}
<lang scala>import scala.collection.mutable.ArrayBuffer
<syntaxhighlight lang=scala>import scala.collection.mutable.ArrayBuffer


object BWT {
object BWT {
Line 2,547: Line 2,547:
})
})
}
}
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>banana
<pre>banana
Line 2,574: Line 2,574:
=={{header|Sidef}}==
=={{header|Sidef}}==
{{trans|Python}}
{{trans|Python}}
<lang ruby>class BurrowsWheelerTransform (String L = "\002") {
<syntaxhighlight lang=ruby>class BurrowsWheelerTransform (String L = "\002") {


method encode(String s) {
method encode(String s) {
Line 2,602: Line 2,602:
say "BWT(#{dec.dump}) = #{enc.dump}"
say "BWT(#{dec.dump}) = #{enc.dump}"
assert_eq(str, dec)
assert_eq(str, dec)
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>
Line 2,616: Line 2,616:
{{trans|Kotlin}}
{{trans|Kotlin}}


<lang swift>import Foundation
<syntaxhighlight lang=swift>import Foundation


private let stx = "\u{2}"
private let stx = "\u{2}"
Line 2,670: Line 2,670:


print("\(readableBwt(test)) -> \(readableBwt(b)) -> \(readableBwt(c))")
print("\(readableBwt(test)) -> \(readableBwt(b)) -> \(readableBwt(c))")
}</lang>
}</syntaxhighlight>


{{out}}
{{out}}
Line 2,683: Line 2,683:
=={{header|Visual Basic .NET}}==
=={{header|Visual Basic .NET}}==
{{trans|C#}}
{{trans|C#}}
<lang vbnet>Module Module1
<syntaxhighlight lang=vbnet>Module Module1


ReadOnly STX As Char = Chr(&H2)
ReadOnly STX As Char = Chr(&H2)
Line 2,782: Line 2,782:
End Sub
End Sub


End Module</lang>
End Module</syntaxhighlight>
{{out}}
{{out}}
<pre>banana
<pre>banana
Line 2,811: Line 2,811:
{{trans|Go}}
{{trans|Go}}
{{libheader|Wren-sort}}
{{libheader|Wren-sort}}
<lang ecmascript>import "/sort" for Sort
<syntaxhighlight lang=ecmascript>import "/sort" for Sort


var stx = "\x02"
var stx = "\x02"
Line 2,868: Line 2,868:
var r = ibwt.call(t)
var r = ibwt.call(t)
System.print(" --> %(r)\n")
System.print(" --> %(r)\n")
}</lang>
}</syntaxhighlight>


{{out}}
{{out}}
Line 2,898: Line 2,898:


=={{header|zkl}}==
=={{header|zkl}}==
<lang zkl>class BurrowsWheelerTransform{
<syntaxhighlight lang=zkl>class BurrowsWheelerTransform{
fcn init(chr="$"){ var special=chr; }
fcn init(chr="$"){ var special=chr; }
fcn encode(str){
fcn encode(str){
Line 2,914: Line 2,914:
table.filter1("%s*".fmt(special).glob)[1,*]; // str[0]==$, often first element
table.filter1("%s*".fmt(special).glob)[1,*]; // str[0]==$, often first element
}
}
}</lang>
}</syntaxhighlight>
<lang zkl>BWT:=BurrowsWheelerTransform();
<syntaxhighlight lang=zkl>BWT:=BurrowsWheelerTransform();
//BWT.encode("$"); // --> assert(bbb.zkl:25): String cannot contain char "$"
//BWT.encode("$"); // --> assert(bbb.zkl:25): String cannot contain char "$"


Line 2,924: Line 2,924:
enc:=BWT.encode(test);
enc:=BWT.encode(test);
println("%s\n -->%s\n -->%s".fmt(test,enc,BWT.decode(enc)));
println("%s\n -->%s\n -->%s".fmt(test,enc,BWT.decode(enc)));
}</lang>
}</syntaxhighlight>
{{out}}
{{out}}
<pre>
<pre>