Burrows–Wheeler transform: Difference between revisions
Content added Content deleted
Thundergnat (talk | contribs) (Rename Perl 6 -> Raku, alphabetize, minor clean-up) |
|||
Line 144: | Line 144: | ||
</pre> |
</pre> |
||
=={{header|C |
=={{header|C sharp|C#}}== |
||
{{trans|C#}} |
|||
<lang cpp>#include <algorithm> |
|||
#include <iostream> |
|||
#include <vector> |
|||
const int STX = 0x02; |
|||
const int ETX = 0x03; |
|||
void rotate(std::string &a) { |
|||
char t = a[a.length() - 1]; |
|||
for (int i = a.length() - 1; i > 0; i--) { |
|||
a[i] = a[i - 1]; |
|||
} |
|||
a[0] = t; |
|||
} |
|||
std::string bwt(const std::string &s) { |
|||
for (char c : s) { |
|||
if (c == STX || c == ETX) { |
|||
throw std::runtime_error("Input can't contain STX or ETX"); |
|||
} |
|||
} |
|||
std::string ss; |
|||
ss += STX; |
|||
ss += s; |
|||
ss += ETX; |
|||
std::vector<std::string> table; |
|||
for (size_t i = 0; i < ss.length(); i++) { |
|||
table.push_back(ss); |
|||
rotate(ss); |
|||
} |
|||
//table.sort(); |
|||
std::sort(table.begin(), table.end()); |
|||
std::string out; |
|||
for (auto &s : table) { |
|||
out += s[s.length() - 1]; |
|||
} |
|||
return out; |
|||
} |
|||
std::string ibwt(const std::string &r) { |
|||
int len = r.length(); |
|||
std::vector<std::string> table(len); |
|||
for (int i = 0; i < len; i++) { |
|||
for (int j = 0; j < len; j++) { |
|||
table[j] = r[j] + table[j]; |
|||
} |
|||
std::sort(table.begin(), table.end()); |
|||
} |
|||
for (auto &row : table) { |
|||
if (row[row.length() - 1] == ETX) { |
|||
return row.substr(1, row.length() - 2); |
|||
} |
|||
} |
|||
return {}; |
|||
} |
|||
std::string makePrintable(const std::string &s) { |
|||
auto ls = s; |
|||
for (auto &c : ls) { |
|||
if (c == STX) { |
|||
c = '^'; |
|||
} else if (c == ETX) { |
|||
c = '|'; |
|||
} |
|||
} |
|||
return ls; |
|||
} |
|||
int main() { |
|||
auto tests = { |
|||
"banana", |
|||
"appellee", |
|||
"dogwood", |
|||
"TO BE OR NOT TO BE OR WANT TO BE OR NOT?", |
|||
"SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES", |
|||
"\u0002ABC\u0003" |
|||
}; |
|||
for (auto &test : tests) { |
|||
std::cout << makePrintable(test) << "\n"; |
|||
std::cout << " --> "; |
|||
std::string t; |
|||
try { |
|||
t = bwt(test); |
|||
std::cout << makePrintable(t) << "\n"; |
|||
} catch (std::runtime_error &e) { |
|||
std::cout << "Error " << e.what() << "\n"; |
|||
} |
|||
std::string r = ibwt(t); |
|||
std::cout << " --> " << r << "\n\n"; |
|||
} |
|||
return 0; |
|||
}</lang> |
|||
{{out}} |
|||
<pre>banana |
|||
--> |annb^aa |
|||
--> banana |
|||
appellee |
|||
--> |e^elplepa |
|||
--> appellee |
|||
dogwood |
|||
--> |do^oodwg |
|||
--> dogwood |
|||
TO BE OR NOT TO BE OR WANT TO BE OR NOT? |
|||
--> |?OOORREEETTRTW BBB ATTT NNOOONOO^ |
|||
--> TO BE OR NOT TO BE OR WANT TO BE OR NOT? |
|||
SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES |
|||
--> |STEXYDST.E.IXXIIXXSSMPPS.B..EE.^.USFXDIIOIIIT |
|||
--> SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES |
|||
^ABC| |
|||
--> Error Input can't contain STX or ETX |
|||
--></pre> |
|||
=={{header|C#|C sharp}}== |
|||
{{trans|D}} |
{{trans|D}} |
||
<lang csharp>using System; |
<lang csharp>using System; |
||
Line 395: | Line 269: | ||
^ABC| |
^ABC| |
||
--> ERROR: Input can't contain STX or ETX |
--> ERROR: Input can't contain STX or ETX |
||
--></pre> |
|||
=={{header|C++}}== |
|||
{{trans|C#}} |
|||
<lang cpp>#include <algorithm> |
|||
#include <iostream> |
|||
#include <vector> |
|||
const int STX = 0x02; |
|||
const int ETX = 0x03; |
|||
void rotate(std::string &a) { |
|||
char t = a[a.length() - 1]; |
|||
for (int i = a.length() - 1; i > 0; i--) { |
|||
a[i] = a[i - 1]; |
|||
} |
|||
a[0] = t; |
|||
} |
|||
std::string bwt(const std::string &s) { |
|||
for (char c : s) { |
|||
if (c == STX || c == ETX) { |
|||
throw std::runtime_error("Input can't contain STX or ETX"); |
|||
} |
|||
} |
|||
std::string ss; |
|||
ss += STX; |
|||
ss += s; |
|||
ss += ETX; |
|||
std::vector<std::string> table; |
|||
for (size_t i = 0; i < ss.length(); i++) { |
|||
table.push_back(ss); |
|||
rotate(ss); |
|||
} |
|||
//table.sort(); |
|||
std::sort(table.begin(), table.end()); |
|||
std::string out; |
|||
for (auto &s : table) { |
|||
out += s[s.length() - 1]; |
|||
} |
|||
return out; |
|||
} |
|||
std::string ibwt(const std::string &r) { |
|||
int len = r.length(); |
|||
std::vector<std::string> table(len); |
|||
for (int i = 0; i < len; i++) { |
|||
for (int j = 0; j < len; j++) { |
|||
table[j] = r[j] + table[j]; |
|||
} |
|||
std::sort(table.begin(), table.end()); |
|||
} |
|||
for (auto &row : table) { |
|||
if (row[row.length() - 1] == ETX) { |
|||
return row.substr(1, row.length() - 2); |
|||
} |
|||
} |
|||
return {}; |
|||
} |
|||
std::string makePrintable(const std::string &s) { |
|||
auto ls = s; |
|||
for (auto &c : ls) { |
|||
if (c == STX) { |
|||
c = '^'; |
|||
} else if (c == ETX) { |
|||
c = '|'; |
|||
} |
|||
} |
|||
return ls; |
|||
} |
|||
int main() { |
|||
auto tests = { |
|||
"banana", |
|||
"appellee", |
|||
"dogwood", |
|||
"TO BE OR NOT TO BE OR WANT TO BE OR NOT?", |
|||
"SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES", |
|||
"\u0002ABC\u0003" |
|||
}; |
|||
for (auto &test : tests) { |
|||
std::cout << makePrintable(test) << "\n"; |
|||
std::cout << " --> "; |
|||
std::string t; |
|||
try { |
|||
t = bwt(test); |
|||
std::cout << makePrintable(t) << "\n"; |
|||
} catch (std::runtime_error &e) { |
|||
std::cout << "Error " << e.what() << "\n"; |
|||
} |
|||
std::string r = ibwt(t); |
|||
std::cout << " --> " << r << "\n\n"; |
|||
} |
|||
return 0; |
|||
}</lang> |
|||
{{out}} |
|||
<pre>banana |
|||
--> |annb^aa |
|||
--> banana |
|||
appellee |
|||
--> |e^elplepa |
|||
--> appellee |
|||
dogwood |
|||
--> |do^oodwg |
|||
--> dogwood |
|||
TO BE OR NOT TO BE OR WANT TO BE OR NOT? |
|||
--> |?OOORREEETTRTW BBB ATTT NNOOONOO^ |
|||
--> TO BE OR NOT TO BE OR WANT TO BE OR NOT? |
|||
SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES |
|||
--> |STEXYDST.E.IXXIIXXSSMPPS.B..EE.^.USFXDIIOIIIT |
|||
--> SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES |
|||
^ABC| |
|||
--> Error Input can't contain STX or ETX |
|||
--></pre> |
--></pre> |
||
Line 1,000: | Line 1,000: | ||
Transformed: OOORREEETTRTW BBB ATTT NNOOONOO👍 ? |
Transformed: OOORREEETTRTW BBB ATTT NNOOONOO👍 ? |
||
Inverse transformed: TO BE OR NOT TO BE OR WANT TO BE OR NOT?</pre> |
Inverse transformed: TO BE OR NOT TO BE OR WANT TO BE OR NOT?</pre> |
||
=={{header|Perl 6}}== |
|||
{{works with|Rakudo|2018.06}} |
|||
<lang perl6># STX can be any character that doesn't appear in the text. |
|||
# Using a visible character here for ease of viewing. |
|||
constant \STX = '👍'; |
|||
# Burrows-Wheeler transform |
|||
sub transform (Str $s is copy) { |
|||
note "String can't contain STX character." and exit if $s.contains: STX; |
|||
$s = STX ~ $s; |
|||
(^$s.chars).map({ $s.comb.list.rotate: $_ }).sort[*;*-1].join |
|||
} |
|||
# Burrows-Wheeler inverse transform |
|||
sub ɯɹoɟsuɐɹʇ (Str $s) { |
|||
my @t = $s.comb.sort; |
|||
@t = ($s.comb Z~ @t).sort for 1..^$s.chars; |
|||
@t.first( *.ends-with: STX ).chop |
|||
} |
|||
# TESTING |
|||
for |<BANANA dogwood SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES>, |
|||
'TO BE OR NOT TO BE OR WANT TO BE OR NOT?', "Oops{STX}" |
|||
-> $phrase { |
|||
say 'Original: ', $phrase; |
|||
say 'Transformed: ', transform $phrase; |
|||
say 'Inverse transformed: ', ɯɹoɟsuɐɹʇ transform $phrase; |
|||
say ''; |
|||
}</lang> |
|||
{{out}} |
|||
<pre>Original: BANANA |
|||
Transformed: BNN👍AAA |
|||
Inverse transformed: BANANA |
|||
Original: dogwood |
|||
Transformed: 👍ooodwgd |
|||
Inverse transformed: dogwood |
|||
Original: SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES |
|||
Transformed: TEXYDST.E.IXIXIXXSSMPPS.B..E.👍.UESFXDIIOIIITS |
|||
Inverse transformed: SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES |
|||
Original: TO BE OR NOT TO BE OR WANT TO BE OR NOT? |
|||
Transformed: OOORREEETTRTW BBB ATTT NNOOONOO👍 ? |
|||
Inverse transformed: TO BE OR NOT TO BE OR WANT TO BE OR NOT? |
|||
Original: Oops👍 |
|||
String can't contain STX character.</pre> |
|||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
Line 1,233: | Line 1,182: | ||
return s.rstrip("\003").strip("\002") # Get rid of start and end markers |
return s.rstrip("\003").strip("\002") # Get rid of start and end markers |
||
</lang> |
</lang> |
||
=={{header|Raku}}== |
|||
(formerly Perl 6) |
|||
{{works with|Rakudo|2018.06}} |
|||
<lang perl6># STX can be any character that doesn't appear in the text. |
|||
# Using a visible character here for ease of viewing. |
|||
constant \STX = '👍'; |
|||
# Burrows-Wheeler transform |
|||
sub transform (Str $s is copy) { |
|||
note "String can't contain STX character." and exit if $s.contains: STX; |
|||
$s = STX ~ $s; |
|||
(^$s.chars).map({ $s.comb.list.rotate: $_ }).sort[*;*-1].join |
|||
} |
|||
# Burrows-Wheeler inverse transform |
|||
sub ɯɹoɟsuɐɹʇ (Str $s) { |
|||
my @t = $s.comb.sort; |
|||
@t = ($s.comb Z~ @t).sort for 1..^$s.chars; |
|||
@t.first( *.ends-with: STX ).chop |
|||
} |
|||
# TESTING |
|||
for |<BANANA dogwood SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES>, |
|||
'TO BE OR NOT TO BE OR WANT TO BE OR NOT?', "Oops{STX}" |
|||
-> $phrase { |
|||
say 'Original: ', $phrase; |
|||
say 'Transformed: ', transform $phrase; |
|||
say 'Inverse transformed: ', ɯɹoɟsuɐɹʇ transform $phrase; |
|||
say ''; |
|||
}</lang> |
|||
{{out}} |
|||
<pre>Original: BANANA |
|||
Transformed: BNN👍AAA |
|||
Inverse transformed: BANANA |
|||
Original: dogwood |
|||
Transformed: 👍ooodwgd |
|||
Inverse transformed: dogwood |
|||
Original: SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES |
|||
Transformed: TEXYDST.E.IXIXIXXSSMPPS.B..E.👍.UESFXDIIOIIITS |
|||
Inverse transformed: SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES |
|||
Original: TO BE OR NOT TO BE OR WANT TO BE OR NOT? |
|||
Transformed: OOORREEETTRTW BBB ATTT NNOOONOO👍 ? |
|||
Inverse transformed: TO BE OR NOT TO BE OR WANT TO BE OR NOT? |
|||
Original: Oops👍 |
|||
String can't contain STX character.</pre> |
|||
=={{header|REXX}}== |
=={{header|REXX}}== |