Burrows–Wheeler transform: Difference between revisions
Content added Content deleted
m (→{{header|Wren}}: Minor tidy) |
(→{{header|Sidef}}: added a more efficient/practical implementation) |
||
Line 2,773: | Line 2,773: | ||
BWT("SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES") = "STEXYDST.E.IXXIIXXSSMPPS.B..EE.$.USFXDIIOIIIT" |
BWT("SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES") = "STEXYDST.E.IXXIIXXSSMPPS.B..EE.$.USFXDIIOIIIT" |
||
</pre> |
</pre> |
||
More efficient implementation, running in '''O(n*log(n))''' time, using '''O(n)''' space: |
|||
<syntaxhighlight lang="ruby">define LOOKAHEAD_LEN = 128 |
|||
func bwt_sort (String s) { # O(n * LOOKAHEAD_LEN) space (fast) |
|||
^s.len -> map {|i| |
|||
var t = s.slice(i, LOOKAHEAD_LEN) |
|||
if (t.len < LOOKAHEAD_LEN) { |
|||
t += s.slice(0, min(i, LOOKAHEAD_LEN - t.len)) |
|||
} |
|||
[t, i] |
|||
}.sort {|a,b| |
|||
(a[0] <=> b[0]) || (s.rotate(a[1]) <=> s.rotate(b[1])) |
|||
}.map { .[1] } |
|||
} |
|||
func bwt_encode(String s) { |
|||
var bwt = bwt_sort(s) |
|||
var ret = bwt.map {|i| s.slice(i-1, 1) }.join |
|||
var prefix = s.slice(0, LOOKAHEAD_LEN) |
|||
var len = prefix.len |
|||
var idx = 0; |
|||
for i in (bwt) { |
|||
var lookahead = s.slice(i, len) |
|||
if (lookahead.len < len) { |
|||
lookahead += s.slice(0, len - lookahead.len) |
|||
} |
|||
if (lookahead == prefix) { |
|||
var row = s.rotate(i) |
|||
if (row == s) { |
|||
break |
|||
} |
|||
} |
|||
++idx |
|||
} |
|||
return (ret, idx) |
|||
} |
|||
func bwt_decode(String bwt, Number idx) { # fast inversion |
|||
var tail = bwt.chars |
|||
var head = tail.sort |
|||
var indices = Hash() |
|||
tail.each_kv {|i,v| |
|||
indices{v} := [] << i |
|||
} |
|||
var table = [] |
|||
head.each_kv {|i,v| |
|||
table[i] = indices{v}.shift |
|||
} |
|||
var dec = '' |
|||
var i = idx |
|||
head.len.times { |
|||
dec += head[i] |
|||
i = table[i] |
|||
} |
|||
return dec |
|||
} |
|||
var tests = [ |
|||
"banana", "appellee", "dogwood", "TOBEORNOTTOBEORTOBEORNOT" |
|||
"SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES", |
|||
] |
|||
tests.each { |str| |
|||
var (enc, idx) = bwt_encode(str) |
|||
var dec = bwt_decode(enc, idx) |
|||
say "BWT(#{dec.dump}) = (#{enc.dump}, #{idx})" |
|||
assert_eq(str, dec) |
|||
}</syntaxhighlight> |
|||
{{out}} |
|||
<pre> |
|||
BWT("banana") = ("nnbaaa", 3) |
|||
BWT("appellee") = ("eelplepa", 0) |
|||
BWT("dogwood") = ("odoodwg", 1) |
|||
BWT("TOBEORNOTTOBEORTOBEORNOT") = ("OOOBBBRRTTTEEENNOOORTTOO", 20) |
|||
BWT("SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES") = ("TEXYDST.E.IXIXIXXSSMPPS.B..E.S.EUSFXDIIOIIIT", 29) |
|||
</pre> |
|||
=={{header|Swift}}== |
=={{header|Swift}}== |
||