Burrows–Wheeler transform: Difference between revisions
Content added Content deleted
Alextretyak (talk | contribs) m (→{{header|11l}}) |
No edit summary |
||
Line 1,287: | Line 1,287: | ||
--> |
--> |
||
</pre> |
</pre> |
||
=={{header|Ksh}}== |
|||
<lang ksh> |
|||
#!/bin/ksh |
|||
# Burrows–Wheeler transform |
|||
# # Variables: |
|||
# |
|||
export LC_COLLATE=POSIX |
|||
STX=\^ # start marker |
|||
ETX=\| # end marker |
|||
typeset -a str |
|||
str[0]='BANANA' |
|||
str[1]='appellee' |
|||
str[2]='dogwood' |
|||
str[3]='TO BE OR NOT TO BE OR WANT TO BE OR NOT?' |
|||
str[4]='SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES' |
|||
str[5]='|ABC^' |
|||
# # Functions: |
|||
# |
|||
# # Function _bwt(str, arr, lc) - sort all circular shifts of arr, return last col |
|||
# |
|||
function _bwt { |
|||
typeset _str ; _str="$1" |
|||
typeset _arr ; nameref _arr="$2" |
|||
typeset _lastcol ; nameref _lastcol="$3" |
|||
typeset _i _j _newstr ; integer _i _j |
|||
[[ ${_str} == *+("$STX"|"$ETX")* ]] && return 1 |
|||
_str="$STX${_str}$ETX" |
|||
for ((_i=0; _i<${#_str}; _i++)); do |
|||
_newstr=${_str:${#_str}-1:1} |
|||
for ((_j=0; _j<${#_str}-1; _j++)); do |
|||
_newstr+=${_str:${_j}:1} |
|||
done |
|||
_arr+=( "${_newstr}" ) |
|||
_str="${_newstr}" |
|||
done |
|||
set -sA arr # Sort arr |
|||
for ((_i=0; _i<${#_arr[*]}; _i++)); do |
|||
_lastcol+=${_arr[_i]:${#_arr[_i]}-1:1} |
|||
done |
|||
} |
|||
# # Function _ibwt(str) - inverse bwt |
|||
# |
|||
function _ibwt { |
|||
typeset _str ; _str="$1" |
|||
typeset _arr _vec _ret _i ; typeset -a _arr _vec ; integer _i |
|||
_intovec "${_str}" _vec |
|||
for ((_i=1; _i<${#_str}; _i++)); do |
|||
_intoarr _vec _arr |
|||
set -sA _arr |
|||
done |
|||
for ((_i=0; _i<${#arr[*]}; _i++)); do |
|||
[[ "${arr[_i]}" == ${STX}*${ETX} ]] && echo "${arr[_i]}" && return |
|||
done |
|||
} |
|||
# # Function _intovec(str, vec) - trans str into vec[] |
|||
# |
|||
function _intovec { |
|||
typeset _str ; _str="$1" |
|||
typeset _vec ; nameref _vec="$2" |
|||
typeset _i ; integer _i |
|||
for ((_i=0; _i<${#_str}; _i++)); do |
|||
_vec+=( "${_str:${_i}:1}" ) |
|||
done |
|||
} |
|||
# # Function _intoarr(i, vec, arr) - insert vec into arr |
|||
# |
|||
function _intoarr { |
|||
typeset _vec ; nameref _vec="$1" |
|||
typeset _arr ; nameref _arr="$2" |
|||
typeset _j ; integer _j |
|||
for ((_j=0; _j<${#_vec[*]}; _j++)); do |
|||
_arr="${_vec[_j]}${_arr[_j]}" |
|||
done |
|||
} |
|||
###### |
|||
# main # |
|||
###### |
|||
for ((i=0; i<${#str[*]}; i++)); do |
|||
unset arr lastcol result ; typeset -a arr |
|||
print -- "${str[i]}" |
|||
_bwt "${str[i]}" arr lastcol |
|||
(( $? )) && print "ERROR: string cannot contain $STX or $ETX" && continue |
|||
print -- "${lastcol}" |
|||
result=$(_ibwt "${lastcol}") |
|||
print -- "${result}" |
|||
echo |
|||
done |
|||
</lang> |
|||
{{out}}<pre> |
|||
BANANA |
|||
BNN^AA|A |
|||
^BANANA| |
|||
appellee |
|||
|^lpelepae |
|||
^appellee| |
|||
dogwood |
|||
|^ooodwgd |
|||
^dogwood| |
|||
TO BE OR NOT TO BE OR WANT TO BE OR NOT? |
|||
OOORREEETTRTW BBB ATTT NNOOONOO^ |? |
|||
^TO BE OR NOT TO BE OR WANT TO BE OR NOT?| |
|||
SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES |
|||
TEXYDST.E.IXIXIXXSSMPPS.B..E.^.UESFXDIIOIIIT|S |
|||
^SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES| |
|||
|ABC^ |
|||
ERROR: string cannot contain ^ or |</pre> |
|||
=={{header|Lua}}== |
=={{header|Lua}}== |