Burrows–Wheeler transform: Difference between revisions

no edit summary
No edit summary
Line 1,287:
-->
</pre>
 
=={{header|Ksh}}==
<lang ksh>
#!/bin/ksh
 
# Burrows–Wheeler transform
 
# # Variables:
#
export LC_COLLATE=POSIX
 
STX=\^ # start marker
ETX=\| # end marker
 
typeset -a str
str[0]='BANANA'
str[1]='appellee'
str[2]='dogwood'
str[3]='TO BE OR NOT TO BE OR WANT TO BE OR NOT?'
str[4]='SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES'
str[5]='|ABC^'
 
# # Functions:
#
 
# # Function _bwt(str, arr, lc) - sort all circular shifts of arr, return last col
#
function _bwt {
typeset _str ; _str="$1"
typeset _arr ; nameref _arr="$2"
typeset _lastcol ; nameref _lastcol="$3"
typeset _i _j _newstr ; integer _i _j
 
[[ ${_str} == *+("$STX"|"$ETX")* ]] && return 1
_str="$STX${_str}$ETX"
for ((_i=0; _i<${#_str}; _i++)); do
_newstr=${_str:${#_str}-1:1}
for ((_j=0; _j<${#_str}-1; _j++)); do
_newstr+=${_str:${_j}:1}
done
_arr+=( "${_newstr}" )
_str="${_newstr}"
done
 
set -sA arr # Sort arr
 
for ((_i=0; _i<${#_arr[*]}; _i++)); do
_lastcol+=${_arr[_i]:${#_arr[_i]}-1:1}
done
}
 
# # Function _ibwt(str) - inverse bwt
#
function _ibwt {
typeset _str ; _str="$1"
typeset _arr _vec _ret _i ; typeset -a _arr _vec ; integer _i
 
_intovec "${_str}" _vec
for ((_i=1; _i<${#_str}; _i++)); do
_intoarr _vec _arr
set -sA _arr
done
 
for ((_i=0; _i<${#arr[*]}; _i++)); do
[[ "${arr[_i]}" == ${STX}*${ETX} ]] && echo "${arr[_i]}" && return
done
}
 
# # Function _intovec(str, vec) - trans str into vec[]
#
function _intovec {
typeset _str ; _str="$1"
typeset _vec ; nameref _vec="$2"
typeset _i ; integer _i
 
for ((_i=0; _i<${#_str}; _i++)); do
_vec+=( "${_str:${_i}:1}" )
done
}
 
# # Function _intoarr(i, vec, arr) - insert vec into arr
#
function _intoarr {
typeset _vec ; nameref _vec="$1"
typeset _arr ; nameref _arr="$2"
typeset _j ; integer _j
 
for ((_j=0; _j<${#_vec[*]}; _j++)); do
_arr="${_vec[_j]}${_arr[_j]}"
done
}
 
######
# main #
######
 
for ((i=0; i<${#str[*]}; i++)); do
unset arr lastcol result ; typeset -a arr
 
print -- "${str[i]}"
_bwt "${str[i]}" arr lastcol
(( $? )) && print "ERROR: string cannot contain $STX or $ETX" && continue
 
print -- "${lastcol}"
result=$(_ibwt "${lastcol}")
print -- "${result}"
echo
done
</lang>
{{out}}<pre>
BANANA
BNN^AA|A
^BANANA|
 
appellee
|^lpelepae
^appellee|
 
dogwood
|^ooodwgd
^dogwood|
 
TO BE OR NOT TO BE OR WANT TO BE OR NOT?
OOORREEETTRTW BBB ATTT NNOOONOO^ |?
^TO BE OR NOT TO BE OR WANT TO BE OR NOT?|
 
SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES
TEXYDST.E.IXIXIXXSSMPPS.B..E.^.UESFXDIIOIIIT|S
^SIX.MIXED.PIXIES.SIFT.SIXTY.PIXIE.DUST.BOXES|
 
|ABC^
ERROR: string cannot contain ^ or |</pre>
 
=={{header|Lua}}==
70

edits