LZW compression: Difference between revisions
(OCaml version) |
(Python version) |
||
Line 136: | Line 136: | ||
(Buffer.contents buf) |
(Buffer.contents buf) |
||
;;</ocaml> |
;;</ocaml> |
||
=={{header|Python}}== |
|||
<python>def compress(uncompressed): |
|||
"""Compress a string to a list of output symbols.""" |
|||
# Build the dictionary. |
|||
dict_size = 256 |
|||
dictionary = {} |
|||
for i in range(dict_size): |
|||
dictionary[chr(i)] = chr(i) |
|||
w = '' |
|||
result = [] |
|||
for c in uncompressed: |
|||
wc = w + c |
|||
if wc in dictionary: |
|||
w = wc |
|||
else: |
|||
result.append(dictionary[w]) |
|||
# Add wc to the dictionary. |
|||
dictionary[wc] = dict_size |
|||
dict_size += 1 |
|||
w = c |
|||
# Output the code for w. |
|||
if w: |
|||
result += [char for char in w] |
|||
return result |
|||
def decompress(compressed): |
|||
"""Decompress a list of output ks to a string.""" |
|||
# Build the dictionary. |
|||
dict_size = 256 |
|||
dictionary = {} |
|||
for i in range(dict_size): |
|||
dictionary[chr(i)] = chr(i) |
|||
w = result = compressed.pop(0) |
|||
for k in compressed: |
|||
if k in dictionary: |
|||
entry = dictionary[k] |
|||
elif k == len(dictionary): |
|||
entry = w + w[0] |
|||
else: |
|||
raise ValueError, 'Bad compressed k: %s' % k |
|||
result += entry |
|||
# Add w+entry[0] to the dictionary. |
|||
dictionary[dict_size] = w+entry[0] |
|||
dict_size += 1 |
|||
w = entry |
|||
return result</python> |
|||
How to use: |
|||
<python>compressed = compress('TOBEORNOTTOBEORTOBEORNOT') |
|||
print compressed |
|||
decompressed = decompress(compressed) |
|||
print decompressed</python> |
Revision as of 22:16, 8 August 2008
The Lempel-Ziv-Welch (LZW) algorithm provides lossless data compression. You can read a complete description of it in the Wikipedia article on the subject. It was patented, but it fell in the public domain in 2004.
OCaml
<ocaml>#directory "+site-lib/extlib/"
- load "extLib.cma"
open ExtString
(** compress a string to a list of output symbols *) let compress ~uncompressed =
(* build the dictionary *) let dict_size = 256 in let dictionary = Hashtbl.create 397 in for i=0 to 255 do let str = String.make 1 (char_of_int i) in Hashtbl.add dictionary str i done; let f = (fun (w, dict_size, result) c -> let c = String.make 1 c in let wc = w ^ c in if Hashtbl.mem dictionary wc then (wc, dict_size, result) else begin (* add wc to the dictionary *) Hashtbl.add dictionary wc dict_size; let this = Hashtbl.find dictionary w in (c, dict_size + 1, this::result) end ) in let w, _, result = String.fold_left f ("", dict_size, []) uncompressed in
(* output the code for w *) let result = if w = "" then result else (Hashtbl.find dictionary w) :: result in
(List.rev result)
exception ValueError of string
(** decompress a list of output symbols to a string *) let decompress ~compressed =
(* build the dictionary *) let dict_size = 256 in let dictionary = Hashtbl.create 397 in for i=0 to pred dict_size do let str = String.make 1 (char_of_int i) in Hashtbl.add dictionary i str done;
let w, compressed = match compressed with | hd::tl -> (String.make 1 (char_of_int hd)), tl | [] -> failwith "empty input" in let result = w::[] in
let result, _, _ = List.fold_left (fun (result, w, dict_size) k -> let entry = if Hashtbl.mem dictionary k then Hashtbl.find dictionary k else if k = Hashtbl.length dictionary then w ^ (String.make 1 w.[0]) else raise(ValueError(Printf.sprintf "Bad compressed k: %d" k)) in let result = entry :: result in (* add (w ^ entry.[0]) to the dictionary *) Hashtbl.add dictionary dict_size (w ^ (String.make 1 entry.[0])); (result, entry, dict_size + 1) ) (result, w, dict_size) compressed in (List.rev result)
- </ocaml>
here is the interface: <ocaml>val compress : uncompressed:string -> int list val decompress : compressed:int list -> string list</ocaml>
How to use:
The compressed datas are a list of symbols (of type int) that will require more than 8 bits to be saved.
So to know how many bits are required, you need to know how many bits are required for the greatest symbol in the list.
<ocaml>let greatest = List.fold_left (fun m x -> max m x) 0 ;;
(** number of bits needed to encode the integer m *) let n_bits m =
let m = float m in let rec aux n = let max = (2. ** n) -. 1. in if max >= m then int_of_float n else aux (n +. 1.0) in aux 1.0
let write_compressed ~filename ~compressed =
let nbits = n_bits(greatest compressed) in let oc = open_out filename in output_byte oc nbits; let ob = IO.output_bits(IO.output_channel oc) in List.iter (fun code -> IO.write_bits ob nbits code ) compressed; IO.flush_bits ob; close_out oc;
let read_compressed ~filename =
let ic = open_in filename in let nbits = input_byte ic in let ib = IO.input_bits(IO.input_channel ic) in let rec loop acc = try let code = IO.read_bits ib nbits in loop (code::acc) with _ -> List.rev acc in let compressed = loop [] in let result = decompress ~compressed in let buf = Buffer.create 2048 in List.iter (Buffer.add_string buf) result; (Buffer.contents buf)
- </ocaml>
Python
<python>def compress(uncompressed):
"""Compress a string to a list of output symbols.""" # Build the dictionary. dict_size = 256 dictionary = {} for i in range(dict_size): dictionary[chr(i)] = chr(i) w = result = [] for c in uncompressed: wc = w + c if wc in dictionary: w = wc else: result.append(dictionary[w]) # Add wc to the dictionary. dictionary[wc] = dict_size dict_size += 1 w = c # Output the code for w. if w: result += [char for char in w] return result
def decompress(compressed):
"""Decompress a list of output ks to a string.""" # Build the dictionary. dict_size = 256 dictionary = {} for i in range(dict_size): dictionary[chr(i)] = chr(i) w = result = compressed.pop(0) for k in compressed: if k in dictionary: entry = dictionary[k] elif k == len(dictionary): entry = w + w[0] else: raise ValueError, 'Bad compressed k: %s' % k result += entry # Add w+entry[0] to the dictionary. dictionary[dict_size] = w+entry[0] dict_size += 1 w = entry return result</python>
How to use:
<python>compressed = compress('TOBEORNOTTOBEORTOBEORNOT') print compressed decompressed = decompress(compressed) print decompressed</python>