LZW compression: Difference between revisions
(Add link to original source) |
(→{{header|Python}}: (xrange --> range, and print x --> print(x) in Python3.0)) |
||
Line 1,069: | Line 1,069: | ||
dict_size = 256 |
dict_size = 256 |
||
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size)) |
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size)) |
||
# in Python 3: dictionary = {chr(i): chr(i) for i in |
# in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)} |
||
w = "" |
w = "" |
||
Line 1,096: | Line 1,096: | ||
dict_size = 256 |
dict_size = 256 |
||
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size)) |
dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size)) |
||
# in Python 3: dictionary = {chr(i): chr(i) for i in |
# in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)} |
||
w = result = compressed.pop(0) |
w = result = compressed.pop(0) |
||
Line 1,118: | Line 1,118: | ||
# How to use: |
# How to use: |
||
compressed = compress('TOBEORNOTTOBEORTOBEORNOT') |
compressed = compress('TOBEORNOTTOBEORTOBEORNOT') |
||
print compressed |
print (compressed) |
||
decompressed = decompress(compressed) |
decompressed = decompress(compressed) |
||
print decompressed</lang> |
print (decompressed)</lang> |
||
Output: |
Output: |
Revision as of 05:41, 18 May 2009
You are encouraged to solve this task according to the task description, using any language you may know.
The Lempel-Ziv-Welch (LZW) algorithm provides lossless data compression. You can read a complete description of it in the Wikipedia article on the subject. It was patented, but it fell in the public domain in 2004.
C
See Bit oriented IO for bitio.h and Basic string manipulation functions for estrings.h.
<lang c>#include <stdio.h>
- include <stdlib.h>
- include <stdbool.h>
- include <string.h>
- include "bitio.h"
- include "estrings.h"
- define MIN(A,B) ( ((A)<(B)) ? (A) : (B) )
- define MAX(A,B) ( ((A)>(B)) ? (A) : (B) )
- define DEBUG
- if defined(DEBUG)
- define Debug(...) fprintf(stderr, __VA_ARGS__)
- else
- define Debug(...)
- endif</lang>
We need a dictionary and so we implement one according to our need.
<lang c>struct DictionaryNode {
String string; unsigned int code; struct DictionaryNode *next;
}; typedef struct DictionaryNode DN;
- define NODES_PER_ALLOC 512
struct DictionaryStruct {
DN *first; DN **nodes; size_t num_of_nodes; size_t num_of_alloc; bool sorted;
}; typedef struct DictionaryStruct *Dictionary;
Dictionary newDictionary() {
Dictionary t; t = malloc(sizeof(struct DictionaryStruct)); if ( t==NULL ) return NULL; t->num_of_nodes = 0; t->num_of_alloc = 1; t->nodes = malloc(t->num_of_alloc * NODES_PER_ALLOC * sizeof(void *)); t->first = NULL; t->sorted = false; return t;
}
bool _expandDictionary(Dictionary d) {
d->nodes = realloc(d->nodes, (d->num_of_alloc + 1) * NODES_PER_ALLOC * sizeof(void *)); if ( d->nodes == NULL ) return false; d->num_of_alloc++; return true;
}
bool dictionaryPut_ns(Dictionary d, String s, unsigned int c) {
DN *newdw; if ( (d->num_of_nodes + 1) > (d->num_of_alloc * NODES_PER_ALLOC) ) { if ( !_expandDictionary(d) ) return false; } newdw = malloc(sizeof(DN)); if ( newdw == NULL ) return false; newdw->string = cloneString(s); newdw->code = c; newdw->next = d->first; d->first = newdw; d->nodes[d->num_of_nodes] = newdw; d->num_of_nodes++; d->sorted = false; return true;
}
int _dict_strcompare(const void *av, const void *bv) {
DN *a = *((DN **)av); DN *b = *((DN **)bv); return compareStrings(a->string, b->string);
}
void dictionarySort(Dictionary d) {
if ( ! (d->sorted) ) { qsort(d->nodes, d->num_of_nodes, sizeof(void *), _dict_strcompare); d->sorted = true; }
}
bool dictionaryPut(Dictionary d, String s, unsigned int c) {
bool added = dictionaryPut_ns(d, s, c); if ( added ) { dictionarySort(d); } return added;
}
DN *dictionaryLookUp(Dictionary d, String s)
{
if ( d->num_of_nodes == 0 ) return NULL; int low, high, med, p; /* perform a "binary" search ... */ for(low=-1, high = d->num_of_nodes-1; (high-low)>1 ; ) { med = (high+low) >> 1; p = compareStrings(s, d->nodes[med]->string); if ( p <= 0 ) { high = med; } else { low = med; } } if ( compareStrings(s, d->nodes[high]->string) == 0 ) return d->nodes[high]; return NULL;
}
void destroyDictionary(Dictionary d) {
int i; if ( d==NULL ) return; if ( d->nodes != NULL ) { for(i=0; i < d->num_of_nodes; i++) { destroyString(d->nodes[i]->string); free(d->nodes[i]); } free(d->nodes); } free(d);
}</lang>
LZW code:
<lang c>#define BITS_PER_WORD 9 unsigned int codemark = 256; unsigned char num_of_bits = BITS_PER_WORD;
struct LZWHandleStruct {
unsigned int codemark; unsigned char num_of_bits; Dictionary dict; size_t bytes_written; FILE *iostream;
}; typedef struct LZWHandleStruct LZWHandle;
LZWHandle *lzw_begin(FILE *o) {
LZWHandle *lh; lh = malloc(sizeof(LZWHandle)); if ( lh != NULL ) { lh->bytes_written = 0; lh->codemark = 256; lh->num_of_bits = BITS_PER_WORD; lh->dict = newDictionary(); lh->iostream = o; if ( lh->dict == NULL ) { free(lh); return NULL; } } return lh;
}
bool lzw_compress(LZWHandle *lh, char *stream, size_t len) {
unsigned int i; char tc; Dictionary d = lh->dict; DN *oc; String w = NULL; String wc = NULL; String t = NULL; size_t wbits = 0; t = newString(); for(i=0; i < 256; i++) { tc = i; setString(t, &tc, 1); dictionaryPut_ns(d, t, i); } destroyString(t); t = NULL; /* a trick: we have put values in order, so it is sorted*/ d->sorted = true; w = newString(); wc = newString(); for(i=0; i < len; i++) { copyString(wc, w); appendChar(wc, stream[i]); if ( (dictionaryLookUp(d, wc)) != NULL ) { copyString(w, wc); } else { oc = dictionaryLookUp(d, w); if ( oc != NULL ) { /* guardring for bugs :D */ wbits += bits_write(oc->code, lh->num_of_bits, lh->iostream); Debug("%u ; %d ; %u\n", oc->code, lh->num_of_bits, lh->codemark); } dictionaryPut(d, wc, lh->codemark); lh->codemark++; if ( lh->codemark >= (1 << lh->num_of_bits) ) { lh->num_of_bits++; } setString(w, &stream[i], 1); } } if ( ! stringIsEmpty(w) ) { oc = dictionaryLookUp(d, w); /* no guard ring for bugs here :) */ wbits += bits_write(oc->code, lh->num_of_bits, lh->iostream); Debug("%u ; %d ; %u\n", oc->code, lh->num_of_bits, lh->codemark); } lh->bytes_written = wbits >> 3; destroyString(w); destroyString(wc); return true;
}
int lzw_end(LZWHandle *lh) {
int oby=0; if ( lh != NULL ) { oby=bits_flush(lh->iostream); destroyDictionary(lh->dict); free(lh); } return oby;
}</lang>
Testing:
<lang c>char *text = "TOBEORNOTTOBEORTOBEORNOT"; int main() {
LZWHandle *lzw; size_t writtenbytecount = 0; lzw = lzw_begin(stdout); if ( lzw != NULL ) { lzw_compress(lzw, text, strlen(text)); writtenbytecount += lzw->bytes_written; Debug("bits on exit: %u\n", lzw->num_of_bits); lzw_end(lzw); fprintf(stderr, "n. of input bytes: %u\n" "n. of output bytes: %u\n", strlen(text), writtenbytecount ); } else return 1; return 0;
}</lang>
This test code really emits the compressed bytes stream. The decimal values of the codes (as other solutions do) are sent as debug informations.
C++
<lang cpp>#include <string>
- include <map>
// Compress a string to a list of output symbols. // The result will be written to the output iterator // starting at "result"; the final iterator is returned. template <typename Iterator> Iterator compress(const std::string &uncompressed, Iterator result) {
// Build the dictionary. int dictSize = 256; std::map<std::string,int> dictionary; for (int i = 0; i < 256; i++) dictionary[std::string(1, i)] = i; std::string w; for (std::string::const_iterator it = uncompressed.begin(); it != uncompressed.end(); ++it) { char c = *it; std::string wc = w + c; if (dictionary.count(wc)) w = wc; else { *result++ = dictionary[w]; // Add wc to the dictionary. dictionary[wc] = dictSize++; w = std::string(1, c); } } // Output the code for w. if (!w.empty()) *result++ = dictionary[w]; return result;
}
// Decompress a list of output ks to a string. // "begin" and "end" must form a valid range of ints template <typename Iterator> std::string decompress(Iterator begin, Iterator end) {
// Build the dictionary. int dictSize = 256; std::map<int,std::string> dictionary; for (int i = 0; i < 256; i++) dictionary[i] = std::string(1, i); std::string w(1, *begin++); std::string result = w; std::string entry; for ( ; begin != end; begin++) { int k = *begin; if (dictionary.count(k)) entry = dictionary[k]; else if (k == dictSize) entry = w + w[0]; else throw "Bad compressed k"; result += entry; // Add w+entry[0] to the dictionary. dictionary[dictSize++] = w + entry[0]; w = entry; } return result;
}
- include <iostream>
- include <iterator>
- include <vector>
int main() {
std::vector<int> compressed; compress("TOBEORNOTTOBEORTOBEORNOT", std::back_inserter(compressed)); copy(compressed.begin(), compressed.end(), std::ostream_iterator<int>(std::cout, ", ")); std::cout << std::endl; std::string decompressed = decompress(compressed.begin(), compressed.end()); std::cout << decompressed << std::endl; return 0;
}</lang>
Clojure
<lang Clojure> (defn make-dict []
(let [vals (range 0 256)] (zipmap (map (comp #'list #'char) vals) vals)))
(defn compress [#^String text]
(loop [t (seq text) r '() w '() dict (make-dict) s 256] (let [c (first t)] (if c (let [wc (cons c w)] (if (get dict wc) (recur (rest t) r wc dict s) (recur (rest t) (cons (get dict w) r) (list c) (assoc dict wc s) (inc s)))) (reverse (if w (cons (get dict w) r) r))))))
(compress "TOBEORNOTTOBEORTOBEORNOT") </lang> The output: <lang Clojure> (84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263) </lang>
D
D 1, with Phobos, from the Python version (the final writefln works only on 7-bit ASCII strings): <lang d> import std.stdio: writefln;
/// Compress a string to a list of output symbols. int[] compress(string uncompressed) {
// build the dictionary int dict_size = 256; int[string] dictionary; for (int i; i < dict_size; i++) dictionary["" ~ cast(char)i] = i;
string w; int[] result; foreach (c; uncompressed) { string wc = w ~ c; if (wc in dictionary) w = wc; else { result ~= dictionary[w]; // add wc to the dictionary dictionary[wc] = dict_size; dict_size++; w = "" ~ c; } }
// output the code for w if (w.length) result ~= dictionary[w]; return result;
}
/// Decompress a list of output ks to a string.
string decompress(int[] compressed) {
// build the dictionary int dict_size = 256; string[int] dictionary; for (int i; i < dict_size; i++) dictionary[i] = "" ~ cast(char)i;
string w = "" ~ cast(char)compressed[0]; string result = w; foreach (k; compressed[1 .. $]) { string entry; if (k in dictionary) entry = dictionary[k]; else if (k == dict_size) entry = w ~ w[0]; else throw new Exception("Bad compressed k"); result ~= entry;
// add w+entry[0] to the dictionary dictionary[dict_size] = w ~ entry[0]; dict_size++;
w = entry; } return result;
}
void main() {
auto compressed = compress("TOBEORNOTTOBEORTOBEORNOT"); writefln(compressed);
auto decompressed = decompress(compressed); writefln(decompressed);
} </lang> The output: <lang d> [84,79,66,69,79,82,78,79,84,256,258,260,265,259,261,263] TOBEORNOTTOBEORTOBEORNOT </lang>
Forth
256 value next-symbol \ current string fragment create w 256 allot \ counted string : w=c ( c -- ) w 1+ c! 1 w c! ; : w+c ( c -- ) w count + c! w c@ 1+ w c! ;
\ Compression \ dictionary of strings to symbols 0 value dict : init-dict table to dict 256 to next-symbol dict set-current ; : free-dict forth-wordlist set-current ; : in-dict? ( key len -- ? ) \ can assume len > 1 dict search-wordlist dup if nip then ; : lookup-dict ( key len -- symbol ) dup 1 = if drop c@ exit then dict search-wordlist if >body @ else abort" bad-dict!" then ; : put-dict ( data key len -- ) nextname create , ; \ output buffer of symbols \ in real life, these symbols would be packed into octets variable out-size create out 256 cells allot : output ( symbol -- ) dup out out-size @ cells + ! 1 out-size +! dup 256 < if emit space else . then ; : compress ( addr len -- ) init-dict 0 out-size ! over c@ w=c 1 /string bounds do i c@ w+c w count in-dict? 0= if w count 1- lookup-dict output next-symbol dup w count put-dict 1+ to next-symbol i c@ w=c then loop w count lookup-dict output free-dict ;
\ Decompression \ array of symbols to strings (in real code this would need to be growable) \ next-symbol is reused for the size of this table create symtab 256 cells allot 0 value start : init-symtab 256 to next-symbol here to start ; : free-symtab start here - allot ; : get-symbol ( symbol -- addr len ) dup 256 < if pad c! pad 1 exit then 256 - cells symtab + @ count ; : add-symbol ( addr len -- ) here symtab next-symbol 256 - cells + ! s, next-symbol 1+ to next-symbol ; create entry 256 allot : decompress ( addr len -- ) init-symtab over @ dup emit w=c cells bounds cell+ do i @ next-symbol < if i @ get-symbol entry place else i @ next-symbol = if w 1+ c@ w count + c! w count 1+ entry place else abort" bad symbol!" then then entry count type \ output entry 1+ c@ w+c w count add-symbol entry count w place 1 cells +loop free-symtab ;
\ Testing s" TOBEORNOTTOBEORTOBEORNOT" compress cr \ T O B E O R N O T 256 258 260 265 259 261 263 out out-size @ decompress cr \ TOBEORNOTTOBEORTOBEORNOT
Java
<lang java>import java.util.*;
public class LZW {
/** Compress a string to a list of output symbols. */ public static List<Integer> compress(String uncompressed) { // Build the dictionary. int dictSize = 256; Map<String,Integer> dictionary = new HashMap<String,Integer>(); for (int i = 0; i < 256; i++) dictionary.put("" + (char)i, i); String w = ""; List<Integer> result = new ArrayList<Integer>(); for (char c : uncompressed.toCharArray()) { String wc = w + c; if (dictionary.containsKey(wc)) w = wc; else { result.add(dictionary.get(w)); // Add wc to the dictionary. dictionary.put(wc, dictSize++); w = "" + c; } } // Output the code for w. if (!w.equals("")) result.add(dictionary.get(w)); return result; } /** Decompress a list of output ks to a string. */ public static String decompress(List<Integer> compressed) { // Build the dictionary. int dictSize = 256; Map<Integer,String> dictionary = new HashMap<Integer,String>(); for (int i = 0; i < 256; i++) dictionary.put(i, "" + (char)i); String w = "" + (char)(int)compressed.remove(0); String result = w; for (int k : compressed) { String entry; if (dictionary.containsKey(k)) entry = dictionary.get(k); else if (k == dictSize) entry = w + w.charAt(0); else throw new IllegalArgumentException("Bad compressed k: " + k); result += entry; // Add w+entry[0] to the dictionary. dictionary.put(dictSize++, w + entry.charAt(0)); w = entry; } return result; }
public static void main(String[] args) { List<Integer> compressed = compress("TOBEORNOTTOBEORTOBEORNOT"); System.out.println(compressed); String decompressed = decompress(compressed); System.out.println(decompressed); }
}</lang>
JavaScript
<lang javascript> //LZW Compression/Decompression for Strings var LZW = {
"compress" : function(uncompressed) { // Build the dictionary. var dictSize = 256; var dictionary = {}; for (var i = 0; i < 256; i++) { dictionary[String.fromCharCode(i)] = i; } var w = ""; var result = []; for (var i = 0; i < uncompressed.length; i++) { var c = uncompressed.charAt(i); var wc = w + c; if (dictionary[wc]) w = wc; else { result.push(dictionary[w]); // Add wc to the dictionary. dictionary[wc] = dictSize++; w = "" + c; } } // Output the code for w. if (w != "") result.push(dictionary[w]); return result; },
"decompress" : function(compressed) { // Build the dictionary. var dictSize = 256; var dictionary = []; for (var i = 0; i < 256; i++) { dictionary[i] = String.fromCharCode(i); } var w = String.fromCharCode(compressed[0]); var result = w; for (var i = 1; i < compressed.length; i++) { var entry = ""; var k = compressed[i]; if (dictionary[k]) entry = dictionary[k]; else if (k == dictSize) entry = w + w.charAt(0); else return null; result += entry; // Add w+entry[0] to the dictionary. dictionary[dictSize++] = w + entry.charAt(0); w = entry; } return result; }
}
// For Test Purposes
var comp = LZW.compress("TOBEORNOTTOBEORTOBEORNOT");
var decomp = LZW.decompress(comp);
document.write(comp+'
'+decomp);
</lang>
Output: <lang javascript> 84,79,66,69,79,82,78,79,84,256,258,260,265,259,261,263 TOBEORNOTTOBEORTOBEORNOT </lang>
Objective-C
The class for the LZW compression algorithm:
<lang objc>#import <Foundation/Foundation.h>
- import <stdio.h>
@interface LZWCompressor : NSObject {
@private NSMutableArray *iostream; NSMutableDictionary *dict; NSUInteger codemark;
}
-(LZWCompressor *) init; -(LZWCompressor *) initWithArray: (NSMutableArray *) stream; -(BOOL) compressData: (NSData *) string; -(void) setArray: (NSMutableArray *) stream; -(NSArray *) getArray; @end
@implementation LZWCompressor : NSObject
-(LZWCompressor *) init {
self = [super init]; if ( self ) { iostream = nil; codemark = 256; dict = [NSMutableDictionary dictionaryWithCapacity: 512]; } return self;
}
-(LZWCompressor *) initWithArray: (NSMutableArray *) stream {
self = [self init]; if ( self ) { [self setArray: stream]; } return self;
}
-(void) setArray: (NSMutableArray *) stream {
iostream = stream;
}
-(BOOL) compressData: (NSData *) string; {
NSUInteger i; unsigned char j; // prepare dict for(i=0; i < 256; i++) { j = i; NSData *s = [NSData dataWithBytes: &j length: 1]; [dict setObject: [NSNumber numberWithUnsignedInt: i] forKey: s]; } NSMutableData *w = [NSMutableData data]; NSMutableData *wc = [NSMutableData data]; for(i=0; i < [string length]; i++) { [wc setData: w]; [wc appendData: [string subdataWithRange: NSMakeRange(i, 1)]]; if ( [dict objectForKey: wc] != nil ) { [w setData: wc]; } else { [iostream addObject: [dict objectForKey: w]]; [dict setObject: [NSNumber numberWithUnsignedInt: codemark] forKey: wc]; codemark++; [w setData: [string subdataWithRange: NSMakeRange(i, 1)]]; } } if ( [w length] != 0 ) { [iostream addObject: [dict objectForKey: w]]; } return YES;
}
-(NSArray *) getArray {
return iostream;
}
@end</lang>
Usage example:
<lang objc>char *text = "TOBEORNOTTOBEORTOBEORNOT";
int main() {
NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; LZWCompressor *lzw = [[LZWCompressor alloc] initWithArray: [[NSMutableArray alloc] init] ]; if ( lzw ) { [lzw compressData: [NSData dataWithBytes: text length: strlen(text)]]; NSEnumerator *en = [[lzw getArray] objectEnumerator]; id obj; while( (obj = [en nextObject]) ) { printf("%u\n", [obj unsignedIntValue]); } } [pool release]; return EXIT_SUCCESS;
}</lang>
Output (reformatted by hand):
84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263
OCaml
<lang ocaml>#directory "+site-lib/extlib/"
- load "extLib.cma"
open ExtString
(** compress a string to a list of output symbols *) let compress ~uncompressed =
(* build the dictionary *) let dict_size = 256 in let dictionary = Hashtbl.create 397 in for i=0 to 255 do let str = String.make 1 (char_of_int i) in Hashtbl.add dictionary str i done; let f = (fun (w, dict_size, result) c -> let c = String.make 1 c in let wc = w ^ c in if Hashtbl.mem dictionary wc then (wc, dict_size, result) else begin (* add wc to the dictionary *) Hashtbl.add dictionary wc dict_size; let this = Hashtbl.find dictionary w in (c, dict_size + 1, this::result) end ) in let w, _, result = String.fold_left f ("", dict_size, []) uncompressed in
(* output the code for w *) let result = if w = "" then result else (Hashtbl.find dictionary w) :: result in
(List.rev result)
exception ValueError of string
(** decompress a list of output symbols to a string *) let decompress ~compressed =
(* build the dictionary *) let dict_size = 256 in let dictionary = Hashtbl.create 397 in for i=0 to pred dict_size do let str = String.make 1 (char_of_int i) in Hashtbl.add dictionary i str done;
let w, compressed = match compressed with | hd::tl -> (String.make 1 (char_of_int hd)), tl | [] -> failwith "empty input" in let result = w::[] in
let result, _, _ = List.fold_left (fun (result, w, dict_size) k -> let entry = if Hashtbl.mem dictionary k then Hashtbl.find dictionary k else if k = Hashtbl.length dictionary then w ^ (String.make 1 w.[0]) else raise(ValueError(Printf.sprintf "Bad compressed k: %d" k)) in let result = entry :: result in (* add (w ^ entry.[0]) to the dictionary *) Hashtbl.add dictionary dict_size (w ^ (String.make 1 entry.[0])); (result, entry, dict_size + 1) ) (result, w, dict_size) compressed in (List.rev result)
- </lang>
here is the interface: <lang ocaml>val compress : uncompressed:string -> int list val decompress : compressed:int list -> string list</lang>
How to use:
The compressed datas are a list of symbols (of type int) that will require more than 8 bits to be saved.
So to know how many bits are required, you need to know how many bits are required for the greatest symbol in the list.
<lang ocaml>let greatest = List.fold_left (fun m x -> max m x) 0 ;;
(** number of bits needed to encode the integer m *) let n_bits m =
let m = float m in let rec aux n = let max = (2. ** n) -. 1. in if max >= m then int_of_float n else aux (n +. 1.0) in aux 1.0
let write_compressed ~filename ~compressed =
let nbits = n_bits(greatest compressed) in let oc = open_out filename in output_byte oc nbits; let ob = IO.output_bits(IO.output_channel oc) in List.iter (fun code -> IO.write_bits ob nbits code ) compressed; IO.flush_bits ob; close_out oc;
let read_compressed ~filename =
let ic = open_in filename in let nbits = input_byte ic in let ib = IO.input_bits(IO.input_channel ic) in let rec loop acc = try let code = IO.read_bits ib nbits in loop (code::acc) with _ -> List.rev acc in let compressed = loop [] in let result = decompress ~compressed in let buf = Buffer.create 2048 in List.iter (Buffer.add_string buf) result; (Buffer.contents buf)
- </lang>
Perl
In this version the hashes contain mixed typed data: <lang perl># Compress a string to a list of output symbols. sub compress {
my $uncompressed = shift;
# Build the dictionary. my $dict_size = 256; my %dictionary = map {chr $_ => chr $_} 0..$dict_size-1;
my $w = ""; my @result; foreach my $c (split , $uncompressed) { my $wc = $w . $c; if (exists $dictionary{$wc}) { $w = $wc; } else { push @result, $dictionary{$w}; # Add wc to the dictionary. $dictionary{$wc} = $dict_size; $dict_size++; $w = $c; } }
# Output the code for w. if ($w) { push @result, $dictionary{$w}; } return @result;
}
- Decompress a list of output ks to a string.
sub decompress {
my @compressed = @_;
# Build the dictionary. my $dict_size = 256; my %dictionary = map {chr $_ => chr $_} 0..$dict_size-1;
my $w = shift @compressed; my $result = $w; foreach my $k (@compressed) { my $entry; if (exists $dictionary{$k}) { $entry = $dictionary{$k}; } elsif ($k == $dict_size) { $entry = $w . substr(w,0,1); } else { die "Bad compressed k: $k"; } $result .= $entry;
# Add w+entry[0] to the dictionary. $dictionary{$dict_size} = $w . substr($entry,0,1); $dict_size++;
$w = $entry; } return $result;
}
- How to use:
my @compressed = compress('TOBEORNOTTOBEORTOBEORNOT'); print "@compressed\n"; my $decompressed = decompress(@compressed); print "$decompressed\n";</lang>
Output:
T O B E O R N O T 256 258 260 265 259 261 263 TOBEORNOTTOBEORTOBEORNOT
Python
In this version the dicts contain mixed typed data: <lang python>def compress(uncompressed):
"""Compress a string to a list of output symbols."""
# Build the dictionary. dict_size = 256 dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size)) # in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
w = "" result = [] for c in uncompressed: wc = w + c if wc in dictionary: w = wc else: result.append(dictionary[w]) # Add wc to the dictionary. dictionary[wc] = dict_size dict_size += 1 w = c
# Output the code for w. if w: result.append(dictionary[w]) return result
def decompress(compressed):
"""Decompress a list of output ks to a string."""
# Build the dictionary. dict_size = 256 dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size)) # in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
w = result = compressed.pop(0) for k in compressed: if k in dictionary: entry = dictionary[k] elif k == dict_size: entry = w + w[0] else: raise ValueError('Bad compressed k: %s' % k) result += entry
# Add w+entry[0] to the dictionary. dictionary[dict_size] = w + entry[0] dict_size += 1
w = entry return result
- How to use:
compressed = compress('TOBEORNOTTOBEORTOBEORNOT') print (compressed) decompressed = decompress(compressed) print (decompressed)</lang>
Output:
['T', 'O', 'B', 'E', 'O', 'R', 'N', 'O', 'T', 256, 258, 260, 265, 259, 261, 263] TOBEORNOTTOBEORTOBEORNOT
Ruby
In this version the hashes contain mixed typed data: <lang ruby># Compress a string to a list of output symbols. def compress(uncompressed)
# Build the dictionary. dict_size = 256 dictionary = Hash[* Array.new(dict_size) {|i| [i.chr, i.chr]}.flatten ]
w = "" result = [] for c in uncompressed.split() wc = w + c if dictionary.has_key?(wc) w = wc else result << dictionary[w] # Add wc to the dictionary. dictionary[wc] = dict_size dict_size += 1 w = c end end
# Output the code for w. if w result << dictionary[w] end result
end
- Decompress a list of output ks to a string.
def decompress(compressed)
# Build the dictionary. dict_size = 256 dictionary = Hash[* Array.new(dict_size) {|i| [i.chr, i.chr]}.flatten ]
w = result = compressed.shift for k in compressed if dictionary.has_key?(k) entry = dictionary[k] elsif k == dict_size entry = w + w[0,1] else raise 'Bad compressed k: %s' % k end result += entry
# Add w+entry[0] to the dictionary. dictionary[dict_size] = w + entry[0,1] dict_size += 1
w = entry end result
end
- How to use:
compressed = compress('TOBEORNOTTOBEORTOBEORNOT') p compressed decompressed = decompress(compressed) puts decompressed</lang>
Output:
["T", "O", "B", "E", "O", "R", "N", "O", "T", 256, 258, 260, 265, 259, 261, 263] TOBEORNOTTOBEORTOBEORNOT
Seed7
<lang seed7>$ include "seed7_05.s7i";
const func string: lzwCompress (in string: uncompressed) is func
result var string: result is ""; local var char: ch is ' '; var hash [string] char: mydict is (hash [string] char).value; var string: buffer is ""; var string: xstr is ""; begin for ch range chr(0) to chr(255) do mydict @:= [str(ch)] ch; end for; for ch range uncompressed do xstr := buffer & str(ch); if xstr in mydict then buffer &:= str(ch) else result &:= str(mydict[buffer]); mydict @:= [xstr] chr(length(mydict)); buffer := str(ch); end if; end for; if buffer <> "" then result &:= str(mydict[buffer]); end if; end func;
const func string: lzwDecompress (in string: compressed) is func
result var string: result is ""; local var char: ch is ' '; var hash [char] string: mydict is (hash [char] string).value; var string: buffer is ""; var string: current is ""; var string: chain is ""; begin for ch range chr(0) to chr(255) do mydict @:= [ch] str(ch); end for; for ch range compressed do if buffer = "" then buffer := mydict[ch]; result &:= buffer; elsif ch <= chr(255) then current := mydict[ch]; result &:= current; chain := buffer & current; mydict @:= [chr(length(mydict))] chain; buffer := current; else if ch in mydict then chain := mydict[ch]; else chain := buffer & str(buffer[1]); end if; result &:= chain; mydict @:= [chr(length(mydict))] buffer & str(chain[1]); buffer := chain; end if; end for; end func;
const proc: main is func
local var string: compressed is ""; var string: uncompressed is ""; begin compressed := lzwCompress("TOBEORNOTTOBEORTOBEORNOT"); writeln(literal(compressed)); uncompressed := lzwDecompress(compressed); writeln(uncompressed); end func;</lang>
Output:
"TOBEORNOT\256\\258\\260\\265\\259\\261\\263\" TOBEORNOTTOBEORTOBEORNOT
Tcl
<lang tcl>namespace eval LZW {
variable char2int variable chars for {set i 0} {$i < 256} {incr i} { set char [binary format c $i] set char2int($char) $i lappend chars $char }
}
proc LZW::encode {data} {
variable char2int array set dict [array get char2int]
set w "" set result [list]
foreach c [split $data ""] { set wc $w$c if {[info exists dict($wc)]} { set w $wc } else { lappend result $dict($w) set dict($wc) [array size dict] set w $c } } lappend result $dict($w)
}
proc LZW::decode {cdata} {
variable chars set dict $chars
set k [lindex $cdata 0] set w [lindex $dict $k] set result $w
foreach k [lrange $cdata 1 end] { set currSizeDict [llength $dict] if {$k < $currSizeDict} { set entry [lindex $dict $k] } elseif {$k == $currSizeDict} { set entry $w[string index $w 0] } else { error "invalid code ($k) in ($cdata)" } append result $entry lappend dict $w[string index $entry 0] set w $entry } return $result
}
set s TOBEORNOTTOBEORTOBEORNOT# set e [LZW::encode $s] ;# ==> 84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263 35 set d [LZW::decode $e] ;# ==> TOBEORNOTTOBEORTOBEORNOT#
- or
if {$s eq [LZW::decode [LZW::encode $s]]} then {puts success} else {puts fail} ;# ==> success