LZW compression: Difference between revisions
(added Go) |
No edit summary |
||
Line 1,790: | Line 1,790: | ||
TOBEORNOTTOBEORTOBEORNOT |
TOBEORNOTTOBEORTOBEORNOT |
||
</pre> |
</pre> |
||
=={{header|Scheme}}== |
|||
<lang scheme>; Get the list reference number for a member or #f if not found |
|||
(define (member-string-ref m l) |
|||
(define r #f) |
|||
(let loop ((i 0)) |
|||
(if (< i (length l)) |
|||
(if (not (string=? (list-ref l i) m)) |
|||
(loop (+ i 1)) |
|||
(set! r i)))) |
|||
r) |
|||
; Compress a string with LZW |
|||
(define (lzw-compress uncompressed) |
|||
(define dictionary '()) |
|||
(define n 0) |
|||
(define result '()) |
|||
(set! uncompressed (string->list uncompressed)) |
|||
; Setup Dictionary |
|||
(let dict-setup ((c 0)) |
|||
(if (> 256 c) |
|||
(begin |
|||
(set! dictionary (append dictionary |
|||
(list (string (integer->char c))))) |
|||
(set! n (+ n 1)) |
|||
(dict-setup (+ c 1))))) |
|||
; Compress the string |
|||
(let compress ((w "") (ci 0)) |
|||
(define c (string (list-ref uncompressed ci))) |
|||
(define wc "") |
|||
(set! wc (string-append w c)) |
|||
(if (member-string-ref wc dictionary) |
|||
(set! w wc) |
|||
(begin |
|||
(set! result (append result |
|||
(list (member-string-ref w dictionary)))) |
|||
(set! dictionary (append dictionary (list wc))) |
|||
(set! n (+ n 1)) |
|||
(set! w c))) |
|||
(if (eqv? ci (- (length uncompressed) 1)) |
|||
(set! result (append result |
|||
(list (member-string-ref w dictionary)))) |
|||
(compress w (+ ci 1)))) |
|||
result) |
|||
; Decompress a LZW compressed string (input should be a list of integers) |
|||
(define (lzw-decompress compressed) |
|||
(define dictionary '()) |
|||
(define n 0) |
|||
(define result "") |
|||
; Setup Dictionary |
|||
(let dict-setup ((c 0)) |
|||
(if (> 256 c) |
|||
(begin |
|||
(set! dictionary (append dictionary |
|||
(list (string (integer->char c))))) |
|||
(set! n (+ n 1)) |
|||
(dict-setup (+ c 1))))) |
|||
; Decompress the list |
|||
(let decompress ((k (list-ref compressed 0)) (ci 0)) |
|||
(define kn #f) |
|||
; Add to dictionary |
|||
(if (> (length compressed) (+ ci 1)) |
|||
(begin |
|||
(set! kn (list-ref compressed (+ ci 1))) |
|||
(if (< kn (length dictionary)) |
|||
(set! dictionary (append dictionary |
|||
(list (string-append (list-ref dictionary k) |
|||
(string (string-ref (list-ref dictionary kn) 0))))))))) |
|||
; Build the resulting string |
|||
(set! result (string-append result (list-ref dictionary k))) |
|||
(if (not (eqv? ci (- (length compressed) 1))) |
|||
(decompress kn (+ ci 1)))) |
|||
result) |
|||
(define compressed (lzw-compress "TOBEORNOTTOBEORTOBEORNOT")) |
|||
(display compressed)(newline) |
|||
(define decompressed (lzw-decompress compressed)) |
|||
(display decompressed)(newline)</lang> |
|||
Output:<pre>(84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263) |
|||
TOBEORNOTTOBEORTOBEORNOT</pre> |
|||
=={{header|Seed7}}== |
=={{header|Seed7}}== |
Revision as of 03:12, 27 May 2011
You are encouraged to solve this task according to the task description, using any language you may know.
The Lempel-Ziv-Welch (LZW) algorithm provides lossless data compression. You can read a complete description of it in the Wikipedia article on the subject. It was patented, but it fell in the public domain in 2004.
Ada
lzw.ads: <lang Ada>package LZW is
MAX_CODE : constant := 4095;
type Codes is new Natural range 0 .. MAX_CODE; type Compressed_Data is array (Positive range <>) of Codes;
function Compress (Cleartext : in String) return Compressed_Data; function Decompress (Data : in Compressed_Data) return String;
end LZW;</lang>
lzw.adb: <lang Ada>with Ada.Containers.Ordered_Maps; with Ada.Strings.Unbounded;
package body LZW is
package UStrings renames Ada.Strings.Unbounded; use type UStrings.Unbounded_String;
-------------- -- Compress -- --------------
function Compress (Cleartext : in String) return Compressed_Data is -- translate String to Code-ID package String_To_Code is new Ada.Containers.Ordered_Maps ( Key_Type => UStrings.Unbounded_String, Element_Type => Codes);
Dictionary : String_To_Code.Map; -- Next unused Code-ID Next_Entry : Codes := 256;
-- maximum same length as input, compression ratio always >=1.0 Result : Compressed_Data (1 .. Cleartext'Length); -- position for next Code-ID Result_Index : Natural := 1;
-- current and next input string Current_Word : UStrings.Unbounded_String := UStrings.Null_Unbounded_String; Next_Word : UStrings.Unbounded_String := UStrings.Null_Unbounded_String; begin -- initialize Dictionary for C in Character loop String_To_Code.Insert (Dictionary, UStrings.Null_Unbounded_String & C, Character'Pos (C)); end loop;
for Index in Cleartext'Range loop -- add character to current word Next_Word := Current_Word & Cleartext (Index); if String_To_Code.Contains (Dictionary, Next_Word) then -- already in dictionary, continue with next character Current_Word := Next_Word; else -- insert code for current word to result Result (Result_Index) := String_To_Code.Element (Dictionary, Current_Word); Result_Index := Result_Index + 1; -- add new Code to Dictionary String_To_Code.Insert (Dictionary, Next_Word, Next_Entry); Next_Entry := Next_Entry + 1; -- reset current word to one character Current_Word := UStrings.Null_Unbounded_String & Cleartext (Index); end if; end loop; -- Last word was not entered Result (Result_Index) := String_To_Code.Element (Dictionary, Current_Word); -- return correct array size return Result (1 .. Result_Index); end Compress;
---------------- -- Decompress -- ----------------
function Decompress (Data : in Compressed_Data) return String is -- translate Code-ID to String type Code_To_String is array (Codes) of UStrings.Unbounded_String;
Dictionary : Code_To_String; -- next unused Code-ID Next_Entry : Codes := 256;
-- initialize resulting string as empty string Result : UStrings.Unbounded_String := UStrings.Null_Unbounded_String;
Next_Code : Codes; -- first code has to be in dictionary Last_Code : Codes := Data (1); -- suffix appended to last string for new dictionary entry Suffix : Character; begin -- initialize Dictionary for C in Character loop Dictionary (Codes (Character'Pos (C))) := UStrings.Null_Unbounded_String & C; end loop;
-- output first Code-ID UStrings.Append (Result, Dictionary (Last_Code)); for Index in 2 .. Data'Last loop Next_Code := Data (Index); if Next_Code <= Next_Entry then -- next Code-ID already in dictionary -> append first char Suffix := UStrings.Element (Dictionary (Next_Code), 1); else -- next Code-ID not in dictionary -> use char from last ID Suffix := UStrings.Element (Dictionary (Last_Code), 1); end if; -- expand the dictionary Dictionary (Next_Entry) := Dictionary (Last_Code) & Suffix; Next_Entry := Next_Entry + 1; -- output the current Code-ID to result UStrings.Append (Result, Dictionary (Next_Code)); Last_Code := Next_Code; end loop; -- return String return UStrings.To_String (Result); end Decompress;
end LZW;</lang>
test.adb: <lang Ada>with LZW; with Ada.Text_IO;
procedure Test is
package Text_IO renames Ada.Text_IO; package Code_IO is new Ada.Text_IO.Integer_IO (LZW.Codes);
Test_Data : constant LZW.Compressed_Data := LZW.Compress ("TOBEORNOTTOBEORTOBEORNOT");
begin
for Index in Test_Data'Range loop Code_IO.Put (Test_Data (Index), 0); Text_IO.Put (" "); end loop; Text_IO.New_Line; declare Cleartext : constant String := LZW.Decompress (Test_Data); begin Text_IO.Put_Line (Cleartext); end;
end Test;</lang>
C
See Bit oriented IO for bitio.h and Basic string manipulation functions for estrings.h.
<lang c>#include <stdio.h>
- include <stdlib.h>
- include <stdbool.h>
- include <string.h>
- include "bitio.h"
- include "estrings.h"
- define DEBUG
- if defined(DEBUG)
- define Debug(...) fprintf(stderr, __VA_ARGS__)
- else
- define Debug(...)
- endif</lang>
We need a dictionary and so we implement one according to our need.
<lang c>struct DictionaryNode {
String string; unsigned int code; struct DictionaryNode *next;
}; typedef struct DictionaryNode DN;
- define NODES_PER_ALLOC 512
struct DictionaryStruct {
DN *first; DN **nodes; size_t num_of_nodes; size_t num_of_alloc; bool sorted;
}; typedef struct DictionaryStruct *Dictionary;
Dictionary newDictionary() {
Dictionary t; t = malloc(sizeof(struct DictionaryStruct)); if ( t==NULL ) return NULL; t->num_of_nodes = 0; t->num_of_alloc = 1; t->nodes = malloc(t->num_of_alloc * NODES_PER_ALLOC * sizeof(void *)); t->first = NULL; t->sorted = false; return t;
}
bool _expandDictionary(Dictionary d) {
d->nodes = realloc(d->nodes, (d->num_of_alloc + 1) * NODES_PER_ALLOC * sizeof(void *)); if ( d->nodes == NULL ) return false; d->num_of_alloc++; return true;
}
bool dictionaryPut_ns(Dictionary d, String s, unsigned int c) {
DN *newdw; if ( (d->num_of_nodes + 1) > (d->num_of_alloc * NODES_PER_ALLOC) ) { if ( !_expandDictionary(d) ) return false; } newdw = malloc(sizeof(DN)); if ( newdw == NULL ) return false; newdw->string = cloneString(s); newdw->code = c; newdw->next = d->first; d->first = newdw; d->nodes[d->num_of_nodes] = newdw; d->num_of_nodes++; d->sorted = false; return true;
}
int _dict_strcompare(const void *av, const void *bv) {
DN *a = *((DN **)av); DN *b = *((DN **)bv); return compareStrings(a->string, b->string);
}
void dictionarySort(Dictionary d) {
if ( ! (d->sorted) ) { qsort(d->nodes, d->num_of_nodes, sizeof(void *), _dict_strcompare); d->sorted = true; }
}
bool dictionaryPut(Dictionary d, String s, unsigned int c) {
bool added = dictionaryPut_ns(d, s, c); if ( added ) { dictionarySort(d); } return added;
}
DN *dictionaryLookUp(Dictionary d, String s)
{
if ( d->num_of_nodes == 0 ) return NULL; int low, high, med, p; /* perform a "binary" search ... */ for(low=-1, high = d->num_of_nodes-1; (high-low)>1 ; ) { med = (high+low) >> 1; p = compareStrings(s, d->nodes[med]->string); if ( p <= 0 ) { high = med; } else { low = med; } } if ( compareStrings(s, d->nodes[high]->string) == 0 ) return d->nodes[high]; return NULL;
}
void destroyDictionary(Dictionary d) {
int i; if ( d==NULL ) return; if ( d->nodes != NULL ) { for(i=0; i < d->num_of_nodes; i++) { destroyString(d->nodes[i]->string); free(d->nodes[i]); } free(d->nodes); } free(d);
}</lang>
LZW code:
<lang c>#define BITS_PER_WORD 9 unsigned int codemark = 256; unsigned char num_of_bits = BITS_PER_WORD;
struct LZWHandleStruct {
unsigned int codemark; unsigned char num_of_bits; Dictionary dict; size_t bytes_written; FILE *iostream;
}; typedef struct LZWHandleStruct LZWHandle;
LZWHandle *lzw_begin(FILE *o) {
LZWHandle *lh; lh = malloc(sizeof(LZWHandle)); if ( lh != NULL ) { lh->bytes_written = 0; lh->codemark = 256; lh->num_of_bits = BITS_PER_WORD; lh->dict = newDictionary(); lh->iostream = o; if ( lh->dict == NULL ) { free(lh); return NULL; } } return lh;
}
bool lzw_compress(LZWHandle *lh, const char *stream, size_t len) {
unsigned int i; char tc; Dictionary d = lh->dict; DN *oc; String w = NULL; String wc = NULL; String t = NULL; size_t wbits = 0; t = newString(); for(i=0; i < 256; i++) { tc = i; setString(t, &tc, 1); dictionaryPut_ns(d, t, i); } destroyString(t); t = NULL; /* a trick: we have put values in order, so it is sorted*/ d->sorted = true; w = newString(); wc = newString(); for(i=0; i < len; i++) { copyString(wc, w); appendChar(wc, stream[i]); if ( (dictionaryLookUp(d, wc)) != NULL ) { copyString(w, wc); } else { oc = dictionaryLookUp(d, w); if ( oc != NULL ) { /* guardring for bugs :D */ wbits += bits_write(oc->code, lh->num_of_bits, lh->iostream); Debug("%u ; %d ; %u\n", oc->code, lh->num_of_bits, lh->codemark); } dictionaryPut(d, wc, lh->codemark); lh->codemark++; if ( lh->codemark >= (1 << lh->num_of_bits) ) { lh->num_of_bits++; } setString(w, &stream[i], 1); } } if ( ! stringIsEmpty(w) ) { oc = dictionaryLookUp(d, w); /* no guard ring for bugs here :) */ wbits += bits_write(oc->code, lh->num_of_bits, lh->iostream); Debug("%u ; %d ; %u\n", oc->code, lh->num_of_bits, lh->codemark); } lh->bytes_written = wbits >> 3; destroyString(w); destroyString(wc); return true;
}
int lzw_end(LZWHandle *lh) {
int oby=0; if ( lh != NULL ) { oby=bits_flush(lh->iostream); destroyDictionary(lh->dict); free(lh); } return oby;
}</lang>
Testing:
<lang c>const char *text = "TOBEORNOTTOBEORTOBEORNOT"; int main() {
LZWHandle *lzw; size_t writtenbytecount = 0; lzw = lzw_begin(stdout); if ( lzw != NULL ) { lzw_compress(lzw, text, strlen(text)); writtenbytecount += lzw->bytes_written; Debug("bits on exit: %u\n", lzw->num_of_bits); lzw_end(lzw); fprintf(stderr, "n. of input bytes: %u\n" "n. of output bytes: %u\n", strlen(text), writtenbytecount ); } else return 1; return 0;
}</lang>
This test code really emits the compressed bytes stream. The decimal values of the codes (as other solutions do) are sent as debug informations.
Common Lisp
This version is based upon the Perl one. It doesn't contain mixed type data at the cost of being more consy. It includes vector operation routines, since using VECTOR-PUSH-APPEND
reallocates the whole vector with each call.
The Babel library is required to convert octet vectors to strings. Lisp strings can contain characters out of the ASCII/latin1 character set, including the whole Unicode range in them. The exact encoding used is dependent upon the user's locale (LC_CTYPE
on Unix).
<lang lisp>(declaim (ftype (function (vector vector &optional fixnum fixnum) vector)
vector-append))
(defun vector-append (old new &optional (start2 0) end2)
(declare (optimize (speed 3) (safety 0) (debug 0))) (prog1 old (let* ((old-fill (fill-pointer old)) (new-fill (+ old-fill (length new)))) (when (> new-fill (array-dimension old 0)) (adjust-array old (* 4 new-fill))) (setf (fill-pointer old) new-fill) (replace old new :start1 old-fill :start2 start2 :end2 end2))))
(declaim (ftype (function (vector t) vector) vector-append1)) (defun vector-append1 (old new)
(prog1 old (let* ((old-fill (fill-pointer old)) (new-fill (1+ old-fill))) (when (> new-fill (array-dimension old 0)) (adjust-array old (* 4 new-fill))) (setf (fill-pointer old) new-fill) (setf (aref old old-fill) new))))
(declaim (ftype (function (&optional t) vector) make-empty-vector)) (defun make-empty-vector (&optional (element-type t))
(make-array 0 :element-type element-type :fill-pointer 0 :adjustable t))
(declaim (ftype (function (t &optional t) vector) make-vector-with-elt))
(defun make-vector-with-elt (elt &optional (element-type t))
(make-array 1 :element-type element-type :fill-pointer 1 :adjustable t :initial-element elt))
(declaim (ftype (function (vector t) vector) vector-append1-new)) (defun vector-append1-new (old new)
(vector-append1 (vector-append (make-empty-vector 'octet) old) new))
(declaim (ftype (function (vector vector) vector) vector-append-new)) (defun vector-append-new (old new)
(vector-append (vector-append (make-empty-vector 'octet) old) new))
(deftype octet () '(unsigned-byte 8))
(declaim (ftype (function () hash-table) build-dictionary)) (defun build-dictionary ()
(let ((dictionary (make-hash-table :test #'equalp))) (loop for i below 256 do (let ((vec (make-vector-with-elt i 'octet))) (setf (gethash vec dictionary) vec))) dictionary))
(declaim (ftype (function ((vector octet)) (vector octet))
lzw-compress-octets))
(defun lzw-compress-octets (octets)
(declare (optimize (speed 3) (safety 0) (debug 0))) (loop with dictionary-size of-type fixnum = 256 with w = (make-empty-vector 'octet) with result = (make-empty-vector 't) with dictionary = (build-dictionary) for c across octets for wc = (vector-append1-new w c) if (gethash wc dictionary) do (setq w wc) else do (vector-append result (gethash w dictionary)) (setf (gethash wc dictionary) (make-vector-with-elt dictionary-size)) (incf dictionary-size) (setq w (make-vector-with-elt c 'octet)) finally (unless (zerop (length (the (vector octet) w))) (vector-append result (gethash w dictionary))) (return result)))
(declaim (ftype (function (vector) (vector octet)) lzw-decompress)) (defun #1=lzw-decompress (octets)
(declare (optimize (speed 3) (safety 0) (debug 0))) (when (zerop (length octets)) (return-from #1# (make-empty-vector 'octet))) (loop with dictionary-size = 256 with dictionary = (build-dictionary) with result = (make-vector-with-elt (aref octets 0) 'octet) with w = (copy-seq result) for i from 1 below (length octets) for k = (make-vector-with-elt (aref octets i) 't) for entry = (or (gethash k dictionary) (if (equalp k dictionary-size) (coerce (list w (aref w 0)) '(vector octet)) (error "bad compresed entry at pos ~S" i))) do (vector-append result entry) (setf (gethash (make-vector-with-elt dictionary-size) dictionary) (vector-append1-new w (aref entry 0))) (incf dictionary-size) (setq w entry) finally (return result)))
(defgeneric lzw-compress (datum)
(:method ((string string)) (lzw-compress (babel:string-to-octets string))) (:method ((octets vector)) (lzw-compress-octets octets)))
(defun lzw-decompress-to-string (octets)
(babel:octets-to-string (lzw-decompress octets)))
(defun test (string)
(assert (equal #2=(lzw-decompress-to-string (lzw-compress string)) string) () "Can't compress ~S properly, got ~S instead" string #2#) t)</lang>
And the format used:
<lang lisp>CL-USER> (test "TOBEORNOTTOBEORTOBEORNOT") T CL-USER> (lzw-compress "TOBEORNOTTOBEORTOBEORNOT")
- (84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263)
CL-USER> (lzw-decompress-to-string *) "TOBEORNOTTOBEORTOBEORNOT"</lang>
C++
<lang cpp>#include <string>
- include <map>
// Compress a string to a list of output symbols. // The result will be written to the output iterator // starting at "result"; the final iterator is returned. template <typename Iterator> Iterator compress(const std::string &uncompressed, Iterator result) {
// Build the dictionary. int dictSize = 256; std::map<std::string,int> dictionary; for (int i = 0; i < 256; i++) dictionary[std::string(1, i)] = i; std::string w; for (std::string::const_iterator it = uncompressed.begin(); it != uncompressed.end(); ++it) { char c = *it; std::string wc = w + c; if (dictionary.count(wc)) w = wc; else { *result++ = dictionary[w]; // Add wc to the dictionary. dictionary[wc] = dictSize++; w = std::string(1, c); } } // Output the code for w. if (!w.empty()) *result++ = dictionary[w]; return result;
}
// Decompress a list of output ks to a string. // "begin" and "end" must form a valid range of ints template <typename Iterator> std::string decompress(Iterator begin, Iterator end) {
// Build the dictionary. int dictSize = 256; std::map<int,std::string> dictionary; for (int i = 0; i < 256; i++) dictionary[i] = std::string(1, i); std::string w(1, *begin++); std::string result = w; std::string entry; for ( ; begin != end; begin++) { int k = *begin; if (dictionary.count(k)) entry = dictionary[k]; else if (k == dictSize) entry = w + w[0]; else throw "Bad compressed k"; result += entry; // Add w+entry[0] to the dictionary. dictionary[dictSize++] = w + entry[0]; w = entry; } return result;
}
- include <iostream>
- include <iterator>
- include <vector>
int main() {
std::vector<int> compressed; compress("TOBEORNOTTOBEORTOBEORNOT", std::back_inserter(compressed)); copy(compressed.begin(), compressed.end(), std::ostream_iterator<int>(std::cout, ", ")); std::cout << std::endl; std::string decompressed = decompress(compressed.begin(), compressed.end()); std::cout << decompressed << std::endl; return 0;
}</lang>
Clojure
<lang lisp>(defn make-dict []
(let [vals (range 0 256)] (zipmap (map (comp #'list #'char) vals) vals)))
(defn compress [#^String text]
(loop [t (seq text) r '() w '() dict (make-dict) s 256] (let [c (first t)] (if c (let [wc (cons c w)] (if (get dict wc) (recur (rest t) r wc dict s) (recur (rest t) (cons (get dict w) r) (list c) (assoc dict wc s) (inc s)))) (reverse (if w (cons (get dict w) r) r))))))
(compress "TOBEORNOTTOBEORTOBEORNOT")</lang> The output: <lang lisp>(84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263)</lang>
D
<lang d>import std.stdio;
int[] compress(string uncompressed) {
int dictSize = 256; int[string] dictionary; foreach (i; 0 .. dictSize) dictionary["" ~ cast(char)i] = i;
string w; int[] result; foreach (c; uncompressed) { string wc = w ~ c; if (wc in dictionary) w = wc; else { result ~= dictionary[w]; dictionary[wc] = dictSize++; w = "" ~ c; } } if (w.length) result ~= dictionary[w]; return result;
}
string decompress(int[] compressed) {
int dictSize = 256; string[int] dictionary; foreach (i; 0 .. dictSize) dictionary[i] = "" ~ cast(char)i;
string w = "" ~ cast(char)compressed[0]; string result = w; foreach (k; compressed[1 .. $]) { string entry; if (k in dictionary) entry = dictionary[k]; else if (k == dictSize) entry = w ~ w[0]; else throw new Exception("Bad compressed k"); result ~= entry;
dictionary[dictSize++] = w ~ entry[0]; w = entry; } return result;
}
void main() {
auto compressed = compress("TOBEORNOTTOBEORTOBEORNOT"); auto decompressed = decompress(compressed); writeln(compressed, "\n", decompressed);
}</lang> Output: <lang d>[84, 79, 66, 69, 79, 82, 78, 79, 84, 256, 258, 260, 265, 259, 261, 263] TOBEORNOTTOBEORTOBEORNOT</lang>
Dylan
<lang>Module: LZW Synopsis: LZW implementation for Rosetta code
define method output(n :: <integer>)
format-out("%d ", n);
end;
define method contains?(dict, var)
let x = element(dict, var, default: #f); x ~= #f;
end;
define method byte->string(c)
add("", as(<character>, c));
end;
define method compress(input :: <string>) => <vector>;
let result = make(<vector>); let dict = make(<string-table>); for (x from 0 to 255) dict[byte->string(x)] := x; end;
let next-code = 256; let cur-seq = ""; for (c in input) let wc = add(cur-seq, c); if (contains?(dict, wc)) cur-seq := wc; else result := add(result, dict[cur-seq]); dict[wc] := next-code; next-code := next-code + 1; cur-seq := add("", c); end end; unless (empty?(cur-seq)) result := add(result, dict[cur-seq]); end; result
end;
format-out("%=\n", compress("TOBEORNOTTOBEORTOBEORNOT"))</lang>
Forth
<lang forth>256 value next-symbol
\ current string fragment
create w 256 allot \ counted string
- w=c ( c -- ) w 1+ c! 1 w c! ;
- w+c ( c -- ) w count + c! w c@ 1+ w c! ;
\ Compression
\ dictionary of strings to symbols 0 value dict
- init-dict table to dict 256 to next-symbol dict set-current ;
- free-dict forth-wordlist set-current ;
- in-dict? ( key len -- ? ) \ can assume len > 1
dict search-wordlist dup if nip then ;
- lookup-dict ( key len -- symbol )
dup 1 = if drop c@ exit then dict search-wordlist if >body @ else abort" bad-dict!" then ;
- put-dict ( data key len -- )
nextname create , ;
\ output buffer of symbols \ in real life, these symbols would be packed into octets variable out-size create out 256 cells allot
- output ( symbol -- )
dup out out-size @ cells + ! 1 out-size +! dup 256 < if emit space else . then ;
- compress ( addr len -- )
init-dict 0 out-size ! over c@ w=c 1 /string bounds do i c@ w+c w count in-dict? 0= if w count 1- lookup-dict output next-symbol dup w count put-dict 1+ to next-symbol i c@ w=c then loop w count lookup-dict output free-dict ;
\ Decompression
\ array of symbols to strings (in real code this would need to be growable) \ next-symbol is reused for the size of this table create symtab 256 cells allot 0 value start
- init-symtab 256 to next-symbol here to start ;
- free-symtab start here - allot ;
- get-symbol ( symbol -- addr len )
dup 256 < if pad c! pad 1 exit then 256 - cells symtab + @ count ;
- add-symbol ( addr len -- )
here symtab next-symbol 256 - cells + ! s, next-symbol 1+ to next-symbol ;
create entry 256 allot
- decompress ( addr len -- )
init-symtab over @ dup emit w=c cells bounds cell+ do i @ next-symbol < if i @ get-symbol entry place else i @ next-symbol = if w 1+ c@ w count + c! w count 1+ entry place else abort" bad symbol!" then then entry count type \ output entry 1+ c@ w+c w count add-symbol entry count w place 1 cells +loop free-symtab ;
\ Testing
s" TOBEORNOTTOBEORTOBEORNOT" compress cr \ T O B E O R N O T 256 258 260 265 259 261 263
out out-size @ decompress cr \ TOBEORNOTTOBEORTOBEORNOT</lang>
Go
<lang go>package main import "fmt"
// Compress a string to a list of output symbols. func compress(uncompressed string) []int {
// Build the dictionary. dictSize := 256 dictionary := make(map[string]int) for i := 0; i < 256; i++ { dictionary[string(i)] = i } w := "" result := make([]int, 0) for _, c := range []byte(uncompressed) { wc := w + string(c) if _, ok := dictionary[wc]; ok { w = wc } else { result = append(result, dictionary[w]) // Add wc to the dictionary. dictionary[wc] = dictSize dictSize++ w = string(c) } }
// Output the code for w. if w != "" { result = append(result, dictionary[w]) } return result
}
// Decompress a list of output ks to a string. func decompress(compressed []int) string {
// Build the dictionary. dictSize := 256 dictionary := make(map[int]string) for i := 0; i < 256; i++ { dictionary[i] = string(i) } w := string(compressed[0]) result := w for _, k := range compressed[1:] { var entry string if x, ok := dictionary[k]; ok { entry = x } else if k == dictSize { entry = w + w[:1] } else { panic(fmt.Sprintf("Bad compressed k: %d", k)) } result += entry // Add w+entry[0] to the dictionary. dictionary[dictSize] = w + entry[:1] dictSize++ w = entry } return result
}
func main() {
compressed := compress("TOBEORNOTTOBEORTOBEORNOT") fmt.Println(compressed) decompressed := decompress(compressed) fmt.Println(decompressed)
}</lang>
Haskell
<lang Haskell>import Data.List import Data.Char import Data.Maybe import Control.Monad import Control.Arrow
take2 = filter((==2).length). map (take 2). tails
doLZW _ [] = [] doLZW as (x:xs) = lzw (map return as) [x] xs
where lzw a w [] = [fromJust $ elemIndex w a] lzw a w (x:xs) | w' `elem` a = lzw a w' xs | otherwise = fromJust (elemIndex w a) : lzw (a++[w']) [x] xs where w' = w++[x]
undoLZW _ [] = [] undoLZW a cs =
((cs >>=).(!!)) $ foldl (liftM2 (.) (++) (((return. liftM2 (++) head (take 1. last)).). map. (!!))) (map return a) (take2 cs)</lang>
Testing: <lang Haskell>*Main> doLZW ['\0'..'\255'] "TOBEORNOTTOBEORTOBEORNOT" [84,79,66,69,79,82,78,79,84,256,258,260,265,259,261,263]
- Main> undoLZW ['\0'..'\255'] [84,79,66,69,79,82,78,79,84,256,258,260,265,259,261,263]
"TOBEORNOTTOBEORTOBEORNOT"</lang> Encode --> decode --> compare with original text. <lang Haskell>*Main> (ap (==) . liftM2 (.) undoLZW doLZW) ['\0'..'\255'] "TOBEORNOTTOBEORTOBEORNOT" True</lang>
Other (elegant) code can be found at Haskell wiki Toy compression
J
Straightforward implementations of encoding and decoding: <lang J>encodeLZW =: 4 : 0
d=. ;/x r=.0$0 wc=.w=.{.y for_c. }.y do. wc=.w,c if. d e.~ <wc do. w=.wc else. r=. r, d i.<w d=.d,<wc w=.c end. end. r, d i.<w
)</lang> Test:
a. encodeLZW 'TOBEORNOTTOBEORTOBEORNOT' 84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263
Decoding: <lang J>decodeLZW =: 4 : 0
d=.;/x w=.r=. >d{~{.y ds=. #d for_c. }.y do. select. * c-ds case. _1 do. r=.r,e=.>c{d case. 0 do. r=.r,e=.w,{.w case. do. 'error' return. end. d=.d,< w,{.e w=.e ds=.ds+1 end. ;r
)</lang> Test:
a. decodeLZW 84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263 TOBEORNOTTOBEORTOBEORNOT
encode --> decode --> compare with original:
a. (] -: [ decodeLZW encodeLZW) 'TOBEORNOTTOBEORTOBEORNOT' 1
Error test:
a. decodeLZW 84 79 66 69 79 82 78 79 84 256 258 456 260 265 259 261 263 error
Tacit J expression for decoding:
decodeLZW=:[:;]{[:;[:(],<@(>@{.,{.@>@{:)@:{)&.>/<@(;/@[),~|.@(2<\])
a. decodeLZW 84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263 TOBEORNOTTOBEORTOBEORNOT
Java
<lang java5>import java.util.*;
public class LZW {
/** Compress a string to a list of output symbols. */ public static List<Integer> compress(String uncompressed) { // Build the dictionary. int dictSize = 256; Map<String,Integer> dictionary = new HashMap<String,Integer>(); for (int i = 0; i < 256; i++) dictionary.put("" + (char)i, i); String w = ""; List<Integer> result = new ArrayList<Integer>(); for (char c : uncompressed.toCharArray()) { String wc = w + c; if (dictionary.containsKey(wc)) w = wc; else { result.add(dictionary.get(w)); // Add wc to the dictionary. dictionary.put(wc, dictSize++); w = "" + c; } } // Output the code for w. if (!w.equals("")) result.add(dictionary.get(w)); return result; } /** Decompress a list of output ks to a string. */ public static String decompress(List<Integer> compressed) { // Build the dictionary. int dictSize = 256; Map<Integer,String> dictionary = new HashMap<Integer,String>(); for (int i = 0; i < 256; i++) dictionary.put(i, "" + (char)i); String w = "" + (char)(int)compressed.remove(0); String result = w; for (int k : compressed) { String entry; if (dictionary.containsKey(k)) entry = dictionary.get(k); else if (k == dictSize) entry = w + w.charAt(0); else throw new IllegalArgumentException("Bad compressed k: " + k); result += entry; // Add w+entry[0] to the dictionary. dictionary.put(dictSize++, w + entry.charAt(0)); w = entry; } return result; }
public static void main(String[] args) { List<Integer> compressed = compress("TOBEORNOTTOBEORTOBEORNOT"); System.out.println(compressed); String decompressed = decompress(compressed); System.out.println(decompressed); }
}</lang>
JavaScript
<lang javascript>//LZW Compression/Decompression for Strings var LZW = {
"compress" : function(uncompressed) { // Build the dictionary. var dictSize = 256; var dictionary = {}; for (var i = 0; i < 256; i++) { dictionary[String.fromCharCode(i)] = i; } var w = ""; var result = []; for (var i = 0; i < uncompressed.length; i++) { var c = uncompressed.charAt(i); var wc = w + c; if (dictionary[wc]) w = wc; else { result.push(dictionary[w]); // Add wc to the dictionary. dictionary[wc] = dictSize++; w = "" + c; } } // Output the code for w. if (w != "") result.push(dictionary[w]); return result; },
"decompress" : function(compressed) { // Build the dictionary. var dictSize = 256; var dictionary = []; for (var i = 0; i < 256; i++) { dictionary[i] = String.fromCharCode(i); } var w = String.fromCharCode(compressed[0]); var result = w; for (var i = 1; i < compressed.length; i++) { var entry = ""; var k = compressed[i]; if (dictionary[k]) entry = dictionary[k]; else if (k == dictSize) entry = w + w.charAt(0); else return null; result += entry; // Add w+entry[0] to the dictionary. dictionary[dictSize++] = w + entry.charAt(0); w = entry; } return result; }
}
// For Test Purposes
var comp = LZW.compress("TOBEORNOTTOBEORTOBEORNOT");
var decomp = LZW.decompress(comp);
document.write(comp+'
'+decomp);</lang>
Output: <lang javascript>84,79,66,69,79,82,78,79,84,256,258,260,265,259,261,263 TOBEORNOTTOBEORTOBEORNOT</lang>
Objective-C
The class for the LZW compression algorithm:
<lang objc>#import <Foundation/Foundation.h>
- import <stdio.h>
@interface LZWCompressor : NSObject {
@private NSMutableArray *iostream; NSMutableDictionary *dict; NSUInteger codemark;
}
-(LZWCompressor *) init; -(LZWCompressor *) initWithArray: (NSMutableArray *) stream; -(BOOL) compressData: (NSData *) string; -(void) setArray: (NSMutableArray *) stream; -(NSArray *) getArray; @end
@implementation LZWCompressor : NSObject
-(LZWCompressor *) init {
self = [super init]; if ( self ) { iostream = nil; codemark = 256; dict = [[NSMutableDictionary alloc] initWithCapacity: 512]; } return self;
}
-(LZWCompressor *) initWithArray: (NSMutableArray *) stream {
self = [self init]; if ( self ) { [self setArray: stream]; } return self;
}
-(void) dealloc {
[dict release]; [iostream release]; [super dealloc];
}
-(void) setArray: (NSMutableArray *) stream {
iostream = [stream retain];
}
-(BOOL) compressData: (NSData *) string; {
NSUInteger i; unsigned char j; // prepare dict for(i=0; i < 256; i++) { j = i; NSData *s = [NSData dataWithBytes: &j length: 1]; [dict setObject: [NSNumber numberWithUnsignedInt: i] forKey: s]; } NSMutableData *w = [NSMutableData data]; NSMutableData *wc = [NSMutableData data]; for(i=0; i < [string length]; i++) { [wc setData: w]; [wc appendData: [string subdataWithRange: NSMakeRange(i, 1)]]; if ( [dict objectForKey: wc] != nil ) { [w setData: wc]; } else { [iostream addObject: [dict objectForKey: w]]; [dict setObject: [NSNumber numberWithUnsignedInt: codemark] forKey: wc]; codemark++; [w setData: [string subdataWithRange: NSMakeRange(i, 1)]]; } } if ( [w length] != 0 ) { [iostream addObject: [dict objectForKey: w]]; } return YES;
}
-(NSArray *) getArray {
return iostream;
}
@end</lang>
Usage example:
<lang objc>const char *text = "TOBEORNOTTOBEORTOBEORNOT";
int main() {
NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init]; NSMutableArray *array = [[NSMutableArray alloc] init]; LZWCompressor *lzw = [[LZWCompressor alloc] initWithArray: array ]; if ( lzw ) { [lzw compressData: [NSData dataWithBytes: text length: strlen(text)]]; NSEnumerator *en = [array objectEnumerator]; id obj; while( (obj = [en nextObject]) ) { printf("%u\n", [obj unsignedIntValue]); } [lzw release]; } [array release]; [pool release]; return EXIT_SUCCESS;
}</lang>
Output (reformatted by hand):
84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263
OCaml
<lang ocaml>#directory "+extlib" (* or maybe "+site-lib/extlib/" *)
- load "extLib.cma"
open ExtString
(** compress a string to a list of output symbols *) let compress ~uncompressed =
(* build the dictionary *) let dict_size = 256 in let dictionary = Hashtbl.create 397 in for i=0 to 255 do let str = String.make 1 (char_of_int i) in Hashtbl.add dictionary str i done; let f = (fun (w, dict_size, result) c -> let c = String.make 1 c in let wc = w ^ c in if Hashtbl.mem dictionary wc then (wc, dict_size, result) else begin (* add wc to the dictionary *) Hashtbl.add dictionary wc dict_size; let this = Hashtbl.find dictionary w in (c, dict_size + 1, this::result) end ) in let w, _, result = String.fold_left f ("", dict_size, []) uncompressed in
(* output the code for w *) let result = if w = "" then result else (Hashtbl.find dictionary w) :: result in
(List.rev result)
exception ValueError of string
(** decompress a list of output symbols to a string *) let decompress ~compressed =
(* build the dictionary *) let dict_size = 256 in let dictionary = Hashtbl.create 397 in for i=0 to pred dict_size do let str = String.make 1 (char_of_int i) in Hashtbl.add dictionary i str done;
let w, compressed = match compressed with | hd::tl -> (String.make 1 (char_of_int hd)), tl | [] -> failwith "empty input" in let result = [w] in
let result, _, _ = List.fold_left (fun (result, w, dict_size) k -> let entry = if Hashtbl.mem dictionary k then Hashtbl.find dictionary k else if k = Hashtbl.length dictionary then w ^ (String.make 1 w.[0]) else raise(ValueError(Printf.sprintf "Bad compressed k: %d" k)) in let result = entry :: result in (* add (w ^ entry.[0]) to the dictionary *) Hashtbl.add dictionary dict_size (w ^ (String.make 1 entry.[0])); (result, entry, dict_size + 1) ) (result, w, dict_size) compressed in (List.rev result)
- </lang>
here is the interface: <lang ocaml>val compress : uncompressed:string -> int list val decompress : compressed:int list -> string list</lang>
How to use:
The compressed datas are a list of symbols (of type int) that will require more than 8 bits to be saved.
So to know how many bits are required, you need to know how many bits are required for the greatest symbol in the list.
<lang ocaml>let greatest = List.fold_left max 0 ;;
(** number of bits needed to encode the integer m *) let n_bits m =
let m = float m in let rec aux n = let max = (2. ** n) -. 1. in if max >= m then int_of_float n else aux (n +. 1.0) in aux 1.0
let write_compressed ~filename ~compressed =
let nbits = n_bits(greatest compressed) in let oc = open_out filename in output_byte oc nbits; let ob = IO.output_bits(IO.output_channel oc) in List.iter (IO.write_bits ob nbits) compressed; IO.flush_bits ob; close_out oc;
let read_compressed ~filename =
let ic = open_in filename in let nbits = input_byte ic in let ib = IO.input_bits(IO.input_channel ic) in let rec loop acc = try let code = IO.read_bits ib nbits in loop (code::acc) with _ -> List.rev acc in let compressed = loop [] in let result = decompress ~compressed in let buf = Buffer.create 2048 in List.iter (Buffer.add_string buf) result; (Buffer.contents buf)
- </lang>
Perl
In this version the hashes contain mixed typed data: <lang perl># Compress a string to a list of output symbols. sub compress {
my $uncompressed = shift;
# Build the dictionary. my $dict_size = 256; my %dictionary = map {chr $_ => chr $_} 0..$dict_size-1;
my $w = ""; my @result; foreach my $c (split , $uncompressed) { my $wc = $w . $c; if (exists $dictionary{$wc}) { $w = $wc; } else { push @result, $dictionary{$w}; # Add wc to the dictionary. $dictionary{$wc} = $dict_size; $dict_size++; $w = $c; } }
# Output the code for w. if ($w) { push @result, $dictionary{$w}; } return @result;
}
- Decompress a list of output ks to a string.
sub decompress {
my @compressed = @_;
# Build the dictionary. my $dict_size = 256; my %dictionary = map {chr $_ => chr $_} 0..$dict_size-1;
my $w = shift @compressed; my $result = $w; foreach my $k (@compressed) { my $entry; if (exists $dictionary{$k}) { $entry = $dictionary{$k}; } elsif ($k == $dict_size) { $entry = $w . substr($w,0,1); } else { die "Bad compressed k: $k"; } $result .= $entry;
# Add w+entry[0] to the dictionary. $dictionary{$dict_size} = $w . substr($entry,0,1); $dict_size++;
$w = $entry; } return $result;
}
- How to use:
my @compressed = compress('TOBEORNOTTOBEORTOBEORNOT'); print "@compressed\n"; my $decompressed = decompress(@compressed); print "$decompressed\n";</lang>
Output:
T O B E O R N O T 256 258 260 265 259 261 263 TOBEORNOTTOBEORTOBEORNOT
PicoLisp
<lang PicoLisp>(de lzwCompress (Lst)
(let (Codes 255 Dict) (balance 'Dict (make (for C Codes (link (cons (char C) C)) ) ) ) (make (let W (pop 'Lst) (for C Lst (let WC (pack W C) (if (lup Dict WC) (setq W WC) (link (cdr (lup Dict W))) (idx 'Dict (cons WC (inc 'Codes)) T) (setq W C) ) ) ) (and W (link (cdr (lup Dict W)))) ) ) ) )</lang>
Output:
: (lzwCompress (chop "TOBEORNOTTOBEORTOBEORNOT")) -> (84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263)
PureBasic
This version encodes character sequences as 16-bit values. Because this version only encodes an input string it won't handle Null values. This is because PureBasic uses these to terminate strings. Only slight modifications are necessary to handle Null values that would be present for a more generic routine that could be used with a buffer containing any data type. <lang PureBasic>Procedure compress(uncompressed.s, List result.u())
;Compress a string to a list of output symbols ;Build the dictionary. Protected dict_size = 255, i newmap dict.u() For i = 0 To 254 dict(Chr(i + 1)) = i Next
Protected w.s, wc.s, *c.Character = @uncompressed w = "" LastElement(result()) While *c\c <> #Null wc = w + Chr(*c\c) If FindMapElement(dict(), wc) w = wc Else AddElement(result()) result() = dict(w) ;Add wc to the dictionary dict(wc) = dict_size dict_size + 1 ;no check is performed for overfilling the dictionary. w = Chr(*c\c) EndIf *c + 1 Wend ;Output the code for w If w AddElement(result()) result() = dict(w) EndIf
EndProcedure
Procedure.s decompress(List compressed.u())
;Decompress a list of encoded values to a string If ListSize(compressed()) = 0: ProcedureReturn "": EndIf ;Build the dictionary. Protected dict_size = 255, i Dim dict.s(255) For i = 1 To 255 dict(i - 1) = Chr(i) Next
Protected w.s, entry.s, result.s FirstElement(compressed()) w = dict(compressed()) result = w i = 0 While NextElement(compressed()) i + 1 If compressed() < dict_size entry = dict(compressed()) ElseIf i = dict_size entry = w + Left(w, 1) Else MessageRequester("Error","Bad compression at [" + Str(i) + "]") ProcedureReturn result;abort EndIf result + entry ;Add w + Left(entry, 1) to the dictionary If ArraySize(dict()) <= dict_size Redim dict(dict_size + 256) EndIf dict(dict_size) = w + Left(entry, 1) dict_size + 1 ;no check is performed for overfilling the dictionary.
w = entry Wend ProcedureReturn result
EndProcedure
If OpenConsole()
;How to use: Define initial.s, decompressed.s Print("Type something: ") initial = Input() NewList compressed.u() compress(initial, compressed()) ForEach compressed() Print(Str(compressed()) + " ") Next PrintN("") decompressed = decompress(compressed()) PrintN(decompressed) Print(#CRLF$ + #CRLF$ + "Press ENTER to exit") Input() CloseConsole()
EndIf</lang> Sample output:
Type something: TOBEORNOTTOBEORTOBEORNOT 83 78 65 68 78 81 77 78 83 255 257 259 264 258 260 262 TOBEORNOTTOBEORTOBEORNOT
Python
In this version the dicts contain mixed typed data: <lang python>def compress(uncompressed):
"""Compress a string to a list of output symbols."""
# Build the dictionary. dict_size = 256 dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size)) # in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
w = "" result = [] for c in uncompressed: wc = w + c if wc in dictionary: w = wc else: result.append(dictionary[w]) # Add wc to the dictionary. dictionary[wc] = dict_size dict_size += 1 w = c
# Output the code for w. if w: result.append(dictionary[w]) return result
def decompress(compressed):
"""Decompress a list of output ks to a string."""
# Build the dictionary. dict_size = 256 dictionary = dict((chr(i), chr(i)) for i in xrange(dict_size)) # in Python 3: dictionary = {chr(i): chr(i) for i in range(dict_size)}
w = result = compressed.pop(0) for k in compressed: if k in dictionary: entry = dictionary[k] elif k == dict_size: entry = w + w[0] else: raise ValueError('Bad compressed k: %s' % k) result += entry
# Add w+entry[0] to the dictionary. dictionary[dict_size] = w + entry[0] dict_size += 1
w = entry return result
- How to use:
compressed = compress('TOBEORNOTTOBEORTOBEORNOT') print (compressed) decompressed = decompress(compressed) print (decompressed)</lang>
Output:
['T', 'O', 'B', 'E', 'O', 'R', 'N', 'O', 'T', 256, 258, 260, 265, 259, 261, 263] TOBEORNOTTOBEORTOBEORNOT
Ruby
In this version the hashes contain mixed typed data: <lang ruby># Compress a string to a list of output symbols. def compress(uncompressed)
# Build the dictionary. dict_size = 256 dictionary = Hash[ Array.new(dict_size) {|i| [i.chr, i.chr]} ]
w = "" result = [] for c in uncompressed.split() wc = w + c if dictionary.has_key?(wc) w = wc else result << dictionary[w] # Add wc to the dictionary. dictionary[wc] = dict_size dict_size += 1 w = c end end
# Output the code for w. result << dictionary[w] unless w.empty? result
end
- Decompress a list of output ks to a string.
def decompress(compressed)
# Build the dictionary. dict_size = 256 dictionary = Hash[ Array.new(dict_size) {|i| [i.chr, i.chr]} ]
w = result = compressed.shift for k in compressed if dictionary.has_key?(k) entry = dictionary[k] elsif k == dict_size entry = w + w[0,1] else raise 'Bad compressed k: %s' % k end result += entry
# Add w+entry[0] to the dictionary. dictionary[dict_size] = w + entry[0,1] dict_size += 1
w = entry end result
end
- How to use:
compressed = compress('TOBEORNOTTOBEORTOBEORNOT') p compressed decompressed = decompress(compressed) puts decompressed</lang>
Output:
["T", "O", "B", "E", "O", "R", "N", "O", "T", 256, 258, 260, 265, 259, 261, 263] TOBEORNOTTOBEORTOBEORNOT
Scheme
<lang scheme>; Get the list reference number for a member or #f if not found (define (member-string-ref m l) (define r #f) (let loop ((i 0)) (if (< i (length l)) (if (not (string=? (list-ref l i) m)) (loop (+ i 1)) (set! r i)))) r)
- Compress a string with LZW
(define (lzw-compress uncompressed) (define dictionary '()) (define n 0) (define result '()) (set! uncompressed (string->list uncompressed))
; Setup Dictionary (let dict-setup ((c 0)) (if (> 256 c) (begin (set! dictionary (append dictionary (list (string (integer->char c))))) (set! n (+ n 1)) (dict-setup (+ c 1)))))
; Compress the string (let compress ((w "") (ci 0)) (define c (string (list-ref uncompressed ci))) (define wc "") (set! wc (string-append w c)) (if (member-string-ref wc dictionary) (set! w wc) (begin (set! result (append result (list (member-string-ref w dictionary)))) (set! dictionary (append dictionary (list wc))) (set! n (+ n 1)) (set! w c))) (if (eqv? ci (- (length uncompressed) 1)) (set! result (append result (list (member-string-ref w dictionary)))) (compress w (+ ci 1)))) result)
- Decompress a LZW compressed string (input should be a list of integers)
(define (lzw-decompress compressed) (define dictionary '()) (define n 0) (define result "")
; Setup Dictionary (let dict-setup ((c 0)) (if (> 256 c) (begin (set! dictionary (append dictionary (list (string (integer->char c))))) (set! n (+ n 1)) (dict-setup (+ c 1)))))
; Decompress the list (let decompress ((k (list-ref compressed 0)) (ci 0)) (define kn #f) ; Add to dictionary (if (> (length compressed) (+ ci 1)) (begin (set! kn (list-ref compressed (+ ci 1))) (if (< kn (length dictionary)) (set! dictionary (append dictionary (list (string-append (list-ref dictionary k) (string (string-ref (list-ref dictionary kn) 0)))))))))
; Build the resulting string (set! result (string-append result (list-ref dictionary k)))
(if (not (eqv? ci (- (length compressed) 1))) (decompress kn (+ ci 1)))) result)
(define compressed (lzw-compress "TOBEORNOTTOBEORTOBEORNOT")) (display compressed)(newline) (define decompressed (lzw-decompress compressed)) (display decompressed)(newline)</lang>
Output:
(84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263) TOBEORNOTTOBEORTOBEORNOT
Seed7
<lang seed7>$ include "seed7_05.s7i";
const func string: lzwCompress (in string: uncompressed) is func
result var string: result is ""; local var char: ch is ' '; var hash [string] char: mydict is (hash [string] char).value; var string: buffer is ""; var string: xstr is ""; begin for ch range chr(0) to chr(255) do mydict @:= [str(ch)] ch; end for; for ch range uncompressed do xstr := buffer & str(ch); if xstr in mydict then buffer &:= str(ch) else result &:= str(mydict[buffer]); mydict @:= [xstr] chr(length(mydict)); buffer := str(ch); end if; end for; if buffer <> "" then result &:= str(mydict[buffer]); end if; end func;
const func string: lzwDecompress (in string: compressed) is func
result var string: result is ""; local var char: ch is ' '; var hash [char] string: mydict is (hash [char] string).value; var string: buffer is ""; var string: current is ""; var string: chain is ""; begin for ch range chr(0) to chr(255) do mydict @:= [ch] str(ch); end for; for ch range compressed do if buffer = "" then buffer := mydict[ch]; result &:= buffer; elsif ch <= chr(255) then current := mydict[ch]; result &:= current; chain := buffer & current; mydict @:= [chr(length(mydict))] chain; buffer := current; else if ch in mydict then chain := mydict[ch]; else chain := buffer & str(buffer[1]); end if; result &:= chain; mydict @:= [chr(length(mydict))] buffer & str(chain[1]); buffer := chain; end if; end for; end func;
const proc: main is func
local var string: compressed is ""; var string: uncompressed is ""; begin compressed := lzwCompress("TOBEORNOTTOBEORTOBEORNOT"); writeln(literal(compressed)); uncompressed := lzwDecompress(compressed); writeln(uncompressed); end func;</lang>
Output:
"TOBEORNOT\256\\258\\260\\265\\259\\261\\263\" TOBEORNOTTOBEORTOBEORNOT
Tcl
<lang tcl>namespace eval LZW {
variable char2int variable chars for {set i 0} {$i < 256} {incr i} { set char [binary format c $i] set char2int($char) $i lappend chars $char }
}
proc LZW::encode {data} {
variable char2int array set dict [array get char2int]
set w "" set result [list]
foreach c [split $data ""] { set wc $w$c if {[info exists dict($wc)]} { set w $wc } else { lappend result $dict($w) set dict($wc) [array size dict] set w $c } } lappend result $dict($w)
}
proc LZW::decode {cdata} {
variable chars set dict $chars
set k [lindex $cdata 0] set w [lindex $dict $k] set result $w
foreach k [lrange $cdata 1 end] { set currSizeDict [llength $dict] if {$k < $currSizeDict} { set entry [lindex $dict $k] } elseif {$k == $currSizeDict} { set entry $w[string index $w 0] } else { error "invalid code ($k) in ($cdata)" } append result $entry lappend dict $w[string index $entry 0] set w $entry } return $result
}
set s TOBEORNOTTOBEORTOBEORNOT# set e [LZW::encode $s] ;# ==> 84 79 66 69 79 82 78 79 84 256 258 260 265 259 261 263 35 set d [LZW::decode $e] ;# ==> TOBEORNOTTOBEORTOBEORNOT#
- or
if {$s eq [LZW::decode [LZW::encode $s]]} then {puts success} else {puts fail} ;# ==> success