Huffman coding: Difference between revisions

m
No edit summary
m (→‎{{header|Wren}}: Minor tidy)
 
(12 intermediate revisions by 8 users not shown)
Line 33:
{{trans|Python}}
 
<langsyntaxhighlight lang="11l">T Element
Int weight
[(Char, String)] symbols
Line 70:
print("Symbol\tWeight\tHuffman Code")
L(p) huff
print("#.\t#.\t#.".format(p[0], symb2freq[p[0]], p[1]))</langsyntaxhighlight>
 
{{out}}
Line 100:
 
huffman.ads:
<langsyntaxhighlight Adalang="ada">with Ada.Containers.Indefinite_Ordered_Maps;
with Ada.Containers.Ordered_Maps;
with Ada.Finalization;
Line 182:
-- free memory after finalization
overriding procedure Finalize (Object : in out Huffman_Tree);
end Huffman;</langsyntaxhighlight>
 
huffman.adb:
<langsyntaxhighlight Adalang="ada">with Ada.Text_IO;
with Ada.Unchecked_Deallocation;
with Ada.Containers.Vectors;
Line 428:
end loop;
end Dump_Encoding;
end Huffman;</langsyntaxhighlight>
 
example main.adb:
<langsyntaxhighlight Adalang="ada">with Ada.Text_IO;
with Huffman;
procedure Main is
Line 479:
(Char_Natural_Huffman_Tree.Decode (Tree => Tree, Code => Code));
end;
end Main;</langsyntaxhighlight>
 
{{out}}
Line 507:
{{incorrect|BBC BASIC|Huffman code can not contain another code as a prefix}}
{{works with|BBC BASIC for Windows}}
<langsyntaxhighlight lang="bbcbasic"> INSTALL @lib$+"SORTSALIB"
SortUp% = FN_sortSAinit(0,0) : REM Ascending
SortDn% = FN_sortSAinit(1,0) : REM Descending
Line 559:
ENDIF
NEXT
END</langsyntaxhighlight>
{{out}}
<pre>
Line 584:
 
=={{header|Bracmat}}==
<langsyntaxhighlight lang="bracmat">( "this is an example for huffman encoding":?S
& 0:?chars
& 0:?p
Line 639:
| !arg
)
& out$("decoded:" str$(decode$(str$!encoded)));</langsyntaxhighlight>
{{out}}
<pre>(L=
Line 669:
This code lacks a lot of needed checkings, especially for memory allocation.
 
<langsyntaxhighlight lang="c">#include <stdio.h>
#include <stdlib.h>
#include <string.h>
Line 842:
 
return 0;
}</langsyntaxhighlight>
===Alternative===
Using a simple heap-based priority queue. Heap is an array, while ndoe tree is done by binary links.
<langsyntaxhighlight lang="c">#include <stdio.h>
#include <string.h>
 
Line 967:
 
return 0;
}</langsyntaxhighlight>
{{out}}
<pre>' ': 000
Line 992:
 
=={{header|C sharp}}==
<langsyntaxhighlight lang="csharp">using System;
using System.Collections.Generic;
 
Line 1,305:
}
}
}</langsyntaxhighlight>
[[File:CSharpHuffman.jpg]]
 
Line 1,312:
This code builds a tree to generate huffman codes, then prints the codes.
 
<langsyntaxhighlight lang="cpp">#include <iostream>
#include <queue>
#include <map>
Line 1,426:
}
return 0;
}</langsyntaxhighlight>
 
{{out}}
Line 1,451:
=={{header|Clojure}}==
(Updated to 1.6 & includes pretty-printing). Uses Java PriorityQueue
<langsyntaxhighlight lang="clojure">(require '[clojure.pprint :refer :all])
 
(defn probs [s]
Line 1,484:
(->> s huffman-encode (sort-by :weight >) print-table))
 
(display-huffman-encode "this is an example for huffman encoding")</langsyntaxhighlight>
 
{{out}}
Line 1,510:
===Alternate Version===
Uses c.d.priority-map. Creates a more shallow tree but appears to meet the requirements.
<langsyntaxhighlight lang="clojure">(require '[clojure.data.priority-map :refer [priority-map-keyfn-by]])
(require '[clojure.pprint :refer [print-table]])
 
Line 1,547:
(->> s huffman-encode (sort-by :weight >) print-table))
 
(display-huffman-encode "this is an example for huffman encoding")</langsyntaxhighlight>
{{out}}
<pre>| :sym | :weight | :code |
Line 1,572:
 
=={{header|CoffeeScript}}==
<langsyntaxhighlight lang="coffeescript">
huffman_encoding_table = (counts) ->
# counts is a hash where keys are characters and
Line 1,658:
console.log "#{rpad(code, 5)}: #{c} (#{counts[c]})"
console.log()
</langsyntaxhighlight>
 
{{out}}
Line 1,705:
(For a more efficient implementation of a priority queue, see the [[Heapsort]] task.)
 
<langsyntaxhighlight lang="lisp">(defstruct huffman-node
(weight 0 :type number)
(element nil :type t)
Line 1,763:
(huffman-node-element node)
(huffman-node-weight node)
(huffman-node-encoding node))))</langsyntaxhighlight>
 
Example:
Line 1,791:
 
=={{header|D}}==
<langsyntaxhighlight lang="d">import std.stdio, std.algorithm, std.typecons, std.container, std.array;
 
auto encode(alias eq, R)(Group!(eq, R) sf) /*pure nothrow @safe*/ {
Line 1,811:
foreach (const p; s.dup.sort().group.encode)
writefln("'%s' %s", p[]);
}</langsyntaxhighlight>
{{out}}
<pre>' ' 101
Line 1,835:
=={{header|Eiffel}}==
Adapted C# solution.
<langsyntaxhighlight lang="eiffel">
class HUFFMAN_NODE[T -> COMPARABLE]
inherit
Line 2,024:
end
end
</syntaxhighlight>
</lang>
 
{{out}}
Line 2,050:
=={{header|Erlang}}==
The main part of the code used here is extracted from [https://gist.github.com/rmies/2828351 Michel Rijnders' GitHubGist]. See also [http://codingpraxis.com/erlang/2012/10/23/huffman-coding-in-erlang.html his blog], for a complete description of the original module.
<langsyntaxhighlight lang="erlang">-module(huffman).
-export([encode/1, decode/2, main/0]).
Line 2,112:
print_bits(<<Bit:1, Rest/bitstring>>) ->
io:format("~w", [Bit]),
print_bits(Rest).</langsyntaxhighlight>
{{out}}
<pre> : 111
Line 2,138:
=={{header|F_Sharp|F#}}==
{{Trans|OCaml}}
<langsyntaxhighlight lang="fsharp">type 'a HuffmanTree =
| Leaf of int * 'a
| Node of int * 'a HuffmanTree * 'a HuffmanTree
Line 2,172:
let tree = charFreqs |> Map.toList |> buildTree
printfn "Symbol\tWeight\tHuffman code";
printTree ([], tree)</langsyntaxhighlight>
{{out}}
<pre>Symbol Weight Huffman code
Line 2,196:
 
=={{header|Factor}}==
<langsyntaxhighlight lang="factor">
USING: kernel sequences combinators accessors assocs math hashtables math.order
sorting.slots classes formatting prettyprint ;
Line 2,329:
! u 1 { 1 0 0 1 1 }
! x 1 { 1 0 0 1 0 }
</syntaxhighlight>
</lang>
 
=={{header|Fantom}}==
 
<langsyntaxhighlight lang="fantom">
class Node
{
Line 2,431:
}
}
</syntaxhighlight>
</lang>
 
{{out}}
Line 2,460:
=={{header|Fortran}}==
 
<langsyntaxhighlight lang="fortran">! output:
! d-> 00000, t-> 00001, h-> 0001, s-> 0010,
! c-> 00110, x-> 00111, m-> 0100, o-> 0101,
Line 2,608:
print *
end program
</syntaxhighlight>
</lang>
 
=={{header|FreeBASIC}}==
<langsyntaxhighlight lang="freebasic">type block
freq as uinteger
chars as string
Line 2,696:
print "'"+codelist(i).char+"'", codelist(i).code
next i
</syntaxhighlight>
</lang>
{{out}}
<pre>' ' 111
Line 2,720:
=={{header|Go}}==
{{trans|Java}}
<langsyntaxhighlight lang="go">package main
 
import (
Line 2,816:
fmt.Println("SYMBOL\tWEIGHT\tHUFFMAN CODE")
printCodes(tree, []byte{})
}</langsyntaxhighlight>
{{out}}
<pre>
Line 2,841:
</pre>
{{trans|Python}}
<langsyntaxhighlight lang="go">package main
 
import (
Line 2,905:
fmt.Printf(" %c %d %s\n", c.sym, sym2freq[c.sym], c.code)
}
}</langsyntaxhighlight>
 
=={{header|Groovy}}==
Line 2,911:
Implemented and tested with Groovy 2.3.
 
<langsyntaxhighlight lang="groovy">
import groovy.transform.*
 
Line 2,962:
: decoded
}
</syntaxhighlight>
</lang>
Usage:
<langsyntaxhighlight lang="groovy">
def message = "this is an example for huffman encoding"
 
Line 2,975:
def decoded = decode(encoded, codeTable)
println decoded
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 3,004:
=={{header|Haskell}}==
Credits go to [http://www.haskell.org/haskellwiki/99_questions/46_to_50#Problem_50 huffman] where you'll also find a non-tree solution. Uses sorted list as a priority queue.
<langsyntaxhighlight lang="haskell">import Data.List (group, insertBy, sort, sortBy)
import Control.Arrow ((&&&), second)
import Data.Ord (comparing)
Line 3,043:
 
main :: IO ()
main = test "this is an example for huffman encoding"</langsyntaxhighlight>
{{out}}
<langsyntaxhighlight lang="haskell">'p' : 00000
'r' : 00001
'g' : 00010
Line 3,063:
'a' : 1100
'e' : 1101
' ' : 111</langsyntaxhighlight>
 
===Using <code>Set</code> as a priority queue===
(might be worth it for bigger alphabets):
<langsyntaxhighlight lang="haskell">import qualified Data.Set as S
 
htree :: (Ord t, Num t, Ord a) => S.Set (t, HTree a) -> HTree a
Line 3,078:
huffmanTree :: (Ord w, Num w, Ord a) => [(w, a)] -> HTree a
huffmanTree = htree . S.fromList . map (second Leaf)</langsyntaxhighlight>
 
===A non-tree version===
This produces the output required without building the Huffman tree at all,
by building all the trace strings directly while reducing the histogram:
<langsyntaxhighlight lang="haskell">import Data.List (sortBy, insertBy, sort, group)
import Control.Arrow (second, (&&&))
import Data.Ord (comparing)
Line 3,099:
 
main = do
test "this is an example for huffman encoding"</langsyntaxhighlight>
 
=={{header|Icon}} and {{header|Unicon}}==
<langsyntaxhighlight Iconlang="icon">record huffnode(l,r,n,c) # internal and leaf nodes
record huffcode(c,n,b,i) # encoding table char, freq, bitstring, bits (int)
 
Line 3,161:
}
return T
end</langsyntaxhighlight>
 
{{out}}
Line 3,190:
and implements the algorithm given in the problem description.
The program produces Huffman codes based on each line of input.
<langsyntaxhighlight Uniconlang="unicon">import Collections
 
procedure main(A)
Line 3,219:
else codeMap[node[1]] := prefix
return codeMap
end</langsyntaxhighlight>
 
A sample run:
Line 3,265:
'''Solution''' (drawn from [[J:Essays/Huffman_Coding|the J wiki]]):
 
<langsyntaxhighlight lang="j">hc=: 4 : 0
if. 1=#x do. y
else. ((i{x),+/j{x) hc (i{y),<j{y [ i=. (i.#x) -. j=. 2{./:x end.
Line 3,278:
t=. 0 {:: x hc w NB. minimal weight binary tree
((< S: 0 t) i. w) { <@(1&=)@; S: 1 {:: t
)</langsyntaxhighlight>
 
'''Example''':<langsyntaxhighlight lang="j"> ;"1":L:0(#/.~ (],.(<' '),.hcodes) ,&.>@~.)'this is an example for huffman encoding'
t 0 1 0 1 0
h 1 1 1 1 1
Line 3,299:
c 1 0 0 0 0
d 1 0 0 0 1
g 1 1 1 1 0</langsyntaxhighlight>
 
=={{header|Java}}==
This implementation creates an actual tree structure, and then traverses the tree to recover the code.
<langsyntaxhighlight lang="java">import java.util.*;
 
abstract class HuffmanTree implements Comparable<HuffmanTree> {
Line 3,397:
printCodes(tree, new StringBuffer());
}
}</langsyntaxhighlight>
 
{{out}}
Line 3,429:
 
The Huffman encoder
<langsyntaxhighlight lang="javascript">function HuffmanEncoding(str) {
this.str = str;
 
Line 3,491:
}
return decoded;
}</langsyntaxhighlight>
 
And, using the Huffman encoder
<langsyntaxhighlight lang="javascript">var s = "this is an example for huffman encoding";
print(s);
 
Line 3,506:
print(t);
 
print("is decoded string same as original? " + (s==t));</langsyntaxhighlight>
 
{{out}}
Line 3,532:
this is an example for huffman encoding
is decoded string same as original? true</pre>
 
{{trans|C}}
<syntaxhighlight lang = "javascript">
class node{
constructor(freq, char, left, right){
this.left = left;
this.right = right;
this.freq = freq;
this.c = char;
}
};
 
nodes = [];
code = {};
 
function new_node(left, right){
return new node(left.freq + right.freq, -1, left, right);;
};
 
function qinsert(node){
nodes.push(node);
nodes.sort(compareFunction);
};
 
function qremove(){
return nodes.pop();
};
 
function compareFunction(a, b){
return b.freq - a.freq;
};
 
function build_code(node, codeString, length){
if (node.c != -1){
code[node.c] = codeString;
return;
};
/* Left Branch */
leftCodeString = codeString + "0";
build_code(node.left, leftCodeString, length + 1);
/* Right Branch */
rightCodeString = codeString + "1";
build_code(node.right, rightCodeString, length + 1);
};
 
function init(string){
var i;
var freq = [];
var codeString = "";
for (var i = 0; i < string.length; i++){
if (isNaN(freq[string.charCodeAt(i)])){
freq[string.charCodeAt(i)] = 1;
} else {
freq[string.charCodeAt(i)] ++;
};
};
for (var i = 0; i < freq.length; i++){
if (freq[i] > 0){
qinsert(new node(freq[i], i, null, null));
};
};
while (nodes.length > 1){
qinsert(new_node(qremove(), qremove()));
};
build_code(nodes[0], codeString, 0);
};
 
function encode(string){
output = "";
for (var i = 0; i < string.length; i ++){
output += code[string.charCodeAt(i)];
};
return output;
};
 
function decode(input){
output = "";
node = nodes[0];
for (var i = 0; i < input.length; i++){
if (input[i] == "0"){
node = node.left;
} else {
node = node.right;
};
if (node.c != -1){
output += String.fromCharCode(node.c);
node = nodes[0];
};
};
return output
};
 
 
string = "this is an example of huffman encoding";
console.log("initial string: " + string);
init(string);
for (var i = 0; i < Object.keys(code).length; i++){
if (isNaN(code[Object.keys(code)[i]])){
} else {
console.log("'" + String.fromCharCode(Object.keys(code)[i]) + "'" + ": " + code[Object.keys(code)[i]]);
};
};
 
huffman = encode(string);
console.log("encoded: " + huffman + "\n");
output = decode(huffman);
console.log("decoded: " + output);
</syntaxhighlight>
 
<pre style='width: full; overflow: scroll'>
initial string: this is an example of huffman encoding
' ': 111
'a': 1011
'c': 00101
'd': 00100
'e': 1010
'f': 1101
'g': 00111
'h': 0101
'i': 1100
'l': 00110
'm': 0100
'n': 100
'o': 0111
'p': 00001
's': 0110
't': 00000
'u': 00011
'x': 00010
encoded: 000000101110001101111100011011110111001111010000101011010000001001101010111011111011110101000111101110101001011100111101010000101011100100110010000111
decoded: this is an example of huffman encoding
</pre>
 
=={{header|Julia}}==
<langsyntaxhighlight lang="julia">
abstract type HuffmanTree end
 
Line 3,588 ⟶ 3,729:
 
printencoding(huffmantree(makefreqdict(msg)), "")
</langsyntaxhighlight>{{output}}<pre>
Char Freq Huffman code
p 1 00000
Line 3,614 ⟶ 3,755:
{{trans|Java}}
This implementation creates an actual tree structure, and then traverses the tree to recover the code.
<langsyntaxhighlight lang="kotlin">import java.util.*
 
abstract class HuffmanTree(var freq: Int) : Comparable<HuffmanTree> {
Line 3,667 ⟶ 3,808:
println("SYMBOL\tWEIGHT\tHUFFMAN CODE")
printCodes(tree, StringBuffer())
}</langsyntaxhighlight>
 
{{out}}
Line 3,692 ⟶ 3,833:
 
=={{header|Lua}}==
{{trans|Lua}}
This implementation proceeds in three steps: determine word frequencies,
construct the Huffman tree, and finally fold the tree into the codes
while outputting them.
<langsyntaxhighlight lang="lua">local build_freqtable = function (data)
local freq = { }
 
Line 3,770 ⟶ 3,910:
return huffcode "this is an example for huffman encoding"
 
</syntaxhighlight>
</lang>
<pre>
frequency word huffman code
Line 3,795 ⟶ 3,935:
 
=={{header|M2000 Interpreter}}==
<syntaxhighlight lang="m2000 interpreter">
<lang M2000 Interpreter>
Module Huffman {
comp=lambda (a, b) ->{
Line 3,880 ⟶ 4,020:
}
Huffman
</syntaxhighlight>
</lang>
 
{{out}}
Line 3,912 ⟶ 4,052:
=={{header|Mathematica}} / {{header|Wolfram Language}}==
 
<langsyntaxhighlight lang="mathematica">huffman[s_String] := huffman[Characters[s]];
huffman[l_List] := Module[{merge, structure, rules},
Line 3,924 ⟶ 4,064:
rules = (# -> Flatten[Position[structure, #] - 1]) & /@ DeleteDuplicates[l];
 
{Flatten[l /. rules], rules}];</langsyntaxhighlight>
 
=={{header|Nim}}==
 
<langsyntaxhighlight lang="nim">import tables, sequtils
 
type
Line 4,023 ⟶ 4,163:
 
for (key, value) in sortedByIt(toSeq(huffCodes.pairs), it[1]):
echo &"'{key}' → {value}"</langsyntaxhighlight>
 
{{out}}
Line 4,048 ⟶ 4,188:
=={{header|Oberon-2}}==
{{works with|oo2c}}
<langsyntaxhighlight lang="oberon2">
MODULE HuffmanEncoding;
IMPORT
Line 4,140 ⟶ 4,280:
 
END HuffmanEncoding.
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 4,168 ⟶ 4,308:
 
This is not purely Objective-C. It uses Apple's Core Foundation library for its binary heap, which admittedly is very ugly. Thus, this only builds on Mac OS X, not GNUstep.
<langsyntaxhighlight lang="objc">#import <Foundation/Foundation.h>
 
 
Line 4,317 ⟶ 4,457:
}
return 0;
}</langsyntaxhighlight>
 
{{out}}
Line 4,348 ⟶ 4,488:
We use a Set (which is automatically sorted) as a priority queue.
{{works with|OCaml|4.02+}}
<langsyntaxhighlight lang="ocaml">type 'a huffman_tree =
| Leaf of 'a
| Node of 'a huffman_tree * 'a huffman_tree
Line 4,397 ⟶ 4,537:
let tree = build_tree charFreqs in
print_string "Symbol\tHuffman code\n";
print_tree [] tree</langsyntaxhighlight>
 
=={{header|Ol}}==
<langsyntaxhighlight lang="scheme">
(define phrase "this is an example for huffman encoding")
 
Line 4,439 ⟶ 4,579:
(return (reverse prefix))))))))
(map car table)))
</syntaxhighlight>
</lang>
{{Out}}
<langsyntaxhighlight lang="scheme">
(print "weights: ---------------------------")
(for-each (lambda (ch)
Line 4,452 ⟶ 4,592:
(reverse (map car table))
(reverse codes))
</syntaxhighlight>
</lang>
<pre>
weights: ---------------------------
Line 4,497 ⟶ 4,637:
 
=={{header|Perl}}==
<langsyntaxhighlight lang="perl">use 5.10.0;
use strict;
 
Line 4,553 ⟶ 4,693:
print "$enc\n";
 
print decode($enc, $rev_h), "\n";</langsyntaxhighlight>
{{out}}
<pre>
Line 4,581 ⟶ 4,721:
=={{header|Phix}}==
{{trans|Lua}}
<!--<syntaxhighlight lang="phix">(phixonline)-->
<lang Phix>function store_nodes(object key, object data, integer nodes)
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
setd({data,key},0,nodes)
<span style="color: #008080;">function</span> <span style="color: #000000;">store_nodes</span><span style="color: #0000FF;">(</span><span style="color: #004080;">object</span> <span style="color: #000000;">key</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">object</span> <span style="color: #000000;">data</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">nodes</span><span style="color: #0000FF;">)</span>
return 1
<span style="color: #7060A8;">setd</span><span style="color: #0000FF;">({</span><span style="color: #000000;">data</span><span style="color: #0000FF;">,</span><span style="color: #000000;">key</span><span style="color: #0000FF;">},</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">nodes</span><span style="color: #0000FF;">)</span>
end function
<span style="color: #008080;">return</span> <span style="color: #000000;">1</span>
constant r_store_nodes = routine_id("store_nodes")
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
 
function build_freqtable(string data)
<span style="color: #008080;">function</span> <span style="color: #000000;">build_freqtable</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">data</span><span style="color: #0000FF;">)</span>
integer freq = new_dict(),
<span style="color: #004080;">integer</span> <span style="color: #000000;">freq</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">new_dict</span><span style="color: #0000FF;">(),</span>
nodes = new_dict()
<span style="color: #000000;">nodes</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">new_dict</span><span style="color: #0000FF;">()</span>
for i=1 to length(data) do
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">data</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
integer di = data[i]
<span style="color: #004080;">integer</span> <span style="color: #000000;">di</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">data</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
setd(di,getd(di,freq)+1,freq)
<span style="color: #7060A8;">setd</span><span style="color: #0000FF;">(</span><span style="color: #000000;">di</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">getd</span><span style="color: #0000FF;">(</span><span style="color: #000000;">di</span><span style="color: #0000FF;">,</span><span style="color: #000000;">freq</span><span style="color: #0000FF;">)+</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #000000;">freq</span><span style="color: #0000FF;">)</span>
end for
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
traverse_dict(r_store_nodes, nodes, freq)
<span style="color: #7060A8;">traverse_dict</span><span style="color: #0000FF;">(</span><span style="color: #000000;">store_nodes</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">nodes</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">freq</span><span style="color: #0000FF;">)</span>
destroy_dict(freq)
<span style="color: #7060A8;">destroy_dict</span><span style="color: #0000FF;">(</span><span style="color: #000000;">freq</span><span style="color: #0000FF;">)</span>
return nodes
<span style="color: #008080;">return</span> <span style="color: #000000;">nodes</span>
end function
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
function build_hufftree(integer nodes)
<span style="color: #008080;">function</span> <span style="color: #000000;">build_hufftree</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">nodes</span><span style="color: #0000FF;">)</span>
sequence lkey, rkey, node
<span style="color: #004080;">sequence</span> <span style="color: #000000;">node</span>
integer lfreq, rfreq
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
while true do
<span style="color: #004080;">sequence</span> <span style="color: #000000;">lkey</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">getd_partial_key</span><span style="color: #0000FF;">({</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">0</span><span style="color: #0000FF;">},</span><span style="color: #000000;">nodes</span><span style="color: #0000FF;">)</span>
lkey = getd_partial_key({0,0},nodes)
<span style="color: #004080;">integer</span> <span style="color: #000000;">lfreq</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">lkey</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span>
lfreq = lkey[1]
<span style="color: #7060A8;">deld</span><span style="color: #0000FF;">(</span><span style="color: #000000;">lkey</span><span style="color: #0000FF;">,</span><span style="color: #000000;">nodes</span><span style="color: #0000FF;">)</span>
deld(lkey,nodes)
<span style="color: #004080;">sequence</span> <span style="color: #000000;">rkey</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">getd_partial_key</span><span style="color: #0000FF;">({</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">0</span><span style="color: #0000FF;">},</span><span style="color: #000000;">nodes</span><span style="color: #0000FF;">)</span>
rkey = getd_partial_key({0,0},nodes)
<span style="color: #004080;">integer</span> <span style="color: #000000;">rfreq</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">rkey</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">]</span>
rfreq = rkey[1]
<span style="color: #7060A8;">deld</span><span style="color: #0000FF;">(</span><span style="color: #000000;">rkey</span><span style="color: #0000FF;">,</span><span style="color: #000000;">nodes</span><span style="color: #0000FF;">)</span>
deld(rkey,nodes)
<span style="color: #000000;">node</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">lfreq</span><span style="color: #0000FF;">+</span><span style="color: #000000;">rfreq</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">lkey</span><span style="color: #0000FF;">,</span><span style="color: #000000;">rkey</span><span style="color: #0000FF;">}}</span>
node = {lfreq+rfreq,{lkey,rkey}}
 
<span style="color: #008080;">if</span> <span style="color: #7060A8;">dict_size</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nodes</span><span style="color: #0000FF;">)=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
if dict_size(nodes)=0 then exit end if
<span style="color: #7060A8;">setd</span><span style="color: #0000FF;">(</span><span style="color: #000000;">node</span><span style="color: #0000FF;">,</span><span style="color: #000000;">0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">nodes</span><span style="color: #0000FF;">)</span>
setd(node,0,nodes)
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
end while
<span style="color: #7060A8;">destroy_dict</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nodes</span><span style="color: #0000FF;">)</span>
destroy_dict(nodes)
<span style="color: #008080;">return</span> <span style="color: #000000;">node</span>
return node
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
end function
<span style="color: #008080;">procedure</span> <span style="color: #000000;">build_huffcodes</span><span style="color: #0000FF;">(</span><span style="color: #004080;">object</span> <span style="color: #000000;">node</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">string</span> <span style="color: #000000;">bits</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">d</span><span style="color: #0000FF;">)</span>
procedure build_huffcodes(object node, string bits, integer d)
<span style="color: #0000FF;">{</span><span style="color: #004080;">integer</span> <span style="color: #000000;">freq</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">object</span> <span style="color: #000000;">data</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">node</span>
{integer freq, object data} = node
<span style="color: #008080;">if</span> <span style="color: #004080;">sequence</span><span style="color: #0000FF;">(</span><span style="color: #000000;">data</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">then</span>
if sequence(data) then
<span style="color: #000000;">build_huffcodes</span><span style="color: #0000FF;">(</span><span style="color: #000000;">data</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span><span style="color: #000000;">bits</span><span style="color: #0000FF;">&</span><span style="color: #008000;">'0'</span><span style="color: #0000FF;">,</span><span style="color: #000000;">d</span><span style="color: #0000FF;">)</span>
build_huffcodes(data[1],bits&'0',d)
<span style="color: #000000;">build_huffcodes</span><span style="color: #0000FF;">(</span><span style="color: #000000;">data</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],</span><span style="color: #000000;">bits</span><span style="color: #0000FF;">&</span><span style="color: #008000;">'1'</span><span style="color: #0000FF;">,</span><span style="color: #000000;">d</span><span style="color: #0000FF;">)</span>
build_huffcodes(data[2],bits&'1',d)
<span style="color: #008080;">else</span>
else
<span style="color: #7060A8;">setd</span><span style="color: #0000FF;">(</span><span style="color: #000000;">data</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">freq</span><span style="color: #0000FF;">,</span><span style="color: #000000;">bits</span><span style="color: #0000FF;">},</span><span style="color: #000000;">d</span><span style="color: #0000FF;">)</span>
setd(data,{freq,bits},d)
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
end if
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
end procedure
 
<span style="color: #008080;">function</span> <span style="color: #000000;">print_huffcode</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">key</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">data</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000080;font-style:italic;">/*user_data*/</span><span style="color: #0000FF;">)</span>
function print_huffcode(integer key, sequence data, integer /*user_data*/)
<span style="color: #0000FF;">{</span><span style="color: #004080;">integer</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">string</span> <span style="color: #000000;">s</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">data</span>
printf(1,"'%c' [%d] %s\n",key&data)
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"'%c' [%d] %s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">key</span><span style="color: #0000FF;">,</span><span style="color: #000000;">i</span><span style="color: #0000FF;">,</span><span style="color: #000000;">s</span><span style="color: #0000FF;">})</span>
return 1
<span style="color: #008080;">return</span> <span style="color: #000000;">1</span>
end function
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
constant r_print_huffcode = routine_id("print_huffcode")
 
<span style="color: #008080;">procedure</span> <span style="color: #000000;">print_huffcodes</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">d</span><span style="color: #0000FF;">)</span>
procedure print_huffcodes(integer d)
<span style="color: #7060A8;">traverse_dict</span><span style="color: #0000FF;">(</span><span style="color: #000000;">print_huffcode</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">0</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">d</span><span style="color: #0000FF;">)</span>
traverse_dict(r_print_huffcode, 0, d)
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
end procedure
 
<span style="color: #008080;">function</span> <span style="color: #000000;">invert_huffcode</span><span style="color: #0000FF;">(</span><span style="color: #004080;">integer</span> <span style="color: #000000;">key</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">data</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">integer</span> <span style="color: #000000;">rd</span><span style="color: #0000FF;">)</span>
function invert_huffcode(integer key, sequence data, integer rd)
<span style="color: #7060A8;">setd</span><span style="color: #0000FF;">(</span><span style="color: #000000;">data</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],</span><span style="color: #000000;">key</span><span style="color: #0000FF;">,</span><span style="color: #000000;">rd</span><span style="color: #0000FF;">)</span>
setd(data[2],key,rd)
<span style="color: #008080;">return</span> <span style="color: #000000;">1</span>
return 1
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
end function
constant r_invert_huffcode = routine_id("invert_huffcode")
<span style="color: #008080;">procedure</span> <span style="color: #000000;">main</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">data</span><span style="color: #0000FF;">)</span>
 
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">data</span><span style="color: #0000FF;">)<</span><span style="color: #000000;">2</span> <span style="color: #008080;">then</span> <span style="color: #0000FF;">?</span><span style="color: #000000;">9</span><span style="color: #0000FF;">/</span><span style="color: #000000;">0</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
procedure main(string data)
<span style="color: #004080;">integer</span> <span style="color: #000000;">nodes</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">build_freqtable</span><span style="color: #0000FF;">(</span><span style="color: #000000;">data</span><span style="color: #0000FF;">)</span>
if length(data)<2 then ?9/0 end if
<span style="color: #004080;">sequence</span> <span style="color: #000000;">huff</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">build_hufftree</span><span style="color: #0000FF;">(</span><span style="color: #000000;">nodes</span><span style="color: #0000FF;">)</span>
integer nodes = build_freqtable(data)
<span style="color: #004080;">integer</span> <span style="color: #000000;">d</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">new_dict</span><span style="color: #0000FF;">()</span>
sequence huff = build_hufftree(nodes)
<span style="color: #000000;">build_huffcodes</span><span style="color: #0000FF;">(</span><span style="color: #000000;">huff</span><span style="color: #0000FF;">,</span> <span style="color: #008000;">""</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">d</span><span style="color: #0000FF;">)</span>
integer d = new_dict()
<span style="color: #000000;">print_huffcodes</span><span style="color: #0000FF;">(</span><span style="color: #000000;">d</span><span style="color: #0000FF;">)</span>
build_huffcodes(huff, "", d)
print_huffcodes(d)
<span style="color: #004080;">string</span> <span style="color: #000000;">encoded</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span>
 
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">data</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
string encoded = ""
<span style="color: #000000;">encoded</span> <span style="color: #0000FF;">&=</span> <span style="color: #7060A8;">getd</span><span style="color: #0000FF;">(</span><span style="color: #000000;">data</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #000000;">d</span><span style="color: #0000FF;">)[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">]</span>
for i=1 to length(data) do
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
encoded &= getd(data[i],d)[2]
<span style="color: #0000FF;">?</span><span style="color: #7060A8;">shorten</span><span style="color: #0000FF;">(</span><span style="color: #000000;">encoded</span><span style="color: #0000FF;">)</span>
end for
?encoded
<span style="color: #004080;">integer</span> <span style="color: #000000;">rd</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">new_dict</span><span style="color: #0000FF;">()</span>
<span style="color: #7060A8;">traverse_dict</span><span style="color: #0000FF;">(</span><span style="color: #000000;">invert_huffcode</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">rd</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">d</span><span style="color: #0000FF;">)</span>
integer rd = new_dict()
<span style="color: #004080;">string</span> <span style="color: #000000;">decoded</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span>
traverse_dict(r_invert_huffcode, rd, d)
<span style="color: #004080;">integer</span> <span style="color: #000000;">done</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
string decoded = ""
<span style="color: #008080;">while</span> <span style="color: #000000;">done</span><span style="color: #0000FF;"><</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">encoded</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
integer done = 0
<span style="color: #004080;">string</span> <span style="color: #000000;">key</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">""</span>
while done<length(encoded) do
<span style="color: #004080;">integer</span> <span style="color: #000000;">node</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
string key = ""
<span style="color: #008080;">while</span> <span style="color: #000000;">node</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">do</span>
integer node = 0
<span style="color: #000000;">done</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
while node=0 do
<span style="color: #000000;">key</span> <span style="color: #0000FF;">&=</span> <span style="color: #000000;">encoded</span><span style="color: #0000FF;">[</span><span style="color: #000000;">done</span><span style="color: #0000FF;">]</span>
done += 1
<span style="color: #000000;">node</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">getd_index</span><span style="color: #0000FF;">(</span><span style="color: #000000;">key</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">rd</span><span style="color: #0000FF;">)</span>
key &= encoded[done]
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
node = getd_index(key, rd)
<span style="color: #000000;">decoded</span> <span style="color: #0000FF;">&=</span> <span style="color: #7060A8;">getd_by_index</span><span style="color: #0000FF;">(</span><span style="color: #000000;">node</span><span style="color: #0000FF;">,</span><span style="color: #000000;">rd</span><span style="color: #0000FF;">)</span>
end while
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
decoded &= getd_by_index(node,rd)
<span style="color: #0000FF;">?</span><span style="color: #000000;">decoded</span>
end while
?decoded
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
 
end procedure
<span style="color: #000000;">main</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"this is an example for huffman encoding"</span><span style="color: #0000FF;">)</span>
<!--</syntaxhighlight>-->
main("this is an example for huffman encoding")</lang>
{{out}}
<pre>
Line 4,700 ⟶ 4,840:
'u' [1] 10001
'x' [1] 11110
"10000111111110010010...01101011111000001100 (157 digits)"
"1000011111111001001011110010010110010001011100111101001001001110011011100101110100110111110111111100011101110100101001000101110000001010001101011111000001100"
"this is an example for huffman encoding"
</pre>
Line 4,708 ⟶ 4,848:
{{trans|Python}} (not exactly)
 
<langsyntaxhighlight lang="php"><?php
function encode($symb2freq) {
$heap = new SplPriorityQueue;
Line 4,735 ⟶ 4,875:
foreach ($huff as $sym => $code)
echo "$sym\t$symb2freq[$sym]\t$code\n";
?></langsyntaxhighlight>
 
{{out}}
Line 4,760 ⟶ 4,900:
e 3 1111
</pre>
 
=={{header|Picat}}==
{{trans|Prolog}}
<syntaxhighlight lang="picat">go =>
huffman("this is an example for huffman encoding").
 
huffman(LA) :-
LS=sort(LA),
packList(LS,PL),
PLS=sort(PL).remove_dups(),
build_tree(PLS, A),
coding(A, [], C),
SC=sort(C).remove_dups(),
println("Symbol\tWeight\tCode"),
foreach(SS in SC) print_code(SS) end.
build_tree([[V1|R1], [V2|R2]|T], AF) :-
V = V1 + V2,
A = [V, [V1|R1], [V2|R2]],
( T=[] -> AF=A ; NT=sort([A|T]), build_tree(NT, AF) ).
coding([_A,FG,FD], Code, CF) :-
( is_node(FG) -> coding(FG, [0 | Code], C1)
; leaf_coding(FG, [0 | Code], C1) ),
( is_node(FD) -> coding(FD, [1 | Code], C2)
; leaf_coding(FD, [1 | Code], C2) ),
append(C1, C2, CF).
leaf_coding([FG,FD], Code, CF) :-
CodeR = reverse(Code),
CF = [[FG, FD, CodeR]] .
is_node([_V, _FG, _FD]).
print_code([N, Car, Code]) :-
printf("%w:\t%w\t", Car, N),
foreach(V in Code) print(V) end,
nl.
 
packList([], []).
packList([X],[[1,X]]).
packList([X|Rest], XRunPacked) :-
XRunPacked = [XRun|Packed],
run(X, Rest, XRun, RRest),
packList(RRest, Packed).
run(V, [], VV, []) :- VV=[1,V].
run(V, [V|LRest], [N1,V], RRest) :-
run(V, LRest, [N, V], RRest),
N1 = N + 1.
run(V, [Other|RRest], [1,V], [Other|RRest]) :-
different_terms(V, Other).</syntaxhighlight>
 
{{out}}
<pre>Symbol Weight Code
c: 1 01010
d: 1 01011
g: 1 01100
l: 1 01101
p: 1 01110
r: 1 01111
t: 1 10000
u: 1 10001
x: 1 11110
h: 2 11111
m: 2 0010
o: 2 0011
s: 2 0100
a: 3 1001
e: 3 1100
f: 3 1101
i: 3 1110
n: 4 000
: 6 101</pre>
 
=={{header|PicoLisp}}==
Using a cons cells (freq . char) for leaves, and two cells (freq left . right)
for nodes.
<langsyntaxhighlight PicoLisplang="picolisp">(de prio (Idx)
(while (cadr Idx) (setq Idx @))
(car Idx) )
Line 4,781 ⟶ 4,995:
(prinl (cdr P) " " L)
(recurse (cadr P) (cons 0 L))
(recurse (cddr P) (cons 1 L)) ) ) )</langsyntaxhighlight>
{{out}}
<pre>n 000
Line 4,804 ⟶ 5,018:
 
=={{header|PL/I}}==
<langsyntaxhighlight lang="pli">*process source attributes xref or(!);
hencode: Proc Options(main);
/*--------------------------------------------------------------------
Line 5,061 ⟶ 5,275:
End;
 
End;</langsyntaxhighlight>
{{out}}
<pre>The list of nodes:
Line 5,131 ⟶ 5,345:
=={{header|PowerShell}}==
{{works with|PowerShell|2}}
<syntaxhighlight lang="powershell">
<lang PowerShell>
function Get-HuffmanEncodingTable ( $String )
{
Line 5,193 ⟶ 5,407:
}
$EncodedString
</syntaxhighlight>
</lang>
{{out}}
<pre>
Line 5,224 ⟶ 5,438:
=={{header|Prolog}}==
Works with SWI-Prolog
<langsyntaxhighlight Prologlang="prolog">huffman :-
L = 'this is an example for huffman encoding',
atom_chars(L, LA),
Line 5,270 ⟶ 5,484:
N1 is N + 1.
run(V, [Other|RRest], [1,V], [Other|RRest]):-
dif(V, Other).</langsyntaxhighlight>
{{out}}
<pre> ?- huffman.
Line 5,297 ⟶ 5,511:
=={{header|PureBasic}}==
{{works with|PureBasic|4.50}}
<syntaxhighlight lang="purebasic">
<lang PureBasic>
OpenConsole()
Line 5,381 ⟶ 5,595:
CloseConsole()
</syntaxhighlight>
</lang>
 
{{out}}
Line 5,409 ⟶ 5,623:
The output is sorted first on length of the code, then on the symbols.
 
<langsyntaxhighlight lang="python">from heapq import heappush, heappop, heapify
from collections import defaultdict
 
Line 5,435 ⟶ 5,649:
print "Symbol\tWeight\tHuffman Code"
for p in huff:
print "%s\t%s\t%s" % (p[0], symb2freq[p[0]], p[1])</langsyntaxhighlight>
 
{{out}}
Line 5,460 ⟶ 5,674:
 
An extension to the method outlined above is given [http://paddy3118.blogspot.com/2009/04/abuse-of-pythons-in-built-data.html here].
 
===Alternative===
 
This implementation creates an explicit tree structure, which is used during decoding. We also make use of a "pseudo end of file" symbol and padding bits to facilitate reading and writing encoded data to from/to a file.
 
<syntaxhighlight lang="python">"""Huffman encoding and decoding. Requires Python >= 3.7."""
from __future__ import annotations
 
from collections import Counter
 
from heapq import heapify
from heapq import heappush
from heapq import heappop
 
from itertools import chain
from itertools import islice
 
from typing import BinaryIO
from typing import Dict
from typing import Iterable
from typing import Optional
from typing import Tuple
 
 
LEFT_BIT = "0"
RIGHT_BIT = "1"
WORD_SIZE = 8 # Assumed to be a multiple of 8.
READ_SIZE = WORD_SIZE // 8
P_EOF = 1 << WORD_SIZE
 
 
class Node:
"""Huffman tree node."""
 
def __init__(
self,
weight: int,
symbol: Optional[int] = None,
left: Optional[Node] = None,
right: Optional[Node] = None,
):
self.weight = weight
self.symbol = symbol
self.left = left
self.right = right
 
def is_leaf(self) -> bool:
"""Return `True` if this node is a leaf node, or `False` otherwise."""
return self.left is None and self.right is None
 
def __lt__(self, other: Node) -> bool:
return self.weight < other.weight
 
 
def huffman_tree(weights: Dict[int, int]) -> Node:
"""Build a prefix tree from a map of symbol frequencies."""
heap = [Node(v, k) for k, v in weights.items()]
heapify(heap)
 
# Pseudo end-of-file with a weight of 1.
heappush(heap, Node(1, P_EOF))
 
while len(heap) > 1:
left, right = heappop(heap), heappop(heap)
node = Node(weight=left.weight + right.weight, left=left, right=right)
heappush(heap, node)
 
return heappop(heap)
 
 
def huffman_table(tree: Node) -> Dict[int, str]:
"""Build a table of prefix codes by visiting every leaf node in `tree`."""
codes: Dict[int, str] = {}
 
def walk(node: Optional[Node], code: str = ""):
if node is None:
return
 
if node.is_leaf():
assert node.symbol
codes[node.symbol] = code
return
 
walk(node.left, code + LEFT_BIT)
walk(node.right, code + RIGHT_BIT)
 
walk(tree)
return codes
 
 
def huffman_encode(data: bytes) -> Tuple[Iterable[bytes], Node]:
"""Encode the given byte string using Huffman coding.
 
Returns the encoded byte stream and the Huffman tree required to
decode those bytes.
"""
weights = Counter(data)
tree = huffman_tree(weights)
table = huffman_table(tree)
return _encode(data, table), tree
 
 
def huffman_decode(data: Iterable[bytes], tree: Node) -> bytes:
"""Decode the given byte stream using a Huffman tree."""
return bytes(_decode(_bits_from_bytes(data), tree))
 
 
def _encode(stream: Iterable[int], codes: Dict[int, str]) -> Iterable[bytes]:
bits = chain(chain.from_iterable(codes[s] for s in stream), codes[P_EOF])
 
# Pack bits (stream of 1s and 0s) one word at a time.
while True:
word = "".join(islice(bits, WORD_SIZE)) # Most significant bit first.
if not word:
break
 
# Pad last bits if they don't align to a whole word.
if len(word) < WORD_SIZE:
word = word.ljust(WORD_SIZE, "0")
 
# Byte order becomes relevant when READ_SIZE > 1.
yield int(word, 2).to_bytes(READ_SIZE, byteorder="big", signed=False)
 
 
def _decode(bits: Iterable[str], tree: Node) -> Iterable[int]:
node = tree
 
for bit in bits:
if bit == LEFT_BIT:
assert node.left
node = node.left
else:
assert node.right
node = node.right
 
if node.symbol == P_EOF:
break
 
if node.is_leaf():
assert node.symbol
yield node.symbol
node = tree # Back to the top of the tree.
 
 
def _word_to_bits(word: bytes) -> str:
"""Return the binary representation of a word given as a byte string,
including leading zeros up to WORD_SIZE.
 
For example, when WORD_SIZE is 8:
_word_to_bits(b'65') == '01000001'
"""
i = int.from_bytes(word, "big")
return bin(i)[2:].zfill(WORD_SIZE)
 
 
def _bits_from_file(file: BinaryIO) -> Iterable[str]:
"""Generate a stream of bits (strings of either "0" or "1") from file-like
object `file`, opened in binary mode."""
word = file.read(READ_SIZE)
while word:
yield from _word_to_bits(word)
word = file.read(READ_SIZE)
 
 
def _bits_from_bytes(stream: Iterable[bytes]) -> Iterable[str]:
"""Generate a stream of bits (strings of either "0" or "1") from an
iterable of single byte byte-strings."""
return chain.from_iterable(_word_to_bits(byte) for byte in stream)
 
 
def main():
"""Example usage."""
s = "this is an example for huffman encoding"
data = s.encode() # Need a byte string
encoded, tree = huffman_encode(data)
 
# Pretty print the Huffman table
print(f"Symbol Code\n------ ----")
for k, v in sorted(huffman_table(tree).items(), key=lambda x: len(x[1])):
print(f"{chr(k):<6} {v}")
 
# Print the bit pattern of the encoded data
print("".join(_bits_from_bytes(encoded)))
 
# Encode then decode
decoded = huffman_decode(*huffman_encode(data))
print(decoded.decode())
 
 
if __name__ == "__main__":
main()
</syntaxhighlight>
 
{{out}}
<pre>
Symbol Code
------ ----
n 000
110
m 0010
h 0101
i 1001
f 1010
e 1011
a 1110
r 00110
l 00111
c 01000
u 01001
x 01100
d 01101
t 01110
p 01111
Ā 10000
g 10001
o 11110
s 11111
011100101100111111110100111111110111000011010110110011100010011110011110111101010111100011011001010100110101010001011100001101011000010001111001101100100010001100000000
this is an example for huffman encoding
</pre>
 
=={{header|Quackery}}==
 
To use this code you will need to load the higher-order words defined at [[Higher-order functions#Quackery]] and the priority queue words defined at [[Priority queue#Quackery]].
 
The word <code>huffmantree</code> takes a string and generates a tree from it suitable for Huffman decoding. To decode a single character, start with the whole tree and either <code>0 peek</code> or <code>1 peek</code> according to the next bit in the compressed stream until you reach a number (ascii character code.)
 
The word <code>huffmanlist</code> will turn the Huffman tree into a nest of nests, each containing an ascii character code and a nest containing a Huffman code. The nests are sorted by ascii character code to facilitate binary splitting.
 
<syntaxhighlight lang="quackery"> [ 2dup peek 1+ unrot poke ] is itemincr ( [ n --> [ )
[ [ 0 128 of ] constant
swap witheach itemincr
' [ i^ join ] map
' [ 0 peek ] filter ] is countchars ( $ --> [ )
[ 0 peek dip [ 0 peek ] < ] is fewerchars ( [ [ --> b )
[ behead rot
behead rot + unrot
dip nested nested
join join ] is makenode ( [ [ --> [ )
[ [ dup pqsize 1 > while
frompq dip frompq
makenode topq again ]
frompq nip
0 pluck drop ] is maketree ( [ --> [ )
[ countchars
pqwith fewerchars
maketree ] is huffmantree ( $ --> [ )
[ stack ] is path.hfl ( --> s )
[ stack ] is list.hfl ( --> s )
forward is makelist ( [ --> )
[ dup size 1 = iff
[ 0 peek
path.hfl behead drop
nested join nested
list.hfl take
join
list.hfl put ] done
unpack
1 path.hfl put
makelist
0 path.hfl replace
makelist
path.hfl release ] resolves makelist ( [ --> )
[ sortwith
[ 0 peek swap 0 peek < ] ] is charsort ( [ --> [ )
[ [] list.hfl put
makelist
list.hfl take
charsort ] is huffmanlist ( [ --> [ )
[ sortwith
[ 1 peek size
swap 1 peek size < ] ] is codesort ( [ --> [ )
[ witheach
[ unpack swap
say ' "' emit
say '" ' echo cr ] ] is echohuff ( [ --> [ )
$ "this is an example for huffman encoding"
huffmantree
huffmanlist
say " Huffman codes sorted by character." cr
dup echohuff cr
say " Huffman codes sorted by code length." cr
codesort echohuff
</syntaxhighlight>
 
{{out}}
 
<pre> Huffman codes sorted by character.
" " [ 1 1 1 ]
"a" [ 1 0 0 1 ]
"c" [ 1 0 0 0 1 ]
"d" [ 0 0 1 1 0 ]
"e" [ 1 0 1 1 ]
"f" [ 1 1 0 1 ]
"g" [ 1 0 1 0 1 0 ]
"h" [ 0 0 0 1 ]
"i" [ 1 1 0 0 ]
"l" [ 1 0 0 0 0 ]
"m" [ 0 1 0 0 ]
"n" [ 0 1 1 ]
"o" [ 0 1 0 1 ]
"p" [ 1 0 1 0 1 1 ]
"r" [ 1 0 1 0 0 ]
"s" [ 0 0 0 0 ]
"t" [ 0 0 1 1 1 ]
"u" [ 0 0 1 0 0 ]
"x" [ 0 0 1 0 1 ]
 
Huffman codes sorted by code length.
" " [ 1 1 1 ]
"n" [ 0 1 1 ]
"a" [ 1 0 0 1 ]
"e" [ 1 0 1 1 ]
"f" [ 1 1 0 1 ]
"h" [ 0 0 0 1 ]
"i" [ 1 1 0 0 ]
"m" [ 0 1 0 0 ]
"o" [ 0 1 0 1 ]
"s" [ 0 0 0 0 ]
"c" [ 1 0 0 0 1 ]
"d" [ 0 0 1 1 0 ]
"l" [ 1 0 0 0 0 ]
"r" [ 1 0 1 0 0 ]
"t" [ 0 0 1 1 1 ]
"u" [ 0 0 1 0 0 ]
"x" [ 0 0 1 0 1 ]
"g" [ 1 0 1 0 1 0 ]
"p" [ 1 0 1 0 1 1 ]</pre>
 
=={{header|Racket}}==
<langsyntaxhighlight lang="racket">
#lang racket
Line 5,567 ⟶ 6,122:
(define decode (make-decoder tree))
;; Here's what the decoded message looks like:
(decode encoded)</langsyntaxhighlight>
 
=={{header|Raku}}==
Line 5,577 ⟶ 6,132:
 
{{works with|rakudo|2015-12-17}}
<syntaxhighlight lang="raku" perl6line>sub huffman (%frequencies) {
my @queue = %frequencies.map({ [.value, .key] }).sort;
while @queue > 1 {
Line 5,589 ⟶ 6,144:
multi walk ($node, $prefix) { take $node => $prefix; }
multi walk ([$node1, $node2], $prefix) { walk $node1, $prefix ~ '0';
walk $node2, $prefix ~ '1'; }</langsyntaxhighlight>
 
===Without building a tree===
Line 5,596 ⟶ 6,151:
 
{{works with|rakudo|2015-12-17}}
<syntaxhighlight lang="raku" perl6line>sub huffman (%frequencies) {
my @queue = %frequencies.map: { .value => (hash .key => '') };
while @queue > 1 {
Line 5,626 ⟶ 6,181:
my $decoded = $encoded .subst: /@codes/, { %decode-key{$_} }, :g;
 
.say for $original, $encoded, $decoded;</langsyntaxhighlight>
 
{{out}}
Line 5,654 ⟶ 6,209:
 
=={{header|Red}}==
<langsyntaxhighlight Redlang="red">Red [file: %huffy.red]
 
;; message to encode:
Line 5,745 ⟶ 6,300:
]
]
</syntaxhighlight>
</lang>
{{out}}
<pre> = 111
Line 5,775 ⟶ 6,330:
 
=={{header|REXX}}==
<langsyntaxhighlight lang="rexx">/* REXX ---------------------------------------------------------------
* 27.12.2013 Walter Pachl
* 29.12.2013 -"- changed for test of s=xrange('00'x,'ff'x)
Line 6,032 ⟶ 6,587:
Say ' 'c '-->' code.c
End
Return</langsyntaxhighlight>
{{out}}
<pre>We encode this string:
Line 6,103 ⟶ 6,658:
=={{header|Ruby}}==
Uses a {{libheader|RubyGems}} package [http://ruby.brian-amberg.de/priority-queue/ PriorityQueue]
<langsyntaxhighlight lang="ruby">require 'priority_queue'
 
def huffman_encoding(str)
Line 6,156 ⟶ 6,711:
enc = encode(str, encoding)
dec = decode(enc, encoding)
puts "success!" if str == dec</langsyntaxhighlight>
<pre>[" ", "111"]
["a", "1011"]
Line 6,183 ⟶ 6,738:
Adapted C++ solution.
 
<langsyntaxhighlight lang="rust">
use std::collections::BTreeMap;
use std::collections::binary_heap::BinaryHeap;
Line 6,270 ⟶ 6,825:
}
}
</syntaxhighlight>
</lang>
 
Output:
Line 6,297 ⟶ 6,852:
=={{header|Scala}}==
{{works with|scala|2.8}}
<langsyntaxhighlight lang="scala">object Huffman {
import scala.collection.mutable.{Map, PriorityQueue}
Line 6,345 ⟶ 6,900:
}
}
}</langsyntaxhighlight>
 
{{out}}
Line 6,372 ⟶ 6,927:
==={{header|Scala (Alternate version)}}===
{{works with|scala|2.11.7}}
<langsyntaxhighlight lang="scala">
// this version uses immutable data only, recursive functions and pattern matching
object Huffman {
Line 6,415 ⟶ 6,970:
frequencies.foreach(x => println(x._1.value + "\t" + x._2 + s"/${text.length}" + s"\t${encodeChar(huffmanTree, x._1.value)}"))
}
}</langsyntaxhighlight>
<pre>
Char Weight Code
Line 6,441 ⟶ 6,996:
=={{header|Scheme}}==
 
<langsyntaxhighlight lang="scheme">(define (char-freq port table)
(if
(eof-object? (peek-char port))
Line 6,494 ⟶ 7,049:
(define freq-table (char-freq (open-input-string input) '()))
(define tree (huffman-tree (nodeify freq-table)))
(list-encodings tree (map car freq-table))</langsyntaxhighlight>
 
{{out}}
Line 6,520 ⟶ 7,075:
 
=={{header|SETL}}==
<langsyntaxhighlight SETLlang="setl">var forest := {}, encTab := {};
 
plaintext := 'this is an example for huffman encoding';
Line 6,557 ⟶ 7,112:
forest less:= [f, n];
return [f, n];
end proc;</langsyntaxhighlight>
 
=={{header|Sidef}}==
<langsyntaxhighlight lang="ruby">func walk(n, s, h) {
if (n.contains(:a)) {
h{n{:a}} = s
Line 6,613 ⟶ 7,168:
 
say enc
say decode(enc, tree)</langsyntaxhighlight>
{{out}}
<pre>n: 000
Line 6,639 ⟶ 7,194:
=={{header|Standard ML}}==
{{works with|SML/NJ}}
<langsyntaxhighlight lang="sml">datatype 'a huffman_tree =
Leaf of 'a
| Node of 'a huffman_tree * 'a huffman_tree
Line 6,695 ⟶ 7,250:
print "SYMBOL\tHUFFMAN CODE\n";
printCodes ([], tree)
end</langsyntaxhighlight>
 
=={{header|Swift}}==
Rather than a priority queue of subtrees, we use the strategy of two sorted lists, one for leaves and one for nodes, and "merge" them as we iterate through them, taking advantage of the fact that any new nodes we create are bigger than any previously created nodes, so go at the end of the nodes list.
{{works with|Swift|2+}}
<langsyntaxhighlight lang="swift">enum HuffmanTree<T> {
case Leaf(T)
indirect case Node(HuffmanTree<T>, HuffmanTree<T>)
Line 6,765 ⟶ 7,320:
let tree = buildTree(charFreqs)
print("Symbol\tHuffman code")
tree.printCodes("")</langsyntaxhighlight>
{{out}}
<pre>
Line 6,792 ⟶ 7,347:
=={{header|Tcl}}==
{{tcllib|struct::prioqueue}}
<langsyntaxhighlight lang="tcl">package require Tcl 8.5
package require struct::prioqueue
 
Line 6,844 ⟶ 7,399:
 
puts $str
puts [string map $encoding $str]</langsyntaxhighlight>
{{out}}
<pre style='width:full; overflow:scroll'>n 4 000
Line 6,870 ⟶ 7,425:
=={{header|UNIX Shell}}==
{{works with|Bourne Again SHell}}
<langsyntaxhighlight lang="bash">
#!/bin/bash
 
Line 6,912 ⟶ 7,467:
awk -F: '{printf "%c\t%d\t%s\n", $2, $3, $1}' |
sort -k 2,2nr -k 3,3n
</syntaxhighlight>
</lang>
 
=={{header|Ursala}}==
following the algorithm given above
<langsyntaxhighlight Ursalalang="ursala">#import std
#import nat
#import flo
Line 6,929 ⟶ 7,484:
#cast %csAL
 
table = code_table 'this is an example for huffman encoding'</langsyntaxhighlight>
a quick walk through the code starting from the bottom:
* <code>*K2 ^/~&h float+ length</code> compute character frequencies by partitioning the input list of characters by equality, and transforming each equivalence class to a pair containing its member and its cardinality represented as a floating point number
Line 6,967 ⟶ 7,522:
{{libheader|Wren-queue}}
Note that the results differ from Java because the PriorityQueue implementation is not the same.
<langsyntaxhighlight ecmascriptlang="wren">import "./queue" for PriorityQueue
 
class HuffmanTree {
Line 7,038 ⟶ 7,593:
var tree = buildTree.call(freqs)
System.print("SYMBOL\tWEIGHT\tHUFFMAN CODE")
printCodes.call(tree, "")</langsyntaxhighlight>
 
{{out}}
Line 7,062 ⟶ 7,617:
x 1 11110
h 2 11111
</pre>
 
=={{header|Zig}}==
 
{{trans|Rust}}
 
<syntaxhighlight lang="zig">
const std = @import("std");
 
const Node = struct {
frequency: usize,
kind: union(enum) {
internal: struct {
left: *Node,
right: *Node,
},
leaf: u8,
},
 
fn initLeaf(frequency: usize, ch: u8) Node {
return .{
.frequency = frequency,
.kind = .{ .leaf = ch },
};
}
 
fn initInternal(
allocator: std.mem.Allocator,
left_child: Node,
right_child: Node,
) !Node {
const left = try allocator.create(Node);
const right = try allocator.create(Node);
left.* = left_child;
right.* = right_child;
return .{
.frequency = left_child.frequency + right_child.frequency,
.kind = .{ .internal = .{ .left = left, .right = right } },
};
}
 
fn deinit(self: Node, allocator: std.mem.Allocator) void {
switch (self.kind) {
.internal => |inner| {
inner.left.deinit(allocator);
inner.right.deinit(allocator);
allocator.destroy(inner.left);
allocator.destroy(inner.right);
},
.leaf => {},
}
}
 
fn compare(context: void, a: Node, b: Node) std.math.Order {
_ = context;
return std.math.order(a.frequency, b.frequency);
}
};
 
pub fn main() !void {
const text = "this is an example for huffman encoding";
 
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer std.debug.assert(gpa.deinit() == .ok);
 
const allocator = gpa.allocator();
var frequencies = std.AutoHashMap(u8, usize).init(allocator);
defer frequencies.deinit();
 
for (text) |ch| {
const gop = try frequencies.getOrPut(ch);
if (gop.found_existing) {
gop.value_ptr.* += 1;
} else {
gop.value_ptr.* = 1;
}
}
 
var prioritized_frequencies =
std.PriorityQueue(Node, void, Node.compare).init(allocator, {});
defer prioritized_frequencies.deinit();
 
var freq_it = frequencies.iterator();
while (freq_it.next()) |counted_char| {
try prioritized_frequencies.add(Node.initLeaf(
counted_char.value_ptr.*,
counted_char.key_ptr.*,
));
}
 
while (prioritized_frequencies.len > 1) {
try prioritized_frequencies.add(try Node.initInternal(
allocator,
prioritized_frequencies.remove(),
prioritized_frequencies.remove(),
));
}
 
const root = prioritized_frequencies.items[0];
defer root.deinit(allocator);
 
var codes = std.AutoArrayHashMap(u8, []const u8).init(allocator);
defer codes.deinit();
 
var arena = std.heap.ArenaAllocator.init(allocator);
defer arena.deinit();
 
try generateCodes(arena.allocator(), &root, &.{}, &codes);
 
const stdout = std.io.getStdOut().writer();
var code_it = codes.iterator();
while (code_it.next()) |item| {
try stdout.print("{c}: {s}\n", .{
item.key_ptr.*,
item.value_ptr.*,
});
}
}
 
fn generateCodes(
arena: std.mem.Allocator,
node: *const Node,
prefix: []const u8,
out_codes: *std.AutoArrayHashMap(u8, []const u8),
) !void {
switch (node.kind) {
.internal => |inner| {
const left_prefix = try arena.alloc(u8, prefix.len + 1);
std.mem.copy(u8, left_prefix, prefix);
left_prefix[prefix.len] = '0';
try generateCodes(arena, inner.left, left_prefix, out_codes);
 
const right_prefix = try arena.alloc(u8, prefix.len + 1);
std.mem.copy(u8, right_prefix, prefix);
right_prefix[prefix.len] = '1';
try generateCodes(arena, inner.right, right_prefix, out_codes);
},
.leaf => |ch| {
try out_codes.put(ch, prefix);
},
}
}
</syntaxhighlight>
{{out}}
<pre>
n: 000
m: 0010
d: 00110
t: 00111
o: 0100
h: 0101
x: 01100
r: 01101
c: 01110
u: 01111
s: 1000
g: 10010
p: 100110
l: 100111
a: 1010
i: 1011
: 110
e: 1110
f: 1111
</pre>
 
=={{header|zkl}}==
This code was adapted from Perl, Python and most of the other examples.
<langsyntaxhighlight lang="zkl">fcn buildHuffman(text){ //-->(encode dictionary, decode dictionary)
ft:=Dictionary();
foreach c in (text){ ft[c]=ft.find(c,0)+1 } // leafs w/count
Line 7,088 ⟶ 7,807:
decodeTable:=encodeTable.pump(Dictionary(),"reverse"); // code:symbol
return(encodeTable,decodeTable);
}</langsyntaxhighlight>
<langsyntaxhighlight lang="zkl">fcn encode(text,table){ text.pump(String,table.get) }
fcn decode(bits,table){ // this is a horrible decoder, for testing only
w:=bits.walker(); sink:=Sink(String);
Line 7,096 ⟶ 7,815:
}}catch(TheEnd){}
sink.close();
}</langsyntaxhighlight>
<langsyntaxhighlight lang="zkl">text:="this is an example for huffman encoding";
encodeTable,decodeTable := buildHuffman(text);
encodeTable.pump(Console.println,fcn(kv){"%s : %s".fmt(kv.xplode())});
Line 7,106 ⟶ 7,825:
println(e);
 
0'|Bits decoded to: "%s"|.fmt(decode(e,decodeTable)).println();</langsyntaxhighlight>
{{out}}
<pre>
9,476

edits