Entropy/Narcissist

Entropy/Narcissist is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Task

Write a computer program that computes and shows its own entropy.
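
Here "entropy" means, as in the related Entropy task, the Shannon entropy of the program's own source file (or, in some entries, its compiled binary), computed from the relative frequency p_i of each distinct byte or character it contains:

\[
H \;=\; -\sum_i p_i \log_2 p_i \qquad \text{(bits per symbol)}
\]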


Related Tasks

Entropy

AutoHotkey

Works with: AutoHotkey 1.1
FileRead, var, *C %A_ScriptFullPath%
MsgBox, % Entropy(var)

Entropy(n) {
    a := [], len := StrLen(n), m := n
    while StrLen(m) {
        s := SubStr(m, 1, 1)
        m := RegExReplace(m, s, "", c)
        a[s] := c
    }
    for key, val in a {
        m := Log(p := val / len)
        e -= p * m / Log(2)
    }
    return, e
}
Output:
5.942956

C

Minor edit to the Entropy answer.

Assumes that the source file is stored in the working directory as "entropy.c".

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>

#define MAXLEN 961   // maximum string length

int makehist(char *S, int *hist, int len) {
    int wherechar[256];
    int i, histlen;
    histlen = 0;
    for (i = 0; i < 256; i++) wherechar[i] = -1;
    for (i = 0; i < len; i++) {
        if (wherechar[(int)S[i]] == -1) {
            wherechar[(int)S[i]] = histlen;
            histlen++;
        }
        hist[wherechar[(int)S[i]]]++;
    }
    return histlen;
}

double entropy(int *hist, int histlen, int len) {
    int i;
    double H;
    H = 0;
    for (i = 0; i < histlen; i++) {
        H -= (double)hist[i] / len * log2((double)hist[i] / len);
    }
    return H;
}

int main(void) {
    char S[MAXLEN];
    int len, *hist, histlen;
    double H;
    FILE *f;
    f = fopen("entropy.c", "r");
    for (len = 0; !feof(f); len++) S[len] = fgetc(f);
    S[--len] = '\0';
    hist = (int*)calloc(len, sizeof(int));
    histlen = makehist(S, hist, len);
    // hist now has no order (known to the program) but that doesn't matter
    H = entropy(hist, histlen, len);
    printf("%lf\n", H);
    return 0;
}
Output:
5.195143

D

void main(in string[] args) {
    import std.stdio, std.algorithm, std.math, std.file;

    auto data = sort(cast(ubyte[])args[0].read);
    return data
           .group
           .map!(g => g[1] / double(data.length))
           .map!(p => -p * p.log2)
           .sum
           .writeln;
}
Output:
6.29803

Elixir

File.open(__ENV__.file, [:read], fn(file) ->
  text = IO.read(file, :all)
  leng = String.length(text)
  String.codepoints(text)
  |> Enum.group_by(&(&1))
  |> Enum.map(fn {_, value} -> length(value) end)
  |> Enum.reduce(0, fn count, entropy ->
       freq = count / leng
       entropy - freq * :math.log2(freq)
     end)
  |> IO.puts
end)
Output:
4.848342673395324

Emacs Lisp

(defun shannon-entropy (input)
  (let ((freq-table (make-hash-table))
        (entropy 0)
        (length (+ (length input) 0.0)))
    (mapcar (lambda (x)
              (puthash x
                       (+ 1 (gethash x freq-table 0))
                       freq-table))
            input)
    (maphash (lambda (k v)
               (set 'entropy (+ entropy
                                (* (/ v length)
                                   (log (/ v length) 2)))))
             freq-table)
    (- entropy)))

(defun narcissist ()
  (shannon-entropy (with-temp-buffer
                     (insert-file-contents "U:/rosetta/narcissist.el")
                     (buffer-string))))
Output:
(narcissist)
4.5129548515535785

FreeBASIC

' version 01-06-2016
' compile with: fbc -s console
' modified code from ENTROPY entry
 
Dim As Integer i, count, totalchar(255)
Dim As UByte buffer
Dim As Double prop, entropy
' command (0) returns the name of this program (including the path)
Dim As String slash, filename = Command(0)
Dim As Integer ff = FreeFile ' find first free filenumber
Open filename For Binary As #ff
 
If Err > 0 Then ' should not happen
    Print "Error opening the file"
    Beep : Sleep 5000, 1
    End
End If

' will read 1 UByte from the file until it reaches the end of the file
For i = 1 To Lof(ff)
    Get #ff, , buffer
    totalchar(buffer) += 1
    count = count + 1
Next

For i = 0 To 255
    If totalchar(i) = 0 Then Continue For
    prop = totalchar(i) / count
    entropy = entropy - (prop * Log(prop) / Log(2))
Next

' next lines are only compiled when compiling for Windows OS (32/64)
#Ifdef __FB_WIN32__
    slash = Chr(92)
    Print "Windows version"
#EndIf
#Ifdef __FB_LINUX__
    slash = Chr(47)
    Print "LINUX version"
#EndIf
 
i = InStrRev(filename, slash)
If i <> 0 Then filename = Right(filename, Len(filename)-i)
 
Print "My name is "; filename
Print : Print "The Entropy of myself is"; entropy
Print
 
' empty keyboard buffer
While InKey <> "" : Wend
Print : Print "hit any key to end program"
Sleep
End
Output:
Windows version
My name is entropy_narcissist.exe

The Entropy of myself is 6.142286625408597

LINUX version
My name is entropy_narcissist

The Entropy of myself is 5.450343613062795

Go

package main

import (
    "fmt"
    "io/ioutil"
    "log"
    "math"
    "os"
    "runtime"
)

func main() {
    _, src, _, _ := runtime.Caller(0)
    fmt.Println("Source file entropy:", entropy(src))
    fmt.Println("Binary file entropy:", entropy(os.Args[0]))
}

func entropy(file string) float64 {
    d, err := ioutil.ReadFile(file)
    if err != nil {
        log.Fatal(err)
    }
    var f [256]float64
    for _, b := range d {
        f[b]++
    }
    hm := 0.
    for _, c := range f {
        if c > 0 {
            hm += c * math.Log2(c)
        }
    }
    l := float64(len(d))
    return math.Log2(l) - hm/l
}
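
The Go version folds log2 over the raw byte counts rather than over frequencies; this yields the same Shannon entropy, since with c_i the count of byte value i and N = Σ c_i the file length,

\[
-\sum_i \frac{c_i}{N}\log_2\frac{c_i}{N} \;=\; \log_2 N \;-\; \frac{1}{N}\sum_i c_i \log_2 c_i .
\]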
Output:
Source file entropy: 5.038501725029859
Binary file entropy: 5.388171194771937

Haskell

import qualified Data.ByteString as BS
import Data.List
import System.Environment
 
(>>>) = flip (.)
 
main = getArgs >>= head >>> BS.readFile >>= BS.unpack >>> entropy >>> print
 
entropy = sort >>> group >>> map genericLength >>> normalize >>> map lg >>> sum
  where lg c = -c * logBase 2 c
        normalize c = let sc = sum c in map (/ sc) c
Output:
In a shell
$ ghc --make -O3 Narcissist.hs

Entropy of the source

$ ./Narcissist Narcissist.hs
4.452645183154108

Entropy of the binary

$ ./Narcissist Narcissist
5.525417236346172

J

Solution:
   entropy=:  +/@:-@(* 2&^.)@(#/.~ % #)
1!:2&2 entropy 1!:1 (4!:4 <'entropy') { 4!:3''
Example:
   load 'entropy.ijs'
4.73307

Lua

arg[0] gives the path of the script currently being executed.

function getFile (filename)
    local inFile = io.open(filename, "r")
    local fileContent = inFile:read("*all")
    inFile:close()
    return fileContent
end

function log2 (x) return math.log(x) / math.log(2) end

function entropy (X)
    local N, count, sum, i = X:len(), {}, 0
    for char = 1, N do
        i = X:sub(char, char)
        if count[i] then
            count[i] = count[i] + 1
        else
            count[i] = 1
        end
    end
    for n_i, count_i in pairs(count) do
        sum = sum + count_i / N * log2(count_i / N)
    end
    return -sum
end

print(entropy(getFile(arg[0])))
Output:
4.3591214356783

PARI/GP

entropy(s)=s=Vec(s);my(v=vecsort(s,,8));-sum(i=1,#v,(x->x*log(x))(sum(j=1,#s,v[i]==s[j])/#s))/log(2);
entropy(Str(entropy))
Output:
%1 = 4.54978213

Perl

#!/usr/bin/perl
use strict ;
use warnings ;
use feature 'say' ;

sub log2 {
    my $number = shift ;
    return log( $number ) / log( 2 ) ;
}

open my $fh , "<" , $ARGV[ 0 ] or die "Can't open $ARGV[ 0 ]$!\n" ;
my %frequencies ;
my $totallength = 0 ;
while ( my $line = <$fh> ) {
    chomp $line ;
    next if $line =~ /^$/ ;
    map { $frequencies{ $_ }++ } split( // , $line ) ;
    $totallength += length ( $line ) ;
}
close $fh ;
my $infocontent = 0 ;
for my $letter ( keys %frequencies ) {
    my $content = $frequencies{ $letter } / $totallength ;
    $infocontent += $content * log2( $content ) ;
}
$infocontent *= -1 ;
say "The information content of the source file is $infocontent !" ;
Output:
The information content of the source file is 4.6487923749222 !

Perl 6

Works with: rakudo version 2016.05
say log(2) R/ [+] map -> \p { p * -log p }, $_.comb.Bag.values >>/>> +$_
given slurp($*PROGRAM-NAME).comb

Result should be in the neighborhood of 4.9

Output:
4.89351613053006

Python

Works with: Python 3.4

Minor edit to the Entropy answer. Assumes that the source file is saved as c:/E.py.

import math
from collections import Counter

def entropy(s):
    p, lns = Counter(s), float(len(s))
    return -sum(count/lns * math.log(count/lns, 2) for count in p.values())

with open('c:/E.py') as f:
    b = f.read()

print(entropy(b))
Output:
4.57672378235371

Racket

The entropy of the program below is 4.512678555350348.

 
#lang racket
(require math)
(define (log2 x) (/ (log x) (log 2)))
(define ds (string->list (file->string "entropy.rkt")))
(define n (length ds))
(- (for/sum ([(d c) (in-hash (samples->hash ds))])
     (* (/ c n) (log2 (/ c n)))))
 

REXX

REXX doesn't have a BIF (built-in function) for log or ln, so the subroutine (function) log2 is included herein.
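
Roughly, log2 first scales its argument toward 1 by powers of e (tallying the exponent in ii), sums the alternating series for the natural logarithm of what remains, and converts to base 2 at the end:

\[
\log_2 x \;=\; \frac{\ln x}{\ln 2}, \qquad \ln(1+y) \;=\; y - \frac{y^2}{2} + \frac{y^3}{3} - \cdots
\]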

/*REXX program calculates the  "information entropy"  for  ~this~  REXX program.       */
numeric digits 50                                /*use 50 decimal digits for precision. */
#=0;  @.=0;  $=;  $$=;  recs=sourceline()        /*define some handy─dandy REXX vars.   */

  do m=1  for recs                               /* [↓]  obtain program source and ──► $*/
  $=$ || sourceline(m)                           /*get a sourceLine of this REXX program*/
  end   /*m*/                                    /* [↑]  $ str won't have any meta chars*/
L=length($)                                      /*the byte length of this REXX program.*/

  do j=1  for L;  _=substr($,j,1)                /*process each character in  $  string.*/
  if @._==0  then do;  #=#+1                     /*¿Character unique?  Bump char counter*/
                       $$=$$ || _                /*add this character to the  $$  list. */
                  end
  @._=@._+1                                      /*keep track of this character's count.*/
  end   /*j*/                                    /* [↑]  characters are all 8─bit bytes.*/
sum=0                                            /*calculate info entropy for each char.*/
  do i=1  for #;  _=substr($$,i,1)               /*obtain a character from unique list. */
  sum=sum - @._/L * log2(@._/L)                  /*add {negatively} the char entropies. */
  end   /*i*/

say '    program length: '  L                    /*pgm length doesn't include meta chars*/
say 'program statements: '  recs                 /*pgm statements are actually pgm lines*/
say ' unique characters: '  #;    say            /*characters are 8─bit bytes of the pgm*/
say 'The information entropy of this REXX program ──► '  format(sum,,12)
exit                                             /*stick a fork in it, we're all done.  */
/*──────────────────────────────────────────────────────────────────────────────────────*/
log2: procedure;  parse arg x 1 ox;  ig= x>1.5;  ii=0;  is=1 - 2 * (ig\==1)
      numeric digits digits()+5                  /* [↓]  precision of E must be≥digits()*/
      e=2.71828182845904523536028747135266249775724709369995957496696762772407663035354759
        do while ig & ox>1.5 | \ig&ox<.5;  _=e;  do j=-1;  iz=ox* _**-is
        if j>=0 & (ig & iz<1 | \ig&iz>.5)  then leave;  _=_*_;  izz=iz;  end  /*j*/
        ox=izz;  ii=ii+is*2**j;  end;  x=x* e**-ii-1;  z=0;  _=-1;  p=z
          do k=1;  _=-_*x;  z=z+_/k;  if z=p  then leave;  p=z;  end  /*k*/
        r=z+ii;  if arg()==2  then return r;  return r/log2(2,.)

Output:

    program length:  2612
program statements:  34
 unique characters:  78

The information entropy of this REXX program ──►  4.284631866395

Ruby

def entropy(s)
  counts = s.each_char.with_object(Hash.new(0.0)) {|c,h| h[c] += 1}
  counts.values.reduce(0) do |entropy, count|
    freq = count / s.size
    entropy - freq * Math.log2(freq)
  end
end

s = File.read(__FILE__)
p entropy(s)
Output:
4.885234973253878

Sidef

func entropy(s) {
    [0,
        s.chars.freq.values.map {|c|
            var f = c/s.len
            f * f.log2
        }...
    ]«-»
}

say entropy(File(__FILE__).open_r.slurp)
Output:
4.27307750866434915713432109186549

Tcl

Note that this code doesn't bother to close the open handle on the script; it is only suitable as a demonstration program.

proc entropy {str} {
    set log2 [expr log(2)]
    foreach char [split $str ""] {dict incr counts $char}
    set entropy 0.0
    foreach count [dict values $counts] {
        set freq [expr {$count / double([string length $str])}]
        set entropy [expr {$entropy - $freq * log($freq)/$log2}]
    }
    return $entropy
}

puts [format "entropy = %.5f" [entropy [read [open [info script]]]]]
Output:
entropy = 4.59099

zkl

Minor edit to the Entropy answer.

fcn entropy(text){
   text.pump(Void,fcn(c,freq){ c=c.toAsc(); freq[c]=freq[c]+1; freq }
             .fp1((0).pump(256,List,(0.0).create.fp(0)).copy()))
   .filter()                    // remove all zero entries
   .apply('/(text.len()))       // (num of char)/len
   .apply(fcn(p){-p*p.log()})   // |p*ln(p)|
   .sum(0.0)/(2.0).log();       // sum * ln(e)/ln(2) to convert to log2
}

entropy(File("entropy.zkl").read().text).println();
Output:
4.8422