
Entropy/Narcissist

Entropy/Narcissist is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Task

Write a computer program that computes and shows its own entropy.
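
Here, entropy means the Shannon entropy of the program's own symbol (byte or character) frequencies: if p_i is the relative frequency of the i-th distinct symbol in the file, the result, in bits per symbol, is

    H = -\sum_i p_i \log_2 p_i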


Related Tasks

Entropy

AutoHotkey

Works with: AutoHotkey 1.1
FileRead, var, *C %A_ScriptFullPath%
MsgBox, % Entropy(var)
 
Entropy(n) {
    a := [], len := StrLen(n), m := n
    while StrLen(m) {
        s := SubStr(m, 1, 1)
        m := RegExReplace(m, s, "", c)
        a[s] := c
    }
    for key, val in a {
        m := Log(p := val / len)
        e -= p * m / Log(2)
    }
    return, e
}
Output:
5.942956

C

Minor edit to the Entropy answer.

Assumes that the source file is stored in the working directory as "entropy.c".

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <math.h>
 
#define MAXLEN 961 //maximum string length
 
int makehist(char *S, int *hist, int len) {
    int wherechar[256];
    int i, histlen;
    histlen = 0;
    for (i = 0; i < 256; i++) wherechar[i] = -1;
    for (i = 0; i < len; i++) {
        if (wherechar[(int)S[i]] == -1) {
            wherechar[(int)S[i]] = histlen;
            histlen++;
        }
        hist[wherechar[(int)S[i]]]++;
    }
    return histlen;
}
 
double entropy(int *hist, int histlen, int len) {
    int i;
    double H;
    H = 0;
    for (i = 0; i < histlen; i++) {
        H -= (double)hist[i] / len * log2((double)hist[i] / len);
    }
    return H;
}
 
int main(void) {
    char S[MAXLEN];
    int len, *hist, histlen;
    double H;
    FILE *f;
    f = fopen("entropy.c", "r");
    for (len = 0; !feof(f); len++) S[len] = fgetc(f);
    S[--len] = '\0';
    hist = (int*)calloc(len, sizeof(int));
    histlen = makehist(S, hist, len);
    // hist now has no order (known to the program) but that doesn't matter
    H = entropy(hist, histlen, len);
    printf("%lf\n", H);
    return 0;
}
Output:
5.195143

D

void main(in string[] args) {
    import std.stdio, std.algorithm, std.math, std.file;
 
    auto data = sort(cast(ubyte[])args[0].read);
    return data
        .group
        .map!(g => g[1] / double(data.length))
        .map!(p => -p * p.log2)
        .sum
        .writeln;
}
Output:
6.29803

Elixir

File.open(__ENV__.file, [:read], fn(file) ->
  text = IO.read(file, :all)
  leng = String.length(text)
  String.codepoints(text)
  |> Enum.group_by(&(&1))
  |> Enum.map(fn {_, value} -> length(value) end)
  |> Enum.reduce(0, fn count, entropy ->
       freq = count / leng
       entropy - freq * :math.log2(freq)
     end)
  |> IO.puts
end)
Output:
4.848342673395324

Emacs Lisp

(defun shannon-entropy (input)
  (let ((freq-table (make-hash-table))
        (entropy 0)
        (length (+ (length input) 0.0)))
    (mapcar (lambda (x)
              (puthash x
                       (+ 1 (gethash x freq-table 0))
                       freq-table))
            input)
    (maphash (lambda (k v)
               (set 'entropy (+ entropy
                                (* (/ v length)
                                   (log (/ v length) 2)))))
             freq-table)
    (- entropy)))
 
(defun narcissist ()
  (shannon-entropy (with-temp-buffer
                     (insert-file-contents "U:/rosetta/narcissist.el")
                     (buffer-string))))
Output:
(narcissist)
4.5129548515535785

FreeBASIC

' version 01-06-2016
' compile with: fbc -s console
' modified code from ENTROPY entry
 
Dim As Integer i, count, totalchar(255)
Dim As UByte buffer
Dim As Double prop, entropy
' command (0) returns the name of this program (including the path)
Dim As String slash, filename = Command(0)
Dim As Integer ff = FreeFile ' find first free filenumber
Open filename For Binary As #ff
 
If Err > 0 Then ' should not happen
    Print "Error opening the file"
    Beep : Sleep 5000, 1
    End
End If
 
' will read 1 UByte from the file until it reaches the end of the file
For i = 1 To Lof(ff)
    Get #ff, , buffer
    totalchar(buffer) += 1
    count = count + 1
Next
 
For i = 0 To 255
    If totalchar(i) = 0 Then Continue For
    prop = totalchar(i) / count
    entropy = entropy - (prop * Log(prop) / Log(2))
Next
 
' next lines are only compiled when compiling for Windows OS (32/64)
#Ifdef __FB_WIN32__
    slash = Chr(92)
    Print "Windows version"
#EndIf
#Ifdef __FB_LINUX__
    slash = Chr(47)
    Print "LINUX version"
#EndIf
 
i = InStrRev(filename, slash)
If i <> 0 Then filename = Right(filename, Len(filename)-i)
 
Print "My name is "; filename
Print : Print "The Entropy of myself is"; entropy
Print
 
' empty keyboard buffer
While InKey <> "" : Wend
Print : Print "hit any key to end program"
Sleep
End
Output:
Windows version
My name is entropy_narcissist.exe

The Entropy of myself is 6.142286625408597

LINUX version
My name is entropy_narcissist

The Entropy of myself is 5.450343613062795

Go

package main
 
import (
    "fmt"
    "io/ioutil"
    "log"
    "math"
    "os"
    "runtime"
)
 
func main() {
    _, src, _, _ := runtime.Caller(0)
    fmt.Println("Source file entropy:", entropy(src))
    fmt.Println("Binary file entropy:", entropy(os.Args[0]))
}
 
func entropy(file string) float64 {
    d, err := ioutil.ReadFile(file)
    if err != nil {
        log.Fatal(err)
    }
    var f [256]float64
    for _, b := range d {
        f[b]++
    }
    hm := 0.
    for _, c := range f {
        if c > 0 {
            hm += c * math.Log2(c)
        }
    }
    l := float64(len(d))
    return math.Log2(l) - hm/l
}
Output:
Source file entropy: 5.038501725029859
Binary file entropy: 5.388171194771937

Haskell

import qualified Data.ByteString as BS
import Data.List
import System.Environment
 
(>>>) = flip (.)
 
main = getArgs >>= head >>> BS.readFile >>= BS.unpack >>> entropy >>> print
 
entropy = sort >>> group >>> map genericLength >>> normalize >>> map lg >>> sum
  where lg c = -c * logBase 2 c
        normalize c = let sc = sum c in map (/ sc) c
Output:
In a shell
$ ghc --make -O3 Narcissist.hs

Entropy of the source

$ ./Narcissist Narcissist.hs
4.452645183154108

Entropy of the binary

$ ./Narcissist Narcissist
5.525417236346172

J

Solution:
   entropy=:  +/@:-@(* 2&^.)@(#/.~ % #)           NB. -(sum p * 2^.p) over the symbol frequencies p
1!:2&2 entropy 1!:1 (4!:4 <'entropy') { 4!:3''    NB. read the script file that defines entropy, apply entropy, and display
Example:
   load 'entropy.ijs'
4.73307

Kotlin

// version 1.1.0 (entropy_narc.kt)
 
fun log2(d: Double) = Math.log(d) / Math.log(2.0)
 
fun shannon(s: String): Double {
    val counters = mutableMapOf<Char, Int>()
    for (c in s) {
        if (counters.containsKey(c)) counters[c] = counters[c]!! + 1
        else counters.put(c, 1)
    }
    val nn = s.length.toDouble()
    var sum = 0.0
    for (key in counters.keys) {
        val term = counters[key]!! / nn
        sum += term * log2(term)
    }
    return -sum
}
 
fun main(args: Array<String>) {
    val prog = java.io.File("entropy_narc.kt").readText()
    println("This program's entropy is ${"%18.16f".format(shannon(prog))}")
}
Output:
This program's entropy is 4.8471803665906705

Lua

arg[0] gives the path of the script currently being executed

function getFile (filename)
    local inFile = io.open(filename, "r")
    local fileContent = inFile:read("*all")
    inFile:close()
    return fileContent
end
 
function log2 (x) return math.log(x) / math.log(2) end
 
function entropy (X)
    local N, count, sum, i = X:len(), {}, 0
    for char = 1, N do
        i = X:sub(char, char)
        if count[i] then
            count[i] = count[i] + 1
        else
            count[i] = 1
        end
    end
    for n_i, count_i in pairs(count) do
        sum = sum + count_i / N * log2(count_i / N)
    end
    return -sum
end
 
print(entropy(getFile(arg[0])))
Output:
4.3591214356783

PARI/GP

entropy(s)=s=Vec(s);my(v=vecsort(s,,8));-sum(i=1,#v,(x->x*log(x))(sum(j=1,#s,v[i]==s[j])/#s))/log(2);
entropy(Str(entropy))
Output:
%1 = 4.54978213

Perl

#!/usr/bin/perl
use strict ;
use warnings ;
use feature 'say' ;
 
sub log2 {
    my $number = shift ;
    return log( $number ) / log( 2 ) ;
}
 
open my $fh , "<" , $ARGV[ 0 ] or die "Can't open $ARGV[ 0 ]: $!\n" ;
my %frequencies ;
my $totallength = 0 ;
while ( my $line = <$fh> ) {
    chomp $line ;
    next if $line =~ /^$/ ;
    map { $frequencies{ $_ }++ } split( // , $line ) ;
    $totallength += length ( $line ) ;
}
close $fh ;
my $infocontent = 0 ;
for my $letter ( keys %frequencies ) {
    my $content = $frequencies{ $letter } / $totallength ;
    $infocontent += $content * log2( $content ) ;
}
$infocontent *= -1 ;
say "The information content of the source file is $infocontent !" ;
Output:
The information content of the source file is 4.6487923749222 !

Perl 6

Works with: rakudo version 2016.05
say log(2) R/ [+] map -> \p { p * -log p }, $_.comb.Bag.values >>/>> +$_
given slurp($*PROGRAM-NAME).comb

Result should be in the neighborhood of 4.9

Output:
4.89351613053006

Phix

Minor edit to the Entropy answer; when compiled, it assumes the source code is in the same directory.

function log2(atom v)
    return log(v)/log(2)
end function
 
function entropy(sequence s)
    sequence symbols = {},
             counts = {}
    integer N = length(s)
    for i=1 to N do
        object si = s[i]
        integer k = find(si,symbols)
        if k=0 then
            symbols = append(symbols,si)
            counts = append(counts,1)
        else
            counts[k] += 1
        end if
    end for
    atom H = 0
    integer n = length(counts)
    for i=1 to n do
        atom ci = counts[i]/N
        H -= ci*log2(ci)
    end for
    return H
end function
 
?entropy(get_text(open(substitute(command_line()[2],".exe",".exw"),"rb")))
Output:
4.993666233

Python

Works with: Python 3.4

Minor edit to the Entropy answer.

import math
from collections import Counter
 
def entropy(s):
    p, lns = Counter(s), float(len(s))
    return -sum(count/lns * math.log(count/lns, 2) for count in p.values())
 
with open('c:/E.py') as f:
    b = f.read()
 
print(entropy(b))
Output:
4.57672378235371
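
The entry above reads a hardcoded path ('c:/E.py'). As an illustrative sketch (not part of the original entry), the same calculation can be made self-referential by reading the script's own path via __file__:

import math
from collections import Counter
 
def entropy(data):
    # Shannon entropy in bits per byte, from byte frequencies
    n = float(len(data))
    return -sum(c/n * math.log(c/n, 2) for c in Counter(data).values())
 
with open(__file__, 'rb') as f:   # this script's own source
    print(entropy(f.read()))

Run as a standalone file, this prints the entropy of its own source.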

Racket

The entropy of the program below is 4.512678555350348.

 
#lang racket
(require math)
(define (log2 x) (/ (log x) (log 2)))
(define ds (string->list (file->string "entropy.rkt")))
(define n (length ds))
(- (for/sum ([(d c) (in-hash (samples->hash ds))])
     (* (/ c n) (log2 (/ c n)))))
 

REXX

REXX doesn't have a BIF (built-in function) for log or ln, so the subroutine (function) log2 is included herein.

/*REXX program calculates the   "information entropy"   for  ~this~  REXX program.      */
numeric digits 50 /*use 50 decimal digits for precision. */
#=0; @.=0; $=; $$=; recs=sourceline() /*define some handy─dandy REXX vars. */
 
  do m=1 for recs                        /* [↓]  obtain program source and ──► $*/
    $=$ || sourceline(m)                 /*get a sourceLine of this REXX program*/
  end   /*m*/                            /* [↑]  $ str won't have any meta chars*/
L=length($)                              /*the byte length of this REXX program.*/
 
  do j=1 for L;  _=substr($,j,1)         /*process each character in  $  string.*/
    if @._==0  then do;  #=#+1           /*¿Character unique?  Bump char counter*/
                         $$=$$ || _      /*add this character to the  $$  list. */
                    end
    @._=@._+1                            /*keep track of this character's count.*/
  end   /*j*/                            /* [↑]  characters are all 8─bit bytes.*/
sum=0                                    /*calculate info entropy for each char.*/
  do i=1 for #;  _=substr($$,i,1)        /*obtain a character from unique list. */
    sum=sum - @._/L * log2(@._/L)        /*add {negatively} the char entropies. */
  end   /*i*/
 
say ' program length: ' L /*pgm length doesn't include meta chars*/
say 'program statements: ' recs /*pgm statements are actually pgm lines*/
say ' unique characters: ' #; say /*characters are 8─bit bytes of the pgm*/
say 'The information entropy of this REXX program ──► ' format(sum,,12)
exit /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
log2: procedure; parse arg x 1 ox; ig= x>1.5; ii=0; is=1 - 2 * (ig\==1)
numeric digits digits()+5 /* [↓] precision of E must be≥digits()*/
e=2.71828182845904523536028747135266249775724709369995957496696762772407663035354759
do while ig & ox>1.5 | \ig&ox<.5; _=e; do j=-1; iz=ox* _**-is
if j>=0 & (ig & iz<1 | \ig&iz>.5) then leave; _=_*_; izz=iz; end /*j*/
ox=izz; ii=ii+is*2**j; end; x=x* e**-ii-1; z=0; _=-1; p=z
do k=1; _=-_*x; z=z+_/k; if z=p then leave; p=z; end /*k*/
r=z+ii; if arg()==2 then return r; return r/log2(2,.)

Output:

    program length:  2612
program statements:  34
 unique characters:  78

The information entropy of this REXX program ──►  4.284631866395

Ruby

def entropy(s)
  counts = s.each_char.with_object(Hash.new(0.0)) { |c, h| h[c] += 1 }
  counts.values.reduce(0) do |entropy, count|
    freq = count / s.size
    entropy - freq * Math.log2(freq)
  end
end
 
s = File.read(__FILE__)
p entropy(s)
Output:
4.885234973253878

Sidef

func entropy(s) {
    [0,
        s.chars.freq.values.map {|c|
            var f = c/s.len
            f * f.log2
        }...
    ]«-»
}
 
say entropy(File(__FILE__).open_r.slurp)
Output:
4.27307750866434915713432109186549

Tcl

Note that this code doesn't bother to close the open handle on the script; it is only suitable as a demonstration program.

proc entropy {str} {
    set log2 [expr log(2)]
    foreach char [split $str ""] {dict incr counts $char}
    set entropy 0.0
    foreach count [dict values $counts] {
        set freq [expr {$count / double([string length $str])}]
        set entropy [expr {$entropy - $freq * log($freq)/$log2}]
    }
    return $entropy
}
 
puts [format "entropy = %.5f" [entropy [read [open [info script]]]]]
Output:
entropy = 4.59099

zkl

Minor edit to the Entropy answer.

fcn entropy(text){
   text.pump(Void,fcn(c,freq){ c=c.toAsc(); freq[c]=freq[c]+1; freq }
      .fp1((0).pump(256,List,(0.0).create.fp(0)).copy()))
   .filter()                     // remove all zero entries
   .apply('/(text.len()))        // (num of char)/len
   .apply(fcn(p){-p*p.log()})    // |p*ln(p)|
   .sum(0.0)/(2.0).log();        // sum * ln(e)/ln(2) to convert to log2
}
 
entropy(File("entropy.zkl").read().text).println();
Output:
4.8422