Probabilistic choice
You are encouraged to solve this task according to the task description, using any language you may know.
Given a mapping between items and their required probability of occurrence, generate a million items randomly subject to the given probabilities and compare the target probability of occurrence versus the generated values.
The total of all the probabilities should equal one. (Because floating point arithmetic is involved this is subject to rounding errors).
Use the following mapping to test your programs:aleph 1/5.0 beth 1/6.0 gimel 1/7.0 daleth 1/8.0 he 1/9.0 waw 1/10.0 zayin 1/11.0 heth 1759/27720 # adjusted so that probabilities add to 1
[edit] Ada
with Ada.Numerics.Float_Random; use Ada.Numerics.Float_Random;
with Ada.Text_IO; use Ada.Text_IO;
procedure Random_Distribution is
Trials : constant := 1_000_000;
type Outcome is (Aleph, Beth, Gimel, Daleth, He, Waw, Zayin, Heth);
Pr : constant array (Outcome) of Uniformly_Distributed :=
(1.0/5.0, 1.0/6.0, 1.0/7.0, 1.0/8.0, 1.0/9.0, 1.0/10.0, 1.0/11.0, 1.0);
Samples : array (Outcome) of Natural := (others => 0);
Value : Uniformly_Distributed;
Dice : Generator;
begin
for Try in 1..Trials loop
Value := Random (Dice);
for I in Pr'Range loop
if Value <= Pr (I) then
Samples (I) := Samples (I) + 1;
exit;
else
Value := Value - Pr (I);
end if;
end loop;
end loop;
-- Printing the results
for I in Pr'Range loop
Put (Outcome'Image (I) & Character'Val (9));
Put (Float'Image (Float (Samples (I)) / Float (Trials)) & Character'Val (9));
if I = Heth then
Put_Line (" rest");
else
Put_Line (Uniformly_Distributed'Image (Pr (I)));
end if;
end loop;
end Random_Distribution;
Sample output:
ALEPH 2.00167E-01 2.00000E-01 BETH 1.67212E-01 1.66667E-01 GIMEL 1.42290E-01 1.42857E-01 DALETH 1.24186E-01 1.25000E-01 HE 1.11455E-01 1.11111E-01 WAW 1.00325E-01 1.00000E-01 ZAYIN 9.10220E-02 9.09091E-02 HETH 6.33430E-02 rest
[edit] ALGOL 68
INT trials = 1 000 000;
MODE LREAL = LONG REAL;
MODE ITEM = STRUCT(
STRING name,
INT prob count,
LREAL expect,
mapping
);
INT col width = 9;
FORMAT real repr = $g(-col width+1, 6)$,
item repr = $"Name: "g", Prob count: "g(0)", Expect: "f(real repr)", Mapping: ", f(real repr)l$;
[8]ITEM items := (
( "aleph", 0, ~, ~ ),
( "beth", 0, ~, ~ ),
( "gimel", 0, ~, ~ ),
( "daleth", 0, ~, ~ ),
( "he", 0, ~, ~ ),
( "waw", 0, ~, ~ ),
( "zayin", 0, ~, ~ ),
( "heth", 0, ~, ~ )
);
main:
(
LREAL offset = 5; # const #
# initialise items #
LREAL total sum := 0;
FOR i FROM LWB items TO UPB items - 1 DO
expect OF items[i] := 1/(i-1+offset);
total sum +:= expect OF items[i]
OD;
expect OF items[UPB items] := 1 - total sum;
mapping OF items[LWB items] := expect OF items[LWB items];
FOR i FROM LWB items + 1 TO UPB items DO
mapping OF items[i] := mapping OF items[i-1] + expect OF items[i]
OD;
# printf((item repr, items)) #
# perform the sampling #
PROC sample = (REF[]LREAL mapping)INT:(
INT out;
LREAL rand real = random;
FOR j FROM LWB items TO UPB items DO
IF rand real < mapping[j] THEN
out := j;
done
FI
OD;
done: out
);
FOR i TO trials DO
prob count OF items[sample(mapping OF items)] +:= 1
OD;
FORMAT indent = $17k$;
# print the results #
printf(($"Trials: "g(0)l$, trials));
printf(($"Items:"$,indent));
FOR i FROM LWB items TO UPB items DO printf(($gn(col width)k" "$, name OF items[i])) OD;
printf(($l"Target prob.:"$, indent, $f(real repr)" "$, expect OF items));
printf(($l"Attained prob.:"$, indent));
FOR i FROM LWB items TO UPB items DO printf(($f(real repr)" "$, prob count OF items[i]/trials)) OD;
printf($l$)
)
Sample output:
Trials: 1000000 Items: aleph beth gimel daleth he waw zayin heth Target prob.: 0.200000 0.166667 0.142857 0.125000 0.111111 0.100000 0.090909 0.063456 Attained prob.: 0.199987 0.166917 0.142531 0.124203 0.111338 0.099702 0.091660 0.063662
[edit] AutoHotkey
contributed by Laszlo on the ahk forum
s1 := "aleph", p1 := 1/5.0 ; Input
s2 := "beth", p2 := 1/6.0
s3 := "gimel", p3 := 1/7.0
s4 := "daleth", p4 := 1/8.0
s5 := "he", p5 := 1/9.0
s6 := "waw", p6 := 1/10.0
s7 := "zayin", p7 := 1/11.0
s8 := "heth", p8 := 1-p1-p2-p3-p4-p5-p6-p7
n := 8, r0 := 0, r%n% := 1 ; auxiliary data
Loop % n-1
i := A_Index-1, r%A_Index% := r%i% + p%A_Index% ; cummulative distribution
Loop 1000000 {
Random R, 0, 1.0
Loop %n% ; linear search
If (R < r%A_Index%) {
c%A_Index%++
Break
}
}
; Output
Loop %n%
t .= s%A_Index% "`t" p%A_Index% "`t" c%A_Index%*1.0e-6 "`n"
Msgbox %t%
/*
output:
---------------------------
aleph 0.200000 0.199960
beth 0.166667 0.166146
gimel 0.142857 0.142624
daleth 0.125000 0.124924
he 0.111111 0.111226
waw 0.100000 0.100434
zayin 0.090909 0.091344
heth 0.063456 0.063342
---------------------------
*/
[edit] AWK
#!/usr/bin/awk -f
BEGIN {
ITERATIONS = 1000000
delete symbMap
delete probMap
delete counts
initData();
for (i = 0; i < ITERATIONS; i++) {
distribute(rand())
}
showDistributions()
exit
}
function distribute(rnd, cnt, symNum, sym, symPrb) {
cnt = length(symbMap)
for (symNum = 1; symNum <= cnt; symNum++) {
sym = symbMap[symNum];
symPrb = probMap[sym];
rnd -= symPrb;
if (rnd <= 0) {
counts[sym]++
return;
}
}
}
function showDistributions( s, sym, prb, actSum, expSum, totItr) {
actSum = 0.0
expSum = 0.0
totItr = 0
printf "%-7s %-7s %-5s %-5s\n", "symb", "num.", "act.", "expt."
print "------- ------- ----- -----"
for (s = 1; s <= length(symbMap); s++) {
sym = symbMap[s]
prb = counts[sym]/ITERATIONS
actSum += prb
expSum += probMap[sym]
totItr += counts[sym]
printf "%-7s %7d %1.3f %1.3f\n", sym, counts[sym], prb, probMap[sym]
}
print "------- ------- ----- -----"
printf "Totals: %7d %1.3f %1.3f\n", totItr, actSum, expSum
}
function initData( sym) {
srand()
probMap["aleph"] = 1.0 / 5.0
probMap["beth"] = 1.0 / 6.0
probMap["gimel"] = 1.0 / 7.0
probMap["daleth"] = 1.0 / 8.0
probMap["he"] = 1.0 / 9.0
probMap["waw"] = 1.0 / 10.0
probMap["zyin"] = 1.0 / 11.0
probMap["heth"] = 1759.0 / 27720.0
symbMap[1] = "aleph"
symbMap[2] = "beth"
symbMap[3] = "gimel"
symbMap[4] = "daleth"
symbMap[5] = "he"
symbMap[6] = "waw"
symbMap[7] = "zyin"
symbMap[8] = "heth"
for (sym in probMap)
counts[sym] = 0;
}
Example output:
symb num. act. expt. ------- ------- ----- ----- aleph 200598 0.201 0.200 beth 166317 0.166 0.167 gimel 142391 0.142 0.143 daleth 125051 0.125 0.125 he 110658 0.111 0.111 waw 100464 0.100 0.100 zyin 90649 0.091 0.091 heth 63872 0.064 0.063 ------- ------- ----- ----- Totals: 1000000 1.000 1.000
Rounding off makes the results look perfect.
[edit] BBC BASIC
DIM item$(7), prob(7), cnt%(7)
item$() = "aleph","beth","gimel","daleth","he","waw","zayin","heth"
prob() = 1/5.0, 1/6.0, 1/7.0, 1/8.0, 1/9.0, 1/10.0, 1/11.0, 1759/27720
IF ABS(SUM(prob())-1) > 1E-6 ERROR 100, "Probabilities don't sum to 1"
FOR trial% = 1 TO 1E6
r = RND(1)
p = 0
FOR i% = 0 TO DIM(prob(),1)
p += prob(i%)
IF r < p cnt%(i%) += 1 : EXIT FOR
NEXT
NEXT
@% = &2060A
PRINT "Item actual theoretical"
FOR i% = 0 TO DIM(item$(),1)
PRINT item$(i%), cnt%(i%)/1E6, prob(i%)
NEXT
Output:
Item actual theoretical aleph 0.200306 0.200000 beth 0.165963 0.166667 gimel 0.143089 0.142857 daleth 0.125387 0.125000 he 0.111057 0.111111 waw 0.100098 0.100000 zayin 0.091031 0.090909 heth 0.063069 0.063456
[edit] C
#include <stdio.h>output
#include <stdlib.h>
/* pick a random index from 0 to n-1, according to probablities listed
in p[] which is assumed to have a sum of 1. The values in the probablity
list matters up to the point where the sum goes over 1 */
int rand_idx(double *p, int n)
{
double s = rand() / (RAND_MAX + 1.0);
int i;
for (i = 0; i < n - 1 && (s -= p[i]) >= 0; i++);
return i;
}
#define LEN 8
#define N 1000000
int main()
{
const char *names[LEN] = { "aleph", "beth", "gimel", "daleth",
"he", "waw", "zayin", "heth" };
double s, p[LEN] = { 1./5, 1./6, 1./7, 1./8, 1./9, 1./10, 1./11, 1e300 };
int i, count[LEN] = {0};
for (i = 0; i < N; i++) count[rand_idx(p, LEN)] ++;
printf(" Name Count Ratio Expected\n");
for (i = 0, s = 1; i < LEN; s -= p[i++])
printf("%6s%7d %7.4f%% %7.4f%%\n",
names[i], count[i],
(double)count[i] / N * 100,
((i < LEN - 1) ? p[i] : s) * 100);
return 0;
}
Name Count Ratio Expected
aleph 199928 19.9928% 20.0000%
beth 166489 16.6489% 16.6667%
gimel 143211 14.3211% 14.2857%
daleth 125257 12.5257% 12.5000%
he 110849 11.0849% 11.1111%
waw 99935 9.9935% 10.0000%
zayin 91001 9.1001% 9.0909%
heth 63330 6.3330% 6.3456%
[edit] C++
#include <cstdlib>
#include <iostream>
#include <vector>
#include <utility>
#include <algorithm>
#include <ctime>
#include <iomanip>
int main( ) {
typedef std::vector<std::pair<std::string, double> >::const_iterator SPI ;
typedef std::vector<std::pair<std::string , double> > ProbType ;
ProbType probabilities ;
probabilities.push_back( std::make_pair( "aleph" , 1/5.0 ) ) ;
probabilities.push_back( std::make_pair( "beth" , 1/6.0 ) ) ;
probabilities.push_back( std::make_pair( "gimel" , 1/7.0 ) ) ;
probabilities.push_back( std::make_pair( "daleth" , 1/8.0 ) ) ;
probabilities.push_back( std::make_pair( "he" , 1/9.0 ) ) ;
probabilities.push_back( std::make_pair( "waw" , 1/10.0 ) ) ;
probabilities.push_back( std::make_pair( "zayin" , 1/11.0 ) ) ;
probabilities.push_back( std::make_pair( "heth" , 1759/27720.0 ) ) ;
std::vector<std::string> generated ; //for the strings that are generatod
std::vector<int> decider ; //holds the numbers that determine the choice of letters
for ( int i = 0 ; i < probabilities.size( ) ; i++ ) {
if ( i == 0 ) {
decider.push_back( 27720 * (probabilities[ i ].second) ) ;
}
else {
int number = 0 ;
for ( int j = 0 ; j < i ; j++ ) {
number += 27720 * ( probabilities[ j ].second ) ;
}
number += 27720 * probabilities[ i ].second ;
decider.push_back( number ) ;
}
}
srand( time( 0 ) ) ;
for ( int i = 0 ; i < 1000000 ; i++ ) {
int randnumber = rand( ) % 27721 ;
int j = 0 ;
while ( randnumber > decider[ j ] )
j++ ;
generated.push_back( ( probabilities[ j ]).first ) ;
}
std::cout << "letter frequency attained frequency expected\n" ;
for ( SPI i = probabilities.begin( ) ; i != probabilities.end( ) ; i++ ) {
std::cout << std::left << std::setw( 8 ) << i->first ;
int found = std::count ( generated.begin( ) , generated.end( ) , i->first ) ;
std::cout << std::left << std::setw( 21 ) << found / 1000000.0 ;
std::cout << std::left << std::setw( 17 ) << i->second << '\n' ;
}
return 0 ;
}
Output:
letter frequency attained frequency expected aleph 0.200089 0.2 beth 0.16695 0.166667 gimel 0.142693 0.142857 daleth 0.124859 0.125 he 0.111258 0.111111 waw 0.099665 0.1 zayin 0.090654 0.0909091 heth 0.063832 0.063456
[edit] C#
using System;
class Program
{
static long TRIALS = 1000000L;
private class Expv
{
public string name;
public int probcount;
public double expect;
public double mapping;
public Expv(string name, int probcount, double expect, double mapping)
{
this.name = name;
this.probcount = probcount;
this.expect = expect;
this.mapping = mapping;
}
}
static Expv[] items = {
new Expv("aleph", 0, 0.0, 0.0), new Expv("beth", 0, 0.0, 0.0),
new Expv("gimel", 0, 0.0, 0.0), new Expv("daleth", 0, 0.0, 0.0),
new Expv("he", 0, 0.0, 0.0), new Expv("waw", 0, 0.0, 0.0),
new Expv("zayin", 0, 0.0, 0.0), new Expv("heth", 0, 0.0, 0.0)
};
static void Main(string[] args)
{
double rnum, tsum = 0.0;
Random random = new Random();
for (int i = 0, rnum = 5.0; i < 7; i++, rnum += 1.0)
{
items[i].expect = 1.0 / rnum;
tsum += items[i].expect;
}
items[7].expect = 1.0 - tsum;
items[0].mapping = 1.0 / 5.0;
for (int i = 1; i < 7; i++)
items[i].mapping = items[i - 1].mapping + 1.0 / ((double)i + 5.0);
items[7].mapping = 1.0;
for (int i = 0; i < TRIALS; i++)
{
rnum = random.NextDouble();
for (int j = 0; j < 8; j++)
if (rnum < items[j].mapping)
{
items[j].probcount++;
break;
}
}
Console.WriteLine("Trials: {0}", TRIALS);
Console.Write("Items: ");
for (int i = 0; i < 8; i++)
Console.Write(items[i].name.PadRight(9));
Console.WriteLine();
Console.Write("Target prob.: ");
for (int i = 0; i < 8; i++)
Console.Write("{0:0.000000} ", items[i].expect);
Console.WriteLine();
Console.Write("Attained prob.: ");
for (int i = 0; i < 8; i++)
Console.Write("{0:0.000000} ", (double)items[i].probcount / (double)TRIALS);
Console.WriteLine();
}
}
Output:
Trials: 1000000 Items: aleph beth gimel daleth he waw zayin heth Target prob.: 0.200000 0.166667 0.142857 0.125000 0.111111 0.100000 0.090909 0.063456 Attained prob.: 0.199975 0.166460 0.142290 0.125510 0.111374 0.100018 0.090746 0.063627
[edit] Clojure
Works by first converting the provided Probability Distribution Function into a Cumulative Distribution Function, so that it can simply scan through the CDF list and return the current item as soon as the CDF at that point is greater than the random number generated. The code could be made more concise by skipping this step and instead tracking the whole PDF for each random number; but this code is both faster and more readable.
It uses the language built-in (frequencies) to count the number of occurrences of each distinct name. Note that while we actually generate a sequence of num-trials random samples, the sequence is lazily generated and lazily consumed. This means that the program will scale to an arbitrarily-large num-trials with no ill effects, by throwing away elements it's already processed.
(defn to-cdf [pdf]
(reduce
(fn [acc n] (conj acc (+ (or (last acc) 0) n)))
[]
pdf))
(defn choose [cdf]
(let [r (rand)]
(count
(filter (partial > r) cdf))))
(def *names* '[aleph beth gimel daleth he waw zayin heth])
(def *pdf* (map double [1/5 1/6 1/7 1/8 1/9 1/10 1/11 1759/27720]))
(let [num-trials 1000000
cdf (to-cdf *pdf*)
indexes (range (count *names*)) ;; use integer key internally, not name
expected (into (sorted-map) (zipmap indexes *pdf*))
actual (frequencies (repeatedly num-trials #(choose cdf)))]
(doseq [[idx exp] expected]
(println "Expected number of" (*names* idx) "was"
(* num-trials exp) "and actually got" (actual idx))))
Expected number of aleph was 200000.0 and actually got 199300 Expected number of beth was 166666.66666666672 and actually got 166291 Expected number of gimel was 142857.1428571429 and actually got 143297 Expected number of daleth was 125000.0 and actually got 125032 Expected number of he was 111111.11111111111 and actually got 111540 Expected number of waw was 100000.0 and actually got 100062 Expected number of zayin was 90909.09090909091 and actually got 90719 Expected number of heth was 63455.98845598846 and actually got 63759
[edit] Common Lisp
This is a straightforward, if a little verbose implementation based upon the Perl one.
(defvar *probabilities* '((aleph 1/5)
(beth 1/6)
(gimel 1/7)
(daleth 1/8)
(he 1/9)
(waw 1/10)
(zayin 1/11)
(heth 1759/27720)))
(defun calculate-probabilities (choices &key (repetitions 1000000))
(assert (= 1 (reduce #'+ choices :key #'second)))
(labels ((make-ranges ()
(loop for (datum probability) in choices
sum (coerce probability 'double-float) into total
collect (list datum total)))
(pick (ranges)
(declare (optimize (speed 3) (safety 0) (debug 0)))
(loop with random = (random 1.0d0)
for (datum below) of-type (t double-float) in ranges
when (< random below)
do (return datum)))
(populate-hash (ranges)
(declare (optimize (speed 3) (safety 0) (debug 0)))
(loop repeat (the fixnum repetitions)
with hash = (make-hash-table)
do (incf (the fixnum (gethash (pick ranges) hash 0)))
finally (return hash)))
(make-table-data (hash)
(loop for (datum probability) in choices
collect (list datum
(float (/ (gethash datum hash)
repetitions))
(float probability)))))
(format t "Datum~10,2TOccured~20,2TExpected~%")
(format t "~{~{~A~10,2T~F~20,2T~F~}~%~}"
(make-table-data (populate-hash (make-ranges))))))
CL-USER> (calculate-probabilities *probabilities*)
Datum Occured Expected
ALEPH 0.200156 0.2
BETH 0.166521 0.16666667
GIMEL 0.142936 0.14285715
DALETH 0.124779 0.125
HE 0.111601 0.11111111
WAW 0.100068 0.1
ZAYIN 0.090458 0.09090909
HETH 0.063481 0.06345599
[edit] D
import std.stdio, std.random, std.string, std.range;
void main() {
enum int nTrials = 1_000_000;
enum items = "aleph beth gimel daleth he waw zayin heth".split();
enum pr = [1/5., 1/6., 1/7., 1/8., 1/9., 1/10., 1/11., 1759/27720.];
double[pr.length] counts = 0.0;
foreach (_; 0 .. nTrials)
counts[dice(pr)]++;
writeln("Item Target prob Attained prob");
foreach (name, p, co; zip(items, pr, counts[]))
writefln("%-7s %.8f %.8f", name, p, co / nTrials);
}
Output:
Item Target prob Attained prob aleph 0.20000000 0.19932700 beth 0.16666667 0.16677800 gimel 0.14285714 0.14317100 daleth 0.12500000 0.12518100 he 0.11111111 0.11118200 waw 0.10000000 0.10023800 zayin 0.09090909 0.09081600 heth 0.06345598 0.06330700
A faster version:
import std.stdio, std.random, std.algorithm, std.range;
void main() {
enum int nTrials = 1_000_000;
auto items = "aleph beth gimel daleth he waw zayin heth".split();
enum pr = [1/5., 1/6., 1/7., 1/8., 1/9., 1/10., 1/11., 1759/27720.];
double[pr.length] cumulatives = pr[];
foreach (i, ref c; cumulatives[1 .. $ - 1])
c += cumulatives[i];
cumulatives[$ - 1] = 1.0;
double[pr.length] counts = 0.0;
auto rnd = Xorshift(unpredictableSeed());
foreach (_; 0 .. nTrials) {
double rnd01 = rnd.front / cast(double)rnd.max;
rnd.popFront();
counts[cumulatives[].countUntil!(c => c >= rnd01)()]++;
}
writeln("Item Target prob Attained prob");
foreach (name, p, co; zip(items, pr, counts[]))
writefln("%-7s %.8f %.8f", name, p, co / nTrials);
}
[edit] E
This implementation converts the list of probabilities to sub-intervals of [0.0,1.0), then arranges those intervals in a binary tree for searching based on a random number input.
It is rather verbose, due to using the tree rather than a linear search, and having code to print the tree (which was used to debug it).
pragma.syntax("0.9")
First, the algorithm:
/** Makes leaves of the binary tree */
def leaf(value) {
return def leaf {
to run(_) { return value }
to __printOn(out) { out.print("=> ", value) }
}
}
/** Makes branches of the binary tree */
def split(leastRight, left, right) {
return def tree {
to run(specimen) {
return if (specimen < leastRight) {
left(specimen)
} else {
right(specimen)
}
}
to __printOn(out) {
out.print(" ")
out.indent().print(left)
out.lnPrint("< ")
out.print(leastRight)
out.indent().lnPrint(right)
}
}
}
def makeIntervalTree(assocs :List[Tuple[any, float64]]) {
def size :int := assocs.size()
if (size > 1) {
def midpoint := size // 2
return split(assocs[midpoint][1], makeIntervalTree(assocs.run(0, midpoint)),
makeIntervalTree(assocs.run(midpoint)))
} else {
def <nowiki>[[value, _]] := assocs</nowiki>
return leaf(value)
}
}
def setupProbabilisticChoice(entropy, table :Map[any, float64]) {
var cumulative := 0.0
var intervalTable := []
for value => probability in table {
intervalTable with= [value, cumulative]
cumulative += probability
}
def total := cumulative
def selector := makeIntervalTree(intervalTable)
return def probChoice {
# Multiplying by the total helps correct for any error in the sum of the inputs
to run() { return selector(entropy.nextDouble() * total) }
to __printOn(out) {
out.print("Probabilistic choice using tree:")
out.indent().lnPrint(selector)
}
}
}
Then the test setup:
def rosetta := setupProbabilisticChoice(entropy, def probTable := [
"aleph" => 1/5,
"beth" => 1/6.0,
"gimel" => 1/7.0,
"daleth" => 1/8.0,
"he" => 1/9.0,
"waw" => 1/10.0,
"zayin" => 1/11.0,
"heth" => 0.063455988455988432,
])
var trials := 1000000
var timesFound := [].asMap()
for i in 1..trials {
if (i % 1000 == 0) { print(`${i//1000} `) }
def value := rosetta()
timesFound with= (value, timesFound.fetch(value, fn { 0 }) + 1)
}
stdout.println()
for item in probTable.domain() {
stdout.print(item, "\t", timesFound[item] / trials, "\t", probTable[item], "\n")
}
[edit] Euphoria
constant MAX = #3FFFFFFF
constant times = 1e6
atom d,e
sequence Mapps
Mapps = {
{ "aleph", 1/5, 0},
{ "beth", 1/6, 0},
{ "gimel", 1/7, 0},
{ "daleth", 1/8, 0},
{ "he", 1/9, 0},
{ "waw", 1/10, 0},
{ "zayin", 1/11, 0},
{ "heth", 1759/27720, 0}
}
for i = 1 to times do
d = (rand(MAX)-1)/MAX
e = 0
for j = 1 to length(Mapps) do
e += Mapps[j][2]
if d <= e then
Mapps[j][3] += 1
exit
end if
end for
end for
printf(1,"Sample times: %d\n",times)
for j = 1 to length(Mapps) do
d = Mapps[j][3]/times
printf(1,"%-7s should be %f is %f | Deviatation %6.3f%%\n",
{Mapps[j][1],Mapps[j][2],d,(1-Mapps[j][2]/d)*100})
end for
Output:
Sample times: 1000000 aleph should be 0.200000 is 0.200492 | Deviatation 0.245% beth should be 0.166667 is 0.166855 | Deviatation 0.113% gimel should be 0.142857 is 0.143169 | Deviatation 0.218% daleth should be 0.125000 is 0.124923 | Deviatation -0.062% he should be 0.111111 is 0.110511 | Deviatation -0.543% waw should be 0.100000 is 0.099963 | Deviatation -0.037% zayin should be 0.090909 is 0.090647 | Deviatation -0.289% heth should be 0.063456 is 0.063440 | Deviatation -0.025%
[edit] Factor
USING: arrays assocs combinators.random io kernel macros math
math.statistics prettyprint quotations sequences sorting formatting ;
IN: rosettacode.proba
CONSTANT: data
{
{ "aleph" 1/5.0 }
{ "beth" 1/6.0 }
{ "gimel" 1/7.0 }
{ "daleth" 1/8.0 }
{ "he" 1/9.0 }
{ "waw" 1/10.0 }
{ "zayin" 1/11.0 }
{ "heth" f }
}
MACRO: case-probas ( data -- case-probas )
[ first2 [ swap 1quotation 2array ] [ 1quotation ] if* ] map 1quotation ;
: expected ( name data -- float )
2dup at [ 2nip ] [ nip values sift sum 1 swap - ] if* ;
: generate ( # case-probas -- seq )
H{ } clone
[ [ [ casep ] [ inc-at ] bi* ] 2curry times ] keep ; inline
: normalize ( seq # -- seq )
[ clone ] dip [ /f ] curry assoc-map ;
: summarize1 ( name value data -- )
[ over ] dip expected
"%6s: %10f %10f\n" printf ;
: summarize ( generated data -- )
"Key" "Value" "expected" "%6s %10s %10s\n" printf
[ summarize1 ] curry assoc-each ;
: generate-normalized ( # proba -- seq )
[ generate ] [ drop normalize ] 2bi ; inline
: example ( # data -- )
[ case-probas generate-normalized ]
[ summarize ] bi ; inline
In a REPL:
USE: rosettacode.proba
1000000 data example
outputs
Key Value expected
heth: 0.063469 0.063456
waw: 0.100226 0.100000
daleth: 0.125844 0.125000
beth: 0.166264 0.166667
zayin: 0.090806 0.090909
he: 0.110562 0.111111
aleph: 0.199868 0.200000
gimel: 0.142961 0.142857
[edit] Forth
include random.fs
\ common factors of desired probabilities (1/5 .. 1/11)
2 2 * 2 * 3 * 3 * 5 * 7 * 11 * constant denom \ 27720
\ represent each probability as the numerator with 27720 as the denominator
: ,numerators ( max min -- )
do denom i / , loop ;
\ final item is 27720 - sum(probs)
: ,remainder ( denom addr len -- )
cells bounds do i @ - 1 cells +loop , ;
create probs 12 5 ,numerators denom probs 7 ,remainder
create bins 8 cells allot
: choose ( -- 0..7 )
denom random
8 0 do
probs i cells + @ -
dup 0< if drop i unloop exit then
loop
abort" can't get here" ;
: trials ( n -- )
0 do 1 bins choose cells + +! loop ;
: str-table
create ( c-str ... n -- ) 0 do , loop
does> ( n -- str len ) swap cells + @ count ;
here ," heth" here ," zayin" here ," waw" here ," he"
here ," daleth" here ," gimel" here ," beth" here ," aleph"
8 str-table names
: .header
cr ." Name" #tab emit ." Prob" #tab emit ." Actual" #tab emit ." Error" ;
: .result ( n -- )
cr dup names type #tab emit
dup cells probs + @ s>f denom s>f f/ fdup f. #tab emit
dup cells bins + @ s>f 1e6 f/ fdup f. #tab emit
f- fabs fs. ;
: .results .header 8 0 do i .result loop ;
bins 8 cells erase 3 set-precision 1000000 trials .results Name Prob Actual Error aleph 0.2 0.2 9.90E-5 beth 0.167 0.167 4.51E-4 gimel 0.143 0.142 4.99E-4 daleth 0.125 0.125 1.82E-4 he 0.111 0.111 2.10E-4 waw 0.1 0.1 3.30E-5 zayin 0.0909 0.0912 2.77E-4 heth 0.0635 0.0636 9.70E-5 ok
[edit] Fortran
PROGRAM PROBS
IMPLICIT NONE
INTEGER, PARAMETER :: trials = 1000000
INTEGER :: i, j, probcount(8) = 0
REAL :: expected(8), mapping(8), rnum
CHARACTER(6) :: items(8) = (/ "aleph ", "beth ", "gimel ", "daleth", "he ", "waw ", "zayin ", "heth " /)
expected(1:7) = (/ (1.0/i, i=5,11) /)
expected(8) = 1.0 - SUM(expected(1:7))
mapping(1) = 1.0 / 5.0
DO i = 2, 7
mapping(i) = mapping(i-1) + 1.0/(i+4.0)
END DO
mapping(8) = 1.0
DO i = 1, trials
CALL RANDOM_NUMBER(rnum)
DO j = 1, 8
IF (rnum < mapping(j)) THEN
probcount(j) = probcount(j) + 1
EXIT
END IF
END DO
END DO
WRITE(*, "(A,I10)") "Trials: ", trials
WRITE(*, "(A,8A10)") "Items: ", items
WRITE(*, "(A,8F10.6)") "Target Probability: ", expected
WRITE(*, "(A,8F10.6)") "Attained Probability:", REAL(probcount) / REAL(trials)
ENDPROGRAM PROBS
Sample Output:
Trials: 1000000 Items: aleph beth gimel daleth he waw zayin heth Target Probability: 0.200000 0.166667 0.142857 0.125000 0.111111 0.100000 0.090909 0.063456 Attained Probability: 0.199631 0.166907 0.142488 0.124920 0.110906 0.099943 0.091775 0.063430
[edit] Go
package main
import (
"fmt"
"math/rand"
"time"
)
type mapping struct {
item string
pr float64
}
func main() {
// input mapping
m := []mapping{
{"aleph", 1 / 5.},
{"beth", 1 / 6.},
{"gimel", 1 / 7.},
{"daleth", 1 / 8.},
{"he", 1 / 9.},
{"waw", 1 / 10.},
{"zayin", 1 / 11.},
{"heth", 1759 / 27720.}} // adjusted so that probabilities add to 1
// cumulative probability
cpr := make([]float64, len(m)-1)
var c float64
for i := 0; i < len(m)-1; i++ {
c += m[i].pr
cpr[i] = c
}
// generate
const samples = 1e6
occ := make([]int, len(m))
rand.Seed(time.Now().UnixNano())
for i := 0; i < samples; i++ {
r := rand.Float64()
for j := 0; ; j++ {
if r < cpr[j] {
occ[j]++
break
}
if j == len(cpr)-1 {
occ[len(cpr)]++
break
}
}
}
// report
fmt.Println(" Item Target Generated")
var totalTarget, totalGenerated float64
for i := 0; i < len(m); i++ {
target := m[i].pr
generated := float64(occ[i]) / samples
fmt.Printf("%6s %8.6f %8.6f\n", m[i].item, target, generated)
totalTarget += target
totalGenerated += generated
}
fmt.Printf("Totals %8.6f %8.6f\n", totalTarget, totalGenerated)
}
Output:
Item Target Generated
aleph 0.200000 0.199509
beth 0.166667 0.167194
gimel 0.142857 0.143293
daleth 0.125000 0.124869
he 0.111111 0.110896
waw 0.100000 0.099849
zayin 0.090909 0.090789
heth 0.063456 0.063601
Totals 1.000000 1.000000
[edit] Haskell
import System.Random
import Data.List
import Control.Monad
import Control.Arrow
labels = ["aleph", "beth", "gimel", "daleth", "he", "waw", "zayin", "heth" ]
piv n = take n . (++ repeat ' ')
main = do
g <- newStdGen
let rs,ps :: [Float]
rs = take 1000000 $ randomRs(0,1) g
ps = ap (++) (return. (1 -) .sum) $ map recip [5..11]
sps = scanl1 (+) ps
qs = (\xs -> map ((/1000000.0).fromIntegral.length. flip filter xs. (==))sps)
$ map (head . flip dropWhile sps . (>)) rs
putStrLn $ " expected actual"
mapM_ putStrLn $ zipWith3
(\l s c-> (piv 7 l) ++ (piv 13 $ show $ s)
++(piv 12 $ show $ c)) labels ps qs
Example run:
*Main> main
expected actual
aleph 0.2 0.201063
beth 0.16666667 0.166593
gimel 0.14285715 0.142174
daleth 0.125 0.124918
he 0.11111111 0.110926
waw 0.1 9.984e-2
zayin 9.090909e-2 9.1111e-2
heth 6.345594e-2 6.3375e-2
[edit] HicEst
REAL :: trials=1E6, n=8, map(n), limit(n), expected(n), outcome(n)
expected = 1 / ($ + 4)
expected(n) = 1 - SUM(expected) + expected(n)
map = expected
map = map($) + map($-1)
DO i = 1, trials
random = RAN(1)
limit = random > map
item = INDEX(limit, 0)
outcome(item) = outcome(item) + 1
ENDDO
outcome = outcome / trials
DLG(Text=expected, Text=outcome, Y=0)
Exported from the spreadsheet-like DLG function:
0.2 0.199908 0.1666667 0.166169 0.1428571 0.142722 0.125 0.124929 0.1111111 0.111706 0.1 0.099863 0.0909091 0.090965 0.063456 0.063738
[edit] Icon and Unicon
record Item(value, probability)
procedure find_item (items, v)
sum := 0.0
every item := !items do {
if v < sum+item.probability
then return item.value
else sum +:= item.probability
}
fail # v exceeded 1.0
end
# -- helper procedures
# count the number of occurrences of i in list l,
# assuming the items are strings
procedure count (l, i)
result := 0.0
every x := !l do
if x == i then result +:= 1
return result
end
procedure rand_float ()
return ?1000/1000.0
end
# -- test the procedure
procedure main ()
items := [
Item("aleph", 1/5.0),
Item("beth", 1/6.0),
Item("gimel", 1/7.0),
Item("daleth", 1/8.0),
Item("he", 1/9.0),
Item("waw", 1/10.0),
Item("zayin", 1/11.0),
Item("heth", 1759/27720.0)
]
# collect a sample of results
sample := []
every (1 to 1000000) do push (sample, find_item(items, rand_float ()))
# return comparison of expected vs actual probability
every item := !items do
write (right(item.value, 7) || " " ||
left(item.probability, 15) || " " ||
left(count(sample, item.value)/*sample, 6))
end
Output:
aleph 0.2 0.1988
beth 0.1666666667 0.1676
gimel 0.1428571429 0.1431
daleth 0.125 0.1249
he 0.1111111111 0.1112
waw 0.1 0.0996
zayin 0.09090909091 0.0908
heth 0.06345598846 0.0636
[edit] J
main=: verb define
hdr=. ' target actual '
lbls=. ; ,:&.> ;:'aleph beth gimel daleth he waw zayin heth'
prtn=. +/\ pt=. (, 1-+/)1r1%5+i.7
da=. prtn I. ?y # 0
pa=. y%~ +/ da =/ i.8
hdr, lbls,. 9j6 ": |: pt,:pa
)
Note 'named abbreviations'
hdr (header)
lbls (labels)
pt (target proportions)
prtn (partitions corresponding to target proportions)
da (distribution of actual values among partitions)
pa (actual proportions)
)
Example use:
main 1e6
target actual
aleph 0.200000 0.200344
beth 0.166667 0.166733
gimel 0.142857 0.142611
daleth 0.125000 0.124458
he 0.111111 0.111455
waw 0.100000 0.099751
zayin 0.090909 0.091121
heth 0.063456 0.063527
Note that there is no rounding error in summing the proportions, as they are represented as rational numbers, not floating-point approximations.
pt=. (, 1-+/)1r1%5+i.7
pt
1r5 1r6 1r7 1r8 1r9 1r10 1r11 1759r27720
+/pt
1
[edit] Java
public class Prob{
static long TRIALS= 1000000;
private static class Expv{
public String name;
public int probcount;
public double expect;
public double mapping;
public Expv(String name, int probcount, double expect, double mapping){
this.name= name;
this.probcount= probcount;
this.expect= expect;
this.mapping= mapping;
}
}
static Expv[] items=
{new Expv("aleph", 0, 0.0, 0.0), new Expv("beth", 0, 0.0, 0.0),
new Expv("gimel", 0, 0.0, 0.0),
new Expv("daleth", 0, 0.0, 0.0),
new Expv("he", 0, 0.0, 0.0), new Expv("waw", 0, 0.0, 0.0),
new Expv("zayin", 0, 0.0, 0.0),
new Expv("heth", 0, 0.0, 0.0)};
public static void main(String[] args){
int i, j;
double rnum, tsum= 0.0;
for(i= 0, rnum= 5.0;i < 7;i++, rnum+= 1.0){
items[i].expect= 1.0 / rnum;
tsum+= items[i].expect;
}
items[7].expect= 1.0 - tsum;
items[0].mapping= 1.0 / 5.0;
for(i= 1;i < 7;i++){
items[i].mapping= items[i - 1].mapping + 1.0 / ((double)i + 5.0);
}
items[7].mapping= 1.0;
for(i= 0;i < TRIALS;i++){
rnum= Math.random();
for(j= 0;j < 8;j++){
if(rnum < items[j].mapping){
items[j].probcount++;
break;
}
}
}
System.out.printf("Trials: %d\n", TRIALS);
System.out.printf("Items: ");
for(i= 0;i < 8;i++)
System.out.printf("%-8s ", items[i].name);
System.out.printf("\nTarget prob.: ");
for(i= 0;i < 8;i++)
System.out.printf("%8.6f ", items[i].expect);
System.out.printf("\nAttained prob.: ");
for(i= 0;i < 8;i++)
System.out.printf("%8.6f ", (double)(items[i].probcount)
/ (double)TRIALS);
System.out.printf("\n");
}
}
Output:
Trials: 1000000 Items: aleph beth gimel daleth he waw zayin heth Target prob.: 0.200000 0.166667 0.142857 0.125000 0.111111 0.100000 0.090909 0.063456 Attained prob.: 0.199615 0.167517 0.142612 0.125211 0.110970 0.099614 0.091002 0.063459
import java.util.EnumMap;
public class Prob {
public static long TRIALS= 1000000;
public enum Glyph{
ALEPH, BETH, GIMEL, DALETH, HE, WAW, ZAYIN, HETH;
}
public static EnumMap<Glyph, Double> probs = new EnumMap<Glyph, Double>(Glyph.class){{
put(Glyph.ALEPH, 1/5.0);
put(Glyph.BETH, 1/6.0);
put(Glyph.GIMEL, 1/7.0);
put(Glyph.DALETH, 1/8.0);
put(Glyph.HE, 1/9.0);
put(Glyph.WAW, 1/10.0);
put(Glyph.ZAYIN, 1/11.0);
put(Glyph.HETH, 1759./27720);
}};
public static EnumMap<Glyph, Double> counts = new EnumMap<Glyph, Double>(Glyph.class){{
put(Glyph.ALEPH, 0.);put(Glyph.BETH, 0.);
put(Glyph.GIMEL, 0.);put(Glyph.DALETH, 0.);
put(Glyph.HE, 0.);put(Glyph.WAW, 0.);
put(Glyph.ZAYIN, 0.);put(Glyph.HETH, 0.);
}};
public static void main(String[] args){
System.out.println("Target probabliities:\t" + probs);
for(long i = 0; i < TRIALS; i++){
Glyph choice = getChoice();
counts.put(choice, counts.get(choice) + 1);
}
//correct the counts to probablities in (0..1]
for(Glyph glyph:counts.keySet()){
counts.put(glyph, counts.get(glyph) / TRIALS);
}
System.out.println("Actual probabliities:\t" + counts);
}
private static Glyph getChoice() {
double rand = Math.random();
for(Glyph item:Glyph.values()){
if(rand < probs.get(item)){
return item;
}
rand -= probs.get(item);
}
return null;
}
}
Output:
Target probabliities: {ALEPH=0.2, BETH=0.16666666666666666, GIMEL=0.14285714285714285, DALETH=0.125, HE=0.1111111111111111, WAW=0.1, ZAYIN=0.09090909090909091, HETH=0.06345598845598846}
Actual probabliities: {ALEPH=0.200794, BETH=0.165916, GIMEL=0.143286, DALETH=0.124727, HE=0.110818, WAW=0.100168, ZAYIN=0.090878, HETH=0.063413}
[edit] JavaScript
Fortunately, iterating over properties added to an object maintains the insertion order.
var probabilities = {
aleph: 1/5.0,
beth: 1/6.0,
gimel: 1/7.0,
daleth: 1/8.0,
he: 1/9.0,
waw: 1/10.0,
zayin: 1/11.0,
heth: 1759/27720
};
var sum = 0;
var iterations = 1000000;
var cumulative = {};
var randomly = {};
for (var name in probabilities) {
sum += probabilities[name];
cumulative[name] = sum;
randomly[name] = 0;
}
for (var i = 0; i < iterations; i++) {
var r = Math.random();
for (var name in cumulative) {
if (r <= cumulative[name]) {
randomly[name]++;
break;
}
}
}
for (var name in probabilities)
// using WSH
WScript.Echo(name + "\t" + probabilities[name] + "\t" + randomly[name]/iterations);
output:
aleph 0.2 0.200597 beth 0.16666666666666666 0.166527 gimel 0.14285714285714285 0.142646 daleth 0.125 0.124613 he 0.1111111111111111 0.111342 waw 0.1 0.099888 zayin 0.09090909090909091 0.091141 heth 0.06345598845598846 0.063246
[edit] Liberty BASIC
names$="aleph beth gimel daleth he waw zayin heth"
dim sum(8)
dim counter(8)
s = 0
for i = 1 to 7
s = s+1/(i+4)
sum(i)=s
next
N =1000000 ' number of throws
for i =1 to N
rand =rnd( 1)
for j = 1 to 7
if sum(j)> rand then exit for
next
counter(j)=counter(j)+1
next
print "Observed", "Intended"
for i = 1 to 8
print word$(names$, i), using( "#.#####", counter(i) /N), using( "#.#####", 1/(i+4))
next
[edit] Lua
items = {}
items["aleph"] = 1/5.0
items["beth"] = 1/6.0
items["gimel"] = 1/7.0
items["daleth"] = 1/8.0
items["he"] = 1/9.0
items["waw"] = 1/10.0
items["zayin"] = 1/11.0
items["heth"] = 1759/27720
num_trials = 1000000
samples = {}
for item, _ in pairs( items ) do
samples[item] = 0
end
math.randomseed( os.time() )
for i = 1, num_trials do
z = math.random()
for item, _ in pairs( items ) do
if z < items[item] then
samples[item] = samples[item] + 1
break;
else
z = z - items[item]
end
end
end
for item, _ in pairs( items ) do
print( item, samples[item]/num_trials, items[item] )
end
Output
gimel 0.142606 0.14285714285714 heth 0.063434 0.063455988455988 beth 0.166788 0.16666666666667 zayin 0.091097 0.090909090909091 daleth 0.124772 0.125 aleph 0.200541 0.2 he 0.1107 0.11111111111111 waw 0.100062 0.1
[edit] Mathematica
Built-in function can already do a weighted random choosing. Example for making a million random choices would be:
choices={{"aleph", 1/5},{"beth", 1/6},{"gimel", 1/7},{"daleth", 1/8},{"he", 1/9},{"waw", 1/10},{"zayin", 1/11},{"heth", 1759/27720}};
data=RandomChoice[choices[[All,2]]->choices[[All,1]],10^6];
To compare the data we use the following code to make a table:
Grid[{#[[1]],N[Count[data,#[[1]]]/10^6],N[#[[2]]]}&/@choices]
gives back (item, attained prob., target prob.):
aleph 0.200036 0.2 beth 0.166591 0.166667 gimel 0.142699 0.142857 daleth 0.125018 0.125 he 0.111306 0.111111 waw 0.100433 0.1 zayin 0.090671 0.0909091 heth 0.063246 0.063456
[edit] OCaml
let p = [
"Aleph", 1.0 /. 5.0;
"Beth", 1.0 /. 6.0;
"Gimel", 1.0 /. 7.0;
"Daleth", 1.0 /. 8.0;
"He", 1.0 /. 9.0;
"Waw", 1.0 /. 10.0;
"Zayin", 1.0 /. 11.0;
"Heth", 1759.0 /. 27720.0;
]
let rec take k = function
| (v, p)::tl -> if k < p then v else take (k -. p) tl
| _ -> invalid_arg "take"
let () =
let n = 1_000_000 in
Random.self_init();
let h = Hashtbl.create 3 in
List.iter (fun (v, _) -> Hashtbl.add h v 0) p;
let tot = List.fold_left (fun acc (_, p) -> acc +. p) 0.0 p in
for i = 1 to n do
let sel = take (Random.float tot) p in
let n = Hashtbl.find h sel in
Hashtbl.replace h sel (succ n) (* count the number of each item *)
done;
List.iter (fun (v, p) ->
let d = Hashtbl.find h v in
Printf.printf "%s \t %f %f\n" v p (float d /. float n)
) p
Output:
Aleph 0.200000 0.200272 Beth 0.166667 0.166381 Gimel 0.142857 0.142497 Daleth 0.125000 0.125005 He 0.111111 0.111272 Waw 0.100000 0.100069 Zayin 0.090909 0.091136 Heth 0.063456 0.063368
[edit] PARI/GP
pc()={
my(v=[5544,10164,14124,17589,20669,23441,25961,27720],u=vector(8),e);
for(i=1,1e6,
my(r=random(27720));
for(j=1,8,
if(r<v[j], u[j]++; break)
)
);
e=precision([1/5,1/6,1/7,1/8,1/9,1/10,1/11,1759/27720]*1e6,9); \\ truncate to 9 decimal places
print("Totals: "u);
print("Expected: "e);
print("Diff: ",u-e);
print("StDev: ",vector(8,i,sqrt(abs(u[i]-v[i])/e[i])));
};
[edit] Perl
use List::Util qw(first sum);
use constant TRIALS => 1e6;
sub prob_choice_picker {
my %options = @_;
my ($n, @a) = 0;
while (my ($k,$v) = each %options) {
$n += $v;
push @a, [$n, $k];
}
return sub {
my $r = rand;
( first {$r <= $_->[0]} @a )->[1];
};
}
my %ps =
(aleph => 1/5,
beth => 1/6,
gimel => 1/7,
daleth => 1/8,
he => 1/9,
waw => 1/10,
zayin => 1/11);
$ps{heth} = 1 - sum values %ps;
my $picker = prob_choice_picker %ps;
my %results;
for (my $n = 0 ; $n < TRIALS ; ++$n) {
++$results{$picker->()};
}
print "Event Occurred Expected Difference\n";
foreach (sort {$results{$b} <=> $results{$a}} keys %results) {
printf "%-6s %f %f %f\n",
$_, $results{$_}/TRIALS, $ps{$_},
abs($results{$_}/TRIALS - $ps{$_});
}
Sample output:
Event Occurred Expected Difference aleph 0.198915 0.200000 0.001085 beth 0.166804 0.166667 0.000137 gimel 0.142992 0.142857 0.000135 daleth 0.125155 0.125000 0.000155 he 0.111160 0.111111 0.000049 waw 0.100229 0.100000 0.000229 zayin 0.091014 0.090909 0.000105 heth 0.063731 0.063456 0.000275
[edit] Perl 6
constant TRIALS = 1e4;
sub prob_choice_picker (%options is copy) {
my $n = 0;
$_ = ($n += $_) for values %options;
return sub {
my $r = rand;
(first { $r < .value }, %options).key;
};
}
my %ps = (
(map {$^w => 1/$^n}, (5 .. 11 Z <aleph beth gimel daleth he waw zayin>)),
heth => 0
);
%ps<heth> = 1 - [+] values %ps;
&picker = prob_choice_picker %ps;
my %results;
++%results{picker} for ^TRIALS;
say 'Event Occurred Expected Difference';
for sort { .value }, %results {
printf "%-6s %f %f %f\n",
.key, .value/TRIALS, %ps{.key},
abs( .value/TRIALS - %ps{.key} );
}
Sample output:
Event Occurred Expected Difference heth 0.060900 0.063456 0.002556 zayin 0.090200 0.090909 0.000709 waw 0.096300 0.100000 0.003700 he 0.111400 0.111111 0.000289 daleth 0.130100 0.125000 0.005100 gimel 0.143800 0.142857 0.000943 beth 0.161200 0.166667 0.005467 aleph 0.206100 0.200000 0.006100
[edit] PicoLisp
(let (Count 1000000 Denom 27720 N Denom)
(let Probs
(mapcar
'((I S)
(prog1 (cons N (*/ Count I) 0 S)
(dec 'N (/ Denom I)) ) )
(range 5 12)
'(aleph beth gimel daleth he waw zayin heth) )
(do Count
(inc (cddr (rank (rand 1 Denom) Probs T))) )
(let Fmt (-6 12 12)
(tab Fmt NIL "Probability" "Result")
(for X Probs
(tab Fmt
(cdddr X)
(format (cadr X) 6)
(format (caddr X) 6) ) ) ) ) )
Output:
Probability Result aleph 0.200000 0.199760 beth 0.166667 0.166878 gimel 0.142857 0.142977 daleth 0.125000 0.124983 he 0.111111 0.111200 waw 0.100000 0.100173 zayin 0.090909 0.090591 heth 0.083333 0.063438
[edit] PureBasic
#times=1000000
Structure Item
name.s
prob.d
Amount.i
EndStructure
If OpenConsole()
Define i, j, d.d, e.d, txt.s
Dim Mapps.Item(7)
Mapps(0)\name="aleph": Mapps(0)\prob=1/5.0
Mapps(1)\name="beth": Mapps(1)\prob=1/6.0
Mapps(2)\name="gimel": Mapps(2)\prob=1/7.0
Mapps(3)\name="daleth":Mapps(3)\prob=1/8.0
Mapps(4)\name="he": Mapps(4)\prob=1/9.0
Mapps(5)\name="waw": Mapps(5)\prob=1/10.0
Mapps(6)\name="zayin": Mapps(6)\prob=1/11.0
Mapps(7)\name="heth": Mapps(7)\prob=1759/27720.0
For i=1 To #times
d=Random(#MAXLONG)/#MAXLONG ; Get a random number
e=0.0
For j=0 To ArraySize(Mapps())
e+Mapps(j)\prob ; Get span for current itme
If d<=e ; Check if it is within this span?
Mapps(j)\Amount+1 ; If so, count it.
Break
EndIf
Next j
Next i
PrintN("Sample times: "+Str(#times)+#CRLF$)
For j=0 To ArraySize(Mapps())
d=Mapps(j)\Amount/#times
txt=LSet(Mapps(j)\name,7)+" should be "+StrD(Mapps(j)\prob)+" is "+StrD(d)
PrintN(txt+" | Deviatation "+RSet(StrD(100.0-100.0*Mapps(j)\prob/d,3),6)+"%")
Next
Print(#CRLF$+"Press ENTER to exit"):Input()
CloseConsole()
EndIf
Output may look like
Sample times: 1000000 aleph should be 0.2000000000 is 0.1995520000 | Deviatation -0.225% beth should be 0.1666666667 is 0.1673270000 | Deviatation 0.395% gimel should be 0.1428571429 is 0.1432040000 | Deviatation 0.242% daleth should be 0.1250000000 is 0.1251850000 | Deviatation 0.148% he should be 0.1111111111 is 0.1109550000 | Deviatation -0.141% waw should be 0.1000000000 is 0.0999220000 | Deviatation -0.078% zayin should be 0.0909090909 is 0.0902240000 | Deviatation -0.759% heth should be 0.0634559885 is 0.0636310000 | Deviatation 0.275% Press ENTER to exit
[edit] Python
Two different algorithms are coded.
import random, bisect
def probchoice(items, probs):
'''\
Splits the interval 0.0-1.0 in proportion to probs
then finds where each random.random() choice lies
'''
prob_accumulator = 0
accumulator = []
for p in probs:
prob_accumulator += p
accumulator.append(prob_accumulator)
while True:
r = random.random()
yield items[bisect.bisect(accumulator, r)]
def probchoice2(items, probs, bincount=10000):
'''\
Puts items in bins in proportion to probs
then uses random.choice() to select items.
Larger bincount for more memory use but
higher accuracy (on avarage).
'''
bins = []
for item,prob in zip(items, probs):
bins += [item]*int(bincount*prob)
while True:
yield random.choice(bins)
def tester(func=probchoice, items='good bad ugly'.split(),
probs=[0.5, 0.3, 0.2],
trials = 100000
):
def problist2string(probs):
'''\
Turns a list of probabilities into a string
Also rounds FP values
'''
return ",".join('%8.6f' % (p,) for p in probs)
from collections import defaultdict
counter = defaultdict(int)
it = func(items, probs)
for dummy in xrange(trials):
counter[it.next()] += 1
print "\n##\n## %s\n##" % func.func_name.upper()
print "Trials: ", trials
print "Items: ", ' '.join(items)
print "Target probability: ", problist2string(probs)
print "Attained probability:", problist2string(
counter[x]/float(trials) for x in items)
if __name__ == '__main__':
items = 'aleph beth gimel daleth he waw zayin heth'.split()
probs = [1/(float(n)+5) for n in range(len(items))]
probs[-1] = 1-sum(probs[:-1])
tester(probchoice, items, probs, 1000000)
tester(probchoice2, items, probs, 1000000)
Sample output:
## ## PROBCHOICE ## Trials: 1000000 Items: aleph beth gimel daleth he waw zayin heth Target probability: 0.200000,0.166667,0.142857,0.125000,0.111111,0.100000,0.090909,0.063456 Attained probability: 0.200050,0.167109,0.143364,0.124690,0.111237,0.099661,0.090338,0.063551 ## ## PROBCHOICE2 ## Trials: 1000000 Items: aleph beth gimel daleth he waw zayin heth Target probability: 0.200000,0.166667,0.142857,0.125000,0.111111,0.100000,0.090909,0.063456 Attained probability: 0.199720,0.166424,0.142474,0.124561,0.111511,0.100313,0.091316,0.063681
[edit] R
prob = c(aleph=1/5, beth=1/6, gimel=1/7, daleth=1/8, he=1/9, waw=1/10, zayin=1/11, heth=1759/27720)
# Note that R doesn't actually require the weights
# vector for rmultinom to sum to 1.
hebrew = c(rmultinom(1, 1e6, prob))
d = data.frame(
Requested = prob,
Obtained = hebrew/sum(hebrew))
print(d)
Sample output:
Requested Obtained aleph 0.20000000 0.200311 beth 0.16666667 0.167160 gimel 0.14285714 0.141997 daleth 0.12500000 0.124644 he 0.11111111 0.110984 waw 0.10000000 0.099927 zayin 0.09090909 0.091365 heth 0.06345599 0.063612
A histogram of the data is also possible using, for example,
library(ggplot2)
qplot(factor(names(prob), levels = names(prob)), hebrew, geom = "histogram")
[edit] Racket
probabalistic-choice uses inexact (float) arithmetic
probabalistic-choice/exact uses fractions and greatest common denominators and the likes
The test submodule is used for unit tests, and is not run when this code is loaded as a module. Either run the program in DrRacket or run `raco test prob-choice.rkt`
#lang racket
;;; returns a probabalistic choice from the sequence choices
;;; choices generates two values -- the chosen value and a
;;; probability (weight) of the choice.
;;;
;;; Note that a hash where keys are choices and values are probabilities
;;; is such a sequence.
;;;
;;; if the total probability < 1 then choice could return #f
;;; if the total probability > 1 then some choices may be impossible
(define (probabalistic-choice choices)
(let-values
(((_ choice) ;; the fold provides two values, we only need the second
;; the first will always be a negative number showing that
;; I've run out of random steam
(for/fold
((rnd (random))
(choice #f))
(((v p) choices)
#:break (<= rnd 0))
(values (- rnd p) v))))
choice))
;;; ditto, but all probabilities must be exact rationals
;;; the optional lcd
;;;
;;; not the most efficient, since it provides a wrapper (and demo)
;;; for p-c/i-w below
(define (probabalistic-choice/exact
choices
#:gcd (GCD (/ (apply gcd (hash-values choices)))))
(probabalistic-choice/integer-weights
(for/hash (((k v) choices))
(values k (* v GCD)))
#:sum-of-weights GCD))
;;; this proves useful in Rock-Paper-Scissors
(define (probabalistic-choice/integer-weights
choices
#:sum-of-weights (sum-of-weights (apply + (hash-values choices))))
(let-values
(((_ choice)
(for/fold
((rnd (random sum-of-weights))
(choice #f))
(((v p) choices)
#:break (< rnd 0))
(values (- rnd p) v))))
choice))
(module+ test
(define test-samples (make-parameter 1000000))
(define (test-p-c-function f w)
(define test-selection (make-hash))
(for* ((i (in-range 0 (test-samples)))
(c (in-value (f w))))
(when (zero? (modulo i 100000)) (eprintf "~a," (quotient i 100000)))
(hash-update! test-selection c add1 0))
(printf "~a~%choice\tcount\texpected\tratio\terror~%" f)
(for* (((k v) (in-hash test-selection))
(e (in-value (* (test-samples) (hash-ref w k)))))
(printf "~a\t~a\t~a\t~a\t~a%~%"
k v e
(/ v (test-samples))
(real->decimal-string
(exact->inexact (* 100 (/ (- v e) e)))))))
(define test-weightings/rosetta
(hash
'aleph 1/5
'beth 1/6
'gimel 1/7
'daleth 1/8
'he 1/9
'waw 1/10
'zayin 1/11
'heth 1759/27720; adjusted so that probabilities add to 1
))
(define test-weightings/50:50 (hash 'woo 1/2 'yay 1/2))
(define test-weightings/1:2:3 (hash 'woo 1 'yay 2 'foo 3))
(test-p-c-function probabalistic-choice test-weightings/50:50)
(test-p-c-function probabalistic-choice/exact test-weightings/50:50)
(test-p-c-function probabalistic-choice test-weightings/rosetta)
(test-p-c-function probabalistic-choice/exact test-weightings/rosetta))
Output (note that the progress counts, which go to standard error, are interleaved with the output on standard out)
0,1,2,3,4,5,6,7,8,9,#<procedure:probabalistic-choice> choice count expected ratio error yay 499744 500000 15617/31250 -0.05% woo 500256 500000 15633/31250 0.05% 0,1,2,3,4,5,6,7,8,9,#<procedure:probabalistic-choice/exact> choice count expected ratio error yay 499852 500000 124963/250000 -0.03% woo 500148 500000 125037/250000 0.03% 0,1,2,3,4,5,6,7,8,9,#<procedure:probabalistic-choice> choice count expected ratio error daleth 124964 125000 31241/250000 -0.03% zayin 90233 1000000/11 90233/1000000 -0.74% gimel 142494 1000000/7 71247/500000 -0.25% heth 64045 43975000/693 12809/200000 0.93% aleph 199690 200000 19969/100000 -0.15% beth 166861 500000/3 166861/1000000 0.12% waw 100075 100000 4003/40000 0.07% he 111638 1000000/9 55819/500000 0.47% 0,1,2,3,4,5,6,7,8,9,#<procedure:probabalistic-choice/exact> choice count expected ratio error beth 166423 500000/3 166423/1000000 -0.15% heth 63462 43975000/693 31731/500000 0.01% daleth 125091 125000 125091/1000000 0.07% waw 99820 100000 4991/50000 -0.18% aleph 200669 200000 200669/1000000 0.33% gimel 142782 1000000/7 71391/500000 -0.05% zayin 90478 1000000/11 45239/500000 -0.47% he 111275 1000000/9 4451/40000 0.15%
[edit] REXX
/*REXX pg shows results of probabilistic choices (gen rand#s per prob.) */
parse arg trials digits seed . /*obtain some optional arguments.*/
if trials=='' | trials==',' then trials=1000000
if digits=='' | digits==',' then digits=15; digits=max(10,digits)
if seed\=='' then call random ,,seed /*for repeatability*/
names='aleph beth gimel daleth he waw zayin heth ──totals───►'
cells=words(names) - 1; high=100000; s=0; !.=0
_=4
do n=1 for 7; _=_+1; prob.n=1/_; Hprob.n=prob.n*high; s=s+prob.n
end /*n*/ /* [↑] determine probabilities. */
prob.8=1759/27720; Hprob.8=prob.8*high; s=s+prob.8; prob.9=s; !.9=trials
do j=1 for trials; r=random(1,high) /*generate X number of random #s.*/
do k=1 for cells /*now, for each cell, compute %s.*/
if r<=Hprob.k then !.k=!.k+1 /*for each range, bump da counter*/
end /*k*/
end /*j*/
w=digits+6; d=max(length(trials), length('count')) + 4
say center('name',15,'─') center('count',d,'─') center('target %',w,'─'),
center('actual %',w,'─') /*display a formatted header line*/
do i=1 for cells+1 /*show for each cell and totals. */
say ' ' left(word(names,i) , 12),
right(!.i , d-2) ' ',
left(format(prob.i *100, d), w-2),
left(format(!.i/trials*100, d), w-2)
if i==8 then say center('',15,'─') center('',d,'─'),
center('', w,'─') center('',w,'─')
end /*i*/
/*stick a fork in it, we're done.*/
output when using the default input
─────name────── ───count─── ──────target %─────── ──────actual %─────── aleph 200099 20 20.0099 beth 166722 16.6666667 16.6722 gimel 142792 14.2857143 14.2792 daleth 125060 12.5 12.506 he 111242 11.1111111 11.1242 waw 100216 10 10.0216 zayin 91126 9.0909090 9.1126 heth 63584 6.3455988 6.3584 ─────────────── ─────────── ───────────────────── ───────────────────── ──totals───► 1000000 100 100
[edit] Ruby
ordered_keys = ["aleph", "beth", "gimel", "daleth", "he", "waw", "zayin", "heth"]
probabilities = {
"aleph" => 1/5.0,
"beth" => 1/6.0,
"gimel" => 1/7.0,
"daleth" => 1/8.0,
"he" => 1/9.0,
"waw" => 1/10.0,
"zayin" => 1/11.0,
}
probabilities["heth"] = probabilities.each_value.inject(1) {|heth, value| heth -= value}
sums = {}
ordered_keys.each.inject(0) do |sum, key|
sum += probabilities[key]
sums[key] = sum
end
actual = Hash.new(0)
samples = 1_000_000
samples.times do
r = rand
for k in ordered_keys
if r < sums[k]
actual[k] += 1
break
end
end
end
printf "%-6s %-19s %s\n", "key", "expected", "actual"
for k in ordered_keys
printf "%-6s %.17f %.6f\n", k, probabilities[k], Float(actual[k])/samples
end
key expected actual aleph 0.20000000000000001 0.199616 beth 0.16666666666666666 0.166648 gimel 0.14285714285714285 0.142863 daleth 0.12500000000000000 0.125620 he 0.11111111111111110 0.111113 waw 0.10000000000000001 0.099600 zayin 0.09090909090909091 0.090842 heth 0.06345598845598854 0.063698
[edit] Seed7
To reduce the runtime this program should be compiled.
$ include "seed7_05.s7i";
include "float.s7i";
const type: letter is new enum
aleph, beth, gimel, daleth, he, waw, zayin, heth
end enum;
const func string: str (in letter: aLetter) is
return [] ("aleph", "beth", "gimel", "daleth", "he", "waw", "zayin", "heth") [succ(ord(aLetter))];
enable_output(letter);
const array [letter] integer: table is [letter] (
5544, 4620, 3960, 3465, 3080, 2772, 2520, 1759);
const func letter: randomLetter is func
result
var letter: resultLetter is aleph;
local
var integer: number is 0;
begin
number := rand(1, 27720);
while number > table[resultLetter] do
number -:= table[resultLetter];
incr(resultLetter);
end while;
end func;
const proc: main is func
local
var integer: count is 0;
var letter: aLetter is aleph;
var array [letter] integer: occurrence is letter times 0;
begin
for count range 1 to 1000000 do
aLetter := randomLetter;
incr(occurrence[aLetter]);
end for;
writeln("Name Count Ratio Expected");
for aLetter range letter.first to letter.last do
writeln(aLetter rpad 7 <& occurrence[aLetter] lpad 6 <&
flt(occurrence[aLetter]) / 10000.9 digits 4 lpad 8 <& "%" <&
100.0 * flt(table[aLetter]) / 27720.0 digits 4 lpad 8 <& "%");
end for;
end func;
Outout:
Name Count Ratio Expected aleph 199788 19.9770% 20.0000% beth 166897 16.6882% 16.6667% gimel 143103 14.3090% 14.2857% daleth 125060 12.5049% 12.5000% he 110848 11.0838% 11.1111% waw 99550 9.9541% 10.0000% zayin 90918 9.0910% 9.0909% heth 63836 6.3830% 6.3456%
[edit] scheme
[edit] Imperative
An imperative interpretation.
SRFI-27 at schemers.org#!/usr/local/bin/gosh
(use srfi-27)
(random-source-randomize! default-random-source)
(define +iterations+ 1000000)
(define *sym-probs* (list
(list 'aleph (inexact (/ 1.0 5.0)) 0)
(list 'beth (inexact (/ 1.0 6.0)) 0)
(list 'gimel (inexact (/ 1.0 7.0)) 0)
(list 'daleth (inexact (/ 1.0 8.0)) 0)
(list 'he (inexact (/ 1.0 9.0)) 0)
(list 'waw (inexact (/ 1.0 10.0)) 0)
(list 'zayin (inexact (/ 1.0 11.0)) 0)
(list 'heth (inexact (/ 1759.0 27720.0)) 0)))
(define (get-sym sym-num)
(car (list-ref *sym-probs* sym-num)))
(define (get-prob sym-num)
(cadr (list-ref *sym-probs* sym-num)))
(define (get-count sym-num)
(caddr (list-ref *sym-probs* sym-num)))
(define (inc-count sym-num)
(inc! (caddr (car (list-tail *sym-probs* sym-num)))))
(define (main args)
(distribute +iterations+)
(report))
(define (distribute iter)
(if (> iter 0)
(let ((rnd (random-real)))
(accumulate rnd)
(distribute (- iter 1)))))
(define (accumulate rnd)
(let loop ((r rnd) (sym-num 0))
(if (or (<= r (get-prob sym-num)) (>= sym-num (length *sym-probs*)))
(inc-count sym-num)
(loop (- r (get-prob sym-num)) (+ sym-num 1)))))
(define (report)
(format #t "symbol count actual expected~%")
(format #t "-------- -------- -------- --------~%")
(let loop ((sym-num 0))
(if (< sym-num (length *sym-probs*))
(let* ((sym (get-sym sym-num))
(prb (get-prob sym-num))
(cnt (get-count sym-num))
(act-prb (inexact (/ cnt +iterations+))))
(format #t "~8a ~8a ~8a ~8a~%" sym cnt act-prb prb)
(loop (+ sym-num 1))))))
Example output:
symbol count actual expected -------- -------- -------- -------- aleph 200044 0.200044 0.2 beth 166691 0.166691 0.16666666666666666 gimel 142440 0.14244 0.14285714285714285 daleth 124751 0.124751 0.125 he 111712 0.111712 0.1111111111111111 waw 99894 0.099894 0.1 zayin 90971 0.090971 0.09090909090909091 heth 63497 0.063497 0.06345598845598846
[edit] Tcl
package require Tcl 8.5
set map [dict create]
set sum 0.0
foreach name {aleph beth gimel daleth he waw zayin} \
prob {1/5.0 1/6.0 1/7.0 1/8.0 1/9.0 1/10.0 1/11.0} \
{
set prob [expr $prob]
set sum [expr {$sum + $prob}]
dict set map $name [dict create probability $prob limit $sum count 0]
}
dict set map heth [dict create probability [expr {1.0 - $sum}] limit 1.0 count 0]
set samples 1000000
for {set i 0} {$i < $samples} {incr i} {
set n [expr {rand()}]
foreach name [dict keys $map] {
if {$n <= [dict get $map $name limit]} {
set count [dict get $map $name count]
dict set map $name count [incr count]
break
}
}
}
puts "using $samples samples:"
puts [format "%-10s %-21s %-9s %s" "" expected actual difference]
dict for {name submap} $map {
dict with submap {
set actual [expr {$count * 1.0 / $samples}]
puts [format "%-10s %-21s %-9s %4.2f%%" $name $probability $actual \
[expr {abs($actual - $probability)/$probability*100.0}]
]
}
}
using 1000000 samples:
expected actual difference
aleph 0.2 0.199641 0.18%
beth 0.16666666666666666 0.1674 0.44%
gimel 0.14285714285714285 0.143121 0.18%
daleth 0.125 0.124864 0.11%
he 0.1111111111111111 0.111036 0.07%
waw 0.1 0.100021 0.02%
zayin 0.09090909090909091 0.09018 0.80%
heth 0.06345598845598843 0.063737 0.44%
[edit] Ursala
The stochasm library function used here constructs a weighted non-deterministic choice of a set of functions. The pseudo-random number generator is a 64 bit Mersenne twistor implemented by the run time system.
#import std
#import nat
#import flo
outcomes = <'aleph ','beth ','gimel ','daleth','he ','waw ','zayin ','heth '>
probabilities = ^lrNCT(~&,minus/1.+ plus:-0) div/*1. float* skip/5 iota12
simulation =
^(~&rn,div+ float~~rmPlX)^*D/~& iota; ^A(~&h,length)*K2+ * stochasm@p/probabilities !* outcomes
format =
:/' frequency probability'+ * ^lrlrTPT/~&n (printf/'%12.8f')^~/~&m outcomes-$probabilities@n
#show+
results = format simulation 1000000
output:
frequency probability
daleth 0.12484500 0.12500000
beth 0.16680600 0.16666667
aleph 0.19973700 0.20000000
waw 0.10016900 0.10000000
gimel 0.14293100 0.14285714
he 0.11131100 0.11111111
zayin 0.09104700 0.09090909
heth 0.06315400 0.06345599
[edit] XPL0
include c:\cxpl\codes;
def Size = 10_000_000;
int Tbl(12+1);
int I, J, N;
real X, S0, S1;
[for J:= 5 to 12 do Tbl(J):= 0;
for I:= 0 to 1_000_000-1 do \generate one million items
[N:= Ran(Size);
for J:= 5 to 11 do
[N:= N - Size/J;
if N < 0 then [Tbl(J):= Tbl(J)+1; J:= 100];
];
if J=12 then Tbl(12):= Tbl(12)+1;
];
S0:= 0.0; S1:= 0.0;
for J:= 5 to 11 do
[X:= 1.0/float(J); RlOut(0, X); S0:= S0+X;
X:= float(Tbl(J)) / 1_000_000.0; RlOut(0, X); S1:= S1+X;
CrLf(0);
];
X:= 1759.0 / 27720.0; RlOut(0, X); S0:= S0+X;
X:= float(Tbl(12)) / 1_000_000.0; RlOut(0, X); S1:= S1+X;
CrLf(0);
Text(0, " ------- -------
");
RlOut(0, S0); RlOut(0, S1);
]
Output:
0.20000 0.20012
0.16667 0.16679
0.14286 0.14305
0.12500 0.12510
0.11111 0.11113
0.10000 0.09990
0.09091 0.09077
0.06346 0.06313
------- -------
1.00000 1.00000
- Programming Tasks
- Probability and statistics
- Ada
- ALGOL 68
- AutoHotkey
- AWK
- BBC BASIC
- C
- C++
- C sharp
- Clojure
- Common Lisp
- D
- E
- Euphoria
- Factor
- Forth
- Fortran
- Go
- Haskell
- HicEst
- Icon
- Unicon
- J
- Java
- JavaScript
- Liberty BASIC
- Lua
- Mathematica
- OCaml
- PARI/GP
- Perl
- Perl 6
- PicoLisp
- PureBasic
- Python
- R
- Racket
- REXX
- Ruby
- Seed7
- Scheme
- Scheme examples needing attention
- Examples needing attention
- SRFI-27
- Tcl
- Ursala
- XPL0