Unbias a random generator: Difference between revisions

Content added Content deleted

Inline

Revision as of 12:32, 29 August 2011

Given a weighted one bit generator of random numbers where the probability of a one occuring, $P_{1}$ , is not the same as $P_{0}$ , the probability of a zero occuring, the probability of the occurrence of a one followed by a zero is $P_{1}\times P_{0}$ . This is the same as the probability of a zero followed by a one: $P_{0}\times P_{1}$ .

Task Details

Use your language's random number generator to create a function/method/subroutine/... randN that returns a one or a zero, but with one occurring, on average, 1 out of N times, where N is an integer from the range 3 to 6 inclusive.
Create a function unbiased that uses only randN as its source of randomness to become an unbiased generator of random ones and zeroes.
For N over its range, generate and show counts of the outputs of randN and unbiased(randN).

The actual unbiasing should be done by generating two numbers at a time from randN and only returning a 1 or 0 if they are different. As long as you always return the first number or always return the second number, the probabilities discussed above should take over the biased probability of randN.

Ada

<lang Ada>with Ada.Text_IO; with Ada.Numerics.Discrete_Random;

procedure Bias_Unbias is

  Modulus: constant Integer := 60; -- lcm of {3,4,5,6}
  type M is mod Modulus;
  package Rand is new Ada.Numerics.Discrete_Random(M);
  Gen: Rand.Generator;

  subtype Bit is Integer range 0 .. 1;

  function Biased_Bit(Bias_Base: Integer) return Bit is
  begin
     if (Integer(Rand.Random(Gen))* Bias_Base) / Modulus > 0 then
        return 0;
     else
        return 1;
     end if;
  end Biased_Bit;

  function Unbiased_Bit(Bias_Base: Integer) return Bit is
     A, B: Bit := 0;
  begin
     while A = B loop
        A := Biased_Bit(Bias_Base);
        B := Biased_Bit(Bias_Base);
     end loop;
     return A;
  end Unbiased_Bit;

  package FIO is new Ada.Text_IO.Float_IO(Float);

  Counter_B, Counter_U: Natural;
  Number_Of_Samples: constant Natural := 10_000;

begin

  Rand.Reset(Gen);
  Ada.Text_IO.Put_Line(" I  Biased% UnBiased%");
  for I in 3 .. 6 loop
     Counter_B := 0;
     Counter_U := 0;
     for J in 1 .. Number_Of_Samples loop
        Counter_B := Counter_B + Biased_Bit(I);
        Counter_U := Counter_U + Unbiased_Bit(I);
     end loop;
     Ada.Text_IO.Put(Integer'Image(I));
     FIO.Put(100.0 * Float(Counter_B) / Float(Number_Of_Samples), 5, 2, 0);
     FIO.Put(100.0 * Float(Counter_U) / Float(Number_Of_Samples), 5, 2, 0);
     Ada.Text_IO.New_Line;
  end loop;

end Bias_Unbias;</lang>

Output:

 I  Biased% UnBiased%
 3   32.87   49.80
 4   24.49   50.22
 5   19.73   50.05
 6   16.75   50.19

C

<lang C>#include <stdio.h>

include <stdlib.h>

int biased(int bias) { /* balance out the bins, being pedantic */ int r, rand_max = RAND_MAX - (RAND_MAX % bias); while ((r = rand()) > rand_max); return rand() < rand_max / bias; }

int unbiased(int bias) { int a; while ((a = biased(bias)) == biased(bias)); return a; }

int main() { int b, n = 10000, cb, cu, i; for (b = 3; b <= 6; b++) { for (i = cb = cu = 0; i < n; i++) { cb += biased(b); cu += unbiased(b); } printf("bias %d: %5.3f%% vs %5.3f%%\n", b, 100. * cb / n, 100. * cu / n); }

return 0; }</lang>output<lang>bias 3: 33.090% vs 49.710% bias 4: 25.130% vs 49.430% bias 5: 19.760% vs 49.650% bias 6: 16.740% vs 50.030%</lang>

Clojure

<lang Clojure>(defn biased [n]

 (if (< (rand 2) (/ n)) 0 1))

(defn unbiased [n]

 (loop [a 0 b 0]
   (if (= a b)
     (recur (biased n) (biased n))
     a)))

(for [n (range 3 7)]

 [n
  (double (/ (apply + (take 50000 (repeatedly #(biased n)))) 50000))
  (double (/ (apply + (take 50000 (repeatedly #(unbiased n)))) 50000))])

([3 0.83292 0.50422]

[4 0.87684 0.5023]
[5 0.90122 0.49728]
[6 0.91526 0.5])</lang>

D

<lang d>import std.stdio, std.random;

bool biased(int n) {

   return uniform(0.0, 1.0) < (1.0 / n);

}

bool unbiased(int n) {

   bool a, b;
   do {
       a = biased(n);
       b = biased(n);
   } while (a == b);
   return a;

}

void main() {

   int M = 50_000;
   foreach (n; 3 .. 7) {
       int c1, c2;
       foreach (i; 0 .. M) {
           c1 += biased(n);
           c2 += unbiased(n);
       }
       writefln("%d: %2.2f%%  %2.2f%%", n, 100.0*c1/M, 100.0*c2/M);
   }

}</lang> Output:

3: 33.12%  50.15%
4: 25.13%  50.02%
5: 19.61%  50.01%
6: 16.70%  49.98%

Euphoria

<lang euphoria>function randN(integer N)

   return rand(N) = 1

end function

function unbiased(integer N)

   integer a
   while 1 do
       a = randN(N)
       if a != randN(N) then
           return a
       end if
   end while

end function

constant n = 10000 integer cb, cu for b = 3 to 6 do

   cb = 0
   cu = 0
   for i = 1 to n do
       cb += randN(b)
       cu += unbiased(b)
   end for
   printf(1, "%d: %5.2f%%  %5.2f%%\n", {b, 100 * cb / n, 100 * cu / n})

end for</lang>

Output:

3: 33.68%  49.94%
4: 24.93%  50.48%
5: 20.32%  49.97%
6: 16.98%  50.05%

Fortran

Works with: Fortran version 90 and later

<lang fortran>program Bias_Unbias

 implicit none

 integer, parameter :: samples = 1000000
 integer :: i, j
 integer :: c1, c2, rand

 do i = 3, 6
   c1 = 0
   c2 = 0
   do j = 1, samples
     rand = bias(i)
     if (rand == 1) c1 = c1 + 1
     rand = unbias(i)
     if (rand == 1) c2 = c2 + 1
   end do
   write(*, "(i2,a,f8.3,a,f8.3,a)") i, ":", real(c1) * 100.0 / real(samples), &
                                    "%", real(c2) * 100.0 / real(samples), "%"
 end do

contains

function bias(n)

 integer :: bias
 integer, intent(in) :: n
 real :: r

 call random_number(r)
 if (r > 1 / real(n)) then
   bias = 0
 else
   bias = 1
 end if

end function

function unbias(n)

 integer :: unbias
 integer, intent(in) :: n
 integer :: a, b

 do
   a = bias(n)
   b = bias(n)
   if (a /= b) exit
 end do
 unbias = a

end function

end program</lang> Output:

3:  33.337%  49.971%
4:  24.945%  49.944%
5:  19.971%  49.987%
6:  16.688%  50.097%

GAP

<lang gap>RandNGen := function(n) local v, rand; v := [1 .. n - 1]*0; Add(v, 1); rand := function() return Random(v); end; return rand; end;

UnbiasedGen := function(rand) local unbiased; unbiased := function() local a, b; while true do a := rand(); b := rand(); if a <> b then break; fi; od; return a; end; return unbiased; end;

range := [2 .. 6]; v := List(range, RandNGen); w := List(v, UnbiasedGen); apply := gen -> Sum([1 .. 1000000], n -> gen());

Some tests (2 is added as a witness, since in this case RandN is already unbiased)

PrintArray(TransposedMat([range, List(v, apply), List(w, apply)]));

[ [ 2, 499991, 499041 ],
[ 3, 333310, 500044 ],
[ 4, 249851, 500663 ],
[ 5, 200532, 500448 ],
[ 6, 166746, 499859 ] ]</lang>

Go

<lang go>package main

import (

   "fmt"
   "rand"

)

const samples = 1e6

func main() {

   fmt.Println("Generator  1 count  0 count  % 1 count")
   for n := 3; n <= 6; n++ {
       // function randN, per task description
       randN := func() int {
           if rand.Intn(n) == 0 {
               return 1
           }
           return 0
       }
       var b [2]int
       for x := 0; x < samples; x++ {
           b[randN()]++
       }
       fmt.Printf("randN(%d)   %7d  %7d    %5.2f%%\n",
           n, b[1], b[0], float64(b[1])*100/samples)

       // function unbiased, per task description
       unbiased := func() (b int) {
           for b = randN(); b == randN(); b = randN() {
           }
           return
       }
       var u [2]int
       for x := 0; x < samples; x++ {
           u[unbiased()]++
       }
       fmt.Printf("unbiased   %7d  %7d    %5.2f%%\n",
           u[1], u[0], float64(u[1])*100/samples)
   }

}</lang> Output:

Generator  1 count  0 count  % 1 count
randN(3)    332711   667289    33.27%
unbiased    499649   500351    49.96%
randN(4)    249742   750258    24.97%
unbiased    499434   500566    49.94%
randN(5)    200318   799682    20.03%
unbiased    499100   500900    49.91%
randN(6)    166900   833100    16.69%
unbiased    499973   500027    50.00%

J

<lang j>randN=: 0 = ? unbiased=: i.@# { ::$: 2 | 0 3 -.~ _2 #.\ 4&* randN@# ]</lang>

Example use:

<lang j> randN 10#6 1 0 0 0 1 0 0 0 0 0

  unbiased 10#6

1 0 0 1 0 0 1 0 1 1</lang>

Some example counts (these are counts of the number of 1s which appear in a test involving 100 random numbers):

<lang j> +/randN 100#3 30

  +/randN 100#4

20

  +/randN 100#5

18

  +/randN 100#6

18

  +/unbiased 100#3

49

  +/unbiased 100#4

46

  +/unbiased 100#5

49

  +/unbiased 100#6

47</lang>

Note that these results are random. For example, a re-run of +/randN 100#5 gave 25 as its result, and a re-run of +/unbiased 100#5 gave 52 as its result.

Java

<lang java>public class Bias {

   public static boolean biased(int n) {
       return Math.random() < 1.0 / n;
   }

   public static boolean unbiased(int n) {
       boolean a, b;
       do {
           a = biased(n);
           b = biased(n);
       } while (a == b);
       return a;
   }

   public static void main(String[] args) {
       final int M = 50000;
       for (int n = 3; n < 7; n++) {
           int c1 = 0, c2 = 0;
           for (int i = 0; i < M; i++) {
               c1 += biased(n) ? 1 : 0;
               c2 += unbiased(n) ? 1 : 0;
           }
           System.out.format("%d: %2.2f%%  %2.2f%%\n",
                             n, 100.0*c1/M, 100.0*c2/M);
       }
   }

}</lang> Output:

3: 33,11%  50,23%
4: 24.97%  49.78%
5: 20.05%  50.00%
6: 17.00%  49.88%

Lua

<lang lua> local function randN(n)

 return function()
   if math.random() < 1/n then return 1 else return 0 end
 end

end

local function unbiased(n)

 local biased = randN (n)
 return function()
   local a, b = biased(), biased()
   while a==b do
     a, b = biased(), biased()
   end
   return a
 end

end

local function demonstrate (samples)

 for n = 3, 6 do
   biased = randN(n)
   unbias = unbiased(n)
   local bcounts = {[0]=0,[1]=0}
   local ucounts = {[0]=0,[1]=0}
   for i=1, samples do
     local bnum = biased()
     local unum = unbias()
     bcounts[bnum] = bcounts[bnum]+1
     ucounts[unum] = ucounts[unum]+1
   end
   print(string.format("N = %d",n),
     "# 0", "# 1",
     "% 0", "% 1")
   print("biased", bcounts[0], bcounts[1],
     bcounts[0] / samples * 100,
     bcounts[1] / samples * 100)
   print("unbias", ucounts[0], ucounts[1],
     ucounts[0] / samples * 100,
     ucounts[1] / samples * 100)
 end

end

demonstrate(100000) </lang>

Output:

N = 3	# 0	# 1	% 0	% 1
biased	66832	33168	66.832	33.168
unbias	50207	49793	50.207	49.793
N = 4	# 0	# 1	% 0	% 1
biased	75098	24902	75.098	24.902
unbias	49872	50128	49.872	50.128
N = 5	# 0	# 1	% 0	% 1
biased	80142	19858	80.142	19.858
unbias	50049	49951	50.049	49.951
N = 6	# 0	# 1	% 0	% 1
biased	83407	16593	83.407	16.593
unbias	49820	50180	49.82	50.18

PARI/GP

GP's random number generation is high-quality, using Brent's XORGEN. Thus this program is slow: the required 400,000 unbiased numbers generated through this bias/unbias scheme take nearly a second. <lang parigp>randN(N)=!random(N); unbiased(N)={

 my(a,b);
 while(1,
   a=randN(N);
   b=randN(N);
   if(a!=b, return(a))
 )

}; for(n=3,6,print(n,"\t",sum(k=1,1e5,unbiased(n))))</lang>

Output:

Perl

<lang perl>sub randn {

       my $n = shift;
       return int(rand($n) / ($n - 1));

}

for my $n (3 .. 6) {

       print "Bias $n: ";
       my (@raw, @fixed);
       for (1 .. 10000) {
               my $x = randn($n);
               $raw[$x]++;
               $fixed[$x]++    if randn($n) != $x
       }
       print "@raw, ";
       printf("%3g+-%.3g%%\tfixed: ", $raw[0]/100,

100 * sqrt($raw[0] * $raw[1]) / ($raw[0] + $raw[1])**1.5);

       print "@fixed, ";
       printf("%3g+-%.3g%%\n", 100*$fixed[0]/($fixed[0] + $fixed[1]),

100 * sqrt($fixed[0] * $fixed[1]) / ($fixed[0] + $fixed[1])**1.5);

}</lang>Output:<lang>Bias 3: 6684 3316, 66.84+-0.471% fixed: 2188 2228, 49.5471+-0.752% Bias 4: 7537 2463, 75.37+-0.431% fixed: 1924 1845, 51.048+-0.814% Bias 5: 7993 2007, 79.93+-0.401% fixed: 1564 1597, 49.478+-0.889% Bias 6: 8309 1691, 83.09+-0.375% fixed: 1403 1410, 49.8756+-0.943%</lang>

Perl 6

Translation of: Perl

<lang perl6>sub randN ( $n where 3..6 ) {

   return ( $n.rand / ($n - 1) ).Int;

}

sub unbiased ( $n where 3..6 ) {

   my $n1;
   repeat { $n1 = randN($n) } until $n1 != randN($n);
   return $n1;

}

my $iterations = 1000; for 3 .. 6 -> $n {

   my ( @raw, @fixed );
   for ^$iterations {
       @raw[      randN($n) ]++;
       @fixed[ unbiased($n) ]++;
   }
   printf "N=%d   randN: %s, %4.1f%%   unbiased: %s, %4.1f%%\n",
       $n, map { .perl, .[1] * 100 / $iterations }, $(@raw), $(@fixed);

}</lang>

Output:

N=3   randN: [676, 324], 32.4%   unbiased: [517, 483], 48.3%
N=4   randN: [734, 266], 26.6%   unbiased: [489, 511], 51.1%
N=5   randN: [792, 208], 20.8%   unbiased: [494, 506], 50.6%
N=6   randN: [834, 166], 16.6%   unbiased: [514, 486], 48.6%

PicoLisp

<lang PicoLisp>(de randN (N)

  (if (= 1 (rand 1 N)) 1 0) )

(de unbiased (N)

  (use (A B)
     (while
        (=
           (setq A (randN N))
           (setq B (randN N)) ) )
     A ) )</lang>

Test: <lang PicoLisp>(for N (range 3 6)

  (tab (2 1 7 2 7 2)
     N ":"
     (format
        (let S 0 (do 10000 (inc 'S (randN N))))
        2 )
     "%"
     (format
        (let S 0 (do 10000 (inc 'S (unbiased N))))
        2 )
     "%" ) )</lang>

Output:

 3:  33.21 %  50.48 %
 4:  25.06 %  49.79 %
 5:  20.04 %  49.75 %
 6:  16.32 %  49.02 %

PureBasic

<lang PureBasic>Procedure biased(n)

 If Random(n) <> 1
   ProcedureReturn 0
 EndIf 
 ProcedureReturn 1

EndProcedure

Procedure unbiased(n)

 Protected a, b
 Repeat
   a = biased(n)
   b = biased(n)
 Until a <> b
 ProcedureReturn a

EndProcedure

count = 100000

Define n, m, output.s For n = 3 To 6

 Dim b_count(1)
 Dim u_count(1)
 For m = 1 To #count
   x = biased(n)
   b_count(x) + 1
   x = unbiased(n)
   u_count(x) + 1
 Next
 output + "N = " + Str(n) + #LF$
 output + "  biased =>" + #tab$ + "#0=" + Str(b_count(0)) + #tab$ + "#1=" +Str(b_count(1))
 output + #tab$ + " ratio=" + StrF(b_count(1) / #count * 100, 2) + "%" + #LF$
 output + "  unbiased =>" + #tab$ + "#0=" + Str(u_count(0)) + #tab$ + "#1=" + Str(u_count(1))
 output + #tab$ + " ratio=" + StrF(u_count(1) / #count * 100, 2) + "%" + #LF$

Next MessageRequester("Biased and Unbiased random number results", output)</lang> Sample output:

---------------------------
Biased and Unbiased random number results
---------------------------
N = 3
  biased =>	#0=74856	#1=25144	 ratio=25.14%
  unbiased =>	#0=50066	#1=49934	 ratio=49.93%
N = 4
  biased =>	#0=80003	#1=19997	 ratio=20.00%
  unbiased =>	#0=49819	#1=50181	 ratio=50.18%
N = 5
  biased =>	#0=83256	#1=16744	 ratio=16.74%
  unbiased =>	#0=50268	#1=49732	 ratio=49.73%
N = 6
  biased =>	#0=85853	#1=14147	 ratio=14.15%
  unbiased =>	#0=49967	#1=50033	 ratio=50.03%

Python

<lang python>from __future__ import print_function import random

def randN(N):

   " 1,0 random generator factory with 1 appearing 1/N'th of the time"
   choose = [1] + [0] * (N-1)
   def rand():
       return random.choice( rand.choose )
   rand.choose = choose
   return rand

def unbiased(biased):

   'uses a biased() generator of 1 or 0, to create an unbiased one'
   this, that = biased(), biased()
   while this == that: # Loop until 10 or 01
       this, that = biased(), biased()
   return this         # return the first

if __name__ == '__main__':

   from collections import namedtuple

   Stats = namedtuple('Stats', 'count1 count0 percent')

   for N in range(3, 7):
       biased = randN(N)
       v = [biased() for x in range(1000000)]
       v1, v0 = v.count(1), v.count(0)
       print ( "Biased(%i)  = %r" % (N, Stats(v1, v0, 100. * v1/(v1 + v0))) )

       v = [unbiased(biased) for x in range(1000000)]
       v1, v0 = v.count(1), v.count(0)
       print ( "  Unbiased = %r" % (Stats(v1, v0, 100. * v1/(v1 + v0)), ) )</lang>

Sample output

Biased(3)  = Stats(count1=331800, count0=668200, percent=33.18)
  Unbiased = Stats(count1=499740, count0=500260, percent=49.973999999999997)
Biased(4)  = Stats(count1=249770, count0=750230, percent=24.977)
  Unbiased = Stats(count1=499707, count0=500293, percent=49.970700000000001)
Biased(5)  = Stats(count1=199764, count0=800236, percent=19.976400000000002)
  Unbiased = Stats(count1=499456, count0=500544, percent=49.945599999999999)
Biased(6)  = Stats(count1=167561, count0=832439, percent=16.7561)
  Unbiased = Stats(count1=499963, count0=500037, percent=49.996299999999998)

Tcl

<lang tcl># 1,0 random generator factory with 1 appearing 1/N'th of the time proc randN n {expr {rand()*$n < 1}}

uses a biased generator of 1 or 0, to create an unbiased one

proc unbiased {biased} {

   while 1 {

if {[set a [eval $biased]] != [eval $biased]} {return $a}

}

for {set n 3} {$n <= 6} {incr n} {

   set biased [list randN $n]
   for {set i 0;array set c {0 0 1 0}} {$i < 1000000} {incr i} {

incr c([eval $biased])

   }
   puts [format "  biased %d => #0=%d #1=%d ratio=%.2f%%" $n $c(0) $c(1) \

[expr {100.*$c(1)/$i}]]

   for {set i 0;array set c {0 0 1 0}} {$i < 1000000} {incr i} {

incr c([unbiased $biased])

   }
   puts [format "unbiased %d => #0=%d #1=%d ratio=%.2f%%" $n $c(0) $c(1) \

[expr {100.*$c(1)/$i}]] }</lang> Sample output:

  biased 3 => #0=667076 #1=332924 ratio=33.29%
unbiased 3 => #0=500263 #1=499737 ratio=49.97%
  biased 4 => #0=750470 #1=249530 ratio=24.95%
unbiased 4 => #0=500644 #1=499356 ratio=49.94%
  biased 5 => #0=800243 #1=199757 ratio=19.98%
unbiased 5 => #0=500878 #1=499122 ratio=49.91%
  biased 6 => #0=833623 #1=166377 ratio=16.64%
unbiased 6 => #0=500518 #1=499482 ratio=49.95%