Unbias a random generator
You are encouraged to solve this task according to the task description, using any language you may know.
Given a weighted one bit generator of random numbers where the probability of a one occuring, , is not the same as , the probability of a zero occuring, the probability of the occurrence of a one followed by a zero is . This is the same as the probability of a zero followed by a one: .
Task Details
- Use your language's random number generator to create a function/method/subroutine/... randN that returns a one or a zero, but with one occurring, on average, 1 out of N times, where N is an integer from the range 3 to 6 inclusive.
- Create a function unbiased that uses only randN as its source of randomness to become an unbiased generator of random ones and zeroes.
- For N over its range, generate and show counts of the outputs of randN and unbiased(randN).
The actual unbiasing should be done by generating two numbers at a time from randN and only returning a 1 or 0 if they are different. As long as you always return the first number or always return the second number, the probabilities discussed above should take over the biased probability of randN.
C
<lang C>#include <stdio.h>
- include <stdlib.h>
int biased(int bias) { /* balance out the bins, being pedantic */ int r, rand_max = RAND_MAX - (RAND_MAX % bias); while ((r = rand()) > rand_max); return rand() < rand_max / bias; }
int unbiased(int bias) { int a; while ((a = biased(bias)) == biased(bias)); return a; }
int main() { int b, n = 10000, cb, cu, i; for (b = 3; b <= 6; b++) { for (i = cb = cu = 0; i < n; i++) { cb += biased(b); cu += unbiased(b); } printf("bias %d: %5d %5.3f%% vs %5d %5.3f\n", b, cb, 100. * cb / n, cu, 100. * cu / n); }
return 0; }</lang>output<lang>bias 3: 3309 33.090% vs 4971 49.710 bias 4: 2513 25.130% vs 4943 49.430 bias 5: 1976 19.760% vs 4965 49.650 bias 6: 1674 16.740% vs 5003 50.030</lang>
Clojure
<lang Clojure>(defn biased [n]
(if (< (rand 2) (/ n)) 0 1))
(defn unbiased [n]
(loop [a 0 b 0] (if (= a b) (recur (biased n) (biased n)) a)))
(for [n (range 3 7)]
[n (double (/ (apply + (take 50000 (repeatedly #(biased n)))) 50000)) (double (/ (apply + (take 50000 (repeatedly #(unbiased n)))) 50000))])
([3 0.83292 0.50422]
[4 0.87684 0.5023] [5 0.90122 0.49728] [6 0.91526 0.5])</lang>
D
<lang d>import std.stdio, std.random;
bool biased(int n) {
return uniform(0.0, 1.0) < (1.0 / n);
}
bool unbiased(int n) {
bool a, b; do { a = biased(n); b = biased(n); } while (a == b); return a;
}
void main() {
int M = 50_000; foreach (n; 3 .. 7) { int c1, c2; foreach (i; 0 .. M) { c1 += biased(n); c2 += unbiased(n); } writefln("%d: %2.2f%% %2.2f%%", n, 100.0*c1/M, 100.0*c2/M); }
}</lang> Output:
3: 33.12% 50.15% 4: 25.13% 50.02% 5: 19.61% 50.01% 6: 16.70% 49.98%
Fortran
<lang fortran>program Bias_Unbias
implicit none
integer, parameter :: samples = 1000000 integer :: i, j integer :: c1, c2, rand do i = 3, 6 c1 = 0 c2 = 0 do j = 1, samples rand = bias(i) if (rand == 1) c1 = c1 + 1 rand = unbias(i) if (rand == 1) c2 = c2 + 1 end do write(*, "(i2,a,f8.3,a,f8.3,a)") i, ":", real(c1) * 100.0 / real(samples), & "%", real(c2) * 100.0 / real(samples), "%" end do
contains
function bias(n)
integer :: bias integer, intent(in) :: n real :: r
call random_number(r) if (r > 1 / real(n)) then bias = 0 else bias = 1 end if
end function
function unbias(n)
integer :: unbias integer, intent(in) :: n integer :: a, b
do a = bias(n) b = bias(n) if (a /= b) exit end do unbias = a
end function
end program</lang> Output:
3: 33.337% 49.971% 4: 24.945% 49.944% 5: 19.971% 49.987% 6: 16.688% 50.097%
GAP
<lang gap>RandNGen := function(n) local v, rand; v := [1 .. n - 1]*0; Add(v, 1); rand := function() return Random(v); end; return rand; end;
UnbiasedGen := function(rand) local unbiased; unbiased := function() local a, b; while true do a := rand(); b := rand(); if a <> b then break; fi; od; return a; end; return unbiased; end;
range := [2 .. 6]; v := List(range, RandNGen); w := List(v, UnbiasedGen); apply := gen -> Sum([1 .. 1000000], n -> gen());
- Some tests (2 is added as a witness, since in this case RandN is already unbiased)
PrintArray(TransposedMat([range, List(v, apply), List(w, apply)]));
- [ [ 2, 499991, 499041 ],
- [ 3, 333310, 500044 ],
- [ 4, 249851, 500663 ],
- [ 5, 200532, 500448 ],
- [ 6, 166746, 499859 ] ]</lang>
J
<lang j>randN=: 0 = ? unbiased=: i.@# { ::$: 2 | 0 3 -.~ _2 #.\ 4&* randN@# ]</lang>
Example use:
<lang j> randN 10#6 1 0 0 0 1 0 0 0 0 0
unbiased 10#6
1 0 0 1 0 0 1 0 1 1</lang>
Some example counts (these are counts of the number of 1s which appear in a test involving 100 random numbers):
<lang j> +/randN 100#3 30
+/randN 100#4
20
+/randN 100#5
18
+/randN 100#6
18
+/unbiased 100#3
49
+/unbiased 100#4
46
+/unbiased 100#5
49
+/unbiased 100#6
47</lang>
Note that these results are random. For example, a re-run of +/randN 100#5
gave 25 as its result, and a re-run of +/unbiased 100#5
gave 52 as its result.
Java
<lang java>public class Bias {
public static boolean biased(int n) { return Math.random() < 1.0 / n; }
public static boolean unbiased(int n) { boolean a, b; do { a = biased(n); b = biased(n); } while (a == b); return a; }
public static void main(String[] args) { final int M = 50000; for (int n = 3; n < 7; n++) { int c1 = 0, c2 = 0; for (int i = 0; i < M; i++) { c1 += biased(n) ? 1 : 0; c2 += unbiased(n) ? 1 : 0; } System.out.format("%d: %2.2f%% %2.2f%%\n", n, 100.0*c1/M, 100.0*c2/M); } }
}</lang> Output:
3: 33,11% 50,23% 4: 24.97% 49.78% 5: 20.05% 50.00% 6: 17.00% 49.88%
Lua
<lang lua> local function randN(n)
return function() if math.random() < 1/n then return 1 else return 0 end end
end
local function unbiased(n)
local biased = randN (n) return function() local a, b = biased(), biased() while a==b do a, b = biased(), biased() end return a end
end
local function demonstrate (samples)
for n = 3, 6 do biased = randN(n) unbias = unbiased(n) local bcounts = {[0]=0,[1]=0} local ucounts = {[0]=0,[1]=0} for i=1, samples do local bnum = biased() local unum = unbias() bcounts[bnum] = bcounts[bnum]+1 ucounts[unum] = ucounts[unum]+1 end print(string.format("N = %d",n), "# 0", "# 1", "% 0", "% 1") print("biased", bcounts[0], bcounts[1], bcounts[0] / samples * 100, bcounts[1] / samples * 100) print("unbias", ucounts[0], ucounts[1], ucounts[0] / samples * 100, ucounts[1] / samples * 100) end
end
demonstrate(100000) </lang>
Output:
N = 3 # 0 # 1 % 0 % 1 biased 66832 33168 66.832 33.168 unbias 50207 49793 50.207 49.793 N = 4 # 0 # 1 % 0 % 1 biased 75098 24902 75.098 24.902 unbias 49872 50128 49.872 50.128 N = 5 # 0 # 1 % 0 % 1 biased 80142 19858 80.142 19.858 unbias 50049 49951 50.049 49.951 N = 6 # 0 # 1 % 0 % 1 biased 83407 16593 83.407 16.593 unbias 49820 50180 49.82 50.18
PARI/GP
GP's random number generation is high-quality, using Brent's XORGEN. Thus this program is slow: the required 400,000 unbiased numbers generated through this bias/unbias scheme take nearly a second. <lang parigp>randN(N)=!random(N); unbiased(N)={
my(a,b); while(1, a=randN(N); b=randN(N); if(a!=b, return(a)) )
}; for(n=3,6,print(n,"\t",sum(k=1,1e5,unbiased(n))))</lang>
Output:
3 49913 4 49924 5 50240 6 50277
Perl
<lang perl>sub randn {
my $n = shift; return int(rand($n) / ($n - 1));
}
for my $n (3 .. 6) {
print "Bias $n: "; my (@raw, @fixed); for (1 .. 10000) { my $x = randn($n); $raw[$x]++; $fixed[$x]++ if randn($n) != $x } print "@raw, "; printf("%3g+-%.3g%%\tfixed: ", $raw[0]/100,
100 * sqrt($raw[0] * $raw[1]) / ($raw[0] + $raw[1])**1.5);
print "@fixed, "; printf("%3g+-%.3g%%\n", 100*$fixed[0]/($fixed[0] + $fixed[1]),
100 * sqrt($fixed[0] * $fixed[1]) / ($fixed[0] + $fixed[1])**1.5);
}</lang>Output:<lang>Bias 3: 6684 3316, 66.84+-0.471% fixed: 2188 2228, 49.5471+-0.752% Bias 4: 7537 2463, 75.37+-0.431% fixed: 1924 1845, 51.048+-0.814% Bias 5: 7993 2007, 79.93+-0.401% fixed: 1564 1597, 49.478+-0.889% Bias 6: 8309 1691, 83.09+-0.375% fixed: 1403 1410, 49.8756+-0.943%</lang>
Perl 6
<lang perl6>sub randN ( $n where 3..6 ) {
return ( $n.rand / ($n - 1) ).Int;
}
sub unbiased ( $n where 3..6 ) {
my $n1; repeat { $n1 = randN($n) } until $n1 != randN($n); return $n1;
}
my $iterations = 1000; for 3 .. 6 -> $n {
my ( @raw, @fixed ); for ^$iterations { @raw[ randN($n) ]++; @fixed[ unbiased($n) ]++; } printf "N=%d randN: %s, %4.1f%% unbiased: %s, %4.1f%%\n", $n, map { .perl, .[1] * 100 / $iterations }, $(@raw), $(@fixed);
}</lang>
Output:
N=3 randN: [676, 324], 32.4% unbiased: [517, 483], 48.3% N=4 randN: [734, 266], 26.6% unbiased: [489, 511], 51.1% N=5 randN: [792, 208], 20.8% unbiased: [494, 506], 50.6% N=6 randN: [834, 166], 16.6% unbiased: [514, 486], 48.6%
PicoLisp
<lang PicoLisp>(de randN (N)
(if (= 1 (rand 1 N)) 1 0) )
(de unbiased (N)
(use (A B) (while (= (setq A (randN N)) (setq B (randN N)) ) ) A ) )</lang>
Test: <lang PicoLisp>(for N (range 3 6)
(tab (2 1 7 2 7 2) N ":" (format (let S 0 (do 10000 (inc 'S (randN N)))) 2 ) "%" (format (let S 0 (do 10000 (inc 'S (unbiased N)))) 2 ) "%" ) )</lang>
Output:
3: 33.21 % 50.48 % 4: 25.06 % 49.79 % 5: 20.04 % 49.75 % 6: 16.32 % 49.02 %
PureBasic
<lang PureBasic>Procedure biased(n)
If Random(n) <> 1 ProcedureReturn 0 EndIf ProcedureReturn 1
EndProcedure
Procedure unbiased(n)
Protected a, b Repeat a = biased(n) b = biased(n) Until a <> b ProcedureReturn a
EndProcedure
- count = 100000
Define n, m, output.s For n = 3 To 6
Dim b_count(1) Dim u_count(1) For m = 1 To #count x = biased(n) b_count(x) + 1 x = unbiased(n) u_count(x) + 1 Next output + "N = " + Str(n) + #LF$ output + " biased =>" + #tab$ + "#0=" + Str(b_count(0)) + #tab$ + "#1=" +Str(b_count(1)) output + #tab$ + " ratio=" + StrF(b_count(1) / #count * 100, 2) + "%" + #LF$ output + " unbiased =>" + #tab$ + "#0=" + Str(u_count(0)) + #tab$ + "#1=" + Str(u_count(1)) output + #tab$ + " ratio=" + StrF(u_count(1) / #count * 100, 2) + "%" + #LF$
Next MessageRequester("Biased and Unbiased random number results", output)</lang> Sample output:
--------------------------- Biased and Unbiased random number results --------------------------- N = 3 biased => #0=74856 #1=25144 ratio=25.14% unbiased => #0=50066 #1=49934 ratio=49.93% N = 4 biased => #0=80003 #1=19997 ratio=20.00% unbiased => #0=49819 #1=50181 ratio=50.18% N = 5 biased => #0=83256 #1=16744 ratio=16.74% unbiased => #0=50268 #1=49732 ratio=49.73% N = 6 biased => #0=85853 #1=14147 ratio=14.15% unbiased => #0=49967 #1=50033 ratio=50.03%
Python
<lang python>from __future__ import print_function import random
def randN(N):
" 1,0 random generator factory with 1 appearing 1/N'th of the time" choose = [1] + [0] * (N-1) def rand(): return random.choice( rand.choose ) rand.choose = choose return rand
def unbiased(biased):
'uses a biased() generator of 1 or 0, to create an unbiased one' this, that = biased(), biased() while this == that: # Loop until 10 or 01 this, that = biased(), biased() return this # return the first
if __name__ == '__main__':
from collections import namedtuple
Stats = namedtuple('Stats', 'count1 count0 percent')
for N in range(3, 7): biased = randN(N) v = [biased() for x in range(1000000)] v1, v0 = v.count(1), v.count(0) print ( "Biased(%i) = %r" % (N, Stats(v1, v0, 100. * v1/(v1 + v0))) )
v = [unbiased(biased) for x in range(1000000)] v1, v0 = v.count(1), v.count(0) print ( " Unbiased = %r" % (Stats(v1, v0, 100. * v1/(v1 + v0)), ) )</lang>
Sample output
Biased(3) = Stats(count1=331800, count0=668200, percent=33.18) Unbiased = Stats(count1=499740, count0=500260, percent=49.973999999999997) Biased(4) = Stats(count1=249770, count0=750230, percent=24.977) Unbiased = Stats(count1=499707, count0=500293, percent=49.970700000000001) Biased(5) = Stats(count1=199764, count0=800236, percent=19.976400000000002) Unbiased = Stats(count1=499456, count0=500544, percent=49.945599999999999) Biased(6) = Stats(count1=167561, count0=832439, percent=16.7561) Unbiased = Stats(count1=499963, count0=500037, percent=49.996299999999998)
Tcl
<lang tcl># 1,0 random generator factory with 1 appearing 1/N'th of the time proc randN n {expr {rand()*$n < 1}}
- uses a biased generator of 1 or 0, to create an unbiased one
proc unbiased {biased} {
while 1 {
if {[set a [eval $biased]] != [eval $biased]} {return $a}
}
}
for {set n 3} {$n <= 6} {incr n} {
set biased [list randN $n] for {set i 0;array set c {0 0 1 0}} {$i < 1000000} {incr i} {
incr c([eval $biased])
} puts [format " biased %d => #0=%d #1=%d ratio=%.2f%%" $n $c(0) $c(1) \
[expr {100.*$c(1)/$i}]]
for {set i 0;array set c {0 0 1 0}} {$i < 1000000} {incr i} {
incr c([unbiased $biased])
} puts [format "unbiased %d => #0=%d #1=%d ratio=%.2f%%" $n $c(0) $c(1) \
[expr {100.*$c(1)/$i}]] }</lang> Sample output:
biased 3 => #0=667076 #1=332924 ratio=33.29% unbiased 3 => #0=500263 #1=499737 ratio=49.97% biased 4 => #0=750470 #1=249530 ratio=24.95% unbiased 4 => #0=500644 #1=499356 ratio=49.94% biased 5 => #0=800243 #1=199757 ratio=19.98% unbiased 5 => #0=500878 #1=499122 ratio=49.91% biased 6 => #0=833623 #1=166377 ratio=16.64% unbiased 6 => #0=500518 #1=499482 ratio=49.95%