Statistics/Normal distribution: Difference between revisions
(Added Lua version) |
(Put my earlier commit under the misplaced Liberty Basic one. Now moving both.) |
||
Line 431: | Line 431: | ||
0.9: +++++++ |
0.9: +++++++ |
||
1.0: ++++</pre> |
1.0: ++++</pre> |
||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
=={{header|Liberty BASIC}}== |
=={{header|Liberty BASIC}}== |
||
Line 622: | Line 603: | ||
18 3 |
18 3 |
||
19 2</pre> |
19 2</pre> |
||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
⚫ | |||
=={{header|PARI/GP}}== |
=={{header|PARI/GP}}== |
Revision as of 00:02, 21 March 2016
The Normal (or Gaussian) distribution is a freqently used distribution in statistics. While most programming languages provide a uniformly distributed random number generator, one can derive normally distributed random numbers from a uniform generator.
- The task
- Take a uniform random number generator and create a large (you decide how large) set of numbers that follow a normal (Gaussian) distribution. Calculate the dataset's mean and stddev, and show the histogram here.
- Mention any native language support for the generation of normally distributed random numbers.
- Reference
- You may refer to code in Statistics/Basic if available.
C++
showing features of C++11 here <lang cpp>#include <random>
- include <map>
- include <string>
- include <iostream>
- include <cmath>
- include <iomanip>
int main( ) {
std::random_device myseed ; std::mt19937 engine ( myseed( ) ) ; std::normal_distribution<> normDistri ( 2 , 3 ) ; std::map<int , int> normalFreq ; int sum = 0 ; //holds the sum of the randomly created numbers double mean = 0.0 ; double stddev = 0.0 ; for ( int i = 1 ; i < 10001 ; i++ ) ++normalFreq[ normDistri ( engine ) ] ; for ( auto MapIt : normalFreq ) { sum += MapIt.first * MapIt.second ; } mean = sum / 10000 ; stddev = sqrt( sum / 10000 ) ; std::cout << "The mean of the distribution is " << mean << " , the " ; std::cout << "standard deviation " << stddev << " !\n" ; std::cout << "And now the histogram:\n" ; for ( auto MapIt : normalFreq ) { std::cout << std::left << std::setw( 4 ) << MapIt.first <<
std::string( MapIt.second / 100 , '*' ) << std::endl ;
} return 0 ;
}</lang> Output:
The mean of the distribution is 1 , the standard deviation 1 ! And now the histogram: -10 -9 -8 -7 -6 -5 -4 * -3 ** -2 **** -1 ****** 0 ********************* 1 ************ 2 ************ 3 *********** 4 ********* 5 ****** 6 **** 7 ** 8 * 9 10 11 12 13
D
This uses the Box-Muller method as in the Go entry, and the module from the Statistics/Basic. A ziggurat-based normal generator for the Phobos standard library is in the works. <lang d>import std.stdio, std.random, std.math, std.range, std.algorithm,
statistics_basic;
struct Normals {
double mu, sigma; double[2] state; size_t index = state.length; enum empty = false;
void popFront() pure nothrow { index++; }
@property double front() { if (index >= state.length) { immutable r = sqrt(-2 * uniform!"]["(0., 1.0).log) * sigma; immutable x = 2 * PI * uniform01; state = [mu + r * x.sin, mu + r * x.cos]; index = 0; } return state[index]; }
}
void main() {
const data = Normals(0.0, 0.5).take(100_000).array; writefln("Mean: %8.6f, SD: %8.6f\n", data.meanStdDev[]); data.map!q{ max(0.0, min(0.9999, a / 3 + 0.5)) }.showHistogram01;
}</lang>
- Output:
Mean: 0.000528, SD: 0.502245 0.0: * 0.1: ****** 0.2: ***************** 0.3: *********************************** 0.4: ************************************************* 0.5: ************************************************** 0.6: ********************************** 0.7: ***************** 0.8: ****** 0.9: *
Fortran
Using the Marsaglia polar method <lang fortran>program Normal_Distribution
implicit none
integer, parameter :: i64 = selected_int_kind(18) integer, parameter :: r64 = selected_real_kind(15) integer(i64), parameter :: samples = 1000000_i64 real(r64) :: mean, stddev real(r64) :: sumn = 0, sumnsq = 0 integer(i64) :: n = 0 integer(i64) :: bin(-50:50) = 0 integer :: i, ind real(r64) :: ur1, ur2, nr1, nr2, s n = 0 do while(n <= samples) call random_number(ur1) call random_number(ur2) ur1 = ur1 * 2.0 - 1.0 ur2 = ur2 * 2.0 - 1.0 s = ur1*ur1 + ur2*ur2 if(s >= 1.0_r64) cycle nr1 = ur1 * sqrt(-2.0*log(s)/s) ind = floor(5.0*nr1) bin(ind) = bin(ind) + 1_i64 sumn = sumn + nr1 sumnsq = sumnsq + nr1*nr1 nr2 = ur2 * sqrt(-2.0*log(s)/s) ind = floor(5.0*nr2) bin(ind) = bin(ind) + 1_i64 sumn = sumn + nr2 sumnsq = sumnsq + nr2*nr2 n = n + 2_i64 end do mean = sumn / n stddev = sqrt(sumnsq/n - mean*mean) write(*, "(a, i0)") "sample size = ", samples write(*, "(a, f17.15)") "Mean : ", mean, write(*, "(a, f17.15)") "Stddev : ", stddev do i = -15, 15 write(*, "(f4.1, a, a)") real(i)/5.0, ": ", repeat("=", int(bin(i)*500/samples)) end do
end program</lang>
- Output:
sample size = 1000 Mean : 0.043096320705032 Stddev : 0.981532585231540 -3.0: -2.8: -2.6: == -2.4: == -2.2: ==== -2.0: ====== -1.8: ======= -1.6: ============ -1.4: ================ -1.2: ===================== -1.0: =========================== -0.8: ======================= -0.6: ================================== -0.4: ===================================== -0.2: ========================================== 0.0: =============================================== 0.2: ==================================== 0.4: ================================= 0.6: ================================== 0.8: ============================= 1.0: ==================== 1.2: ========================== 1.4: =========== 1.6: ========= 1.8: ==== 2.0: ====== 2.2: === 2.4: 2.6: 2.8: = 3.0: sample size = 1000000 Mean : 0.000166653231289 Stddev : 1.000025612171690 -3.0: -2.8: = -2.6: = -2.4: == -2.2: ==== -2.0: ====== -1.8: ========= -1.6: ============ -1.4: ================= -1.2: ===================== -1.0: ========================== -0.8: =============================== -0.6: =================================== -0.4: ====================================== -0.2: ======================================= 0.0: ======================================= 0.2: ====================================== 0.4: ================================== 0.6: =============================== 0.8: ========================== 1.0: ===================== 1.2: ================= 1.4: ============ 1.6: ========= 1.8: ====== 2.0: ==== 2.2: == 2.4: = 2.6: = 2.8: 3.0:
Go
Box-Muller method shown here. Go has a normally distributed random function in the standard library, as shown in the Go Random numbers solution. It uses the ziggurat method. <lang go>package main
import (
"fmt" "math" "math/rand" "strings"
)
// Box-Muller func norm2() (s, c float64) {
r := math.Sqrt(-2 * math.Log(rand.Float64())) s, c = math.Sincos(2 * math.Pi * rand.Float64()) return s * r, c * r
}
func main() {
const ( n = 10000 bins = 12 sig = 3 scale = 100 ) var sum, sumSq float64 h := make([]int, bins) for i, accum := 0, func(v float64) { sum += v sumSq += v * v b := int((v + sig) * bins / sig / 2) if b >= 0 && b < bins { h[b]++ } }; i < n/2; i++ { v1, v2 := norm2() accum(v1) accum(v2) } m := sum / n fmt.Println("mean:", m) fmt.Println("stddev:", math.Sqrt(sumSq/float64(n)-m*m)) for _, p := range h { fmt.Println(strings.Repeat("*", p/scale)) }
}</lang> Output:
mean: -0.0034970888831523488 stddev: 1.0040682925006286 * **** ********* *************** ******************* ****************** ************** ********* **** *
J
Solution <lang j>runif01=: ?@$ 0: NB. random uniform number generator rnorm01=. (2 o. 2p1 * runif01) * [: %: _2 * ^.@runif01 NB. random normal number generator (Box-Muller)
mean=: +/ % # NB. mean stddev=: (<:@# %~ +/)&.:*:@(- mean) NB. standard deviation histogram=: <:@(#/.~)@(i.@#@[ , I.)</lang> Example Usage <lang j> DataSet=: rnorm01 1e5
(mean , stddev) DataSet
0.000781667 1.00154
require 'plot' plot (5 %~ i: 25) ([;histogram) DataSet</lang>
Lasso
<lang Lasso>define stat1(a) => { if(#a->size) => { local(mean = (with n in #a sum #n) / #a->size) local(sdev = math_pow(((with n in #a sum Math_Pow((#n - #mean),2)) / #a->size),0.5)) return (:#sdev, #mean) else return (:0,0) } } define stat2(a) => { if(#a->size) => { local(sx = 0, sxx = 0) with x in #a do => { #sx += #x #sxx += #x*#x } local(sdev = math_pow((#a->size * #sxx - #sx * #sx),0.5) / #a->size) return (:#sdev, #sx / #a->size) else return (:0,0) } } define histogram(a) => { local( out = '\r', h = array(0,0,0,0,0,0,0,0,0,0,0), maxwidth = 50, sc = 0 ) with n in #a do => { if((#n * 10) <= 0) => { #h->get(1) += 1 else((#n * 10) >= 10) #h->get(#h->size) += 1 else #h->get(integer(decimal(#n)*10)+1) += 1 }
} local(mx = decimal(with n in #h max #n)) with i in #h do => { #out->append((#sc/10.0)->asString(-precision=1)+': '+('+' * integer(#i / #mx * #maxwidth))+'\r') #sc++ } return #out } define normalDist(mean,sdev) => { // Uses Box-Muller transform return ((-2 * decimal_random->log)->sqrt * (2 * pi * decimal_random)->cos) * #sdev + #mean }
with scale in array(100,1000,10000) do => {^ local(n = array) loop(#scale) => { #n->insert(normalDist(0.5, 0.2)) } local(sdev1,mean1) = stat1(#n) local(sdev2,mean2) = stat2(#n) #scale' numbers:\r'
'Naive method: sd: '+#sdev1+', mean: '+#mean1+'\r' 'Second method: sd: '+#sdev2+', mean: '+#mean2+'\r' histogram(#n) '\r\r'
^}</lang>
- Output:
100 numbers: Naive method: sd: 0.199518, mean: 0.506059 Second method: sd: 0.199518, mean: 0.506059 0.0: ++ 0.1: ++++ 0.2: +++++++++++++++++ 0.3: ++++++++++++++++++++++ 0.4: ++++++++++++++++++++++++++++++++++++++++++++++++++ 0.5: +++++++++++++++++++++++++++++++++++++++ 0.6: +++++++++++++++++++++++++++++++++ 0.7: ++++++++++++++++++++++++ 0.8: ++++++++++++++++++++ 0.9: ++++ 1.0: ++ 1000 numbers: Naive method: sd: 0.199653, mean: 0.504046 Second method: sd: 0.199653, mean: 0.504046 0.0: +++ 0.1: ++++++ 0.2: ++++++++++++++++ 0.3: ++++++++++++++++++++++++++++++ 0.4: +++++++++++++++++++++++++++++++++++++++++++++++ 0.5: ++++++++++++++++++++++++++++++++++++++++++++++++++ 0.6: ++++++++++++++++++++++++++++++++++++++++++++++ 0.7: +++++++++++++++++++++++++ 0.8: +++++++++++++++++++ 0.9: +++++++ 1.0: ++++ 10000 numbers: Naive method: sd: 0.202354, mean: 0.502519 Second method: sd: 0.202354, mean: 0.502519 0.0: +++ 0.1: +++++++ 0.2: +++++++++++++++ 0.3: +++++++++++++++++++++++++++++ 0.4: ++++++++++++++++++++++++++++++++++++++++++ 0.5: ++++++++++++++++++++++++++++++++++++++++++++++++++ 0.6: +++++++++++++++++++++++++++++++++++++++++++ 0.7: ++++++++++++++++++++++++++++++ 0.8: ++++++++++++++++ 0.9: +++++++ 1.0: ++++
Liberty BASIC
Uses LB Statistics/Basic <lang lb>call sample 100000
end
sub sample n
dim dat( n) for i =1 to n dat( i) =normalDist( 1, 0.2) next i
'// show mean, standard deviation. Find max, min. mx =-1000 mn = 1000 sum =0 sSq =0 for i =1 to n d =dat( i) mx =max( mx, d) mn =min( mn, d) sum =sum +d sSq =sSq +d^2 next i print n; " data terms used."
mean =sum / n print "Largest term was "; mx; " & smallest was "; mn range =mx -mn print "Mean ="; mean
print "Stddev ="; ( sSq /n -mean^2)^0.5
'// show histogram nBins =50 dim bins( nBins) for i =1 to n z =int( ( dat( i) -mn) /range *nBins) bins( z) =bins( z) +1 next i for b =0 to nBins -1 for j =1 to int( nBins *bins( b)) /n *30) print "#"; next j print next b print
end sub
function normalDist( m, s) ' Box Muller method
u =rnd( 1) v =rnd( 1) normalDist =( -2 *log( u))^0.5 *cos( 2 *3.14159265 *v)
end function</lang>
100000 data terms used. Largest term was 4.12950792 & smallest was -4.37934139 Mean =-0.26785425e-2 Stddev =1.00097669
# ## ### ##### ######## ############ ################ ######################## ############################## ###################################### ############################################## ######################################################## ################################################################### ############################################################################## ####################################################################################### ################################################################################################ #################################################################################################### ######################################################################################################## ##################################################################################################### ############################################################################################## ######################################################################################### ################################################################################## ######################################################################### ############################################################## #################################################### ########################################## ################################# ########################## ################## ############# ######### ###### #### ## # #
Lua
Lua provides math.random() to generate uniformly distributed random numbers. The function gaussian() shown here uses math.random() to generate normally distributed random numbers with given mean and variance. <lang Lua>function gaussian (mean, variance)
return math.sqrt(-2 * variance * math.log(math.random())) * math.cos(2 * variance * math.pi * math.random()) + mean
end
function mean (t)
local sum = 0 for k, v in pairs(t) do sum = sum + v end return sum / #t
end
function std (t)
local squares, avg = 0, mean(t) for k, v in pairs(t) do squares = squares + ((avg - v) ^ 2) end local variance = squares / #t return math.sqrt(variance)
end
function showHistogram (t)
local lo = math.ceil(math.min(unpack(t))) local hi = math.floor(math.max(unpack(t))) local step, histBars, barScale, compVal = (hi - lo) / 2, {}, 200 for range = lo, hi do histBars[range] = 0 for k, v in pairs(t) do compVal = math.ceil(v - 0.5) if compVal == range then histBars[range] = histBars[range] + 1 end end end for k, v in pairs(histBars) do io.write(k .. "\t" .. string.rep('=', v / #t * barScale)) print(" " .. v) end
end
math.randomseed(os.time()) local t, average, variance = {}, 10, 9 for i = 1, 1000 do
table.insert(t, gaussian(average, variance))
end print("Mean:", mean(t) .. ", expected " .. average) print("StdDev:", std(t) .. ", expected " .. math.sqrt(variance) .. "\n") showHistogram(t)</lang>
- Output:
Mean: 10.043466535976, expected 10 StdDev: 3.0658844951091, expected 3 1 1 2 4 3 === 15 4 ==== 24 5 ===== 28 6 ========= 47 7 ================ 82 8 ===================== 109 9 ====================== 114 10 ========================== 133 11 ========================= 125 12 =================== 99 13 ================== 93 14 ========== 51 15 ======= 39 16 ==== 22 17 = 9 18 3 19 2
Mathematica
<lang Mathematica>x:= RandomReal[1] SampleNormal[n_] := (Print[#//Length, " numbers, Mean : ", #//Mean, ", StandardDeviation : ", #//StandardDeviation];
Histogram[#, BarOrigin -> Left,Axes -> False])& [(Table[(-2*Log[x])^0.5*Cos[2*Pi*x], {n} ]]
Invocation: SampleNormal[ 10000 ] ->10000 numbers, Mean : -0.0122308, StandardDeviation : 1.00646 </lang>
MATLAB / Octave
<lang Matlab> N = 100000;
x = randn(N,1); mean(x) std(x) [nn,xx] = hist(x,100); bar(xx,nn);</lang>
PARI/GP
<lang parigp>rnormal()={ my(u1=random(1.),u2=random(1.); sqrt(-2*log(u1))*cos(2*Pi*u1) \\ Could easily be extended with a second normal at very little cost. }; mean(v)={
sum(i=1,#v,v[i])/#v
}; stdev(v,mu="")={
if(mu=="",mu=mean(v)); sqrt(sum(i=1,#v,(v[i]-mu)^2))/#v
}; histogram(v,bins=16,low=0,high=1)={
my(u=vector(bins),width=(high-low)/bins); for(i=1,#v,u[(v[i]-low)\width+1]++); u
}; show(n)={
my(v=vector(n,i,rnormal()),m=mean(v),s=stdev(v,m),h,sz=ceil(n/300)); h=histogram(v,,vecmin(v)-.1,vecmax(v)+.1); for(i=1,#h,for(j=1,h[i]\sz,print1("#"));print());
}; show(10^4)</lang>
For versions before 2.4.3, define <lang parigp>rreal()={
my(pr=32*ceil(default(realprecision)*log(10)/log(4294967296))); \\ Current precision random(2^pr)*1.>>pr
};</lang>
and use rreal()
in place of random(1.)
.
A PARI implementation: <lang C>GEN rnormal(long prec) { pari_sp ltop = avma; GEN u1, u2, left, right, ret; u1 = randomr(prec); u2 = randomr(prec); left = sqrtr_abs(shiftr(mplog(u1), 1)); right = mpcos(mulrr(shiftr(mppi(prec), 1), u2));
ret = mulrr(left, right);
ret = gerepileupto(ltop, ret);
return ret;
}</lang>
Use mpsincos
and caching to generate two values at nearly the same cost.
Perl 6
<lang perl6>constant τ = 2 * pi;
sub normdist ($m, $σ) {
my $r = sqrt -2 * log rand; my $Θ = τ * rand; $r * cos($Θ) * $σ + $m;
}
sub MAIN ($size = 100000, $mean = 50, $stddev = 4) {
my @dataset = normdist($mean,$stddev) xx $size;
my $m = [+](@dataset) / $size; say (:$m);
my $σ = sqrt [+](@dataset X** 2) / $size - $m**2; say (:$σ);
(my %hash){.round}++ for @dataset; my $scale = 180 * $stddev / $size; constant @subbar = < ⎸ ▏ ▎ ▍ ▌ ▋ ▊ ▉ █ >; for %hash.keys».Int.minmax(+*) -> $i { my $x = (%hash{$i} // 0) * $scale; my $full = floor $x; my $part = 8 * ($x - $full); say $i, "\t", '█' x $full, @subbar[$part]; }
}</lang>
- Output:
"m" => 50.006107405837142e0 "σ" => 4.0814435639885254e0 33 ⎸ 34 ⎸ 35 ⎸ 36 ▏ 37 ▎ 38 ▊ 39 █▋ 40 ███⎸ 41 █████▊ 42 ██████████⎸ 43 ███████████████▋ 44 ███████████████████████▏ 45 ████████████████████████████████▌ 46 ███████████████████████████████████████████▍ 47 ██████████████████████████████████████████████████████▏ 48 ███████████████████████████████████████████████████████████████▏ 49 █████████████████████████████████████████████████████████████████████▋ 50 ███████████████████████████████████████████████████████████████████████▊ 51 █████████████████████████████████████████████████████████████████████▌ 52 ███████████████████████████████████████████████████████████████⎸ 53 ██████████████████████████████████████████████████████▎ 54 ███████████████████████████████████████████⎸ 55 ████████████████████████████████▌ 56 ███████████████████████▍ 57 ███████████████▉ 58 █████████▉ 59 █████▍ 60 ███▍ 61 █▋ 62 ▊ 63 ▍ 64 ▏ 65 ⎸ 66 ⎸ 67 ⎸
PureBasic
<lang purebasic>Procedure.f randomf(resolution = 2147483647)
ProcedureReturn Random(resolution) / resolution
EndProcedure
Procedure.f normalDist() ;Box Muller method
ProcedureReturn Sqr(-2 * Log(randomf())) * Cos(2 * #PI * randomf())
EndProcedure
Procedure sample(n, nBins = 50)
Protected i, maxBinValue, binNumber Protected.f d, mean, sum, sumSq, mx, mn, range Dim dat.f(n) For i = 1 To n dat(i) = normalDist() Next ;show mean, standard deviation, find max & min. mx = -1000 mn = 1000 sum = 0 sumSq = 0 For i = 1 To n d = dat(i) If d > mx: mx = d: EndIf If d < mn: mn = d: EndIf sum + d sumSq + d * d Next PrintN(Str(n) + " data terms used.") PrintN("Largest term was " + StrF(mx) + " & smallest was " + StrF(mn)) mean = sum / n PrintN("Mean = " + StrF(mean)) PrintN("Stddev = " + StrF((sumSq / n) - Sqr(mean * mean))) ;show histogram range = mx - mn Dim bins(nBins) For i = 1 To n binNumber = Int(nBins * (dat(i) - mn) / range) bins(binNumber) + 1 Next maxBinValue = 1 For i = 0 To nBins If bins(i) > maxBinValue maxBinValue = bins(i) EndIf Next #normalizedMaxValue = 70 For binNumber = 0 To nBins tickMarks = Round(bins(binNumber) * #normalizedMaxValue / maxBinValue, #PB_Round_Nearest) PrintN(ReplaceString(Space(tickMarks), " ", "#")) Next PrintN("")
EndProcedure
If OpenConsole()
sample(100000) Print(#CRLF$ + #CRLF$ + "Press ENTER to exit"): Input() CloseConsole()
EndIf</lang> Sample output:
100000 data terms used. Largest term was 4.5352029800 & smallest was -4.5405135155 Mean = 0.0012346541 Stddev = 0.9959455132 # ### ###### ########## ################## ############################ ######################################### ##################################################### ################################################################ ###################################################################### ###################################################################### ################################################################ ##################################################### ######################################### ############################# ################## ########## ###### ### #
Python
This uses the external matplotlib package as well as the built-in standardlib function random.gauss. <lang python>from __future__ import division import matplotlib.pyplot as plt import random
mean, stddev, size = 50, 4, 100000 data = [random.gauss(mean, stddev) for c in range(size)]
mn = sum(data) / size sd = (sum(x*x for x in data) / size
- (sum(data) / size) ** 2) ** 0.5
print("Sample mean = %g; Stddev = %g; max = %g; min = %g for %i values"
% (mn, sd, max(data), min(data), size))
plt.hist(data,bins=50)</lang>
- Output:
Sample mean = 49.9822; Stddev = 4.00938; max = 66.8091; min = 33.5283 for 100000 values
Racket
This shows how one would generate samples from a normal distribution, compute statistics and plot a histogram.
<lang racket>
- lang racket
(require math (planet williams/science/histogram-with-graphics))
(define data (sample (normal-dist 50 4) 100000))
(displayln (~a "Mean:\t" (mean data))) (displayln (~a "Stddev:\t" (stddev data))) (displayln (~a "Max:\t" (apply max data))) (displayln (~a "Min:\t" (apply min data)))
(define h (make-histogram-with-ranges-uniform 40 30 70)) (for ([x data]) (histogram-increment! h x)) (histogram-plot h "Normal distribution μ=50 σ=4") </lang>
The other part of the task was to produce normal distributed numbers from a unit distribution. The following code is an implementation of the polar method. It is a slightly modified version of code originally written by Sebastian Egner. <lang racket>
- lang racket
(require math)
(define random-normal
(let ([unit (uniform-dist)] [next #f]) (λ (μ σ) (if next (begin0 (+ μ (* σ next)) (set! next #f)) (let loop () (let* ([v1 (- (* 2.0 (sample unit)) 1.0)] [v2 (- (* 2.0 (sample unit)) 1.0)] [s (+ (sqr v1) (sqr v2))]) (cond [(>= s 1) (loop)] [else (define scale (sqrt (/ (* -2.0 (log s)) s))) (set! next (* scale v2)) (+ μ (* σ scale v1))])))))))
</lang>
REXX
The REXX language doesn't have any "higher math" BIF functions like SIN/COS/LN/LOG/SQRT/POW/etc,
so we hoi polloi programmers have to roll our own.
<lang rexx>/*REXX program generates 10,000 normally distributed numbers (Gaussian distribution).*/
parse arg n seed . /*obtain optional arguments from the CL*/
if n== | n=="," then n=10000 /*Not specified? Then use the default.*/
if datatype(seed,'W') then call random ,,seed /*seed is for repeatable RANDOM numbers*/
call pi /*call subroutine to define pi constant*/
do g=1 for n /*generate N uniform random numbers. */ #.g=sqrt(-2*ln(rand()))*cos(2*pi*rand()) /*assign a uniform random number to #. */ end /*g*/
mn=#.1; mx=mn; s=0; ss=0; noise=n*.0005 /*calculate the noise: 1/20th % of N.*/
do j=1 for n; _=#.j; s=s+_; ss=ss+_*_ /*the sum, and the sum of squares. */ mn=min(mn,#.j); mx=max(mx,#.j) /*find the minimum and the maximum. */ end /*j*/
!.=0 say 'number of data points = ' aa(n ) say ' minimum = ' aa(mn ) say ' maximum = ' aa(mx ) say ' arithmetic mean = ' aa(s/n) say ' standard deviation = ' aa(sqrt(ss/n - (s/n)**2)) r=mx-mn /*is used for scaling the histogram. */ parse value scrSize() with sd sw . /*obtain the (true) screen size of term*/ /*◄──not all REXXes have this BIF*/ sdE=sd-4 /*the effective (useable) screen depth.*/ swE=sw-1 /* " " " " width.*/ ?mn=; ?mx=
do i=1 for n; ?=trunc((#.i-mn)/r*sdE) !.?=!.?+1 /*bump the counter. */ if ?mn== then do; ?mn=!.j; ?mx=!.j; end /*define min, max (1st).*/ ?mn=min(?mn, !.?); ?mx=max(?mx, !.?) /*find the min and max. */ end /*i*/
f=swE/?mx /*limit graph to 1 screen*/
do h=0 for sdE; _=!.h /*obtain a data point. */ if _>noise then say copies('─', trunc(_*f)) /*display the histogram. */ end /*h*/
exit /*\tick a fork in it, we're all done. */ /*──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────*/ aa: parse arg a; return left(,(a>=0)+2*datatype(a,'W'))a /*prepend a blank if #>=0, add two blanks if its whole*/ e: e =2.7182818284590452353602874713526624977572470936999595749669676277240766303535; return e pi: pi=3.1415926535897932384626433832795028841971693993751058209749445923078164062862; return pi rand: return random(0,1e5) / 1e5 /*REXX generates uniform random postive integers.*/ r2r: return arg(1) // (2*pi()) /*normalize the given angle (in radians) to ±2pi.*/ .sincos: parse arg z,_,i; x=x*x; p=z; do k=2 by 2; _=-_*x/(k*(k+i)); z=z+_; if z=p then leave; p=z; end; return z ln: procedure; parse arg x,f; call e; ig=x>1.5; is=1-2*(ig\==1); ii=0; xx=x; return .ln_comp() .ln_comp: do while ig&xx>1.5|\ig&xx<.5;_=e; do k=-1;iz=xx*_**-is;if k>=0&(ig&iz<1|\ig&iz>.5) then leave; _=_*_;izz=iz; end
xx=izz;ii=ii+is*2**k;end; x=x*e**-ii-1;z=0;_=-1;p=z; do k=1;_=-_*x;z=z+_/k;if z=p then leave;p=z;end; return z+ii
cos: procedure; parse arg x; x=r2r(x); a=abs(x); hpi=pi*.5; numeric fuzz min(6,digits()-3); if a=pi() then return -1
if a=hpi|a=hpi*3 then return 0; if a=pi()/3 then return .5; if a=pi()*2/3 then return -.5; return .sinCos(1,1,-1)
sqrt: procedure; parse arg x; if x=0 then return 0; d=digits(); m.=9; numeric digits; numeric form; h=d+6
parse value format(x,2,1,,0) 'E0' with g 'E' _ .; g=g*.5'e'_%2; do j=0 while h>9; m.j=h; h=h%2+1; end /*j*/ do k=j+5 to 0 by -1; numeric digits m.k; g=(g+x/g)*.5; end /*k*/; numeric digits d; return g/1</lang>
This REXX program makes use of scrsize REXX program (or BIF) which is used to determine the screen size of the terminal (console).
The SCRSIZE.REX REXX program is included here ──► SCRSIZE.REX.
output when using the default input:
(The output shown when the screen size is 50x80.)
number of data points = 10000 minimum = -3.41571894 maximum = 3.96752904 arithmetic mean = -0.0150910306 standard deviation = 0.99056458 ─ ─ ─── ───── ─────── ─────── ──────────── ───────────────── ───────────────────── ───────────────────────────── ──────────────────────────────────── ─────────────────────────────────────────── ─────────────────────────────────────────────── ────────────────────────────────────────────────────── ──────────────────────────────────────────────────────────────────── ────────────────────────────────────────────────────────────────── ──────────────────────────────────────────────────────────────────────── ─────────────────────────────────────────────────────────────────── ─────────────────────────────────────────────────────────────────────────────── ───────────────────────────────────────────────────────────────────────── ─────────────────────────────────────────────────────────────────────── ────────────────────────────────────────────────────────────────── ─────────────────────────────────────────────────────────── ─────────────────────────────────────────────────── ───────────────────────────────────────────── ─────────────────────────────────────── ─────────────────────────────── ───────────────────────────── ────────────────── ───────────── ──────── ─────── ──── ─── ─ ─
output when using the default input:
(The output shown when the screen size is 60x130.)
number of data points = 10000 minimum = -3.83073183 maximum = 3.61051026 arithmetic mean = 0.00421997333 standard deviation = 0.981924955 ── ── ──── ── ───── ──────── ──────── ──────── ─────────── ────────────── ────────────────── ──────────────────── ──────────────────────────── ───────────────────────────────── ──────────────────────────────── ───────────────────────────────────────────── ─────────────────────────────────────────────── ─────────────────────────────────────────────────── ───────────────────────────────────────────────────────────────── ───────────────────────────────────────────────────────────────── ────────────────────────────────────────────────────────────────── ──────────────────────────────────────────────────────────────────────── ─────────────────────────────────────────────────────────────────────────────────────────────── ──────────────────────────────────────────────────────────────────────────────────────────────── ──────────────────────────────────────────────────────────────────────────────────────────────── ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── ──────────────────────────────────────────────────────────────────────────────────────────────────────────────── ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── ──────────────────────────────────────────────────────────────────────────────────────────────────────── ────────────────────────────────────────────────────────────────────────────────────────────────── ────────────────────────────────────────────────────────────────────────────────────────────── ──────────────────────────────────────────────────────────────────────────────────────────────── ─────────────────────────────────────────────────────────────────────────────────────────── ────────────────────────────────────────────────────────────────────────────────────── ────────────────────────────────────────────────────────────────────────── ──────────────────────────────────────────────────────────────────── ──────────────────────────────────────────────────────────────── ───────────────────────────────────────────────── ─────────────────────────────────── ────────────────────────────────────── ─────────────────────────────────── ─────────────────────────── ─────────────────── ────────────────────── ───────────── ─────────── ─────── ────── ─────── ─── ──── ─── ──
Run BASIC
<lang runbasic> s = 100000 h$ = "=============================================================" h$ = h$ + h$ dim ndis(s) ' mean and standard deviation. mx = -9999 mn = 9999 sum = 0 sumSqr = 0 for i = 1 to s ' find minimum and maximum ms = rnd(1) ss = rnd(1) nd = (-2 * log(ms))^0.5 * cos(2 *3.14159265 * ss) ' normal distribution ndis(i) = nd mx = max(mx, nd) mn = min(mn, nd) sum = sum + nd sumSqr = sumSqr + nd ^ 2 next i
mean = sum / s range = mx - mn
print "Samples :"; s print "Largest :"; mx print "Smallest :"; mn print "Range :"; range print "Mean :"; mean print "Stand Dev :"; (sumSqr /s -mean^2)^0.5
'Show chart of histogram nBins = 50 dim bins(nBins) for i = 1 to s z = int((ndis(i) -mn) /range *nBins) bins(z) = bins(z) + 1 mb = max(bins(z),mb) next i for b = 0 to nBins -1
print using("##",b);" ";using("#####",bins(b));" ";left$(h$,(bins(b) / mb) * 90)
next b END</lang>
- Output:
Samples :100000 Largest :4.61187177 Smallest :-4.21695424 Range :8.82882601 Mean :-9.25042513e-4 Stand Dev :1.00680067 = == === ===== ======== ============= ================= ======================= ============================== ======================================= =============================================== ========================================================= =================================================================== =========================================================================== =================================================================================== ======================================================================================= ========================================================================================== ======================================================================================== ====================================================================================== ================================================================================= ============================================================================ ================================================================== ======================================================== ============================================== ===================================== ============================ ===================== =============== ========== ======= ===== === = =
SAS
<lang sas>data test; n=100000; twopi=2*constant('pi'); do i=1 to n; u=ranuni(0); v=ranuni(0); r=sqrt(-2*log(u)); x=r*cos(twopi*v); y=r*sin(twopi*v); z=rannor(0); output; end; keep x y z;
proc means mean stddev;
proc univariate; histogram /normal;
run;
/* Variable Mean Std Dev
x -0.0052720 0.9988467 y 0.000023995 1.0019996 z 0.0012857 1.0056536
- /</lang>
Sidef
<lang ruby>define τ = Number.tau
func normdist (m, σ) {
var r = sqrt(-2 * 1.rand.log) var Θ = (τ * 1.rand) r * Θ.cos * σ + m
}
var size = 100_000 var mean = 50 var stddev = 4
var dataset = size.of { normdist(mean, stddev) } var m = (dataset.sum(0) / size) say ("m: #{m}")
var σ = sqrt(dataset »**» 2 -> sum(0) / size - m**2) say ("s: #{σ}")
var hash = Hash() dataset.each { |n| hash{ n.round(0) } := 0 ++ }
var scale = (180 * stddev / size) const subbar = < ⎸ ▏ ▎ ▍ ▌ ▋ ▊ ▉ █ >
for i in (hash.keys.map{.to_i}.sort) {
var x = (hash{i} * scale) var full = x.int var part = (8 * (x - full)) say (i, "\t", '█' * full, subbar[part])
}</lang>
- Output:
m: 49.99538275618550306540055142077589 s: 4.00295544816687358837821680496471 33 ⎸ 34 ⎸ 35 ⎸ 36 ▏ 37 ▎ 38 ▊ 39 █▋ 40 ███▏ 41 ██████▏ 42 █████████▍ 43 ███████████████▌ 44 ███████████████████████▋ 45 ████████████████████████████████▍ 46 ████████████████████████████████████████████▎ 47 █████████████████████████████████████████████████████▍ 48 ███████████████████████████████████████████████████████████████▍ 49 █████████████████████████████████████████████████████████████████████▌ 50 ████████████████████████████████████████████████████████████████████████▋ 51 █████████████████████████████████████████████████████████████████████▊ 52 ██████████████████████████████████████████████████████████████▏ 53 ████████████████████████████████████████████████████▉ 54 ███████████████████████████████████████████▉ 55 █████████████████████████████████▎ 56 ███████████████████████⎸ 57 ███████████████▋ 58 █████████▋ 59 █████▍ 60 ███▍ 61 █▊ 62 ▋ 63 ▍ 64 ▏ 65 ⎸ 66 ⎸
Tcl
<lang tcl>package require Tcl 8.5
- Uses the Box-Muller transform to compute a pair of normal random numbers
proc tcl::mathfunc::nrand {mean stddev} {
variable savednormalrandom if {[info exists savednormalrandom]} {
return [expr {$savednormalrandom*$stddev + $mean}][unset savednormalrandom]
} set r [expr {sqrt(-2*log(rand()))}] set theta [expr {2*3.1415927*rand()}] set savednormalrandom [expr {$r*sin($theta)}] expr {$r*cos($theta)*$stddev + $mean}
} proc stats {size {slotfactor 10}} {
set sum 0.0 set sum2 0.0 for {set i 0} {$i < $size} {incr i} {
set r [expr { nrand(0.5, 0.2) }]
incr histo([expr {int(floor($r*$slotfactor))}]) set sum [expr {$sum + $r}] set sum2 [expr {$sum2 + $r**2}]
} set mean [expr {$sum / $size}] set stddev [expr {sqrt($sum2/$size - $mean**2)}] puts "$size numbers" puts "Mean: $mean" puts "StdDev: $stddev" foreach i [lsort -integer [array names histo]] {
puts [string repeat "*" [expr {$histo($i)*350/int($size)}]]
}
}
stats 100 puts "" stats 1000 puts "" stats 10000 puts "" stats 100000 20</lang> Sample output:
100 numbers Mean: 0.49355955990390254 StdDev: 0.19651396178121985 *** ******* ************** *********************************** ******************************************************** ****************************************************************** ************************************************************************* ****************************************** ************************************** ************** 1000 numbers Mean: 0.5066940614105869 StdDev: 0.2016794788065389 * ***** ************** **************************** ********************************************************** **************************************************************** ************************************************************* ****************************************************** *********************************** ************ ********* * 10000 numbers Mean: 0.49980964730768285 StdDev: 0.1968441612522318 * ***** *************** ******************************* ***************************************************** ****************************************************************** ******************************************************************* **************************************************** ********************************* *************** ***** * 100000 numbers Mean: 0.49960438950922254 StdDev: 0.20060211160998606 * ** *** ****** ********* ************** ****************** *********************** ***************************** ******************************** ********************************** ********************************** ******************************** **************************** *********************** ****************** ************* ********* ****** *** ** *
The blank lines in the output are where the number of samples is too small to even merit a single unit on the histogram.