Cumulative standard deviation
Write a stateful function, class, generator or coroutine that takes a series of floating point numbers, one at a time, and returns the running standard deviation of the series. The task implementation should use the most natural programming style of those listed for the function in the implementation language; the task must state which is being used. Do not apply [[wp:Bessel's correction|Bessel's correction]; the returned standard deviation should always be computed as if the sample seen so far is the entire population.
You are encouraged to solve this task according to the task description, using any language you may know.
Use this to compute the standard deviation of the demonstration set:
C
Of course this function is not thread-safe nor it can be used to compute the standard deviation just for a set of values per time.
<lang c>#include <stdio.h>
- include <stdlib.h>
- include <math.h>
enum Action { VALUE, STDDEV, MEAN, VAR, COUNT, RESET }; double stat_object(double v, enum Action action) {
static double sum = 0.0; static double sum2 = 0.0; static size_t num = 0;
double m;
switch(action) { case VALUE: num++; sum += v; sum2 += v*v; return stat_object(0.0, STDDEV); case STDDEV: return sqrt(stat_object(0.0, VAR)); case MEAN: return (num>0) ? sum/(double)num : 0.0; case VAR: m = stat_object(0.0, MEAN); return (num>0) ? (sum2/(double)num - m*m) : 0.0; case COUNT: return num; case RESET: sum = sum2 = 0.0; num = 0; return 0.0; }
}</lang>
<lang c>double v[] = { 2,4,4,4,5,5,7,9 };
int main() {
int i; double sd;
for(i=0; i < sizeof(v)/sizeof(double) ; i++) sd = stat_object(v[i], VALUE);
printf("standard dev = %lf\n", sd);
return 0;
}</lang>
Forth
<lang forth>
- f+! ( x addr -- ) dup f@ f+ f! ;
- st-count ( stats -- n ) f@ ;
- st-sum ( stats -- sum ) float+ f@ ;
- st-sumsq ( stats -- sum*sum ) 2 floats + f@ ;
- st-add ( fnum stats -- )
1e dup f+! float+ fdup dup f+! float+ fdup f* f+! ;
- st-mean ( stats -- mean )
dup st-sum st-count f/ ;
- st-variance ( stats -- var )
dup st-sumsq dup st-mean fdup f* dup st-count f* f- st-count f/ ;
- st-stddev ( stats -- stddev )
st-variance fsqrt ;
</lang> This variation is more numerically stable when there are large numbers of samples or large sample ranges. <lang forth>
- st-count ( stats -- n ) f@ ;
- st-mean ( stats -- mean ) float+ f@ ;
- st-nvar ( stats -- n*var ) 2 floats + f@ ;
- st-variance ( stats -- var ) dup st-nvar st-count f/ ;
- st-stddev ( stats -- stddev ) st-variance fsqrt ;
- st-add ( x stats -- )
1e dup f+! \ update count fdup dup st-mean f- fswap ( delta x ) fover dup st-count f/ ( delta x delta/n ) float+ dup f+! \ update mean ( delta x ) dup f@ f- f* float+ f+! ; \ update nvar
</lang> <lang forth> create stats 0e f, 0e f, 0e f,
2e stats st-add 4e stats st-add 4e stats st-add 4e stats st-add 5e stats st-add 5e stats st-add 7e stats st-add 9e stats st-add
stats st-stddev f. \ 2. </lang>
Fortran
This one imitates C and suffers the same problems: the function is not thread-safe and must be used to compute the stddev for one set per time.
<lang fortran>program Test_Stddev
implicit none
real, dimension(8) :: v = (/ 2,4,4,4,5,5,7,9 /) integer :: i real :: sd
do i = 1, size(v) sd = stat_object(v(i)) end do
print *, "std dev = ", sd
contains
recursive function stat_object(a, cmd) result(stddev) real :: stddev real, intent(in) :: a character(len=*), intent(in), optional :: cmd
real, save :: summa = 0.0, summa2 = 0.0 integer, save :: num = 0
real :: m
if ( .not. present(cmd) ) then num = num + 1 summa = summa + a summa2 = summa2 + a*a stddev = stat_object(0.0, "stddev") else select case(cmd) case("stddev") stddev = sqrt(stat_object(0.0, "variance")) case("variance") m = stat_object(0.0, "mean") if ( num > 0 ) then stddev = summa2/real(num) - m*m else stddev = 0.0 end if case("count") stddev = real(num) case("mean") if ( num > 0 ) then stddev = summa/real(num) else stddev = 0.0 end if case("reset") summa = 0.0 summa2 = 0.0 num = 0 case default stddev = 0.0 end select end if
end function stat_object
end program Test_Stddev</lang>
Objective-C
<lang objc>#import <stdio.h>
- import <math.h>
- import <objc/Object.h>
@interface SDAccum : Object {
double sum, sum2; unsigned int num;
} -(id)init; -(double)value: (double)v; -(unsigned int)count; -(double)mean; -(double)variance; -(double)stddev; @end
@implementation SDAccum -(id)init {
sum = sum2 = 0.0; num = 0; return self;
} -(double)value: (double)v {
sum = sum + v; sum2 = sum2 + v*v; num++; return [self stddev];
} -(unsigned int)count {
return num;
} -(double)mean {
return (num>0) ? sum/(double)num : 0.0;
} -(double)variance {
double m = [self mean]; return (num>0) ? (sum2/(double)num - m*m) : 0.0;
} -(double)stddev {
return sqrt([self variance]);
} @end</lang>
<lang objc>double v[] = { 2,4,4,4,5,5,7,9 };
int main() {
int i; double sd;
id sdacc = [SDAccum new];
for(i=0; i < sizeof(v)/sizeof(double) ; i++) sd = [sdacc value: v[i]];
printf("std dev = %lf\n", sd);
return 0;
}</lang>
Perl
<lang perl>{
package SDAccum; sub new {
my $class = shift; my $self = {}; $self->{sum} = 0.0; $self->{sum2} = 0.0; $self->{num} = 0; bless $self, $class; return $self;
} sub count {
my $self = shift; return $self->{num};
} sub mean {
my $self = shift; return ($self->{num}>0) ? $self->{sum}/$self->{num} : 0.0;
} sub variance {
my $self = shift; my $m = $self->mean; return ($self->{num}>0) ? $self->{sum2}/$self->{num} - $m * $m : 0.0;
} sub stddev {
my $self = shift; return sqrt($self->variance);
} sub value {
my $self = shift; my $v = shift; $self->{sum} += $v; $self->{sum2} += $v * $v; $self->{num}++; return $self->stddev;
}
}</lang>
<lang perl>my $sdacc = SDAccum->new; my $sd;
foreach my $v ( 2,4,4,4,5,5,7,9 ) {
$sd = $sdacc->value($v);
} print "std dev = $sd\n";</lang>
Python
The program should work with Python 2.x and 3.x, although the output would not be a tuple in 3.x <lang python>>>> from math import sqrt >>> def sd(x):
sd.sum += x sd.sum2 += x*x sd.n += 1.0 sum, sum2, n = sd.sum, sd.sum2, sd.n return sqrt(sum2/n - sum*sum/n/n)
>>> sd.sum = sd.sum2 = sd.n = 0 >>> for value in (2,4,4,4,5,5,7,9):
print (value, sd(value))
(2, 0.0)
(4, 1.0)
(4, 0.94280904158206258)
(4, 0.8660254037844386)
(5, 0.97979589711327075)
(5, 1.0)
(7, 1.3997084244475311)
(9, 2.0)
>>> </lang>
REXX
This is indeed Object REXX.
<lang orexx>sdacc = .SDAccum~new x = .array~of(2,4,4,4,5,5,7,9) sd = 0 do i = 1 to x~size
sd = sdacc~value(x[i])
end
say "std dev = "sd
- class SDAccum
- method sum attribute
- method sum2 attribute
- method count attribute
- method init
self~sum = 0.0 self~sum2 = 0.0 self~count = 0
- method value
expose sum sum2 count parse arg x sum = sum + x sum2 = sum2 + x*x count = count + 1 return self~stddev
- method mean
expose sum count return sum/count
- method variance
expose sum2 count m = self~mean return sum2/count - m*m
- method stddev
return self~sqrt(self~variance)
- method sqrt
arg n if n = 0 then return 0 ans = n / 2 prev = n do until prev = ans prev = ans ans = ( prev + ( n / prev ) ) / 2 end return ans</lang>
Ruby
Object
Uses an object to keep state.
"Simplification of the formula [...] for standard deviation [...] can be memorized as taking the square root of (the average of the squares less the square of the average)." [1]
<lang ruby>class StdDevAccumulator
def initialize @n, @sum, @sumofsquares = 0, 0.0, 0.0 end def <<(num) # return self to make this possible: sd << 1 << 2 << 3 # => 0.816496580927726 @n += 1 @sum += num @sumofsquares += num**2 self end def stddev Math.sqrt( (@sumofsquares / @n) - (@sum / @n)**2 ) end def to_s stddev.to_s end
end
sd = StdDevAccumulator.new i = 0 [2,4,4,4,5,5,7,9].each {|n| puts "adding #{n}: stddev of #{i+=1} samples is #{sd << n}" }</lang>
adding 2: stddev of 1 samples is 0.0 adding 4: stddev of 2 samples is 1.0 adding 4: stddev of 3 samples is 0.942809041582063 adding 4: stddev of 4 samples is 0.866025403784439 adding 5: stddev of 5 samples is 0.979795897113272 adding 5: stddev of 6 samples is 1.0 adding 7: stddev of 7 samples is 1.39970842444753 adding 9: stddev of 8 samples is 2.0
Closure
<lang ruby>def sdaccum
n, sum, sum2 = 0, 0.0, 0.0 lambda do |num| n += 1 sum += num sum2 += num**2 Math.sqrt( (sum2 / n) - (sum / n)**2 ) end
end
sd = sdacc [2,4,4,4,5,5,7,9].each {|n| print sd.call(n), ", "}</lang>
0.0, 1.0, 0.942809041582063, 0.866025403784439, 0.979795897113272, 1.0, 1.39970842444753, 2.0,
Smalltalk
<lang smalltalk>Object subclass: SDAccum [
|sum sum2 num| SDAccum class >> new [ |o| o := super basicNew. ^ o init. ] init [ sum := 0. sum2 := 0. num := 0 ] value: aValue [ sum := sum + aValue. sum2 := sum2 + ( aValue * aValue ). num := num + 1. ^ self stddev ] count [ ^ num ] mean [ num>0 ifTrue: [^ sum / num] ifFalse: [ ^ 0.0 ] ] variance [ |m| m := self mean. num>0 ifTrue: [^ (sum2/num) - (m*m) ] ifFalse: [ ^ 0.0 ] ] stddev [ ^ (self variance) sqrt ]
].</lang>
<lang smalltalk>|sdacc sd| sdacc := SDAccum new.
- ( 2 4 4 4 5 5 7 9 ) do: [ :v | sd := sdacc value: v ].
('std dev = %1' % { sd }) displayNl.</lang>
Tcl
With a Class
<lang tcl>oo::class create SDAccum {
variable sum sum2 num constructor {} { set sum 0.0 set sum2 0.0 set num 0 } method value {x} { set sum2 [expr {$sum2 + $x**2}] set sum [expr {$sum + $x}] incr num return [my stddev] } method count {} { return $num } method mean {} { expr {$sum / double($num)} } method variance {} { expr {($sum2 - double($num)*[my mean]**2)/double($num)} } method stddev {} { expr {sqrt([my variance])} }
}
- Demonstration
set sdacc [SDAccum new] foreach val {2 4 4 4 5 5 7 9} {
set sd [$sdacc value $val]
} puts "the standard deviation is: $sd"</lang> which produces the output:
the standard deviation is: 2.0