Statistics/Chi-squared distribution: Difference between revisions

(→‎{{header|jq}}: simplify)
Line 91:
{{works with|jq}}
'''Also works with gojq, the Go implementation of jq.'''
<syntaxhighlight lang=sh>
# f must have arity 0
def integrate($a; $b; $n; f):
(($b - $a) / $n) as $h
| reduce range(0;$n) as $i (0;
($a + $i * $h) as $x
| . + ((( ($x|f) + 4 * (($x + ($h/2))|f) + (($x + $h)|f)) / 6)) )
| . * $h;
 
# '''Lower Incomplete Gamma Function'''
# Integral(from:0; to:$x) t^($a-1)*(-t | exp) dt
def gammaIncomplete($a; $x; $epsilon):
def small($a;$b): (($a-$b)/($a+$b)) | length < $epsilon or
(($a-$b)|length) < 1e-10; # length here is abs
def f0: (if . == 0 then 1 else pow(.; $a-1) end) * ((- .) |exp) ;
 
# If $upper is large then divide to conquer:
def integrate($upper; $n):
if $upper < $a * 2 then integrate(0; $upper; $n; f0)
else integrate( 0; 2*$a; $n; f0) +
integrate(2*$a; $upper; $n; f0)
end ;
 
if $a == 1 then 1 - ((-$x)|exp)
elif $x > $a * 1e4 then 1
else {current: 0, n: 25, iteration: 0}
| .prev = 1
| until( small(.current; .prev);
.n *= 2
| .iteration += 1
| .prev = .current
| .current = integrate($x; .n) )
| .current
end ;
 
def gammaIncomplete($a; $x):
# 1e-3 is adequate for many purposes; 1e-4 takes significantly longer for small $x
gammaIncomplete($a; $x; 1e-5);
def Chi2_cdf($x; $k):
if $x == 0 then 0
else [1, gammaIncomplete($k/2;$x/2) / (($k/2)|gamma)]|min
end ;
 
def Chi2_pdf($x; $k):
if $x <= 0 then 0
else ((-$x/2)|exp) * pow($x; $k/2 -1) / (pow(2;$k/2) * (($k/2)|gamma))
end;
</syntaxhighlight>
'''Formatting'''
<syntaxhighlight lang=shjq>
def lpad($len): tostring | ($len - length) as $l | (" " * $l)[:$l] + .;
 
Line 158 ⟶ 110:
// .
end;
</syntaxhighlight>
'''Chi-squared pdf and cdf'''
<syntaxhighlight lang=shjq>
def Chi2_pdf($x; $k):
if $x =<= 0 then 0
else # ((-$x/2)|exp) * pow($x; $k/2 -1) / (pow(2;$k/2) * (($k/2)|gamma))
| . + ((( (-$x|f/2) + 4($k/2 -1) * (($x|log) +- ( ($hk/2))*(2|flog) + (($x + $hk/2)|flgamma))) /| 6)) )exp
end ;
 
# $k is the degrees of freedom
# Use lgamma to avoid $x^m (for large $x and large m) and
# to avoid calling gamma for large $k
def Chi2_cdf($x; $k):
if $ax == 10 then 1 - ((-$x)|exp)0
elif $x > $a(1e3 * 1e4$k) then 1
else 1e-15 as $tol # for example
| { s: 0, m: 0, term: $tol}
| until (.term|length < $tol;
# .term = (pow($x/2; .m) / (($k/2 + .m + 1)|gamma))
.term = (((.m * (($x/2)|log)) - (($k/2 + .m + 1)|lgamma)) | exp)
| .ns *+= 2.term
| .m *+= $h;1)
| .s * ( (-$x/2) + ($k/2)*(($x/2)|log)|exp)
end ;
 
</syntaxhighlight>
'''The Tasks'''
<syntaxhighlight lang=shjq>
def tables:
def r: round(4) | lpad(10);
Line 174 ⟶ 151:
| "\($x|lpad(2)) \($cdf|r)\(1-$cdf|r)"
);
 
def airport: [[77, 23], [88, 12], [79, 21], [81, 19]];
def expected: [81.25, 18.75];
Line 195 ⟶ 172:
{{output}}
<pre>
Values of the χ2 probability distribution function
x/k 1 2 3 4 5
0 0 0 0 0 0
Line 211 ⟶ 188:
Values for χ2 with 3 degrees of freedom
χ2 cum pdf p-value
1 0.19881987 0.80128013
2 0.4276 0.5724
4 0.73867385 0.26142615
8 0.9540 0.0460
16 0.9989 0.0011
32 1.0000 0.0000
 
For airport data table:
diff sum : 4.512820512820513
d.o.f. : 3
χ2 value : 0.0887539259844350208875392598443499
p-value : 0.7889
</pre>
 
 
=={{header|Julia}}==
2,496

edits