Statistics/Chi-squared distribution: Difference between revisions
Content added Content deleted
SqrtNegInf (talk | contribs) (Added Perl) |
|||
Line 87: | Line 87: | ||
<br /><br /><br /> |
<br /><br /><br /> |
||
=={{header|jq}}== |
|||
{{works with|jq}} |
|||
'''Also works with gojq, the Go implementation of jq.''' |
|||
<syntaxhighlight lang=sh> |
|||
# f must have arity 0 |
|||
def integrate($a; $b; $n; f): |
|||
(($b - $a) / $n) as $h |
|||
| reduce range(0;$n) as $i (0; |
|||
($a + $i * $h) as $x |
|||
| . + ((( ($x|f) + 4 * (($x + ($h/2))|f) + (($x + $h)|f)) / 6)) ) |
|||
| . * $h; |
|||
# '''Lower Incomplete Gamma Function''' |
|||
# Integral(from:0; to:$x) t^($a-1)*(-t | exp) dt |
|||
def gammaIncomplete($a; $x; $epsilon): |
|||
def small($a;$b): (($a-$b)/($a+$b)) | length < $epsilon or |
|||
(($a-$b)|length) < 1e-10; # length here is abs |
|||
def f0: (if . == 0 then 1 else pow(.; $a-1) end) * ((- .) |exp) ; |
|||
# If $upper is large then divide to conquer: |
|||
def integrate($upper; $n): |
|||
if $upper < $a * 2 then integrate(0; $upper; $n; f0) |
|||
else integrate( 0; 2*$a; $n; f0) + |
|||
integrate(2*$a; $upper; $n; f0) |
|||
end ; |
|||
if $a == 1 then 1 - ((-$x)|exp) |
|||
elif $x > $a * 1e4 then 1 |
|||
else {current: 0, n: 25, iteration: 0} |
|||
| .prev = 1 |
|||
| until( small(.current; .prev); |
|||
.n *= 2 |
|||
| .iteration += 1 |
|||
| .prev = .current |
|||
| .current = integrate($x; .n) ) |
|||
| .current |
|||
end ; |
|||
def gammaIncomplete($a; $x): |
|||
# 1e-3 is adequate for many purposes; 1e-4 takes significantly longer for small $x |
|||
gammaIncomplete($a; $x; 1e-5); |
|||
def Chi2_cdf($x; $k): |
|||
if $x == 0 then 0 |
|||
else [1, gammaIncomplete($k/2;$x/2) / (($k/2)|gamma)]|min |
|||
end ; |
|||
def Chi2_pdf($x; $k): |
|||
if $x <= 0 then 0 |
|||
else ((-$x/2)|exp) * pow($x; $k/2 -1) / (pow(2;$k/2) * (($k/2)|gamma)) |
|||
end; |
|||
</syntaxhighlight> |
|||
'''Formatting''' |
|||
<syntaxhighlight lang=sh> |
|||
def lpad($len): tostring | ($len - length) as $l | (" " * $l)[:$l] + .; |
|||
# For formatting numbers |
|||
# ... but leave non-numbers, 0, and numbers using E alone |
|||
def round($dec): |
|||
def rpad($len): tostring | ($len - length) as $l | . + ("0" * $l); |
|||
if type == "string" then . |
|||
elif . == 0 then "0" |
|||
else pow(10;$dec) as $m |
|||
| . * $m | round / $m |
|||
| tostring |
|||
| (capture("(?<n>^[^.]*[.])(?<f>[0-9]*$)") |
|||
| .n + (.f|rpad($dec))) |
|||
// if test("^[0-9]+$") then . + "." + ("" | rpad($dec)) else null end |
|||
// . |
|||
end; |
|||
</syntaxhighlight> |
|||
'''The Tasks''' |
|||
<syntaxhighlight lang=sh> |
|||
def tables: |
|||
def r: round(4) | lpad(10); |
|||
" Values of the χ2 probability distribution function", |
|||
" x/k 1 2 3 4 5", |
|||
(range(0;11) as $x |
|||
| "\($x|lpad(2)) \(reduce range(1; 6) as $k (""; . + (Chi2_pdf($x; $k) | r) ))" ), |
|||
"\n Values for χ2 with 3 degrees of freedom", |
|||
"χ2 cum pdf p-value", |
|||
( (1, 2, 4, 8, 16, 32) as $x |
|||
| Chi2_cdf($x; 3) as $cdf |
|||
| "\($x|lpad(2)) \($cdf|r)\(1-$cdf|r)" |
|||
); |
|||
def airport: [[77, 23], [88, 12], [79, 21], [81, 19]]; |
|||
def expected: [81.25, 18.75]; |
|||
def airport($airport; $expected): |
|||
def dof: ($airport|length - 1) / ($airport[0]|length - 1); |
|||
reduce range(0; $airport|length) as $i (0; |
|||
reduce range(0; $airport[0]|length) as $j (.; |
|||
. + (($airport[$i][$j] - $expected[$j]) | .*.) / $expected[$j] ) ) |
|||
| ("\nFor airport data table:", |
|||
" diff sum : \(.)", |
|||
" d.o.f. : \(dof)", |
|||
" χ2 value : \(Chi2_pdf(.; dof))", |
|||
" p-value : \(Chi2_cdf(.; dof)|round(4))" ) ; |
|||
tables, |
|||
airport(airport; expected) |
|||
</syntaxhighlight> |
|||
{{output}} |
|||
<pre> |
|||
Values of the χ2 probability distribution function |
|||
x/k 1 2 3 4 5 |
|||
0 0 0 0 0 0 |
|||
1 0.2420 0.3033 0.2420 0.1516 0.0807 |
|||
2 0.1038 0.1839 0.2076 0.1839 0.1384 |
|||
3 0.0514 0.1116 0.1542 0.1673 0.1542 |
|||
4 0.0270 0.0677 0.1080 0.1353 0.1440 |
|||
5 0.0146 0.0410 0.0732 0.1026 0.1220 |
|||
6 0.0081 0.0249 0.0487 0.0747 0.0973 |
|||
7 0.0046 0.0151 0.0319 0.0528 0.0744 |
|||
8 0.0026 0.0092 0.0207 0.0366 0.0551 |
|||
9 0.0015 0.0056 0.0133 0.0250 0.0399 |
|||
10 0.0009 0.0034 0.0085 0.0168 0.0283 |
|||
Values for χ2 with 3 degrees of freedom |
|||
χ2 cum pdf p-value |
|||
1 0.1988 0.8012 |
|||
2 0.4276 0.5724 |
|||
4 0.7386 0.2614 |
|||
8 0.9540 0.0460 |
|||
16 0.9989 0.0011 |
|||
32 1.0000 0 |
|||
For airport data table: |
|||
diff sum : 4.512820512820513 |
|||
d.o.f. : 3 |
|||
χ2 value : 0.08875392598443502 |
|||
p-value : 0.7889 |
|||
</pre> |
|||
=={{header|Julia}}== |
=={{header|Julia}}== |