Statistics/Chi-squared distribution: Difference between revisions

m
→‎{{header|Wren}}: Changed to Wren S/H
m (→‎{{header|Wren}}: Changed to Wren S/H)
 
(10 intermediate revisions by 5 users not shown)
Line 88:
<br /><br /><br />
 
=={{header|jqC++}}==
<syntaxhighlight lang="c++">
{{works with|jq}}
'''Also works with gojq, the Go implementation of jq.'''
<syntaxhighlight lang=sh>
# f must have arity 0
def integrate($a; $b; $n; f):
(($b - $a) / $n) as $h
| reduce range(0;$n) as $i (0;
($a + $i * $h) as $x
| . + ((( ($x|f) + 4 * (($x + ($h/2))|f) + (($x + $h)|f)) / 6)) )
| . * $h;
 
#include <iostream>
# '''Lower Incomplete Gamma Function'''
#include <cmath>
# Integral(from:0; to:$x) t^($a-1)*(-t | exp) dt
#include <numbers>
def gammaIncomplete($a; $x; $epsilon):
#include <iomanip>
def small($a;$b): (($a-$b)/($a+$b)) | length < $epsilon or
#include <array>
(($a-$b)|length) < 1e-10; # length here is abs
def f0: (if . == 0 then 1 else pow(.; $a-1) end) * ((- .) |exp) ;
 
// The normalised lower incomplete gamma function.
# If $upper is large then divide to conquer:
double gamma_cdf(const double aX, const double aK) {
def integrate($upper; $n):
double result = 0.0;
if $upper < $a * 2 then integrate(0; $upper; $n; f0)
for ( uint32_t m else integrate( = 0; m <= 2*$a99; $n;++m f0) +{
result += pow(aX, m) / tgamma(aK + m + 1);
integrate(2*$a; $upper; $n; f0)
}
end ;
result *= pow(aX, aK) * exp(-aX);
return result;
}
 
// The cumulative probability function of the Chi-squared distribution.
if $a == 1 then 1 - ((-$x)|exp)
double cdf(const double aX, const double aK) {
elif $x > $a * 1e4 then 1
if ( aK > 1'000 && aK < 100 ) {
else {current: 0, n: 25, iteration: 0}
return 1.0;
| .prev = 1
}
| until( small(.current; .prev);
return ( aX > 0.0 && aK > 0.0 ) ? gamma_cdf(aX / 2, aK / 2) : 0.0;
.n *= 2
}
| .iteration += 1
| .prev = .current
| .current = integrate($x; .n) )
| .current
end ;
 
// The probability density function of the Chi-squared distribution.
def gammaIncomplete($a; $x):
double pdf(const double aX, const double aK) {
# 1e-3 is adequate for many purposes; 1e-4 takes significantly longer for small $x
return ( aX > 0.0 ) ? pow(aX, aK / 2 - 1) * exp(-aX / 2) / ( pow(2, aK / 2) * tgamma(aK / 2) ) : 0.0;
gammaIncomplete($a; $x; 1e-5);
}
def Chi2_cdf($x; $k):
if $x == 0 then 0
else [1, gammaIncomplete($k/2;$x/2) / (($k/2)|gamma)]|min
end ;
 
int main() {
def Chi2_pdf($x; $k):
std::cout << " Values of the Chi-squared probability distribution function" << std::endl;
if $x <= 0 then 0
std::cout << " x/k 1 2 3 4 5" << std::endl;
else ((-$x/2)|exp) * pow($x; $k/2 -1) / (pow(2;$k/2) * (($k/2)|gamma))
for ( uint32_t x = 0; x <= 10; x++ ) {
end;
std::cout << std::setw(2) << x;
for ( uint32_t k = 1; k <= 5; ++k ) {
std::cout << std::setw(10) << std::fixed << pdf(x, k);
}
std::cout << std::endl;
}
 
std::cout << "\n Values for the Chi-squared distribution with 3 degrees of freedom" << std::endl;
std::cout << "Chi-squared cumulative pdf p-value" << std::endl;
for ( uint32_t x : { 1, 2, 4, 8, 16, 32 } ) {
const double cumulative_pdf = cdf(x, 3);
std::cout << std::setw(6) << x << std::setw(19) << std::fixed << cumulative_pdf
<< std::setw(14) << ( 1.0 - cumulative_pdf ) << std::endl;
}
 
const std::array<const std::array<int32_t, 2>, 4> observed =
{ { { 77, 23 }, { 88, 12 }, { 79, 21 }, { 81, 19 } } };
const std::array<const std::array<double, 2>, 4> expected =
{ { { 81.25, 18.75 }, { 81.25, 18.75 }, { 81.25, 18.75 }, { 81.25, 18.75 } } };
double testStatistic = 0.0;
for ( uint64_t i = 0; i < observed.size(); ++i ) {
for ( uint64_t j = 0; j < observed[0].size(); ++j ) {
testStatistic += pow(observed[i][j] - expected[i][j], 2.0) / expected[i][j];
}
}
const uint64_t degreesFreedom = ( observed.size() - 1 ) / ( observed[0].size() - 1 );
 
std::cout << "\nFor the airport data:" << std::endl;
std::cout << " test statistic : " << std::fixed << testStatistic << std::endl;
std::cout << " degrees of freedom : " << degreesFreedom << std::endl;
std::cout << " Chi-squared : " << std::fixed << pdf(testStatistic, degreesFreedom) << std::endl;
std::cout << " p-value : " << std::fixed << cdf(testStatistic, degreesFreedom) << std::endl;
}
</syntaxhighlight>
{{ out }}
<pre>
Values of the Chi-squared probability distribution function
x/k 1 2 3 4 5
0 0.000000 0.000000 0.000000 0.000000 0.000000
1 0.241971 0.303265 0.241971 0.151633 0.080657
2 0.103777 0.183940 0.207554 0.183940 0.138369
3 0.051393 0.111565 0.154180 0.167348 0.154180
4 0.026995 0.067668 0.107982 0.135335 0.143976
5 0.014645 0.041042 0.073225 0.102606 0.122042
6 0.008109 0.024894 0.048652 0.074681 0.097304
7 0.004553 0.015099 0.031873 0.052845 0.074371
8 0.002583 0.009158 0.020667 0.036631 0.055112
9 0.001477 0.005554 0.013296 0.024995 0.039887
10 0.000850 0.003369 0.008500 0.016845 0.028335
 
Values for the Chi-squared distribution with 3 degrees of freedom
Chi-squared cumulative pdf p-value
1 0.198748 0.801252
2 0.427593 0.572407
4 0.738536 0.261464
8 0.953988 0.046012
16 0.998866 0.001134
32 0.999999 0.000001
 
For the airport data:
test statistic : 4.512821
degrees of freedom : 3
Chi-squared : 0.088754
p-value : 0.788850
</pre>
 
=={{header|FreeBASIC}}==
{{trans|Wren}}
<syntaxhighlight lang="vb">#define pi 4 * Atn(1)
 
Function gamma (x As Double) As Double
Dim As Integer k
Dim As Double p(12)
Dim As Double accm = p(1)
If accm = 0 Then
accm = Sqr(2 * pi)
p(1) = accm
Dim As Double k1factrl = 1
For k = 2 To 12
p(k) = Exp(13 - k) * (13 - k)^(k - 1.5) / k1factrl
k1factrl *= -(k - 1)
Next k
End If
For k = 2 To 12
accm += p(k) / (x + k - 1)
Next
accm *= Exp(-(x + 12)) * (x + 12)^(x + .5)
Return accm / x
End Function
 
Function pdf(x As Double, k As Double) As Double
'probability density function
Return Iif(x <= 0, 0, x^(k / 2 - 1) * Exp(-x / 2) / (2^(k / 2) * gamma(k / 2)))
End Function
 
Function cpdf(x As Double, k As Double) As Double
'cumulative probability distribution function
Dim As Double t = Exp(-x / 2) * (x / 2)^(k / 2)
Dim As Double s, term
Dim As Uinteger m = 0
Do
term = (x / 2)^m / gamma(k / 2 + m + 1)
s += term
m += 1
Loop Until Abs(term) < 1e-15
Return t * s
End Function
 
Dim As Integer x, k, i, j
Print " Values of the Chi-squared probability distribution function"
Print " x k = 1 k = 2 k = 3 k = 4 k = 5"
 
For x = 0 To 10
Print Using "## "; x;
For k = 1 To 5
Print Using "#.############ "; pdf(x, k);
Next
Print
Next
 
Print !"\n Values for Chi-squared with 3 degrees of freedom"
Print "Chi-squared cum pdf P value"
 
Dim As Uinteger tt(5) = {1, 2, 4, 8, 16, 32}
For x = 0 To Ubound(tt)
Dim As Double cpdff = cpdf(tt(x), 3)
Print Using " ## #.############ #.############"; tt(x); cpdff; 1-cpdff
Next
 
Dim As Uinteger airport(3,1) = {{77, 23}, {88, 12}, {79, 21}, {81, 19}}
Dim As Double expected(1) = {81.25, 18.75}
Dim As Double dsum = 0
For i = 0 To Ubound(airport,1)
For j = 0 To Ubound(airport,2)
dsum += (airport(i, j) - expected(j))^2 / expected(j)
Next
Next
Dim As Double dof = Ubound(airport,1) / Ubound(airport,2)
Print Using !"\nFor the airport data, diff total is #.############"; dsum
Print Spc(14); "degrees of freedom is"; dof
Print Spc(21); Using "Chi-squared is #.############"; pdf(dsum, dof)
Print Spc(25); Using "P value is #.############"; cpdf(dsum, dof)
 
Sleep</syntaxhighlight>
{{out}}
<pre> Values of the Chi-squared probability distribution function
x k = 1 k = 2 k = 3 k = 4 k = 5
0 0.000000000000 0.000000000000 0.000000000000 0.000000000000 0.000000000000
1 0.241970724519 0.303265329856 0.241970724519 0.151632664928 0.080656908173
2 0.103776874355 0.183939720586 0.207553748710 0.183939720586 0.138369165807
3 0.051393443268 0.111565080074 0.154180329804 0.167347620111 0.154180329804
4 0.026995483257 0.067667641618 0.107981933026 0.135335283237 0.143975910702
5 0.014644982562 0.041042499312 0.073224912810 0.102606248280 0.122041521349
6 0.008108695555 0.024893534184 0.048652173330 0.074680602552 0.097304346659
7 0.004553342922 0.015098691711 0.031873400451 0.052845420989 0.074371267720
8 0.002583373169 0.009157819444 0.020666985354 0.036631277777 0.055111960944
9 0.001477282804 0.005554498269 0.013295545236 0.024995242211 0.039886635707
10 0.000850036660 0.003368973500 0.008500366603 0.016844867498 0.028334555342
 
Values for Chi-squared with 3 degrees of freedom
Chi-squared cum pdf P value
1 0.198748043099 0.801251956901
2 0.427593295529 0.572406704471
4 0.738535870051 0.261464129949
8 0.953988294311 0.046011705689
16 0.998866015710 0.001133984290
32 0.999999476654 0.000000523346
 
For the airport data, diff total is 4.512820512821
degrees of freedom is 3
Chi-squared is 0.088753925984
P value is 0.788850426319</pre>
 
=={{header|Java}}==
<syntaxhighlight lang="java">
import java.util.List;
 
public final class StatisticsChiSquaredDistribution {
 
public static void main(String[] aArgs) {
System.out.println(" Values of the Chi-squared probability distribution function");
System.out.println(" x/k 1 2 3 4 5");
for ( int x = 0; x <= 10; x++ ) {
System.out.print(String.format("%2d", x));
for ( int k = 1; k <= 5; k++ ) {
System.out.print(String.format("%10.6f", pdf(x, k)));
}
System.out.println();
}
 
System.out.println();
System.out.println(" Values for the Chi-squared distribution with 3 degrees of freedom");
System.out.println("Chi-squared cumulative pdf p-value");
for ( int x : List.of( 1, 2, 4, 8, 16, 32 ) ) {
final double cdf = cdf(x, 3);
System.out.println(String.format("%6d%19.6f%14.6f", x, cdf, ( 1.0 - cdf )));
}
final int[][] observed = { { 77, 23 }, { 88, 12 }, { 79, 21 }, { 81, 19 } };
final double[][] expected = { { 81.25, 18.75 }, { 81.25, 18.75 }, { 81.25, 18.75 }, { 81.25, 18.75 } };
double testStatistic = 0.0;
for ( int i = 0; i < observed.length; i++ ) {
for ( int j = 0; j < observed[0].length; j++ ) {
testStatistic += Math.pow(observed[i][j] - expected[i][j], 2.0) / expected[i][j];
}
}
final int degreesFreedom = ( observed.length - 1 ) / ( observed[0].length - 1 );
System.out.println();
System.out.println("For the airport data:");
System.out.println(" test statistic : " + String.format("%.6f", testStatistic));
System.out.println(" degrees of freedom : " + degreesFreedom);
System.out.println(" Chi-squared : " + String.format("%.6f", pdf(testStatistic, degreesFreedom)));
System.out.println(" p-value : " + String.format("%.6f", cdf(testStatistic, degreesFreedom)));
}
// The gamma function.
private static double gamma(double aX) {
if ( aX < 0.5 ) {
return Math.PI / ( Math.sin(Math.PI * aX) * gamma(1.0 - aX) );
}
final double[] probabilities = new double[] {
0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313, -176.61502916214059,
12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7 };
aX -= 1.0;
double t = probabilities[0];
for ( int i = 1; i < 9; i++ ) {
t += probabilities[i] / ( aX + i );
}
final double w = aX + 7.5;
return Math.sqrt(2.0 * Math.PI) * Math.pow(w, aX + 0.5) * Math.exp(-w) * t;
}
// The probability density function of the Chi-squared distribution.
private static double pdf(double aX, double aK) {
return ( aX > 0.0 ) ?
Math.pow(aX, aK / 2 - 1) * Math.exp(-aX / 2) / ( Math.pow(2, aK / 2) * gamma(aK / 2) ) : 0.0;
}
// The cumulative probability function of the Chi-squared distribution.
private static double cdf(double aX, double aK) {
if ( aX > 1_000 && aK < 100 ) {
return 1.0;
}
return ( aX > 0.0 && aK > 0.0 ) ? gammaCDF(aX / 2, aK / 2) : 0.0;
}
// The normalised lower incomplete gamma function.
private static double gammaCDF(double aX, double aK) {
double result = 0.0;
for ( int m = 0; m <= 99; m++ ) {
result += Math.pow(aX, m) / gamma(aK + m + 1);
}
result *= Math.pow(aX, aK) * Math.exp(-aX);
return result;
}
 
}
</syntaxhighlight>
{{ out }}
<pre>
Values of the Chi-squared probability distribution function
x/k 1 2 3 4 5
0 0.000000 0.000000 0.000000 0.000000 0.000000
1 0.241971 0.303265 0.241971 0.151633 0.080657
2 0.103777 0.183940 0.207554 0.183940 0.138369
3 0.051393 0.111565 0.154180 0.167348 0.154180
4 0.026995 0.067668 0.107982 0.135335 0.143976
5 0.014645 0.041042 0.073225 0.102606 0.122042
6 0.008109 0.024894 0.048652 0.074681 0.097304
7 0.004553 0.015099 0.031873 0.052845 0.074371
8 0.002583 0.009158 0.020667 0.036631 0.055112
9 0.001477 0.005554 0.013296 0.024995 0.039887
10 0.000850 0.003369 0.008500 0.016845 0.028335
 
Values for the Chi-squared distribution with 3 degrees of freedom
Chi-squared cumulative pdf p-value
1 0.198748 0.801252
2 0.427593 0.572407
4 0.738536 0.261464
8 0.953988 0.046012
16 0.998866 0.001134
32 0.999999 0.000001
 
For the airport data:
test statistic : 4.512821
degrees of freedom : 3
Chi-squared : 0.088754
p-value : 0.788850
</pre>
 
=={{header|jq}}==
{{works with|jq}}
'''Also works with gojq, the Go implementation of jq.'''
 
The implementation of Chi_cdf here uses the recusive relation for the
gamma function for efficiency and to simplify the avoidance of
numerical overflow issues.
 
'''Formatting'''
<syntaxhighlight lang=shjq>
def lpad($len): tostring | ($len - length) as $l | (" " * $l)[:$l] + .;
 
Line 158 ⟶ 445:
// .
end;
</syntaxhighlight>
'''Chi-squared pdf and cdf'''
<syntaxhighlight lang=jq>
def Chi2_pdf($x; $k):
if $x <= 0 then 0
else # ((-$x/2)|exp) * pow($x; $k/2 -1) / (pow(2;$k/2) * (($k/2)|gamma))
((-$x/2) + ($k/2 -1) * ($x|log) - ( ($k/2)*(2|log) + (($k/2)|lgamma))) | exp
end;
 
# $k is the degrees of freedom
# Use recursive relation for gamma: G(x+1) = x * G(x)
def Chi2_cdf($x; $k):
if $x == 0 then 0
elif $x > (1e3 * $k) then 1
else 1e-15 as $tol # for example
| { s: 0, m: 0, term: (1 / ((($k/2)+1)|gamma)) }
| until (.term|length < $tol;
.s += .term
| .m += 1
| .term *= (($x/2) / (($k/2) + .m )) )
| .s * ( ((-$x/2) + ($k/2)*(($x/2)|log)) | exp)
end ;
</syntaxhighlight>
'''The Tasks'''
<syntaxhighlight lang=shjq>
def tables:
def r: round(46) | lpad(10);
" Values of the χ2 probability distribution function",
" x/k 1 2 3 4 5",
Line 174 ⟶ 483:
| "\($x|lpad(2)) \($cdf|r)\(1-$cdf|r)"
);
 
def airport: [[77, 23], [88, 12], [79, 21], [81, 19]];
def expected: [81.25, 18.75];
Line 191 ⟶ 500:
 
tables,
airport(airport; expected)
</syntaxhighlight>
{{output}}
Line 198 ⟶ 507:
x/k 1 2 3 4 5
0 0 0 0 0 0
1 0.2420 241971 0.3033 303265 0.2420 241971 0.1516 151633 0.0807080657
2 0.1038 103777 0.1839 183940 0.2076 207554 0.1839 183940 0.1384138369
3 0.0514 051393 0.1116 111565 0.1542 154180 0.1673 167348 0.1542154180
4 0.0270 026995 0.0677 067668 0.1080 107982 0.1353 135335 0.1440143976
5 0.0146 014645 0.0410 041042 0.0732 073225 0.1026 102606 0.1220122042
6 0.0081 008109 0.0249 024894 0.0487 048652 0.0747 074681 0.0973097304
7 0.0046 004553 0.0151 015099 0.0319 031873 0.0528 052845 0.0744074371
8 0.0026 002583 0.0092 009158 0.0207 020667 0.0366 036631 0.0551055112
9 0.0015 001477 0.0056 005554 0.0133 013296 0.0250 024995 0.0399039887
10 0.0009 000850 0.0034 003369 0.0085 008500 0.0168 016845 0.0283028335
 
Values for χ2 with 3 degrees of freedom
χ2 cum pdf p-value
1 0.1988 198748 0.8012801252
2 0.4276 427593 0.5724572407
4 0.7386 738536 0.2614261464
8 0.9540 953988 0.0460046012
16 0.9989 998866 0.0011001134
32 10.0000999999 01e-06
 
For airport data table:
diff sum : 4.512820512820513
d.o.f. : 3
χ2 value : 0.08875392598443502088753925984435
p-value : 0.7889
</pre>
 
 
=={{header|Julia}}==
Line 328 ⟶ 636:
For the airport data, diff total is 4.512820512820512, χ2 is 0.08875392598443503, p value 0.7888504263193064
</pre>
 
=={{header|Nim}}==
{{libheader|gnuplotlib.nim}}
Adapted from Python solution.
<syntaxhighlight lang="Nim">import std/[math, strformat, sugar]
 
func χ²(x: float; k: int): float =
## χ² function, the probability distribution function (pdf) for χ².
if x > 0: x.pow(k / 2 - 1) * exp(-x/2) / (2.pow(k / 2) * gamma(k / 2)) else: 0
 
func gammaCdf(k, x: float): float =
## Lower incomplete gamma by series formula with gamma.
for m in 0..99:
result += x^m / gamma(k + m.toFloat + 1)
result *= pow(x, k) * exp(-x)
 
func cdfχ²(x: float; k: int): float =
## Cumulative probability function (cdf) for χ².
if x > 0 and k > 0: gammaCdf(k / 2, x / 2) else: 0
 
 
echo " Values of the χ2 probability distribution function"
echo " x/k 1 2 3 4 5"
for x in 0..10:
stdout.write &"{x:2} "
for k in 1..5:
stdout.write &"{χ²(x.toFloat, k):10.6f}"
echo()
 
echo()
echo " Values for χ2 with 3 degrees of freedom"
echo "χ² cum pdf p-value"
for x in [1, 2, 4, 8, 16, 32]:
let cdf = cdfχ²(x.toFloat, 3)
echo &"{x:2} {cdf:10.6f} {1 - cdf:10.6f}"
 
const
AirportData = [[float 77, 23], [float 88, 12], [float 79, 21], [float 81, 19]]
Expected = [[81.25, 18.75], [81.25, 18.75], [81.25, 18.75], [81.25, 18.75]]
 
var dtotal = 0.0
 
for pos in [0, 1]:
for i, d in AirportData:
dtotal += (d[pos] - Expected[i][pos])^2 / Expected[i][pos]
 
echo()
echo &"For the airport data, diff total is {dtotal:.6f}, " &
&"χ² is {χ²(dtotal, 3):.6f}, p value is {cdfχ²(dtotal, 3):.6f}."
 
 
### Stretch task ###
 
import gnuplot
let x = collect(for n in 0..9999: n / 1000)
withGnuplot:
cmd "set multiplot"
cmd "set yrange [-0.02:0.5]"
for k in 0..3:
let y = collect(for p in x: χ²(p, k))
plot(x, y, &"k = {k}", "with lines")
</syntaxhighlight>
 
{{out}}
<pre> Values of the χ2 probability distribution function
x/k 1 2 3 4 5
0 0.000000 0.000000 0.000000 0.000000 0.000000
1 0.241971 0.303265 0.241971 0.151633 0.080657
2 0.103777 0.183940 0.207554 0.183940 0.138369
3 0.051393 0.111565 0.154180 0.167348 0.154180
4 0.026995 0.067668 0.107982 0.135335 0.143976
5 0.014645 0.041042 0.073225 0.102606 0.122042
6 0.008109 0.024894 0.048652 0.074681 0.097304
7 0.004553 0.015099 0.031873 0.052845 0.074371
8 0.002583 0.009158 0.020667 0.036631 0.055112
9 0.001477 0.005554 0.013296 0.024995 0.039887
10 0.000850 0.003369 0.008500 0.016845 0.028335
 
Values for χ2 with 3 degrees of freedom
χ² cum pdf p-value
1 0.198748 0.801252
2 0.427593 0.572407
4 0.738536 0.261464
8 0.953988 0.046012
16 0.998866 0.001134
32 0.999999 0.000001
 
For the airport data, diff total is 4.512821, χ² is 0.088754, p value is 0.788850.
</pre>
[[File:Chi-squared-nim.png]]
 
=={{header|Perl}}==
Line 713 ⟶ 1,111:
{{libheader|DOME}}
{{libheader|Wren-math}}
{{libheader|Wren-traititerate}}
{{libheader|Wren-fmt}}
{{libheader|Wren-plot}}
<syntaxhighlight lang="ecmascriptwren">import "dome" for Window
import "graphics" for Canvas, Color
import "./math2" for Math
import "./traititerate" for Stepped
import "./fmt" for Fmt
import "./plot" for Axes
9,482

edits