Verify distribution uniformity/Chi-squared test: Difference between revisions

m
(https://rosettacode.org/wiki/Statistics/Chi-squared_distribution)
m (→‎{{header|Wren}}: Minor tidy)
 
(5 intermediate revisions by 3 users not shown)
Line 10:
;Reference:
:*   an entry at the MathWorld website:   [http://mathworld.wolfram.com/Chi-SquaredDistribution.html chi-squared distribution].
<br><br>
; Related task:
:* [[Statistics/Chi-squared_distribution]]
<br><br>
=={{header|11l}}==
{{trans|Python}}
Line 290:
return 0;
}</syntaxhighlight>
 
=={{header|C#}}==
{{trans|Go}}
<syntaxhighlight lang="C#">
using System;
 
class Program
{
public delegate double Func(double x);
public static double Simpson38(Func f, double a, double b, int n)
{
double h = (b - a) / n;
double h1 = h / 3;
double sum = f(a) + f(b);
for (int j = 3 * n - 1; j > 0; j--)
{
if (j % 3 == 0)
{
sum += 2 * f(a + h1 * j);
}
else
{
sum += 3 * f(a + h1 * j);
}
}
 
return h * sum / 8;
}
 
// Lanczos Approximation for Gamma Function
private static double SpecialFunctionGamma(double z)
{
double[] p =
{
676.5203681218851,
-1259.1392167224028,
771.32342877765313,
-176.61502916214059,
12.507343278686905,
-0.13857109526572012,
9.9843695780195716e-6,
1.5056327351493116e-7
};
if (z < 0.5)
return Math.PI / (Math.Sin(Math.PI * z) * SpecialFunctionGamma(1 - z));
z -= 1;
double x = 0.99999999999980993;
for (int i = 0; i < p.Length; i++)
{
x += p[i] / (z + i + 1);
}
 
double t = z + p.Length - 0.5;
return Math.Sqrt(2 * Math.PI) * Math.Pow(t, z + 0.5) * Math.Exp(-t) * x;
}
 
public static double GammaIncQ(double a, double x)
{
double aa1 = a - 1;
Func f = t => Math.Pow(t, aa1) * Math.Exp(-t);
double y = aa1;
double h = 1.5e-2;
while (f(y) * (x - y) > 2e-8 && y < x)
{
y += .4;
}
 
if (y > x)
{
y = x;
}
 
return 1 - Simpson38(f, 0, y, (int)(y / h / SpecialFunctionGamma(a)));
}
 
public static double Chi2Ud(int[] ds)
{
double sum = 0, expected = 0;
foreach (var d in ds)
{
expected += d;
}
 
expected /= ds.Length;
foreach (var d in ds)
{
double x = d - expected;
sum += x * x;
}
 
return sum / expected;
}
 
public static double Chi2P(int dof, double distance)
{
return GammaIncQ(.5 * dof, .5 * distance);
}
 
const double SigLevel = .05;
static void Main(string[] args)
{
int[][] datasets = new int[][]
{
new int[]
{
199809,
200665,
199607,
200270,
199649
},
new int[]
{
522573,
244456,
139979,
71531,
21461
},
};
foreach (var dset in datasets)
{
UTest(dset);
}
}
 
static void UTest(int[] dset)
{
Console.WriteLine("Uniform distribution test");
int sum = 0;
foreach (var c in dset)
{
sum += c;
}
 
Console.WriteLine($" dataset: {string.Join(", ", dset)}");
Console.WriteLine($" samples: {sum}");
Console.WriteLine($" categories: {dset.Length}");
int dof = dset.Length - 1;
Console.WriteLine($" degrees of freedom: {dof}");
double dist = Chi2Ud(dset);
Console.WriteLine($" chi square test statistic: {dist}");
double p = Chi2P(dof, dist);
Console.WriteLine($" p-value of test statistic: {p}");
bool sig = p < SigLevel;
Console.WriteLine($" significant at {SigLevel * 100}% level? {sig}");
Console.WriteLine($" uniform? {!sig}\n");
}
}
</syntaxhighlight>
{{out}}
<pre>
Uniform distribution test
dataset: 199809, 200665, 199607, 200270, 199649
samples: 1000000
categories: 5
degrees of freedom: 4
chi square test statistic: 4.14628
p-value of test statistic: 0.386570833082767
significant at 5% level? False
uniform? True
 
Uniform distribution test
dataset: 522573, 244456, 139979, 71531, 21461
samples: 1000000
categories: 5
degrees of freedom: 4
chi square test statistic: 790063.27594
p-value of test statistic: 2.35282904270662E-11
significant at 5% level? True
uniform? False
 
 
</pre>
 
=={{header|C++}}==
<syntaxhighlight lang="c++">
 
#include <iostream>
#include <vector>
#include <cmath>
#include <iomanip>
 
void print_vector(const std::vector<int32_t>& list) {
std::cout << "[";
for ( uint64_t i = 0; i < list.size() - 1; ++i ) {
std::cout << list[i] << ", ";
}
std::cout << list.back() << "]" << std::endl;
}
 
bool is_significant(const double p_value, const double significance_level) {
return p_value > significance_level;
}
 
// The normalised lower incomplete gamma function.
double gamma_cdf(const double aX, const double aK) {
double result = 0.0;
for ( uint32_t m = 0; m <= 99; ++m ) {
result += pow(aX, m) / tgamma(aK + m + 1);
}
result *= pow(aX, aK) * exp(-aX);
return std::isnan(result) ? 1.0 : result;
}
 
// The cumulative probability function of the Chi-squared distribution.
double cdf(const double aX, const double aK) {
if ( aX > 1'000 && aK < 100 ) {
return 1.0;
}
return ( aX > 0.0 && aK > 0.0 ) ? gamma_cdf(aX / 2, aK / 2) : 0.0;
}
 
void chi_squared_test(const std::vector<int32_t>& observed) {
double sum = 0.0;
for ( uint64_t i = 0; i < observed.size(); ++i ) {
sum += observed[i];
}
const double expected = sum / observed.size();
 
const int32_t degree_freedom = observed.size() - 1;
 
double test_statistic = 0.0;
for ( uint64_t i = 0; i < observed.size(); ++i ) {
test_statistic += pow(observed[i] - expected, 2) / expected;
}
 
const double p_value = 1.0 - cdf(test_statistic, degree_freedom);
 
std::cout << "\nUniform distribution test" << std::setprecision(6) << std::endl;
std::cout << " observed values : "; print_vector(observed);
std::cout << " expected value : " << expected << std::endl;
std::cout << " degrees of freedom: " << degree_freedom << std::endl;
std::cout << " test statistic : " << test_statistic << std::endl;
std::cout.setf(std::ios::fixed);
std::cout << " p-value : " << p_value << std::endl;
std::cout.unsetf(std::ios::fixed);
std::cout << " is 5% significant?: " << std::boolalpha << is_significant(p_value, 0.05) << std::endl;
}
 
int main() {
const std::vector<std::vector<int32_t>> datasets = { { 199809, 200665, 199607, 200270, 199649 },
{ 522573, 244456, 139979, 71531, 21461 } };
for ( std::vector<int32_t> dataset : datasets ) {
chi_squared_test(dataset);
}
}
</syntaxhighlight>
{{ out }}
<pre>
 
Uniform distribution test
observed values : [199809, 200665, 199607, 200270, 199649]
expected value : 200000
degrees of freedom: 4
test statistic : 4.14628
p-value : 0.386571
is 5% significant?: true
 
Uniform distribution test
observed values : [522573, 244456, 139979, 71531, 21461]
expected value : 200000
degrees of freedom: 4
test statistic : 790063
p-value : 0.000000
is 5% significant?: false
</pre>
 
=={{header|D}}==
Line 742 ⟶ 1,009:
enough for the χ2 distribution to be used.
 
The implementation of `Chi2_cdf` belowhere isuses intendedthe torecursion berelation fairlyfor
simplethe gamma function and robustshould ratherbe thanboth highlyfast, accurate. and Forquite anrobust.
For an industrial-strength algorithm, see
e.g. https://people.sc.fsu.edu/~jburkardt/c_src/asa239/asa239.c
 
Line 760 ⟶ 1,027:
# Cumulative density function of the chi-squared distribution with $k
# degrees of freedom
# The recursion formula for gamma is used for efficiency and robustness.
# Use lgamma to avoid $x^m (for large $x and large m) and
# to avoid calling gamma for large $k
def Chi2_cdf($x; $k):
if $x == 0 then 0
elif $x > (1e3 * $k) then 1
else 1e-15 as $tol # for example
| { s: 0, m: 0, term: (1 / ((($tolk/2)+1)|gamma)) }
| until (.term|length < $tol; # length here is abs
.s += .term
# .term = (pow($x/2; .m) / (($k/2 + .m + 1)|gamma))
| .m += 1
.term = (((.m * (($x/2)|log)) - (($k/2 + .m + 1)|lgamma)) | exp)
| .sterm +*= (($x/2) / (($k/2) + .termm )) )
| .s * ( ((-$x/2) + ($k/2)*(($x/2)|log)) | exp)
| .m += 1)
| .s * ( (-$x/2) + ($k/2)*(($x/2)|log)|exp)
end ;
</syntaxhighlight>
Line 824 ⟶ 1,089:
task
</syntaxhighlight>
{{output}}
<pre>
Dataset: [199809,200665,199607,200270,199649]
Line 2,036 ⟶ 2,300:
{{libheader|Wren-math}}
{{libheader|Wren-fmt}}
<syntaxhighlight lang="ecmascriptwren">import "./math" for Math, Nums
import "./fmt" for Fmt
 
var integrate = Fn.new { |a, b, n, f|
9,476

edits