Verify distribution uniformity/Chi-squared test: Difference between revisions

Content added Content deleted
(→‎{{header|jq}}: use recursion formula for gamma)
(New post)
Line 290: Line 290:
return 0;
return 0;
}</syntaxhighlight>
}</syntaxhighlight>

=={{header|C++}}==
<syntaxhighlight lang="c++">

#include <iostream>
#include <vector>
#include <cmath>
#include <iomanip>

void print_vector(const std::vector<int32_t>& list) {
std::cout << "[";
for ( uint64_t i = 0; i < list.size(); ++i ) {
std::cout << list[i] << ", ";
}
std::cout << list.back() << "]" << std::endl;
}

bool is_significant(const double p_value, const double significance_level) {
return p_value > significance_level;
}

// The normalised lower incomplete gamma function.
double gamma_cdf(const double aX, const double aK) {
double result = 0.0;
for ( uint32_t m = 0; m <= 99; ++m ) {
result += pow(aX, m) / tgamma(aK + m + 1);
}
result *= pow(aX, aK) * exp(-aX);
return std::isnan(result) ? 1.0 : result;
}

// The cumulative probability function of the Chi-squared distribution.
double cdf(const double aX, const double aK) {
if ( aX > 1'000 && aK < 100 ) {
return 1.0;
}
return ( aX > 0.0 && aK > 0.0 ) ? gamma_cdf(aX / 2, aK / 2) : 0.0;
}

void chi_squared_test(const std::vector<int32_t>& observed) {
double sum = 0.0;
for ( uint64_t i = 0; i < observed.size(); ++i ) {
sum += observed[i];
}
const double expected = sum / observed.size();

const int32_t degree_freedom = observed.size() - 1;

double test_statistic = 0.0;
for ( uint64_t i = 0; i < observed.size(); ++i ) {
test_statistic += pow(observed[i] - expected, 2) / expected;
}

const double p_value = 1.0 - cdf(test_statistic, degree_freedom);

std::cout << "\nUniform distribution test" << std::setprecision(6) << std::endl;
std::cout << " observed values : "; print_vector(observed);
std::cout << " expected value : " << expected << std::endl;
std::cout << " degrees of freedom: " << degree_freedom << std::endl;
std::cout << " test statistic : " << test_statistic << std::endl;
std::cout.setf(std::ios::fixed);
std::cout << " p-value : " << p_value << std::endl;
std::cout.unsetf(std::ios::fixed);
std::cout << " is 5% significant?: " << std::boolalpha << is_significant(p_value, 0.05) << std::endl;
}

int main() {
const std::vector<std::vector<int32_t>> datasets = { { 199809, 200665, 199607, 200270, 199649 },
{ 522573, 244456, 139979, 71531, 21461 } };
for ( std::vector<int32_t> dataset : datasets ) {
chi_squared_test(dataset);
}
}
</syntaxhighlight>
{{ out }}
<pre>

Uniform distribution test
observed values : [199809, 200665, 199607, 200270, 199649, 199649]
expected value : 200000
degrees of freedom: 4
test statistic : 4.14628
p-value : 0.386571
is 5% significant?: true

Uniform distribution test
observed values : [522573, 244456, 139979, 71531, 21461, 21461]
expected value : 200000
degrees of freedom: 4
test statistic : 790063
p-value : 0.000000
is 5% significant?: false
</pre>


=={{header|D}}==
=={{header|D}}==