Verify distribution uniformity/Chi-squared test: Difference between revisions
Content added Content deleted
(→{{header|jq}}: use recursion formula for gamma) |
(New post) |
||
Line 290: | Line 290: | ||
return 0; |
return 0; |
||
}</syntaxhighlight> |
}</syntaxhighlight> |
||
=={{header|C++}}== |
|||
<syntaxhighlight lang="c++"> |
|||
#include <iostream> |
|||
#include <vector> |
|||
#include <cmath> |
|||
#include <iomanip> |
|||
// Writes the elements of 'list' to standard output as "[a, b, c]" followed by
// a newline (the stream is flushed). An empty vector is written as "[]".
void print_vector(const std::vector<int32_t>& list) {
    std::cout << "[";
    for ( uint64_t i = 0; i < list.size(); ++i ) {
        // The separator goes in front of every element except the first, so the
        // final element is written exactly once and no trailing ", " is left over.
        if ( i > 0 ) {
            std::cout << ", ";
        }
        std::cout << list[i];
    }
    std::cout << "]" << std::endl;
}
|||
// Reports whether a test result is statistically significant at the given
// level, i.e. whether the p-value lies below 'significance_level' so that the
// null hypothesis would be rejected. A large p-value means the data are
// consistent with the null hypothesis and therefore NOT significant.
// A NaN p-value compares false and is reported as not significant.
bool is_significant(const double p_value, const double significance_level) {
    return p_value < significance_level;
}
|||
// The normalised (regularised) lower incomplete gamma function
// P(aK, aX) = gamma(aK, aX) / Gamma(aK), where aX is the argument and aK > 0
// is the shape parameter.
//
// Returns a value in [0, 1]: 0.0 for aX <= 0 and 1.0 for aX = +infinity.
// Returns NaN when aX is NaN or when aK is NaN, non-positive or infinite.
//
// Two standard expansions are used so that the result stays accurate over the
// whole domain:
//   * aX <  aK + 1: the power series  sum_{m>=0} aX^m / (aK (aK+1) ... (aK+m)),
//   * aX >= aK + 1: the continued fraction for the complementary function
//                   Q = 1 - P, evaluated with the modified Lentz method.
// Both are scaled by aX^aK * exp(-aX) / Gamma(aK), which is formed in log space
// so that large arguments underflow gracefully to 0 instead of producing
// inf / inf. A fixed number of series terms is not sufficient: the terms peak
// near m = aX - aK, so the loops below run until the terms become negligible.
double gamma_cdf(const double aX, const double aK) {
    constexpr double kEpsilon = 1.0e-15;            // relative convergence tolerance
    constexpr double kTiny = 1.0e-300;              // guards divisions in the Lentz recurrence
    constexpr uint32_t kMaxIterations = 1'000'000;  // safety bound; convergence is normally far earlier

    if ( std::isnan(aX) || ! ( aK > 0.0 ) || std::isinf(aK) ) {
        return std::nan("");
    }
    if ( aX <= 0.0 ) {
        return 0.0;
    }
    if ( std::isinf(aX) ) {
        return 1.0;
    }

    // aX^aK * exp(-aX) / Gamma(aK)
    const double prefactor = std::exp(aK * std::log(aX) - aX - std::lgamma(aK));

    if ( aX < aK + 1.0 ) {
        // Power series; each term is the previous one times aX / (aK + m + 1).
        double denominator = aK;
        double term = 1.0 / aK;
        double sum = term;
        for ( uint32_t iteration = 0; iteration < kMaxIterations; ++iteration ) {
            denominator += 1.0;
            term *= aX / denominator;
            sum += term;
            if ( term < sum * kEpsilon ) {
                break;
            }
        }
        return sum * prefactor;
    }

    // Continued fraction for Q(aK, aX), modified Lentz evaluation.
    double b = aX + 1.0 - aK;
    double c = 1.0 / kTiny;
    double d = 1.0 / b;
    double h = d;
    for ( uint32_t i = 1; i <= kMaxIterations; ++i ) {
        const double an = -static_cast<double>(i) * ( i - aK );
        b += 2.0;
        d = an * d + b;
        if ( std::fabs(d) < kTiny ) {
            d = kTiny;
        }
        c = b + an / c;
        if ( std::fabs(c) < kTiny ) {
            c = kTiny;
        }
        d = 1.0 / d;
        const double delta = d * c;
        h *= delta;
        if ( std::fabs(delta - 1.0) <= kEpsilon ) {
            break;
        }
    }
    return 1.0 - prefactor * h;
}
|||
// The cumulative probability function of the Chi-squared distribution. |
|||
double cdf(const double aX, const double aK) { |
|||
if ( aX > 1'000 && aK < 100 ) { |
|||
return 1.0; |
|||
} |
|||
return ( aX > 0.0 && aK > 0.0 ) ? gamma_cdf(aX / 2, aK / 2) : 0.0; |
|||
} |
|||
void chi_squared_test(const std::vector<int32_t>& observed) { |
|||
double sum = 0.0; |
|||
for ( uint64_t i = 0; i < observed.size(); ++i ) { |
|||
sum += observed[i]; |
|||
} |
|||
const double expected = sum / observed.size(); |
|||
const int32_t degree_freedom = observed.size() - 1; |
|||
double test_statistic = 0.0; |
|||
for ( uint64_t i = 0; i < observed.size(); ++i ) { |
|||
test_statistic += pow(observed[i] - expected, 2) / expected; |
|||
} |
|||
const double p_value = 1.0 - cdf(test_statistic, degree_freedom); |
|||
std::cout << "\nUniform distribution test" << std::setprecision(6) << std::endl; |
|||
std::cout << " observed values : "; print_vector(observed); |
|||
std::cout << " expected value : " << expected << std::endl; |
|||
std::cout << " degrees of freedom: " << degree_freedom << std::endl; |
|||
std::cout << " test statistic : " << test_statistic << std::endl; |
|||
std::cout.setf(std::ios::fixed); |
|||
std::cout << " p-value : " << p_value << std::endl; |
|||
std::cout.unsetf(std::ios::fixed); |
|||
std::cout << " is 5% significant?: " << std::boolalpha << is_significant(p_value, 0.05) << std::endl; |
|||
} |
|||
int main() { |
|||
const std::vector<std::vector<int32_t>> datasets = { { 199809, 200665, 199607, 200270, 199649 }, |
|||
{ 522573, 244456, 139979, 71531, 21461 } }; |
|||
for ( std::vector<int32_t> dataset : datasets ) { |
|||
chi_squared_test(dataset); |
|||
} |
|||
} |
|||
</syntaxhighlight> |
|||
{{ out }} |
|||
<pre> |
|||
Uniform distribution test |
|||
observed values : [199809, 200665, 199607, 200270, 199649, 199649] |
|||
expected value : 200000 |
|||
degrees of freedom: 4 |
|||
test statistic : 4.14628 |
|||
p-value : 0.386571 |
|||
is 5% significant?: true |
|||
Uniform distribution test |
|||
observed values : [522573, 244456, 139979, 71531, 21461, 21461] |
|||
expected value : 200000 |
|||
degrees of freedom: 4 |
|||
test statistic : 790063 |
|||
p-value : 0.000000 |
|||
is 5% significant?: false |
|||
</pre> |
|||
=={{header|D}}== |
=={{header|D}}== |