Welch's t-test: Difference between revisions

→‎{{header|C}}: added Burkhart betain function, which converges more quickly and with greater accuracy than Simpson's method.
(→‎{{header|SAS}}: SAS output)
(→‎{{header|C}}: added Burkhart betain function, which converges more quickly and with greater accuracy than Simpson's method.)
Line 79:
 
This shows how pvalue can be calculated from any two arrays, using Welch's 2-sided t-test, which doesn't assume equal variance.
This is the equivalent of R's <lang R> t.test(x, y, paired=FALSE, var.equal=FALSE)</lang> and as such, it is compared against R's pvalues with the same vectors/arrays to show that the differences are very small (here 10^-14).
<lang C>#include <stdio.h>
#include <math.h>
#include <stdlib.h>
 
double betain ( const double x, const double p, const double q, const double beta, int *restrict ifault )
Smaller p-values converge more quickly than larger p-values.
 
/******************************************************************************/
<code>const unsigned short int N = 65535</code>
/*
Purpose:
 
BETAIN computes the incomplete Beta function ratio.
ensures integral convergence of about <math>10^{-15}</math> for p-values < 0.15, about <math>10^{-7}</math> for p-values approximately 0.5, but only <math>10^{-3}</math> for p-values approaching 1.
<lang C>#include <stdio.h>
#include <math.h>
 
Licensing:
double calculate_Pvalue (const double *array1, const size_t array1_size, const double *array2, const size_t array2_size) {
 
if (array1_size <= 1) {
This code is distributed under the GNU LGPL license.
 
Modified:
 
05 November 2010
 
Author:
 
Original FORTRAN77 version by KL Majumder, GP Bhattacharjee.
C version by John Burkardt.
 
Reference:
 
KL Majumder, GP Bhattacharjee,
Algorithm AS 63:
The incomplete Beta Integral,
Applied Statistics,
Volume 22, Number 3, 1973, pages 409-411.
 
Parameters:
https://www.jstor.org/stable/2346797?seq=1#page_scan_tab_contents
Input, double X, the argument, between 0 and 1.
 
Input, double P, Q, the parameters, which
must be positive.
 
Input, double BETA, the logarithm of the complete
beta function.
 
Output, int *IFAULT, error flag.
0, no error.
nonzero, an error occurred.
 
Output, double BETAIN, the value of the incomplete
Beta function ratio.
*/
{
double acu = 0.1E-14;
double ai;
// double betain;
double cx;
int indx;
int ns;
double pp;
double psq;
double qq;
double rx;
double temp;
double term;
double value;
double xx;
 
value = x;
ifault = 0;
//Check the input arguments.
if ( (p <= 0.0) || (q <= 0.0 )){
// *ifault = 1;
return value;
}
if ( x < 0.0 || 1.0 < x )
{
*ifault = 2;
return value;
}
/*
Special cases.
*/
if ( x == 0.0 || x == 1.0 ) {
return value;
}
psq = p + q;
cx = 1.0 - x;
 
if ( p < psq * x )
{
xx = cx;
cx = x;
pp = q;
qq = p;
indx = 1;
}
else
{
xx = x;
pp = p;
qq = q;
indx = 0;
}
 
term = 1.0;
ai = 1.0;
value = 1.0;
ns = ( int ) ( qq + cx * psq );
/*
Use the Soper reduction formula.
*/
rx = xx / cx;
temp = qq - ai;
if ( ns == 0 )
{
rx = xx;
}
 
for ( ; ; )
{
term = term * temp * rx / ( pp + ai );
value = value + term;;
temp = fabs ( term );
 
if ( temp <= acu && temp <= acu * value )
{
value = value * exp ( pp * log ( xx )
+ ( qq - 1.0 ) * log ( cx ) - beta ) / pp;
 
if ( indx )
{
value = 1.0 - value;
}
break;
}
 
ai = ai + 1.0;
ns = ns - 1;
 
if ( 0 <= ns )
{
temp = qq - ai;
if ( ns == 0 )
{
rx = xx;
}
}
else
{
temp = psq;
psq = psq + 1.0;
}
}
 
return value;
}
 
double Pvalue (const double *restrict ARRAY1, const size_t ARRAY1_SIZE, const double *restrict ARRAY2, const size_t ARRAY2_SIZE) {//calculate a p-value based on an array
if (ARRAY1_SIZE <= 1) {
return 1.0;
} else if (ARRAY2_SIZE <= 1) {
}
if (array2_size <= 1) {
return 1.0;
}
double mean1fmean1 = 0.0, mean2fmean2 = 0.0;
for (size_t x = 0; x < array1_sizeARRAY1_SIZE; x++) {//get sum of values in ARRAY1
if (isfinite(ARRAY1[x]) == 0) {//check to make sure this is a real numbere
mean1 += array1[x];
puts("Got a non-finite number in 1st array, can't calculate P-value.");
exit(EXIT_FAILURE);
}
fmean1 += ARRAY1[x];
}
fmean1 /= ARRAY1_SIZE;
for (size_t x = 0; x < array2_size; x++) {
for (size_t x = 0; x < ARRAY2_SIZE; x++) {//get sum of values in ARRAY2
mean2 += array2[x];
if (isfinite(ARRAY2[x]) == 0) {//check to make sure this is a real number
puts("Got a non-finite number in 2nd array, can't calculate P-value.");
exit(EXIT_FAILURE);
}
fmean2 += ARRAY2[x];
}
fmean2 /= ARRAY2_SIZE;
if (mean1 == mean2) {
// printf("mean1 = %lf mean2 = %lf\n", fmean1, fmean2);
return 1.0;
if (fmean1 == fmean2) {
return 1.0;//if the means are equal, the p-value is 1, leave the function
}
double unbiased_sample_variance1 = 0.0, unbiased_sample_variance2 = 0.0;
mean1 /= array1_size;
for (size_t x = 0; x < ARRAY1_SIZE; x++) {//1st part of added unbiased_sample_variance
mean2 /= array2_size;
unbiased_sample_variance1 += (ARRAY1[x]-fmean1)*(ARRAY1[x]-fmean1);
double variance1 = 0.0, variance2 = 0.0;
for (size_t x = 0; x < array1_size; x++) {
variance1 += (array1[x]-mean1)*(array1[x]-mean1);
}
for (size_t x = 0; x < array2_sizeARRAY2_SIZE; x++) {
variance2unbiased_sample_variance2 += (array2ARRAY2[x]-mean2fmean2)*(array2ARRAY2[x]-mean2fmean2);
}
// printf("unbiased_sample_variance1 = %lf\tunbiased_sample_variance2 = %lf\n",unbiased_sample_variance1,unbiased_sample_variance2);//DEBUGGING
if ((variance1 == 0.0) && (variance2 == 0.0)) {
unbiased_sample_variance1 = unbiased_sample_variance1/(ARRAY1_SIZE-1);
return 1.0;
unbiased_sample_variance2 = unbiased_sample_variance2/(ARRAY2_SIZE-1);
}
const double WELCH_T_STATISTIC = (fmean1-fmean2)/sqrt(unbiased_sample_variance1/ARRAY1_SIZE+unbiased_sample_variance2/ARRAY2_SIZE);
variance1 = variance1/(array1_size-1);
const double DEGREES_OF_FREEDOM = pow((unbiased_sample_variance1/ARRAY1_SIZE+unbiased_sample_variance2/ARRAY2_SIZE),2.0)//numerator
variance2 = variance2/(array2_size-1);
const double WELCH_T_STATISTIC = (mean1-mean2)/sqrt(variance1/array1_size+variance2/array2_size);
//End of Welch's T-Test
const double DEGREES_OF_FREEDOM = pow((variance1/array1_size+variance2/array2_size),2.0)//numerator
/
(
(unbiased_sample_variance1*unbiased_sample_variance1)/(ARRAY1_SIZE*ARRAY1_SIZE*(ARRAY1_SIZE-1))+
(variance1*variance1)/(array1_size*array1_size*(array1_size-1))+
(unbiased_sample_variance2*unbiased_sample_variance2)/(ARRAY2_SIZE*ARRAY2_SIZE*(ARRAY2_SIZE-1))
(variance2*variance2)/(array2_size*array2_size*(array2_size-1))
);
// printf("Welch = %lf DOF = %lf\n", WELCH_T_STATISTIC, DEGREES_OF_FREEDOM);
const double a = DEGREES_OF_FREEDOM/2, x = DEGREES_OF_FREEDOM/(WELCH_T_STATISTIC*WELCH_T_STATISTIC+DEGREES_OF_FREEDOM);
if ((isinf(x) != 0) || (isnan(x) != 0)) {
const unsigned short int N = 65535;
return 1.0;
const double h = x/N;
double sum1 = 0.0, sum2 = 0.0;
for(unsigned short int i = 0;i < N; i++) {
sum1 += (pow(h * i + h / 2.0,a-1))/(sqrt(1-(h * i + h / 2.0)));
sum2 += (pow(h * i,a-1))/(sqrt(1-h * i));
}
if ((isinf(a) != 0) || (isnan(a) != 0)) {
double return_value = ((h / 6.0) * ((pow(x,a-1))/(sqrt(1-x)) + 4.0 * sum1 + 2.0 * sum2))/(expl(lgammal(a)+0.57236494292470009-lgammal(a+0.5)));
if ((isfinite(return_value) == 0) || (return_value > 1.0)) {
return 1.0;
} else {
return return_value;
}
}
 
int *restrict z = 0;
return betain(x, a, 0.5, lgammal(a)+0.57236494292470009-lgammal(a+0.5), z);
}
//-------------------
int main(void) {
 
const double d1[] = {27.5,21.0,19.0,23.6,17.0,17.9,16.9,20.1,21.9,22.6,23.1,19.6,19.0,21.7,21.4};
const double d2[] = {27.1,22.0,20.8,23.4,23.4,23.5,25.8,22.0,24.8,20.2,21.9,22.1,22.9,20.5,24.4};
Line 154 ⟶ 305:
const double x[] = {3.0,4.0,1.0,2.1};
const double y[] = {490.2,340.0,433.9};
const double v1[] = {0.010268,0.000167,0.000167};
const double v2[] = {0.159258,0.136278,0.122389};
const double s1[] = {1.0/15,10.0/62.0};
const double s2[] = {1.0/10,2/50.0};
const double z1[] = {9/23.0,21/45.0,0/38.0};
const double z2[] = {0/44.0,42/94.0,0/22.0};
const double CORRECT_ANSWERS[] = {0.021378001462867,
0.148841696605327,
0.0359722710297968,
0.090773324285671,
0.0107515611497845,
0.00339907162713746,
0.52726574965384,
0.545266866977794};
 
//calculate the pvalues and show that they're the same as the R values
 
double pvalue = Pvalue(d1,sizeof(d1)/sizeof(*d1),d2,sizeof(d2)/sizeof(*d2));
double error = fabs(pvalue - CORRECT_ANSWERS[0]);
printf("Test sets 1 p-value = %g\n", pvalue);
pvalue = Pvalue(d3,sizeof(d3)/sizeof(*d3),d4,sizeof(d4)/sizeof(*d4));
error += fabs(pvalue - CORRECT_ANSWERS[1]);
printf("Test sets 2 p-value = %g\n",pvalue);
 
pvalue = Pvalue(d5,sizeof(d5)/sizeof(*d5),d6,sizeof(d6)/sizeof(*d6));
error += fabs(pvalue - CORRECT_ANSWERS[2]);
printf("Test sets 3 p-value = %g\n", pvalue);
 
pvalue = Pvalue(d7,sizeof(d7)/sizeof(*d7),d8,sizeof(d8)/sizeof(*d8));
printf("Test sets 4 p-value = %g\n", pvalue);
error += fabs(pvalue - CORRECT_ANSWERS[3]);
 
pvalue = Pvalue(x,sizeof(x)/sizeof(*x),y,sizeof(y)/sizeof(*y));
error += fabs(pvalue - CORRECT_ANSWERS[4]);
printf("Test sets 5 p-value = %g\n", pvalue);
 
pvalue = Pvalue(v1,sizeof(v1)/sizeof(*v1),v2,sizeof(v2)/sizeof(*v2));
error += fabs(pvalue - CORRECT_ANSWERS[5]);
printf("Test sets 6 p-value = %g\n", pvalue);
pvalue = Pvalue(s1,sizeof(s1)/sizeof(*s1),s2,sizeof(s2)/sizeof(*s2));
error += fabs(pvalue - CORRECT_ANSWERS[6]);
printf("Test sets 7 p-value = %g\n", pvalue);
pvalue = Pvalue(z1, 3, z2, 3);
error += fabs(pvalue - CORRECT_ANSWERS[7]);
printf("Test sets z p-value = %g\n", pvalue);
 
printf("the cumulative error is %g\n", error);
printf("Test sets 1 p-value = %lf\n",calculate_Pvalue(d1,sizeof(d1)/sizeof(*d1),d2,sizeof(d2)/sizeof(*d2)));
printf("Test sets 2 p-value = %lf\n",calculate_Pvalue(d3,sizeof(d3)/sizeof(*d3),d4,sizeof(d4)/sizeof(*d4)));
printf("Test sets 3 p-value = %lf\n",calculate_Pvalue(d5,sizeof(d5)/sizeof(*d5),d6,sizeof(d6)/sizeof(*d6)));
printf("Test sets 4 p-value = %lf\n",calculate_Pvalue(d7,sizeof(d7)/sizeof(*d7),d8,sizeof(d8)/sizeof(*d8)));
printf("Test sets 5 p-value = %lf\n",calculate_Pvalue(x,sizeof(x)/sizeof(*x),y,sizeof(y)/sizeof(*y)));
return 0;
}
Line 167 ⟶ 363:
<pre>Test sets 1 p-value = 0.021378
Test sets 2 p-value = 0.148842
Test sets 3 p-value = 0.0359720359723
Test sets 4 p-value = 0.0907730907733
Test sets 5 p-value = 0.010751</pre>0107516
Test sets 6 p-value = 0.00339907
Test sets 7 p-value = 0.527266
Test sets z p-value = 0.545267
the cumulative error is 1.06339e-14</pre>
 
'''If''' your computer does not have <code>lgammal</code>, add the following function before <code>main</code> and replace <code>lgammal</code> with <code>lngammal</code> in the <code>calculate_Pvalue</code> function: