Talk:Welch's t-test: Difference between revisions

how can I add betain to my function?
(how can I add betain to my function?)
Line 506:
:gcc prog.c aux1.o aux1.o
:There is also a warning, but it's probably harmless. [[User:Eoraptor|Eoraptor]] ([[User talk:Eoraptor|talk]]) 15:35, 14 December 2017 (UTC)
 
==Implementing Algorithm... existing libraries don't calculate correct numbers==
 
I'm attempting to integrate what I got from the library, but its function <code>betain</code> does not produce the same ratio as the integral (which I know is approximately correct).
How can I use the Betain function in my function <code>Pvalue</code>?
 
<lang c>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <limits.h>
 
/******************************************************************************/
// x =? nu / (t^2 + nu)
// p =? nu/2
// q =? 1/2
//
 
double betain ( double x, double p, double q, double beta, int *restrict ifault )
 
/******************************************************************************/
/*
Purpose:
 
BETAIN computes the incomplete Beta function ratio.
 
Licensing:
 
This code is distributed under the GNU LGPL license.
 
Modified:
 
05 November 2010
 
Author:
 
Original FORTRAN77 version by KL Majumder, GP Bhattacharjee.
C version by John Burkardt.
 
Reference:
 
KL Majumder, GP Bhattacharjee,
Algorithm AS 63:
The incomplete Beta Integral,
Applied Statistics,
Volume 22, Number 3, 1973, pages 409-411.
 
Parameters:
https://www.jstor.org/stable/2346797?seq=1#page_scan_tab_contents
Input, double X, the argument, between 0 and 1.
 
Input, double P, Q, the parameters, which
must be positive.
 
Input, double BETA, the logarithm of the complete
beta function.
 
Output, int *IFAULT, error flag.
0, no error.
nonzero, an error occurred.
 
Output, double BETAIN, the value of the incomplete
Beta function ratio.
*/
{
double acu = 0.1E-14;
double ai;
// double betain;
double cx;
int indx;
int ns;
double pp;
double psq;
double qq;
double rx;
double temp;
double term;
double value;
double xx;
 
value = x;
ifault = 0;
//Check the input arguments.
if ( (p <= 0.0) || (q <= 0.0 )){
// *ifault = 1;
return value;
}
if ( x < 0.0 || 1.0 < x )
{
*ifault = 2;
return value;
}
/*
Special cases.
*/
if ( x == 0.0 || x == 1.0 ) {
return value;
}
psq = p + q;
cx = 1.0 - x;
 
if ( p < psq * x )
{
xx = cx;
cx = x;
pp = q;
qq = p;
indx = 1;
}
else
{
xx = x;
pp = p;
qq = q;
indx = 0;
}
 
term = 1.0;
ai = 1.0;
value = 1.0;
ns = ( int ) ( qq + cx * psq );
/*
Use the Soper reduction formula.
*/
rx = xx / cx;
temp = qq - ai;
if ( ns == 0 )
{
rx = xx;
}
 
for ( ; ; )
{
term = term * temp * rx / ( pp + ai );
value = value + term;;
temp = fabs ( term );
 
if ( temp <= acu && temp <= acu * value )
{
value = value * exp ( pp * log ( xx )
+ ( qq - 1.0 ) * log ( cx ) - beta ) / pp;
 
if ( indx )
{
value = 1.0 - value;
}
break;
}
 
ai = ai + 1.0;
ns = ns - 1;
 
if ( 0 <= ns )
{
temp = qq - ai;
if ( ns == 0 )
{
rx = xx;
}
}
else
{
temp = psq;
psq = psq + 1.0;
}
}
 
return value;
}
/******************************************************************************/
 
double r8_max ( double x, double y )
 
/******************************************************************************/
/*
Purpose:
 
R8_MAX returns the maximum of two R8's.
 
Licensing:
 
This code is distributed under the GNU LGPL license.
 
Modified:
 
18 August 2004
 
Author:
 
John Burkardt
 
Parameters:
 
Input, double X, Y, the quantities to compare.
 
Output, double R8_MAX, the maximum of X and Y.
*/
{
double value;
 
if ( y < x )
{
value = x;
}
else
{
value = y;
}
return value;
}
 
double Pvalue (const double *restrict ARRAY1, const size_t ARRAY1_SIZE, const double *restrict ARRAY2, const size_t ARRAY2_SIZE) {//calculate a p-value based on an array
if (ARRAY1_SIZE <= 1) {
return 1.0;
} else if (ARRAY2_SIZE <= 1) {
return 1.0;
}
double fmean1 = 0.0, fmean2 = 0.0;
for (size_t x = 0; x < ARRAY1_SIZE; x++) {//get sum of values in ARRAY1
if (isfinite(ARRAY1[x]) == 0) {//check to make sure this is a real numbere
puts("Got a non-finite number in 1st array, can't calculate P-value.");
exit(EXIT_FAILURE);
}
fmean1 += ARRAY1[x];
}
fmean1 /= ARRAY1_SIZE;
for (size_t x = 0; x < ARRAY2_SIZE; x++) {//get sum of values in ARRAY2
if (isfinite(ARRAY2[x]) == 0) {//check to make sure this is a real number
puts("Got a non-finite number in 2nd array, can't calculate P-value.");
exit(EXIT_FAILURE);
}
fmean2 += ARRAY2[x];
}
fmean2 /= ARRAY2_SIZE;
// printf("mean1 = %lf mean2 = %lf\n", fmean1, fmean2);
if (fmean1 == fmean2) {
return 1.0;//if the means are equal, the p-value is 1, leave the function
}
double unbiased_sample_variance1 = 0.0, unbiased_sample_variance2 = 0.0;
for (size_t x = 0; x < ARRAY1_SIZE; x++) {//1st part of added unbiased_sample_variance
unbiased_sample_variance1 += (ARRAY1[x]-fmean1)*(ARRAY1[x]-fmean1);
}
for (size_t x = 0; x < ARRAY2_SIZE; x++) {
unbiased_sample_variance2 += (ARRAY2[x]-fmean2)*(ARRAY2[x]-fmean2);
}
// printf("unbiased_sample_variance1 = %lf\tunbiased_sample_variance2 = %lf\n",unbiased_sample_variance1,unbiased_sample_variance2);//DEBUGGING
unbiased_sample_variance1 = unbiased_sample_variance1/(ARRAY1_SIZE-1);
unbiased_sample_variance2 = unbiased_sample_variance2/(ARRAY2_SIZE-1);
const double WELCH_T_STATISTIC = (fmean1-fmean2)/sqrt(unbiased_sample_variance1/ARRAY1_SIZE+unbiased_sample_variance2/ARRAY2_SIZE);
const double DEGREES_OF_FREEDOM = pow((unbiased_sample_variance1/ARRAY1_SIZE+unbiased_sample_variance2/ARRAY2_SIZE),2.0)//numerator
/
(
(unbiased_sample_variance1*unbiased_sample_variance1)/(ARRAY1_SIZE*ARRAY1_SIZE*(ARRAY1_SIZE-1))+
(unbiased_sample_variance2*unbiased_sample_variance2)/(ARRAY2_SIZE*ARRAY2_SIZE*(ARRAY2_SIZE-1))
);
// printf("Welch = %lf DOF = %lf\n", WELCH_T_STATISTIC, DEGREES_OF_FREEDOM);
const double a = DEGREES_OF_FREEDOM/2, x = DEGREES_OF_FREEDOM/(WELCH_T_STATISTIC*WELCH_T_STATISTIC+DEGREES_OF_FREEDOM);
const unsigned int N = 65536;//increase N for a tighter convergence
const double h = x/N;//necessary for integrating with Simpson's method
double sum1 = 0.0, sum2 = 0.0;
for(unsigned int i = 1;i < N+1; i++) {//integrate by Simpson's method (sometimes blows up at i = 0, so I start @ 1
sum1 += (pow(h * i + h / 2.0,a-1))/(sqrt(1-(h * i + h / 2.0)));
sum2 += (pow(h * i,a-1))/(sqrt(1-h * i));
}
// printf("sum1 = %lf sum2 = %lf\n",sum1, sum2);
// double return_value = ((3*h / 8.0) * ((pow(x,a-1))/(sqrt(1-x)) + sum1 + 0))/(exp(lgammal(a)+0.57236494292470009-lgammal(a+0.5)));//0.5723649... is lgammal(0.5)
double return_value = ((h / 6.0) * ((pow(x,a-1))/(sqrt(1-x)) + 4.0 * sum1 + 2.0 * sum2))/(exp(lgammal(a)+0.57236494292470009-lgammal(a+0.5)));//0.5723649... is lgammal(0.5)
if ((isinf(return_value) != 0) || (isnan(return_value) != 0) || (return_value > 1.0)) {
return 1.0;
} else {
int *restrict z = 0;
printf("%g =? %g\n", return_value, betain(x, a, 0.5, exp(lgammal(a)+0.57236494292470009-lgammal(a+0.5)), z));
return return_value;
}
}
//-------------------
int main(void) {
const double d1[] = {27.5,21.0,19.0,23.6,17.0,17.9,16.9,20.1,21.9,22.6,23.1,19.6,19.0,21.7,21.4};
const double d2[] = {27.1,22.0,20.8,23.4,23.4,23.5,25.8,22.0,24.8,20.2,21.9,22.1,22.9,20.5,24.4};
const double d3[] = {17.2,20.9,22.6,18.1,21.7,21.4,23.5,24.2,14.7,21.8};
const double d4[] = {21.5,22.8,21.0,23.0,21.6,23.6,22.5,20.7,23.4,21.8,20.7,21.7,21.5,22.5,23.6,21.5,22.5,23.5,21.5,21.8};
const double d5[] = {19.8,20.4,19.6,17.8,18.5,18.9,18.3,18.9,19.5,22.0};
const double d6[] = {28.2,26.6,20.1,23.3,25.2,22.1,17.7,27.6,20.6,13.7,23.2,17.5,20.6,18.0,23.9,21.6,24.3,20.4,24.0,13.2};
const double d7[] = {30.02,29.99,30.11,29.97,30.01,29.99};
const double d8[] = {29.89,29.93,29.72,29.98,30.02,29.98};
const double x[] = {3.0,4.0,1.0,2.1};
const double y[] = {490.2,340.0,433.9};
const double v1[] = {0.010268,0.000167,0.000167};
const double v2[] = {0.159258,0.136278,0.122389};
const double s1[] = {1.0/15,10.0/62.0};
const double s2[] = {1.0/10,2/50.0};
const double z1[] = {9/23.0,21/45.0,0/38.0};
const double z2[] = {0/44.0,42/94.0,0/22.0};
printf("Test sets 1 p-value = %g\n",Pvalue(d1,sizeof(d1)/sizeof(*d1),d2,sizeof(d2)/sizeof(*d2)));
printf("Test sets 2 p-value = %g\n",Pvalue(d3,sizeof(d3)/sizeof(*d3),d4,sizeof(d4)/sizeof(*d4)));
printf("Test sets 3 p-value = %g\n",Pvalue(d5,sizeof(d5)/sizeof(*d5),d6,sizeof(d6)/sizeof(*d6)));
printf("Test sets 4 p-value = %g\n",Pvalue(d7,sizeof(d7)/sizeof(*d7),d8,sizeof(d8)/sizeof(*d8)));
printf("Test sets 5 p-value = %g\n",Pvalue(x,sizeof(x)/sizeof(*x),y,sizeof(y)/sizeof(*y)));
printf("Test sets 6 p-value = %g\n",Pvalue(v1,sizeof(v1)/sizeof(*v1),v2,sizeof(v2)/sizeof(*v2)));
printf("Test sets 7 p-value = %g\n",Pvalue(s1,sizeof(s1)/sizeof(*s1),s2,sizeof(s2)/sizeof(*s2)));
printf("Test sets z p-value = %g\n", Pvalue(z1, 3, z2, 3));
const double g41[] = {1.062, 0.774, 0.909};
const double g412[] = {1.459, 0.674, 0.732};
const double g414[]= {1.174, 1.406, 1.536};
printf("41 gr -vs- 41.2 gr p-value = %g\n", Pvalue(g41, 3, g412, 3));
printf("41 gr -vs- 41.4 gr p-value = %g\n", Pvalue(g41, 3, g414, 3));
printf("41.2 gr -vs- 41.4 gr p-value = %g\n", Pvalue(g412, 3, g414, 3));
/*
Test sets 1 p-value = 2.137830e-02
Test sets 2 p-value = 1.488426e-01
Test sets 3 p-value = 3.597278e-02
Test sets 4 p-value = 9.077370e-02
Test sets 5 p-value = 1.075156e-02
Test sets 6 p-value = 3.399076e-03
Test sets 7 p-value = 5.272635e-01
 
with N = 199 and Simpson's 1st method:
Test sets 1 p-value = 2.292482e-02
Test sets 2 p-value = 1.535105e-01
Test sets 3 p-value = 3.857763e-02
Test sets 4 p-value = 9.265142e-02
Test sets 5 p-value = 1.076123e-02
Test sets 6 p-value = 3.414684e-03
Test sets 7 p-value = 5.266487e-01
*/
return 0;
}
</lang>