Welch's t-test: Difference between revisions
Content added Content deleted
m (→{{header|C}}: removed redundant variable 'x', should save RAM and make easier to read) |
(→{{header|Python}}: added lower level calculation, with benchmarks to make more similar to other languages) |
||
Line 1,216: | Line 1,216: | ||
=={{header|Python}}== |
=={{header|Python}}== |
||
=== Using Burkardt's betain === |
|||
{{trans|Perl}} |
|||
<lang python>import math |
|||
def calculate_Pvalue (array1, array2): |
|||
ARRAY1_SIZE = len(array1) |
|||
ARRAY2_SIZE = len(array2) |
|||
if ARRAY1_SIZE <= 1: |
|||
return 1.0 |
|||
elif ARRAY1_SIZE <= 1: |
|||
return 1.0 |
|||
fmean1 = sum(array1)/ARRAY1_SIZE |
|||
fmean2 = sum(array2)/ARRAY2_SIZE |
|||
if fmean1 == fmean2: |
|||
return 1.0 |
|||
unbiased_sample_variance1 = 0.0 |
|||
unbiased_sample_variance2 = 0.0 |
|||
for v in range(ARRAY1_SIZE): |
|||
unbiased_sample_variance1 += (array1[v] - fmean1) * (array1[v] - fmean1) |
|||
for v in range(ARRAY2_SIZE): |
|||
unbiased_sample_variance2 += (array2[v] - fmean2) * (array2[v] - fmean2) |
|||
unbiased_sample_variance1 = unbiased_sample_variance1/(ARRAY1_SIZE-1) |
|||
unbiased_sample_variance2 = unbiased_sample_variance2/(ARRAY2_SIZE-1) |
|||
WELCH_T_STATISTIC = (fmean1-fmean2)/math.sqrt(unbiased_sample_variance1/ARRAY1_SIZE+unbiased_sample_variance2/ARRAY2_SIZE) |
|||
DEGREES_OF_FREEDOM = pow((unbiased_sample_variance1/ARRAY1_SIZE+unbiased_sample_variance2/ARRAY2_SIZE),2.0) / ((unbiased_sample_variance1*unbiased_sample_variance1)/(ARRAY1_SIZE*ARRAY1_SIZE*(ARRAY1_SIZE-1))+ (unbiased_sample_variance2*unbiased_sample_variance2)/(ARRAY2_SIZE*ARRAY2_SIZE*(ARRAY2_SIZE-1)) |
|||
) |
|||
a = DEGREES_OF_FREEDOM/2 |
|||
value = DEGREES_OF_FREEDOM/(WELCH_T_STATISTIC*WELCH_T_STATISTIC+DEGREES_OF_FREEDOM) |
|||
beta = math.lgamma(a) + 0.57236494292470009-math.lgamma(a+0.5) |
|||
acu = 0.1E-14 |
|||
if a <= 0.0: |
|||
return value |
|||
if value < 0.0 or 1.0 < value: |
|||
return value |
|||
if value == 0 or value == 1.0: |
|||
return value |
|||
psq = a + 0.5 |
|||
cx = 1.0 - value |
|||
if a < (psq * value): |
|||
xx = cx |
|||
cx = value |
|||
pp = 0.5 |
|||
qq = a |
|||
indx = 1 |
|||
else: |
|||
xx = value |
|||
pp = a |
|||
qq = 0.5 |
|||
indx = 0 |
|||
term = 1.0 |
|||
ai = 1.0 |
|||
value = 1.0 |
|||
ns = int(qq + cx*psq) |
|||
rx = xx/cx |
|||
temp = qq - ai |
|||
if ns == 0: |
|||
rx = xx |
|||
while (1): |
|||
term = term * temp * rx / ( pp + ai ) |
|||
value = value + term |
|||
temp = abs ( term ) |
|||
if ((temp <= acu) and (temp <= acu * value)): |
|||
value = value * math.exp ( pp * math.log ( xx ) + ( qq - 1.0 ) * math.log ( cx ) - beta ) / pp |
|||
if indx: |
|||
value = 1.0 - value |
|||
break |
|||
ai = ai + 1.0 |
|||
ns = ns - 1 |
|||
if 0 <= ns: |
|||
temp = qq - ai |
|||
if ns == 0: |
|||
rx = xx |
|||
else: |
|||
temp = psq |
|||
psq = psq + 1.0 |
|||
return value |
|||
#end of function |
|||
d1 = [27.5,21.0,19.0,23.6,17.0,17.9,16.9,20.1,21.9,22.6,23.1,19.6,19.0,21.7,21.4] |
|||
d2 = [27.1,22.0,20.8,23.4,23.4,23.5,25.8,22.0,24.8,20.2,21.9,22.1,22.9,20.5,24.4] |
|||
d3 = [17.2,20.9,22.6,18.1,21.7,21.4,23.5,24.2,14.7,21.8] |
|||
d4 = [21.5,22.8,21.0,23.0,21.6,23.6,22.5,20.7,23.4,21.8,20.7,21.7,21.5,22.5,23.6,21.5,22.5,23.5,21.5,21.8] |
|||
d5 = [19.8,20.4,19.6,17.8,18.5,18.9,18.3,18.9,19.5,22.0] |
|||
d6 = [28.2,26.6,20.1,23.3,25.2,22.1,17.7,27.6,20.6,13.7,23.2,17.5,20.6,18.0,23.9,21.6,24.3,20.4,24.0,13.2] |
|||
d7 = [30.02,29.99,30.11,29.97,30.01,29.99] |
|||
d8 = [29.89,29.93,29.72,29.98,30.02,29.98] |
|||
x = [3.0,4.0,1.0,2.1] |
|||
y = [490.2,340.0,433.9] |
|||
v1 = [0.010268,0.000167,0.000167] |
|||
v2 = [0.159258,0.136278,0.122389] |
|||
s1 = [1.0/15,10.0/62.0] |
|||
s2 = [1.0/10,2/50.0] |
|||
z1 = [9/23.0,21/45.0,0/38.0] |
|||
z2 = [0/44.0,42/94.0,0/22.0] |
|||
CORRECT_ANSWERS = [0.021378001462867, 0.148841696605327, 0.0359722710297968, 0.090773324285671, 0.0107515611497845, 0.00339907162713746, 0.52726574965384, .545266866977794] |
|||
pvalue = calculate_Pvalue(d1,d2) |
|||
error = abs(pvalue - CORRECT_ANSWERS[0]) |
|||
print "Test sets 1 p-value = %.14g" % pvalue |
|||
pvalue = calculate_Pvalue(d3,d4) |
|||
error += abs(pvalue - CORRECT_ANSWERS[1]) |
|||
print "Test sets 2 p-value = %.14g" % pvalue |
|||
pvalue = calculate_Pvalue(d5,d6) |
|||
error += abs(pvalue - CORRECT_ANSWERS[2]) |
|||
print "Test sets 3 p-value = %.14g" % pvalue |
|||
pvalue = calculate_Pvalue(d7,d8) |
|||
error += abs(pvalue - CORRECT_ANSWERS[3]) |
|||
print "Test sets 4 p-value = %.14g" % pvalue |
|||
pvalue = calculate_Pvalue(x,y) |
|||
error += abs(pvalue - CORRECT_ANSWERS[4]) |
|||
print "Test sets 5 p-value = %.14g" % pvalue |
|||
pvalue = calculate_Pvalue(v1,v2) |
|||
error += abs(pvalue - CORRECT_ANSWERS[5]) |
|||
print "Test sets 6 p-value = %.14g" % pvalue |
|||
pvalue = calculate_Pvalue(s1,s2) |
|||
error += abs(pvalue - CORRECT_ANSWERS[6]) |
|||
print "Test sets 7 p-value = %.14g" % pvalue |
|||
pvalue = calculate_Pvalue(z1,z2) |
|||
error += abs(pvalue - CORRECT_ANSWERS[7]) |
|||
print "Test sets z p-value = %.14g" % pvalue |
|||
print 'the cumulative error is %g' % error</lang> |
|||
=== Using NumPy & SciPy === |
|||
<lang python>import numpy as np |
<lang python>import numpy as np |
||
import scipy as sp |
import scipy as sp |
||
Line 1,234: | Line 1,378: | ||
welch_ttest(np.array([3.0, 4.0, 1.0, 2.1]), np.array([490.2, 340.0, 433.9])) |
welch_ttest(np.array([3.0, 4.0, 1.0, 2.1]), np.array([490.2, 340.0, 433.9])) |
||
(-9.559497721932658, 2.0008523488562844, 0.01075156114978449)</lang> |
(-9.559497721932658, 2.0008523488562844, 0.01075156114978449)</lang> |
||
=={{header|R}}== |
=={{header|R}}== |
||
<lang R>#!/usr/bin/R |
<lang R>#!/usr/bin/R |