Welch's t-test: Difference between revisions

→‎{{header|Python}}: added lower level calculation, with benchmarks to make more similar to other languages
m (→‎{{header|C}}: removed redundant variable 'x', should save RAM and make easier to read)
(→‎{{header|Python}}: added lower level calculation, with benchmarks to make more similar to other languages)
Line 1,216:
 
=={{header|Python}}==
 
=== Using Burkardt's betain ===
{{trans|Perl}}
<lang python>import math
 
def calculate_Pvalue (array1, array2):
ARRAY1_SIZE = len(array1)
ARRAY2_SIZE = len(array2)
if ARRAY1_SIZE <= 1:
return 1.0
elif ARRAY1_SIZE <= 1:
return 1.0
fmean1 = sum(array1)/ARRAY1_SIZE
fmean2 = sum(array2)/ARRAY2_SIZE
if fmean1 == fmean2:
return 1.0
unbiased_sample_variance1 = 0.0
unbiased_sample_variance2 = 0.0
for v in range(ARRAY1_SIZE):
unbiased_sample_variance1 += (array1[v] - fmean1) * (array1[v] - fmean1)
for v in range(ARRAY2_SIZE):
unbiased_sample_variance2 += (array2[v] - fmean2) * (array2[v] - fmean2)
unbiased_sample_variance1 = unbiased_sample_variance1/(ARRAY1_SIZE-1)
unbiased_sample_variance2 = unbiased_sample_variance2/(ARRAY2_SIZE-1)
WELCH_T_STATISTIC = (fmean1-fmean2)/math.sqrt(unbiased_sample_variance1/ARRAY1_SIZE+unbiased_sample_variance2/ARRAY2_SIZE)
DEGREES_OF_FREEDOM = pow((unbiased_sample_variance1/ARRAY1_SIZE+unbiased_sample_variance2/ARRAY2_SIZE),2.0) / ((unbiased_sample_variance1*unbiased_sample_variance1)/(ARRAY1_SIZE*ARRAY1_SIZE*(ARRAY1_SIZE-1))+ (unbiased_sample_variance2*unbiased_sample_variance2)/(ARRAY2_SIZE*ARRAY2_SIZE*(ARRAY2_SIZE-1))
)
a = DEGREES_OF_FREEDOM/2
value = DEGREES_OF_FREEDOM/(WELCH_T_STATISTIC*WELCH_T_STATISTIC+DEGREES_OF_FREEDOM)
beta = math.lgamma(a) + 0.57236494292470009-math.lgamma(a+0.5)
acu = 0.1E-14
if a <= 0.0:
return value
if value < 0.0 or 1.0 < value:
return value
if value == 0 or value == 1.0:
return value
psq = a + 0.5
cx = 1.0 - value
if a < (psq * value):
xx = cx
cx = value
pp = 0.5
qq = a
indx = 1
else:
xx = value
pp = a
qq = 0.5
indx = 0
term = 1.0
ai = 1.0
value = 1.0
ns = int(qq + cx*psq)
rx = xx/cx
temp = qq - ai
if ns == 0:
rx = xx
while (1):
term = term * temp * rx / ( pp + ai )
value = value + term
temp = abs ( term )
if ((temp <= acu) and (temp <= acu * value)):
value = value * math.exp ( pp * math.log ( xx ) + ( qq - 1.0 ) * math.log ( cx ) - beta ) / pp
if indx:
value = 1.0 - value
break
 
ai = ai + 1.0
ns = ns - 1
 
if 0 <= ns:
temp = qq - ai
if ns == 0:
rx = xx
else:
temp = psq
psq = psq + 1.0
return value
#end of function
 
d1 = [27.5,21.0,19.0,23.6,17.0,17.9,16.9,20.1,21.9,22.6,23.1,19.6,19.0,21.7,21.4]
d2 = [27.1,22.0,20.8,23.4,23.4,23.5,25.8,22.0,24.8,20.2,21.9,22.1,22.9,20.5,24.4]
d3 = [17.2,20.9,22.6,18.1,21.7,21.4,23.5,24.2,14.7,21.8]
d4 = [21.5,22.8,21.0,23.0,21.6,23.6,22.5,20.7,23.4,21.8,20.7,21.7,21.5,22.5,23.6,21.5,22.5,23.5,21.5,21.8]
d5 = [19.8,20.4,19.6,17.8,18.5,18.9,18.3,18.9,19.5,22.0]
d6 = [28.2,26.6,20.1,23.3,25.2,22.1,17.7,27.6,20.6,13.7,23.2,17.5,20.6,18.0,23.9,21.6,24.3,20.4,24.0,13.2]
d7 = [30.02,29.99,30.11,29.97,30.01,29.99]
d8 = [29.89,29.93,29.72,29.98,30.02,29.98]
x = [3.0,4.0,1.0,2.1]
y = [490.2,340.0,433.9]
v1 = [0.010268,0.000167,0.000167]
v2 = [0.159258,0.136278,0.122389]
s1 = [1.0/15,10.0/62.0]
s2 = [1.0/10,2/50.0]
z1 = [9/23.0,21/45.0,0/38.0]
z2 = [0/44.0,42/94.0,0/22.0]
 
CORRECT_ANSWERS = [0.021378001462867, 0.148841696605327, 0.0359722710297968, 0.090773324285671, 0.0107515611497845, 0.00339907162713746, 0.52726574965384, .545266866977794]
 
pvalue = calculate_Pvalue(d1,d2)
error = abs(pvalue - CORRECT_ANSWERS[0])
print "Test sets 1 p-value = %.14g" % pvalue
 
pvalue = calculate_Pvalue(d3,d4)
error += abs(pvalue - CORRECT_ANSWERS[1])
print "Test sets 2 p-value = %.14g" % pvalue
 
pvalue = calculate_Pvalue(d5,d6)
error += abs(pvalue - CORRECT_ANSWERS[2])
print "Test sets 3 p-value = %.14g" % pvalue
 
pvalue = calculate_Pvalue(d7,d8)
error += abs(pvalue - CORRECT_ANSWERS[3])
print "Test sets 4 p-value = %.14g" % pvalue
 
pvalue = calculate_Pvalue(x,y)
error += abs(pvalue - CORRECT_ANSWERS[4])
print "Test sets 5 p-value = %.14g" % pvalue
 
pvalue = calculate_Pvalue(v1,v2)
error += abs(pvalue - CORRECT_ANSWERS[5])
print "Test sets 6 p-value = %.14g" % pvalue
 
pvalue = calculate_Pvalue(s1,s2)
error += abs(pvalue - CORRECT_ANSWERS[6])
print "Test sets 7 p-value = %.14g" % pvalue
 
pvalue = calculate_Pvalue(z1,z2)
error += abs(pvalue - CORRECT_ANSWERS[7])
print "Test sets z p-value = %.14g" % pvalue
 
print 'the cumulative error is %g' % error</lang>
 
=== Using NumPy & SciPy ===
<lang python>import numpy as np
import scipy as sp
Line 1,234 ⟶ 1,378:
welch_ttest(np.array([3.0, 4.0, 1.0, 2.1]), np.array([490.2, 340.0, 433.9]))
(-9.559497721932658, 2.0008523488562844, 0.01075156114978449)</lang>
 
=={{header|R}}==
<lang R>#!/usr/bin/R