Welch's t-test: Difference between revisions

m (→‎Using Burkardt's betain: removed assignment of $cx to itself, added 'return' at end, made spacing more consistent)
welch_ttest(np.array([3.0, 4.0, 1.0, 2.1]), np.array([490.2, 340.0, 433.9]))
(-9.559497721932658, 2.0008523488562844, 0.01075156114978449)</lang>
=== Using Burkardt's betain from AS 63 ===
First, the implementation of betain (translated from the Stata program on the discussion page). The original Fortran code is copyrighted by the Royal Statistical Society. The C translation, written by John Burkardt, is under the GPL. The exact statement of the RSS license is unclear.
{{trans|Perl}}
 
<lang python>import math

def betain(x, p, q):
    if p <= 0 or q <= 0 or x < 0 or x > 1:
        raise ValueError
    if x == 0 or x == 1:
        return x

    acu = 1e-15
    lnbeta = math.lgamma(p) + math.lgamma(q) - math.lgamma(p + q)
    psq = p + q

    if p < psq * x:
        xx = 1 - x
        cx = x
        pp = q
        qq = p
        indx = True
    else:
        xx = x
        cx = 1 - x
        pp = p
        qq = q
        indx = False

    term = ai = value = 1.0
    ns = math.floor(qq + cx * psq)
    rx = xx / cx
    temp = qq - ai
    if ns == 0:
        rx = xx

    while True:
        term = term * temp * rx / (pp + ai)
        value += term
        temp = abs(term)
        if temp <= acu and temp <= acu * value:
            value *= math.exp(pp * math.log(xx) + (qq - 1) * math.log(cx) - lnbeta) / pp
            return 1 - value if indx else value

        ai += 1
        ns -= 1
        if ns >= 0:
            temp = qq - ai
            if ns == 0:
                rx = xx
        else:
            temp = psq
            psq += 1</lang>
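
A quick sanity check (not part of the original entry, and assuming SciPy is installed) is to compare the routine against <code>scipy.special.betainc</code>, which evaluates the same regularized incomplete beta function I<sub>x</sub>(p, q):

<lang python>from scipy.special import betainc  # SciPy assumed available; used only for cross-checking

# betain(x, p, q) should agree with SciPy's regularized incomplete beta I_x(p, q)
for x, p, q in [(0.3, 2.5, 1.5), (0.75, 0.5, 5.0), (0.02, 1.0, 0.5)]:
    assert abs(betain(x, p, q) - betainc(p, q, x)) < 1e-12</lang>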
 
The Welch's ''t''-test itself is then straightforward:
<lang python>import math

def welch_ttest(a1, a2):
    n1 = len(a1)
    n2 = len(a2)
    if n1 <= 1 or n2 <= 1:
        return 1.0

    mean1 = sum(a1) / n1
    mean2 = sum(a2) / n2
    if mean1 == mean2:
        return 1.0

    var1 = sum((x - mean1)**2 for x in a1) / (n1 - 1)
    var2 = sum((x - mean2)**2 for x in a2) / (n2 - 1)

    t = (mean1 - mean2) / math.sqrt(var1 / n1 + var2 / n2)
    df = (var1 / n1 + var2 / n2)**2 / (var1**2 / (n1**2 * (n1 - 1)) + var2**2 / (n2**2 * (n2 - 1)))
    p = betain(df / (t**2 + df), df / 2, 1 / 2)
    return [t, df, p]</lang>
 
'''Example'''

<lang python>a1 = [3, 4, 1, 2.1]
a2 = [490.2, 340, 433.9]
print(welch_ttest(a1, a2))</lang>

'''Output'''
<pre>[-9.559497721932658, 2.0008523488562844, 0.01075156114978449]</pre>
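
An earlier revision of this entry validated the calculation against the task's reference data sets; a condensed form of that check (data and expected p-value taken from that revision) can still be run against <code>welch_ttest</code>:

<lang python>d1 = [27.5,21.0,19.0,23.6,17.0,17.9,16.9,20.1,21.9,22.6,23.1,19.6,19.0,21.7,21.4]
d2 = [27.1,22.0,20.8,23.4,23.4,23.5,25.8,22.0,24.8,20.2,21.9,22.1,22.9,20.5,24.4]
t, df, p = welch_ttest(d1, d2)
print("p = %.14g" % p)  # expected: 0.021378001462867</lang>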
 
=={{header|R}}==