Welch's t-test: Difference between revisions

m (→‎Using Burkardt's betain: removed assignment of $cx to itself, added 'return' at end, made spacing more consistent)
welch_ttest(np.array([3.0, 4.0, 1.0, 2.1]), np.array([490.2, 340.0, 433.9]))
(-9.559497721932658, 2.0008523488562844, 0.01075156114978449)</lang>
=== Using Burkardt's betain from AS 63 ===
First, the implementation of betain (translated from the Stata program on the discussion page). The original Fortran code is copyrighted by the Royal Statistical Society. The C translation, written by John Burkardt, is under the GPL. The exact statement of the RSS license is unclear.
{{trans|Perl}}
 
<lang python>import math

def betain(x, p, q):
    if p <= 0 or q <= 0 or x < 0 or x > 1:
        raise ValueError
    if x == 0 or x == 1:
        return x

    acu = 1e-15
    lnbeta = math.lgamma(p) + math.lgamma(q) - math.lgamma(p + q)
    psq = p + q

    if p < psq * x:
        xx = 1 - x
        cx = x
        pp = q
        qq = p
        indx = True
    else:
        xx = x
        cx = 1 - x
        pp = p
        qq = q
        indx = False

    term = ai = value = 1.0
    ns = math.floor(qq + cx * psq)
    rx = xx / cx
    temp = qq - ai
    if ns == 0:
        rx = xx

    while True:
        term = term * temp * rx / (pp + ai)
        value += term
        temp = abs(term)
        if temp <= acu and temp <= acu * value:
            value *= math.exp(pp * math.log(xx) + (qq - 1) * math.log(cx) - lnbeta) / pp
            return 1 - value if indx else value

        ai += 1
        ns -= 1
        if ns >= 0:
            temp = qq - ai
            if ns == 0:
                rx = xx
        else:
            temp = psq
            psq += 1</lang>
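
A quick sanity check (not part of the original entry, and assuming SciPy is installed) is to compare the routine against <code>scipy.special.betainc</code>, which evaluates the same regularized incomplete beta function I<sub>x</sub>(p, q):

<lang python>from scipy.special import betainc  # SciPy assumed available; used only for cross-checking

# betain(x, p, q) should agree with SciPy's regularized incomplete beta I_x(p, q)
for x, p, q in [(0.3, 2.5, 1.5), (0.75, 0.5, 5.0), (0.02, 1.0, 0.5)]:
    assert abs(betain(x, p, q) - betainc(p, q, x)) < 1e-12</lang>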
 
The Welch's ''t''-test itself is then straightforward:
<lang python>import math

def welch_ttest(a1, a2):
    n1 = len(a1)
    n2 = len(a2)
    if n1 <= 1 or n2 <= 1:
        return 1.0

    mean1 = sum(a1) / n1
    mean2 = sum(a2) / n2
    if mean1 == mean2:
        return 1.0

    var1 = sum((x - mean1)**2 for x in a1) / (n1 - 1)
    var2 = sum((x - mean2)**2 for x in a2) / (n2 - 1)

    t = (mean1 - mean2) / math.sqrt(var1 / n1 + var2 / n2)
    df = (var1 / n1 + var2 / n2)**2 / (var1**2 / (n1**2 * (n1 - 1)) + var2**2 / (n2**2 * (n2 - 1)))
    p = betain(df / (t**2 + df), df / 2, 1 / 2)
    return [t, df, p]</lang>
 
'''Example'''

<lang python>a1 = [3, 4, 1, 2.1]
a2 = [490.2, 340, 433.9]
print(welch_ttest(a1, a2))</lang>

'''Output'''
<pre>[-9.559497721932658, 2.0008523488562844, 0.01075156114978449]</pre>
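
An earlier revision of this entry validated the calculation against the task's reference data sets; a condensed form of that check (data and expected p-value taken from that revision) can still be run against <code>welch_ttest</code>:

<lang python>d1 = [27.5,21.0,19.0,23.6,17.0,17.9,16.9,20.1,21.9,22.6,23.1,19.6,19.0,21.7,21.4]
d2 = [27.1,22.0,20.8,23.4,23.4,23.5,25.8,22.0,24.8,20.2,21.9,22.1,22.9,20.5,24.4]
t, df, p = welch_ttest(d1, d2)
print("p = %.14g" % p)  # expected: 0.021378001462867</lang>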
 
=={{header|R}}==