Welch's t-test: Difference between revisions

Content added Content deleted

Inline

@@ Line 885: / Line 885: @@
 =={{header|Perl}}==
+=== Using Burkardt's betain ===
+We use a slightly more accurate lgamma than the C code.  Note that Perl can be compiled with different underlying floating point representations -- double, long double, or quad double.
+{{trans|C}}
+<lang perl>
+use warnings;
+use strict;
+sub betain {   # Translation of John Burkardt's C
+  my($x,$p,$q,$beta) = @_;
+  my $acu = 10**(-15);
+  my($ai,$cx,$indx,$ns,$pp,$psq,$qq,$rx,$temp,$term,$value,$xx);
+  $value = $x;
+  # Check the input arguments.
+  return $value if $p <= 0.0 || $q <= 0.0;
+  return $value if $x < 0.0 || 1.0 < $x;
+  # Special cases
+  return $value if $x == 0.0 || $x == 1.0;
+  $psq = $p + $q;
+  $cx = 1.0 - $x;
+  if ($p < $psq * $x) {
+    ($xx, $cx, $pp, $qq, $indx) = ($cx, $x, $q, $p, 1);
+  } else {
+    ($xx, $cx, $pp, $qq, $indx) = ($x, $cx, $p, $q, 0);
+  }
+  $term = 1.0;
+  $ai = 1.0;
+  $value = 1.0;
+  $ns = int($qq + $cx * $psq);
+  # Soper reduction formula.
+  $rx = $xx / $cx;
+  $temp = $qq - $ai;
+  $rx = $xx if $ns == 0;
+  while (1) {
+    $term = $term * $temp * $rx / ( $pp + $ai );
+    $value = $value + $term;
+    $temp = abs ($term);
+    if ($temp <= $acu && $temp <= $acu * $value) {
+      $value = $value * exp ($pp * log($xx)
+                             + ($qq - 1.0) * log($cx) - $beta) / $pp;
+      $value = 1.0 - $value if $indx;
+      last;
+    }
+    $ai = $ai + 1.0;
+    $ns = $ns - 1;
+    if (0 <= $ns) {
+      $temp = $qq - $ai;
+      $rx = $xx if $ns == 0;
+    } else {
+      $temp = $psq;
+      $psq = $psq + 1.0;
+    }
+  }
+  $value;
+}
+sub lgamma {
+  my $x = shift;
+  my $log_sqrt_two_pi = 0.91893853320467274178;
+  my @lanczos_coef = (
+.99999999999980993, 676.5203681218851, -1259.1392167224028,
+.32342877765313, -176.61502916214059, 12.507343278686905,
+      -0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7 );
+  my $base = $x + 7.5;
+  my $sum = 0;
+  $sum += $lanczos_coef[$_] / ($x + $_)  for reverse (1..8);
+  $sum += $lanczos_coef[0];
+  $sum = $log_sqrt_two_pi + log($sum/$x) + ( ($x+0.5)*log($base) - $base );
+  $sum;
+}
+use List::Util 'sum';
+sub calculate_Pvalue {
+  my($array1, $array2) = @_;
+  return 1.0 if scalar @$array1 <= 1;
+  return 1.0 if scalar @$array2 <= 1;
+  my $mean1 = sum(@$array1) / scalar @$array1;
+  my $mean2 = sum(@$array2) / scalar @$array2;
+  return 1.0 if $mean1 == $mean2;
+  my($variance1, $variance2) = (0.0,0.0);
+  for my $x (@$array1) {
+    $variance1 += ($x-$mean1)*($x-$mean1);
+  }
+  for my $x (@$array2) {
+    $variance2 += ($x-$mean2)*($x-$mean2);
+  }
+  return 1.0 if $variance1 == 0.0 || $variance2 == 0.0;
+  my ($a1_size, $a2_size) = (scalar @$array1, scalar @$array2);
+  $variance1 = $variance1 / ($a1_size-1);
+  $variance2 = $variance2 / ($a2_size-1);
+  my $WELCH_T_STATISTIC = ($mean1-$mean2)/sqrt($variance1/$a1_size+$variance2/$a2_size);
+  my $DEGREES_OF_FREEDOM = (($variance1/$a1_size+$variance2/$a2_size)**2)  /
+    (
+      ($variance1*$variance1)/($a1_size*$a1_size*($a1_size-1))+
+      ($variance2*$variance2)/($a2_size*$a2_size*($a2_size-1))
+    );
+  my $A = $DEGREES_OF_FREEDOM/2;
+  my $x = $DEGREES_OF_FREEDOM/($WELCH_T_STATISTIC*$WELCH_T_STATISTIC+$DEGREES_OF_FREEDOM);
+  return betain($x, $A, 0.5, lgamma($A)+0.57236494292470009-lgamma($A+0.5));
+}
+my @d1 = (27.5,21.0,19.0,23.6,17.0,17.9,16.9,20.1,21.9,22.6,23.1,19.6,19.0,21.7,21.4);
+my @d2 = (27.1,22.0,20.8,23.4,23.4,23.5,25.8,22.0,24.8,20.2,21.9,22.1,22.9,20.5,24.4);
+my @d3 = (17.2,20.9,22.6,18.1,21.7,21.4,23.5,24.2,14.7,21.8);
+my @d4 = (21.5,22.8,21.0,23.0,21.6,23.6,22.5,20.7,23.4,21.8,20.7,21.7,21.5,22.5,23.6,21.5,22.5,23.5,21.5,21.8);
+my @d5 = (19.8,20.4,19.6,17.8,18.5,18.9,18.3,18.9,19.5,22.0);
+my @d6 = (28.2,26.6,20.1,23.3,25.2,22.1,17.7,27.6,20.6,13.7,23.2,17.5,20.6,18.0,23.9,21.6,24.3,20.4,24.0,13.2);
+my @d7 = (30.02,29.99,30.11,29.97,30.01,29.99);
+my @d8 = (29.89,29.93,29.72,29.98,30.02,29.98);
+my @x = (3.0,4.0,1.0,2.1);
+my @y = (490.2,340.0,433.9);
+my @s1 = (1.0/15,10.0/62.0);
+my @s2 = (1.0/10,2/50.0);
+my @v1 = (0.010268,0.000167,0.000167);
+my @v2 = (0.159258,0.136278,0.122389);
+my @z1 = (9/23.0,21/45.0,0/38.0);
+my @z2 = (0/44.0,42/94.0,0/22.0);
+my @CORRECT_ANSWERS = (0.021378001462867,
+.148841696605327,
+.0359722710297968,
+.090773324285671,
+.0107515611497845,
+.00339907162713746,
+.52726574965384,
+.545266866977794);
+my $pvalue = calculate_Pvalue(\@d1,\@d2);
+my $error = abs($pvalue - $CORRECT_ANSWERS[0]);
+printf("Test sets 1 p-value = %.14g\n",$pvalue);
+$pvalue = calculate_Pvalue(\@d3,\@d4);
+$error += abs($pvalue - $CORRECT_ANSWERS[1]);
+printf("Test sets 2 p-value = %.14g\n",$pvalue);
+$pvalue = calculate_Pvalue(\@d5,\@d6);
+$error += abs($pvalue - $CORRECT_ANSWERS[2]);
+printf("Test sets 3 p-value = %.14g\n",$pvalue);
+$pvalue = calculate_Pvalue(\@d7,\@d8);
+$error += abs($pvalue - $CORRECT_ANSWERS[3]);
+printf("Test sets 4 p-value = %.14g\n",$pvalue);
+$pvalue = calculate_Pvalue(\@x,\@y);
+$error += abs($pvalue - $CORRECT_ANSWERS[4]);
+$pvalue = calculate_Pvalue(\@v1,\@v2);
+$error += abs($pvalue - $CORRECT_ANSWERS[5]);
+printf("Test sets 6 p-value = %.14g\n",$pvalue);
+$pvalue = calculate_Pvalue(\@s1,\@s2);
+$error += abs($pvalue - $CORRECT_ANSWERS[6]);
+printf("Test sets 7 p-value = %.14g\n",$pvalue);
+$pvalue = calculate_Pvalue(\@z1,\@z2);
+$error += abs($pvalue - $CORRECT_ANSWERS[7]);
+printf("Test sets z p-value = %.14g\n",$pvalue);
+printf("the cumulative error is %g\n", $error);</lang>
+{{out}}
+<pre>Test sets 1 p-value = 0.021378001462867
+Test sets 2 p-value = 0.14884169660533
+Test sets 3 p-value = 0.035972271029797
+Test sets 4 p-value = 0.090773324285667
+Test sets 5 p-value = 0.010751561149784
+Test sets 6 p-value = 0.0033990716271375
+Test sets 7 p-value = 0.52726574965384
+Test sets z p-value = 0.54526686697779
+the cumulative error is 4.87106e-15</pre>
+=== Using Math::AnyNum like Sidef ===
+This is us a simpler solution, and uses Math::AnyNum for gamma and Pi.  It is possible to use some other modules (e.g. Math::Cephes) if Math::AnyNum has problematic dependencies.
 {{trans|Sidef}}
 <lang perl>use utf8;