Welch's t-test: Difference between revisions
Content added Content deleted
(→{{header|C}}: merged betain function inside Pvalue function, should run faster and produce smaller executable) |
(→Using Burkardt's betain: merged betain function inside calculate_pvalue, eliminated redundant variables to clean code, should run a little faster) |
||
Line 882: | Line 882: | ||
use warnings; |
use warnings; |
||
use strict; |
use strict; |
||
sub betain { # Translation of John Burkardt's C |
|||
my($x,$p,$q,$beta) = @_; |
|||
my $acu = 10**(-15); |
|||
my($ai,$cx,$indx,$ns,$pp,$psq,$qq,$rx,$temp,$term,$value,$xx); |
|||
$value = $x; |
|||
# Check the input arguments. |
|||
return $value if $p <= 0.0 || $q <= 0.0; |
|||
return $value if $x < 0.0 || 1.0 < $x; |
|||
# Special cases |
|||
return $value if $x == 0.0 || $x == 1.0; |
|||
$psq = $p + $q; |
|||
$cx = 1.0 - $x; |
|||
if ($p < $psq * $x) { |
|||
($xx, $cx, $pp, $qq, $indx) = ($cx, $x, $q, $p, 1); |
|||
} else { |
|||
($xx, $cx, $pp, $qq, $indx) = ($x, $cx, $p, $q, 0); |
|||
} |
|||
$term = 1.0; |
|||
$ai = 1.0; |
|||
$value = 1.0; |
|||
$ns = int($qq + $cx * $psq); |
|||
# Soper reduction formula. |
|||
$rx = $xx / $cx; |
|||
$temp = $qq - $ai; |
|||
$rx = $xx if $ns == 0; |
|||
while (1) { |
|||
$term = $term * $temp * $rx / ( $pp + $ai ); |
|||
$value = $value + $term; |
|||
$temp = abs ($term); |
|||
if ($temp <= $acu && $temp <= $acu * $value) { |
|||
$value = $value * exp ($pp * log($xx) |
|||
+ ($qq - 1.0) * log($cx) - $beta) / $pp; |
|||
$value = 1.0 - $value if $indx; |
|||
last; |
|||
} |
|||
$ai = $ai + 1.0; |
|||
$ns = $ns - 1; |
|||
if (0 <= $ns) { |
|||
$temp = $qq - $ai; |
|||
$rx = $xx if $ns == 0; |
|||
} else { |
|||
$temp = $psq; |
|||
$psq = $psq + 1.0; |
|||
} |
|||
} |
|||
$value; |
|||
} |
|||
sub lgamma { |
sub lgamma { |
||
Line 954: | Line 900: | ||
use List::Util 'sum'; |
use List::Util 'sum'; |
||
sub calculate_Pvalue { |
sub calculate_Pvalue { |
||
my $array1 = shift; |
|||
my $array2 = shift; |
|||
if (scalar @$array1 <= 1) { |
|||
return 1.0; |
|||
} |
|||
my $mean1 = sum(@$array1) / scalar @$array1; |
|||
if (scalar @$array2 <= 1) { |
|||
my $mean2 = sum(@$array2) / scalar @$array2; |
|||
return 1.0; |
|||
} |
|||
my($variance1, $variance2) = (0.0,0.0); |
|||
my $mean1 = sum(@{ $array1 }); |
|||
$mean1 /= scalar @$array1; |
|||
$variance1 += ($x-$mean1)*($x-$mean1); |
|||
my $mean2 = sum(@{ $array2 }); |
|||
} |
|||
$mean2 /= scalar @$array2; |
|||
if ($mean1 == $mean2) { |
|||
return 1.0; |
|||
} |
|||
} |
|||
return 1.0 if $variance1 == 0.0 || $variance2 == 0.0; |
|||
# say "mean1 = $mean1 mean2 = $mean2"; |
|||
my ($a1_size, $a2_size) = (scalar @$array1, scalar @$array2); |
|||
my $variance1 = 0.0; |
|||
my $variance2 = 0.0; |
|||
foreach my $x (@$array1) { |
|||
my $WELCH_T_STATISTIC = ($mean1-$mean2)/sqrt($variance1/$a1_size+$variance2/$a2_size); |
|||
$variance1 += ($x-$mean1)*($x-$mean1); |
|||
my $DEGREES_OF_FREEDOM = (($variance1/$a1_size+$variance2/$a2_size)**2) / |
|||
} |
|||
( |
|||
foreach my $x (@$array2) { |
|||
($variance1*$variance1)/($a1_size*$a1_size*($a1_size-1))+ |
|||
$variance2 += ($x-$mean2)*($x-$mean2); |
|||
($variance2*$variance2)/($a2_size*$a2_size*($a2_size-1)) |
|||
} |
|||
); |
|||
if (($variance1 == 0.0) && ($variance2 == 0.0)) { |
|||
my $A = $DEGREES_OF_FREEDOM/2; |
|||
return 1.0; |
|||
my $x = $DEGREES_OF_FREEDOM/($WELCH_T_STATISTIC*$WELCH_T_STATISTIC+$DEGREES_OF_FREEDOM); |
|||
} |
|||
return betain($x, $A, 0.5, lgamma($A)+0.57236494292470009-lgamma($A+0.5)); |
|||
$variance1 = $variance1/(scalar @$array1-1); |
|||
$variance2 = $variance2/(scalar @$array2-1); |
|||
my $array1_size = scalar @$array1; |
|||
my $array2_size = scalar @$array2; |
|||
my $WELCH_T_STATISTIC = ($mean1-$mean2)/sqrt($variance1/$array1_size+$variance2/$array2_size); |
|||
my $DEGREES_OF_FREEDOM = (($variance1/$array1_size+$variance2/(scalar @$array2))**2) |
|||
/ |
|||
( |
|||
($variance1*$variance1)/($array1_size*$array1_size*($array1_size-1))+ |
|||
($variance2*$variance2)/($array2_size*$array2_size*($array2_size-1)) |
|||
); |
|||
my $A = $DEGREES_OF_FREEDOM/2; |
|||
my $value = $DEGREES_OF_FREEDOM/($WELCH_T_STATISTIC*$WELCH_T_STATISTIC+$DEGREES_OF_FREEDOM); |
|||
#from here, translation of John Burkhardt's C |
|||
my $beta = lgamma($A)+0.57236494292470009-lgamma($A+0.5); |
|||
my $acu = 10**(-15); |
|||
my($ai,$cx,$indx,$ns,$pp,$psq,$qq,$rx,$temp,$term,$xx); |
|||
# Check the input arguments. |
|||
return $value if $A <= 0.0;# || $q <= 0.0; |
|||
return $value if $value < 0.0 || 1.0 < $value; |
|||
# Special cases |
|||
return $value if $value == 0.0 || $value == 1.0; |
|||
$psq = $A + 0.5; |
|||
$cx = 1.0 - $value; |
|||
if ($A < $psq * $value) { |
|||
($xx, $cx, $pp, $qq, $indx) = ($cx, $value, 0.5, $A, 1); |
|||
} else { |
|||
($xx, $cx, $pp, $qq, $indx) = ($value, $cx, $A, 0.5, 0); |
|||
} |
|||
$term = 1.0; |
|||
$ai = 1.0; |
|||
$value = 1.0; |
|||
$ns = int($qq + $cx * $psq); |
|||
#Soper reduction formula. |
|||
$rx = $xx / $cx; |
|||
$temp = $qq - $ai; |
|||
$rx = $xx if $ns == 0; |
|||
while (1) { |
|||
$term = $term * $temp * $rx / ( $pp + $ai ); |
|||
$value = $value + $term; |
|||
$temp = abs ($term); |
|||
if ($temp <= $acu && $temp <= $acu * $value) { |
|||
$value = $value * exp ($pp * log($xx) |
|||
+ ($qq - 1.0) * log($cx) - $beta) / $pp; |
|||
$value = 1.0 - $value if $indx; |
|||
last; |
|||
} |
|||
$ai = $ai + 1.0; |
|||
$ns = $ns - 1; |
|||
if (0 <= $ns) { |
|||
$temp = $qq - $ai; |
|||
$rx = $xx if $ns == 0; |
|||
} else { |
|||
$temp = $psq; |
|||
$psq = $psq + 1.0; |
|||
} |
|||
} |
|||
$value; |
|||
} |
} |
||