Welch's t-test: Difference between revisions

m
→‎{{header|Perl}}: cleaned up testing
(→‎{{header|Ruby}}: used corrections suggested by RuboCop)
m (→‎{{header|Perl}}: cleaned up testing)
Line 883:
 
=={{header|Perl}}==
=== Using Math::AnyNum ===
Uses Math::AnyNum for gamma and pi. It is possible to use some other modules (e.g. Math::Cephes) if Math::AnyNum has problematic dependencies.
{{trans|Sidef}}
<lang perl>use utf8;
use List::Util qw(sum);
use Math::AnyNum qw(gamma pi);
 
sub p_value ($$) {
my ($A, $B) = @_;
 
(@$A > 1 && @$B > 1) || return 1;
 
my $x̄_a = sum(@$A) / @$A;
my $x̄_b = sum(@$B) / @$B;
 
my $a_var = sum(map { ($x̄_a - $_)**2 } @$A) / (@$A - 1);
my $b_var = sum(map { ($x̄_b - $_)**2 } @$B) / (@$B - 1);
 
($a_var && $b_var) || return 1;
 
my $Welsh_𝒕_statistic = ($x̄_a - $x̄_b) / sqrt($a_var/@$A + $b_var/@$B);
 
my $DoF = ($a_var/@$A + $b_var/@$B)**2 / (
$a_var**2 / (@$A**3 - @$A**2) +
$b_var**2 / (@$B**3 - @$B**2));
 
my $sa = $DoF / 2 - 1;
my $x = $DoF / ($Welsh_𝒕_statistic**2 + $DoF);
my $N = 65355;
my $h = $x / $N;
 
my ($sum1, $sum2) = (0, 0);
 
foreach my $k (0 .. $N - 1) {
my $i = $h * $k;
$sum1 += ($i + $h/2)**$sa / sqrt(1 - ($i + $h/2));
$sum2 += $i**$sa / sqrt(1-$i);
}
 
($h/6 * ($x**$sa / sqrt(1-$x) + 4*$sum1 + 2*$sum2) /
(gamma($sa + 1) * sqrt(pi) / gamma($sa + 1.5)))->numify;
}
 
my @tests = (
[27.5, 21.0, 19.0, 23.6, 17.0, 17.9, 16.9, 20.1, 21.9, 22.6, 23.1, 19.6, 19.0, 21.7, 21.4],
[27.1, 22.0, 20.8, 23.4, 23.4, 23.5, 25.8, 22.0, 24.8, 20.2, 21.9, 22.1, 22.9, 20.5, 24.4],
 
[17.2, 20.9, 22.6, 18.1, 21.7, 21.4, 23.5, 24.2, 14.7, 21.8],
[21.5, 22.8, 21.0, 23.0, 21.6, 23.6, 22.5, 20.7, 23.4, 21.8, 20.7, 21.7, 21.5, 22.5, 23.6, 21.5, 22.5, 23.5, 21.5, 21.8],
 
[19.8, 20.4, 19.6, 17.8, 18.5, 18.9, 18.3, 18.9, 19.5, 22.0],
[28.2, 26.6, 20.1, 23.3, 25.2, 22.1, 17.7, 27.6, 20.6, 13.7, 23.2, 17.5, 20.6, 18.0, 23.9, 21.6, 24.3, 20.4, 24.0, 13.2],
 
[30.02, 29.99, 30.11, 29.97, 30.01, 29.99],
[29.89, 29.93, 29.72, 29.98, 30.02, 29.98],
 
[3.0, 4.0, 1.0, 2.1],
[490.2, 340.0, 433.9],
);
 
while (@tests) {
my ($left, $right) = splice(@tests, 0, 2);
print p_value($left, $right), "\n";
}</lang>
{{out}}
<pre>
0.0213780014628667
0.148841696605327
0.0359722710297968
0.0907733242856612
0.0107515340333929
</pre>
 
=== Using Burkardt's betain ===
We use a slightly more accurate lgamma than the C code. Note that Perl can be compiled with different underlying floating point representations -- double, long double, or quad double.
Line 992 ⟶ 1,065:
return $value;
}
my @d1 = (27.5,21.0,19.0,23.6,17.0,17.9,16.9,20.1,21.9,22.6,23.1,19.6,19.0,21.7,21.4);
my @d2 = (27.1,22.0,20.8,23.4,23.4,23.5,25.8,22.0,24.8,20.2,21.9,22.1,22.9,20.5,24.4);
my @d3 = (17.2,20.9,22.6,18.1,21.7,21.4,23.5,24.2,14.7,21.8);
my @d4 = (21.5,22.8,21.0,23.0,21.6,23.6,22.5,20.7,23.4,21.8,20.7,21.7,21.5,22.5,23.6,21.5,22.5,23.5,21.5,21.8);
my @d5 = (19.8,20.4,19.6,17.8,18.5,18.9,18.3,18.9,19.5,22.0);
my @d6 = (28.2,26.6,20.1,23.3,25.2,22.1,17.7,27.6,20.6,13.7,23.2,17.5,20.6,18.0,23.9,21.6,24.3,20.4,24.0,13.2);
my @d7 = (30.02,29.99,30.11,29.97,30.01,29.99);
my @d8 = (29.89,29.93,29.72,29.98,30.02,29.98);
my @x = (3.0,4.0,1.0,2.1);
my @y = (490.2,340.0,433.9);
my @s1 = (1.0/15,10.0/62.0);
my @s2 = (1.0/10,2/50.0);
my @v1 = (0.010268,0.000167,0.000167);
my @v2 = (0.159258,0.136278,0.122389);
my @z1 = (9/23.0,21/45.0,0/38.0);
my @z2 = (0/44.0,42/94.0,0/22.0);
 
my @answers = (
my @CORRECT_ANSWERS = (0.021378001462867,
0.021378001462867,
0.148841696605327,
0.0359722710297968,
Line 1,016 ⟶ 1,074:
0.00339907162713746,
0.52726574965384,
0.545266866977794);,
);
 
my @tests = (
my $pvalue = calculate_Pvalue(\@d1,\@d2);
[27.5,21.0,19.0,23.6,17.0,17.9,16.9,20.1,21.9,22.6,23.1,19.6,19.0,21.7,21.4],
my $error = abs($pvalue - $CORRECT_ANSWERS[0]);
[27.1,22.0,20.8,23.4,23.4,23.5,25.8,22.0,24.8,20.2,21.9,22.1,22.9,20.5,24.4],
printf("Test sets 1 p-value = %.14g\n",$pvalue);
 
[17.2,20.9,22.6,18.1,21.7,21.4,23.5,24.2,14.7,21.8],
$pvalue = calculate_Pvalue(\@d3,\@d4);
[21.5,22.8,21.0,23.0,21.6,23.6,22.5,20.7,23.4,21.8,20.7,21.7,21.5,22.5,23.6,21.5,22.5,23.5,21.5,21.8],
$error += abs($pvalue - $CORRECT_ANSWERS[1]);
printf("Test sets 2 p-value = %.14g\n",$pvalue);
 
[19.8,20.4,19.6,17.8,18.5,18.9,18.3,18.9,19.5,22.0],
$pvalue = calculate_Pvalue(\@d5,\@d6);
[28.2,26.6,20.1,23.3,25.2,22.1,17.7,27.6,20.6,13.7,23.2,17.5,20.6,18.0,23.9,21.6,24.3,20.4,24.0,13.2],
$error += abs($pvalue - $CORRECT_ANSWERS[2]);
printf("Test sets 3 p-value = %.14g\n",$pvalue);
 
[30.02,29.99,30.11,29.97,30.01,29.99],
$pvalue = calculate_Pvalue(\@d7,\@d8);
[29.89,29.93,29.72,29.98,30.02,29.98],
$error += abs($pvalue - $CORRECT_ANSWERS[3]);
printf("Test sets 4 p-value = %.14g\n",$pvalue);
 
[3.0,4.0,1.0,2.1],
$pvalue = calculate_Pvalue(\@x,\@y);
[490.2,340.0,433.9],
$error += abs($pvalue - $CORRECT_ANSWERS[4]);
 
[0.010268,0.000167,0.000167],
$pvalue = calculate_Pvalue(\@v1,\@v2);
[0.159258,0.136278,0.122389],
$error += abs($pvalue - $CORRECT_ANSWERS[5]);
printf("Test sets 6 p-value = %.14g\n",$pvalue);
 
[1.0/15,10.0/62.0],
$pvalue = calculate_Pvalue(\@s1,\@s2);
[1.0/10,2/50.0],
$error += abs($pvalue - $CORRECT_ANSWERS[6]);
printf("Test sets 7 p-value = %.14g\n",$pvalue);
 
[9/23.0,21/45.0,0/38.0],
$pvalue = calculate_Pvalue(\@z1,\@z2);
[0/44.0,42/94.0,0/22.0],
$error += abs($pvalue - $CORRECT_ANSWERS[7]);
printf("Test sets z p-value = %.14g\n",$pvalue);
 
printf("the cumulative error is %g\n", $error);</lang>
{{out}}
<pre>Test sets 1 p-value = 0.021378001462867
Test sets 2 p-value = 0.14884169660533
Test sets 3 p-value = 0.035972271029797
Test sets 4 p-value = 0.090773324285667
Test sets 5 p-value = 0.010751561149784
Test sets 6 p-value = 0.0033990716271375
Test sets 7 p-value = 0.52726574965384
Test sets z p-value = 0.54526686697779
the cumulative error is 4.87106e-15</pre>
 
=== Using Math::AnyNum like Sidef ===
This is us a simpler solution, and uses Math::AnyNum for gamma and Pi. It is possible to use some other modules (e.g. Math::Cephes) if Math::AnyNum has problematic dependencies.
{{trans|Sidef}}
<lang perl>use utf8;
use List::Util qw(sum);
use Math::AnyNum qw(gamma pi);
 
sub p_value ($$) {
my ($A, $B) = @_;
 
(@$A > 1 && @$B > 1) || return 1;
 
my $x̄_a = sum(@$A) / @$A;
my $x̄_b = sum(@$B) / @$B;
 
my $a_var = sum(map { ($x̄_a - $_)**2 } @$A) / (@$A - 1);
my $b_var = sum(map { ($x̄_b - $_)**2 } @$B) / (@$B - 1);
 
($a_var && $b_var) || return 1;
 
my $Welsh_𝒕_statistic = ($x̄_a - $x̄_b) / sqrt($a_var/@$A + $b_var/@$B);
 
my $DoF = ($a_var/@$A + $b_var/@$B)**2 / (
$a_var**2 / (@$A**3 - @$A**2) +
$b_var**2 / (@$B**3 - @$B**2));
 
my $sa = $DoF / 2 - 1;
my $x = $DoF / ($Welsh_𝒕_statistic**2 + $DoF);
my $N = 65355;
my $h = $x / $N;
 
my ($sum1, $sum2) = (0, 0);
 
foreach my $k (0 .. $N - 1) {
my $i = $h * $k;
$sum1 += ($i + $h/2)**$sa / sqrt(1 - ($i + $h/2));
$sum2 += $i**$sa / sqrt(1-$i);
}
 
($h/6 * ($x**$sa / sqrt(1-$x) + 4*$sum1 + 2*$sum2) /
(gamma($sa + 1) * sqrt(pi) / gamma($sa + 1.5)))->numify;
}
 
my @tests = (
[27.5, 21.0, 19.0, 23.6, 17.0, 17.9, 16.9, 20.1, 21.9, 22.6, 23.1, 19.6, 19.0, 21.7, 21.4],
[27.1, 22.0, 20.8, 23.4, 23.4, 23.5, 25.8, 22.0, 24.8, 20.2, 21.9, 22.1, 22.9, 20.5, 24.4],
 
[17.2, 20.9, 22.6, 18.1, 21.7, 21.4, 23.5, 24.2, 14.7, 21.8],
[21.5, 22.8, 21.0, 23.0, 21.6, 23.6, 22.5, 20.7, 23.4, 21.8, 20.7, 21.7, 21.5, 22.5, 23.6, 21.5, 22.5, 23.5, 21.5, 21.8],
 
[19.8, 20.4, 19.6, 17.8, 18.5, 18.9, 18.3, 18.9, 19.5, 22.0],
[28.2, 26.6, 20.1, 23.3, 25.2, 22.1, 17.7, 27.6, 20.6, 13.7, 23.2, 17.5, 20.6, 18.0, 23.9, 21.6, 24.3, 20.4, 24.0, 13.2],
 
[30.02, 29.99, 30.11, 29.97, 30.01, 29.99],
[29.89, 29.93, 29.72, 29.98, 30.02, 29.98],
 
[3.0, 4.0, 1.0, 2.1],
[490.2, 340.0, 433.9],
);
 
my $error = 0;
while (@tests) {
my ($left, $right) = splice(@tests, 0, 2);
printmy p_value$pvalue = calculate_Pvalue($left, $right), "\n";
$error += abs($pvalue - shift @answers);
}</lang>
printf("p-value = %.14g\n",$pvalue);
}
printf("cumulative error is %g\n", $error);</lang>
{{out}}
<pre>p-value = 0.021378001462867
<pre>
p-value = 0.14884169660533
0.0213780014628667
p-value = 0.035972271029797
0.148841696605327
p-value = 0.090773324285661
0.0359722710297968
p-value = 0.010751561149784
0.0907733242856612
p-value = 0.0033990716271375
0.0107515340333929
p-value = 0.52726574965384
</pre>
p-value = 0.54526686697779
cumulative error is 1.11139e-14</pre>
 
=={{header|Perl 6}}==
2,392

edits