Selection bias in clinical sciences: Difference between revisions

m
→‎{{header|Wren}}: Changed to Wren S/H
m (use wiki formatting for links)
m (→‎{{header|Wren}}: Changed to Wren S/H)
(6 intermediate revisions by 4 users not shown)
Line 1:
{{draft task|Probability and statistics}}
 
In epidemiology, retrospective analyses have well-known limitations compared to prospective studies.
Line 199:
adjustment for ties: 0.417533
</pre>
 
=={{header|Nim}}==
{{trans|Python}}
{{trans|Wren}}
<syntaxhighlight lang="Nim">import std/[algorithm, math, random, sequtils, strformat, sugar]
 
type
 
TreatmentClass {.pure.} = enum Untreated, Irregular, Regular
 
Subject = object
cumDose: float
category: TreatmentClass
hadCovid: bool
 
const DoseForRegular = 100
 
func categorize(subject: var Subject) =
## Set treatment category based on cumulative treatment taken.
subject.category = if subject.cumDose == 0: Untreated
elif subject.cumDose >= DoseForRegular: Regular
else: Irregular
 
proc update(subject: var Subject; pCovid = 0.001;
pStart = 0.005; pDosing = 0.25; doses = @[3, 6, 9]) =
## Daily update on the subject to check for infection and randomly dose.
if not subject.hadCovid:
if rand(1.0) < pCovid:
subject.hadCovid = true
elif (subject.cumDose == 0 and rand(1.0) < pStart) or
(subject.cumDose > 0 and rand(1.0) < pDosing):
subject.cumDose += sample(doses).toFloat
subject.categorize()
 
func kruskalWallis(a, b, c: seq[bool]): float =
 
# Aggregate and sort.
let s = sorted(a & b & c)
# Find rank of first occurrence of "true".
let ix = s.find(true) + 1
# Calculate average ranks for "false" and "true".
let n = s.len
let arf = ix / 2
let art = (ix + n) / 2
# Calculate sum of ranks for each list.
let sra = sum(a.mapIt(if it: art else: arf))
let srb = sum(b.mapIt(if it: art else: arf))
let src = sum(c.mapIt(if it: art else: arf))
# Calculate H.
result = 12 / (n * (n + 1)) * (sra * sra / a.len.toFloat + srb * srb / b.len.toFloat +
src * src / c.len.toFloat) - 3 * float(n + 1)
 
proc runStudy(numSubjects = 1000; duration = 180; interval= 30) =
## Run the study using the population of size "numSubjects" for "duration" days.
var population = newSeqWith(numSubjects, Subject())
var unt, untCovid, irr, irrCovid, reg, regCovid = 0
echo &"Total subjects: {num_subjects}"
 
for day in 1..duration:
for subj in population.mitems:
subj.update()
 
if day mod interval == 0:
echo &"\nDay {day}:"
unt = population.countIt(it.category == Untreated)
untCovid = population.countIt(it.category == Untreated and it.hadCovid)
echo &"Untreated: N = {unt}, with infection = {untCovid}"
irr = population.countIt(it.category == IRREGULAR)
irrCovid = population.countIt(it.category == IRREGULAR and it.hadCovid)
echo &"Irregular Use: N = {irr}, with infection = {irrCovid}"
reg = population.countIt(it.category == REGULAR)
regCovid = population.countIt(it.category == REGULAR and it.hadCovid)
echo &"Regular Use: N = {reg}, with infection = {reg_covid}"
 
if day == duration div 2:
echo "\nAt midpoint, infection case percentages are:"
echo " Untreated : ", 100 * untCovid / unt
echo " Irregulars: ", 100 * irrCovid / irr
echo " Regulars : ", 100 * regCovid / reg
 
echo "\nAt study end, infection case percentages are:"
echo &" Untreated : {100 * untCovid / unt} of group size of {unt}"
echo &" Irregulars: {100 * irrCovid / irr} of group size of {irr}"
echo &" Regulars : {100 * regCovid / reg} of group size of {reg}"
let untreated = collect:
for s in population:
if s.category == Untreated:
s.hadCovid
let irregular = collect:
for s in population:
if s.category == Irregular:
s.hadCovid
let regular = collect:
for s in population:
if s.category == Regular:
s.hadCovid
echo "\nFinal statistics: ", kruskalWallis(untreated, irregular, regular)
 
randomize()
runStudy(10_000)
</syntaxhighlight>
 
{{out}}
<pre>Total subjects: 10000
 
Day 30:
Untreated: N = 8619, with infection = 283
Irregular Use: N = 1379, with infection = 27
Regular Use: N = 2, with infection = 0
 
Day 60:
Untreated: N = 7480, with infection = 498
Irregular Use: N = 2362, with infection = 77
Regular Use: N = 158, with infection = 3
 
Day 90:
Untreated: N = 6479, with infection = 712
Irregular Use: N = 2440, with infection = 151
Regular Use: N = 1081, with infection = 24
 
At midpoint, infection case percentages are:
Untreated : 10.98935020836549
Irregulars: 6.188524590163935
Regulars : 2.220166512488436
 
Day 120:
Untreated: N = 5655, with infection = 873
Irregular Use: N = 2199, with infection = 210
Regular Use: N = 2146, with infection = 70
 
Day 150:
Untreated: N = 4998, with infection = 1007
Irregular Use: N = 1910, with infection = 257
Regular Use: N = 3092, with infection = 135
 
Day 180:
Untreated: N = 4479, with infection = 1122
Irregular Use: N = 1575, with infection = 302
Regular Use: N = 3946, with infection = 232
 
At study end, infection case percentages are:
Untreated : 25.05023442732753 of group size of 4479
Irregulars: 19.17460317460317 of group size of 1575
Regulars : 5.879371515458693 of group size of 3946
 
Final statistics: 235.1089104422972
</pre>
 
=={{header|Perl}}==
{{trans|Raku}}
<syntaxhighlight lang="perl" line>use v5.036;
use List::AllUtils <sum0 indexes firstidx>;
use enum qw<False True UNTREATED REGULAR IRREGULAR>;
use constant DOSE_FOR_REGULAR => 100;
 
my ($nSubjects,$duration,$interval) = (10000, 180, 30);
my (@dosage) = (0) x $nSubjects;
my (@category) = (UNTREATED) x $nSubjects;
my (@hadcovid) = (False) x $nSubjects;
 
sub update {
my $pCovid = 1e-3;
my $pStartTreatment = 5e-3;
my $pRedose = 1/4;
my @dRange = <3 6 9>;
for my $i (0 .. $nSubjects-1) {
unless ($hadcovid[$i]) {
if (rand() < $pCovid) {
$hadcovid[$i] = True
} else {
my $dose = $dosage[$i];
if ($dose==0 && rand() < $pStartTreatment or $dose > 0 && rand() < $pRedose) {
$dosage[$i] = $dose += $dRange[3*rand()];
$category[$i] = ($dose > DOSE_FOR_REGULAR) ? REGULAR : IRREGULAR
}
}
}
}
}
 
sub kruskal (@sets) {
my @ranked = sort @{$sets[0]}, @{$sets[1]}, @{$sets[2]};
my $n = @ranked;
my @sr = (0) x @sets;
my $ix = firstidx { $_ == 1 } @ranked;
my ($arf,$art) = ($ix/2, ($ix+$n)/2);
 
for my $i (0..2) {
$sr[$i] += $_ ? $art : $arf for @{$sets[$i]}
}
 
my $H = sum0 map { my $s = $sr[$_]; $s**2 / @{$sets[$_]} } 0..$#sr;
12/($n*($n+1)) * $H - 3 * ($n + 1)
}
 
my($unt,$irr,$reg,$hunt,$hirr,$hreg,@sunt,@sirr,@sreg);
my $midpoint = int $duration / 2;
 
say "Total subjects: $nSubjects\n";
 
for my $day (1 .. $duration) {
update();
if (0 == $day % $interval or $day == $duration or $day == $midpoint) {
@sunt = @hadcovid[ indexes { $_ == UNTREATED } @category];
@sirr = @hadcovid[ indexes { $_ == IRREGULAR } @category];
@sreg = @hadcovid[ indexes { $_ == REGULAR } @category];
( $unt, $irr, $reg) = (scalar(@sunt), scalar(@sirr), scalar(@sreg));
($hunt,$hirr,$hreg) = ( sum0(@sunt), sum0(@sirr), sum0(@sreg));
}
 
if (0 == $day % $interval) {
printf "Day %d:\n", $day;
printf "Untreated: N = %4d, with infection = %4d\n", $unt,$hunt;
printf "Irregular Use: N = %4d, with infection = %4d\n", $irr,$hirr;
printf "Regular Use: N = %4d, with infection = %4d\n\n",$reg,$hreg;
}
 
if ($day == $midpoint or $day == $duration) {
my $stage = $day == $midpoint ? 'midpoint' : 'study end';
printf "At $stage, Infection case percentages are:\n";
printf " Untreated : %6.2f\n", 100*$hunt/$unt;
printf " Irregulars: %6.2f\n", 100*$hirr/$irr;
printf " Regulars : %6.2f\n\n", 100*$hreg/$reg;
}
}
 
printf "Final statistics: H = %.2f\n", kruskal ( \@sunt, \@sirr, \@sreg );
 
</syntaxhighlight>
{{out}}
<pre>Total subjects: 10000
 
Day 30:
Untreated: N = 8640, with infection = 283
Irregular Use: N = 1360, with infection = 18
Regular Use: N = 0, with infection = 0
 
Day 60:
Untreated: N = 7499, with infection = 532
Irregular Use: N = 2338, with infection = 75
Regular Use: N = 163, with infection = 1
 
Day 90:
Untreated: N = 6537, with infection = 709
Irregular Use: N = 2382, with infection = 144
Regular Use: N = 1081, with infection = 12
 
At midpoint, Infection case percentages are:
Untreated : 10.85
Irregulars: 6.05
Regulars : 1.11
 
Day 120:
Untreated: N = 5739, with infection = 855
Irregular Use: N = 2102, with infection = 216
Regular Use: N = 2159, with infection = 52
 
Day 150:
Untreated: N = 5091, with infection = 1012
Irregular Use: N = 1792, with infection = 270
Regular Use: N = 3117, with infection = 134
 
Day 180:
Untreated: N = 4468, with infection = 1126
Irregular Use: N = 1669, with infection = 315
Regular Use: N = 3863, with infection = 243
 
At study end, Infection case percentages are:
Untreated : 25.20
Irregulars: 18.87
Regulars : 6.29
 
Final statistics: H = 218.74</pre>
 
=={{header|Phix}}==
Line 462 ⟶ 735:
Final statistics: KruskalResult(statistic=55.48204323818349, pvalue=8.95833684545873e-13)
</pre>
 
=={{header|Raku}}==
{{trans|Phix}}
<syntaxhighlight lang="raku" line># 20221025 Raku programming solution
 
enum <UNTREATED REGULAR IRREGULAR>;
my \DOSE_FOR_REGULAR = 100;
my ($nSubjects,$duration,$interval) = 10000, 180, 30;
my (@dosage,@category,@hadcovid) := (0,UNTREATED,False)>>.&{($_ xx $nSubjects).Array};
 
sub update($pCovid=1e-3, $pStartTreatment=5e-3, $pRedose=¼, @dRange=<3 6 9>) {
for 0 ..^ @dosage.elems -> \i {
unless @hadcovid[i] {
if rand < $pCovid {
@hadcovid[i] = True
} else {
my $dose = @dosage[i];
if $dose==0 && rand < $pStartTreatment or $dose > 0 && rand < $pRedose {
@dosage[i] = $dose += @dRange.roll;
@category[i] = ($dose > DOSE_FOR_REGULAR) ?? REGULAR !! IRREGULAR
}
}
}
}
}
 
sub kruskal (@sets) {
my $n = ( my @ranked = @sets>>.List.flat.sort ).elems;
my @sr = 0 xx @sets.elems;
my $ix = (@ranked.first: * == True, :k)+1,
my ($arf,$art) = ($ix, $ix+$n) >>/>> 2;
 
for @sets.kv -> \i,@set { for @set -> $b { @sr[i] += $b ?? $art !! $arf } }
 
my $H = [+] @sr.kv.map: -> \i,\s { s*s/@sets[i].elems }
return 12/($n*($n+1)) * $H - 3 * ($n + 1)
}
 
say "Total subjects: $nSubjects\n";
 
my ($midpoint,$unt,$irr,$reg,$hunt,$hirr,$hreg,@sunt,@sirr,@sreg)=$duration div 2;
 
for 1 .. $duration -> \day {
update();
if day %% $interval or day == $duration or day == $midpoint {
@sunt = @hadcovid[ @category.grep: UNTREATED,:k ];
@sirr = @hadcovid[ @category.grep: IRREGULAR,:k ];
@sreg = @hadcovid[ @category.grep: REGULAR, :k ];
($unt,$hunt,$irr,$hirr,$reg,$hreg)=(@sunt,@sirr,@sreg).map:{|(.elems,.sum)}
}
if day %% $interval {
printf "Day %d:\n",day;
printf "Untreated: N = %4d, with infection = %4d\n", $unt,$hunt;
printf "Irregular Use: N = %4d, with infection = %4d\n", $irr,$hirr;
printf "Regular Use: N = %4d, with infection = %4d\n\n",$reg,$hreg
}
if day == $midpoint | $duration {
my $stage = day == $midpoint ?? 'midpoint' !! 'study end';
printf "At $stage, Infection case percentages are:\n";
printf " Untreated : %f\n", 100*$hunt/$unt;
printf " Irregulars: %f\n", 100*$hirr/$irr;
printf " Regulars : %f\n\n",100*$hreg/$reg
}
}
 
printf "Final statistics: H = %f\n", kruskal ( @sunt, @sirr, @sreg )</syntaxhighlight>
{{out}}
<pre>Total subjects: 10000
 
Day 30:
Untreated: N = 8616, with infection = 314
Irregular Use: N = 1383, with infection = 27
Regular Use: N = 1, with infection = 0
 
Day 60:
Untreated: N = 7511, with infection = 561
Irregular Use: N = 2308, with infection = 87
Regular Use: N = 181, with infection = 1
 
Day 90:
Untreated: N = 6571, with infection = 760
Irregular Use: N = 2374, with infection = 141
Regular Use: N = 1055, with infection = 19
 
At midpoint, Infection case percentages are:
Untreated : 11.565972
Irregulars: 5.939343
Regulars : 1.800948
 
Day 120:
Untreated: N = 5757, with infection = 909
Irregular Use: N = 2097, with infection = 189
Regular Use: N = 2146, with infection = 61
 
Day 150:
Untreated: N = 5092, with infection = 1054
Irregular Use: N = 1832, with infection = 238
Regular Use: N = 3076, with infection = 132
 
Day 180:
Untreated: N = 4530, with infection = 1172
Irregular Use: N = 1615, with infection = 282
Regular Use: N = 3855, with infection = 229
 
At study end, Infection case percentages are:
Untreated : 25.871965
Irregulars: 17.461300
Regulars : 5.940337
 
Final statistics: H = 248.419454</pre>
 
=={{header|Wren}}==
{{trans|Python}}
{{libheader|Wren-math}}
<syntaxhighlight lang="ecmascriptwren">import "random" for Random
import "./math" for Nums
 
9,485

edits