Statistics/Chi-squared distribution: Difference between revisions

Content added Content deleted

Inline

@@ Line 91: / Line 91: @@
 {{works with|jq}}
 '''Also works with gojq, the Go implementation of jq.'''
-<syntaxhighlight lang=sh>
-# f must have arity 0
-def integrate($a; $b; $n; f):
-  (($b - $a) / $n) as $h
-  | reduce range(0;$n) as $i (0;
-      ($a + $i * $h) as $x
-      | . + ((( ($x|f) + 4 * (($x + ($h/2))|f) + (($x + $h)|f)) / 6)) )
-  | . * $h;
-# '''Lower Incomplete Gamma Function'''
-# Integral(from:0; to:$x) t^($a-1)*(-t | exp) dt
-def gammaIncomplete($a; $x; $epsilon):
-  def small($a;$b): (($a-$b)/($a+$b)) | length < $epsilon or
-                    (($a-$b)|length) < 1e-10;  # length here is abs
-  def f0: (if . == 0 then 1 else pow(.; $a-1) end) * ((- .) |exp) ;
-  # If $upper is large then divide to conquer:
-  def integrate($upper; $n):
-     if $upper < $a * 2 then integrate(0; $upper; $n; f0)
-     else integrate(   0;   2*$a; $n; f0) +
-          integrate(2*$a; $upper; $n; f0)
-     end ;
-  if $a == 1 then 1 - ((-$x)|exp)
-  elif $x > $a * 1e4 then 1
-  else {current: 0, n: 25, iteration: 0}
-  | .prev = 1
-  | until( small(.current; .prev);
-      .n *= 2
-      | .iteration += 1
-      | .prev = .current
-      | .current = integrate($x; .n) )
-  | .current
-  end ;
-def gammaIncomplete($a; $x):
-   # 1e-3 is adequate for many purposes; 1e-4 takes significantly longer for small $x
-   gammaIncomplete($a; $x; 1e-5);
-def Chi2_cdf($x; $k):
-  if $x == 0 then 0
-  else [1, gammaIncomplete($k/2;$x/2) / (($k/2)|gamma)]|min
-  end ;
-def Chi2_pdf($x; $k):
-  if $x <= 0 then 0
-  else ((-$x/2)|exp) * pow($x; $k/2 -1) / (pow(2;$k/2) * (($k/2)|gamma))
-  end;
-</syntaxhighlight>
 '''Formatting'''
-<syntaxhighlight lang=sh>
+<syntaxhighlight lang=jq>
 def lpad($len): tostring | ($len - length) as $l | (" " * $l)[:$l] + .;
@@ Line 158: / Line 110: @@
      // .
    end;
+</syntaxhighlight>
+'''Chi-squared pdf and cdf'''
+<syntaxhighlight lang=jq>
+def Chi2_pdf($x; $k):
+  if $x <= 0 then 0
+  else # ((-$x/2)|exp) * pow($x; $k/2 -1) / (pow(2;$k/2) * (($k/2)|gamma))
+         ((-$x/2) + ($k/2 -1) * ($x|log) - ( ($k/2)*(2|log) + (($k/2)|lgamma))) | exp
+  end;
+# $k is the degrees of freedom
+# Use lgamma to avoid $x^m (for large $x and large m) and
+# to avoid calling gamma for large $k
+def Chi2_cdf($x; $k):
+  if $x == 0 then 0
+  elif $x > (1e3 * $k) then 1
+  else 1e-15 as $tol  # for example
+  | { s: 0, m: 0, term: $tol}
+  | until (.term|length < $tol;
+      # .term = (pow($x/2; .m) / (($k/2 + .m + 1)|gamma))
+        .term = (((.m * (($x/2)|log)) - (($k/2 + .m + 1)|lgamma)) | exp)
+      | .s += .term
+      | .m += 1)
+  | .s * ( (-$x/2) + ($k/2)*(($x/2)|log)|exp)
+  end ;
 </syntaxhighlight>
 '''The Tasks'''
-<syntaxhighlight lang=sh>
+<syntaxhighlight lang=jq>
 def tables:
   def r: round(4) | lpad(10);
@@ Line 174: / Line 151: @@
    | "\($x|lpad(2)) \($cdf|r)\(1-$cdf|r)"
   );
 def airport:  [[77, 23], [88, 12], [79, 21], [81, 19]];
 def expected: [81.25, 18.75];
@@ Line 195: / Line 172: @@
 {{output}}
 <pre>
     Values of the χ2 probability distribution function
  x/k    1         2         3         4         5
 0         0         0         0         0
@@ Line 211: / Line 188: @@
     Values for χ2 with 3 degrees of freedom
 χ2     cum pdf   p-value
-0.1988    0.8012
+0.1987    0.8013
 0.4276    0.5724
-0.7386    0.2614
+0.7385    0.2615
 0.9540    0.0460
 0.9989    0.0011
-1.0000         0
+1.0000    0.0000
 For airport data table:
   diff sum : 4.512820512820513
   d.o.f.   : 3
-  χ2 value : 0.08875392598443502
+  χ2 value : 0.08875392598443499
   p-value  : 0.7889
 </pre>
 =={{header|Julia}}==