Fivenum: Difference between revisions
m (Python before R) |
(Go solution) |
||
Line 96: | Line 96: | ||
[-1.950595940, -0.676741205, 0.233247060, 0.746070945, 1.731315070] |
[-1.950595940, -0.676741205, 0.233247060, 0.746070945, 1.731315070] |
||
</pre> |
|||
=={{header|Go}}== |
|||
{{trans|Perl}} |
|||
<lang go>package main |
|||
import ( |
|||
"fmt" |
|||
"math" |
|||
"sort" |
|||
) |
|||
func fivenum(a []float64) (n5 [5]float64) { |
|||
sort.Float64s(a) |
|||
n := float64(len(a)) |
|||
n4 := float64((len(a)+3)/2) / 2 |
|||
d := []float64{1, n4, (n + 1) / 2, n + 1 - n4, n} |
|||
for e, de := range d { |
|||
floor := int(de - 1) |
|||
ceil := int(math.Ceil(de - 1)) |
|||
n5[e] = .5 * (a[floor] + a[ceil]) |
|||
} |
|||
return |
|||
} |
|||
var ( |
|||
x1 = []float64{36, 40, 7, 39, 41, 15} |
|||
x2 = []float64{15, 6, 42, 41, 7, 36, 49, 40, 39, 47, 43} |
|||
x3 = []float64{ |
|||
0.14082834, 0.09748790, 1.73131507, 0.87636009, -1.95059594, |
|||
0.73438555, -0.03035726, 1.46675970, -0.74621349, -0.72588772, |
|||
0.63905160, 0.61501527, -0.98983780, -1.00447874, -0.62759469, |
|||
0.66206163, 1.04312009, -0.10305385, 0.75775634, 0.32566578, |
|||
} |
|||
) |
|||
func main() { |
|||
fmt.Println(fivenum(x1)) |
|||
fmt.Println(fivenum(x2)) |
|||
fmt.Println(fivenum(x3)) |
|||
}</lang> |
|||
{{out}} |
|||
<pre> |
|||
[7 15 37.5 40 41] |
|||
[6 25.5 40 42.5 49] |
|||
[-1.95059594 -0.676741205 0.23324706 0.746070945 1.73131507] |
|||
</pre> |
|||
'''Alternate:''' |
|||
This solution is aimed at handling larger data sets more efficiently. It replaces the O(n log n) sort with O(n) quickselect. It also does not attempt to reproduce the R result exactly, to average values to get a median of an even number of data values, or otherwise estimate quantiles. The quickselect here leaves the input partitioned around the selected value, which allows another small optimization: The first quickselect call partitions the full input around the median. The second call, to get the first quartile, thus only has to process the partition up to the median. The third call, to get the minimum, only has to process the partition up to the first quartile. The 3rd quartile and maximum are obtained similarly. |
|||
<lang go>package main |
|||
import ( |
|||
"fmt" |
|||
"math/rand" |
|||
) |
|||
func fivenum(a []float64) (n [5]float64) { |
|||
last := len(a) - 1 |
|||
m := last / 2 |
|||
n[2] = qsel(a, m) |
|||
q1 := len(a) / 4 |
|||
n[1] = qsel(a[:m], q1) |
|||
n[0] = qsel(a[:q1], 0) |
|||
a = a[m:] |
|||
q3 := last - m - q1 |
|||
n[3] = qsel(a, q3) |
|||
a = a[q3:] |
|||
n[4] = qsel(a, len(a)-1) |
|||
return |
|||
} |
|||
func qsel(a []float64, k int) float64 { |
|||
for len(a) > 1 { |
|||
px := rand.Intn(len(a)) |
|||
pv := a[px] |
|||
last := len(a) - 1 |
|||
a[px], a[last] = a[last], pv |
|||
px = 0 |
|||
for i, v := range a[:last] { |
|||
if v < pv { |
|||
a[px], a[i] = v, a[px] |
|||
px++ |
|||
} |
|||
} |
|||
a[px], a[last] = pv, a[px] |
|||
if px == k { |
|||
return pv |
|||
} |
|||
if k < px { |
|||
a = a[:px] |
|||
} else { |
|||
a = a[px+1:] |
|||
k -= px + 1 |
|||
} |
|||
} |
|||
return a[0] |
|||
} |
|||
var ( |
|||
x1 = []float64{36, 40, 7, 39, 41, 15} |
|||
x2 = []float64{15, 6, 42, 41, 7, 36, 49, 40, 39, 47, 43} |
|||
x3 = []float64{ |
|||
0.14082834, 0.09748790, 1.73131507, 0.87636009, -1.95059594, |
|||
0.73438555, -0.03035726, 1.46675970, -0.74621349, -0.72588772, |
|||
0.63905160, 0.61501527, -0.98983780, -1.00447874, -0.62759469, |
|||
0.66206163, 1.04312009, -0.10305385, 0.75775634, 0.32566578, |
|||
} |
|||
) |
|||
func main() { |
|||
fmt.Println(fivenum(x1)) |
|||
fmt.Println(fivenum(x2)) |
|||
fmt.Println(fivenum(x3)) |
|||
}</lang> |
|||
{{out}} |
|||
<pre> |
|||
[7 15 36 40 41] |
|||
[6 15 40 43 49] |
|||
[-1.95059594 -0.62759469 0.14082834 0.73438555 1.73131507] |
|||
</pre> |
</pre> |
||
Revision as of 22:36, 13 March 2018
Many big-data and scientific programs use boxplots to show the distribution of data. Keeping large arrays in memory just to draw boxplots can be impractical and consume large amounts of RAM, so it can be useful to reduce each large array to its five-number summary instead.
For example, the R programming language implements Tukey's five-number summary as the fivenum function.
- Task
Given an array of numbers, compute the five-number summary.
- Note
While these five numbers can be used to draw a boxplot, statistical packages will typically need extra data. Moreover, while there is a consensus about the "box" of the boxplot, there are variations among statistical packages for the whiskers.
C
<lang c>#include <stdio.h>
#include <stdlib.h>
/*
 * Returns the median of the slice x[start..end_inclusive].
 * The slice is expected to be sorted already; this function only picks the
 * middle element (odd count) or averages the two middle elements (even count).
 * Prints a message and terminates the process with status 1 if the slice is
 * empty.
 */
double median(double *x, int start, int end_inclusive) {
    const int count = end_inclusive - start + 1;
    int mid;

    if (count <= 0) {
        printf("Array slice cannot be empty\n");
        exit(1);
    }

    mid = start + count / 2;
    return (count % 2) ? x[mid] : (x[mid - 1] + x[mid]) / 2.0;
}
/*
 * qsort comparator for doubles: returns 1 if *a > *b, -1 if *a < *b,
 * and 0 otherwise.
 */
int compare (const void *a, const void *b) {
    const double lhs = *(const double *)a;
    const double rhs = *(const double *)b;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (lhs > rhs) - (lhs < rhs);
}
/*
 * Computes the five-number summary (minimum, lower hinge, median, upper hinge,
 * maximum) of the x_len values in x and stores it in result[0..4].
 *
 * x is sorted in place as a side effect.
 *
 * Returns 0 on success. Returns 1, after printing a message and without
 * touching result, if x is empty or contains a NaN.
 */
int fivenum(double *x, double *result, int x_len) {
    int i, m, lower_end;

    /* Without this guard x[0] below would be read out of bounds and median()
       would terminate the whole process. */
    if (x_len <= 0) {
        printf("Unable to deal with empty arrays\n\n");
        return 1;
    }

    for (i = 0; i < x_len; i++) {
        if (x[i] != x[i]) {   /* only NaN compares unequal to itself */
            printf("Unable to deal with arrays containing NaN\n\n");
            return 1;
        }
    }

    qsort(x, x_len, sizeof(double), compare);

    result[0] = x[0];
    result[2] = median(x, 0, x_len - 1);
    result[4] = x[x_len - 1];

    /* Both halves include the middle element when x_len is odd. */
    m = x_len / 2;
    lower_end = (x_len % 2) ? m : m - 1;
    result[1] = median(x, 0, lower_end);
    result[3] = median(x, m, x_len - 1);
    return 0;
}
/*
 * Prints the five values in result as "[a, b, c, d, e]" followed by a blank
 * line, each value with `places` digits after the decimal point.
 * Always returns 0.
 */
int show(double *result, int places) {
    int i;

    printf("[");
    for (i = 0; i < 5; i++) {
        /* "%.*f" takes the precision as an argument, so no format string has
           to be assembled in a fixed-size buffer (which could overflow for
           large `places`). */
        printf("%.*f", places, result[i]);
        if (i < 4) printf(", ");
    }
    printf("]\n\n");
    return 0;
}
int main() {
double result[5];
double x1[11] = {15.0, 6.0, 42.0, 41.0, 7.0, 36.0, 49.0, 40.0, 39.0, 47.0, 43.0}; if (!fivenum(x1, result, 11)) show(result, 1);
double x2[6] = {36.0, 40.0, 7.0, 39.0, 41.0, 15.0}; if (!fivenum(x2, result, 6)) show(result, 1);
double x3[20] = { 0.14082834, 0.09748790, 1.73131507, 0.87636009, -1.95059594, 0.73438555, -0.03035726, 1.46675970, -0.74621349, -0.72588772, 0.63905160, 0.61501527, -0.98983780, -1.00447874, -0.62759469, 0.66206163, 1.04312009, -0.10305385, 0.75775634, 0.32566578 }; if (!fivenum(x3, result, 20)) show(result, 9);
return 0;
}</lang>
- Output:
[6.0, 25.5, 40.0, 42.5, 49.0] [7.0, 15.0, 37.5, 40.0, 41.0] [-1.950595940, -0.676741205, 0.233247060, 0.746070945, 1.731315070]
Go
<lang go>package main
import (
"fmt" "math" "sort"
)
// fivenum returns Tukey's five-number summary of a: minimum, lower hinge,
// median, upper hinge and maximum, using the same depth formula as R's
// fivenum function.
//
// The input slice is left untouched; the computation works on a sorted copy.
// For an empty input every element of the result is NaN (R returns NA values
// in that case).
func fivenum(a []float64) (n5 [5]float64) {
	if len(a) == 0 {
		for i := range n5 {
			n5[i] = math.NaN()
		}
		return n5
	}

	// Sort a private copy so the caller's data keeps its order.
	s := make([]float64, len(a))
	copy(s, a)
	sort.Float64s(s)

	n := float64(len(s))
	// Integer division reproduces floor((n+3)/2) from the R definition.
	n4 := float64((len(s)+3)/2) / 2
	// 1-based depths of the five statistics; a depth of x.5 means "average
	// the two neighbouring order statistics".
	depths := [5]float64{1, n4, (n + 1) / 2, n + 1 - n4, n}
	for i, d := range depths {
		lo := int(math.Floor(d - 1))
		hi := int(math.Ceil(d - 1))
		n5[i] = .5 * (s[lo] + s[hi])
	}
	return n5
}
// Sample data sets fed to fivenum by main.
var (
	// x1 has an even number of values.
	x1 = []float64{36, 40, 7, 39, 41, 15}

	// x2 has an odd number of values.
	x2 = []float64{15, 6, 42, 41, 7, 36, 49, 40, 39, 47, 43}

	// x3 holds twenty fractional values, some of them negative.
	x3 = []float64{
		0.14082834, 0.09748790, 1.73131507, 0.87636009,
		-1.95059594, 0.73438555, -0.03035726, 1.46675970,
		-0.74621349, -0.72588772, 0.63905160, 0.61501527,
		-0.98983780, -1.00447874, -0.62759469, 0.66206163,
		1.04312009, -0.10305385, 0.75775634, 0.32566578,
	}
)
func main() {
fmt.Println(fivenum(x1)) fmt.Println(fivenum(x2)) fmt.Println(fivenum(x3))
}</lang>
- Output:
[7 15 37.5 40 41] [6 25.5 40 42.5 49] [-1.95059594 -0.676741205 0.23324706 0.746070945 1.73131507]
Alternate:
This solution is aimed at handling larger data sets more efficiently. It replaces the O(n log n) sort with O(n) quickselect. Unlike the solution above, it does not attempt to reproduce the R result exactly: it neither averages the two middle values to obtain the median of an even number of data values, nor otherwise estimates quantiles. The quickselect here leaves the input partitioned around the selected value, which allows another small optimization: The first quickselect call partitions the full input around the median. The second call, to get the first quartile, thus only has to process the partition up to the median. The third call, to get the minimum, only has to process the partition up to the first quartile. The 3rd quartile and maximum are obtained similarly. <lang go>package main
import (
	"fmt"
	"math"
	"math/rand"
)
// fivenum returns a five-number summary of a: minimum, first quartile, median,
// third quartile and maximum. Every value is an actual element of a, picked by
// rank with quickselect; nothing is averaged or interpolated.
//
// With last = len(a)-1 and q = len(a)/4, the 0-based ranks in sorted order are
// 0, q, last/2, last-q and last. a is reordered in place. For an empty input
// every element of the result is NaN.
func fivenum(a []float64) (n [5]float64) {
	if len(a) == 0 {
		for i := range n {
			n[i] = math.NaN()
		}
		return n
	}
	last := len(a) - 1
	m := last / 2
	q := len(a) / 4

	// Each selection leaves its slice partitioned around the chosen rank, so
	// the next selection only needs the part up to and including that element.
	// Keeping the selected element in the sub-slice guarantees the sub-slice is
	// non-empty and the requested rank is in range, even for len(a) < 5.
	n[2] = qsel(a, m)
	n[1] = qsel(a[:m+1], q)
	n[0] = qsel(a[:q+1], 0)

	// Upper half, mirrored: from the median on, then from the third quartile on.
	hi := a[m:]
	q3 := last - m - q
	n[3] = qsel(hi, q3)
	hi = hi[q3:]
	n[4] = qsel(hi, len(hi)-1)
	return n
}

// qsel returns the element of a that has 0-based rank k in sorted order, using
// randomized quickselect. a is reordered in place: on return a[k] holds the
// result, no element before it is larger and no element after it is smaller.
//
// It panics if k is outside [0, len(a)).
func qsel(a []float64, k int) float64 {
	if k < 0 || k >= len(a) {
		panic("qsel: rank out of range")
	}
	for len(a) > 1 {
		px := rand.Intn(len(a))
		pv := a[px]
		last := len(a) - 1
		// Park the pivot at the end while partitioning the rest.
		a[px], a[last] = a[last], pv
		px = 0
		for i, v := range a[:last] {
			if v < pv {
				a[px], a[i] = v, a[px]
				px++
			}
		}
		// Move the pivot to its final sorted position.
		a[px], a[last] = pv, a[px]
		switch {
		case px == k:
			return pv
		case k < px:
			a = a[:px]
		default:
			a = a[px+1:]
			k -= px + 1
		}
	}
	return a[0]
}
// Sample data sets fed to fivenum by main.
var (
	// x1 has an even number of values.
	x1 = []float64{36, 40, 7, 39, 41, 15}

	// x2 has an odd number of values.
	x2 = []float64{15, 6, 42, 41, 7, 36, 49, 40, 39, 47, 43}

	// x3 holds twenty fractional values, some of them negative.
	x3 = []float64{
		0.14082834, 0.09748790, 1.73131507, 0.87636009,
		-1.95059594, 0.73438555, -0.03035726, 1.46675970,
		-0.74621349, -0.72588772, 0.63905160, 0.61501527,
		-0.98983780, -1.00447874, -0.62759469, 0.66206163,
		1.04312009, -0.10305385, 0.75775634, 0.32566578,
	}
)
func main() {
fmt.Println(fivenum(x1)) fmt.Println(fivenum(x2)) fmt.Println(fivenum(x3))
}</lang>
- Output:
[7 15 36 40 41] [6 15 40 43 49] [-1.95059594 -0.62759469 0.14082834 0.73438555 1.73131507]
Java
<lang java>import java.util.Arrays;
public class Fivenum {
/**
 * Returns the median of the slice {@code x[start..endInclusive]}.
 * The slice is expected to be sorted already; this method only picks the
 * middle element (odd count) or averages the two middle elements (even count).
 *
 * @throws IllegalArgumentException if the slice is empty
 */
static double median(double[] x, int start, int endInclusive) {
    final int count = endInclusive - start + 1;
    if (count <= 0) {
        throw new IllegalArgumentException("Array slice cannot be empty");
    }
    final int mid = start + count / 2;
    if (count % 2 == 1) {
        return x[mid];
    }
    return (x[mid - 1] + x[mid]) / 2.0;
}
/**
 * Computes the five-number summary of {@code x}: minimum, lower hinge, median,
 * upper hinge and maximum, in that order.
 *
 * <p>The argument is not modified; a sorted copy is used internally.
 *
 * @param x the data; must be non-empty and free of NaN
 * @return a new array of length 5
 * @throws IllegalArgumentException if {@code x} is empty or contains NaN
 */
static double[] fivenum(double[] x) {
    if (x.length == 0) {
        throw new IllegalArgumentException("Unable to deal with empty arrays");
    }
    // Iterate over primitives: no boxing to Double for every element.
    for (double d : x) {
        if (Double.isNaN(d)) {
            throw new IllegalArgumentException("Unable to deal with arrays containing NaN");
        }
    }

    double[] sorted = x.clone();
    Arrays.sort(sorted);

    // Both halves include the middle element when the length is odd.
    int m = sorted.length / 2;
    int lowerEnd = (sorted.length % 2 == 1) ? m : m - 1;

    double[] result = new double[5];
    result[0] = sorted[0];
    result[1] = median(sorted, 0, lowerEnd);
    result[2] = median(sorted, 0, sorted.length - 1);
    result[3] = median(sorted, m, sorted.length - 1);
    result[4] = sorted[sorted.length - 1];
    return result;
}
/** Prints the five-number summary of three sample data sets, each followed by a blank line. */
public static void main(String[] args) {
    double[] elevenValues = {15.0, 6.0, 42.0, 41.0, 7.0, 36.0, 49.0, 40.0, 39.0, 47.0, 43.0};
    double[] sixValues = {36.0, 40.0, 7.0, 39.0, 41.0, 15.0};
    double[] twentyValues = {
         0.14082834,  0.09748790,  1.73131507,  0.87636009, -1.95059594,
         0.73438555, -0.03035726,  1.46675970, -0.74621349, -0.72588772,
         0.63905160,  0.61501527, -0.98983780, -1.00447874, -0.62759469,
         0.66206163,  1.04312009, -0.10305385,  0.75775634,  0.32566578
    };

    double[][] dataSets = {elevenValues, sixValues, twentyValues};
    for (int i = 0; i < dataSets.length; i++) {
        String summary = Arrays.toString(fivenum(dataSets[i]));
        System.out.printf("%s\n\n", summary);
    }
}
}</lang>
- Output:
[6.0, 25.5, 40.0, 42.5, 49.0] [7.0, 15.0, 37.5, 40.0, 41.0] [-1.95059594, -0.676741205, 0.23324706, 0.746070945, 1.73131507]
Julia
<lang julia>function mediansorted(x::AbstractVector{T}, i::Integer, l::Integer)::T where T
len = l - i + 1 len > zero(len) || throw(ArgumentError("Array slice cannot be empty.")) mid = i + len ÷ 2 return isodd(len) ? x[mid] : (x[mid-1] + x[mid]) / 2
end
"""
    fivenum(x)

Return the five-number summary of `x` as a vector
`[minimum, lower hinge, median, upper hinge, maximum]`.

`x` itself is not modified; a sorted copy is used. The lower and upper halves
both include the middle element when `length(x)` is odd. Throws an
`ArgumentError` if `x` is empty.
"""
function fivenum(x::AbstractVector{T}) where T<:AbstractFloat
    isempty(x) && throw(ArgumentError("Array cannot be empty."))
    xs = sort(x)
    n = length(xs)
    # Julia arrays are 1-based: the lower half is xs[1:lowerend] and the upper
    # half is xs[upperstart:n]. (Indices copied from 0-based code would shift
    # both halves down by one element.)
    lowerend = (n + 1) ÷ 2
    upperstart = n ÷ 2 + 1
    return T[xs[1],
             mediansorted(xs, 1, lowerend),
             mediansorted(xs, 1, n),
             mediansorted(xs, upperstart, n),
             xs[n]]
end
for v in ([15.0, 6.0, 42.0, 41.0, 7.0, 36.0, 49.0, 40.0, 39.0, 47.0, 43.0],
[36.0, 40.0, 7.0, 39.0, 41.0, 15.0], [0.14082834, 0.09748790, 1.73131507, 0.87636009, -1.95059594, 0.73438555, -0.03035726, 1.46675970, -0.74621349, -0.72588772, 0.63905160, 0.61501527, -0.98983780, -1.00447874, -0.62759469, 0.66206163, 1.04312009, -0.10305385, 0.75775634, 0.32566578]) println("# ", v, "\n -> ", fivenum(v))
end</lang>
- Output:
# [15.0, 6.0, 42.0, 41.0, 7.0, 36.0, 49.0, 40.0, 39.0, 47.0, 43.0] -> [6.0, 15.0, 40.0, 42.0, 49.0] # [36.0, 40.0, 7.0, 39.0, 41.0, 15.0] -> [7.0, 11.0, 37.5, 39.5, 41.0] # [0.140828, 0.0974879, 1.73132, 0.87636, -1.9506, 0.734386, -0.0303573, 1.46676, -0.746213, -0.725888, 0.639052, 0.615015, -0.989838, -1.00448, -0.627595,0.662062, 1.04312, -0.103054, 0.757756, 0.325666] -> [-1.9506, -0.725888, 0.233247, 0.734386, 1.73132]
Kotlin
The following uses Tukey's method for calculating the lower and upper quartiles (or 'hinges') which is what the R function, fivenum, appears to use.
As arrays containing NaNs and nulls cannot really be dealt with in a sensible fashion in Kotlin, they've been excluded altogether. <lang scala>// version 1.2.21
/**
 * Returns the median of the slice x[start..endInclusive].
 * The slice is expected to be sorted already; this function only picks the
 * middle element (odd count) or averages the two middle elements (even count).
 *
 * @throws IllegalArgumentException if the slice is empty.
 */
fun median(x: DoubleArray, start: Int, endInclusive: Int): Double {
    val count = endInclusive - start + 1
    require(count > 0) { "Array slice cannot be empty" }
    val mid = start + count / 2
    return when (count % 2) {
        1 -> x[mid]
        else -> (x[mid - 1] + x[mid]) / 2.0
    }
}
/**
 * Computes the five-number summary of [x]: minimum, lower hinge, median,
 * upper hinge and maximum, in that order.
 *
 * [x] is not modified; a sorted copy is used internally.
 *
 * @throws IllegalArgumentException if [x] is empty or contains NaN.
 */
fun fivenum(x: DoubleArray): DoubleArray {
    require(x.isNotEmpty()) { "Unable to deal with empty arrays" }
    require(x.none { it.isNaN() }) { "Unable to deal with arrays containing NaN" }

    val sorted = x.sortedArray()
    // Both halves include the middle element when the size is odd.
    val m = sorted.size / 2
    val lowerEnd = if (sorted.size % 2 == 1) m else m - 1

    return doubleArrayOf(
        sorted[0],
        median(sorted, 0, lowerEnd),
        median(sorted, 0, sorted.size - 1),
        median(sorted, m, sorted.size - 1),
        sorted[sorted.lastIndex]
    )
}
fun main(args: Array<String>) {
var xl = listOf( doubleArrayOf(15.0, 6.0, 42.0, 41.0, 7.0, 36.0, 49.0, 40.0, 39.0, 47.0, 43.0), doubleArrayOf(36.0, 40.0, 7.0, 39.0, 41.0, 15.0), doubleArrayOf( 0.14082834, 0.09748790, 1.73131507, 0.87636009, -1.95059594, 0.73438555, -0.03035726, 1.46675970, -0.74621349, -0.72588772, 0.63905160, 0.61501527, -0.98983780, -1.00447874, -0.62759469, 0.66206163, 1.04312009, -0.10305385, 0.75775634, 0.32566578 ) ) xl.forEach { println("${fivenum(it).asList()}\n") }
}</lang>
- Output:
[6.0, 25.5, 40.0, 42.5, 49.0] [7.0, 15.0, 37.5, 40.0, 41.0] [-1.95059594, -0.676741205, 0.23324706, 0.746070945, 1.73131507]
Perl
<lang Perl>#!/usr/bin/env perl
use strict; use warnings; use Cwd 'getcwd'; use feature 'say'; my $TOP_DIRECTORY = getcwd();
# Writes the given error text to "<TOP_DIRECTORY>/<script name>.FAIL" and then
# dies with that same text. Dies with a different message if the file cannot
# be opened for writing.
sub log_error_and_die {
    my ($error) = @_;

    my $fail_filename = "$TOP_DIRECTORY/$0.FAIL";
    open(my $fh, '>', $fail_filename)
        or die "Can't write $fail_filename: $!";
    print {$fh} $error;

    die $error;
}
local $SIG{__WARN__} = sub {
my $message = shift; log_error_and_die( sprintf( '%s @ %s', $message, getcwd() ) );
};
use POSIX qw(ceil floor);
# Returns Tukey's five-number summary (minimum, lower hinge, median, upper
# hinge, maximum) of the numbers in the array referenced by the argument.
# Prints a message and dies if the array is empty.
sub fivenum {
    my ($values) = @_;

    my $count = scalar @{$values};
    if ($count == 0) {
        print "no values were entered into fivenum.\n";
        die;
    }

    my @sorted = sort { $a <=> $b } @{$values};

    # 1-based depths, as in R: d <- c(1, n4, (n + 1)/2, n + 1 - n4, n)
    my $n4     = floor(($count + 3) / 2) / 2;
    my @depths = (1, $n4, ($count + 1) / 2, $count + 1 - $n4, $count);

    # A fractional depth averages the two neighbouring order statistics.
    my @summary = map {
        my $pos = $_ - 1;
        0.5 * ($sorted[ floor($pos) ] + $sorted[ ceil($pos) ]);
    } @depths;
    return @summary;
}
my @x = qw(0.14082834 0.09748790 1.73131507 0.87636009 -1.95059594 0.73438555
-0.03035726 1.46675970 -0.74621349 -0.72588772 0.63905160 0.61501527
-0.98983780 -1.00447874 -0.62759469 0.66206163 1.04312009 -0.10305385 0.75775634 0.32566578);
my @y = fivenum(\@x);
say join (',', @y); </lang>
- Output:
-1.95059594,-0.676741205,0.23324706,0.746070945,1.73131507
Perl 6
<lang perl6>sub fourths ( Int $end ) {
my $end_22 = $end div 2 / 2;
return 0, $end_22, $end/2, $end - $end_22, $end;
} sub fivenum ( @nums ) {
my @x = @nums.sort(+*) or die 'Input must have at least one element';
my @d = fourths(@x.end);
return ( @x[@d».floor] Z+ @x[@d».ceiling] ) »/» 2;
}
say .&fivenum for [15, 6, 42, 41, 7, 36, 49, 40, 39, 47, 43],
[36, 40, 7, 39, 41, 15], [ 0.14082834, 0.09748790, 1.73131507, 0.87636009, -1.95059594, 0.73438555, -0.03035726, 1.46675970, -0.74621349, -0.72588772, 0.63905160, 0.61501527, -0.98983780, -1.00447874, -0.62759469, 0.66206163, 1.04312009, -0.10305385, 0.75775634, 0.32566578,
]; </lang>
- Output:
(6 25.5 40 42.5 49) (7 15 37.5 40 41) (-1.95059594 -0.676741205 0.23324706 0.746070945 1.73131507)
Python
Work with: Python 2 <lang python>from __future__ import division import math import sys
def fivenum(array):
    """Return Tukey's five-number summary of ``array`` as a list of floats.

    The result is ``[minimum, lower hinge, median, upper hinge, maximum]``,
    computed with the same depth formula as R's ``fivenum``. ``array`` is not
    modified; a sorted copy is used.

    If ``array`` is empty, a message is written to standard output and the
    interpreter exits via ``sys.exit()``.
    """
    n = len(array)
    if n == 0:
        # sys.stdout.write rather than the Python 2 print statement, so the
        # function also compiles on Python 3; the emitted text is unchanged.
        sys.stdout.write("you entered an empty array.\n")
        sys.exit()

    x = sorted(array)
    n4 = math.floor((n + 3.0) / 2.0) / 2.0
    # 1-based depths. Dividing by 2.0 keeps true division without relying on
    # a module-level ``from __future__ import division``.
    d = [1, n4, (n + 1) / 2.0, n + 1 - n4, n]

    sum_array = []
    for depth in d:
        # A fractional depth averages the two neighbouring order statistics.
        lo = int(math.floor(depth - 1))
        hi = int(math.ceil(depth - 1))
        sum_array.append(0.5 * (x[lo] + x[hi]))
    return sum_array
x = [0.14082834, 0.09748790, 1.73131507, 0.87636009, -1.95059594, 0.73438555, -0.03035726, 1.46675970, -0.74621349, -0.72588772, 0.63905160, 0.61501527, -0.98983780, -1.00447874, -0.62759469, 0.66206163, 1.04312009, -0.10305385, 0.75775634, 0.32566578]
y = fivenum(x) print y</lang>
- Output:
[-1.95059594, -0.676741205, 0.23324706, 0.746070945, 1.73131507]
R
The fivenum function is built-in, see R manual.
<lang R>x <- c(0.14082834, 0.09748790, 1.73131507, 0.87636009, -1.95059594, 0.73438555,-0.03035726, 1.46675970, -0.74621349, -0.72588772, 0.63905160, 0.61501527, -0.98983780, -1.00447874, -0.62759469, 0.66206163, 1.04312009, -0.10305385, 0.75775634, 0.32566578) > fivenum(x) [1] -1.9505959 -0.6767412 0.2332471 0.7460709 1.7313151</lang>
REXX
Programming note: this REXX program uses a unity─based array. <lang rexx>/*REXX program computes the five─number summary (LO─value, p25, medium, p75, HI─value).*/ parse arg x if x= then x= 15 6 42 41 7 36 49 40 39 47 43 /*Not specified? Then use the defaults*/ say 'input numbers: ' space(x) /*display the original list of numbers.*/ call 5num /*invoke the five-number function. */ say ' five-numbers: ' result /*display " " " results. */ exit /*stick a fork in it, we're all done. */ /*──────────────────────────────────────────────────────────────────────────────────────*/ bSort: procedure expose @.; parse arg n; m=n-1 /*N: is the number of @ array elements.*/
do m=m for m by -1 until ok; ok=1 /*keep sorting the @ array 'til done.*/ do j=1 for m; k=j + 1; if @.j<=@.k then iterate /*In order? Good.*/ _=@.j @.k 0; parse var _ @.k @.j ok /*swap two elements; flag as not done.*/ end /*j*/ end /*m*/; return
/*──────────────────────────────────────────────────────────────────────────────────────*/ med: arg s,e; $=e-s+1; m=s+$%2; if $//2 then return @.m; _=m-1; return (@._+@.m)/2 /*──────────────────────────────────────────────────────────────────────────────────────*/ 5num: #=words(x); if #==0 then return '***error*** array is empty.'
parse var x . 1 LO . 1 HI . /*assume values for LO and HI (for now)*/ q2=# % 2 do j=1 for #; @.j=word(x,j); ?=datatype(@.j, 'N') if \? then return '***error*** element' j "isn't numeric: " @.j LO=min(LO, @.j); HI=max(HI, @.j) end /*j*/ /* [↑] traipse thru array, find min,max*/ call bSort # /*use a bubble sort (easiest to code). */ if #//2 then p25=q2; else p25=q2 - 1 /*calculate the second quartile number. */ return LO med(1,p25) med(1,#) med(q2,#) HI /*return list of the five numbers*/</lang>
- output when using the default input of: 15 6 42 41 7 36 49 40 39 47 43
input numbers: 15 6 42 41 7 36 49 40 39 47 43 five-numbers: 6 15 40 42 49
- output when using the (internal) default inputs of: 36 40 7 39 41 15
input numbers: 36 40 7 39 41 15 five-numbers: 7 11 37.5 39.5 41
SAS
<lang sas>/* build a dataset */ data test; do i=1 to 10000; x=rannor(12345); output; end; keep x; run;
/* compute the five numbers */ proc means data=test min p25 median p75 max; var x; run;</lang>
Output
Analysis Variable : x | ||||
Minimum | 25th Pctl | Median | 75th Pctl | Maximum |
-4.0692299 | -0.6533022 | 0.0066299 | 0.6768043 | 4.1328026 |
Stata
First build a dataset:
<lang stata>clear set seed 17760704 qui set obs 10000 gen x=rnormal()</lang>
The summarize command produces all the required statistics, and more:
<lang stata>qui sum x, detail di r(min),r(p25),r(p50),r(p75),r(max)</lang>
Output
-3.6345866 -.66536 .0026834 .68398139 3.7997103
It's also possible to use the tabstat command
<lang stata>tabstat x, s(mi q ma)</lang>
Output
variable | min p25 p50 p75 max -------------+-------------------------------------------------- x | -3.634587 -.66536 .0026834 .6839814 3.79971 ----------------------------------------------------------------
Another example:
<lang stata>clear mat a=0.14082834\0.09748790\1.73131507\0.87636009\-1.95059594\ /// 0.73438555\-0.03035726\1.46675970\-0.74621349\-0.72588772\ /// 0.63905160\0.61501527\-0.98983780\-1.00447874\-0.62759469\ /// 0.66206163\1.04312009\-0.10305385\0.75775634\0.32566578 svmat a tabstat a1, s(mi q ma)</lang>
Output
variable | min p25 p50 p75 max -------------+-------------------------------------------------- a1 | -1.950596 -.6767412 .2332471 .746071 1.731315 ----------------------------------------------------------------
zkl
Uses GNU GSL library. <lang zkl>var [const] GSL=Import("zklGSL"); // libGSL (GNU Scientific Library) fcn fiveNum(v){ // V is a GSL Vector, --> min, 1st qu, median, 3rd qu, max
v.sort(); return(v.min(),v.quantile(0.25),v.median(),v.quantile(0.75),v.max())
}</lang> <lang zkl>fiveNum(GSL.VectorFromData(
15.0, 6.0, 42.0, 41.0, 7.0, 36.0, 49.0, 40.0, 39.0, 47.0, 43.0)).println();
println(fiveNum(GSL.VectorFromData(36.0, 40.0, 7.0, 39.0, 41.0, 15.0)));
v:=GSL.VectorFromData(
0.14082834, 0.09748790, 1.73131507, 0.87636009, -1.95059594, 0.73438555, -0.03035726, 1.46675970, -0.74621349, -0.72588772, 0.63905160, 0.61501527, -0.98983780, -1.00447874, -0.62759469, 0.66206163, 1.04312009, -0.10305385, 0.75775634, 0.32566578);
println(fiveNum(v));</lang>
- Output:
L(6,25.5,40,42.5,49) L(7,20.25,37.5,39.75,41) L(-1.9506,-0.652168,0.233247,0.740228,1.73132)