Multiple regression
You are encouraged to solve this task according to the task description, using any language you may know.
- Task
Given a set of data vectors in the following format:

y = { y_1, y_2, ..., y_n }
X_i = { x_i1, x_i2, ..., x_in },  i = 1..k

Compute the vector β = { β_1, β_2, ..., β_k } using ordinary least squares regression on the following equation:

y_j = Σ_i β_i x_ij,  j = 1..n
You can assume y is given to you as a vector (a one-dimensional array), and X is given to you as a two-dimensional array (i.e. matrix).
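In matrix form the ordinary least squares solution is β = (X^T X)^-1 X^T y (the normal equations), and most of the entries below compute exactly this, directly or via a QR or Cholesky factorisation. As a language-neutral reference, here is a minimal NumPy sketch (an editorial illustration, not one of the task's entries) applying the formula to the Wikipedia height/weight data that many of the solutions use:

import numpy as np

# Wikipedia height/weight data used by many of the entries below
height = np.array([1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65,
                   1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83])
weight = np.array([52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29,
                   63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46])

# Design matrix with columns 1, x, x^2 (a quadratic model)
X = np.column_stack([np.ones_like(height), height, height**2])

# Normal equations: solve (X^T X) beta = X^T y rather than forming the inverse
beta = np.linalg.solve(X.T @ X, X.T @ weight)
print(beta)  # approximately [ 128.8128  -143.1620  61.9603 ]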
Ada
Extension of Reduced row echelon form#Ada:
matrices.ads:
generic
type Element_Type is private;
Zero : Element_Type;
One : Element_Type;
with function "+" (Left, Right : Element_Type) return Element_Type is <>;
with function "-" (Left, Right : Element_Type) return Element_Type is <>;
with function "*" (Left, Right : Element_Type) return Element_Type is <>;
with function "/" (Left, Right : Element_Type) return Element_Type is <>;
package Matrices is
type Vector is array (Positive range <>) of Element_Type;
type Matrix is
array (Positive range <>, Positive range <>) of Element_Type;
function "*" (Left, Right : Matrix) return Matrix;
function Invert (Source : Matrix) return Matrix;
function Reduced_Row_Echelon_Form (Source : Matrix) return Matrix;
function Regression_Coefficients
(Source : Vector;
Regressors : Matrix)
return Vector;
function To_Column_Vector
(Source : Matrix;
Row : Positive := 1)
return Vector;
function To_Matrix
(Source : Vector;
Column_Vector : Boolean := True)
return Matrix;
function To_Row_Vector
(Source : Matrix;
Column : Positive := 1)
return Vector;
function Transpose (Source : Matrix) return Matrix;
Size_Mismatch : exception;
Not_Square_Matrix : exception;
Not_Invertible : exception;
end Matrices;
matrices.adb:
package body Matrices is
function "*" (Left, Right : Matrix) return Matrix is
Result : Matrix (Left'Range (1), Right'Range (2)) :=
(others => (others => Zero));
begin
if Left'Length (2) /= Right'Length (1) then
raise Size_Mismatch;
end if;
for I in Result'Range (1) loop
for K in Result'Range (2) loop
for J in Left'Range (2) loop
Result (I, K) := Result (I, K) + Left (I, J) * Right (J, K);
end loop;
end loop;
end loop;
return Result;
end "*";
function Invert (Source : Matrix) return Matrix is
Expanded : Matrix (Source'Range (1),
Source'First (2) .. Source'Last (2) * 2);
Result : Matrix (Source'Range (1), Source'Range (2));
begin
-- Matrix has to be square.
if Source'Length (1) /= Source'Length (2) then
raise Not_Square_Matrix;
end if;
-- Copy Source into Expanded matrix and attach identity matrix to right
for Row in Source'Range (1) loop
for Col in Source'Range (2) loop
Expanded (Row, Col) := Source (Row, Col);
Expanded (Row, Source'Last (2) + Col) := Zero;
end loop;
Expanded (Row, Source'Last (2) + Row) := One;
end loop;
Expanded := Reduced_Row_Echelon_Form (Source => Expanded);
-- Copy right side to Result (= inverted Source)
for Row in Result'Range (1) loop
for Col in Result'Range (2) loop
Result (Row, Col) := Expanded (Row, Source'Last (2) + Col);
end loop;
end loop;
return Result;
end Invert;
function Reduced_Row_Echelon_Form (Source : Matrix) return Matrix is
procedure Divide_Row
(From : in out Matrix;
Row : Positive;
Divisor : Element_Type)
is
begin
for Col in From'Range (2) loop
From (Row, Col) := From (Row, Col) / Divisor;
end loop;
end Divide_Row;
procedure Subtract_Rows
(From : in out Matrix;
Subtrahend, Minuend : Positive;
Factor : Element_Type)
is
begin
for Col in From'Range (2) loop
From (Minuend, Col) := From (Minuend, Col) -
From (Subtrahend, Col) * Factor;
end loop;
end Subtract_Rows;
procedure Swap_Rows (From : in out Matrix; First, Second : Positive) is
Temporary : Element_Type;
begin
for Col in From'Range (2) loop
Temporary := From (First, Col);
From (First, Col) := From (Second, Col);
From (Second, Col) := Temporary;
end loop;
end Swap_Rows;
Result : Matrix := Source;
Lead : Positive := Result'First (2);
I : Positive;
begin
Rows : for Row in Result'Range (1) loop
exit Rows when Lead > Result'Last (2);
I := Row;
while Result (I, Lead) = Zero loop
I := I + 1;
if I = Result'Last (1) then
I := Row;
Lead := Lead + 1;
exit Rows when Lead = Result'Last (2);
end if;
end loop;
if I /= Row then
Swap_Rows (From => Result, First => I, Second => Row);
end if;
Divide_Row
(From => Result,
Row => Row,
Divisor => Result (Row, Lead));
for Other_Row in Result'Range (1) loop
if Other_Row /= Row then
Subtract_Rows
(From => Result,
Subtrahend => Row,
Minuend => Other_Row,
Factor => Result (Other_Row, Lead));
end if;
end loop;
Lead := Lead + 1;
end loop Rows;
return Result;
end Reduced_Row_Echelon_Form;
function Regression_Coefficients
(Source : Vector;
Regressors : Matrix)
return Vector
is
Result : Matrix (Regressors'Range (2), 1 .. 1);
begin
if Source'Length /= Regressors'Length (1) then
raise Size_Mismatch;
end if;
declare
Regressors_T : constant Matrix := Transpose (Regressors);
begin
Result := Invert (Regressors_T * Regressors) *
Regressors_T *
To_Matrix (Source);
end;
return To_Row_Vector (Source => Result);
end Regression_Coefficients;
function To_Column_Vector
(Source : Matrix;
Row : Positive := 1)
return Vector
is
Result : Vector (Source'Range (2));
begin
for Column in Result'Range loop
Result (Column) := Source (Row, Column);
end loop;
return Result;
end To_Column_Vector;
function To_Matrix
(Source : Vector;
Column_Vector : Boolean := True)
return Matrix
is
Result : Matrix (1 .. 1, Source'Range);
begin
for Column in Source'Range loop
Result (1, Column) := Source (Column);
end loop;
if Column_Vector then
return Transpose (Result);
else
return Result;
end if;
end To_Matrix;
function To_Row_Vector
(Source : Matrix;
Column : Positive := 1)
return Vector
is
Result : Vector (Source'Range (1));
begin
for Row in Result'Range loop
Result (Row) := Source (Row, Column);
end loop;
return Result;
end To_Row_Vector;
function Transpose (Source : Matrix) return Matrix is
Result : Matrix (Source'Range (2), Source'Range (1));
begin
for Row in Result'Range (1) loop
for Column in Result'Range (2) loop
Result (Row, Column) := Source (Column, Row);
end loop;
end loop;
return Result;
end Transpose;
end Matrices;
Example multiple_regression.adb:
with Ada.Text_IO;
with Matrices;
procedure Multiple_Regression is
package Float_Matrices is new Matrices (
Element_Type => Float,
Zero => 0.0,
One => 1.0);
subtype Vector is Float_Matrices.Vector;
subtype Matrix is Float_Matrices.Matrix;
use type Matrix;
procedure Output_Matrix (X : Matrix) is
begin
for Row in X'Range (1) loop
for Col in X'Range (2) loop
Ada.Text_IO.Put (Float'Image (X (Row, Col)) & ' ');
end loop;
Ada.Text_IO.New_Line;
end loop;
end Output_Matrix;
-- example from Ruby solution
V : constant Vector := (1.0, 2.0, 3.0, 4.0, 5.0);
M : constant Matrix :=
((1 => 2.0),
(1 => 1.0),
(1 => 3.0),
(1 => 4.0),
(1 => 5.0));
C : constant Vector :=
Float_Matrices.Regression_Coefficients (Source => V, Regressors => M);
-- Wikipedia example
Weight : constant Vector (1 .. 15) :=
(52.21, 53.12, 54.48, 55.84, 57.20,
58.57, 59.93, 61.29, 63.11, 64.47,
66.28, 68.10, 69.92, 72.19, 74.46);
Height : Vector (1 .. 15) :=
(1.47, 1.50, 1.52, 1.55, 1.57,
1.60, 1.63, 1.65, 1.68, 1.70,
1.73, 1.75, 1.78, 1.80, 1.83);
Height_Matrix : Matrix (1 .. 15, 1 .. 3);
begin
Ada.Text_IO.Put_Line ("Example from Ruby solution:");
Ada.Text_IO.Put_Line ("V:");
Output_Matrix (Float_Matrices.To_Matrix (V));
Ada.Text_IO.Put_Line ("M:");
Output_Matrix (M);
Ada.Text_IO.Put_Line ("C:");
Output_Matrix (Float_Matrices.To_Matrix (C));
Ada.Text_IO.New_Line;
Ada.Text_IO.Put_Line ("Example from Wikipedia:");
for I in Height'Range loop
Height_Matrix (I, 1) := 1.0;
Height_Matrix (I, 2) := Height (I);
Height_Matrix (I, 3) := Height (I) ** 2;
end loop;
Ada.Text_IO.Put_Line ("Matrix:");
Output_Matrix (Height_Matrix);
declare
Coefficients : constant Vector :=
Float_Matrices.Regression_Coefficients
(Source => Weight,
Regressors => Height_Matrix);
begin
Ada.Text_IO.Put_Line ("Coefficients:");
Output_Matrix (Float_Matrices.To_Matrix (Coefficients));
end;
end Multiple_Regression;
- Output:
Example from Ruby solution:
V:
 1.00000E+00
 2.00000E+00
 3.00000E+00
 4.00000E+00
 5.00000E+00
M:
 2.00000E+00
 1.00000E+00
 3.00000E+00
 4.00000E+00
 5.00000E+00
C:
 9.81818E-01

Example from Wikipedia:
Matrix:
 1.00000E+00  1.47000E+00  2.16090E+00
 1.00000E+00  1.50000E+00  2.25000E+00
 1.00000E+00  1.52000E+00  2.31040E+00
 1.00000E+00  1.55000E+00  2.40250E+00
 1.00000E+00  1.57000E+00  2.46490E+00
 1.00000E+00  1.60000E+00  2.56000E+00
 1.00000E+00  1.63000E+00  2.65690E+00
 1.00000E+00  1.65000E+00  2.72250E+00
 1.00000E+00  1.68000E+00  2.82240E+00
 1.00000E+00  1.70000E+00  2.89000E+00
 1.00000E+00  1.73000E+00  2.99290E+00
 1.00000E+00  1.75000E+00  3.06250E+00
 1.00000E+00  1.78000E+00  3.16840E+00
 1.00000E+00  1.80000E+00  3.24000E+00
 1.00000E+00  1.83000E+00  3.34890E+00
Coefficients:
 1.35403E+02
-1.51161E+02
 6.43514E+01
ALGOL 68
A translation of the VB.NET sample, but using the "to reduced row echelon form" routine from Reduced row echelon form#ALGOL_68.
Algol 68 doesn't have classes, though it does have structures.
Other than that, the main differences between this and the VB.NET sample are that Algol 68 has array slicing built in and generally uses a lower bound of 1 rather than 0 for arrays.
Also, although ( ( 1, 2, 3 ), ( 6, 5, 4 ) ) is a 2x3 array, ( ( 1, 2, 3 ) ) is a 3x1 array: ( x ) is not an array, so the outer brackets are superfluous, leaving ( 1, 2, 3 ) - i.e. a one-dimensional array. Since the context requires a two-dimensional array, each value is coerced to a row, resulting in a 3x1 two-dimensional array.
BEGIN # Multiple Regression - translation of the VB.NET sample but using the #
# "to reduced row echelon form" routine from the Reduced row echelon Task #
PROC require = ( BOOL condition, STRING message )VOID:
IF NOT condition THEN
print( ( message, newline ) );
stop
FI # require # ;
MODE MATRIX = STRUCT( REF[,]REAL data
, INT row count
, INT col count
);
PRIO NEWMATRIX = 1;
OP NEWMATRIX = ( INT m rows, INT m cols )MATRIX:
BEGIN
MATRIX result;
require( m rows > 0, "Need at least one row" );
row count OF result := m rows;
require( m cols > 0, "Need at least one column" );
col count OF result := m cols;
data OF result := HEAP[ 1 : m rows, 1 : m cols ]REAL;
FOR r TO m rows DO FOR c TO m cols DO ( data OF result )[ r, c ] := 0 OD OD;
result
END # NEWMATRIX # ;
OP NEWMATRIX = ( [,]REAL source )MATRIX:
BEGIN
MATRIX result;
INT m rows = 1 + ( 1 UPB source - 1 LWB source );
require( m rows > 0, "Need at least one row" );
row count OF result := m rows;
INT m cols = 1 + ( 2 UPB source - 2 LWB source );
require( m cols > 0, "Need at least one column" );
col count OF result := m cols;
data OF result := HEAP[ 1 : m rows, 1 : m cols ]REAL := source[ AT 1, AT 1 ];
result
END # NEWMATRIX # ;
OP NEWMATRIX = ( []REAL source )MATRIX: # New Matrix(ConvertArray(source)) #
BEGIN
INT len = 1 + ( UPB source - LWB source );
[ 1 : 1, 1 : len ]REAL dest;
dest[ 1, : ] := source;
NEWMATRIX dest
END # NEWMATRIX # ;
OP * = ( MATRIX m1, m2 )MATRIX:
BEGIN
INT rc1 = row count OF m1;
INT cc1 = col count OF m1;
INT rc2 = row count OF m2;
INT cc2 = col count OF m2;
require( cc1 = rc2, "Cannot multiply if the first's columns do not equal the second's rows" );
MATRIX result := rc1 NEWMATRIX cc2;
FOR i TO rc1 DO
FOR j TO cc2 DO
FOR k TO rc2 DO
( data OF result ) [ i, j ] +:= ( data OF m1 )[ i, k ]
* ( data OF m2 )[ k, j ]
OD
OD
OD;
result
END # * # ;
PROC transpose = ( MATRIX m )MATRIX:
BEGIN
INT rc = row count OF m;
INT cc = col count OF m;
MATRIX trans := cc NEWMATRIX rc;
FOR i TO cc DO
FOR j TO rc DO
( data OF trans )[ i, j ] := ( data OF m )[ j, i ]
OD
OD;
trans
END # transpose # ;
# BEGIN code from the Reduced row echelon form task #
MODE FIELD = REAL; # FIELD can be REAL, LONG REAL etc, or COMPL, FRAC etc #
MODE VEC = [0]FIELD;
MODE MAT = [0,0]FIELD;
PROC to reduced row echelon form = (REF MAT m)VOID: (
INT lead col := 2 LWB m;
FOR this row FROM LWB m TO UPB m DO
IF lead col > 2 UPB m THEN return FI;
INT other row := this row;
WHILE m[other row,lead col] = 0 DO
other row +:= 1;
IF other row > UPB m THEN
other row := this row;
lead col +:= 1;
IF lead col > 2 UPB m THEN return FI
FI
OD;
IF this row /= other row THEN
VEC swap = m[this row,lead col:];
m[this row,lead col:] := m[other row,lead col:];
m[other row,lead col:] := swap
FI;
FIELD scalef = 1/m[this row,lead col];
IF scalef /= 1 THEN
m[this row,lead col] := 1;
FOR col FROM lead col+1 TO 2 UPB m DO m[this row,col] *:= scalef OD
FI;
FOR other row pos FROM LWB m TO UPB m DO
IF this row /= other row pos THEN
REAL o scale = m[other row pos,lead col];
m[other row pos,lead col]:=0;
FOR col FROM lead col+1 TO 2 UPB m DO m[other row pos,col] -:= o scale*m[this row,col] OD
FI
OD;
lead col +:= 1
OD;
return: EMPTY
);
# END code from the Reduced row echelon form task #
PROC inverse = ( MATRIX m )MATRIX:
BEGIN
require( row count OF m = col count OF m, "Not a square matrix" );
INT len = row count OF m;
MATRIX aug := len NEWMATRIX ( 2 * len );
FOR i TO len DO
FOR j TO len DO
( data OF aug )[ i, j ] := ( data OF m )[ i, j ];
( data OF aug )[ i, j + len ] := 0
OD;
# augment identity matrix to right #
( data OF aug )[ i, i + len ] := 1.0
OD;
to reduced row echelon form( data OF aug );
MATRIX inv := len NEWMATRIX len;
FOR i TO len DO
FOR j FROM len + 1 TO 2 * len DO
( data OF inv)[ i, j - len ] := ( data OF aug )[ i, j ]
OD
OD;
inv
END # inverse # ;
PROC multiple regression = ( []REAL y, MATRIX x )[]REAL:
BEGIN
MATRIX tm := NEWMATRIX y;
MATRIX cy := NEWMATRIX data OF transpose( tm );
MATRIX cx := NEWMATRIX data OF transpose( x );
( data OF transpose( inverse( x * cx ) * x * cy ) )[ 1, : ]
END # multiple regression # ;
OP PRINTARRAY = ( []REAL list )VOID:
BEGIN
print( ( "[" ) );
FOR i FROM LWB list TO UPB list DO
# convert list[ i ] to a string, remove trailing 0s and leading spaces #
STRING v := fixed( list[ i ], -20, 15 )[ AT 1 ];
WHILE v[ UPB v ] = "0" DO v := v[ : UPB v - 1 ] OD;
IF v[ UPB v ] = "." THEN v := v[ : UPB v - 1 ] FI;
WHILE v[ 1 ] = " " DO v := v[ 2 : ] OD;
print( ( IF i > LWB list THEN ", " ELSE "" FI, v ) )
OD;
print( ( "]" ) )
END # PRINTARRAY # ;
BEGIN
[]REAL y = ( 1.0, 2.0, 3.0, 4.0, 5.0 );
MATRIX x := NEWMATRIX []REAL( 2.0, 1.0, 3.0, 4.0, 5.0 );
[]REAL v = multiple regression( y, x );
PRINTARRAY v;
print( ( newline ) )
END;
BEGIN
[]REAL y = ( 3.0, 4.0, 5.0 );
MATRIX x := NEWMATRIX [,]REAL( ( 1.0, 2.0, 1.0 )
, ( 1.0, 1.0, 2.0 )
);
[]REAL v = multiple regression( y, x );
PRINTARRAY v;
print( ( newline ) )
END;
BEGIN
[]REAL y = ( 52.21, 53.12, 54.48, 55.84, 57.2, 58.57, 59.93
, 61.29, 63.11, 64.47, 66.28, 68.1, 69.92, 72.19, 74.46
);
[]REAL a = ( 1.47, 1.5, 1.52, 1.55, 1.57, 1.6, 1.63, 1.65
, 1.68, 1.7, 1.73, 1.75, 1.78, 1.8, 1.83
);
[ 1 : 3, 1 : 1 + ( UPB a - LWB a ) ]REAL xs;
FOR i FROM LWB a TO UPB a DO
xs[ 1, i ] := 1.0;
xs[ 2, i ] := a[ i ];
xs[ 3, i ] := a[ i ] * a[ i ]
OD;
MATRIX x := NEWMATRIX xs;
[]REAL v = multiple regression( y, x );
PRINTARRAY v;
print( ( newline ) )
END
END
- Output:
[0.981818181818182]
[1, 2]
[128.812803581374, -143.162022866676, 61.9603254433538]
BASIC
BASIC256
arraybase 0
N = 14: M = 2: Q = 3 # number of points and M.R. polynomial degree
dim X(N+1) # data points
X[0] = 1.47 : X[1] = 1.50 : X[2] = 1.52
X[3] = 1.55 : X[4] = 1.57 : X[5] = 1.60
X[6] = 1.63 : X[7] = 1.65 : X[8] = 1.68
X[9] = 1.70 : X[10] = 1.73 : X[11] = 1.75
X[12] = 1.78 : X[13] = 1.80 : X[14] = 1.83
dim Y(N+1) # data points
Y[0] = 52.21 : Y[1] = 53.12 : Y[2] = 54.48
Y[3] = 55.84 : Y[4] = 57.20 : Y[5] = 58.57
Y[6] = 59.93 : Y[7] = 61.29 : Y[8] = 63.11
Y[9] = 64.47 : Y[10] = 66.28 : Y[11] = 68.10
Y[12] = 69.92 : Y[13] = 72.19 : Y[14] = 74.46
dim S(N+1): dim T(N+1) # linear system coefficient
dim A(M+1, Q+1) # system to be solved
dim p(M+1, Q+1)
for k = 0 to 2 * M
S[k] = 0: T[k] = 0
for i = 0 to N
S[k] = S[k] + X[i] ^ k
if k <= M then T[k] = T[k] + Y[i] * X[i] ^ k
next i
next k
# build linear system
for fila = 0 to M
for columna = 0 to M
A[fila, columna] = S[fila + columna]
next columna
A[fila, columna] = T[fila]
next fila
print "Linear system coefficents:"
for i = 0 to M
for j = 0 to M + 1
print rjust(A[i, j], 14.1);
next j
print
next i
for j = 0 to M
for i = j to M
if A[i, j] <> 0 then exit for
next i
if i = M + 1 then print: print "SINGULAR MATRIX '" : end
for k = 0 to M + 1
p[j,k] = A[j,k] : A[j,k] = A[i,k] : A[i,k] = p[j,k] # swap rows j and i
next k
z = 1 / A[j, j]
for k = 0 to M + 1
A[j, k] = z * A[j, k]
next k
for i = 0 to M
if i <> j then
z = -A[i, j]
for k = 0 to M + 1
A[i, k] = A[i, k] + z * A[j, k]
next k
end if
next i
next j
print: print "Solutions:"
for i = 0 to M
print rjust(A[i, M + 1], 16.7);
next i
end
- Output:
Linear system coefficients:
          15.0         24.76       41.0532        931.17
         24.76       41.0532     68.367934     1548.2453
       41.0532     68.367934  114.34828728   2585.543151

Solutions:
    128.81280358  -143.162022867   61.9603254432
Chipmunk Basic
100 cls
110 n = 14 : m = 2 : q = 3 ' number of points and M.R. polynomial degree
120 dim x(n)' data points
130 data 1.47,1.5,1.52,1.55,1.57,1.6,1.63,1.65,1.68,1.7,1.73,1.75,1.78,1.8,1.83
140 for c = 0 to n
150 read x(c)
160 next c
170 dim y(n)' data points
180 data 52.21,53.12,54.48,55.84,57.2,58.57,59.93,61.29,63.11,64.47,66.28,68.1,69.92,72.19,74.46
190 for c = 0 to n
200 read y(c)
210 next c
220 dim s(n) : dim t(n)' linear system coefficient
230 dim a(m,q)' system to be solved
240 dim p(m,q)
250 for k = 0 to 2*m
260 s(k) = 0 : t(k) = 0
270 for i = 0 to n
280 s(k) = s(k)+x(i)^k
290 if k <= m then t(k) = t(k)+y(i)*x(i)^k
300 next i
310 next k
320 ' build linear system
330 for fila = 0 to m
340 for columna = 0 to m
350 a(fila,columna) = s(fila+columna)
360 next columna
370 a(fila,columna) = t(fila)
380 next fila
390 print "Linear system coefficents:"
400 for i = 0 to m
410 for j = 0 to m+1
420 print using "######.#";a(i,j);
430 next j
440 print
450 next i
460 for j = 0 to m
470 for i = j to m
480 if a(i,j) <> 0 then goto 500
490 next i
500 if i = m+1 then
510 print : print "SINGULAR MATRIX '"
520 end
530 endif
540 for k = 0 to m+1
560 p(j,k) = a(j,k)
570 a(j,k) = a(i,k)
580 a(i,k) = p(j,k)
590 next k
600 z = 1/a(j,j)
610 for k = 0 to m+1
620 a(j,k) = z*a(j,k)
630 next k
640 for i = 0 to m
650 if i <> j then
660 z = -a(i,j)
670 for k = 0 to m+1
680 a(i,k) = a(i,k)+z*a(j,k)
690 next k
700 endif
710 next i
720 next j
730 print : print "Solutions:"
740 for i = 0 to m
750 print using " #####.#######";a(i,m+1);
760 next i
770 print
780 end
- Output:
Same as FreeBASIC entry.
QBasic
The QB64 solution works without any changes.
QB64
Const N = 14, M = 2, Q = 3 ' number of points and M.R. polynomial degree
Dim X(N) As Double ' data points
Data 1.47,1.50,1.52,1.55,1.57,1.60,1.63,1.65,1.68,1.70,1.73,1.75,1.78,1.80,1.83
For c = LBound(X) To UBound(X)
Read X(c)
Next c
Dim As Double Y(N) ' data points
Data 52.21,53.12,54.48,55.84,57.20,58.57,59.93,61.29,63.11,64.47,66.28,68.10,69.92,72.19,74.46
For c = LBound(Y) To UBound(Y)
Read Y(c)
Next c
Dim As Double S(N), T(N) ' linear system coefficient
Dim As Double A(M, Q) ' system to be solved
Dim As Integer i, k, j, fila, columna
Dim As Double z
For k = 0 To 2 * M
S(k) = 0: T(k) = 0
For i = 0 To N
S(k) = S(k) + X(i) ^ k
If k <= M Then T(k) = T(k) + Y(i) * X(i) ^ k
Next i
Next k
' build linear system
For fila = 0 To M
For columna = 0 To M
A(fila, columna) = S(fila + columna)
Next columna
A(fila, columna) = T(fila)
Next fila
Print "Linear system coefficents:"
For i = 0 To M
For j = 0 To M + 1
Print Using "######.#"; A(i, j);
Next j
Print
Next i
For j = 0 To M
For i = j To M
If A(i, j) <> 0 Then Exit For
Next i
If i = M + 1 Then
Print: Print "SINGULAR MATRIX '"
End
End If
For k = 0 To M + 1
Swap A(j, k), A(i, k)
Next k
z = 1 / A(j, j)
For k = 0 To M + 1
A(j, k) = z * A(j, k)
Next k
For i = 0 To M
If i <> j Then
z = -A(i, j)
For k = 0 To M + 1
A(i, k) = A(i, k) + z * A(j, k)
Next k
End If
Next i
Next j
Print: Print "Solutions:"
For i = 0 To M
Print Using " #####.#######"; A(i, M + 1);
Next i
End
- Output:
Same as FreeBASIC entry.
True BASIC
OPTION BASE 0
LET n = 14 ! number of points and M.R. polynomial degree
LET m = 2
LET q = 3
DIM x(0) ! data points
MAT REDIM x(n)
DATA 1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83
FOR c = LBOUND(x) TO UBOUND(x)
READ x(c)
NEXT c
DIM y(0) ! data points
MAT REDIM y(n)
DATA 52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46
FOR c = LBOUND(y) TO UBOUND(y)
READ y(c)
NEXT c
DIM s(0) ! linear system coefficient
MAT REDIM s(n)
DIM t(0)
MAT REDIM t(n)
DIM a(0,0) ! system to be solved
MAT REDIM a(m, q)
DIM p(0,0)
MAT REDIM p(m, q)
FOR k = 0 TO 2*m
LET s(k) = 0
LET t(k) = 0
FOR i = 0 TO n
LET s(k) = s(k)+x(i)^k
IF k <= m THEN LET t(k) = t(k)+y(i)*x(i)^k
NEXT i
NEXT k
! build linear system
FOR fila = 0 TO m
FOR columna = 0 TO m
LET a(fila, columna) = s(fila+columna)
NEXT columna
LET a(fila, columna) = t(fila)
NEXT fila
PRINT "Linear system coefficents:"
FOR i = 0 TO m
FOR j = 0 TO m+1
PRINT USING "######.#": a(i, j);
NEXT j
PRINT
NEXT i
FOR j = 0 TO m
FOR i = j TO m
IF a(i, j) <> 0 THEN EXIT FOR
NEXT i
IF i = m+1 THEN
PRINT
PRINT "SINGULAR MATRIX '"
STOP
END IF
FOR k = 0 TO m+1
LET p(j, k) = a(j, k)
LET a(j, k) = a(i, k)
LET a(i, k) = p(j, k)
NEXT k
LET z = 1/a(j, j)
FOR k = 0 TO m+1
LET a(j, k) = z*a(j, k)
NEXT k
FOR i = 0 TO m
IF i <> j THEN
LET z = -a(i, j)
FOR k = 0 TO m+1
LET a(i, k) = a(i, k)+z*a(j, k)
NEXT k
END IF
NEXT i
NEXT j
PRINT
PRINT "Solutions:"
FOR i = 0 TO m
PRINT USING " #####.#######": a(i, m+1);
NEXT i
END
Yabasic
// number of points and M.R. polynomial degree
N = 14
M = 2
Q = 3
dim X(N) // data points
X(0) = 1.47 : X(1) = 1.50 : X(2) = 1.52
X(3) = 1.55 : X(4) = 1.57 : X(5) = 1.60
X(6) = 1.63 : X(7) = 1.65 : X(8) = 1.68
X(9) = 1.70 : X(10) = 1.73 : X(11) = 1.75
X(12) = 1.78 : X(13) = 1.80 : X(14) = 1.83
dim Y(N) // data points
Y(0) = 52.21 : Y(1) = 53.12 : Y(2) = 54.48
Y(3) = 55.84 : Y(4) = 57.20 : Y(5) = 58.57
Y(6) = 59.93 : Y(7) = 61.29 : Y(8) = 63.11
Y(9) = 64.47 : Y(10) = 66.28 : Y(11) = 68.10
Y(12) = 69.92 : Y(13) = 72.19 : Y(14) = 74.46
dim S(N), T(N) // linear system coefficient
dim A(M, Q) // system to be solved
dim p(M, Q)
for k = 0 to 2*M
S(k) = 0 : T(k) = 0
for i = 0 to N
S(k) = S(k) + X(i) ^ k
if k <= M T(k) = T(k) + Y(i) * X(i) ^ k
next i
next k
// build linear system
for fila = 0 to M
for columna = 0 to M
A(fila, columna) = S(fila+columna)
next columna
A(fila, columna) = T(fila)
next fila
print "Linear system coefficents:"
for i = 0 to M
for j = 0 to M+1
print A(i,j) using "#####.#";
next j
print
next i
for j = 0 to M
for i = j to M
if A(i,j) <> 0 break
next i
if i = M+1 then
print "\nSINGULAR MATRIX '"
end
end if
for k = 0 to M+1
p(j,k) = A(j,k) : A(j,k) = A(i,k) : A(i,k) = p(j,k) // swap rows j and i
next k
z = 1 / A(j,j)
for k = 0 to M+1
A(j,k) = z * A(j,k)
next k
for i = 0 to M
if i <> j then
z = -A(i,j)
for k = 0 to M+1
A(i,k) = A(i,k) + z * A(j,k)
next k
end if
next i
next j
print "\nSolutions:"
for i = 0 to M
print A(i,M+1) using "#######.#######";
next i
print
end
BBC BASIC
*FLOAT 64
INSTALL @lib$+"ARRAYLIB"
DIM y(14), x(2,14), c(2)
y() = 52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, \
\ 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46
x() = 1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, \
\ 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83
FOR row% = DIM(x(),1) TO 0 STEP -1
FOR col% = 0 TO DIM(x(),2)
x(row%,col%) = x(0,col%) ^ row%
NEXT
NEXT row%
PROCmultipleregression(y(), x(), c())
FOR i% = 0 TO DIM(c(),1) : PRINT c(i%) " "; : NEXT
PRINT
END
DEF PROCmultipleregression(y(), x(), c())
LOCAL m(), t()
DIM m(DIM(x(),1), DIM(x(),1)), t(DIM(x(),2),DIM(x(),1))
PROC_transpose(x(), t())
m() = x().t()
PROC_invert(m())
t() = t().m()
c() = y().t()
ENDPROC
- Output:
128.812804 -143.162023 61.9603254
C
Using GNU GSL and C99, with the Wikipedia data.
#include <stdio.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_multifit.h>
double w[] = { 52.21, 53.12, 54.48, 55.84, 57.20,
58.57, 59.93, 61.29, 63.11, 64.47,
66.28, 68.10, 69.92, 72.19, 74.46 };
double h[] = { 1.47, 1.50, 1.52, 1.55, 1.57,
1.60, 1.63, 1.65, 1.68, 1.70,
1.73, 1.75, 1.78, 1.80, 1.83 };
int main()
{
int n = sizeof(h)/sizeof(double);
gsl_matrix *X = gsl_matrix_calloc(n, 3);
gsl_vector *Y = gsl_vector_alloc(n);
gsl_vector *beta = gsl_vector_alloc(3);
for (int i = 0; i < n; i++) {
gsl_vector_set(Y, i, w[i]);
gsl_matrix_set(X, i, 0, 1);
gsl_matrix_set(X, i, 1, h[i]);
gsl_matrix_set(X, i, 2, h[i] * h[i]);
}
double chisq;
gsl_matrix *cov = gsl_matrix_alloc(3, 3);
gsl_multifit_linear_workspace * wspc = gsl_multifit_linear_alloc(n, 3);
gsl_multifit_linear(X, Y, beta, cov, &chisq, wspc);
printf("Beta:");
for (int i = 0; i < 3; i++)
printf(" %g", gsl_vector_get(beta, i));
printf("\n");
gsl_matrix_free(X);
gsl_matrix_free(cov);
gsl_vector_free(Y);
gsl_vector_free(beta);
gsl_multifit_linear_free(wspc);
}
C++
#include <algorithm>
#include <array>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
void require(bool condition, const std::string &message) {
if (condition) {
return;
}
throw std::runtime_error(message);
}
template<typename T, size_t N>
std::ostream &operator<<(std::ostream &os, const std::array<T, N> &a) {
auto it = a.cbegin();
auto end = a.cend();
os << '[';
if (it != end) {
os << *it;
it = std::next(it);
}
while (it != end) {
os << ", " << *it;
it = std::next(it);
}
return os << ']';
}
template <size_t RC, size_t CC>
class Matrix {
std::array<std::array<double, CC>, RC> data;
public:
Matrix() : data{} {
// empty
}
Matrix(std::initializer_list<std::initializer_list<double>> values) {
size_t rp = 0;
for (auto row : values) {
size_t cp = 0;
for (auto col : row) {
data[rp][cp] = col;
cp++;
}
rp++;
}
}
double get(size_t row, size_t col) const {
return data[row][col];
}
void set(size_t row, size_t col, double value) {
data[row][col] = value;
}
std::array<double, CC> get(size_t row) {
return data[row];
}
void set(size_t row, const std::array<double, CC> &values) {
std::copy(values.begin(), values.end(), data[row].begin());
}
template <size_t D>
Matrix<RC, D> operator*(const Matrix<CC, D> &rhs) const {
Matrix<RC, D> result;
for (size_t i = 0; i < RC; i++) {
for (size_t j = 0; j < D; j++) {
for (size_t k = 0; k < CC; k++) {
double prod = get(i, k) * rhs.get(k, j);
result.set(i, j, result.get(i, j) + prod);
}
}
}
return result;
}
Matrix<CC, RC> transpose() const {
Matrix<CC, RC> trans;
for (size_t i = 0; i < RC; i++) {
for (size_t j = 0; j < CC; j++) {
trans.set(j, i, data[i][j]);
}
}
return trans;
}
void toReducedRowEchelonForm() {
size_t lead = 0;
for (size_t r = 0; r < RC; r++) {
if (CC <= lead) {
return;
}
auto i = r;
while (get(i, lead) == 0.0) {
i++;
if (RC == i) {
i = r;
lead++;
if (CC == lead) {
return;
}
}
}
auto temp = get(i);
set(i, get(r));
set(r, temp);
if (get(r, lead) != 0.0) {
auto div = get(r, lead);
for (size_t j = 0; j < CC; j++) {
set(r, j, get(r, j) / div);
}
}
for (size_t k = 0; k < RC; k++) {
if (k != r) {
auto mult = get(k, lead);
for (size_t j = 0; j < CC; j++) {
auto prod = get(r, j) * mult;
set(k, j, get(k, j) - prod);
}
}
}
lead++;
}
}
Matrix<RC, RC> inverse() {
require(RC == CC, "Not a square matrix");
Matrix<RC, 2 * RC> aug;
for (size_t i = 0; i < RC; i++) {
for (size_t j = 0; j < RC; j++) {
aug.set(i, j, get(i, j));
}
// augment identity matrix to right
aug.set(i, i + RC, 1.0);
}
aug.toReducedRowEchelonForm();
// remove identity matrix to left
Matrix<RC, RC> inv;
for (size_t i = 0; i < RC; i++) {
for (size_t j = RC; j < 2 * RC; j++) {
inv.set(i, j - RC, aug.get(i, j));
}
}
return inv;
}
template <size_t RC, size_t CC>
friend std::ostream &operator<<(std::ostream &, const Matrix<RC, CC> &);
};
template <size_t RC, size_t CC>
std::ostream &operator<<(std::ostream &os, const Matrix<RC, CC> &m) {
for (size_t i = 0; i < RC; i++) {
os << '[';
for (size_t j = 0; j < CC; j++) {
if (j > 0) {
os << ", ";
}
os << m.get(i, j);
}
os << "]\n";
}
return os;
}
template <size_t RC, size_t CC>
std::array<double, RC> multiple_regression(const std::array<double, CC> &y, const Matrix<RC, CC> &x) {
Matrix<1, CC> tm;
tm.set(0, y);
auto cy = tm.transpose();
auto cx = x.transpose();
return ((x * cx).inverse() * x * cy).transpose().get(0);
}
void case1() {
std::array<double, 5> y{ 1.0, 2.0, 3.0, 4.0, 5.0 };
Matrix<1, 5> x{ {2.0, 1.0, 3.0, 4.0, 5.0} };
auto v = multiple_regression(y, x);
std::cout << v << '\n';
}
void case2() {
std::array<double, 3> y{ 3.0, 4.0, 5.0 };
Matrix<2, 3> x{
{1.0, 2.0, 1.0},
{1.0, 1.0, 2.0}
};
auto v = multiple_regression(y, x);
std::cout << v << '\n';
}
void case3() {
std::array<double, 15> y{ 52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46 };
std::array<double, 15> a{ 1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83 };
Matrix<3, 15> x;
for (size_t i = 0; i < 15; i++) {
x.set(0, i, 1.0);
}
x.set(1, a);
for (size_t i = 0; i < 15; i++) {
x.set(2, i, a[i] * a[i]);
}
auto v = multiple_regression(y, x);
std::cout << v << '\n';
}
int main() {
case1();
case2();
case3();
return 0;
}
- Output:
[0.981818]
[1, 2]
[128.813, -143.162, 61.9603]
C#
using System;
using MathNet.Numerics.LinearRegression;
using MathNet.Numerics.LinearAlgebra;
using MathNet.Numerics.LinearAlgebra.Double;
class Program
{
static void Main(string[] args)
{
var col = DenseVector.OfArray(new double[] { 1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65,
1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83 });
var X = DenseMatrix.OfColumns(new Vector<double>[] { col.PointwisePower(0), col, col.PointwisePower(2) });
var y = DenseVector.OfArray(new double[] { 52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93,
61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46 });
var β = MultipleRegression.QR(X, y);
Console.WriteLine(β);
}
}
- Output:
DenseVector 3-Double
 128.813
-143.162
 61.9603
Common Lisp
Uses the routine (chol A) from Cholesky decomposition, (mmul A B) from Matrix multiplication, (mtp A) from Matrix transposition.
;; Solve a linear system AX=B where A is symmetric and positive definite, so it can be Cholesky decomposed.
(defun linsys (A B)
(let* ((n (car (array-dimensions A)))
(m (cadr (array-dimensions B)))
(y (make-array n :element-type 'long-float :initial-element 0.0L0))
(X (make-array `(,n ,m) :element-type 'long-float :initial-element 0.0L0))
(L (chol A))) ; A=LL'
(loop for col from 0 to (- m 1) do
;; Forward substitution: y = L\B
(loop for k from 0 to (- n 1)
do (setf (aref y k)
(/ (- (aref B k col)
(loop for j from 0 to (- k 1)
sum (* (aref L k j)
(aref y j))))
(aref L k k))))
;; Back substitution. x=L'\y
(loop for k from (- n 1) downto 0
do (setf (aref X k col)
(/ (- (aref y k)
(loop for j from (+ k 1) to (- n 1)
sum (* (aref L j k)
(aref X j col))))
(aref L k k)))))
X))
;; Solve a linear least squares problem. Ax=b, with A being mxn, with m>n.
;; Solves the linear system A'Ax=A'b.
(defun lsqr (A b)
(linsys (mmul (mtp A) A)
(mmul (mtp A) b)))
To show an example of multiple regression, (polyfit x y n) from Polynomial regression, which itself uses (linsys A B) and (lsqr A b), will be used to fit a second-degree polynomial to data.
(let ((x (make-array '(1 11) :initial-contents '((0 1 2 3 4 5 6 7 8 9 10))))
(y (make-array '(1 11) :initial-contents '((1 6 17 34 57 86 121 162 209 262 321)))))
(polyfit x y 2))
#2A((0.9999999999999759d0) (2.000000000000005d0) (3.0d0))
D
import std.algorithm;
import std.array;
import std.exception;
import std.range;
import std.stdio;
public class Matrix {
private double[][] data;
private size_t rowCount;
private size_t colCount;
public this(size_t size)
in(size > 0, "Must have at least one element")
{
this(size, size);
}
public this(size_t rows, size_t cols)
in(rows > 0, "Must have at least one row")
in(cols > 0, "Must have at least one column")
{
rowCount = rows;
colCount = cols;
data = uninitializedArray!(double[][])(rows, cols);
foreach (ref row; data) {
row[] = 0.0;
}
}
public this(const double[][] source) {
enforce(source.length > 0, "Must have at least one row");
rowCount = source.length;
enforce(source[0].length > 0, "Must have at least one column");
colCount = source[0].length;
data = uninitializedArray!(double[][])(rowCount, colCount);
foreach (i; 0 .. rowCount) {
enforce(source[i].length == colCount, "All rows must have equal columns");
data[i] = source[i].dup;
}
}
public auto opIndex(size_t r, size_t c) const {
return data[r][c];
}
public auto opIndex(size_t r) const {
return data[r];
}
public auto opBinary(string op)(const Matrix rhs) const {
static if (op == "*") {
auto rc1 = rowCount;
auto cc1 = colCount;
auto rc2 = rhs.rowCount;
auto cc2 = rhs.colCount;
enforce(cc1 == rc2, "Cannot multiply if the first's columns do not equal the second's rows");
auto result = new Matrix(rc1, cc2);
foreach (i; 0 .. rc1) {
foreach (j; 0 .. cc2) {
foreach (k; 0 .. rc2) {
result[i, j] += this[i, k] * rhs[k, j];
}
}
}
return result;
} else {
assert(false, "Not implemented");
}
}
public void opIndexAssign(double value, size_t r, size_t c) {
data[r][c] = value;
}
public void opIndexAssign(const double[] value, size_t r) {
enforce(colCount == value.length, "Slice size must match column size");
data[r] = value.dup;
}
public void opIndexOpAssign(string op)(double value, size_t r, size_t c) {
mixin("data[r][c] " ~ op ~ "= value;");
}
public auto transpose() const {
auto rc = rowCount;
auto cc = colCount;
auto t = new Matrix(cc, rc);
foreach (i; 0 .. cc) {
foreach (j; 0 .. rc) {
t[i, j] = this[j, i];
}
}
return t;
}
public void toReducedRowEchelonForm() {
auto lead = 0;
auto rc = rowCount;
auto cc = colCount;
foreach (r; 0 .. rc) {
if (cc <= lead) {
return;
}
auto i = r;
while (this[i, lead] == 0.0) {
i++;
if (rc == i) {
i = r;
lead++;
if (cc == lead) {
return;
}
}
}
auto temp = this[i];
this[i] = this[r];
this[r] = temp;
if (this[r, lead] != 0.0) {
auto div = this[r, lead];
foreach (j; 0 .. cc) {
this[r, j] = this[r, j] / div;
}
}
foreach (k; 0 .. rc) {
if (k != r) {
auto mult = this[k, lead];
foreach (j; 0 .. cc) {
this[k, j] -= this[r, j] * mult;
}
}
}
lead++;
}
}
public auto inverse() const {
enforce(rowCount == colCount, "Not a square matrix");
auto len = rowCount;
auto aug = new Matrix(len, 2 * len);
foreach (i; 0 .. len) {
foreach (j; 0 .. len) {
aug[i, j] = this[i, j];
}
// augment identity matrix to right
aug[i, i + len] = 1.0;
}
aug.toReducedRowEchelonForm;
auto inv = new Matrix(len);
// remove identity matrix to left
foreach (i; 0 .. len) {
foreach (j; len .. 2 * len) {
inv[i, j - len] = aug[i, j];
}
}
return inv;
}
void toString(scope void delegate(const(char)[]) sink) const {
import std.format;
auto fmt = FormatSpec!char("%s");
put(sink, "[");
foreach (i; 0 .. rowCount) {
if (i > 0) {
put(sink, " [");
} else {
put(sink, "[");
}
formatValue(sink, this[i, 0], fmt);
foreach (j; 1 .. colCount) {
put(sink, ", ");
formatValue(sink, this[i, j], fmt);
}
if (i + 1 < rowCount) {
put(sink, "]\n");
} else {
put(sink, "]");
}
}
put(sink, "]");
}
}
auto multipleRegression(double[] y, Matrix x) {
auto tm = new Matrix([y]);
auto cy = tm.transpose;
auto cx = x.transpose;
return ((x * cx).inverse * x * cy).transpose[0].dup;
}
void main() {
auto y = [1.0, 2.0, 3.0, 4.0, 5.0];
auto x = new Matrix([[2.0, 1.0, 3.0, 4.0, 5.0]]);
auto v = multipleRegression(y, x);
v.writeln;
y = [3.0, 4.0, 5.0];
x = new Matrix([
[1.0, 2.0, 1.0],
[1.0, 1.0, 2.0]
]);
v = multipleRegression(y, x);
v.writeln;
y = [52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46];
auto a = [1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83];
x = new Matrix([
repeat(1.0, a.length).array,
a,
a.map!"a * a".array
]);
v = multipleRegression(y, x);
v.writeln;
}
- Output:
[0.981818]
[1, 2]
[128.813, -143.162, 61.9603]
Emacs Lisp
(let ((x1 '(0 1 2 3 4 5 6 7 8 9 10))
(x2 '(0 1 1 3 3 7 6 7 3 9 8))
(y '(1 6 17 34 57 86 121 162 209 262 321)))
(apply #'calc-eval "fit(a*X1+b*X2+c,[X1,X2],[a,b,c],[$1 $2 $3])" nil
(mapcar (lambda (items) (cons 'vec items)) (list x1 x2 y))))
- Output:
"35.2014388489*X1 - 3.95683453237*X2 - 42.7410071942"
ERRE
PROGRAM MULTIPLE_REGRESSION
!$DOUBLE
CONST N=14,M=2,Q=3 ! number of points and M.R. polynomial degree
DIM X[N],Y[N] ! data points
DIM S[N],T[N] ! linear system coefficient
DIM A[M,Q] ! system to be solved
BEGIN
DATA(1.47,1.50,1.52,1.55,1.57,1.60,1.63,1.65,1.68,1.70,1.73,1.75,1.78,1.80,1.83)
DATA(52.21,53.12,54.48,55.84,57.20,58.57,59.93,61.29,63.11,64.47,66.28,68.10,69.92,72.19,74.46)
FOR I%=0 TO N DO
READ(X[I%])
END FOR
FOR I%=0 TO N DO
READ(Y[I%])
END FOR
FOR K%=0 TO 2*M DO
S[K%]=0 T[K%]=0
FOR I%=0 TO N DO
S[K%]=S[K%]+X[I%]^K%
IF K%<=M THEN T[K%]=T[K%]+Y[I%]*X[I%]^K% END IF
END FOR
END FOR
! build linear system
FOR ROW%=0 TO M DO
FOR COL%=0 TO M DO
A[ROW%,COL%]=S[ROW%+COL%]
END FOR
A[ROW%,COL%]=T[ROW%]
END FOR
PRINT("LINEAR SYSTEM COEFFICENTS") PRINT
FOR I%=0 TO M DO
FOR J%=0 TO M+1 DO
WRITE(" ######.#";A[I%,J%];)
END FOR
PRINT
END FOR
PRINT
FOR J%=0 TO M DO
FOR I%=J% TO M DO
EXIT IF A[I%,J%]<>0
END FOR
IF I%=M+1 THEN
PRINT("SINGULAR MATRIX !")
!$STOP
END IF
FOR K%=0 TO M+1 DO
SWAP(A[J%,K%],A[I%,K%])
END FOR
Y=1/A[J%,J%]
FOR K%=0 TO M+1 DO
A[J%,K%]=Y*A[J%,K%]
END FOR
FOR I%=0 TO M DO
IF I%<>J% THEN
Y=-A[I%,J%]
FOR K%=0 TO M+1 DO
A[I%,K%]=A[I%,K%]+Y*A[J%,K%]
END FOR
END IF
END FOR
END FOR
PRINT
PRINT("SOLUTIONS") PRINT
FOR I%=0 TO M DO
PRINT("c";I%;"=";)
WRITE("#####.#######";A[I%,M+1])
END FOR
END PROGRAM
- Output:
LINEAR SYSTEM COEFFICIENTS

    15.0    24.8    41.1   931.2
    24.8    41.1    68.4  1548.2
    41.1    68.4   114.3  2585.5

SOLUTIONS

c 0 = 128.8128036
c 1 = -143.1620229
c 2 = 61.9603254
Fortran
*-----------------------------------------------------------------------
* MR - multiple regression using the SLATEC library routine DHFTI
*
* Finds the nearest approximation to BETA in the system of linear equations:
*
* X(j,i) . BETA(i) = Y(j)
* where
* 1 ... j ... N
* 1 ... i ... K
* and
* K .LE. N
*
* INPUT ARRAYS ARE DESTROYED!
*
*___Name___________Type_______________In/Out____Description_____________
* X(N,K) Double precision In Predictors
* Y(N) Double precision Both On input: N Observations
* On output: K beta weights
* N Integer In Number of observations
* K Integer In Number of predictor variables
* DWORK(N+2*K) Double precision Neither Workspace
* IWORK(K) Integer Neither Workspace
*-----------------------------------------------------------------------
SUBROUTINE MR (X, Y, N, K, DWORK, IWORK)
IMPLICIT NONE
INTEGER K, N, IWORK
DOUBLE PRECISION X, Y, DWORK
DIMENSION X(N,K), Y(N), DWORK(N+2*K), IWORK(K)
* local variables
INTEGER I, J
DOUBLE PRECISION TAU, TOT
* maximum of all column sums of magnitudes
TAU = 0.
DO J = 1, K
TOT = 0.
DO I = 1, N
TOT = TOT + ABS(X(I,J))
END DO
IF (TOT > TAU) TAU = TOT
END DO
TAU = TAU * EPSILON(TAU) ! tolerance argument
* call function
CALL DHFTI (X, N, N, K, Y, N, 1, TAU,
$ J, DWORK(1), DWORK(N+1), DWORK(N+K+1), IWORK)
IF (J < K) PRINT *, 'mr: solution is rank deficient!'
RETURN
END ! of MR
*-----------------------------------------------------------------------
PROGRAM t_mr ! polynomial regression example
IMPLICIT NONE
INTEGER N, K
PARAMETER (N=15, K=3)
INTEGER IWORK(K), I, J
DOUBLE PRECISION XIN(N), X(N,K), Y(N), DWORK(N+2*K)
DATA XIN / 1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68,
$ 1.70, 1.73, 1.75, 1.78, 1.80, 1.83 /
DATA Y / 52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29,
$ 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46 /
* make coefficient matrix
DO J = 1, K
DO I = 1, N
X(I,J) = XIN(I) **(J-1)
END DO
END DO
* solve
CALL MR (X, Y, N, K, DWORK, IWORK)
* print result
10 FORMAT ('beta: ', $)
20 FORMAT (F12.4, $)
30 FORMAT ()
PRINT 10
DO J = 1, K
PRINT 20, Y(J)
END DO
PRINT 30
STOP 'program complete'
END
- Output:
beta:     128.8126   -143.1618     61.9603
STOP program complete
FreeBASIC
Const N = 14, M = 2, Q = 3 ' number of points and M.R. polynomial degree
Dim As Double X(0 to N) = {1.47,1.50,1.52,1.55,1.57, _
1.60,1.63,1.65,1.68,1.70,1.73,1.75,1.78,1.80,1.83} ' data points
Dim As Double Y(0 to N) = {52.21,53.12,54.48,55.84,57.20, _
58.57,59.93,61.29,63.11,64.47,66.28,68.10,69.92,72.19,74.46} ' data points
Dim As Double S(N), T(N) ' linear system coefficient
Dim As Double A(M, Q) ' system to be solved
Dim As Integer i, k, j, fila, columna
Dim as Double z
For k = 0 To 2*M
S(k) = 0 : T(k) = 0
For i = 0 To N
S(k) += X(i) ^ k
If k <= M Then T(k) += Y(i) * X(i) ^ k
Next i
Next k
' build linear system
For fila = 0 To M
For columna = 0 To M
A(fila, columna) = S(fila+columna)
Next columna
A(fila, columna) = T(fila)
Next fila
Print "Linear system coefficents:"
For i = 0 To M
For j = 0 To M+1
Print Using "######.#"; A(i,j);
Next j
Print
Next i
For j = 0 To M
For i = j To M
If A(i,j) <> 0 Then Exit For
Next i
If i = M+1 Then
Print !"\nSINGULAR MATRIX '"
Sleep: End
End If
For k = 0 To M+1
Swap A(j,k), A(i,k)
Next k
z = 1 / A(j,j)
For k = 0 To M+1
A(j,k) = z * A(j,k)
Next k
For i = 0 To M
If i <> j Then
z = -A(i,j)
For k = 0 To M+1
A(i,k) += z * A(j,k)
Next k
End If
Next i
Next j
Print !"\nSolutions:"
For i = 0 To M
Print Using " #####.#######"; A(i,M+1);
Next i
Sleep
- Output:
Linear system coefficients:
    15.0    24.8    41.1   931.2
    24.8    41.1    68.4  1548.2
    41.1    68.4   114.3  2585.5

Solutions:
   128.8128036  -143.1620229    61.9603254
Go
The example on WP happens to be a polynomial regression example, and so code from the Polynomial regression task can be reused here. The only difference here is that the givens x and y are computed in a separate function as a task prerequisite.
Library gonum/matrix
package main
import (
"fmt"
"github.com/gonum/matrix/mat64"
)
func givens() (x, y *mat64.Dense) {
height := []float64{1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63,
1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83}
weight := []float64{52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93,
61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46}
degree := 2
x = Vandermonde(height, degree)
y = mat64.NewDense(len(weight), 1, weight)
return
}
func Vandermonde(a []float64, degree int) *mat64.Dense {
x := mat64.NewDense(len(a), degree+1, nil)
for i := range a {
for j, p := 0, 1.; j <= degree; j, p = j+1, p*a[i] {
x.Set(i, j, p)
}
}
return x
}
func main() {
x, y := givens()
fmt.Printf("%.4f\n", mat64.Formatted(mat64.QR(x).Solve(y)))
}
- Output:
⎡ 128.8128⎤
⎢-143.1620⎥
⎣  61.9603⎦
Library go.matrix
package main
import (
"fmt"
"github.com/skelterjohn/go.matrix"
)
func givens() (x, y *matrix.DenseMatrix) {
height := []float64{1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63,
1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83}
weight := []float64{52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93,
61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46}
m := len(height)
n := 3
y = matrix.MakeDenseMatrix(weight, m, 1)
x = matrix.Zeros(m, n)
for i := 0; i < m; i++ {
ip := float64(1)
for j := 0; j < n; j++ {
x.Set(i, j, ip)
ip *= height[i]
}
}
return
}
func main() {
x, y := givens()
n := x.Cols()
q, r := x.QR()
qty, err := q.Transpose().Times(y)
if err != nil {
fmt.Println(err)
return
}
c := make([]float64, n)
for i := n - 1; i >= 0; i-- {
c[i] = qty.Get(i, 0)
for j := i + 1; j < n; j++ {
c[i] -= c[j] * r.Get(i, j)
}
c[i] /= r.Get(i, i)
}
fmt.Println(c)
}
- Output:
[128.8128035784373 -143.16202286476116 61.960325442472865]
Haskell
Using package hmatrix from HackageDB
import Numeric.LinearAlgebra
import Numeric.LinearAlgebra.LAPACK
m :: Matrix Double
m = (3><3)
[7.589183,1.703609,-4.477162,
-4.597851,9.434889,-6.543450,
0.4588202,-6.115153,1.331191]
v :: Matrix Double
v = (3><1)
[1.745005,-4.448092,-4.160842]
Using lapack::dgels
*Main> linearSolveLSR m v
(3><1)
[ 0.9335611922087276
, 1.101323491272865
, 1.6117769115824 ]
Or
*Main> inv m `multiply` v
(3><1)
[ 0.9335611922087278
, 1.101323491272865
, 1.6117769115824006 ]
Hy
(import
[numpy [ones column-stack]]
[numpy.random [randn]]
[numpy.linalg [lstsq]])
(setv n 1000)
(setv x1 (randn n))
(setv x2 (randn n))
(setv y (+ 3 (* 1 x1) (* -2 x2) (* .25 x1 x2) (randn n)))
(print (first (lstsq
(column-stack (, (ones n) x1 x2 (* x1 x2)))
y)))
J
NB. Wikipedia data
x=: 1.47 1.50 1.52 1.55 1.57 1.60 1.63 1.65 1.68 1.70 1.73 1.75 1.78 1.80 1.83
y=: 52.21 53.12 54.48 55.84 57.20 58.57 59.93 61.29 63.11 64.47 66.28 68.10 69.92 72.19 74.46
y %. x ^/ i.3 NB. calculate coefficients b1, b2 and b3 for 2nd degree polynomial
128.813 _143.162 61.9603
Breaking it down:
X=: x ^/ i.3 NB. form Design matrix
X=: (x^0) ,. (x^1) ,. (x^2) NB. equivalent of previous line
4{.X NB. show first 4 rows of X
1 1.47 2.1609
1 1.5 2.25
1 1.52 2.3104
1 1.55 2.4025
NB. Where y is a set of observations and X is the design matrix
NB. y %. X does matrix division and gives the regression coefficients
y %. X
128.813 _143.162 61.9603
In other words beta=: y %. X is the equivalent of β = (X^T X)^-1 X^T y.
To confirm:
mp=: +/ .* NB. matrix product
NB. %.X is matrix inverse of X
NB. |:X is transpose of X
(%.(|:X) mp X) mp (|:X) mp y
128.814 _143.163 61.9606
xpy=: mp~ |: NB. Or factoring out "X prime y" (monadically "X prime X")
X (%.@:xpy@[ mp xpy) y
128.814 _143.163 61.9606
LAPACK routines are also available via the Addon math/lapack.
load 'math/lapack'
load 'math/lapack/gels'
gels_jlapack_ X;y
128.813 _143.162 61.9603
Java
import java.util.Arrays;
import java.util.Objects;
public class MultipleRegression {
public static void require(boolean condition, String message) {
if (condition) {
return;
}
throw new IllegalArgumentException(message);
}
public static class Matrix {
private final double[][] data;
private final int rowCount;
private final int colCount;
public Matrix(int rows, int cols) {
require(rows > 0, "Need at least one row");
this.rowCount = rows;
require(cols > 0, "Need at least one column");
this.colCount = cols;
this.data = new double[rows][cols];
for (double[] row : this.data) {
Arrays.fill(row, 0.0);
}
}
public Matrix(double[][] source) {
require(source.length > 0, "Need at least one row");
this.rowCount = source.length;
require(source[0].length > 0, "Need at least one column");
this.colCount = source[0].length;
this.data = new double[this.rowCount][this.colCount];
for (int i = 0; i < this.rowCount; i++) {
set(i, source[i]);
}
}
public double[] get(int row) {
Objects.checkIndex(row, this.rowCount);
return this.data[row];
}
public void set(int row, double[] data) {
Objects.checkIndex(row, this.rowCount);
require(data.length == this.colCount, "The column in the row must match the number of columns in the matrix");
System.arraycopy(data, 0, this.data[row], 0, this.colCount);
}
public double get(int row, int col) {
Objects.checkIndex(row, this.rowCount);
Objects.checkIndex(col, this.colCount);
return this.data[row][col];
}
public void set(int row, int col, double value) {
Objects.checkIndex(row, this.rowCount);
Objects.checkIndex(col, this.colCount);
this.data[row][col] = value;
}
@SuppressWarnings("UnnecessaryLocalVariable")
public Matrix times(Matrix that) {
var rc1 = this.rowCount;
var cc1 = this.colCount;
var rc2 = that.rowCount;
var cc2 = that.colCount;
require(cc1 == rc2, "Cannot multiply if the first's columns do not equal the second's rows");
var result = new Matrix(rc1, cc2);
for (int i = 0; i < rc1; i++) {
for (int j = 0; j < cc2; j++) {
for (int k = 0; k < rc2; k++) {
var prod = get(i, k) * that.get(k, j);
result.set(i, j, result.get(i, j) + prod);
}
}
}
return result;
}
public Matrix transpose() {
var rc = this.rowCount;
var cc = this.colCount;
var trans = new Matrix(cc, rc);
for (int i = 0; i < cc; i++) {
for (int j = 0; j < rc; j++) {
trans.set(i, j, get(j, i));
}
}
return trans;
}
public void toReducedRowEchelonForm() {
int lead = 0;
var rc = this.rowCount;
var cc = this.colCount;
for (int r = 0; r < rc; r++) {
if (cc <= lead) {
return;
}
var i = r;
while (get(i, lead) == 0.0) {
i++;
if (rc == i) {
i = r;
lead++;
if (cc == lead) {
return;
}
}
}
var temp = get(i);
set(i, get(r));
set(r, temp);
if (get(r, lead) != 0.0) {
var div = get(r, lead);
for (int j = 0; j < cc; j++) {
set(r, j, get(r, j) / div);
}
}
for (int k = 0; k < rc; k++) {
if (k != r) {
var mult = get(k, lead);
for (int j = 0; j < cc; j++) {
var prod = get(r, j) * mult;
set(k, j, get(k, j) - prod);
}
}
}
lead++;
}
}
public Matrix inverse() {
require(this.rowCount == this.colCount, "Not a square matrix");
var len = this.rowCount;
var aug = new Matrix(len, 2 * len);
for (int i = 0; i < len; i++) {
for (int j = 0; j < len; j++) {
aug.set(i, j, get(i, j));
}
// augment identity matrix to right
aug.set(i, i + len, 1.0);
}
aug.toReducedRowEchelonForm();
var inv = new Matrix(len, len);
// remove identity matrix to left
for (int i = 0; i < len; i++) {
for (int j = len; j < 2 * len; j++) {
inv.set(i, j - len, aug.get(i, j));
}
}
return inv;
}
}
public static double[] multipleRegression(double[] y, Matrix x) {
var tm = new Matrix(new double[][]{y});
var cy = tm.transpose();
var cx = x.transpose();
return x.times(cx).inverse().times(x).times(cy).transpose().get(0);
}
public static void printVector(double[] v) {
System.out.println(Arrays.toString(v));
System.out.println();
}
public static double[] repeat(int size, double value) {
var a = new double[size];
Arrays.fill(a, value);
return a;
}
public static void main(String[] args) {
double[] y = new double[]{1.0, 2.0, 3.0, 4.0, 5.0};
var x = new Matrix(new double[][]{{2.0, 1.0, 3.0, 4.0, 5.0}});
var v = multipleRegression(y, x);
printVector(v);
y = new double[]{3.0, 4.0, 5.0};
x = new Matrix(new double[][]{
{1.0, 2.0, 1.0},
{1.0, 1.0, 2.0}
});
v = multipleRegression(y, x);
printVector(v);
y = new double[]{52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46};
var a = new double[]{1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83};
x = new Matrix(new double[][]{
repeat(a.length, 1.0),
a,
Arrays.stream(a).map(it -> it * it).toArray()
});
v = multipleRegression(y, x);
printVector(v);
}
}
- Output:
[0.9818181818181818]

[0.9999999999999996, 2.000000000000001]

[128.8128035798277, -143.1620228653037, 61.960325442985436]
JavaScript
Requires an environment providing the print() and Array.map() functions (e.g. a stand-alone JavaScript shell).
Extends the Matrix class from Matrix Transpose#JavaScript, Matrix multiplication#JavaScript, Reduced row echelon form#JavaScript. Uses the IdentityMatrix from Matrix exponentiation operator#JavaScript
// modifies the matrix "in place"
Matrix.prototype.inverse = function() {
if (this.height != this.width) {
throw "can't invert a non-square matrix";
}
var I = new IdentityMatrix(this.height);
for (var i = 0; i < this.height; i++)
this.mtx[i] = this.mtx[i].concat(I.mtx[i])
this.width *= 2;
this.toReducedRowEchelonForm();
for (var i = 0; i < this.height; i++)
this.mtx[i].splice(0, this.height);
this.width /= 2;
return this;
}
function ColumnVector(ary) {
return new Matrix(ary.map(function(v) {return [v]}))
}
ColumnVector.prototype = Matrix.prototype
Matrix.prototype.regression_coefficients = function(x) {
var x_t = x.transpose();
return x_t.mult(x).inverse().mult(x_t).mult(this);
}
// the Ruby example
var y = new ColumnVector([1,2,3,4,5]);
var x = new ColumnVector([2,1,3,4,5]);
print(y.regression_coefficients(x));
print();
// the Tcl example
y = new ColumnVector([
52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29,
63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46
]);
x = new Matrix(
[1.47,1.50,1.52,1.55,1.57,1.60,1.63,1.65,1.68,1.70,1.73,1.75,1.78,1.80,1.83].map(
function(v) {return [Math.pow(v,0), Math.pow(v,1), Math.pow(v,2)]}
)
);
print(y.regression_coefficients(x));
- Output:
0.9818181818181818

128.8128035798277
-143.1620228653037
61.960325442985436
jq
Works with gojq, the Go implementation of jq
See e.g. https://rosettacode.org/wiki/Gauss-Jordan_matrix_inversion#jq for a suitable definition of `inverse` as used here.
Preliminaries
def dot_product(a; b):
reduce range(0;a|length) as $i (0; . + (a[$i] * b[$i]) );
# A and B should both be numeric matrices, A being m by n, and B being n by p.
def multiply(A; B):
(B[0]|length) as $p
| (B|transpose) as $BT
| reduce range(0; A|length) as $i
([];
reduce range(0; $p) as $j
(.;
.[$i][$j] = dot_product( A[$i]; $BT[$j] ) ));
Multiple Regression
def multipleRegression(y; x):
(y|transpose) as $cy
| (x|transpose) as $cx
| multiply( multiply( multiply(x;$cx)|inverse; x); $cy)
| transpose[0];
def ys: [
[ [1, 2, 3, 4, 5] ],
[ [3, 4, 5] ],
[ [52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29,
63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46] ]
];
def a:
[1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83];
def xs:[
[ [2, 1, 3, 4, 5] ],
[ [1, 2, 1], [1, 1, 2] ],
[ [range(0;a|length) | 1], a, (a|map(.*.))]
];
range(0; ys|length) as $i
| multipleRegression(ys[$i]; xs[$i])
- Output:
[0.9818181818181818]
[0.9999999999999996,2.000000000000001]
[128.8128035798277,-143.1620228653037,61.960325442985436]
Julia
As in Matlab, the backslash or slash operator (depending on the matrix ordering) can be used for solving this problem, for example:
x = [1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83]
y = [52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46]
X = [x.^0 x.^1 x.^2];
b = X \ y
- Output:
3-element Array{Float64,1}:
  128.813
 -143.162
   61.9603
Kotlin
As neither the JDK nor the Kotlin Standard Library has matrix operations built-in, we re-use functions written for various other tasks.
// Version 1.2.31
typealias Vector = DoubleArray
typealias Matrix = Array<Vector>
operator fun Matrix.times(other: Matrix): Matrix {
val rows1 = this.size
val cols1 = this[0].size
val rows2 = other.size
val cols2 = other[0].size
require(cols1 == rows2)
val result = Matrix(rows1) { Vector(cols2) }
for (i in 0 until rows1) {
for (j in 0 until cols2) {
for (k in 0 until rows2) {
result[i][j] += this[i][k] * other[k][j]
}
}
}
return result
}
fun Matrix.transpose(): Matrix {
val rows = this.size
val cols = this[0].size
val trans = Matrix(cols) { Vector(rows) }
for (i in 0 until cols) {
for (j in 0 until rows) trans[i][j] = this[j][i]
}
return trans
}
fun Matrix.inverse(): Matrix {
val len = this.size
require(this.all { it.size == len }) { "Not a square matrix" }
val aug = Array(len) { DoubleArray(2 * len) }
for (i in 0 until len) {
for (j in 0 until len) aug[i][j] = this[i][j]
// augment by identity matrix to right
aug[i][i + len] = 1.0
}
aug.toReducedRowEchelonForm()
val inv = Array(len) { DoubleArray(len) }
// remove identity matrix to left
for (i in 0 until len) {
for (j in len until 2 * len) inv[i][j - len] = aug[i][j]
}
return inv
}
fun Matrix.toReducedRowEchelonForm() {
var lead = 0
val rowCount = this.size
val colCount = this[0].size
for (r in 0 until rowCount) {
if (colCount <= lead) return
var i = r
while (this[i][lead] == 0.0) {
i++
if (rowCount == i) {
i = r
lead++
if (colCount == lead) return
}
}
val temp = this[i]
this[i] = this[r]
this[r] = temp
if (this[r][lead] != 0.0) {
val div = this[r][lead]
for (j in 0 until colCount) this[r][j] /= div
}
for (k in 0 until rowCount) {
if (k != r) {
val mult = this[k][lead]
for (j in 0 until colCount) this[k][j] -= this[r][j] * mult
}
}
lead++
}
}
fun printVector(v: Vector) {
println(v.asList())
println()
}
fun multipleRegression(y: Vector, x: Matrix): Vector {
val cy = (arrayOf(y)).transpose() // convert 'y' to column vector
val cx = x.transpose() // convert 'x' to column vector array
return ((x * cx).inverse() * x * cy).transpose()[0]
}
fun main(args: Array<String>) {
var y = doubleArrayOf(1.0, 2.0, 3.0, 4.0, 5.0)
var x = arrayOf(doubleArrayOf(2.0, 1.0, 3.0, 4.0, 5.0))
var v = multipleRegression(y, x)
printVector(v)
y = doubleArrayOf(3.0, 4.0, 5.0)
x = arrayOf(
doubleArrayOf(1.0, 2.0, 1.0),
doubleArrayOf(1.0, 1.0, 2.0)
)
v = multipleRegression(y, x)
printVector(v)
y = doubleArrayOf(52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29,
63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46)
val a = doubleArrayOf(1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70,
1.73, 1.75, 1.78, 1.80, 1.83)
x = arrayOf(DoubleArray(a.size) { 1.0 }, a, a.map { it * it }.toDoubleArray())
v = multipleRegression(y, x)
printVector(v)
}
- Output:
[0.9818181818181818]

[0.9999999999999996, 2.000000000000001]

[128.8128035798277, -143.1620228653037, 61.960325442985436]
Maple
First build a random dataset.
n:=200:
X:=<ArrayTools[RandomArray](n,4,distribution=normal)|Vector(n,1,datatype=float[8])>:
Y:=X.<4.2,-2.8,-1.4,3.1,1.75>+convert(ArrayTools[RandomArray](n,1,distribution=normal),Vector):
Now the linear regression, with either the LinearAlgebra package, or the Statistics package.
LinearAlgebra[LeastSquares](X,Y)^+;
# [4.33701132468683, -2.78654498997457, -1.41840666085642, 2.92065133466547, 1.76076442997642]
Statistics[LinearFit]([x1,x2,x3,x4,c],X,Y,[x1,x2,x3,x4,c],summarize=true)
# Summary:
# ----------------
# Model: 4.3370113*x1-2.7865450*x2-1.4184067*x3+2.9206513*x4+1.7607644*c
# ----------------
# Coefficients:
# Estimate Std. Error t-value P(>|t|)
# Parameter 1 4.3370 0.0691 62.7409 0.0000
# Parameter 2 -2.7865 0.0661 -42.1637 0.0000
# Parameter 3 -1.4184 0.0699 -20.2937 0.0000
# Parameter 4 2.9207 0.0687 42.5380 0.0000
# Parameter 5 1.7608 0.0701 25.1210 0.0000
# ----------------
# R-squared: 0.9767, Adjusted R-squared: 0.9761
# 4.33701132468683 x1 - 2.78654498997457 x2 - 1.41840666085642 x3
# + 2.92065133466547 x4 + 1.76076442997642 c
Mathematica / Wolfram Language
x = {1.47, 1.50 , 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83};
y = {52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46};
X = {x^0, x^1, x^2};
LeastSquares[Transpose@X, y]
- Output:
{128.813, -143.162, 61.9603}
MATLAB
The slash and backslash operators can be used to solve this problem. Here, some random data are generated.
n=100; k=10;
y = randn (1,n); % generate random vector y
X = randn (k,n); % generate random matrix X
b = y / X
b = 0.1457109 -0.0777564 -0.0712427 -0.0166193 0.0292955 -0.0079111 0.2265894 -0.0561589 -0.1752146 -0.2577663
In its transposed form yt = Xt * bt, the backslash operator can be used.
yt = y'; Xt = X';
bt = Xt \ yt
bt =
0.1457109
-0.0777564
-0.0712427
-0.0166193
0.0292955
-0.0079111
0.2265894
-0.0561589
-0.1752146
-0.2577663
Here is an example of estimating a polynomial fit:
x = [1.47 1.50 1.52 1.55 1.57 1.60 1.63 1.65 1.68 1.70 1.73 1.75 1.78 1.80 1.83]
y = [52.21 53.12 54.48 55.84 57.20 58.57 59.93 61.29 63.11 64.47 66.28 68.10 69.92 72.19 74.46]
X = [x.^0;x.^1;x.^2];
b = y/X
128.813 -143.162 61.960
Instead of the slash operator "/", one can also write:
b = y * X' * inv(X * X')
or
b = y * pinv(X)
Nim
# Using Wikipedia data sample.
import math
import arraymancer, sequtils
var
height = [1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65,
1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83].toTensor()
weight = [52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29,
63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46].toTensor()
# Create Vandermonde matrix.
var a = stack(height.ones_like, height, height *. height, axis = 1)
echo toSeq(least_squares_solver(a, weight).solution.items)
- Output:
@[128.8128035784397, -143.1620228647656, 61.96032544247468]
PARI/GP
pseudoinv(M)=my(sz=matsize(M),T=conj(M)~);if(sz[1]<sz[2],T/(M*T),(T*M)^-1*T)
addhelp(pseudoinv, "pseudoinv(M): Moore-Penrose pseudoinverse of the matrix M.");
y*pseudoinv(X)
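For comparison with other entries, here is a minimal NumPy sketch (illustrative, not part of the GP entry) of the same two-branch pseudoinverse: it uses the right inverse Mᴴ(MMᴴ)⁻¹ for wide matrices and (MᴴM)⁻¹Mᴴ otherwise, assuming the corresponding Gram matrix is invertible.
import numpy as np

def pseudoinv(m):
    # Moore-Penrose pseudoinverse via the two closed forms; assumes
    # full row rank (wide case) or full column rank (tall/square case).
    t = m.conj().T
    rows, cols = m.shape
    if rows < cols:
        return t @ np.linalg.inv(m @ t)   # wide: M+ = M^H (M M^H)^-1
    return np.linalg.inv(t @ m) @ t       # tall or square: M+ = (M^H M)^-1 M^H

# b = y @ pseudoinv(X) then plays the same role as y*pseudoinv(X) above.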
Perl
use strict;
use warnings;
use Statistics::Regression;
my @y = (52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46);
my @x = ( 1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83);
my @model = ('const', 'X', 'X**2');
my $reg = Statistics::Regression->new( '', [@model] );
$reg->include( $y[$_], [ 1.0, $x[$_], $x[$_]**2 ]) for 0..@y-1;
my @coeff = $reg->theta();
printf "%-6s %8.3f\n", $model[$_], $coeff[$_] for 0..@model-1;
- Output:
const   128.813
X      -143.162
X**2     61.960
Phix
with javascript_semantics
constant N = 15, M = 3
sequence x = {1.47,1.50,1.52,1.55,1.57, 1.60,1.63,1.65,1.68,1.70, 1.73,1.75,1.78,1.80,1.83},
         y = {52.21,53.12,54.48,55.84,57.20, 58.57,59.93,61.29,63.11,64.47, 66.28,68.10,69.92,72.19,74.46},
         s = repeat(0,N),
         t = repeat(0,N),
         a = repeat(repeat(0,M+1),M)
for k=1 to 2*M do
    for i=1 to N do
        s[k] += power(x[i],k-1)
        if k<=M then t[k] += y[i]*power(x[i],k-1) end if
    end for
end for
-- build linear system
for row=1 to M do
    for col=1 to M do
        a[row,col] = s[row+col-1]
    end for
    a[row,M+1] = t[row]
end for
puts(1,"Linear system coefficients:\n")
pp(a,{pp_Nest,1,pp_IntFmt,"%7.1f",pp_FltFmt,"%7.1f"})
for j=1 to M do
    integer i = j
    while a[i,j]=0 do i += 1 end while
    if i=M+1 then ?"SINGULAR MATRIX !" ?9/0 end if
    for k=1 to M+1 do
        {a[j,k],a[i,k]} = {a[i,k],a[j,k]}
    end for
    atom Y = 1/a[j,j]
    a[j] = sq_mul(a[j],Y)
    for k=1 to M do
        if k<>j then
            Y = -a[k,j]
            for m=1 to M+1 do
                a[k,m] += Y*a[j,m]
            end for
        end if
    end for
end for
puts(1,"Solutions:\n")
?columnize(a,M+1)[1]
- Output:
Linear system coefficients:
{{   15.0,   24.8,   41.1,  931.2},
 {   24.8,   41.1,   68.4, 1548.2},
 {   41.1,   68.4,  114.3, 2585.5}}
Solutions:
{128.8128036,-143.1620229,61.96032544}
PicoLisp
(scl 20)
# Matrix transposition
(de matTrans (Mat)
(apply mapcar Mat list) )
# Matrix multiplication
(de matMul (Mat1 Mat2)
(mapcar
'((Row)
(apply mapcar Mat2
'(@ (sum */ Row (rest) (1.0 .))) ) )
Mat1 ) )
# Matrix identity
(de matIdent (N)
(let L (need N (1.0) 0)
(mapcar '(() (copy (rot L))) L) ) )
# Reduced row echelon form
(de reducedRowEchelonForm (Mat)
(let (Lead 1 Cols (length (car Mat)))
(for (X Mat X (cdr X))
(NIL
(loop
(T (seek '((R) (n0 (get R 1 Lead))) X)
@ )
(T (> (inc 'Lead) Cols)) ) )
(xchg @ X)
(let D (get X 1 Lead)
(map
'((R) (set R (*/ (car R) 1.0 D)))
(car X) ) )
(for Y Mat
(unless (== Y (car X))
(let N (- (get Y Lead))
(map
'((Dst Src)
(inc Dst (*/ N (car Src) 1.0)) )
Y
(car X) ) ) ) )
(T (> (inc 'Lead) Cols)) ) )
Mat )
(de matInverse (Mat)
(let N (length Mat)
(unless (= N (length (car Mat)))
(quit "can't invert a non-square matrix") )
(mapc conc Mat (matIdent N))
(mapcar '((L) (tail N L)) (reducedRowEchelonForm Mat)) ) )
(de columnVector (Ary)
(mapcar cons Ary) )
(de regressionCoefficients (Mat X)
(let Xt (matTrans X)
(matMul (matMul (matInverse (matMul Xt X)) Xt) Mat) ) )
(setq
Y (columnVector (1.0 2.0 3.0 4.0 5.0))
X (columnVector (2.0 1.0 3.0 4.0 5.0)) )
(round (caar (regressionCoefficients Y X)) 17)
- Output:
-> "0.98181818181818182"
Python
Method with matrix operations
import numpy as np
height = [1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63,
1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83]
weight = [52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93,
61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46]
X = np.asmatrix(height**np.arange(3)[:, None])
y = np.asmatrix(weight)
print(y * X.T * (X*X.T).I)
- Output:
[[ 128.81280359 -143.16202288 61.96032545]]
Using numpy lstsq function
import numpy as np
height = [1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63,
1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83]
weight = [52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93,
61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46]
X = np.array(height)[:, None]**range(3)
y = weight
print(np.linalg.lstsq(X, y, rcond=None)[0])
- Output:
[ 128.81280358 -143.16202286 61.96032544]
R
R provides the lm function for linear regression.
x <- c(1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83)
y <- c(52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46)
lm( y ~ x + I(x^2))
- Output:
Call:
lm(formula = y ~ x + I(x^2))

Coefficients:
(Intercept)            x       I(x^2)
     128.81      -143.16        61.96
A simple implementation of multiple regression in native R is useful to illustrate R's model description and linear algebra capabilities.
simpleMultipleReg <- function(formula) {
## parse and evaluate the model formula
mf <- model.frame(formula)
## create design matrix
X <- model.matrix(mf)
## create dependent variable
Y <- model.response(mf)
## solve
solve(t(X) %*% X) %*% t(X) %*% Y
}
simpleMultipleReg(y ~ x + I(x^2))
This produces the same coefficients as lm()
                  [,1]
(Intercept)  128.81280
x           -143.16202
I(x^2)        61.96033
A more efficient way to solve the normal equations t(X) %*% X %*% b = t(X) %*% Y than the method above is to solve the linear system directly, using the crossprod function:
solve(crossprod(X), crossprod(X, Y))
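Here crossprod(X) forms t(X) %*% X and crossprod(X, Y) forms t(X) %*% Y, so this single call solves the normal equations

t(X) %*% X %*% b = t(X) %*% Y

for b directly, instead of explicitly inverting t(X) %*% X as solve(t(X) %*% X) does.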
A numerically more stable way is to use the QR decomposition of the design matrix, which avoids explicitly forming t(X) %*% X (and thereby squaring the condition number of the problem):
lm.impl <- function(formula) {
mf <- model.frame(formula)
X <- model.matrix(mf)
Y <- model.response(mf)
qr.coef(qr(X), Y)
}
lm(y ~ x + I(x^2))
# Call:
# lm(formula = y ~ x + I(x^2))
#
# Coefficients:
# (Intercept) x I(x^2)
# 128.81 -143.16 61.96
lm.impl(y ~ x + I(x^2))
# (Intercept) x I(x^2)
# 128.81280 -143.16202 61.96033
Racket
#lang racket
(require math)
(define T matrix-transpose)
(define (fit X y)
(matrix-solve (matrix* (T X) X) (matrix* (T X) y)))
Test:
(fit (matrix [[1 2]
[2 5]
[3 7]
[4 9]])
(matrix [[1]
[2]
[3]
[9]]))
- Output:
(array #[#[9 1/3] #[-3 1/3]])
Raku
(formerly Perl 6)
We're going to solve the example on the Wikipedia article using Clifford, a geometric algebra module. Optimization for large vector spaces does not quite work yet, so it's going to take (a lot of) time and a fair amount of memory, but it should work.
Let's create four vectors containing our input data: $h0 for the constant term, $h1 for the heights, $h2 for the squared heights, and $w for the weights.
Then what we're looking for are three scalars α, β and γ such that:
w = α·h0 + β·h1 + γ·h2
To get α, for instance, we can first make the β and γ terms disappear by wedging both sides with h1∧h2:
w∧h1∧h2 = α (h0∧h1∧h2)
Noting I = h0∧h1∧h2 and Ĩ its reversion (so that I·Ĩ is a real scalar), we then get:
α = ((w∧h1∧h2)·Ĩ)/(I·Ĩ)
use Clifford;
my @height = <1.47 1.50 1.52 1.55 1.57 1.60 1.63 1.65 1.68 1.70 1.73 1.75 1.78 1.80 1.83>;
my @weight = <52.21 53.12 54.48 55.84 57.20 58.57 59.93 61.29 63.11 64.47 66.28 68.10 69.92 72.19 74.46>;
my $w = [+] @weight Z* @e;
my $h0 = [+] @e[^@weight];
my $h1 = [+] @height Z* @e;
my $h2 = [+] (@height X** 2) Z* @e;
my $I = $h0∧$h1∧$h2;
my $I2 = ($I·$I.reversion).Real;
say "α = ", ($w∧$h1∧$h2)·$I.reversion/$I2;
say "β = ", ($w∧$h2∧$h0)·$I.reversion/$I2;
say "γ = ", ($w∧$h0∧$h1)·$I.reversion/$I2;
- Output:
α = 128.81280357844
β = -143.1620228648
γ = 61.960325442
April 2016: This computation took over an hour with MoarVM, running in a VirtualBox Linux guest hosted by a Windows laptop with an Intel i7 processor.
March 2020: With various improvements to the language, 6.d releases of Raku now run the code 2x to 3x as fast, depending on the hardware used, but even so it is generous to describe it as 'slow'.
Ruby
Using the standard library Matrix class:
require 'matrix'
def regression_coefficients y, x
y = Matrix.column_vector y.map { |i| i.to_f }
x = Matrix.columns x.map { |xi| xi.map { |i| i.to_f }}
(x.t * x).inverse * x.t * y
end
Testing 2 dimensions:
puts regression_coefficients([1, 2, 3, 4, 5], [ [2, 1, 3, 4, 5] ])
- Output:
Matrix[[0.981818181818182]]
Testing 3 dimensions, with points (x,y,z): [1,1,3], [2,1,4] and [1,2,5]:
puts regression_coefficients([3,4,5], [ [1,2,1], [1,1,2] ])
- Output:
Matrix[[0.9999999999999996], [2.0]]
SPSS
First, build a random dataset:
set rng=mc seed=17760704.
new file.
input program.
vector x(4).
loop #i=1 to 200.
loop #j=1 to 4.
compute x(#j)=rv.normal(0,1).
end loop.
end case.
end loop.
end file.
end input program.
compute y=1.5+0.8*x1-0.7*x2+1.1*x3-1.7*x4+rv.normal(0,1).
execute.
Now use the regression command:
regression /dependent=y
/method=enter x1 x2 x3 x4.
- Output:
Regression
Notes
|--------------------------------------------------------------------|---------------------------------------------------------------------------|
|Output Created |21-MAR-2020 23:17:33 |
|--------------------------------------------------------------------|---------------------------------------------------------------------------|
|Comments | |
|----------------------|---------------------------------------------|---------------------------------------------------------------------------|
|Input |Filter |<none> |
| |---------------------------------------------|---------------------------------------------------------------------------|
| |Weight |<none> |
| |---------------------------------------------|---------------------------------------------------------------------------|
| |Split File |<none> |
| |---------------------------------------------|---------------------------------------------------------------------------|
| |N of Rows in Working Data File |200 |
|----------------------|---------------------------------------------|---------------------------------------------------------------------------|
|Missing Value Handling|Definition of Missing |User-defined missing values are treated as missing. |
| |---------------------------------------------|---------------------------------------------------------------------------|
| |Cases Used |Statistics are based on cases with no missing values for any variable used.|
|--------------------------------------------------------------------|---------------------------------------------------------------------------|
|Syntax |regression /dependent=y /method=enter x1 x2 x3 x4. |
|----------------------|---------------------------------------------|---------------------------------------------------------------------------|
|Resources |Processor Time |00:00:00,00 |
| |---------------------------------------------|---------------------------------------------------------------------------|
| |Elapsed Time |00:00:00,00 |
| |---------------------------------------------|---------------------------------------------------------------------------|
| |Memory Required |4080 bytes |
| |---------------------------------------------|---------------------------------------------------------------------------|
| |Additional Memory Required for Residual Plots|0 bytes |
|------------------------------------------------------------------------------------------------------------------------------------------------|
Variables Entered/Removeda
|-----|-----------------|-----------------|------|
|Model|Variables Entered|Variables Removed|Method|
|-----|-----------------|-----------------|------|
|1 |x4, x3, x2, x1b |. |Enter |
|------------------------------------------------|
a Dependent Variable: y
b All requested variables entered.
Model Summary
|-----|-----|--------|-----------------|--------------------------|
|Model|R |R Square|Adjusted R Square|Std. Error of the Estimate|
|-----|-----|--------|-----------------|--------------------------|
|1 |,929a|,863 |,860 |,94928 |
|-----------------------------------------------------------------|
a Predictors: (Constant), x4, x3, x2, x1
ANOVAa
|----------------|--------------|---|-----------|-------|-----|
|Model |Sum of Squares|df |Mean Square|F |Sig. |
|-----|----------|--------------|---|-----------|-------|-----|
|1 |Regression|1106,659 |4 |276,665 |307,021|,000b|
| |----------|--------------|---|-----------|-------|-----|
| |Residual |175,720 |195|,901 | | |
| |----------|--------------|---|-----------|-------|-----|
| |Total |1282,379 |199| | | |
|-------------------------------------------------------------|
a Dependent Variable: y
b Predictors: (Constant), x4, x3, x2, x1
Coefficientsa
|----------------|--------------------------------------|-------------------------|-------|----|
|Model |Unstandardized Coefficients |Standardized Coefficients|t |Sig.|
| |---------------------------|----------|-------------------------| | |
| |B |Std. Error|Beta | | |
|-----|----------|---------------------------|----------|-------------------------|-------|----|
|1 |(Constant)|1,550 |,067 | |23,003 |,000|
| |----------|---------------------------|----------|-------------------------|-------|----|
| |x1 |,831 |,062 |,360 |13,457 |,000|
| |----------|---------------------------|----------|-------------------------|-------|----|
| |x2 |-,604 |,075 |-,215 |-8,051 |,000|
| |----------|---------------------------|----------|-------------------------|-------|----|
| |x3 |1,098 |,065 |,451 |16,989 |,000|
| |----------|---------------------------|----------|-------------------------|-------|----|
| |x4 |-1,770 |,073 |-,656 |-24,306|,000|
|----------------------------------------------------------------------------------------------|
a Dependent Variable: y
Stata
First, build a random dataset:
clear
set seed 17760704
set obs 200
forv i=1/4 {
gen x`i'=rnormal()
}
gen y=1.5+0.8*x1-0.7*x2+1.1*x3-1.7*x4+rnormal()
Now, use the regress command:
reg y x*
- Output:
The command shows the coefficients along with useful additional information, such as R², the F statistic, and the standard errors of the coefficients.
      Source |       SS           df       MS      Number of obs   =       200
-------------+----------------------------------   F(4, 195)       =    355.15
       Model |  1343.81757         4  335.954392   Prob > F        =    0.0000
    Residual |  184.458622       195  .945941649   R-squared       =    0.8793
-------------+----------------------------------   Adj R-squared   =    0.8768
       Total |  1528.27619       199  7.67977985   Root MSE        =     .9726

------------------------------------------------------------------------------
           y |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
          x1 |   .7525247   .0689559    10.91   0.000     .6165295    .8885198
          x2 |  -.7036303   .0697456   -10.09   0.000    -.8411828   -.5660778
          x3 |   1.157477    .072189    16.03   0.000     1.015106    1.299849
          x4 |  -1.718201   .0621758   -27.63   0.000    -1.840824   -1.595577
       _cons |   1.399131   .0697862    20.05   0.000     1.261499    1.536764
------------------------------------------------------------------------------
The regress command also sets a number of ereturn values, which can be used by subsequent commands. The coefficients and their standard errors also have a special syntax:
. di _b[x1]
.75252466
. di _b[_cons]
1.3991314
. di _se[x1]
.06895593
. di _se[_cons]
.06978623
See regress postestimation for a list of commands that can be used after a regression.
Here we compute Akaike's AIC, the covariance matrix of the estimates, the predicted values and residuals:
. estat ic
Akaike's information criterion and Bayesian information criterion
-----------------------------------------------------------------------------
Model | Obs ll(null) ll(model) df AIC BIC
-------------+---------------------------------------------------------------
. | 200 -487.1455 -275.6985 5 561.397 577.8886
-----------------------------------------------------------------------------
Note: N=Obs used in calculating BIC; see [R] BIC note.
. estat vce
Covariance matrix of coefficients of regress model
e(V) | x1 x2 x3 x4 _cons
-------------+------------------------------------------------------------
x1 | .00475492
x2 | -.00040258 .00486445
x3 | -.00042516 .00017355 .00521125
x4 | -.00011915 -.0002568 .00054646 .00386583
_cons | .00030777 -.00031109 -.00023794 .00058926 .00487012
. predict yhat, xb
. predict r, r
Tcl
package require math::linearalgebra
namespace eval multipleRegression {
namespace export regressionCoefficients
namespace import ::math::linearalgebra::*
# Matrix inversion is defined in terms of Gaussian elimination
# Note that we assume (correctly) that we have a square matrix
proc invert {matrix} {
solveGauss $matrix [mkIdentity [lindex [shape $matrix] 0]]
}
# Implement the Ordinary Least Squares method
proc regressionCoefficients {y x} {
matmul [matmul [invert [matmul $x [transpose $x]]] $x] $y
}
}
namespace import multipleRegression::regressionCoefficients
Using an example from the Wikipedia page on the correlation of height and weight:
# Simple helper just for this example
proc map {n exp list} {
upvar 1 $n v
set r {}; foreach v $list {lappend r [uplevel 1 $exp]}; return $r
}
# Data from wikipedia
set x {
1.47 1.50 1.52 1.55 1.57 1.60 1.63 1.65 1.68 1.70 1.73 1.75 1.78 1.80 1.83
}
set y {
52.21 53.12 54.48 55.84 57.20 58.57 59.93 61.29 63.11 64.47 66.28 68.10
69.92 72.19 74.46
}
# Wikipedia states that fitting up to the square of x[i] is worth it
puts [regressionCoefficients $y [map n {map v {expr {$v**$n}} $x} {0 1 2}]]
- Output:
(a 3-vector of coefficients)
128.81280358170625 -143.16202286630732 61.96032544293041
TI-83 BASIC
{1.47,1.50,1.52,1.55,1.57,1.60,1.63,1.65,1.68,1.70,1.73,1.75,1.78,1.80,1.83}→L₁
{52.21,53.12,54.48,55.84,57.20,58.57,59.93,61.29,63.11,64.47,66.28,68.10,69.92,72.19,74.46}→L₂
QuadReg L₁,L₂
- Output:
y=ax²+bx+c
a=61.96032544
b=–143.1620229
c=128.8128036
Ursala
This exact problem is solved by the DGELSD function from the LAPACK library [1], which is callable in Ursala like this:
regression_coefficients = lapack..dgelsd
test program:
x =
<
<7.589183e+00,1.703609e+00,-4.477162e+00>,
<-4.597851e+00,9.434889e+00,-6.543450e+00>,
<4.588202e-01,-6.115153e+00,1.331191e+00>>
y = <1.745005e+00,-4.448092e+00,-4.160842e+00>
#cast %eL
example = regression_coefficients(x,y)
The matrix x needn't be square, and has one row for each data point. The length of y must equal the number of rows in x, and the number of coefficients returned will be the number of columns in x. It would be more typical in practice to initialize x by evaluating a set of basis functions chosen to model some empirical data, but the regression solver is indifferent to the model.
- Output:
<9.335612e-01,1.101323e+00,1.611777e+00>
A similar method can be used for regression with complex numbers by substituting zgelsd for dgelsd, above.
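To make the remark above about basis functions concrete, here is a minimal NumPy sketch (illustrative, not Ursala) of that workflow: evaluate a set of basis functions (here 1, t and t²) at each sample point to build x, one row per data point, then hand the result to a least-squares solver. NumPy's lstsq is likewise backed by LAPACK's DGELSD, so it plays the same role as lapack..dgelsd; the data values are just examples.
import numpy as np

t = np.array([1.47, 1.50, 1.52, 1.55, 1.57])      # empirical sample points
y = np.array([52.21, 53.12, 54.48, 55.84, 57.20])
# One row per data point, one column per basis function: 1, t, t^2.
X = np.column_stack([np.ones_like(t), t, t**2])
coeffs, *_ = np.linalg.lstsq(X, y, rcond=None)    # the solver is indifferent to the model
print(coeffs)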
Visual Basic .NET
Module Module1
Sub Swap(Of T)(ByRef x As T, ByRef y As T)
Dim temp = x
x = y
y = temp
End Sub
Sub Require(condition As Boolean, message As String)
If condition Then
Return
End If
Throw New ArgumentException(message)
End Sub
Class Matrix
Private data As Double(,)
Private rowCount As Integer
Private colCount As Integer
Public Sub New(rows As Integer, cols As Integer)
Require(rows > 0, "Need at least one row")
rowCount = rows
Require(cols > 0, "Need at least one column")
colCount = cols
data = New Double(rows - 1, cols - 1) {}
End Sub
Public Sub New(source As Double(,))
Dim rows = source.GetLength(0)
Require(rows > 0, "Need at least one row")
rowCount = rows
Dim cols = source.GetLength(1)
Require(cols > 0, "Need at least one column")
colCount = cols
data = New Double(rows - 1, cols - 1) {}
For i = 1 To rows
For j = 1 To cols
data(i - 1, j - 1) = source(i - 1, j - 1)
Next
Next
End Sub
Default Public Property Index(i As Integer, j As Integer) As Double
Get
Return data(i, j)
End Get
Set(value As Double)
data(i, j) = value
End Set
End Property
Public Property Slice(i As Integer) As Double()
Get
Dim m(colCount - 1) As Double
For j = 1 To colCount
m(j - 1) = Index(i, j - 1)
Next
Return m
End Get
Set(value As Double())
Require(colCount = value.Length, "Slice must match the number of columns")
For j = 1 To colCount
Index(i, j - 1) = value(j - 1)
Next
End Set
End Property
Public Shared Operator *(m1 As Matrix, m2 As Matrix) As Matrix
Dim rc1 = m1.rowCount
Dim cc1 = m1.colCount
Dim rc2 = m2.rowCount
Dim cc2 = m2.colCount
Require(cc1 = rc2, "Cannot multiply if the first columns does not equal the second rows")
Dim result As New Matrix(rc1, cc2)
For i = 1 To rc1
For j = 1 To cc2
For k = 1 To rc2
result(i - 1, j - 1) += m1(i - 1, k - 1) * m2(k - 1, j - 1)
Next
Next
Next
Return result
End Operator
Public Function Transpose() As Matrix
Dim rc = rowCount
Dim cc = colCount
Dim trans As New Matrix(cc, rc)
For i = 1 To cc
For j = 1 To rc
trans(i - 1, j - 1) = Index(j - 1, i - 1)
Next
Next
Return trans
End Function
Public Sub ToReducedRowEchelonForm()
Dim lead = 0
Dim rc = rowCount
Dim cc = colCount
For r = 1 To rc
If cc <= lead Then
Return
End If
Dim i = r
While Index(i - 1, lead) = 0.0
i += 1
If i > rc Then
i = r
lead += 1
If cc = lead Then
Return
End If
End If
End While
Dim temp = Slice(i - 1)
Slice(i - 1) = Slice(r - 1)
Slice(r - 1) = temp
If Index(r - 1, lead) <> 0.0 Then
Dim div = Index(r - 1, lead)
For j = 1 To cc
Index(r - 1, j - 1) /= div
Next
End If
For k = 1 To rc
If k <> r Then
Dim mult = Index(k - 1, lead)
For j = 1 To cc
Index(k - 1, j - 1) -= Index(r - 1, j - 1) * mult
Next
End If
Next
lead += 1
Next
End Sub
Public Function Inverse() As Matrix
Require(rowCount = colCount, "Not a square matrix")
Dim len = rowCount
Dim aug As New Matrix(len, 2 * len)
For i = 1 To len
For j = 1 To len
aug(i - 1, j - 1) = Index(i - 1, j - 1)
Next
REM augment identity matrix to right
aug(i - 1, i + len - 1) = 1.0
Next
aug.ToReducedRowEchelonForm()
Dim inv As New Matrix(len, len)
For i = 1 To len
For j = len + 1 To 2 * len
inv(i - 1, j - len - 1) = aug(i - 1, j - 1)
Next
Next
Return inv
End Function
End Class
Function ConvertArray(source As Double()) As Double(,)
Dim dest(0, source.Length - 1) As Double
For i = 1 To source.Length
dest(0, i - 1) = source(i - 1)
Next
Return dest
End Function
Function MultipleRegression(y As Double(), x As Matrix) As Double()
Dim tm As New Matrix(ConvertArray(y))
Dim cy = tm.Transpose
Dim cx = x.Transpose
Return ((x * cx).Inverse * x * cy).Transpose.Slice(0)
End Function
Sub Print(v As Double())
Dim it = v.GetEnumerator()
Console.Write("[")
If it.MoveNext() Then
Console.Write(it.Current)
End If
While it.MoveNext
Console.Write(", ")
Console.Write(it.Current)
End While
Console.Write("]")
End Sub
Sub Main()
Dim y() = {1.0, 2.0, 3.0, 4.0, 5.0}
Dim x As New Matrix({{2.0, 1.0, 3.0, 4.0, 5.0}})
Dim v = MultipleRegression(y, x)
Print(v)
Console.WriteLine()
y = {3.0, 4.0, 5.0}
x = New Matrix({
{1.0, 2.0, 1.0},
{1.0, 1.0, 2.0}
})
v = MultipleRegression(y, x)
Print(v)
Console.WriteLine()
y = {52.21, 53.12, 54.48, 55.84, 57.2, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.1, 69.92, 72.19, 74.46}
Dim a = {1.47, 1.5, 1.52, 1.55, 1.57, 1.6, 1.63, 1.65, 1.68, 1.7, 1.73, 1.75, 1.78, 1.8, 1.83}
Dim xs(2, a.Length - 1) As Double
For i = 1 To a.Length
xs(0, i - 1) = 1.0
xs(1, i - 1) = a(i - 1)
xs(2, i - 1) = a(i - 1) * a(i - 1)
Next
x = New Matrix(xs)
v = MultipleRegression(y, x)
Print(v)
Console.WriteLine()
End Sub
End Module
- Output:
[0.981818181818182]
[1, 2]
[128.812803579828, -143.162022865304, 61.9603254429854]
Wren
import "./matrix" for Matrix
var multipleRegression = Fn.new { |y, x|
var cy = y.transpose
var cx = x.transpose
return ((x * cx).inverse * x * cy).transpose[0]
}
var ys = [
Matrix.new([ [1, 2, 3, 4, 5] ]),
Matrix.new([ [3, 4, 5] ]),
Matrix.new([ [52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29,
63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46] ])
]
var a = [1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83]
var xs = [
Matrix.new([ [2, 1, 3, 4, 5] ]),
Matrix.new([ [1, 2, 1], [1, 1, 2] ]),
Matrix.new([ List.filled(a.count, 1), a, a.map { |e| e * e }.toList ])
]
for (i in 0...ys.count) {
var v = multipleRegression.call(ys[i], xs[i])
System.print(v)
System.print()
}
- Output:
[0.98181818181818]

[1, 2]

[128.81280357983, -143.1620228653, 61.960325442985]
zkl
Using the GNU Scientific Library:
var [const] GSL=Import("zklGSL"); // libGSL (GNU Scientific Library)
height:=GSL.VectorFromData(1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63,
1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83);
weight:=GSL.VectorFromData(52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93,
61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46);
v:=GSL.polyFit(height,weight,2);
v.format().println();
GSL.Helpers.polyString(v).println();
GSL.Helpers.polyEval(v,height).format().println();
- Output:
128.81,-143.16,61.96
128.813 - 143.162x + 61.9603x^2
52.25,53.48,54.36,55.77,56.77,58.37,60.08,61.28,63.18,64.50,66.58,68.03,70.30,71.87,74.33
Or, using Lists:
// Solve a linear system AX=B where A is symmetric and positive definite, so it can be Cholesky decomposed.
fcn linsys(A,B){
n,m:=A.len(),B[1].len(); // A.rows,B.cols
y:=n.pump(List.createLong(n).write,0.0); // writable vector of n zeros
X:=make_array(n,m,0.0);
L:=cholesky(A); // A=LL'
foreach col in (m){
foreach k in (n){ // Forward substitution: y = L\B
y[k]=( B[k][col] - k.reduce('wrap(s,j){ s + L[k][j]*y[j] },0.0) )
/L[k][k];
}
foreach k in ([n-1..0,-1]){ // Back substitution. x=L'\y
X[k][col]=
( y[k] - (k+1).reduce(n-k-1,'wrap(s,j){ s + L[j][k]*X[j][col] },0.0) )
/L[k][k];
}
}
X
}
fcn cholesky(mat){ // Cholesky decomposition task
rows:=mat.len();
r:=(0).pump(rows,List().write, (0).pump(rows,List,0.0).copy); // matrix of zeros
foreach i,j in (rows,i+1){
s:=(0).reduce(j,'wrap(s,k){ s + r[i][k]*r[j][k] },0.0);
r[i][j]=( if(i==j)(mat[i][i] - s).sqrt()
else 1.0/r[j][j]*(mat[i][j] - s) );
}
r
}
// Solve a linear least squares problem. Ax=b, with A being mxn, with m>n.
// Solves the linear system A'Ax=A'b.
fcn lsqr(A,b){
at:=transpose(A);
linsys(matMult(at,A), matMult(at,b));
}
// Least squares fit of a polynomial of order n to the x-y curve.
fcn polyfit(x,y,n){
n+=1;
m:=x[0].len(); // columns
A:=make_array(m,n,0.0);
foreach i,j in (m,n){ A[i][j]=x[0][i].pow(j); }
lsqr(A, transpose(y));
}
fcn make_array(n,m,v){ (m).pump(List.createLong(m).write,v)*n }
fcn matMult(a,b){
n,m,p:=a[0].len(),a.len(),b[0].len();
ans:=make_array(m,p,0.0);
foreach i,j,k in (m,p,n){ ans[i][j]+=a[i][k]*b[k][j]; }
ans
}
fcn transpose(M){
if(M.len()==1) M[0].pump(List,List.create); // 1 row --> n columns
else M[0].zip(M.xplode(1));
}
height:=T(T(1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63,
1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83));
weight:=T(T(52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93,
61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46));
polyfit(height,weight,2).flatten().println();
- Output:
L(128.813,-143.162,61.9603)