Multiple regression

Task

You are encouraged to solve this task according to the task description, using any language you may know.

Given a set of data vectors in the following format:

y = \{ y_1, y_2, ..., y_n \}\,

X_i = \{ x_{i1}, x_{i2}, ..., x_{in} \}, \quad i \in 1..k\,

Compute the vector \beta = \{ \beta_1, \beta_2, ..., \beta_k \}\, using ordinary least squares regression, i.e. find the coefficients satisfying

y_j = \sum_i \beta_i \cdot x_{ij} , \quad j \in 1..n\,

You can assume y is given to you as a vector (a one-dimensional array), and X is given to you as a two-dimensional array (i.e. matrix).
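
Equivalently, collect the regressors into a design matrix X with one row per observation j and one column per coefficient i; then, assuming X^T X is invertible, the ordinary least squares estimate has the standard closed-form solution

\hat\beta = (X^T X)^{-1} X^T y\,

Several of the solutions below compute exactly this product; others solve the equivalent normal equations or use a QR factorization, which is numerically more stable.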


Ada

Extension of Reduced row echelon form#Ada:

matrices.ads:

generic
   type Element_Type is private;
   Zero : Element_Type;
   One  : Element_Type;
   with function "+" (Left, Right : Element_Type) return Element_Type is <>;
   with function "-" (Left, Right : Element_Type) return Element_Type is <>;
   with function "*" (Left, Right : Element_Type) return Element_Type is <>;
   with function "/" (Left, Right : Element_Type) return Element_Type is <>;
package Matrices is
   type Vector is array (Positive range <>) of Element_Type;
   type Matrix is
     array (Positive range <>, Positive range <>) of Element_Type;

   function "*" (Left, Right : Matrix) return Matrix;
   function Invert (Source : Matrix) return Matrix;
   function Reduced_Row_Echelon_Form (Source : Matrix) return Matrix;
   function Regression_Coefficients
     (Source     : Vector;
      Regressors : Matrix)
      return Vector;
   function To_Column_Vector
     (Source : Matrix;
      Row    : Positive := 1)
      return Vector;
   function To_Matrix
     (Source        : Vector;
      Column_Vector : Boolean := True)
      return Matrix;
   function To_Row_Vector
     (Source : Matrix;
      Column : Positive := 1)
      return Vector;
   function Transpose (Source : Matrix) return Matrix;

   Size_Mismatch     : exception;
   Not_Square_Matrix : exception;
   Not_Invertible    : exception;
end Matrices;

matrices.adb:

package body Matrices is
   function "*" (Left, Right : Matrix) return Matrix is
      Result : Matrix (Left'Range (1), Right'Range (2)) :=
        (others => (others => Zero));
   begin
      if Left'Length (2) /= Right'Length (1) then
         raise Size_Mismatch;
      end if;
      for I in Result'Range (1) loop
         for K in Result'Range (2) loop
            for J in Left'Range (2) loop
               Result (I, K) := Result (I, K) + Left (I, J) * Right (J, K);
            end loop;
         end loop;
      end loop;
      return Result;
   end "*";

   function Invert (Source : Matrix) return Matrix is
      Expanded : Matrix (Source'Range (1),
                         Source'First (2) .. Source'Last (2) * 2);
      Result   : Matrix (Source'Range (1), Source'Range (2));
   begin
      --  Matrix has to be square.
      if Source'Length (1) /= Source'Length (2) then
         raise Not_Square_Matrix;
      end if;
      --  Copy Source into Expanded matrix and attach identity matrix to right
      for Row in Source'Range (1) loop
         for Col in Source'Range (2) loop
            Expanded (Row, Col)                   := Source (Row, Col);
            Expanded (Row, Source'Last (2) + Col) := Zero;
         end loop;
         Expanded (Row, Source'Last (2) + Row) := One;
      end loop;
      Expanded := Reduced_Row_Echelon_Form (Source => Expanded);
      --  Copy right side to Result (= inverted Source)
      for Row in Result'Range (1) loop
         for Col in Result'Range (2) loop
            Result (Row, Col) := Expanded (Row, Source'Last (2) + Col);
         end loop;
      end loop;
      return Result;
   end Invert;

   function Reduced_Row_Echelon_Form (Source : Matrix) return Matrix is
      procedure Divide_Row
        (From    : in out Matrix;
         Row     : Positive;
         Divisor : Element_Type)
      is
      begin
         for Col in From'Range (2) loop
            From (Row, Col) := From (Row, Col) / Divisor;
         end loop;
      end Divide_Row;

      procedure Subtract_Rows
        (From                : in out Matrix;
         Subtrahend, Minuend : Positive;
         Factor              : Element_Type)
      is
      begin
         for Col in From'Range (2) loop
            From (Minuend, Col) := From (Minuend, Col) -
                                   From (Subtrahend, Col) * Factor;
         end loop;
      end Subtract_Rows;

      procedure Swap_Rows (From : in out Matrix; First, Second : Positive) is
         Temporary : Element_Type;
      begin
         for Col in From'Range (2) loop
            Temporary          := From (First, Col);
            From (First, Col)  := From (Second, Col);
            From (Second, Col) := Temporary;
         end loop;
      end Swap_Rows;

      Result : Matrix   := Source;
      Lead   : Positive := Result'First (2);
      I      : Positive;
   begin
      Rows : for Row in Result'Range (1) loop
         exit Rows when Lead > Result'Last (2);
         I := Row;
         while Result (I, Lead) = Zero loop
            I := I + 1;
            if I = Result'Last (1) then
               I    := Row;
               Lead := Lead + 1;
               exit Rows when Lead = Result'Last (2);
            end if;
         end loop;
         if I /= Row then
            Swap_Rows (From => Result, First => I, Second => Row);
         end if;
         Divide_Row
           (From    => Result,
            Row     => Row,
            Divisor => Result (Row, Lead));
         for Other_Row in Result'Range (1) loop
            if Other_Row /= Row then
               Subtract_Rows
                 (From       => Result,
                  Subtrahend => Row,
                  Minuend    => Other_Row,
                  Factor     => Result (Other_Row, Lead));
            end if;
         end loop;
         Lead := Lead + 1;
      end loop Rows;
      return Result;
   end Reduced_Row_Echelon_Form;
 
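   --  Ordinary least squares: Coefficients = Invert (X'X) * X' * y,
   --  where X is the matrix of regressors and y the observed values.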
   function Regression_Coefficients
     (Source     : Vector;
      Regressors : Matrix)
      return Vector
   is
      Result : Matrix (Regressors'Range (2), 1 .. 1);
   begin
      if Source'Length /= Regressors'Length (1) then
         raise Size_Mismatch;
      end if;
      declare
         Regressors_T : constant Matrix := Transpose (Regressors);
      begin
         Result := Invert (Regressors_T * Regressors) *
                   Regressors_T *
                   To_Matrix (Source);
      end;
      return To_Row_Vector (Source => Result);
   end Regression_Coefficients;

   function To_Column_Vector
     (Source : Matrix;
      Row    : Positive := 1)
      return Vector
   is
      Result : Vector (Source'Range (2));
   begin
      for Column in Result'Range loop
         Result (Column) := Source (Row, Column);
      end loop;
      return Result;
   end To_Column_Vector;

   function To_Matrix
     (Source        : Vector;
      Column_Vector : Boolean := True)
      return Matrix
   is
      Result : Matrix (1 .. 1, Source'Range);
   begin
      for Column in Source'Range loop
         Result (1, Column) := Source (Column);
      end loop;
      if Column_Vector then
         return Transpose (Result);
      else
         return Result;
      end if;
   end To_Matrix;

   function To_Row_Vector
     (Source : Matrix;
      Column : Positive := 1)
      return Vector
   is
      Result : Vector (Source'Range (1));
   begin
      for Row in Result'Range loop
         Result (Row) := Source (Row, Column);
      end loop;
      return Result;
   end To_Row_Vector;

   function Transpose (Source : Matrix) return Matrix is
      Result : Matrix (Source'Range (2), Source'Range (1));
   begin
      for Row in Result'Range (1) loop
         for Column in Result'Range (2) loop
            Result (Row, Column) := Source (Column, Row);
         end loop;
      end loop;
      return Result;
   end Transpose;
end Matrices;

Example multiple_regression.adb:

with Ada.Text_IO;
with Matrices;
procedure Multiple_Regression is
   package Float_Matrices is new Matrices (
      Element_Type => Float,
      Zero         => 0.0,
      One          => 1.0);
   subtype Vector is Float_Matrices.Vector;
   subtype Matrix is Float_Matrices.Matrix;
   use type Matrix;

   procedure Output_Matrix (X : Matrix) is
   begin
      for Row in X'Range (1) loop
         for Col in X'Range (2) loop
            Ada.Text_IO.Put (Float'Image (X (Row, Col)) & ' ');
         end loop;
         Ada.Text_IO.New_Line;
      end loop;
   end Output_Matrix;

   --  example from Ruby solution
   V : constant Vector := (1.0, 2.0, 3.0, 4.0, 5.0);
   M : constant Matrix :=
     ((1 => 2.0),
      (1 => 1.0),
      (1 => 3.0),
      (1 => 4.0),
      (1 => 5.0));
   C : constant Vector :=
     Float_Matrices.Regression_Coefficients (Source => V, Regressors => M);
   --  Wikipedia example
   Weight        : constant Vector (1 .. 15) :=
     (52.21, 53.12, 54.48, 55.84, 57.20,
      58.57, 59.93, 61.29, 63.11, 64.47,
      66.28, 68.10, 69.92, 72.19, 74.46);
   Height        : Vector (1 .. 15) :=
     (1.47, 1.50, 1.52, 1.55, 1.57,
      1.60, 1.63, 1.65, 1.68, 1.70,
      1.73, 1.75, 1.78, 1.80, 1.83);
   Height_Matrix : Matrix (1 .. 15, 1 .. 3);
begin
   Ada.Text_IO.Put_Line ("Example from Ruby solution:");
   Ada.Text_IO.Put_Line ("V:");
   Output_Matrix (Float_Matrices.To_Matrix (V));
   Ada.Text_IO.Put_Line ("M:");
   Output_Matrix (M);
   Ada.Text_IO.Put_Line ("C:");
   Output_Matrix (Float_Matrices.To_Matrix (C));
   Ada.Text_IO.New_Line;
   Ada.Text_IO.Put_Line ("Example from Wikipedia:");
   for I in Height'Range loop
      Height_Matrix (I, 1) := 1.0;
      Height_Matrix (I, 2) := Height (I);
      Height_Matrix (I, 3) := Height (I) ** 2;
   end loop;
   Ada.Text_IO.Put_Line ("Matrix:");
   Output_Matrix (Height_Matrix);
   declare
      Coefficients : constant Vector :=
        Float_Matrices.Regression_Coefficients
          (Source     => Weight,
           Regressors => Height_Matrix);
   begin
      Ada.Text_IO.Put_Line ("Coefficients:");
      Output_Matrix (Float_Matrices.To_Matrix (Coefficients));
   end;
end Multiple_Regression;

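With GNAT, the program can be built with, for example, gnatmake multiple_regression.adb (assuming matrices.ads and matrices.adb are in the same directory).
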
Output:

Example from Ruby solution:
V:
 1.00000E+00
 2.00000E+00
 3.00000E+00
 4.00000E+00
 5.00000E+00
M:
 2.00000E+00
 1.00000E+00
 3.00000E+00
 4.00000E+00
 5.00000E+00
C:
 9.81818E-01

Example from Wikipedia:
Matrix:
 1.00000E+00  1.47000E+00  2.16090E+00
 1.00000E+00  1.50000E+00  2.25000E+00
 1.00000E+00  1.52000E+00  2.31040E+00
 1.00000E+00  1.55000E+00  2.40250E+00
 1.00000E+00  1.57000E+00  2.46490E+00
 1.00000E+00  1.60000E+00  2.56000E+00
 1.00000E+00  1.63000E+00  2.65690E+00
 1.00000E+00  1.65000E+00  2.72250E+00
 1.00000E+00  1.68000E+00  2.82240E+00
 1.00000E+00  1.70000E+00  2.89000E+00
 1.00000E+00  1.73000E+00  2.99290E+00
 1.00000E+00  1.75000E+00  3.06250E+00
 1.00000E+00  1.78000E+00  3.16840E+00
 1.00000E+00  1.80000E+00  3.24000E+00
 1.00000E+00  1.83000E+00  3.34890E+00
Coefficients:
 1.35403E+02
-1.51161E+02
 6.43514E+01

BBC BASIC

      *FLOAT 64
      INSTALL @lib$+"ARRAYLIB"

      DIM y(14), x(2,14), c(2)
      y() = 52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, \
      \     63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46
      x() = 1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, \
      \     1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83

      FOR row% = DIM(x(),1) TO 0 STEP -1
        FOR col% = 0 TO DIM(x(),2)
          x(row%,col%) = x(0,col%) ^ row%
        NEXT
      NEXT row%

      PROCmultipleregression(y(), x(), c())
      FOR i% = 0 TO DIM(c(),1) : PRINT c(i%) " "; : NEXT
      PRINT
      END

      DEF PROCmultipleregression(y(), x(), c())
      LOCAL m(), t()
      DIM m(DIM(x(),1), DIM(x(),1)), t(DIM(x(),2),DIM(x(),1))
      PROC_transpose(x(), t())
      m() = x().t()
      PROC_invert(m())
      t() = t().m()
      c() = y().t()
      ENDPROC

Output:

128.812804  -143.162023  61.9603254

C

Using GNU GSL and C99, with the Wikipedia data:

#include <stdio.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_multifit.h>

double w[] = { 52.21, 53.12, 54.48, 55.84, 57.20,
               58.57, 59.93, 61.29, 63.11, 64.47,
               66.28, 68.10, 69.92, 72.19, 74.46 };
double h[] = { 1.47, 1.50, 1.52, 1.55, 1.57,
               1.60, 1.63, 1.65, 1.68, 1.70,
               1.73, 1.75, 1.78, 1.80, 1.83 };

int main()
{
	int n = sizeof(h)/sizeof(double);
	gsl_matrix *X = gsl_matrix_calloc(n, 3);
	gsl_vector *Y = gsl_vector_alloc(n);
	gsl_vector *beta = gsl_vector_alloc(3);

	for (int i = 0; i < n; i++) {
		gsl_vector_set(Y, i, w[i]);

		gsl_matrix_set(X, i, 0, 1);
		gsl_matrix_set(X, i, 1, h[i]);
		gsl_matrix_set(X, i, 2, h[i] * h[i]);
	}

	double chisq;
	gsl_matrix *cov = gsl_matrix_alloc(3, 3);
	gsl_multifit_linear_workspace * wspc = gsl_multifit_linear_alloc(n, 3);
	gsl_multifit_linear(X, Y, beta, cov, &chisq, wspc);

	printf("Beta:");
	for (int i = 0; i < 3; i++)
		printf("  %g", gsl_vector_get(beta, i));
	printf("\n");

	gsl_matrix_free(X);
	gsl_matrix_free(cov);
	gsl_vector_free(Y);
	gsl_vector_free(beta);
	gsl_multifit_linear_free(wspc);
}

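A note on building: GSL programs are linked against the GSL and CBLAS libraries, e.g. gcc -std=c99 multifit.c -lgsl -lgslcblas -lm (the file name here is only illustrative).
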
Common Lisp

Uses the routine (chol A) from Cholesky decomposition, (mmul A B) from Matrix multiplication, and (mtp A) from Matrix transposition.

 
;; Solve a linear system AX=B where A is symmetric and positive definite, so it can be Cholesky decomposed.
(defun linsys (A B)
  (let* ((n (car  (array-dimensions A)))
         (m (cadr (array-dimensions B)))
         (y (make-array n :element-type 'long-float :initial-element 0.0L0))
         (X (make-array `(,n ,m) :element-type 'long-float :initial-element 0.0L0))
         (L (chol A))) ; A=LL'

    (loop for col from 0 to (- m 1) do
      ;; Forward substitution: y = L\B
      (loop for k from 0 to (- n 1)
            do (setf (aref y k)
                     (/ (- (aref B k col)
                           (loop for j from 0 to (- k 1)
                                 sum (* (aref L k j)
                                        (aref y j))))
                        (aref L k k))))

      ;; Back substitution. x=L'\y
      (loop for k from (- n 1) downto 0
            do (setf (aref X k col)
                     (/ (- (aref y k)
                           (loop for j from (+ k 1) to (- n 1)
                                 sum (* (aref L j k)
                                        (aref X j col))))
                        (aref L k k)))))
    X))

;; Solve a linear least squares problem. Ax=b, with A being mxn, with m>n.
;; Solves the linear system A'Ax=A'b.
(defun lsqr (A b)
  (linsys (mmul (mtp A) A)
          (mmul (mtp A) b)))

To show an example of multiple regression, (polyfit x y n) from Polynomial regression, which itself uses (linsys A B) and (lsqr A b), will be used to fit a second-degree polynomial to the data below (which satisfy y = 1 + 2x + 3x^2 exactly).

(let ((x (make-array '(1 11) :initial-contents '((0 1 2 3 4 5 6 7 8 9 10))))
      (y (make-array '(1 11) :initial-contents '((1 6 17 34 57 86 121 162 209 262 321)))))
  (polyfit x y 2))

#2A((0.9999999999999759d0) (2.000000000000005d0) (3.0d0))

Go

The example on Wikipedia happens to be a polynomial regression example, and so code from the Polynomial regression task can be reused here. The only difference is that the givens, x and y, are computed in a separate function as a task prerequisite.

package main

import (
	"code.google.com/p/gomatrix/matrix"
	"fmt"
)

func givens() (x, y *matrix.DenseMatrix) {
	height := []float64{1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63,
		1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83}
	weight := []float64{52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93,
		61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46}
	m := len(height)
	n := 3
	y = matrix.MakeDenseMatrix(weight, m, 1)
	x = matrix.Zeros(m, n)
	for i := 0; i < m; i++ {
		ip := float64(1)
		for j := 0; j < n; j++ {
			x.Set(i, j, ip)
			ip *= height[i]
		}
	}
	return
}
 
func main() {
	x, y := givens()
	n := x.Cols()
	q, r := x.QR()
	qty, err := q.Transpose().Times(y)
	if err != nil {
		fmt.Println(err)
		return
	}
	c := make([]float64, n)
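	// Back-substitute to solve the upper-triangular system R*c = Q^T*y
	// for the coefficient vector c.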
	for i := n - 1; i >= 0; i-- {
		c[i] = qty.Get(i, 0)
		for j := i + 1; j < n; j++ {
			c[i] -= c[j] * r.Get(i, j)
		}
		c[i] /= r.Get(i, i)
	}
	fmt.Println(c)
}

Output:

[128.8128035784373 -143.16202286476116 61.960325442472865]

Haskell

Using package hmatrix from HackageDB

import Numeric.LinearAlgebra
import Numeric.LinearAlgebra.LAPACK
 
m :: Matrix Double
m = (3><3)
    [  7.589183,  1.703609, -4.477162,
      -4.597851,  9.434889, -6.543450,
       0.4588202, -6.115153,  1.331191 ]

v :: Matrix Double
v = (3><1)
    [ 1.745005, -4.448092, -4.160842 ]

Using lapack::dgels

*Main> linearSolveLSR m v
(3><1)
[ 0.9335611922087276
, 1.101323491272865
, 1.6117769115824 ]

Or

*Main> inv m `multiply`  v
(3><1)
[ 0.9335611922087278
, 1.101323491272865
, 1.6117769115824006 ]

Hy

(import
  [numpy [ones column-stack]]
  [numpy.random [randn]]
  [numpy.linalg [lstsq]])

(setv n 1000)
(setv x1 (randn n))
(setv x2 (randn n))
(setv y (+ 3 (* 1 x1) (* -2 x2) (* .25 x1 x2) (randn n)))

(print (first (lstsq
  (column-stack (, (ones n) x1 x2 (* x1 x2)))
  y)))
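
Here lstsq returns a tuple whose first element is the least-squares solution, so first extracts just the coefficient vector; with this synthetic data the fitted coefficients should come out close to the true model values 3, 1, -2, and 0.25.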

J

   NB. Wikipedia data
   x=: 1.47 1.50 1.52 1.55 1.57 1.60 1.63 1.65 1.68 1.70 1.73 1.75 1.78 1.80 1.83
   y=: 52.21 53.12 54.48 55.84 57.20 58.57 59.93 61.29 63.11 64.47 66.28 68.10 69.92 72.19 74.46

   y %. x ^/ i.3                 NB. calculate coefficients b1, b2 and b3 for 2nd degree polynomial
128.813 _143.162 61.9603

Breaking it down:

   X=: x ^/ i.3                  NB. form Design matrix
   X=: (x^0) ,. (x^1) ,. (x^2)   NB. equivalent of previous line
   4{.X                          NB. show first 4 rows of X
1 1.47 2.1609
1 1.5 2.25
1 1.52 2.3104
1 1.55 2.4025

   NB. Where y is a set of observations and X is the design matrix
   NB. y %. X does matrix division and gives the regression coefficients
   y %. X
128.813 _143.162 61.9603

In other words, beta=: y %. X is the equivalent of:

\hat\beta = (X'X)^{-1}X'y\,

To confirm:

   mp=: +/ .*                    NB. matrix product
   NB. %.X is matrix inverse of X
   NB. |:X is transpose of X

   (%.(|:X) mp X) mp (|:X) mp y
128.814 _143.163 61.9606

   xpy=: mp~ |:                  NB. Or factoring out "X prime y" (monadically "X prime X")
   X (%.@:xpy@[ mp xpy) y
128.814 _143.163 61.9606
 

LAPACK routines are also available via the Addon math/lapack.

Julia

Translation of: MATLAB

As in Matlab, the backslash or slash operator (depending on the matrix ordering) can be used for solving this problem, for example:

x = [1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83]
y = [52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46]
X = [x.^0 x.^1 x.^2];
b = X \ y
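
For a tall design matrix such as this one, the backslash operator returns the least-squares solution, typically computed via a QR factorization rather than by forming the normal equations.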

Output:

3-element Array{Float64,1}:
  128.813 
 -143.162 
   61.9603

JavaScript

Works with: SpiderMonkey (for the print() and Array.map() functions).
Translation of: Ruby

Extends the Matrix class from Matrix Transpose#JavaScript, Matrix multiplication#JavaScript, Reduced row echelon form#JavaScript. Uses the IdentityMatrix from Matrix exponentiation operator#JavaScript.

// modifies the matrix "in place"
Matrix.prototype.inverse = function() {
    if (this.height != this.width) {
        throw "can't invert a non-square matrix";
    }

    var I = new IdentityMatrix(this.height);
    for (var i = 0; i < this.height; i++)
        this.mtx[i] = this.mtx[i].concat(I.mtx[i]);
    this.width *= 2;

    this.toReducedRowEchelonForm();

    for (var i = 0; i < this.height; i++)
        this.mtx[i].splice(0, this.height);
    this.width /= 2;

    return this;
}

function ColumnVector(ary) {
    return new Matrix(ary.map(function(v) {return [v]}));
}
ColumnVector.prototype = Matrix.prototype;

Matrix.prototype.regression_coefficients = function(x) {
    var x_t = x.transpose();
    return x_t.mult(x).inverse().mult(x_t).mult(this);
}

// the Ruby example
var y = new ColumnVector([1,2,3,4,5]);
var x = new ColumnVector([2,1,3,4,5]);
print(y.regression_coefficients(x));
print();

// the Tcl example
y = new ColumnVector([
    52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29,
    63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46
]);
x = new Matrix(
    [1.47,1.50,1.52,1.55,1.57,1.60,1.63,1.65,1.68,1.70,1.73,1.75,1.78,1.80,1.83].map(
        function(v) {return [Math.pow(v,0), Math.pow(v,1), Math.pow(v,2)]}
    )
);
print(y.regression_coefficients(x));

Output:

0.9818181818181818

128.8128035798277
-143.1620228653037
61.960325442985436

Mathematica

x = {1.47, 1.50 , 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83};
y = {52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46};
X = {x^0, x^1, x^2};
b = y.PseudoInverse[X]
 
->{128.813, -143.162, 61.9603}

MATLAB

The slash and backslash operators can be used for solving this problem. Here some random data is generated.

n = 100; k = 10;
y = randn (1,n); % generate random vector y
X = randn (k,n); % generate random matrix X
b = y / X

b =
 0.1457109 -0.0777564 -0.0712427 -0.0166193 0.0292955 -0.0079111 0.2265894 -0.0561589 -0.1752146 -0.2577663

In its transposed form yt = Xt * bt, the backslash operator can be used.

yt = y'; Xt = X';
bt = Xt \ yt

bt =
 0.1457109
-0.0777564
-0.0712427
-0.0166193
 0.0292955
-0.0079111
 0.2265894
-0.0561589
-0.1752146
-0.2577663

Here is the example estimating the polynomial fit to the Wikipedia data:

x = [1.47 1.50 1.52 1.55 1.57 1.60 1.63 1.65 1.68 1.70 1.73 1.75 1.78 1.80 1.83];
y = [52.21 53.12 54.48 55.84 57.20 58.57 59.93 61.29 63.11 64.47 66.28 68.10 69.92 72.19 74.46];
X = [x.^0; x.^1; x.^2];
b = y/X

128.813 -143.162 61.960

Instead of the slash operator "/", one can also write:

 b = y * X' * inv(X * X') 

or

 b = y * pinv(X) 

PARI/GP

pseudoinv(M)=my(sz=matsize(M),T=conj(M)~);if(sz[1]<sz[2],T/(M*T),(T*M)^-1*T)
addhelp(pseudoinv, "pseudoinv(M): Moore-Penrose pseudoinverse of the matrix M.");
 
y*pseudoinv(X)

PicoLisp

(scl 20)

# Matrix transposition
(de matTrans (Mat)
   (apply mapcar Mat list) )

# Matrix multiplication
(de matMul (Mat1 Mat2)
   (mapcar
      '((Row)
         (apply mapcar Mat2
            '(@ (sum */ Row (rest) (1.0 .))) ) )
      Mat1 ) )

# Matrix identity
(de matIdent (N)
   (let L (need N (1.0) 0)
      (mapcar '(() (copy (rot L))) L) ) )

# Reduced row echelon form
(de reducedRowEchelonForm (Mat)
   (let (Lead 1  Cols (length (car Mat)))
      (for (X Mat X (cdr X))
         (NIL
            (loop
               (T (seek '((R) (n0 (get R 1 Lead))) X)
                  @ )
               (T (> (inc 'Lead) Cols)) ) )
         (xchg @ X)
         (let D (get X 1 Lead)
            (map
               '((R) (set R (*/ (car R) 1.0 D)))
               (car X) ) )
         (for Y Mat
            (unless (== Y (car X))
               (let N (- (get Y Lead))
                  (map
                     '((Dst Src)
                        (inc Dst (*/ N (car Src) 1.0)) )
                     Y
                     (car X) ) ) ) )
         (T (> (inc 'Lead) Cols)) ) )
   Mat )

Translation of: JavaScript

(de matInverse (Mat)
   (let N (length Mat)
      (unless (= N (length (car Mat)))
         (quit "can't invert a non-square matrix") )
      (mapc conc Mat (matIdent N))
      (mapcar '((L) (tail N L)) (reducedRowEchelonForm Mat)) ) )

(de columnVector (Ary)
   (mapcar cons Ary) )

(de regressionCoefficients (Mat X)
   (let Xt (matTrans X)
      (matMul (matMul (matInverse (matMul Xt X)) Xt) Mat) ) )

(setq
   Y (columnVector (1.0 2.0 3.0 4.0 5.0))
   X (columnVector (2.0 1.0 3.0 4.0 5.0)) )

(round (caar (regressionCoefficients Y X)) 17)

Output:

-> "0.98181818181818182"

Python

With NumPy, the solution is quite similar to MATLAB's.

 
import numpy as np
from numpy.random import random

n = 100; k = 10
y = np.mat(random((1,n)))
X = np.mat(random((k,n)))

b = y * X.T * np.linalg.inv(X*X.T)
print(b)
 

Output:

[[ 0.19733805  0.06266746  0.04091507  0.03584238  0.22663745  0.22091783
   0.08189627 -0.30304339  0.17634972  0.09765649]]
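
The same approach applies to the Wikipedia height/weight data used by the other solutions; a minimal sketch with numpy.linalg.lstsq, which avoids forming an explicit inverse (the variable names here are illustrative):

import numpy as np

height = np.array([1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65,
                   1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83])
weight = np.array([52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29,
                   63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46])

# Design matrix with columns 1, x, x^2
X = np.column_stack((np.ones_like(height), height, height**2))
beta, *_ = np.linalg.lstsq(X, weight, rcond=None)
print(beta)  # should be close to [128.813, -143.162, 61.960]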

R

R provides the lm() function for linear regression.

## Wikipedia Data
x <- c(1.47, 1.50, 1.52, 1.55, 1.57, 1.60, 1.63, 1.65, 1.68, 1.70, 1.73, 1.75, 1.78, 1.80, 1.83)
y <- c(52.21, 53.12, 54.48, 55.84, 57.20, 58.57, 59.93, 61.29, 63.11, 64.47, 66.28, 68.10, 69.92, 72.19, 74.46)

lm( y ~ x + I(x^2))

Output:

Call:
lm(formula = y ~ x + I(x^2))

Coefficients:
(Intercept)            x       I(x^2)  
     128.81      -143.16        61.96  

A simple implementation of multiple regression in native R is useful to illustrate R's model description and linear algebra capabilities.

simpleMultipleReg <- function(formula) {

    ## parse and evaluate the model formula
    mf <- model.frame(formula)

    ## create design matrix
    X <- model.matrix(attr(mf, "terms"), mf)

    ## create dependent variable
    Y <- model.response(mf)

    ## solve
    solve(t(X) %*% X) %*% t(X) %*% Y
}

simpleMultipleReg(y ~ x + I(x^2))

This produces the same coefficients as lm():

                  [,1]
(Intercept)  128.81280
x           -143.16202
I(x^2)        61.96033


A more efficient way to compute (X'X)^{-1}X'y than the method above is to solve the linear system directly, using the crossprod function:

solve( crossprod(X), crossprod(X, Y))

Racket

 
#lang racket
(require math)
(define T matrix-transpose)
 
(define (fit X y)
  (matrix-solve (matrix* (T X) X) (matrix* (T X) y)))
 

Test:

 
(fit (matrix [[1 2]
              [2 5]
              [3 7]
              [4 9]])
     (matrix [[1]
              [2]
              [3]
              [9]]))
; Output:
(array #[#[9 1/3] #[-3 1/3]])
 

Ruby

Using the standard library Matrix class:

require 'matrix'
 
def regression_coefficients y, x
  y = Matrix.column_vector y.map { |i| i.to_f }
  x = Matrix.columns x.map { |xi| xi.map { |i| i.to_f }}

  (x.t * x).inverse * x.t * y
end
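
Each inner array of x becomes one column of the design matrix (Matrix.columns takes a list of columns), so a fit with k coefficients passes k arrays of n values each.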

Testing:

puts regression_coefficients([1, 2, 3, 4, 5], [ [2, 1, 3, 4, 5] ])

Output:

Matrix[[0.981818181818182]]

Tcl

Library: Tcllib (Package: math::linearalgebra)

package require math::linearalgebra
namespace eval multipleRegression {
    namespace export regressionCoefficients
    namespace import ::math::linearalgebra::*

    # Matrix inversion is defined in terms of Gaussian elimination
    # Note that we assume (correctly) that we have a square matrix
    proc invert {matrix} {
        solveGauss $matrix [mkIdentity [lindex [shape $matrix] 0]]
    }
    # Implement the Ordinary Least Squares method
    proc regressionCoefficients {y x} {
        matmul [matmul [invert [matmul $x [transpose $x]]] $x] $y
    }
}
namespace import multipleRegression::regressionCoefficients
namespace import multipleRegression::regressionCoefficients

Using an example from the Wikipedia page on the correlation of height and weight:

# Simple helper just for this example
proc map {n exp list} {
    upvar 1 $n v
    set r {}; foreach v $list {lappend r [uplevel 1 $exp]}; return $r
}

# Data from wikipedia
set x {
    1.47 1.50 1.52 1.55 1.57 1.60 1.63 1.65 1.68 1.70 1.73 1.75 1.78 1.80 1.83
}
set y {
    52.21 53.12 54.48 55.84 57.20 58.57 59.93 61.29 63.11 64.47 66.28 68.10
    69.92 72.19 74.46
}
# Wikipedia states that fitting up to the square of x[i] is worth it
puts [regressionCoefficients $y [map n {map v {expr {$v**$n}} $x} {0 1 2}]]

Produces this output (a 3-vector of coefficients):

128.81280358170625 -143.16202286630732 61.96032544293041

Ursala

This exact problem is solved by the DGELSD function from the LAPACK library, which is callable in Ursala like this.

regression_coefficients = lapack..dgelsd

Test program:

x = 
 
<
<7.589183e+00,1.703609e+00,-4.477162e+00>,
<-4.597851e+00,9.434889e+00,-6.543450e+00>,
<4.588202e-01,-6.115153e+00,1.331191e+00>>
 
y = <1.745005e+00,-4.448092e+00,-4.160842e+00>
 
#cast %eL
 
example = regression_coefficients(x,y)

The matrix x needn't be square, and has one row for each data point. The length of y must equal the number of rows in x, and the number of coefficients returned will be the number of columns in x. It would be more typical in practice to initialize x by evaluating a set of basis functions chosen to model some empirical data, but the regression solver is indifferent to the model.

Output:

<9.335612e-01,1.101323e+00,1.611777e+00>

A similar method can be used for regression with complex numbers by substituting zgelsd for dgelsd, above.
