Gradient descent: Difference between revisions
m (→{{header|Phix}}: added syntax colouring, made p2js compatible, lowered/improved tolerance, added iteration check/limit) |
Thundergnat (talk | contribs) m (syntax highlighting fixup automation) |
||
Line 18: | Line 18: | ||
{{Trans|Fortran}} |
{{Trans|Fortran}} |
||
THe results agree with the Fortran sample and the Julia sample to 6 places. |
THe results agree with the Fortran sample and the Julia sample to 6 places. |
||
< |
<syntaxhighlight lang="algol68">PROC steepest descent = ( REF[]LONG REAL x, LONG REAL alphap, tolerance )VOID: |
||
BEGIN |
BEGIN |
||
LONG REAL alpha := alphap; |
LONG REAL alpha := alphap; |
||
Line 89: | Line 89: | ||
print( ( "Testing steepest descent method:", newline ) ); |
print( ( "Testing steepest descent method:", newline ) ); |
||
print( ( "The minimum is at x[0] = ", fixed( x[ 0 ], -10, 6 ), ", x[1] = ", fixed( x[ 1 ], -10, 6 ), newline ) ) |
print( ( "The minimum is at x[0] = ", fixed( x[ 0 ], -10, 6 ), ", x[1] = ", fixed( x[ 1 ], -10, 6 ), newline ) ) |
||
END</ |
END</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 99: | Line 99: | ||
{{Trans|ALGOL 68}} which is a {{Trans|Go}} with the gradient function from {{Trans|Fortran}} |
{{Trans|ALGOL 68}} which is a {{Trans|Go}} with the gradient function from {{Trans|Fortran}} |
||
The results agree (to 6 places) with the Fortran and Julia samples. |
The results agree (to 6 places) with the Fortran and Julia samples. |
||
< |
<syntaxhighlight lang="algolw">begin |
||
procedure steepestDescent ( long real array x ( * ); long real value alphap, tolerance ) ; |
procedure steepestDescent ( long real array x ( * ); long real value alphap, tolerance ) ; |
||
begin |
begin |
||
Line 163: | Line 163: | ||
write( "The minimum is at x(0) = ", x( 0 ), ", x(1) = ", x( 1 ) ) |
write( "The minimum is at x(0) = ", x( 0 ), ", x(1) = ", x( 1 ) ) |
||
end |
end |
||
end.</ |
end.</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 193: | Line 193: | ||
(%o7) done |
(%o7) done |
||
The optimization subroutine GD sets the reverse communication variable IFLAG. This allows the evaluation of the gradient to be done separately. |
The optimization subroutine GD sets the reverse communication variable IFLAG. This allows the evaluation of the gradient to be done separately. |
||
< |
<syntaxhighlight lang="fortran"> SUBROUTINE EVALFG (N, X, F, G) |
||
IMPLICIT NONE |
IMPLICIT NONE |
||
INTEGER N |
INTEGER N |
||
Line 274: | Line 274: | ||
STOP 'program complete' |
STOP 'program complete' |
||
END |
END |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre>After 31 steps, found minimum at x= 0.107626843548372 y= -1.223259663839920 of f= -0.750063420551493 |
<pre>After 31 steps, found minimum at x= 0.107626843548372 y= -1.223259663839920 of f= -0.750063420551493 |
||
Line 284: | Line 284: | ||
However, since it was originally written, I've substituted Fortran's gradient function for the original one (see Talk page) which now gives results which agree (to 6 decimal places) with those of the Fortran, Julia, Algol 68 and Algol W solutions. As a number of other solutions are based on this one, I suggest their authors update them accordingly. |
However, since it was originally written, I've substituted Fortran's gradient function for the original one (see Talk page) which now gives results which agree (to 6 decimal places) with those of the Fortran, Julia, Algol 68 and Algol W solutions. As a number of other solutions are based on this one, I suggest their authors update them accordingly. |
||
< |
<syntaxhighlight lang="go">package main |
||
import ( |
import ( |
||
Line 361: | Line 361: | ||
fmt.Println("Testing steepest descent method:") |
fmt.Println("Testing steepest descent method:") |
||
fmt.Printf("The minimum is at x = %f, y = %f for which f(x, y) = %f.\n", x[0], x[1], g(x)) |
fmt.Printf("The minimum is at x = %f, y = %f for which f(x, y) = %f.\n", x[0], x[1], g(x)) |
||
}</ |
}</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 370: | Line 370: | ||
=={{header|Julia}}== |
=={{header|Julia}}== |
||
< |
<syntaxhighlight lang="julia">using Optim, Base.MathConstants |
||
f(x) = (x[1] - 1) * (x[1] - 1) * e^(-x[2]^2) + x[2] * (x[2] + 2) * e^(-2 * x[1]^2) |
f(x) = (x[1] - 1) * (x[1] - 1) * e^(-x[2]^2) + x[2] * (x[2] + 2) * e^(-2 * x[1]^2) |
||
println(optimize(f, [0.1, -1.0], GradientDescent())) |
println(optimize(f, [0.1, -1.0], GradientDescent())) |
||
</ |
</syntaxhighlight>{{out}} |
||
<pre> |
<pre> |
||
Results of Optimization Algorithm |
Results of Optimization Algorithm |
||
Line 400: | Line 400: | ||
The gradient function has been rewritten to remove a term in the partial derivative with respect to y (two terms instead of three). This doesn’t change the result which agrees with those of Go, Fortran, Julia, etc. |
The gradient function has been rewritten to remove a term in the partial derivative with respect to y (two terms instead of three). This doesn’t change the result which agrees with those of Go, Fortran, Julia, etc. |
||
< |
<syntaxhighlight lang="nim">import math, strformat |
||
Line 466: | Line 466: | ||
steepDescent(g, gradG, x, Alpha, Tolerance) |
steepDescent(g, gradG, x, Alpha, Tolerance) |
||
echo "Testing steepest descent method:" |
echo "Testing steepest descent method:" |
||
echo &"The minimum is at x = {x[0]:.12f}, y = {x[1]:.12f} for which f(x, y) = {g(x):.12f}"</ |
echo &"The minimum is at x = {x[0]:.12f}, y = {x[1]:.12f} for which f(x, y) = {g(x):.12f}"</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 475: | Line 475: | ||
Calculate with <code>bignum</code> for numerical stability. |
Calculate with <code>bignum</code> for numerical stability. |
||
{{trans|Raku}} |
{{trans|Raku}} |
||
< |
<syntaxhighlight lang="perl">use strict; |
||
use warnings; |
use warnings; |
||
use bignum; |
use bignum; |
||
Line 528: | Line 528: | ||
my @x = <0.1 -1>; # Initial guess of location of minimum. |
my @x = <0.1 -1>; # Initial guess of location of minimum. |
||
printf "The minimum is at x[0] = %.6f, x[1] = %.6f", steepestDescent($alpha, $tolerance, @x);</ |
printf "The minimum is at x[0] = %.6f, x[1] = %.6f", steepestDescent($alpha, $tolerance, @x);</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre>The minimum is at x[0] = 0.107653, x[1] = -1.223370</pre> |
<pre>The minimum is at x[0] = 0.107653, x[1] = -1.223370</pre> |
||
Line 534: | Line 534: | ||
=={{header|Phix}}== |
=={{header|Phix}}== |
||
{{trans|Go}} |
{{trans|Go}} |
||
<!--< |
<!--<syntaxhighlight lang="phix">(phixonline)--> |
||
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> |
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> |
||
<span style="color: #000080;font-style:italic;">-- Function for which minimum is to be found.</span> |
<span style="color: #000080;font-style:italic;">-- Function for which minimum is to be found.</span> |
||
Line 582: | Line 582: | ||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Testing steepest descent method:\n"</span><span style="color: #0000FF;">)</span> |
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Testing steepest descent method:\n"</span><span style="color: #0000FF;">)</span> |
||
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"The minimum is at x = %.13f, y = %.13f for which f(x, y) = %.15f\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">x</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">x</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">g</span><span style="color: #0000FF;">(</span><span style="color: #000000;">x</span><span style="color: #0000FF;">)})</span> |
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"The minimum is at x = %.13f, y = %.13f for which f(x, y) = %.15f\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">x</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">x</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">g</span><span style="color: #0000FF;">(</span><span style="color: #000000;">x</span><span style="color: #0000FF;">)})</span> |
||
<!--</ |
<!--</syntaxhighlight>--> |
||
{{out}} |
{{out}} |
||
Results match Fortran, most others to 6 or 7dp<br> |
Results match Fortran, most others to 6 or 7dp<br> |
||
Line 600: | Line 600: | ||
I could have used ∇ and Δ in the variable names, but it looked too confusing, so I've gone with <var>grad-</var> and <var>del-</var> |
I could have used ∇ and Δ in the variable names, but it looked too confusing, so I've gone with <var>grad-</var> and <var>del-</var> |
||
< |
<syntaxhighlight lang="racket">#lang racket |
||
(define (apply-vector f v) |
(define (apply-vector f v) |
||
Line 647: | Line 647: | ||
(module+ main |
(module+ main |
||
(Gradient-descent)) |
(Gradient-descent)) |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
Line 655: | Line 655: | ||
(formerly Perl 6) |
(formerly Perl 6) |
||
{{trans|Go}} |
{{trans|Go}} |
||
<lang |
<syntaxhighlight lang="raku" line># 20200904 Updated Raku programming solution |
||
sub steepestDescent(@x, $alpha is copy, $h) { |
sub steepestDescent(@x, $alpha is copy, $h) { |
||
Line 694: | Line 694: | ||
say "Testing steepest descent method:"; |
say "Testing steepest descent method:"; |
||
say "The minimum is at x[0] = ", @x[0], ", x[1] = ", @x[1]; |
say "The minimum is at x[0] = ", @x[0], ", x[1] = ", @x[1]; |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre>Testing steepest descent method: |
<pre>Testing steepest descent method: |
||
Line 702: | Line 702: | ||
=={{header|REXX}}== |
=={{header|REXX}}== |
||
The ''tolerance'' can be much smaller; a tolerance of '''1e-200''' was tested. It works, but causes the program to execute a bit slower, but still sub-second execution time. |
The ''tolerance'' can be much smaller; a tolerance of '''1e-200''' was tested. It works, but causes the program to execute a bit slower, but still sub-second execution time. |
||
< |
<syntaxhighlight lang="rexx">/*REXX pgm searches for minimum values of the bi─variate function (AKA steepest descent)*/ |
||
numeric digits (length( e() ) - length(.) ) % 2 /*use half of number decimal digs in E.*/ |
numeric digits (length( e() ) - length(.) ) % 2 /*use half of number decimal digs in E.*/ |
||
tolerance= 1e-30 /*use a much smaller tolerance for REXX*/ |
tolerance= 1e-30 /*use a much smaller tolerance for REXX*/ |
||
Line 743: | Line 743: | ||
numeric form; m.=9; parse value format(x,2,1,,0) 'E0' with g "E" _ .; g=g *.5'e'_ %2 |
numeric form; m.=9; parse value format(x,2,1,,0) 'E0' with g "E" _ .; g=g *.5'e'_ %2 |
||
do j=0 while h>9; m.j=h; h= h % 2 + 1; end /*j*/ |
do j=0 while h>9; m.j=h; h= h % 2 + 1; end /*j*/ |
||
do k=j+5 to 0 by -1; numeric digits m.k; g=(g+x/g)*.5; end /*k*/; return g</ |
do k=j+5 to 0 by -1; numeric digits m.k; g=(g+x/g)*.5; end /*k*/; return g</syntaxhighlight> |
||
{{out|output|text= when using the internal default inputs:}} |
{{out|output|text= when using the internal default inputs:}} |
||
<pre> |
<pre> |
||
Line 752: | Line 752: | ||
=={{header|Scala}}== |
=={{header|Scala}}== |
||
{{trans|Go}} |
{{trans|Go}} |
||
< |
<syntaxhighlight lang="scala">object GradientDescent { |
||
/** Steepest descent method modifying input values*/ |
/** Steepest descent method modifying input values*/ |
||
Line 826: | Line 826: | ||
} |
} |
||
} |
} |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 838: | Line 838: | ||
<br><br> |
<br><br> |
||
<syntaxhighlight lang="typescript"> |
|||
<lang Typescript> |
|||
// Using the steepest-descent method to search |
// Using the steepest-descent method to search |
||
// for minimum values of a multi-variable function |
// for minimum values of a multi-variable function |
||
Line 935: | Line 935: | ||
gradientDescentMain(); |
gradientDescentMain(); |
||
</syntaxhighlight> |
|||
</lang> |
|||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
||
Line 946: | Line 946: | ||
* [Linear Regression using Gradient Descent by Adarsh Menon] |
* [Linear Regression using Gradient Descent by Adarsh Menon] |
||
<br><br> |
<br><br> |
||
<syntaxhighlight lang="typescript"> |
|||
<lang Typescript> |
|||
let data: number[][] = |
let data: number[][] = |
||
[[32.5023452694530, 31.70700584656990], |
[[32.5023452694530, 31.70700584656990], |
||
Line 1,094: | Line 1,094: | ||
gradientDescentMain(); |
gradientDescentMain(); |
||
</syntaxhighlight> |
|||
</lang> |
|||
=={{header|Wren}}== |
=={{header|Wren}}== |
||
{{trans|Go}} |
{{trans|Go}} |
||
{{libheader|Wren-fmt}} |
{{libheader|Wren-fmt}} |
||
< |
<syntaxhighlight lang="ecmascript">import "/fmt" for Fmt |
||
// Function for which minimum is to be found. |
// Function for which minimum is to be found. |
||
Line 1,161: | Line 1,161: | ||
steepestDescent.call(x, alpha, tolerance) |
steepestDescent.call(x, alpha, tolerance) |
||
System.print("Testing steepest descent method:") |
System.print("Testing steepest descent method:") |
||
Fmt.print("The minimum is at x = $f, y = $f for which f(x, y) = $f.", x[0], x[1], g.call(x))</ |
Fmt.print("The minimum is at x = $f, y = $f for which f(x, y) = $f.", x[0], x[1], g.call(x))</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
Line 1,171: | Line 1,171: | ||
=={{header|zkl}}== |
=={{header|zkl}}== |
||
{{trans|Go}} with tweaked gradG |
{{trans|Go}} with tweaked gradG |
||
< |
<syntaxhighlight lang="zkl">fcn steepestDescent(f, x,y, alpha, h){ |
||
g0:=f(x,y); # Initial estimate of result. |
g0:=f(x,y); # Initial estimate of result. |
||
fix,fiy := gradG(f,x,y,h); # Calculate initial gradient |
fix,fiy := gradG(f,x,y,h); # Calculate initial gradient |
||
Line 1,190: | Line 1,190: | ||
g0:=f(x,y); |
g0:=f(x,y); |
||
return((f(x + h, y) - g0)/h, (f(x, y + h) - g0)/h) |
return((f(x + h, y) - g0)/h, (f(x, y + h) - g0)/h) |
||
}</ |
}</syntaxhighlight> |
||
< |
<syntaxhighlight lang="zkl">fcn f(x,y){ # Function for which minimum is to be found. |
||
(x - 1).pow(2)*(-y.pow(2)).exp() + |
(x - 1).pow(2)*(-y.pow(2)).exp() + |
||
y*(y + 2)*(-2.0*x.pow(2)).exp() |
y*(y + 2)*(-2.0*x.pow(2)).exp() |
||
Line 1,202: | Line 1,202: | ||
println("Testing steepest descent method:"); |
println("Testing steepest descent method:"); |
||
println("The minimum is at (x,y) = (%f,%f). f(x,y) = %f".fmt(x,y,f(x,y)));</ |
println("The minimum is at (x,y) = (%f,%f). f(x,y) = %f".fmt(x,y,f(x,y)));</syntaxhighlight> |
||
{{out}} |
{{out}} |
||
<pre> |
<pre> |
Revision as of 14:16, 27 August 2022
Gradient descent (also known as steepest descent) is a first-order iterative optimization algorithm for finding the minimum of a function which is described in this Wikipedia article.
- Task
Use this algorithm to search for minimum values of the bi-variate function:
f(x, y) = (x - 1)(x - 1)e^(-y^2) + y(y+2)e^(-2x^2)
around x = 0.1 and y = -1.
This book excerpt shows sample C# code for solving this task.
ALGOL 68
modified to use the actual gradient function -
THe results agree with the Fortran sample and the Julia sample to 6 places.
PROC steepest descent = ( REF[]LONG REAL x, LONG REAL alphap, tolerance )VOID:
BEGIN
LONG REAL alpha := alphap;
LONG REAL g0 := g( x ); # Initial estimate of result. #
# Calculate initial gradient. #
[ LWB x : UPB x ]LONG REAL fi := grad g( x );
# Calculate initial norm. #
LONG REAL del g := 0.0;
FOR i FROM LWB x TO UPB x DO
del g +:= fi[ i ] * fi[ i ]
OD;
del g := long sqrt( del g );
LONG REAL b := alpha / del g;
# Iterate until value is <= tolerance. #
WHILE del g > tolerance DO
# Calculate next value. #
FOR i FROM LWB x TO UPB x DO
x[i] -:= b * fi[i]
OD;
# Calculate next gradient. #
fi := grad g( x );
# Calculate next norm. #
del g := 0;
FOR i FROM LWB x TO UPB x DO
del g +:= fi[ i ] * fi[ i ]
OD;
del g := long sqrt( del g );
IF del g > 0 THEN
b := alpha / del g;
# Calculate next value. #
LONG REAL g1 := g( x );
# Adjust parameter. #
IF g1 > g0 THEN
alpha /:= 2
ELSE
g0 := g1
FI
FI
OD
END # steepest descent # ;
# calculates the gradient of g(p). #
# The derivitives wrt x and y are (as in the Fortran sample ): #
# g' wrt x = 2( x - 1 )e^( - ( y^2 ) ) - 4xe^( -2( x^2) )y( y + 2 ) #
# g' wrt y = ( -2( x-1 )^2ye^( - (y^2) )) + e^(-2( x^2 ) )( y + 2 ) + e^( -2( x^2 ) )y #
PROC grad g = ( []LONG REAL p )[]LONG REAL:
BEGIN
[ LWB p : UPB p ]LONG REAL z;
LONG REAL x = p[ 0 ];
LONG REAL y = p[ 1 ];
z[ 0 ] := 2 * ( x - 1 ) * long exp( - ( y * y ) )
- 4 * x * long exp( -2 * ( x * x ) ) * y * ( y + 2 );
z[ 1 ] := ( -2 * ( x - 1 ) * ( x - 1 ) * y ) * long exp( - y * y )
+ long exp( -2 * x * x ) * ( y + 2 )
+ long exp( -2 * x * x ) * y;
z
END # grad g # ;
# Function for which minimum is to be found. #
# g( x, y ) = ( ( x - 1 )^2 )e^( - ( x^2 ) ) + y( y + 2 )e^( - 2(x^2)) #
PROC g = ( []LONG REAL x )LONG REAL:
( x[ 0 ] - 1 )
* ( x[ 0 ] - 1 )
* long exp( - x[ 1 ] * x[ 1 ] ) + x[ 1 ] * ( x[ 1 ] + 2 )
* long exp( - 2 * x[ 0 ] * x[ 0 ] )
;
BEGIN
LONG REAL tolerance := 0.0000006;
LONG REAL alpha := 0.1;
[ 0 : 1 ]LONG REAL x := ( []LONG REAL( 0.1, -1 ) )[ AT 0 ]; # Initial guess of location of minimum. #
steepest descent( x, alpha, tolerance );
print( ( "Testing steepest descent method:", newline ) );
print( ( "The minimum is at x[0] = ", fixed( x[ 0 ], -10, 6 ), ", x[1] = ", fixed( x[ 1 ], -10, 6 ), newline ) )
END
- Output:
Testing steepest descent method: The minimum is at x[0] = 0.107627, x[1] = -1.223260
ALGOL W
which is a
with the gradient function from
The results agree (to 6 places) with the Fortran and Julia samples.
begin
procedure steepestDescent ( long real array x ( * ); long real value alphap, tolerance ) ;
begin
long real array fi ( 0 :: 1 );
long real alpha, g0, g1, delG, b;
alpha := alphap;
g0 := g( x ); % Initial estimate of result. %
% Calculate initial gradient. %
gradG( fi, x );
% Calculate initial norm. %
delG := 0.0;
for i := 0 until 1 do delG := delG + ( fi( i ) * fi( i ) );
delG := longSqrt( delG );
b := alpha / delG;
% Iterate until value is <= tolerance. %
while delG > tolerance do begin
% Calculate next value. %
for i := 0 until 1 do x(i) := x( i ) - ( b * fi(i) );
% Calculate next gradient. %
gradG( fi, x );
% Calculate next norm. %
delG := 0;
for i := 0 until 1 do delG := delg + ( fi( i ) * fi( i ) );
delG := longSqrt( delG );
if delG > 0 then begin
b := alpha / delG;
% Calculate next value. %
g1 := g( x );
% Adjust parameter. %
if g1 > g0
then alpha := alpha / 2
else g0 := g1
end if_delG_gt_0
end while_delG_gt_tolerance
end steepestDescent ;
% Provides a rough calculation of gradient g(x). %
procedure gradG ( long real array z, p ( * ) ) ;
begin
long real x, y;
x := p( 0 );
y := p( 1 );
z( 0 ) := 2 * ( x - 1 ) * longExp( - ( y * y ) )
- 4 * x * longExp( -2 * ( x * x ) ) * y * ( y + 2 );
z( 1 ) := ( -2 * ( x - 1 ) * ( x - 1 ) * y ) * longExp( - y * y )
+ longExp( -2 * x * x ) * ( y + 2 )
+ longExp( -2 * x * x ) * y
end gradG ;
% Function for which minimum is to be found. %
long real procedure g ( long real array x ( * ) ) ;
( x( 0 ) - 1 ) * ( x( 0 ) - 1 ) * longExp( - x( 1 ) * x( 1 ) )
+ x( 1 ) * ( x( 1 ) + 2 ) * longExp( - 2 * x( 0 ) * x( 0 ) )
;
begin
long real alpha, tolerance;
long real array x ( 0 :: 1 );
x( 0 ) := 0.1; % Initial guess of location of minimum. %
x( 1 ) := -1;
tolerance := 0.0000006;
alpha := 0.1;
steepestDescent( x, alpha, tolerance );
r_format := "A"; r_w := 11; r_d := 7; s_w := 0; % output formatting %
write( "Testing steepest descent method:" );
write( "The minimum is at x(0) = ", x( 0 ), ", x(1) = ", x( 1 ) )
end
end.
- Output:
Testing steepest descent method: The minimum is at x(0) = 0.1076268, x(1) = -1.2232596
Fortran
Compiler: gfortran 8.3.0
The way a FORTRAN programmer would do this would be to automatically differentiate the function using the diff command in Maxima:
(%i3) (x-1)*(x-1)*exp(-y^2)+y*(y+2)*exp(-2*x^2); 2 2 2 - y - 2 x (%o3) (x - 1) %e + %e y (y + 2) (%i4) diff(%o3,x); 2 2 - y - 2 x (%o4) 2 (x - 1) %e - 4 x %e y (y + 2) (%i5) diff(%o3,y); 2 2 2 2 - y - 2 x - 2 x (%o5) (- 2 (x - 1) y %e ) + %e (y + 2) + %e y
and then have it automatically turned into statements with the fortran command:
(%i6) fortran(%o4); 2*(x-1)*exp(-y**2)-4*x*exp(-2*x**2)*y*(y+2) (%o6) done (%i7) fortran(%o5); (-2*(x-1)**2*y*exp(-y**2))+exp(-2*x**2)*(y+2)+exp(-2*x**2)*y (%o7) done
The optimization subroutine GD sets the reverse communication variable IFLAG. This allows the evaluation of the gradient to be done separately.
SUBROUTINE EVALFG (N, X, F, G)
IMPLICIT NONE
INTEGER N
DOUBLE PRECISION X(N), F, G(N)
F = (X(1) - 1.D0)**2 * EXP(-X(2)**2) +
$ X(2) * (X(2) + 2.D0) * EXP(-2.D0 * X(1)**2)
G(1) = 2.D0 * (X(1) - 1.D0) * EXP(-X(2)**2) - 4.D0 * X(1) *
$ EXP(-2.D0 * X(1)**2) * X(2) * (X(2) + 2.D0)
G(2) = (-2.D0 * (X(1) - 1.D0)**2 * X(2) * EXP(-X(2)**2)) +
$ EXP(-2.D0 * X(1)**2) * (X(2) + 2.D0) +
$ EXP(-2.D0 * X(1)**2) * X(2)
RETURN
END
*-----------------------------------------------------------------------
* gd - Gradient descent
* G must be set correctly at the initial point X.
*
*___Name______Type________In/Out___Description_________________________
* N Integer In Number of Variables.
* X(N) Double Both Variables
* G(N) Double Both Gradient
* TOL Double In Relative convergence tolerance
* IFLAG Integer Out Reverse Communication Flag
* on output: 0 done
* 1 compute G and call again
*-----------------------------------------------------------------------
SUBROUTINE GD (N, X, G, TOL, IFLAG)
IMPLICIT NONE
INTEGER N, IFLAG
DOUBLE PRECISION X(N), G(N), TOL
DOUBLE PRECISION ETA
PARAMETER (ETA = 0.3D0) ! Learning rate
INTEGER I
DOUBLE PRECISION GNORM ! norm of gradient
GNORM = 0.D0 ! convergence test
DO I = 1, N
GNORM = GNORM + G(I)**2
END DO
GNORM = SQRT(GNORM)
IF (GNORM < TOL) THEN
IFLAG = 0
RETURN ! success
END IF
DO I = 1, N ! take step
X(I) = X(I) - ETA * G(I)
END DO
IFLAG = 1
RETURN ! let main program evaluate G
END ! of gd
PROGRAM GDDEMO
IMPLICIT NONE
INTEGER N
PARAMETER (N = 2)
INTEGER ITER, J, IFLAG
DOUBLE PRECISION X(N), F, G(N), TOL
X(1) = -0.1D0 ! initial values
X(2) = -1.0D0
TOL = 1.D-15
CALL EVALFG (N, X, F, G)
IFLAG = 0
DO J = 1, 1 000 000
CALL GD (N, X, G, TOL, IFLAG)
IF (IFLAG .EQ. 1) THEN
CALL EVALFG (N, X, F, G)
ELSE
ITER = J
GO TO 50
END IF
END DO
STOP 'too many iterations!'
50 PRINT '(A, I7, A, F19.15, A, F19.15, A, F19.15)',
$ 'After ', ITER, ' steps, found minimum at x=',
$ X(1), ' y=', X(2), ' of f=', F
STOP 'program complete'
END
- Output:
After 31 steps, found minimum at x= 0.107626843548372 y= -1.223259663839920 of f= -0.750063420551493 STOP program complete
Go
This is a translation of the C# code in the book excerpt linked to above and hence also of the first Typescript example below.
However, since it was originally written, I've substituted Fortran's gradient function for the original one (see Talk page) which now gives results which agree (to 6 decimal places) with those of the Fortran, Julia, Algol 68 and Algol W solutions. As a number of other solutions are based on this one, I suggest their authors update them accordingly.
package main
import (
"fmt"
"math"
)
func steepestDescent(x []float64, alpha, tolerance float64) {
n := len(x)
g0 := g(x) // Initial estimate of result.
// Calculate initial gradient.
fi := gradG(x)
// Calculate initial norm.
delG := 0.0
for i := 0; i < n; i++ {
delG += fi[i] * fi[i]
}
delG = math.Sqrt(delG)
b := alpha / delG
// Iterate until value is <= tolerance.
for delG > tolerance {
// Calculate next value.
for i := 0; i < n; i++ {
x[i] -= b * fi[i]
}
// Calculate next gradient.
fi = gradG(x)
// Calculate next norm.
delG = 0
for i := 0; i < n; i++ {
delG += fi[i] * fi[i]
}
delG = math.Sqrt(delG)
b = alpha / delG
// Calculate next value.
g1 := g(x)
// Adjust parameter.
if g1 > g0 {
alpha /= 2
} else {
g0 = g1
}
}
}
// Provides a rough calculation of gradient g(p).
func gradG(p []float64) []float64 {
z := make([]float64, len(p))
x := p[0]
y := p[1]
z[0] = 2*(x-1)*math.Exp(-y*y) - 4*x*math.Exp(-2*x*x)*y*(y+2)
z[1] = -2*(x-1)*(x-1)*y*math.Exp(-y*y) + math.Exp(-2*x*x)*(y+2) + math.Exp(-2*x*x)*y
return z
}
// Function for which minimum is to be found.
func g(x []float64) float64 {
return (x[0]-1)*(x[0]-1)*
math.Exp(-x[1]*x[1]) + x[1]*(x[1]+2)*
math.Exp(-2*x[0]*x[0])
}
func main() {
tolerance := 0.0000006
alpha := 0.1
x := []float64{0.1, -1} // Initial guess of location of minimum.
steepestDescent(x, alpha, tolerance)
fmt.Println("Testing steepest descent method:")
fmt.Printf("The minimum is at x = %f, y = %f for which f(x, y) = %f.\n", x[0], x[1], g(x))
}
- Output:
Testing steepest descent method: The minimum is at x = 0.107627, y = -1.223260 for which f(x, y) = -0.750063.
Julia
using Optim, Base.MathConstants
f(x) = (x[1] - 1) * (x[1] - 1) * e^(-x[2]^2) + x[2] * (x[2] + 2) * e^(-2 * x[1]^2)
println(optimize(f, [0.1, -1.0], GradientDescent()))
- Output:
Results of Optimization Algorithm * Algorithm: Gradient Descent * Starting Point: [0.1,-1.0] * Minimizer: [0.107626844383003,-1.2232596628723371] * Minimum: -7.500634e-01 * Iterations: 14 * Convergence: true * |x - x'| ≤ 0.0e+00: false |x - x'| = 2.97e-09 * |f(x) - f(x')| ≤ 0.0e+00 |f(x)|: true |f(x) - f(x')| = 0.00e+00 |f(x)| * |g(x)| ≤ 1.0e-08: true |g(x)| = 2.54e-09 * Stopped by an increasing objective: false * Reached Maximum Number of Iterations: false * Objective Calls: 35 * Gradient Calls: 35
Nim
The gradient function has been rewritten to remove a term in the partial derivative with respect to y (two terms instead of three). This doesn’t change the result which agrees with those of Go, Fortran, Julia, etc.
import math, strformat
func steepDescent(g: proc(x: openArray[float]): float;
gradG: proc(p: openArray[float]): seq[float];
x: var openArray[float]; alpha, tolerance: float) =
let n = x.len
var alpha = alpha
var g0 = g(x) # Initial estimate of result.
# Calculate initial gradient.
var fi = gradG(x)
# Calculate initial norm.
var delG = 0.0
for i in 0..<n:
delG += fi[i] * fi[i]
delG = sqrt(delG)
var b = alpha / delG
# Iterate until value is <= tolerance.
while delG > tolerance:
# Calculate next value.
for i in 0..<n:
x[i] -= b * fi[i]
# Calculate next gradient.
fi = gradG(x)
# Calculate next norm.
delG = 0
for i in 0..<n:
delG += fi[i] * fi[i]
delG = sqrt(delG)
b = alpha / delG
# Calculate next value.
let g1 = g(x)
# Adjust parameter.
if g1 > g0: alpha *= 0.5
else: g0 = g1
when isMainModule:
func g(x: openArray[float]): float =
## Function for which minimum is to be found.
(x[0]-1) * (x[0]-1) * exp(-x[1]*x[1]) + x[1] * (x[1]+2) * exp(-2*x[0]*x[0])
func gradG(p: openArray[float]): seq[float] =
## Provides a rough calculation of gradient g(p).
result = newSeq[float](p.len)
let x = p[0]
let y = p[1]
result[0] = 2 * (x-1) * exp(-y*y) - 4 * x * exp(-2*x*x) * y * (y+2)
result[1] = -2 * (x-1) * (x-1) * y * exp(-y*y) + 2 * (y+1) * exp(-2*x*x)
const
Tolerance = 0.0000006
Alpha = 0.1
var x = [0.1, -1.0] # Initial guess of location of minimum.
steepDescent(g, gradG, x, Alpha, Tolerance)
echo "Testing steepest descent method:"
echo &"The minimum is at x = {x[0]:.12f}, y = {x[1]:.12f} for which f(x, y) = {g(x):.12f}"
- Output:
Testing steepest descent method: The minimum is at x = 0.107626824329, y = -1.223259654882 for which f(x, y) = -0.750063420551
Perl
Calculate with bignum
for numerical stability.
use strict;
use warnings;
use bignum;
sub steepestDescent {
my($alpha, $tolerance, @x) = @_;
my $N = @x;
my $h = $tolerance;
my $g0 = g(@x) ; # Initial estimate of result.
my @fi = gradG($h, @x) ; # Calculate initial gradient
# Calculate initial norm.
my $delG = 0;
for (0..$N-1) { $delG += $fi[$_]**2 }
my $b = $alpha / sqrt($delG);
while ( $delG > $tolerance ) { # Iterate until value is <= tolerance.
# Calculate next value.
for (0..$N-1) { $x[$_] -= $b * $fi[$_] }
$h /= 2;
@fi = gradG($h, @x); # Calculate next gradient.
# Calculate next norm.
$delG = 0;
for (0..$N-1) { $delG += $fi[$_]**2 }
$b = $alpha / sqrt($delG);
my $g1 = g(@x); # Calculate next value.
$g1 > $g0 ? ($alpha /= 2) : ($g0 = $g1); # Adjust parameter.
}
@x
}
# Provides a rough calculation of gradient g(x).
sub gradG {
my($h, @x) = @_;
my $N = @x;
my @y = @x;
my $g0 = g(@x);
my @z;
for (0..$N-1) { $y[$_] += $h ; $z[$_] = (g(@y) - $g0) / $h }
return @z
}
# Function for which minimum is to be found.
sub g { my(@x) = @_; ($x[0]-1)**2 * exp(-$x[1]**2) + $x[1]*($x[1]+2) * exp(-2*$x[0]**2) };
my $tolerance = 0.0000001;
my $alpha = 0.01;
my @x = <0.1 -1>; # Initial guess of location of minimum.
printf "The minimum is at x[0] = %.6f, x[1] = %.6f", steepestDescent($alpha, $tolerance, @x);
- Output:
The minimum is at x[0] = 0.107653, x[1] = -1.223370
Phix
with javascript_semantics -- Function for which minimum is to be found. function g(sequence x) atom {x0,x1} = x return (x0-1)*(x0-1)*exp(-x1*x1) + x1*(x1+2)*exp(-2*x0*x0) end function -- Provides a rough calculation of gradient g(x). function gradG(sequence p) atom {x,y} = p, xm1 = x-1, emyy = exp(-y*y), em2xx = exp(-2*x*x), xm12emyy = xm1*2*emyy p = {xm12emyy - 4*x*em2xx*y*(y+2), -xm12emyy*xm1*y + em2xx*(2*y+2)} return p end function function steepestDescent(sequence x, atom alpha, tolerance) sequence fi = gradG(x) -- Calculate initial gradient atom g0 = g(x), -- Initial estimate of result delG = sqrt(sum(sq_mul(fi,fi))), -- & norm b = alpha / delG integer icount = 0 -- iteration limitor/sanity while delG>tolerance do -- Iterate until <= tolerance x = sq_sub(x,sq_mul(b,fi)) -- Calculate next value fi = gradG(x) -- next gradient delG = sqrt(sum(sq_mul(fi,fi))) -- next norm b = alpha / delG atom g1 = g(x) -- next value if g1>g0 then alpha /= 2 -- Adjust parameter else g0 = g1 end if icount += 1 assert(icount<100) -- (increase if/when necessary) end while return x end function constant alpha = 0.1, tolerance = 0.00000000000001 sequence x = steepestDescent({0.1,-1}, alpha, tolerance) printf(1,"Testing steepest descent method:\n") printf(1,"The minimum is at x = %.13f, y = %.13f for which f(x, y) = %.15f\n", {x[1], x[2], g(x)})
- Output:
Results match Fortran, most others to 6 or 7dp
Some slightly unexpected/unusual (but I think acceptable) variance was noted when playing with different tolerances, be warned.
Results on 32/64 bit Phix agree to 13dp, which I therefore choose to show in full here (but otherwise would not really trust).
Testing steepest descent method: The minimum is at x = 0.1076268435484, y = -1.2232596638399 for which f(x, y) = -0.750063420551493
Racket
Note the different implementation of grad
. I believe that the vector should be reset and only the partial derivative in a particular dimension is to be used. For this reason, I've _yet another_ result!
I could have used ∇ and Δ in the variable names, but it looked too confusing, so I've gone with grad- and del-
#lang racket
(define (apply-vector f v)
(apply f (vector->list v)))
;; Provides a rough calculation of gradient g(v).
(define ((grad/del f) v δ #:fv (fv (apply-vector f v)))
(define dim (vector-length v))
(define tmp (vector-copy v))
(define grad (for/vector #:length dim ((i dim)
(v_i v))
(vector-set! tmp i (+ v_i δ))
(define ∂f/∂v_i (/ (- (apply-vector f tmp) fv) δ))
(vector-set! tmp i v_i)
∂f/∂v_i))
(values grad (sqrt (for/sum ((∂_i grad)) (sqr ∂_i)))))
(define (steepest-descent g x α tolerance)
(define grad/del-g (grad/del g))
(define (loop x δ α gx grad-gx del-gx b)
(cond
[(<= del-gx tolerance) x]
[else
(define δ´ (/ δ 2))
(define x´ (vector-map + (vector-map (curry * (- b)) grad-gx) x))
(define gx´ (apply-vector g x´))
(define-values (grad-gx´ del-gx´) (grad/del-g x´ δ´ #:fv gx´))
(define b´ (/ α del-gx´))
(if (> gx´ gx)
(loop x´ δ´ (/ α 2) gx grad-gx´ del-gx´ b´)
(loop x´ δ´ α gx´ grad-gx´ del-gx´ b´))]))
(define gx (apply-vector g x))
(define δ tolerance)
(define-values (grad-gx del-gx) (grad/del-g x δ #:fv gx))
(loop x δ α gx grad-gx del-gx (/ α del-gx)))
(define (Gradient-descent)
(steepest-descent
(λ (x y)
(+ (* (- x 1) (- x 1) (exp (- (sqr y))))
(* y (+ y 2) (exp (- (* 2 (sqr x)))))))
#(0.1 -1.) 0.1 0.0000006))
(module+ main
(Gradient-descent))
- Output:
'#(0.10760797905122492 -1.2232993981966753)
Raku
(formerly Perl 6)
# 20200904 Updated Raku programming solution
sub steepestDescent(@x, $alpha is copy, $h) {
my $g0 = g |@x ; # Initial estimate of result.
my @fi = gradG |@x ; # Calculate initial gradient
my $b = $alpha / my $delG = sqrt ( sum @fi»² ) ; # Calculate initial norm.
while ( $delG > $h ) { # Iterate until value is <= tolerance.
@x «-»= $b «*« @fi; # Calculate next value.
@fi = gradG |@x ; # Calculate next gradient and next value
$b = $alpha / ($delG = sqrt( sum @fi»² )); # Calculate next norm.
my $g1 = g |@x ;
$g1 > $g0 ?? ( $alpha /= 2 ) !! ( $g0 = $g1 ) # Adjust parameter.
}
}
sub gradG(\x,\y) { # gives a rough calculation of gradient g(x).
2*(x-1)*exp(-y²) - 4*x*exp(-2*x²)*y*(y+2) , -2*(x-1)²*y*exp(-y²) + exp(-2*x²)*(2*y+2)
}
# Function for which minimum is to be found.
sub g(\x,\y) { (x-1)² * exp(-y²) + y*(y+2) * exp(-2*x²) }
my $tolerance = 0.0000006 ; my $alpha = 0.1;
my @x = 0.1, -1; # Initial guess of location of minimum.
steepestDescent(@x, $alpha, $tolerance);
say "Testing steepest descent method:";
say "The minimum is at x[0] = ", @x[0], ", x[1] = ", @x[1];
- Output:
Testing steepest descent method: The minimum is at x[0] = 0.10762682432947938, x[1] = -1.2232596548816097
REXX
The tolerance can be much smaller; a tolerance of 1e-200 was tested. It works, but causes the program to execute a bit slower, but still sub-second execution time.
/*REXX pgm searches for minimum values of the bi─variate function (AKA steepest descent)*/
numeric digits (length( e() ) - length(.) ) % 2 /*use half of number decimal digs in E.*/
tolerance= 1e-30 /*use a much smaller tolerance for REXX*/
alpha= 0.1
x.0= 0.1; x.1= -1
say center(' testing for the steepest descent method ', 79, "═")
call steepestD /* ┌──◄── # digs past dec. point ─►───┐*/
say 'The minimum is at: x[0]=' format(x.0,,9) " x[1]=" format(x.1,,9)
exit /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
e: return 2.718281828459045235360287471352662497757247093699959574966967627724
g: return (x.0-1)**2 * exp(- (x.1**2) ) + x.1 * (x.1 + 2) * exp(-2 * x.0**2)
/*──────────────────────────────────────────────────────────────────────────────────────*/
gradG: x= x.0; y= x.1 /*define X and Y from the X array. */
xm= x-1; eny2= exp(-y*y); enx2= exp(-2 * x**2) /*shortcuts.*/
z.0= 2 * xm * eny2 - 4 * x * enx2 * y * (y+2)
z.1= -2 * xm**2 * y * eny2 + enx2 * (y+2) + enx2 * y
return /*a rough calculation of the gradient. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
steepestD: g0= g() /*the initial estimate of the result. */
call gradG /*calculate the initial gradient. */
delG= sqrt(z.0**2 + z.1**2) /* " " " norm. */
b= alpha / delG
do while delG>tolerance
x.0= x.0 - b * z.0; x.1= x.1 - b * z.1
call gradG
delG= sqrt(z.0**2 + z.1**2); if delG=0 then return
b= alpha / delG
g1= g() /*find minimum.*/
if g1>g0 then alpha= alpha * .5 /*adjust ALPHA.*/
else g0= g1 /* " G0. */
end /*while*/
return
/*──────────────────────────────────────────────────────────────────────────────────────*/
exp: procedure; parse arg x; ix= x%1; if abs(x-ix)>.5 then ix= ix + sign(x); x= x - ix
z=1; _=1; w=z; do j=1; _= _*x/j; z= (z+_)/1; if z==w then leave; w= z; end
if z\==0 then z= z * e() ** ix; return z/1
/*──────────────────────────────────────────────────────────────────────────────────────*/
sqrt: procedure; parse arg x; if x=0 then return 0; d= digits(); numeric digits; h= d+6
numeric form; m.=9; parse value format(x,2,1,,0) 'E0' with g "E" _ .; g=g *.5'e'_ %2
do j=0 while h>9; m.j=h; h= h % 2 + 1; end /*j*/
do k=j+5 to 0 by -1; numeric digits m.k; g=(g+x/g)*.5; end /*k*/; return g
- output when using the internal default inputs:
═══════════════════ testing for the steepest descent method ═══════════════════ The minimum is at: x[0]= 0.107626824 x[1]= -1.223259655
Scala
object GradientDescent {
/** Steepest descent method modifying input values*/
def steepestDescent(x : Array[Double], learningRate : Double, tolerance : Double) = {
val n = x.size
var h = tolerance
var alpha = learningRate
var g0 = g(x) // Initial estimate of result.
// Calculate initial gradient.
var fi = gradG(x,h)
// Calculate initial norm.
var delG = 0.0
for (i <- 0 until n by 1) delG += fi(i) * fi(i)
delG = math.sqrt(delG)
var b = alpha / delG
// Iterate until value is <= tolerance.
while(delG > tolerance){
// Calculate next value.
for (i <- 0 until n by 1) x(i) -= b * fi(i)
h /= 2
// Calculate next gradient.
fi = gradG(x,h)
// Calculate next norm.
delG = 0.0
for (i <- 0 until n by 1) delG += fi(i) * fi(i)
delG = math.sqrt(delG)
b = alpha / delG
// Calculate next value.
var g1 = g(x)
// Adjust parameter.
if(g1 > g0) alpha = alpha / 2
else g0 = g1
}
}
/** Gradient of the input function given in the task*/
def gradG(x : Array[Double], h : Double) : Array[Double] = {
val n = x.size
val z : Array[Double] = Array.fill(n){0}
val y = x
val g0 = g(x)
for(i <- 0 until n by 1){
y(i) += h
z(i) = (g(y) - g0) / h
}
z
}
/** Bivariate function given in the task*/
def g( x : Array[Double]) : Double = {
( (x(0)-1) * (x(0)-1) * math.exp( -x(1)*x(1) ) + x(1) * (x(1)+2) * math.exp( -2*x(0)*x(0) ) )
}
def main(args: Array[String]): Unit = {
val tolerance = 0.0000006
val learningRate = 0.1
val x = Array(0.1, -1) // Initial guess of location of minimum.
steepestDescent(x, learningRate, tolerance)
println("Testing steepest descent method")
println("The minimum is at x : " + x(0) + ", y : " + x(1))
}
}
- Output:
Testing steepest descent method The minimum is at x : 0.10756393294495799, y : -1.2234116852966237
TypeScript
- Translation of
- [Numerical Methods, Algorithms and Tools in C# by Waldemar Dos Passos (18.2 Gradient Descent Method]
// Using the steepest-descent method to search
// for minimum values of a multi-variable function
export const steepestDescent = (x: number[], alpha: number, tolerance: number) => {
let n: number = x.length; // size of input array
let h: number = 0.0000006; //Tolerance factor
let g0: number = g(x); //Initial estimate of result
//Calculate initial gradient
let fi: number[] = [n];
//Calculate initial norm
fi = GradG(x, h);
// console.log("fi:"+fi);
//Calculate initial norm
let DelG: number = 0.0;
for (let i: number = 0; i < n; ++i) {
DelG += fi[i] * fi[i];
}
DelG = Math.sqrt(DelG);
let b: number = alpha / DelG;
//Iterate until value is <= tolerance limit
while (DelG > tolerance) {
//Calculate next value
for (let i = 0; i < n; ++i) {
x[i] -= b * fi[i];
}
h /= 2;
//Calculate next gradient
fi = GradG(x, h);
//Calculate next norm
DelG = 0;
for (let i: number = 0; i < n; ++i) {
DelG += fi[i] * fi[i];
}
DelG = Math.sqrt(DelG);
b = alpha / DelG;
//Calculate next value
let g1: number = g(x);
//Adjust parameter
if (g1 > g0) alpha /= 2;
else g0 = g1;
}
}
// Provides a rough calculation of gradient g(x).
export const GradG = (x: number[], h: number) => {
let n: number = x.length;
let z: number[] = [n];
let y: number[] = x;
let g0: number = g(x);
// console.log("y:" + y);
for (let i = 0; i < n; ++i) {
y[i] += h;
z[i] = (g(y) - g0) / h;
}
// console.log("z:"+z);
return z;
}
// Method to provide function g(x).
export const g = (x: number[]) => {
return (x[0] - 1) * (x[0] - 1)
* Math.exp(-x[1] * x[1]) + x[1] * (x[1] + 2)
* Math.exp(-2 * x[0] * x[0]);
}
export const gradientDescentMain = () => {
let tolerance: number = 0.0000006;
let alpha: number = 0.1;
let x: number[] = [2];
//Initial guesses
x[0] = 0.1;
//of location of minimums
x[1] = -1;
steepestDescent(x, alpha, tolerance);
console.log("Testing steepest descent method");
console.log("The minimum is at x[0] = " + x[0]
+ ", x[1] = " + x[1]);
// console.log("");
}
gradientDescentMain();
- Output:
Testing steepest descent method The minimum is at x[0] = 0.10768224291553158, x[1] = -1.2233090211217854
Linear Regression
- Translation of
- [Linear Regression using Gradient Descent by Adarsh Menon]
let data: number[][] =
[[32.5023452694530, 31.70700584656990],
[53.4268040332750, 68.77759598163890],
[61.5303580256364, 62.56238229794580],
[47.4756396347860, 71.54663223356770],
[59.8132078695123, 87.23092513368730],
[55.1421884139438, 78.21151827079920],
[52.2117966922140, 79.64197304980870],
[39.2995666943170, 59.17148932186950],
[48.1050416917682, 75.33124229706300],
[52.5500144427338, 71.30087988685030],
[45.4197301449737, 55.16567714595910],
[54.3516348812289, 82.47884675749790],
[44.1640494967733, 62.00892324572580],
[58.1684707168577, 75.39287042599490],
[56.7272080570966, 81.43619215887860],
[48.9558885660937, 60.72360244067390],
[44.6871962314809, 82.89250373145370],
[60.2973268513334, 97.37989686216600],
[45.6186437729558, 48.84715331735500],
[38.8168175374456, 56.87721318626850],
[66.1898166067526, 83.87856466460270],
[65.4160517451340, 118.59121730252200],
[47.4812086078678, 57.25181946226890],
[41.5756426174870, 51.39174407983230],
[51.8451869056394, 75.38065166531230],
[59.3708220110895, 74.76556403215130],
[57.3100034383480, 95.45505292257470],
[63.6155612514533, 95.22936601755530],
[46.7376194079769, 79.05240616956550],
[50.5567601485477, 83.43207142132370],
[52.2239960855530, 63.35879031749780],
[35.5678300477466, 41.41288530370050],
[42.4364769440556, 76.61734128007400],
[58.1645401101928, 96.76956642610810],
[57.5044476153417, 74.08413011660250],
[45.4405307253199, 66.58814441422850],
[61.8962226802912, 77.76848241779300],
[33.0938317361639, 50.71958891231200],
[36.4360095113868, 62.12457081807170],
[37.6756548608507, 60.81024664990220],
[44.5556083832753, 52.68298336638770],
[43.3182826318657, 58.56982471769280],
[50.0731456322890, 82.90598148507050],
[43.8706126452183, 61.42470980433910],
[62.9974807475530, 115.24415280079500],
[32.6690437634671, 45.57058882337600],
[40.1668990087037, 54.08405479622360],
[53.5750775316736, 87.99445275811040],
[33.8642149717782, 52.72549437590040],
[64.7071386661212, 93.57611869265820],
[38.1198240268228, 80.16627544737090],
[44.5025380646451, 65.10171157056030],
[40.5995383845523, 65.56230126040030],
[41.7206763563412, 65.28088692082280],
[51.0886346783367, 73.43464154632430],
[55.0780959049232, 71.13972785861890],
[41.3777265348952, 79.10282968354980],
[62.4946974272697, 86.52053844034710],
[49.2038875408260, 84.74269780782620],
[41.1026851873496, 59.35885024862490],
[41.1820161051698, 61.68403752483360],
[50.1863894948806, 69.84760415824910],
[52.3784462192362, 86.09829120577410],
[50.1354854862861, 59.10883926769960],
[33.6447060061917, 69.89968164362760],
[39.5579012229068, 44.86249071116430],
[56.1303888168754, 85.49806777884020],
[57.3620521332382, 95.53668684646720],
[60.2692143939979, 70.25193441977150],
[35.6780938894107, 52.72173496477490],
[31.5881169981328, 50.39267013507980],
[53.6609322616730, 63.64239877565770],
[46.6822286494719, 72.24725106866230],
[43.1078202191024, 57.81251297618140],
[70.3460756150493, 104.25710158543800],
[44.4928558808540, 86.64202031882200],
[57.5045333032684, 91.48677800011010],
[36.9300766091918, 55.23166088621280],
[55.8057333579427, 79.55043667850760],
[38.9547690733770, 44.84712424246760],
[56.9012147022470, 80.20752313968270],
[56.8689006613840, 83.14274979204340],
[34.3331247042160, 55.72348926054390],
[59.0497412146668, 77.63418251167780],
[57.7882239932306, 99.05141484174820],
[54.2823287059674, 79.12064627468000],
[51.0887198989791, 69.58889785111840],
[50.2828363482307, 69.51050331149430],
[44.2117417520901, 73.68756431831720],
[38.0054880080606, 61.36690453724010],
[32.9404799426182, 67.17065576899510],
[53.6916395710700, 85.66820314500150],
[68.7657342696216, 114.85387123391300],
[46.2309664983102, 90.12357206996740],
[68.3193608182553, 97.91982103524280],
[50.0301743403121, 81.53699078301500],
[49.2397653427537, 72.11183246961560],
[50.0395759398759, 85.23200734232560],
[48.1498588910288, 66.22495788805460],
[25.1284846477723, 53.45439421485050]];
function lossFunction(arr0: number[], arr1: number[], arr2: number[]) {
let n: number = arr0.length; // Number of elements in X
//D_m = (-2/n) * sum(X * (Y - Y_pred)) # Derivative wrt m
let a: number = (-2 / n) * (arr0.map((a, i) => a * (arr1[i] - arr2[i]))).reduce((sum, current) => sum + current);
//D_c = (-2/n) * sum(Y - Y_pred) # Derivative wrt c
let b: number = (-2 / n) * (arr1.map((a, i) => (a - arr2[i]))).reduce((sum, current) => sum + current);
return [a, b];
}
export const gradientDescentMain = () => {
// Building the model
let m: number = 0;
let c: number = 0;
let X_arr: number[];
let Y_arr: number[];
let Y_pred_arr: number[];
let D_m: number = 0;
let D_c: number = 0;
let L: number = 0.00000001; // The learning Rate
let epochs: number = 10000000; // The number of iterations to perform gradient descent
//Initial guesses
for (let i = 0; i < epochs; i++) {
X_arr = data.map(function (value, index) { return value[0]; });
Y_arr = data.map(function (value, index) { return value[1]; });
// The current predicted value of Y
Y_pred_arr = X_arr.map((a) => ((m * a) + c));
let all = lossFunction(X_arr, Y_arr, Y_pred_arr);
D_m = all[0];
D_c = all[1];
m = m - L * D_m; // Update m
c = c - L * D_c; // Update c
}
console.log("m: " + m + " c: " + c);
}
gradientDescentMain();
Wren
import "/fmt" for Fmt
// Function for which minimum is to be found.
var g = Fn.new { |x|
return (x[0]-1)*(x[0]-1)*
(-x[1]*x[1]).exp + x[1]*(x[1]+2)*
(-2*x[0]*x[0]).exp
}
// Provides a rough calculation of gradient g(p).
var gradG = Fn.new { |p|
var x = p[0]
var y = p[1]
return [2*(x-1)*(-y*y).exp - 4*x*(-2*x*x).exp*y*(y+2),
-2*(x-1)*(x-1)*y*(-y*y).exp + (-2*x*x).exp*(y+2) + (-2*x*x).exp*y]
}
var steepestDescent = Fn.new { |x, alpha, tolerance|
var n = x.count
var g0 = g.call(x) // // Initial estimate of result.
// Calculate initial gradient.
var fi = gradG.call(x)
// Calculate initial norm.
var delG = 0
for (i in 0...n) delG = delG + fi[i]*fi[i]
delG = delG.sqrt
var b = alpha/delG
// Iterate until value is <= tolerance.
while (delG > tolerance) {
// Calculate next value.
for (i in 0...n) x[i] = x[i] - b*fi[i]
// Calculate next gradient.
fi = gradG.call(x)
// Calculate next norm.
delG = 0
for (i in 0...n) delG = delG + fi[i]*fi[i]
delG = delG.sqrt
b = alpha/delG
// Calculate next value.
var g1 = g.call(x)
// Adjust parameter.
if (g1 > g0) {
alpha = alpha / 2
} else {
g0 = g1
}
}
}
var tolerance = 0.0000006
var alpha = 0.1
var x = [0.1, -1] // Initial guess of location of minimum.
steepestDescent.call(x, alpha, tolerance)
System.print("Testing steepest descent method:")
Fmt.print("The minimum is at x = $f, y = $f for which f(x, y) = $f.", x[0], x[1], g.call(x))
- Output:
Testing steepest descent method: The minimum is at x = 0.107627, y = -1.223260 for which f(x, y) = -0.750063.
zkl
with tweaked gradG
fcn steepestDescent(f, x,y, alpha, h){
g0:=f(x,y); # Initial estimate of result.
fix,fiy := gradG(f,x,y,h); # Calculate initial gradient
# Calculate initial norm.
b:=alpha / (delG := (fix*fix + fiy*fiy).sqrt());
while(delG > h){ # Iterate until value is <= tolerance.
x,y = x - b*fix, y - b*fiy;
# Calculate next gradient and next value
fix,fiy = gradG(f,x,y, h/=2);
b=alpha / (delG = (fix*fix + fiy*fiy).sqrt()); # Calculate next norm.
if((g1:=f(x,y)) > g0) alpha/=2 else g0 = g1; # Adjust parameter.
}
return(x,y)
}
fcn gradG(f,x,y,h){ # gives a rough calculation of gradient f(x,y).
g0:=f(x,y);
return((f(x + h, y) - g0)/h, (f(x, y + h) - g0)/h)
}
fcn f(x,y){ # Function for which minimum is to be found.
(x - 1).pow(2)*(-y.pow(2)).exp() +
y*(y + 2)*(-2.0*x.pow(2)).exp()
}
tolerance,alpha := 0.0000006, 0.1;
x,y := 0.1, -1.0; # Initial guess of location of minimum.
x,y = steepestDescent(f,x,y,alpha,tolerance);
println("Testing steepest descent method:");
println("The minimum is at (x,y) = (%f,%f). f(x,y) = %f".fmt(x,y,f(x,y)));
- Output:
Testing steepest descent method: The minimum is at (x,y) = (0.107608,-1.223299). f(x,y) = -0.750063