Gradient descent

Gradient descent is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Gradient descent (also known as steepest descent) is a first-order iterative optimization algorithm for finding a local minimum of a function; it is described in this Wikipedia article.

Task

Use this algorithm to search for minimum values of the bi-variate function:

  f(x, y) = (x - 1)(x - 1)e^(-y^2) + y(y+2)e^(-2x^2)

around x = 0.1 and y = -1.

This book excerpt shows sample C# code for solving this task.
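As a language-neutral illustration of the method itself, the following short Python sketch (Python is not otherwise among the entries on this page) applies the basic update rule, stepping against the gradient via x <- x - alpha*dg/dx and y <- y - alpha*dg/dy until the gradient norm drops below a tolerance. It uses the analytic gradient derived in the Fortran entry below; the fixed step size, tolerance and iteration cap are illustrative choices rather than part of any entry.

<lang Python>import math

# Function for which the minimum is to be found (the task function).
def g(x, y):
    return (x - 1)**2 * math.exp(-y*y) + y*(y + 2)*math.exp(-2*x*x)

# Analytic gradient of g (the same partial derivatives as in the Fortran entry).
def grad_g(x, y):
    dgdx = 2*(x - 1)*math.exp(-y*y) - 4*x*math.exp(-2*x*x)*y*(y + 2)
    dgdy = -2*(x - 1)**2*y*math.exp(-y*y) + math.exp(-2*x*x)*(y + 2) + math.exp(-2*x*x)*y
    return dgdx, dgdy

def gradient_descent(x, y, alpha=0.1, tolerance=1e-7, max_iter=100000):
    for _ in range(max_iter):
        dx, dy = grad_g(x, y)
        if math.hypot(dx, dy) <= tolerance:   # stop once the gradient is (nearly) zero
            break
        x -= alpha * dx                       # step against the gradient
        y -= alpha * dy
    return x, y

x, y = gradient_descent(0.1, -1.0)            # initial guess from the task description
print(x, y, g(x, y))                          # approximately 0.107627, -1.223260, -0.750063</lang>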

ALGOL 68

Works with: ALGOL 68G version Any - tested with release 2.8.3.win32
Translation of: Go

modified to use the actual gradient function -

Translation of: Fortran

The results agree with the Fortran sample and the Julia sample to 6 places.
<lang algol68>PROC steepest descent = ( REF[]LONG REAL x, LONG REAL alphap, tolerance )VOID: BEGIN

   LONG REAL alpha := alphap;
   LONG REAL g0    := g( x ); # Initial estimate of result. #
   # Calculate initial gradient. #
   [ LWB x : UPB x ]LONG REAL fi  := grad g( x );
   # Calculate initial norm. #
   LONG REAL del g := 0.0;
   FOR i FROM LWB x TO UPB x DO
       del g +:= fi[ i ] * fi[ i ]
   OD;
   del g       := long sqrt( del g );
   LONG REAL b := alpha / del g;
   # Iterate until value is <= tolerance. #
   WHILE del g > tolerance DO
       # Calculate next value. #
       FOR i FROM LWB x TO UPB x DO
           x[i] -:= b * fi[i]
       OD;
       # Calculate next gradient. #
       fi := grad g( x );
       # Calculate next norm. #
       del g := 0;
       FOR i FROM LWB x TO UPB x DO
           del g +:= fi[ i ] * fi[ i ]
       OD;
       del g := long sqrt( del g );
       IF del g > 0 THEN
           b     := alpha / del g; 
           # Calculate next value. #
           LONG REAL g1 := g( x );
           # Adjust parameter. #
           IF g1 > g0 THEN
               alpha /:= 2
           ELSE
               g0 := g1
           FI
       FI
   OD

END # steepest descent # ;

# calculates the gradient of g(p).                                                     #
# The derivatives wrt x and y are (as in the Fortran sample):                          #
# g' wrt x = 2( x - 1 )e^( - ( y^2 ) ) - 4xe^( -2( x^2) )y( y + 2 )                    #
# g' wrt y = ( -2( x-1 )^2ye^( - (y^2) )) + e^(-2( x^2 ) )( y + 2 ) + e^( -2( x^2 ) )y #

PROC grad g = ( []LONG REAL p )[]LONG REAL: BEGIN

   [ LWB p : UPB p ]LONG REAL z;
   LONG REAL x = p[ 0 ];
   LONG REAL y = p[ 1 ];
   z[ 0 ] := 2 * ( x - 1 ) * long exp( - ( y * y ) )
           - 4 * x * long exp( -2 * ( x * x ) ) * y * ( y + 2 );
   z[ 1 ] := ( -2 * ( x - 1 ) * ( x - 1 ) * y ) * long exp( - y * y )
           + long exp( -2 * x * x ) * ( y + 2 )
           + long exp( -2 * x * x ) * y;
   z

END # grad g # ;

# Function for which minimum is to be found.                            #
# g( x, y ) = ( ( x - 1 )^2 )e^( - ( y^2 ) ) + y( y + 2 )e^( - 2(x^2))  #

PROC g = ( []LONG REAL x )LONG REAL:

   ( x[ 0 ] - 1 )
 * ( x[ 0 ] - 1 )
 * long exp( - x[ 1 ] * x[ 1 ] ) + x[ 1 ] * ( x[ 1 ] + 2 )
 * long exp( - 2 * x[ 0 ] * x[ 0 ] )
 ;

BEGIN

   LONG REAL          tolerance := 0.0000006;
   LONG REAL          alpha     := 0.1;
   [ 0 : 1 ]LONG REAL x         := ( []LONG REAL( 0.1, -1 ) )[ AT 0 ]; # Initial guess of location of minimum. #
   steepest descent( x, alpha, tolerance );
   print( ( "Testing steepest descent method:", newline ) );
   print( ( "The minimum is at x[0] = ", fixed( x[ 0 ], -10, 6 ), ", x[1] = ", fixed( x[ 1 ], -10, 6 ), newline ) )

END</lang>

Output:
Testing steepest descent method:
The minimum is at x[0] =   0.107627, x[1] =  -1.223260

ALGOL W

Translation of: ALGOL 68

which is a

Translation of: Go

with the gradient function from

Translation of: Fortran

The results agree (to 6 places) with the Fortran and Julia samples.
<lang algolw>begin

   procedure steepestDescent ( long real array x ( * ); long real value alphap, tolerance ) ;
   begin
       long real array fi ( 0 :: 1 );
       long real alpha, g0, g1, delG, b;
       alpha := alphap;
       g0    := g( x ); % Initial estimate of result. %
       % Calculate initial gradient. %
       gradG( fi, x );
       % Calculate initial norm. %
       delG  := 0.0;
       for i := 0 until 1 do delG := delG + ( fi( i ) * fi( i ) );
       delG  := longSqrt( delG );
       b     := alpha / delG;
       % Iterate until value is <= tolerance. %
       while delG > tolerance do begin
           % Calculate next value. %
           for i := 0 until 1 do x(i) := x( i ) - ( b * fi(i) );
           % Calculate next gradient. %
           gradG( fi, x );
           % Calculate next norm. %
           delG  := 0;
        for i := 0 until 1 do delG := delG + ( fi( i ) * fi( i ) );
           delG  := longSqrt( delG );
           if delG > 0 then begin
               b     := alpha / delG; 
               % Calculate next value. %
               g1 := g( x );
               % Adjust parameter. %
               if g1 > g0
               then alpha := alpha / 2
               else g0 := g1
           end if_delG_gt_0
       end while_delG_gt_tolerance
   end steepestDescent ;
   % Provides a rough calculation of gradient g(x). %
   procedure gradG ( long real array z, p ( * ) ) ;
   begin
       long real x, y;
       x := p( 0 );
       y := p( 1 );
       z( 0 ) := 2 * ( x - 1 ) * longExp( - ( y * y ) )
               - 4 * x * longExp( -2 * ( x * x ) ) * y * ( y + 2 );
       z( 1 ) := ( -2 * ( x - 1 ) * ( x - 1 ) * y ) * longExp( - y * y )
               + longExp( -2 * x * x ) * ( y + 2 )
               + longExp( -2 * x * x ) * y
   end gradG ;
   % Function for which minimum is to be found. %
   long real procedure g ( long real array x ( * ) ) ;
         ( x( 0 ) - 1 ) * ( x( 0 ) - 1 ) * longExp( - x( 1 ) * x( 1 ) )
       + x( 1 ) * ( x( 1 ) + 2 ) * longExp( - 2 * x( 0 ) * x( 0 ) )
       ;
   begin
       long real alpha, tolerance;
       long real array x ( 0 :: 1 );
       x( 0 ) :=  0.1; % Initial guess of location of minimum. %
       x( 1 ) := -1;       
       tolerance := 0.0000006;
       alpha     := 0.1;
       steepestDescent( x, alpha, tolerance );
       r_format := "A"; r_w := 11; r_d := 7; s_w := 0; % output formatting %
       write( "Testing steepest descent method:" );
       write( "The minimum is at x(0) = ", x( 0 ), ", x(1) = ", x( 1 ) )
   end

end.</lang>

Output:
Testing steepest descent method:
The minimum is at x(0) =   0.1076268, x(1) =  -1.2232596

Fortran

Compiler: gfortran 8.3.0
The way a FORTRAN programmer would do this is to differentiate the function symbolically, using the diff command in Maxima:

(%i3) (x-1)*(x-1)*exp(-y^2)+y*(y+2)*exp(-2*x^2);
                                   2          2
                            2   - y      - 2 x
(%o3)                (x - 1)  %e     + %e       y (y + 2)
(%i4) diff(%o3,x);
                                  2              2
                               - y          - 2 x
(%o4)              2 (x - 1) %e     - 4 x %e       y (y + 2)
(%i5) diff(%o3,y);
                                 2           2                  2
                        2     - y       - 2 x              - 2 x
(%o5)       (- 2 (x - 1)  y %e    ) + %e       (y + 2) + %e       y

and then have it automatically turned into statements with the fortran command:

(%i6) fortran(%o4);
      2*(x-1)*exp(-y**2)-4*x*exp(-2*x**2)*y*(y+2)
(%o6)                                done
(%i7) fortran(%o5);
      (-2*(x-1)**2*y*exp(-y**2))+exp(-2*x**2)*(y+2)+exp(-2*x**2)*y
(%o7)                                done
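The same derivation can be cross-checked outside Maxima, for example with Python's sympy library (an aside for verification only, not part of the Fortran entry; it assumes sympy is available and uses its diff and fcode functions):

<lang Python>import sympy as sp

x, y = sp.symbols('x y')
f = (x - 1)**2 * sp.exp(-y**2) + y*(y + 2)*sp.exp(-2*x**2)

dfdx = sp.diff(f, x)      # partial derivative with respect to x
dfdy = sp.diff(f, y)      # partial derivative with respect to y

# fcode() renders an expression as Fortran source, much like Maxima's fortran() command.
print(sp.fcode(dfdx))
print(sp.fcode(dfdy))</lang>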

The optimization subroutine GD sets the reverse communication variable IFLAG. This allows the evaluation of the gradient to be done separately.
<lang Fortran>      SUBROUTINE EVALFG (N, X, F, G)

      IMPLICIT NONE
      INTEGER N
      DOUBLE PRECISION X(N), F, G(N)
      F = (X(1) - 1.D0)**2 * EXP(-X(2)**2) + 
    $      X(2) * (X(2) + 2.D0) * EXP(-2.D0 * X(1)**2)
      G(1) = 2.D0 * (X(1) - 1.D0) * EXP(-X(2)**2) - 4.D0 * X(1) * 
    $        EXP(-2.D0 * X(1)**2) * X(2) * (X(2) + 2.D0)
      G(2) = (-2.D0 * (X(1) - 1.D0)**2 * X(2) * EXP(-X(2)**2)) + 
    $        EXP(-2.D0 * X(1)**2) * (X(2) + 2.D0) + 
    $        EXP(-2.D0 * X(1)**2) * X(2)
      RETURN
     END
*-----------------------------------------------------------------------
* gd - Gradient descent
* G must be set correctly at the initial point X.
* ___Name______Type________In/Out___Description_________________________
*   N          Integer     In       Number of Variables.
*   X(N)       Double      Both     Variables
*   G(N)       Double      Both     Gradient
*   TOL        Double      In       Relative convergence tolerance
*   IFLAG      Integer     Out      Reverse Communication Flag
*                                   on output:  0 done
*                                               1 compute G and call again
*-----------------------------------------------------------------------
     SUBROUTINE GD (N, X, G, TOL, IFLAG)
      IMPLICIT NONE
      INTEGER N, IFLAG
      DOUBLE PRECISION X(N), G(N), TOL
      DOUBLE PRECISION ETA
      PARAMETER (ETA = 0.3D0)     ! Learning rate
      INTEGER I
      DOUBLE PRECISION GNORM      ! norm of gradient
      GNORM = 0.D0                ! convergence test
      DO I = 1, N
        GNORM = GNORM + G(I)**2
      END DO
      GNORM = SQRT(GNORM)
      IF (GNORM < TOL) THEN
        IFLAG = 0
        RETURN                     ! success
      END IF
      DO I = 1, N                 ! take step
        X(I) = X(I) - ETA * G(I)
      END DO
      IFLAG = 1
      RETURN                      ! let main program evaluate G
     END  ! of gd
     PROGRAM GDDEMO
      IMPLICIT NONE
      INTEGER N
      PARAMETER (N = 2)
      INTEGER ITER, J, IFLAG
      DOUBLE PRECISION X(N), F, G(N), TOL
      X(1) = -0.1D0         ! initial values
      X(2) = -1.0D0
      TOL = 1.D-15
      CALL EVALFG (N, X, F, G)
      IFLAG = 0
      DO J = 1, 1 000 000
        CALL GD (N, X, G, TOL, IFLAG)
        IF (IFLAG .EQ. 1) THEN
          CALL EVALFG (N, X, F, G)
        ELSE
          ITER = J
          GO TO 50
        END IF
      END DO
      STOP 'too many iterations!'
 50   PRINT '(A, I7, A, F19.15, A, F19.15, A, F19.15)', 
    $          'After ', ITER, ' steps, found minimum at x=', 
    $           X(1), ' y=', X(2), ' of f=', F
      STOP 'program complete'
     END

</lang>

Output:
After      31 steps, found minimum at x=  0.107626843548372 y= -1.223259663839920 of f= -0.750063420551493
STOP program complete
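The reverse-communication pattern used by GD (the optimizer never evaluates the objective or gradient itself; it returns IFLAG so the caller does the evaluation and calls back) can be sketched in Python roughly as follows. This is an illustration of the calling pattern only, not part of the Fortran entry; the names, the starting point and the slightly looser tolerance are assumptions made for the sketch.

<lang Python>import math

ETA = 0.3   # learning rate, as in subroutine GD

def gd(x, grad, tol=1e-12):
    """One reverse-communication step. Returns (iflag, x):
    iflag 0 means converged, 1 means the caller must recompute grad and call again."""
    if math.hypot(*grad) < tol:
        return 0, x
    return 1, [xi - ETA * gi for xi, gi in zip(x, grad)]

def evalfg(x, y):
    """Objective and gradient, evaluated by the caller (as EVALFG does)."""
    f = (x - 1)**2 * math.exp(-y*y) + y*(y + 2)*math.exp(-2*x*x)
    g = [2*(x - 1)*math.exp(-y*y) - 4*x*math.exp(-2*x*x)*y*(y + 2),
         -2*(x - 1)**2*y*math.exp(-y*y) + math.exp(-2*x*x)*(y + 2) + math.exp(-2*x*x)*y]
    return f, g

# Driver loop in the style of program GDDEMO: the main program owns the evaluation.
x = [0.1, -1.0]
f, grad = evalfg(*x)
for step in range(1_000_000):
    iflag, x = gd(x, grad)
    if iflag == 0:
        break
    f, grad = evalfg(*x)      # "compute G and call again"
print(step, x, f)</lang>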

Go

This is a translation of the C# code in the book excerpt linked to above and hence also of the first Typescript example below.

However, since it was originally written, I've substituted Fortran's gradient function for the original one (see Talk page), which now gives results that agree (to 6 decimal places) with those of the Fortran, Julia, Algol 68 and Algol W solutions. As a number of other solutions are based on this one, I suggest their authors update them accordingly.
<lang go>package main

import (

   "fmt"
   "math"

)

func steepestDescent(x []float64, alpha, tolerance float64) {

   n := len(x)
   g0 := g(x) // Initial estimate of result.
   // Calculate initial gradient.
   fi := gradG(x)
   // Calculate initial norm.
   delG := 0.0
   for i := 0; i < n; i++ {
       delG += fi[i] * fi[i]
   }
   delG = math.Sqrt(delG)
   b := alpha / delG
   // Iterate until value is <= tolerance.
   for delG > tolerance {
       // Calculate next value.
       for i := 0; i < n; i++ {
           x[i] -= b * fi[i]
       }
       // Calculate next gradient.
       fi = gradG(x)
       // Calculate next norm.
       delG = 0
       for i := 0; i < n; i++ {
           delG += fi[i] * fi[i]
       }
       delG = math.Sqrt(delG)
       b = alpha / delG
       // Calculate next value.
       g1 := g(x)
       // Adjust parameter.
       if g1 > g0 {
           alpha /= 2
       } else {
           g0 = g1
       }
   }

}

// Provides a rough calculation of gradient g(p).
func gradG(p []float64) []float64 {

   z := make([]float64, len(p))
   x := p[0]
   y := p[1]
   z[0] = 2*(x-1)*math.Exp(-y*y) - 4*x*math.Exp(-2*x*x)*y*(y+2)
   z[1] = -2*(x-1)*(x-1)*y*math.Exp(-y*y) + math.Exp(-2*x*x)*(y+2) + math.Exp(-2*x*x)*y
   return z

}

// Function for which minimum is to be found.
func g(x []float64) float64 {

   return (x[0]-1)*(x[0]-1)*
       math.Exp(-x[1]*x[1]) + x[1]*(x[1]+2)*
       math.Exp(-2*x[0]*x[0])

}

func main() {

   tolerance := 0.0000006
   alpha := 0.1
   x := []float64{0.1, -1} // Initial guess of location of minimum.
   steepestDescent(x, alpha, tolerance)
   fmt.Println("Testing steepest descent method:")
   fmt.Printf("The minimum is at x = %f, y = %f for which f(x, y) = %f.\n", x[0], x[1], g(x))

}</lang>

Output:
Testing steepest descent method:
The minimum is at x = 0.107627, y = -1.223260 for which f(x, y) = -0.750063.

Julia

<lang julia>using Optim, Base.MathConstants

f(x) = (x[1] - 1) * (x[1] - 1) * e^(-x[2]^2) + x[2] * (x[2] + 2) * e^(-2 * x[1]^2)

println(optimize(f, [0.1, -1.0], GradientDescent()))

</lang>

Output:
Results of Optimization Algorithm
 * Algorithm: Gradient Descent
 * Starting Point: [0.1,-1.0]
 * Minimizer: [0.107626844383003,-1.2232596628723371]
 * Minimum: -7.500634e-01
 * Iterations: 14
 * Convergence: true
   * |x - x'| ≤ 0.0e+00: false
     |x - x'| = 2.97e-09
   * |f(x) - f(x')| ≤ 0.0e+00 |f(x)|: true
     |f(x) - f(x')| = 0.00e+00 |f(x)|
   * |g(x)| ≤ 1.0e-08: true
     |g(x)| = 2.54e-09
   * Stopped by an increasing objective: false
   * Reached Maximum Number of Iterations: false
 * Objective Calls: 35
 * Gradient Calls: 35

Nim

Translation of: Go

The gradient function has been rewritten to remove a term in the partial derivative with respect to y (two terms instead of three). This doesn't change the result, which agrees with those of Go, Fortran, Julia, etc.
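(The rewrite is pure algebra: the two e^(-2x^2) terms of the partial derivative with respect to y combine, since e^(-2x^2)(y + 2) + e^(-2x^2)y = (2y + 2)e^(-2x^2) = 2(y + 1)e^(-2x^2), which is exactly the form used in gradG below.)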

<lang Nim>import math, strformat


func steepDescent(g: proc(x: openArray[float]): float;
                  gradG: proc(p: openArray[float]): seq[float];
                  x: var openArray[float]; alpha, tolerance: float) =
 let n = x.len
 var alpha = alpha
 var g0 = g(x)   # Initial estimate of result.
 # Calculate initial gradient.
 var fi = gradG(x)
 # Calculate initial norm.
 var delG = 0.0
 for i in 0..<n:
   delG += fi[i] * fi[i]
 delG = sqrt(delG)
 var b = alpha / delG
 # Iterate until value is <= tolerance.
 while delG > tolerance:
   # Calculate next value.
   for i in 0..<n:
     x[i] -= b * fi[i]
   # Calculate next gradient.
   fi = gradG(x)
   # Calculate next norm.
   delG = 0
   for i in 0..<n:
     delG += fi[i] * fi[i]
   delG = sqrt(delG)
   b = alpha / delG
   # Calculate next value.
   let g1 = g(x)
   # Adjust parameter.
   if g1 > g0: alpha *= 0.5
   else: g0 = g1


when isMainModule:

 func g(x: openArray[float]): float =
   ## Function for which minimum is to be found.
   (x[0]-1) * (x[0]-1) * exp(-x[1]*x[1]) + x[1] * (x[1]+2) * exp(-2*x[0]*x[0])
 func gradG(p: openArray[float]): seq[float] =
   ## Provides a rough calculation of gradient g(p).
   result = newSeq[float](p.len)
   let x = p[0]
   let y = p[1]
   result[0] = 2 * (x-1) * exp(-y*y) - 4 * x * exp(-2*x*x) * y * (y+2)
   result[1] = -2 * (x-1) * (x-1) * y * exp(-y*y) + 2 * (y+1) * exp(-2*x*x)
 const
   Tolerance = 0.0000006
   Alpha = 0.1
 var x = [0.1, -1.0]   # Initial guess of location of minimum.
 steepDescent(g, gradG, x, Alpha, Tolerance)
 echo "Testing steepest descent method:"
 echo &"The minimum is at x = {x[0]:.12f}, y = {x[1]:.12f} for which f(x, y) = {g(x):.12f}"</lang>
Output:
Testing steepest descent method:
The minimum is at x = 0.107626824329, y = -1.223259654882 for which f(x, y) = -0.750063420551

Perl

Calculate with bignum for numerical stability.

Translation of: Raku

<lang perl>use strict; use warnings; use bignum;

sub steepestDescent {

   my($alpha, $tolerance, @x) = @_;
   my $N = @x;
   my $h = $tolerance;
   my $g0 = g(@x) ;    # Initial estimate of result.
   my @fi = gradG($h, @x) ;    #  Calculate initial gradient
   # Calculate initial norm.
   my $delG = 0;
   for (0..$N-1) { $delG += $fi[$_]**2 }
   my $b = $alpha / sqrt($delG);
   while ( $delG > $tolerance ) {   # Iterate until value is <= tolerance.
      #  Calculate next value.
      for (0..$N-1) { $x[$_] -= $b * $fi[$_] }
      $h /= 2;
      @fi = gradG($h, @x);    # Calculate next gradient.
      # Calculate next norm.
      $delG = 0;
      for (0..$N-1) { $delG += $fi[$_]**2 }
      $b = $alpha / sqrt($delG);
      my $g1 = g(@x);   # Calculate next value.
      $g1 > $g0 ? ($alpha /= 2) : ($g0 = $g1);  # Adjust parameter.
   }
   @x

}

# Provides a rough calculation of gradient g(x).

sub gradG {

   my($h, @x) = @_;
   my $N = @x;
   my @y = @x;
   my $g0 = g(@x);
   my @z;
   for (0..$N-1) { $y[$_] += $h ; $z[$_] = (g(@y) - $g0) / $h }
   return @z

}

# Function for which minimum is to be found.

sub g { my(@x) = @_; ($x[0]-1)**2 * exp(-$x[1]**2) + $x[1]*($x[1]+2) * exp(-2*$x[0]**2) };

my $tolerance = 0.0000001; my $alpha = 0.01; my @x = <0.1 -1>; # Initial guess of location of minimum.

printf "The minimum is at x[0] = %.6f, x[1] = %.6f", steepestDescent($alpha, $tolerance, @x);</lang>

Output:
The minimum is at x[0] = 0.107653, x[1] = -1.223370

Phix

Translation of: Go
<lang Phix>with javascript_semantics
-- Function for which minimum is to be found.
function g(sequence x)
    atom {x0,x1} = x
    return (x0-1)*(x0-1)*exp(-x1*x1) + 
               x1*(x1+2)*exp(-2*x0*x0)
end function
 
-- Provides a rough calculation of gradient g(x).
function gradG(sequence p)
    atom {x,y} = p,
         xm1 = x-1,
         emyy = exp(-y*y),
         em2xx = exp(-2*x*x),
         xm12emyy = xm1*2*emyy
    p = {xm12emyy   - 4*x*em2xx*y*(y+2),
        -xm12emyy*xm1*y + em2xx*(2*y+2)}
    return p
end function
 
function steepestDescent(sequence x, atom alpha, tolerance)
    sequence fi = gradG(x)  -- Calculate initial gradient
    atom g0 = g(x),         -- Initial estimate of result
         delG = sqrt(sum(sq_mul(fi,fi))),       -- & norm
         b = alpha / delG
    integer icount = 0      -- iteration limitor/sanity
    while delG>tolerance do     -- Iterate until <= tolerance
        x = sq_sub(x,sq_mul(b,fi))  -- Calculate next value
        fi = gradG(x)               --           next gradient
        delG = sqrt(sum(sq_mul(fi,fi))) --       next norm
        b = alpha / delG
        atom g1 = g(x)                  --       next value
        if g1>g0 then
            alpha /= 2                  -- Adjust parameter
        else
            g0 = g1
        end if
        icount += 1
        assert(icount<100)  -- (increase if/when necessary)
    end while
    return x
end function
 
constant alpha = 0.1, tolerance = 0.00000000000001
sequence x = steepestDescent({0.1,-1}, alpha, tolerance)
printf(1,"Testing steepest descent method:\n")
printf(1,"The minimum is at x = %.13f, y = %.13f for which f(x, y) = %.15f\n", {x[1], x[2], g(x)})
Output:

Results match Fortran, and most other entries to 6 or 7dp.
Some slightly unexpected/unusual (but I think acceptable) variance was noted when playing with different tolerances, be warned.
Results on 32/64 bit Phix agree to 13dp, which I therefore choose to show in full here (but otherwise would not really trust).

Testing steepest descent method:
The minimum is at x = 0.1076268435484, y = -1.2232596638399 for which f(x, y) = -0.750063420551493

Racket

Translation of: Go

Note the different implementation of grad. I believe that the vector should be reset after each perturbation, so that only the partial derivative in a particular dimension is used at a time. For this reason, I get yet another result!

I could have used ∇ and Δ in the variable names, but it looked too confusing, so I've gone with grad- and del-
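To spell out the gradient computation described above: each coordinate is perturbed on its own and restored before the next partial derivative is taken. A minimal Python sketch of that one-coordinate-at-a-time forward difference (purely illustrative, not part of the Racket entry; the helper name is made up):

<lang Python>import math

def grad_forward_diff(f, v, delta):
    """Forward-difference gradient of f at v, perturbing one coordinate at a time
    and restoring it before moving on to the next."""
    fv = f(*v)
    grad = []
    for i in range(len(v)):
        bumped = list(v)          # fresh copy, so earlier perturbations are not kept
        bumped[i] += delta
        grad.append((f(*bumped) - fv) / delta)
    return grad

g = lambda x, y: (x - 1)**2 * math.exp(-y*y) + y*(y + 2)*math.exp(-2*x*x)
print(grad_forward_diff(g, [0.1, -1.0], 1e-6))</lang>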

<lang racket>#lang racket

(define (apply-vector f v)

 (apply f (vector->list v)))
;; Provides a rough calculation of gradient g(v).

(define ((grad/del f) v δ #:fv (fv (apply-vector f v)))

 (define dim (vector-length v))
 (define tmp (vector-copy v))
 (define grad (for/vector #:length dim ((i dim)
                           (v_i v))
             (vector-set! tmp i (+ v_i δ))
             (define ∂f/∂v_i (/ (- (apply-vector f tmp) fv) δ))
             (vector-set! tmp i v_i)
             ∂f/∂v_i))
 (values grad (sqrt (for/sum ((∂_i grad)) (sqr ∂_i)))))

(define (steepest-descent g x α tolerance)

 (define grad/del-g (grad/del g))
 (define (loop x δ α gx grad-gx del-gx b)
   (cond
     [(<= del-gx tolerance) x]
     [else
       (define δ´ (/ δ 2))
       (define x´ (vector-map + (vector-map (curry * (- b)) grad-gx) x))
       (define gx´ (apply-vector g x´))
       (define-values (grad-gx´ del-gx´) (grad/del-g x´ δ´ #:fv gx´))
       (define b´ (/ α del-gx´))
       (if (> gx´ gx)
           (loop x´ δ´ (/ α 2) gx  grad-gx´ del-gx´ b´)
           (loop x´ δ´ α       gx´ grad-gx´ del-gx´ b´))]))
 (define gx (apply-vector g x))
 (define δ tolerance)
 (define-values (grad-gx del-gx) (grad/del-g x δ #:fv gx))
 (loop x δ α gx grad-gx del-gx (/ α del-gx)))

(define (Gradient-descent)

 (steepest-descent
   (λ (x y)
      (+ (* (- x 1) (- x 1) (exp (- (sqr y))))
       (* y (+ y 2) (exp (- (* 2 (sqr x)))))))
   #(0.1 -1.) 0.1 0.0000006))

(module+ main

 (Gradient-descent))

</lang>

Output:
'#(0.10760797905122492 -1.2232993981966753)

Raku

(formerly Perl 6)

Translation of: Go

<lang perl6># 20200904 Updated Raku programming solution

sub steepestDescent(@x, $alpha is copy, $h) {

  my $g0 = g |@x ; # Initial estimate of result.
  my @fi = gradG |@x ; #  Calculate initial gradient
  my $b = $alpha / my $delG = sqrt ( sum @fi»² ) ;  # Calculate initial norm.
  while ( $delG > $h ) {   # Iterate until value is <= tolerance.
     @x «-»= $b «*« @fi; #  Calculate next value.
     @fi = gradG |@x ; # Calculate next gradient and next value
     $b = $alpha / ($delG = sqrt( sum @fi»² ));  # Calculate next norm.
     my $g1 = g |@x ;
     $g1 > $g0 ?? ( $alpha /= 2 ) !! ( $g0 = $g1 )   # Adjust parameter.
  }

}

sub gradG(\x,\y) { # gives a rough calculation of gradient g(x).

  2*(x-1)*exp(-y²) - 4*x*exp(-2*x²)*y*(y+2) , -2*(x-1)²*y*exp(-y²) + exp(-2*x²)*(2*y+2)

}

# Function for which minimum is to be found.

sub g(\x,\y) { (x-1)² * exp(-y²) + y*(y+2) * exp(-2*x²) }

my $tolerance = 0.0000006 ; my $alpha = 0.1;

my @x = 0.1, -1; # Initial guess of location of minimum.

steepestDescent(@x, $alpha, $tolerance);

say "Testing steepest descent method:"; say "The minimum is at x[0] = ", @x[0], ", x[1] = ", @x[1]; </lang>

Output:
Testing steepest descent method:
The minimum is at x[0] = 0.10762682432947938, x[1] = -1.2232596548816097

REXX

The tolerance can be much smaller; a tolerance of 1e-200 was tested. It works, but makes the program execute a bit more slowly, though still with sub-second execution time.
<lang rexx>/*REXX pgm searches for minimum values of the bi─variate function (AKA steepest descent)*/
numeric digits (length( e() ) - length(.) ) % 2  /*use half of number decimal digs in E.*/
tolerance= 1e-30                                 /*use a much smaller tolerance for REXX*/

   alpha=  0.1
     x.0=  0.1;     x.1= -1

say center(' testing for the steepest descent method ', 79, "═")
call steepestD                                   /* ┌──◄── # digs past dec. point ─►───┐*/
say 'The minimum is at:  x[0]=' format(x.0,,9)  "  x[1]=" format(x.1,,9)
exit                                             /*stick a fork in it,  we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
e:     return 2.718281828459045235360287471352662497757247093699959574966967627724
g:     return (x.0-1)**2 * exp(- (x.1**2) )  +  x.1 * (x.1 + 2) * exp(-2 * x.0**2)
/*──────────────────────────────────────────────────────────────────────────────────────*/
gradG: x= x.0;  y= x.1                           /*define  X  and  Y  from the X array. */

      xm= x-1;              eny2= exp(-y*y);   enx2= exp(-2 * x**2)        /*shortcuts.*/
      z.0=  2 * xm        * eny2   -   4 * x * enx2 * y * (y+2)
      z.1= -2 * xm**2 * y * eny2   +           enx2     * (y+2)   +   enx2 * y
      return                                    /*a rough calculation of the gradient. */

/*──────────────────────────────────────────────────────────────────────────────────────*/
steepestD: g0= g()                               /*the initial estimate of the result.  */

          call gradG                            /*calculate the initial gradient.      */
          delG= sqrt(z.0**2  +  z.1**2)         /*    "      "     "    norm.          */
          b= alpha / delG
                            do while delG>tolerance
                            x.0= x.0   -   b * z.0;               x.1= x.1   -   b * z.1
                            call gradG
                            delG= sqrt(z.0**2  +  z.1**2);        if delG=0  then return
                            b= alpha / delG
                            g1= g()                                     /*find minimum.*/
                            if g1>g0  then alpha= alpha * .5            /*adjust ALPHA.*/
                                      else    g0= g1                    /*   "   G0.   */
                            end   /*while*/
          return

/*──────────────────────────────────────────────────────────────────────────────────────*/
exp: procedure;  parse arg x;  ix= x%1;  if abs(x-ix)>.5  then ix= ix + sign(x);  x= x - ix

     z=1;  _=1;  w=z;   do j=1;  _= _*x/j;  z= (z+_)/1;  if z==w  then leave;  w= z;  end
     if z\==0  then z= z * e() ** ix;                                          return z/1

/*──────────────────────────────────────────────────────────────────────────────────────*/
sqrt: procedure;  parse arg x;  if x=0  then return 0;  d= digits();  numeric digits;  h= d+6

     numeric form; m.=9; parse value format(x,2,1,,0) 'E0' with g "E" _ .; g=g *.5'e'_ %2
       do j=0  while h>9;      m.j=h;                h= h % 2 + 1;   end  /*j*/
       do k=j+5  to 0  by -1;  numeric digits m.k;   g=(g+x/g)*.5;   end  /*k*/; return g</lang>
Output when using the internal default inputs:
═══════════════════ testing for the steepest descent method ═══════════════════
The minimum is at:     x[0]= 0.107626824      x[1]= -1.223259655

Scala

Translation of: Go

<lang scala>object GradientDescent {

 /** Steepest descent method modifying input values*/
 def steepestDescent(x : Array[Double], learningRate : Double, tolerance : Double) = {
   val n = x.size
   var h = tolerance
   var alpha = learningRate
   var g0 = g(x) // Initial estimate of result.
   // Calculate initial gradient.
   var fi = gradG(x,h)
   // Calculate initial norm.
   var delG = 0.0
   for (i <- 0 until n by 1)  delG += fi(i) * fi(i)
   delG = math.sqrt(delG)
   var b = alpha / delG
   // Iterate until value is <= tolerance.
   while(delG > tolerance){
     // Calculate next value.
     for (i <- 0 until n by 1) x(i) -= b * fi(i)
     h /= 2
     // Calculate next gradient.
     fi = gradG(x,h)
     // Calculate next norm.
     delG = 0.0
     for (i <- 0 until n by 1) delG += fi(i) * fi(i)
     delG = math.sqrt(delG)
     b = alpha / delG
     // Calculate next value.
     var g1 = g(x)
     // Adjust parameter.
     if(g1 > g0) alpha = alpha / 2
     else g0 = g1
   }
 }
 /** Gradient of the input function given in the task*/
 def gradG(x : Array[Double], h : Double) : Array[Double] = {
   val n = x.size
   val z : Array[Double] = Array.fill(n){0}
   val y = x
   val g0 = g(x)
   for(i <- 0 until n by 1){
     y(i) += h
     z(i) = (g(y) - g0) / h
   }
   z
 }
 /** Bivariate function given in the task*/
 def g( x : Array[Double]) : Double = {
   ( (x(0)-1) * (x(0)-1) * math.exp( -x(1)*x(1) ) + x(1) * (x(1)+2) * math.exp( -2*x(0)*x(0) ) )
 }
 def main(args: Array[String]): Unit = {
   val tolerance = 0.0000006
   val learningRate = 0.1
   val x =  Array(0.1, -1) // Initial guess of location of minimum.
   steepestDescent(x, learningRate, tolerance)
   println("Testing steepest descent method")
   println("The minimum is at x : " + x(0) + ", y : " + x(1))
 }

} </lang>

Output:
Testing steepest descent method
The minimum is at x : 0.10756393294495799, y : -1.2234116852966237

TypeScript

Translation of
  •   [Numerical Methods, Algorithms and Tools in C# by Waldemar Dos Passos (18.2 Gradient Descent Method)]



<lang Typescript>// Using the steepest-descent method to search
// for minimum values of a multi-variable function
export const steepestDescent = (x: number[], alpha: number, tolerance: number) => {

   let n: number = x.length; // size of input array
   let h: number = 0.0000006; //Tolerance factor
   let g0: number = g(x); //Initial estimate of result
   //Calculate initial gradient
   let fi: number[] = [n];
   //Calculate initial norm
   fi = GradG(x, h);
   // console.log("fi:"+fi);
   //Calculate initial norm
   let DelG: number = 0.0;
   for (let i: number = 0; i < n; ++i) {
       DelG += fi[i] * fi[i];
   }
   DelG = Math.sqrt(DelG);
   let b: number = alpha / DelG;
   //Iterate until value is <= tolerance limit
   while (DelG > tolerance) {
       //Calculate next value
       for (let i = 0; i < n; ++i) {
           x[i] -= b * fi[i];
       }
       h /= 2;
       //Calculate next gradient
       fi = GradG(x, h);
       //Calculate next norm
       DelG = 0;
       for (let i: number = 0; i < n; ++i) {
           DelG += fi[i] * fi[i];
       }
       DelG = Math.sqrt(DelG);
       b = alpha / DelG;
       //Calculate next value
       let g1: number = g(x);
       //Adjust parameter
       if (g1 > g0) alpha /= 2;
       else g0 = g1;
   }

}

// Provides a rough calculation of gradient g(x).
export const GradG = (x: number[], h: number) => {

   let n: number = x.length;
   let z: number[] = [n];
   let y: number[] = x;
   let g0: number = g(x);
   // console.log("y:" + y);
   for (let i = 0; i < n; ++i) {
       y[i] += h;
       z[i] = (g(y) - g0) / h;
   }
   // console.log("z:"+z);
   return z;

}

// Method to provide function g(x).
export const g = (x: number[]) => {

   return (x[0] - 1) * (x[0] - 1)
       * Math.exp(-x[1] * x[1]) + x[1] * (x[1] + 2)
       * Math.exp(-2 * x[0] * x[0]);

}

export const gradientDescentMain = () => {

   let tolerance: number = 0.0000006;
   let alpha: number = 0.1;
   let x: number[] = [2];
   //Initial guesses
   x[0] = 0.1;
   //of location of minimums 
   x[1] = -1;
   steepestDescent(x, alpha, tolerance);
   console.log("Testing steepest descent method");
   console.log("The minimum is at x[0] = " + x[0]
       + ", x[1] = " + x[1]);
   // console.log("");

}

gradientDescentMain();

</lang>

Output:
Testing steepest descent method
The minimum is at x[0] = 0.10768224291553158, x[1] = -1.2233090211217854

Linear Regression

Translation of
  •   [Linear Regression using Gradient Descent by Adarsh Menon]
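The quantity being minimised below is the mean squared error of the fitted line Y_pred = m*X + c; lossFunction returns its two partial derivatives (D_m and D_c in the code), as in the linked article:

  E(m, c) = (1/n) * sum( (Y_i - (m*X_i + c))^2 )
  D_m = dE/dm = (-2/n) * sum( X_i * (Y_i - (m*X_i + c)) )
  D_c = dE/dc = (-2/n) * sum( Y_i - (m*X_i + c) )

Each epoch then updates m -= L*D_m and c -= L*D_c, where L is the learning rate.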



<lang Typescript> let data: number[][] =

   [[32.5023452694530, 31.70700584656990],
   [53.4268040332750, 68.77759598163890],
   [61.5303580256364, 62.56238229794580],
   [47.4756396347860, 71.54663223356770],
   [59.8132078695123, 87.23092513368730],
   [55.1421884139438, 78.21151827079920],
   [52.2117966922140, 79.64197304980870],
   [39.2995666943170, 59.17148932186950],
   [48.1050416917682, 75.33124229706300],
   [52.5500144427338, 71.30087988685030],
   [45.4197301449737, 55.16567714595910],
   [54.3516348812289, 82.47884675749790],
   [44.1640494967733, 62.00892324572580],
   [58.1684707168577, 75.39287042599490],
   [56.7272080570966, 81.43619215887860],
   [48.9558885660937, 60.72360244067390],
   [44.6871962314809, 82.89250373145370],
   [60.2973268513334, 97.37989686216600],
   [45.6186437729558, 48.84715331735500],
   [38.8168175374456, 56.87721318626850],
   [66.1898166067526, 83.87856466460270],
   [65.4160517451340, 118.59121730252200],
   [47.4812086078678, 57.25181946226890],
   [41.5756426174870, 51.39174407983230],
   [51.8451869056394, 75.38065166531230],
   [59.3708220110895, 74.76556403215130],
   [57.3100034383480, 95.45505292257470],
   [63.6155612514533, 95.22936601755530],
   [46.7376194079769, 79.05240616956550],
   [50.5567601485477, 83.43207142132370],
   [52.2239960855530, 63.35879031749780],
   [35.5678300477466, 41.41288530370050],
   [42.4364769440556, 76.61734128007400],
   [58.1645401101928, 96.76956642610810],
   [57.5044476153417, 74.08413011660250],
   [45.4405307253199, 66.58814441422850],
   [61.8962226802912, 77.76848241779300],
   [33.0938317361639, 50.71958891231200],
   [36.4360095113868, 62.12457081807170],
   [37.6756548608507, 60.81024664990220],
   [44.5556083832753, 52.68298336638770],
   [43.3182826318657, 58.56982471769280],
   [50.0731456322890, 82.90598148507050],
   [43.8706126452183, 61.42470980433910],
   [62.9974807475530, 115.24415280079500],
   [32.6690437634671, 45.57058882337600],
   [40.1668990087037, 54.08405479622360],
   [53.5750775316736, 87.99445275811040],
   [33.8642149717782, 52.72549437590040],
   [64.7071386661212, 93.57611869265820],
   [38.1198240268228, 80.16627544737090],
   [44.5025380646451, 65.10171157056030],
   [40.5995383845523, 65.56230126040030],
   [41.7206763563412, 65.28088692082280],
   [51.0886346783367, 73.43464154632430],
   [55.0780959049232, 71.13972785861890],
   [41.3777265348952, 79.10282968354980],
   [62.4946974272697, 86.52053844034710],
   [49.2038875408260, 84.74269780782620],
   [41.1026851873496, 59.35885024862490],
   [41.1820161051698, 61.68403752483360],
   [50.1863894948806, 69.84760415824910],
   [52.3784462192362, 86.09829120577410],
   [50.1354854862861, 59.10883926769960],
   [33.6447060061917, 69.89968164362760],
   [39.5579012229068, 44.86249071116430],
   [56.1303888168754, 85.49806777884020],
   [57.3620521332382, 95.53668684646720],
   [60.2692143939979, 70.25193441977150],
   [35.6780938894107, 52.72173496477490],
   [31.5881169981328, 50.39267013507980],
   [53.6609322616730, 63.64239877565770],
   [46.6822286494719, 72.24725106866230],
   [43.1078202191024, 57.81251297618140],
   [70.3460756150493, 104.25710158543800],
   [44.4928558808540, 86.64202031882200],
   [57.5045333032684, 91.48677800011010],
   [36.9300766091918, 55.23166088621280],
   [55.8057333579427, 79.55043667850760],
   [38.9547690733770, 44.84712424246760],
   [56.9012147022470, 80.20752313968270],
   [56.8689006613840, 83.14274979204340],
   [34.3331247042160, 55.72348926054390],
   [59.0497412146668, 77.63418251167780],
   [57.7882239932306, 99.05141484174820],
   [54.2823287059674, 79.12064627468000],
   [51.0887198989791, 69.58889785111840],
   [50.2828363482307, 69.51050331149430],
   [44.2117417520901, 73.68756431831720],
   [38.0054880080606, 61.36690453724010],
   [32.9404799426182, 67.17065576899510],
   [53.6916395710700, 85.66820314500150],
   [68.7657342696216, 114.85387123391300],
   [46.2309664983102, 90.12357206996740],
   [68.3193608182553, 97.91982103524280],
   [50.0301743403121, 81.53699078301500],
   [49.2397653427537, 72.11183246961560],
   [50.0395759398759, 85.23200734232560],
   [48.1498588910288, 66.22495788805460],
   [25.1284846477723, 53.45439421485050]];

function lossFunction(arr0: number[], arr1: number[], arr2: number[]) {

   let n: number = arr0.length; // Number of elements in X
   //D_m = (-2/n) * sum(X * (Y - Y_pred))  # Derivative wrt m
   let a: number = (-2 / n) * (arr0.map((a, i) => a * (arr1[i] - arr2[i]))).reduce((sum, current) => sum + current);
   //D_c = (-2/n) * sum(Y - Y_pred)  # Derivative wrt c
   let b: number = (-2 / n) * (arr1.map((a, i) => (a - arr2[i]))).reduce((sum, current) => sum + current);
   return [a, b];

}

export const gradientDescentMain = () => {

   // Building the model
   let m: number = 0;
   let c: number = 0;
   let X_arr: number[];
   let Y_arr: number[];
   let Y_pred_arr: number[];
   let D_m: number = 0;
   let D_c: number = 0;
   let L: number = 0.00000001;  // The learning Rate
   let epochs: number = 10000000;  // The number of iterations to perform gradient descent
   //Initial guesses
   for (let i = 0; i < epochs; i++) {
       X_arr = data.map(function (value, index) { return value[0]; });
       Y_arr = data.map(function (value, index) { return value[1]; });
       // The current predicted value of Y
       Y_pred_arr = X_arr.map((a) => ((m * a) + c));
       let all = lossFunction(X_arr, Y_arr, Y_pred_arr);
       D_m = all[0];
       D_c = all[1];
       m = m - L * D_m;  // Update m
       c = c - L * D_c;  // Update c
   }
   console.log("m: " + m + " c: " + c);

}

gradientDescentMain(); </lang>

Wren

Translation of: Go
Library: Wren-fmt

<lang ecmascript>import "/fmt" for Fmt

// Function for which minimum is to be found.
var g = Fn.new { |x|

   return (x[0]-1)*(x[0]-1)*
       (-x[1]*x[1]).exp + x[1]*(x[1]+2)*
       (-2*x[0]*x[0]).exp

}

// Provides a rough calculation of gradient g(p).
var gradG = Fn.new { |p|

   var x = p[0]
   var y = p[1]
   return [2*(x-1)*(-y*y).exp - 4*x*(-2*x*x).exp*y*(y+2),
           -2*(x-1)*(x-1)*y*(-y*y).exp + (-2*x*x).exp*(y+2) + (-2*x*x).exp*y]

}

var steepestDescent = Fn.new { |x, alpha, tolerance|

   var n = x.count
    var g0 = g.call(x) // Initial estimate of result.
   // Calculate initial gradient.
   var fi = gradG.call(x)
   // Calculate initial norm.
   var delG = 0
   for (i in 0...n) delG = delG + fi[i]*fi[i]
   delG = delG.sqrt
   var b = alpha/delG
   // Iterate until value is <= tolerance.
   while (delG > tolerance) {
       // Calculate next value.
       for (i in 0...n) x[i] = x[i] - b*fi[i]
       // Calculate next gradient.
       fi = gradG.call(x)
       // Calculate next norm.
       delG = 0
       for (i in 0...n) delG = delG + fi[i]*fi[i]
       delG = delG.sqrt
       b = alpha/delG
       // Calculate next value.
       var g1 = g.call(x)
       // Adjust parameter.
       if (g1 > g0) {
           alpha = alpha / 2
       } else {
           g0 = g1
       }
   }

}

var tolerance = 0.0000006
var alpha = 0.1
var x = [0.1, -1] // Initial guess of location of minimum.

steepestDescent.call(x, alpha, tolerance)
System.print("Testing steepest descent method:")
Fmt.print("The minimum is at x = $f, y = $f for which f(x, y) = $f.", x[0], x[1], g.call(x))</lang>

Output:
Testing steepest descent method:
The minimum is at x = 0.107627, y = -1.223260 for which f(x, y) = -0.750063.

zkl

Translation of: Go

with tweaked gradG

<lang zkl>fcn steepestDescent(f, x,y, alpha, h){

  g0:=f(x,y);	# Initial estimate of result.
  fix,fiy := gradG(f,x,y,h);	# Calculate initial gradient

  # Calculate initial norm.
  b:=alpha / (delG := (fix*fix + fiy*fiy).sqrt());
  while(delG > h){	# Iterate until value is <= tolerance.
     x,y = x - b*fix, y - b*fiy;
     # Calculate next gradient and next value
     fix,fiy = gradG(f,x,y, h/=2);
     b=alpha / (delG = (fix*fix + fiy*fiy).sqrt());	# Calculate next norm.
     if((g1:=f(x,y)) > g0) alpha/=2 else g0 = g1;	# Adjust parameter.
  }
  return(x,y)

}

fcn gradG(f,x,y,h){ # gives a rough calculation of gradient f(x,y).

  g0:=f(x,y);
  return((f(x + h, y) - g0)/h, (f(x, y + h) - g0)/h)

}</lang>
<lang zkl>fcn f(x,y){ # Function for which minimum is to be found.

  (x - 1).pow(2)*(-y.pow(2)).exp() + 
  y*(y + 2)*(-2.0*x.pow(2)).exp()

}

tolerance,alpha := 0.0000006, 0.1;

x,y := 0.1, -1.0;	# Initial guess of location of minimum.
x,y = steepestDescent(f,x,y,alpha,tolerance);

println("Testing steepest descent method:"); println("The minimum is at (x,y) = (%f,%f). f(x,y) = %f".fmt(x,y,f(x,y)));</lang>

Output:
Testing steepest descent method:
The minimum is at (x,y) = (0.107608,-1.223299). f(x,y) = -0.750063