Gradient descent

[https://books.google.co.uk/books?id=dFHvBQAAQBAJ&pg=PA543&lpg=PA543&dq=c%23+steepest+descent+method+to+find+minima+of+two+variable+function&source=bl&ots=TCyD-ts9ui&sig=ACfU3U306Og2fOhTjRv2Ms-BW00IhomoBg&hl=en&sa=X&ved=2ahUKEwitzrmL3aXjAhWwVRUIHSEYCU8Q6AEwCXoECAgQAQ#v=onepage&q=c%23%20steepest%20descent%20method%20to%20find%20minima%20of%20two%20variable%20function&f=false This book excerpt] shows sample C# code for solving this task.
<br><br>
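Most of the samples below follow the same scheme: step from the current point against the gradient, scale the step so that its length is the current value of ''alpha'', and halve ''alpha'' whenever a step fails to reduce the function. As a sketch of this common iteration (not the code of any one sample):
<pre>
while |∇g(x)| > tolerance:
    x ← x - (alpha / |∇g(x)|)·∇g(x)
    if g(x) did not decrease:  alpha ← alpha/2
</pre>
A few samples (Fortran, Common Lisp, Forth) instead take a fixed multiple of the raw gradient at each step.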
 
=={{header|ALGOL 68}}==
{{works with|ALGOL 68G|Any - tested with release 2.8.3.win32}}
{{Trans|Go}} modified to use the actual gradient function, as in the {{Trans|Fortran}} sample.
The results agree with the Fortran sample and the Julia sample to 6 places.
<syntaxhighlight lang="algol68">PROC steepest descent = ( REF[]LONG REAL x, LONG REAL alphap, tolerance )VOID:
BEGIN
LONG REAL alpha := alphap;
LONG REAL g0 := g( x ); # Initial estimate of result. #
# Calculate initial gradient. #
[ LWB x : UPB x ]LONG REAL fi := grad g( x );
# Calculate initial norm. #
LONG REAL del g := 0.0;
FOR i FROM LWB x TO UPB x DO
del g +:= fi[ i ] * fi[ i ]
OD;
del g := long sqrt( del g );
LONG REAL b := alpha / del g;
# Iterate until value is <= tolerance. #
WHILE del g > tolerance DO
# Calculate next value. #
FOR i FROM LWB x TO UPB x DO
x[i] -:= b * fi[i]
OD;
# Calculate next gradient. #
fi := grad g( x );
# Calculate next norm. #
del g := 0;
FOR i FROM LWB x TO UPB x DO
del g +:= fi[ i ] * fi[ i ]
OD;
del g := long sqrt( del g );
IF del g > 0 THEN
b := alpha / del g;
# Calculate next value. #
LONG REAL g1 := g( x );
# Adjust parameter. #
IF g1 > g0 THEN
alpha /:= 2
ELSE
g0 := g1
FI
FI
OD
END # steepest descent # ;
# calculates the gradient of g(p). #
# The derivitives wrt x and y are (as in the Fortran sample ): #
# g' wrt x = 2( x - 1 )e^( - ( y^2 ) ) - 4xe^( -2( x^2) )y( y + 2 ) #
# g' wrt y = ( -2( x-1 )^2ye^( - (y^2) )) + e^(-2( x^2 ) )( y + 2 ) + e^( -2( x^2 ) )y #
PROC grad g = ( []LONG REAL p )[]LONG REAL:
BEGIN
[ LWB p : UPB p ]LONG REAL z;
LONG REAL x = p[ 0 ];
LONG REAL y = p[ 1 ];
z[ 0 ] := 2 * ( x - 1 ) * long exp( - ( y * y ) )
- 4 * x * long exp( -2 * ( x * x ) ) * y * ( y + 2 );
z[ 1 ] := ( -2 * ( x - 1 ) * ( x - 1 ) * y ) * long exp( - y * y )
+ long exp( -2 * x * x ) * ( y + 2 )
+ long exp( -2 * x * x ) * y;
z
END # grad g # ;
# Function for which minimum is to be found. #
# g( x, y ) = ( ( x - 1 )^2 )e^( - ( x^2 ) ) + y( y + 2 )e^( - 2(x^2)) #
PROC g = ( []LONG REAL x )LONG REAL:
( x[ 0 ] - 1 )
* ( x[ 0 ] - 1 )
* long exp( - x[ 1 ] * x[ 1 ] ) + x[ 1 ] * ( x[ 1 ] + 2 )
* long exp( - 2 * x[ 0 ] * x[ 0 ] )
;
BEGIN
LONG REAL tolerance := 0.0000006;
LONG REAL alpha := 0.1;
[ 0 : 1 ]LONG REAL x := ( []LONG REAL( 0.1, -1 ) )[ AT 0 ]; # Initial guess of location of minimum. #
steepest descent( x, alpha, tolerance );
print( ( "Testing steepest descent method:", newline ) );
print( ( "The minimum is at x[0] = ", fixed( x[ 0 ], -10, 6 ), ", x[1] = ", fixed( x[ 1 ], -10, 6 ), newline ) )
END</syntaxhighlight>
{{out}}
<pre>
Testing steepest descent method:
The minimum is at x[0] = 0.107627, x[1] = -1.223260
</pre>
 
=={{header|ALGOL W}}==
{{Trans|ALGOL 68}} which is a {{Trans|Go}} with the gradient function from {{Trans|Fortran}}
The results agree (to 6 places) with the Fortran and Julia samples.
<syntaxhighlight lang="algolw">begin
procedure steepestDescent ( long real array x ( * ); long real value alphap, tolerance ) ;
begin
long real array fi ( 0 :: 1 );
long real alpha, g0, g1, delG, b;
alpha := alphap;
g0 := g( x ); % Initial estimate of result. %
% Calculate initial gradient. %
gradG( fi, x );
% Calculate initial norm. %
delG := 0.0;
for i := 0 until 1 do delG := delG + ( fi( i ) * fi( i ) );
delG := longSqrt( delG );
b := alpha / delG;
% Iterate until value is <= tolerance. %
while delG > tolerance do begin
% Calculate next value. %
for i := 0 until 1 do x(i) := x( i ) - ( b * fi(i) );
% Calculate next gradient. %
gradG( fi, x );
% Calculate next norm. %
delG := 0;
for i := 0 until 1 do delG := delg + ( fi( i ) * fi( i ) );
delG := longSqrt( delG );
if delG > 0 then begin
b := alpha / delG;
% Calculate next value. %
g1 := g( x );
% Adjust parameter. %
if g1 > g0
then alpha := alpha / 2
else g0 := g1
end if_delG_gt_0
end while_delG_gt_tolerance
end steepestDescent ;
% Provides a rough calculation of gradient g(x). %
procedure gradG ( long real array z, p ( * ) ) ;
begin
long real x, y;
x := p( 0 );
y := p( 1 );
z( 0 ) := 2 * ( x - 1 ) * longExp( - ( y * y ) )
- 4 * x * longExp( -2 * ( x * x ) ) * y * ( y + 2 );
z( 1 ) := ( -2 * ( x - 1 ) * ( x - 1 ) * y ) * longExp( - y * y )
+ longExp( -2 * x * x ) * ( y + 2 )
+ longExp( -2 * x * x ) * y
end gradG ;
% Function for which minimum is to be found. %
long real procedure g ( long real array x ( * ) ) ;
( x( 0 ) - 1 ) * ( x( 0 ) - 1 ) * longExp( - x( 1 ) * x( 1 ) )
+ x( 1 ) * ( x( 1 ) + 2 ) * longExp( - 2 * x( 0 ) * x( 0 ) )
;
begin
long real alpha, tolerance;
long real array x ( 0 :: 1 );
x( 0 ) := 0.1; % Initial guess of location of minimum. %
x( 1 ) := -1;
tolerance := 0.0000006;
alpha := 0.1;
steepestDescent( x, alpha, tolerance );
r_format := "A"; r_w := 11; r_d := 7; s_w := 0; % output formatting %
write( "Testing steepest descent method:" );
write( "The minimum is at x(0) = ", x( 0 ), ", x(1) = ", x( 1 ) )
end
end.</syntaxhighlight>
{{out}}
<pre>
Testing steepest descent method:
The minimum is at x(0) = 0.1076268, x(1) = -1.2232596
</pre>
 
=={{header|Common Lisp}}==
{{works with|SBCL}}
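The partial derivatives are approximated numerically with central differences that are relative to the size of each variable; a sketch of the scheme the code below uses (with ε = <code>*epsilon*</code>):
<pre>
∂f/∂x ≈ ( f(x(1+ε), y) - f(x(1-ε), y) ) / (2xε)
</pre>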
 
<syntaxhighlight lang="Lisp">
(defparameter *epsilon* (sqrt 0.0000001d0))
 
(defun myfun (x y)
(+ (* (- x 1)
(- x 1)
(exp (- (expt y 2))))
(* y (+ 2 y)
(exp (- (* 2 (expt x 2)))))))
 
(defun xd (x y)
(/ (- (myfun (* x (+ 1 *epsilon*)) y)
(myfun (* x (- 1 *epsilon*)) y))
(* 2 x *epsilon*)))
 
(defun yd (x y)
(/ (- (myfun x (* y (+ 1 *epsilon*)))
(myfun x (* y (- 1 *epsilon*))))
(* 2 y *epsilon*)))
 
(defun gd ()
(let ((x 0.1d0)
(y -1.0d0))
(dotimes (i 31)
(setf x (- x (* 0.3d0 (xd x y))))
(setf y (- y (* 0.3d0 (yd x y )))))
(list x y)))
 
 
 
</syntaxhighlight>
{{out}}
<pre>
(0.10762684380416455 -1.2232596637047382)
</pre>
 
 
=={{header|Forth}}==
{{works with|SwiftForth}}
{{libheader|fpmath}}
 
 
Approach with analytical partial derivatives, GD1 and GD2:
 
<syntaxhighlight lang="forth">
 
0e fvalue px
0e fvalue py
fvariable x 0.1e x F!
fvariable y -1e y F!
 
: ^2 FDUP F* ;
 
\ gradient for x, analytical function
 
: GD1 ( F: x y -- dfxy)
to py to px
2e px 1e F- F* py ^2 FNEGATE FEXP F*
-4e px F* py F* py 2e F+ F* px ^2 -2e F* FEXP F*
F+
;
 
\ gradient for y, analytical function
 
: GD2 ( F: x y -- dfxy )
to py to px
px ^2 -2e F* FEXP FDUP
py F* FSWAP
py 2e F+ F* F+
py ^2 FNEGATE FEXP py F*
px 1e F- ^2 F*
-2e F*
F+
;
 
\ gradient descent
: GD ( x y n -- ) \ gradient descent, initial guess and number of iterations
FOVER FOVER y F! x F!
0 do
FOVER FOVER
GD1 0.3e F* x F@ FSWAP F- x F!
GD2 0.3e F* y F@ FSWAP F- y F!
x F@ y F@ FOVER FOVER FSWAP F. F. CR
loop
;
 
</syntaxhighlight>
 
Approach with the main function and numerical gradients xd and xy (relative central differences, as in the Common Lisp sample above):
 
<syntaxhighlight lang="forth">
 
0e fvalue px
0e fvalue py
fvariable x 0.1e x F!
fvariable y -1e y F!
 
1e-6 fvalue EPSILON
 
: fep EPSILON FSQRT ;
 
 
: MYFUN ( x y -- fxy )
FDUP FROT FDUP ( y y x x )
( x ) 1e F- FDUP F* ( y y x a )
FROT ( y x a y )
( y ) FDUP F* FNEGATE FEXP F* ( y x b )
FROT ( x b y )
( y ) FDUP 2e F+ F* ( x b c )
FROT ( b c x )
( x ) FDUP F* -2e F* FEXP F*
F+
;
 
 
: xd ( x y )
to py to px
px 1e fep F+ F* py myfun px 1e fep F- F* py myfun F- 2e px F* fep F* F/
;
 
: xy ( x y )
to py to px
py 1e fep F+ F* px FSWAP myfun py 1e fep F- F* px FSWAP myfun F- 2e py F* fep F* F/
;
 
 
: GDB ( x y n -- ) \ gradient descent, initial guess and number of iterations
CR FOVER FOVER y F! x F!
0 do
FOVER FOVER
xd 0.3e F* x F@ FSWAP F- x F!
xy 0.3e F* y F@ FSWAP F- y F!
x F@ y F@ FOVER FOVER FSWAP F. F. CR
loop
;
</syntaxhighlight>
 
{{out}}
<pre>
 
0.1e -1e 20 GD
0.1810310 -1.1787894
0.1065262 -1.1965306
0.1144636 -1.2181779
0.1075002 -1.2206140
0.1082816 -1.2227594
0.1076166 -1.2230041
0.1076894 -1.2232108
0.1076261 -1.2232350
0.1076328 -1.2232549
0.1076267 -1.2232573
0.1076274 -1.2232592
0.1076268 -1.2232594
0.1076269 -1.2232596
0.1076268 -1.2232596
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
 
0.1e -1e 20 GDB
0.1810310 -1.1787893
0.1065262 -1.1965305
0.1144636 -1.2181779
0.1075002 -1.2206140
0.1082816 -1.2227594
0.1076166 -1.2230041
0.1076894 -1.2232108
0.1076261 -1.2232350
0.1076328 -1.2232549
0.1076268 -1.2232573
0.1076274 -1.2232592
0.1076268 -1.2232594
0.1076269 -1.2232596
0.1076268 -1.2232596
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
</pre>
 
=={{header|Fortran}}==
The partial derivatives can be worked out with Maxima; the tail of that session, showing the derivative with respect to y, is:
<pre>
(-2*(x-1)**2*y*exp(-y**2))+exp(-2*x**2)*(y+2)+exp(-2*x**2)*y
(%o7) done
</pre>
The optimization subroutine GD sets the reverse communication variable IFLAG. This allows the evaluation of the function and the gradient to be done separately.
<syntaxhighlight lang="fortran">      SUBROUTINE EVALFG (N, X, F, G)
       IMPLICIT NONE
       INTEGER N
       DOUBLE PRECISION X(N), F, G(N)
       F = (X(1) - 1.D0)**2 * EXP(-X(2)**2) +
     $       X(2) * (X(2) + 2.D0) * EXP(-2.D0 * X(1)**2)
       G(1) = 2.D0 * (X(1) - 1.D0) * EXP(-X(2)**2) - 4.D0 * X(1) *
     $       EXP(-2.D0 * X(1)**2) * X(2) * (X(2) + 2.D0)
       G(2) = (-2.D0 * (X(1) - 1.D0)**2 * X(2) * EXP(-X(2)**2)) +
     $       EXP(-2.D0 * X(1)**2) * (X(2) + 2.D0) +
     $       EXP(-2.D0 * X(1)**2) * X(2)
       RETURN
      END

*___Name______Type________In/Out___Description_________________________
*   N         Integer     In       Number of Variables.
*   X(N)      Double      Both     Variables
*   G(N)      Double      Both     Gradient
*   TOL       Double      In       Relative convergence tolerance
*   IFLAG     Integer     Out      Reverse Communication Flag
*                                    0 on output: done
*                                    1 compute G and call again
*-----------------------------------------------------------------------
      SUBROUTINE GD (N, X, G, TOL, IFLAG)
       IMPLICIT NONE
       INTEGER N, IFLAG
       DOUBLE PRECISION X(N), G(N), TOL
       DOUBLE PRECISION ETA
       PARAMETER (ETA = 0.33D0)           ! Learning rate
       INTEGER I
       DOUBLE PRECISION GNORM             ! norm of gradient

       GNORM = 0.D0                       ! convergence test
       DO I = 1, N
         GNORM = GNORM + G(I)**2
       END DO
       GNORM = SQRT(GNORM)
       IF (GNORM .LE. TOL) THEN
         IFLAG = 0
         RETURN                           ! success
       END IF

       DO I = 1, N                        ! take step
         X(I) = X(I) - ETA * G(I)
       END DO
       IFLAG = 1
       RETURN                             ! let main program evaluate G
      END ! of gd
 
      PROGRAM STEEPEST
       IMPLICIT NONE
       INTEGER N
       PARAMETER (N = 2)
       INTEGER ITER, J, IFLAG
       DOUBLE PRECISION X(N), F, G(N), TOL

       X(1) = -0.1D0                      ! initial values
       X(2) = -1.0D0
       TOL = 1.D-15
       CALL EVALFG (N, X, F, G)
       DO J = 1, 1 000 000
         ITER = J
         CALL GD (N, X, G, TOL, IFLAG)
         IF (IFLAG .EQ. 0) GO TO 50
         CALL EVALFG (N, X, F, G)
       END DO
       STOP 'too many iterations!'

 50    PRINT '(A, I7, A, F19.15, A, F19.15, A, F19.15)',
     $    'After ', ITER, ' steps, found minimum at x=',
     $    X(1), ' y=', X(2), ' of f=', F
       STOP 'program complete'
      END</syntaxhighlight>
{{out}}
<pre>After      31 steps, found minimum at x=  0.107626843548372 y= -1.223259663839920 of f= -0.750063420551493
STOP program complete
</pre>

=={{header|Go}}==
This is a translation of the C# code in the book excerpt linked to above and hence also of the first Typescript example below.

However, since it was originally written, I've substituted Fortran's gradient function for the original one (see Talk page), which now gives results that agree (to 6 decimal places) with those of the Fortran, Julia, Algol 68 and Algol W solutions. As a number of other solutions are based on this one, I suggest their authors update them accordingly.
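For reference, the partial derivatives used (the same ones as in the Fortran and ALGOL 68 samples above) are:
<pre>
∂g/∂x = 2(x-1)·e^(-y²) - 4x·y(y+2)·e^(-2x²)
∂g/∂y = -2(x-1)²·y·e^(-y²) + (y+2)·e^(-2x²) + y·e^(-2x²)
</pre>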
<langsyntaxhighlight lang="go">package main
 
import (
Line 139 ⟶ 489:
func steepestDescent(x []float64, alpha, tolerance float64) {
n := len(x)
h := tolerance
g0 := g(x) // Initial estimate of result.
 
// Calculate initial gradient.
fi := gradG(x, h)
 
// Calculate initial norm.
Line 159 ⟶ 508:
x[i] -= b * fi[i]
}
h /= 2
 
// Calculate next gradient.
fi = gradG(x, h)
 
// Calculate next norm.
Line 184 ⟶ 532:
}
 
// Provides a rough calculation of gradient g(xp).
func gradG(xp []float64, h float64) []float64 {
nz := make([]float64, len(xp))
zx := make(p[0]float64, n)
y := make(p[1]float64, n)
z[0] = 2*(x-1)*math.Exp(-y*y) - 4*x*math.Exp(-2*x*x)*y*(y+2)
copy(y, x)
z[1] = -2*(x-1)*(x-1)*y*math.Exp(-y*y) + math.Exp(-2*x*x)*(y+2) + math.Exp(-2*x*x)*y
g0 := g(x)
 
for i := 0; i < n; i++ {
y[i] += h
z[i] = (g(y) - g0) / h
}
return z
}
Line 213 ⟶ 556:
steepestDescent(x, alpha, tolerance)
fmt.Println("Testing steepest descent method:")
fmt.PrintlnPrintf("The minimum is at x[0] =" %f, y = %f for which f(x[0], "y) = %f.\bn", x[10] =", x[1], g(x))
}</syntaxhighlight>
}
</lang>
 
{{out}}
<pre>
Testing steepest descent method:
The minimum is at x = 0.107627, y = -1.223260 for which f(x, y) = -0.750063.
</pre>
 
=={{header|J}}==
{{works with|Jsoftware|>= 9.0.1}}
{{libheader|math/calculus}}
 
In earlier versions, without the calculus library, <code>D.</code> can be used instead of <code>pderiv</code>.
 
<syntaxhighlight lang="J">
 
 
load 'math/calculus'
coinsert 'jcalculus'
 
NB. initial guess
 
go=: 0.1 _1
 
NB. function
 
func=: monad define
'xp yp' =. y
((1-xp)*(1-xp) * ] ^-(yp)^2) + yp*(yp+2)* ] ^ _2 * xp^2 NB. evaluates from right to left without precedence
)
 
NB. gradient descent
 
shortygd =: monad define
go=.y
go=. go - 0.03 * func pderiv 1 ] go NB. use D. instead of pderiv for earlier versions
)
 
NB. use:
 
shortygd ^:_ go
 
</syntaxhighlight>
{{out}}
<pre>
0.107627 _1.22326
</pre>
 
 
=={{header|jq}}==
'''Adapted from [[#Go|Go]]'''
 
'''Works with jq and gojq, the C and Go implementations of jq'''
 
The function, g, and its gradient, gradG, are defined as 0-arity filters so that they
can be passed as formal parameters to the `steepestDescent` function.
<syntaxhighlight lang=jq>
def sq: .*.;

def ss(stream): reduce stream as $x (0; . + ($x|sq));

# Function for which minimum is to be found: input is a 2-array
def g:
  . as $x
  | (($x[0]-1)|sq) * (( - ($x[1]|sq))|exp)
    + $x[1]*($x[1]+2) * ((-2*($x[0]|sq))|exp);

# Provide a rough calculation of gradient g(p):
def gradG:
  . as [$x, $y]
  | ($x*$x) as $x2
  | [ 2*($x-1)*(-($y|sq)|exp) - 4 * $x * (-2*($x|sq)|exp) * $y*($y+2),
      - 2*($x-1)*($x-1)*$y*((-$y*$y)|exp)
        + ((-2*$x2)|exp)*($y+2) + ((-2*$x2)|exp)*$y ] ;

def steepestDescent(g; gradG; $x; $alpha; $tolerance):
  ($x|length) as $n
  | { g0: ($x|g),          # Initial estimate of result
      alpha: $alpha,
      fi: ($x|gradG),      # Calculate initial gradient
      x: $x,
      iterations: 0
    }

  # Calculate initial norm:
  | .delG = (ss( .fi[] ) | sqrt )
  | .b = .alpha/.delG

  # Iterate until value is <= $tolerance:
  | until (.delG <= $tolerance;
        .iterations += 1

        # Calculate next value:
      | reduce range(0; $n) as $i (.;
            .x[$i] += (- .b * .fi[$i]) )

        # Calculate next gradient:
      | .fi = (.x|gradG)

        # Calculate next norm:
      | .delG = (ss( .fi[] ) | sqrt)
      | .b = .alpha/.delG

        # Calculate next value:
      | .g1 = (.x|g)

        # Adjust parameter:
      | if .g1 > .g0
        then .alpha /= 2
        else .g0 = .g1
        end
    ) ;

def tolerance: 0.0000006;
def alpha: 0.1;
def x: [0.1, -1];          # Initial guess of location of minimum.

def task:
  steepestDescent(g; gradG; x; alpha; tolerance)
  | "Testing the steepest descent method:",
    "After \(.iterations) iterations,",
    "the minimum is at x = \(.x[0]), y = \(.x[1]) for which f(x, y) = \(.x|g)." ;

task
</syntaxhighlight>
{{output}}
<pre>
Testing the steepest descent method:
After 24 iterations,
the minimum is at x = 0.10762682432948058, y = -1.2232596548816101 for which f(x, y) = -0.7500634205514924.
</pre>
 
=={{header|Julia}}==
<langsyntaxhighlight lang="julia">using Optim, Base.MathConstants
 
f(x) = (x[1] - 1) * (x[1] - 1) * e^(-x[2]^2) + x[2] * (x[2] + 2) * e^(-2 * x[1]^2)
 
println(optimize(f, [0.1, -1.0], GradientDescent()))
</syntaxhighlight>{{out}}
<pre>
Results of Optimization Algorithm
...
* Gradient Calls: 35
</pre>
 
=={{header|Nim}}==
{{trans|Go}}
The gradient function has been rewritten to remove a term in the partial derivative with respect to y (two terms instead of three). This doesn’t change the result, which agrees with those of Go, Fortran, Julia, etc.
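That is, the last two terms of the derivative with respect to y combine into one:
<pre>
(y+2)·e^(-2x²) + y·e^(-2x²) = 2(y+1)·e^(-2x²)
</pre>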
 
<syntaxhighlight lang="nim">import math, strformat
 
 
func steepDescent(g: proc(x: openArray[float]): float;
gradG: proc(p: openArray[float]): seq[float];
x: var openArray[float]; alpha, tolerance: float) =
let n = x.len
var alpha = alpha
var g0 = g(x) # Initial estimate of result.
 
# Calculate initial gradient.
var fi = gradG(x)
 
# Calculate initial norm.
var delG = 0.0
for i in 0..<n:
delG += fi[i] * fi[i]
delG = sqrt(delG)
var b = alpha / delG
 
# Iterate until value is <= tolerance.
while delG > tolerance:
# Calculate next value.
for i in 0..<n:
x[i] -= b * fi[i]
 
# Calculate next gradient.
fi = gradG(x)
 
# Calculate next norm.
delG = 0
for i in 0..<n:
delG += fi[i] * fi[i]
delG = sqrt(delG)
b = alpha / delG
 
# Calculate next value.
let g1 = g(x)
 
# Adjust parameter.
if g1 > g0: alpha *= 0.5
else: g0 = g1
 
 
when isMainModule:
 
func g(x: openArray[float]): float =
## Function for which minimum is to be found.
(x[0]-1) * (x[0]-1) * exp(-x[1]*x[1]) + x[1] * (x[1]+2) * exp(-2*x[0]*x[0])
 
func gradG(p: openArray[float]): seq[float] =
## Provides a rough calculation of gradient g(p).
result = newSeq[float](p.len)
let x = p[0]
let y = p[1]
result[0] = 2 * (x-1) * exp(-y*y) - 4 * x * exp(-2*x*x) * y * (y+2)
result[1] = -2 * (x-1) * (x-1) * y * exp(-y*y) + 2 * (y+1) * exp(-2*x*x)
 
const
Tolerance = 0.0000006
Alpha = 0.1
 
var x = [0.1, -1.0] # Initial guess of location of minimum.
 
steepDescent(g, gradG, x, Alpha, Tolerance)
echo "Testing steepest descent method:"
echo &"The minimum is at x = {x[0]:.12f}, y = {x[1]:.12f} for which f(x, y) = {g(x):.12f}"</syntaxhighlight>
 
{{out}}
<pre>Testing steepest descent method:
The minimum is at x = 0.107626824329, y = -1.223259654882 for which f(x, y) = -0.750063420551</pre>
 
=={{header|Perl}}==
Calculate with <code>bignum</code> for numerical stability.
{{trans|Raku}}
<langsyntaxhighlight lang="perl">use strict;
use warnings;
use bignum;
...
my @x = <0.1 -1>; # Initial guess of location of minimum.
 
printf "The minimum is at x[0] = %.6f, x[1] = %.6f", steepestDescent($alpha, $tolerance, @x);</langsyntaxhighlight>
{{out}}
<pre>The minimum is at x[0] = 0.107653, x[1] = -1.223370</pre>
Line 312 ⟶ 856:
=={{header|Phix}}==
{{trans|Go}}
<!--<syntaxhighlight lang="phix">(phixonline)-->
... and just like Go, the results don't quite match anything else.
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<lang Phix>-- Function for which minimum is to be found.
<span style="color: #000080;font-style:italic;">-- Function for which minimum is to be found.</span>
function g(sequence x)
<span style="color: #008080;">function</span> <span style="color: #000000;">g</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">x</span><span style="color: #0000FF;">)</span>
atom {x0,x1} = x
<span style="color: #004080;">atom</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">x0</span><span style="color: #0000FF;">,</span><span style="color: #000000;">x1</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">x</span>
return (x0-1)*(x0-1)*exp(-x1*x1) +
<span style="color: #008080;">return</span> <span style="color: #0000FF;">(</span><span style="color: #000000;">x0</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)*(</span><span style="color: #000000;">x0</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">)*</span><span style="color: #7060A8;">exp</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">x1</span><span style="color: #0000FF;">*</span><span style="color: #000000;">x1</span><span style="color: #0000FF;">)</span> <span style="color: #0000FF;">+</span>
x1*(x1+2)*exp(-2*x0*x0)
<span style="color: #000000;">x1</span><span style="color: #0000FF;">*(</span><span style="color: #000000;">x1</span><span style="color: #0000FF;">+</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)*</span><span style="color: #7060A8;">exp</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">2</span><span style="color: #0000FF;">*</span><span style="color: #000000;">x0</span><span style="color: #0000FF;">*</span><span style="color: #000000;">x0</span><span style="color: #0000FF;">)</span>
end function
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
 
-- Provides a rough calculation of gradient g(x).
<span style="color: #000080;font-style:italic;">-- Provides a rough calculation of gradient g(x).</span>
function gradG(sequence x, atom h)
<span style="color: #008080;">function</span> <span style="color: #000000;">gradG</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">p</span><span style="color: #0000FF;">)</span>
integer n = length(x)
<span style="color: #004080;">atom</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">x</span><span style="color: #0000FF;">,</span><span style="color: #000000;">y</span><span style="color: #0000FF;">}</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">p</span><span style="color: #0000FF;">,</span>
sequence z = repeat(0, n)
<span style="color: #000000;">xm1</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">x</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span>
atom g0 := g(x)
<span style="color: #000000;">emyy</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">exp</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">y</span><span style="color: #0000FF;">*</span><span style="color: #000000;">y</span><span style="color: #0000FF;">),</span>
for i=1 to n do
<span style="color: #000000;">em2xx</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">exp</span><span style="color: #0000FF;">(-</span><span style="color: #000000;">2</span><span style="color: #0000FF;">*</span><span style="color: #000000;">x</span><span style="color: #0000FF;">*</span><span style="color: #000000;">x</span><span style="color: #0000FF;">),</span>
x[i] += h
<span style="color: #000000;">xm12emyy</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">xm1</span><span style="color: #0000FF;">*</span><span style="color: #000000;">2</span><span style="color: #0000FF;">*</span><span style="color: #000000;">emyy</span>
z[i] = (g(x) - g0) / h
<span style="color: #000000;">p</span> <span style="color: #0000FF;">=</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">xm12emyy</span> <span style="color: #0000FF;">-</span> <span style="color: #000000;">4</span><span style="color: #0000FF;">*</span><span style="color: #000000;">x</span><span style="color: #0000FF;">*</span><span style="color: #000000;">em2xx</span><span style="color: #0000FF;">*</span><span style="color: #000000;">y</span><span style="color: #0000FF;">*(</span><span style="color: #000000;">y</span><span style="color: #0000FF;">+</span><span style="color: #000000;">2</span><span style="color: #0000FF;">),</span>
end for
<span style="color: #0000FF;">-</span><span style="color: #000000;">xm12emyy</span><span style="color: #0000FF;">*</span><span style="color: #000000;">xm1</span><span style="color: #0000FF;">*</span><span style="color: #000000;">y</span> <span style="color: #0000FF;">+</span> <span style="color: #000000;">em2xx</span><span style="color: #0000FF;">*(</span><span style="color: #000000;">2</span><span style="color: #0000FF;">*</span><span style="color: #000000;">y</span><span style="color: #0000FF;">+</span><span style="color: #000000;">2</span><span style="color: #0000FF;">)}</span>
return z
<span style="color: #008080;">return</span> <span style="color: #000000;">p</span>
end function
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
 
function steepestDescent(sequence x, atom alpha, tolerance)
<span style="color: #008080;">function</span> <span style="color: #000000;">steepestDescent</span><span style="color: #0000FF;">(</span><span style="color: #004080;">sequence</span> <span style="color: #000000;">x</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">atom</span> <span style="color: #000000;">alpha</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">tolerance</span><span style="color: #0000FF;">)</span>
integer n = length(x)
<span style="color: #004080;">sequence</span> <span style="color: #000000;">fi</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">gradG</span><span style="color: #0000FF;">(</span><span style="color: #000000;">x</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- Calculate initial gradient</span>
atom h = tolerance,
<span style="color: #004080;">atom</span> <span style="color: #000000;">g0</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">g</span><span style="color: #0000FF;">(</span><span style="color: #000000;">x</span><span style="color: #0000FF;">),</span> <span style="color: #000080;font-style:italic;">-- Initial estimate of result</span>
g0 = g(x) -- Initial estimate of result.
<span style="color: #000000;">delG</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sqrt</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">sum</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">sq_mul</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fi</span><span style="color: #0000FF;">,</span><span style="color: #000000;">fi</span><span style="color: #0000FF;">))),</span> <span style="color: #000080;font-style:italic;">-- & norm</span>
 
<span style="color: #000000;">b</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">alpha</span> <span style="color: #0000FF;">/</span> <span style="color: #000000;">delG</span>
-- Calculate initial gradient.
<span style="color: #004080;">integer</span> <span style="color: #000000;">icount</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span> <span style="color: #000080;font-style:italic;">-- iteration limitor/sanity</span>
sequence fi = gradG(x, h)
<span style="color: #008080;">while</span> <span style="color: #000000;">delG</span><span style="color: #0000FF;">></span><span style="color: #000000;">tolerance</span> <span style="color: #008080;">do</span> <span style="color: #000080;font-style:italic;">-- Iterate until &lt;= tolerance</span>
 
<span style="color: #000000;">x</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sq_sub</span><span style="color: #0000FF;">(</span><span style="color: #000000;">x</span><span style="color: #0000FF;">,</span><span style="color: #7060A8;">sq_mul</span><span style="color: #0000FF;">(</span><span style="color: #000000;">b</span><span style="color: #0000FF;">,</span><span style="color: #000000;">fi</span><span style="color: #0000FF;">))</span> <span style="color: #000080;font-style:italic;">-- Calculate next value</span>
-- Calculate initial norm.
<span style="color: #000000;">fi</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">gradG</span><span style="color: #0000FF;">(</span><span style="color: #000000;">x</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- next gradient</span>
atom delG = sqrt(sum(sq_mul(fi,fi))),
<span style="color: #000000;">delG</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sqrt</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">sum</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">sq_mul</span><span style="color: #0000FF;">(</span><span style="color: #000000;">fi</span><span style="color: #0000FF;">,</span><span style="color: #000000;">fi</span><span style="color: #0000FF;">)))</span> <span style="color: #000080;font-style:italic;">-- next norm</span>
b = alpha / delG
<span style="color: #000000;">b</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">alpha</span> <span style="color: #0000FF;">/</span> <span style="color: #000000;">delG</span>
 
<span style="color: #004080;">atom</span> <span style="color: #000000;">g1</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">g</span><span style="color: #0000FF;">(</span><span style="color: #000000;">x</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- next value</span>
-- Iterate until value is <= tolerance.
<span style="color: #008080;">if</span> <span style="color: #000000;">g1</span><span style="color: #0000FF;">></span><span style="color: #000000;">g0</span> <span style="color: #008080;">then</span>
while delG>tolerance do
<span style="color: #000000;">alpha</span> <span style="color: #0000FF;">/=</span> <span style="color: #000000;">2</span> <span style="color: #000080;font-style:italic;">-- Adjust parameter</span>
-- Calculate next value.
x <span style="color: sq_sub(x,sq_mul(b,fi))#008080;">else</span>
<span style="color: #000000;">g0</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">g1</span>
h /= 2
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">icount</span> <span style="color: #0000FF;">+=</span> <span style="color: #000000;">1</span>
-- Calculate next gradient.
<span style="color: #7060A8;">assert</span><span style="color: #0000FF;">(</span><span style="color: #000000;">icount</span><span style="color: #0000FF;"><</span><span style="color: #000000;">100</span><span style="color: #0000FF;">)</span> <span style="color: #000080;font-style:italic;">-- (increase if/when necessary)</span>
fi = gradG(x, h)
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">return</span> <span style="color: #000000;">x</span>
-- Calculate next norm.
<span style="color: #008080;">end</span> <span style="color: #008080;">function</span>
delG = sqrt(sum(sq_mul(fi,fi)))
b = alpha / delG
<span style="color: #008080;">constant</span> <span style="color: #000000;">alpha</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0.1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">tolerance</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0.00000000000001</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">x</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">steepestDescent</span><span style="color: #0000FF;">({</span><span style="color: #000000;">0.1</span><span style="color: #0000FF;">,-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">},</span> <span style="color: #000000;">alpha</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">tolerance</span><span style="color: #0000FF;">)</span>
-- Calculate next value.
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Testing steepest descent method:\n"</span><span style="color: #0000FF;">)</span>
atom g1 = g(x)
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"The minimum is at x = %.13f, y = %.13f for which f(x, y) = %.15f\n"</span><span style="color: #0000FF;">,</span> <span style="color: #0000FF;">{</span><span style="color: #000000;">x</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">x</span><span style="color: #0000FF;">[</span><span style="color: #000000;">2</span><span style="color: #0000FF;">],</span> <span style="color: #000000;">g</span><span style="color: #0000FF;">(</span><span style="color: #000000;">x</span><span style="color: #0000FF;">)})</span>
<!--</syntaxhighlight>-->
-- Adjust parameter.
if g1>g0 then
alpha /= 2
else
g0 = g1
end if
end while
return x
end function
constant tolerance = 0.0000006, alpha = 0.1
sequence x = steepestDescent({0.1,-1}, alpha, tolerance)
printf(1,"Testing steepest descent method:\n")
printf(1,"The minimum is at x[1] = %.16f, x[1] = %.16f\n", x)</lang>
{{out}}
Results match Fortran, most others to 6 or 7dp.<br>
Some slightly unexpected/unusual (but I think acceptable) variance was noted when playing with different tolerances, be warned.<br>
Results on 32/64 bit Phix agree to 13dp, which I therefore choose to show in full here (but otherwise would not really trust).
<pre>
Testing steepest descent method:
The minimum is at x = 0.1076268435484, y = -1.2232596638399 for which f(x, y) = -0.750063420551493
</pre>
 
I could have used ∇ and Δ in the variable names, but it looked too confusing, so I've gone with <var>grad-</var> and <var>del-</var>
 
<langsyntaxhighlight lang="racket">#lang racket
 
(define (apply-vector f v)
  ...)

(module+ main
  (Gradient-descent))
</syntaxhighlight>
 
{{out}}

=={{header|Raku}}==
(formerly Perl 6)
{{trans|Go}}
<syntaxhighlight lang="raku" line># 20200904 Updated Raku programming solution
<lang perl6>use v6.d;
 
sub steepestDescent(@x, $alpha is copy, $h is copy) {
 
my $g0 = g( |@x) ; # Initial estimate of result.
 
my @fi = gradG( |@x, $h, $g0) ; # Calculate initial gradient
 
my $b = $alpha / my $delG = sqrt ( sum @fi»² ) ; # Calculate initial norm.
my $b = $alpha / sqrt(my $delG = sum(map {$_²}, @fi));
 
while ( $delG > $h ) { # Iterate until value is <= tolerance.
 
for @fi.kv -> $i, $j { @x[$i] «-»= $b «*« $j }@fi; # Calculate next value.
# Calculate next gradient and next value
@fi = gradG(@x, $h /= 2, my $g1 = g(@x));
 
$b@fi = $alpha / sqrt($delG = sum(map {$_²},gradG |@fi)x ); # Calculate next norm.gradient and next value
 
$b = $alpha / ($delG = sqrt( sum @fi»² )); # Calculate next norm.
 
my $g1 = g |@x ;
 
$g1 > $g0 ?? ( $alpha /= 2 ) !! ( $g0 = $g1 ) # Adjust parameter.
Line 468 ⟶ 1,001:
}
 
sub gradG(@\x is copy, $h, $g0\y) { # gives a rough calculation of gradient g(x).
2*(x-1)*exp(-y²) - 4*x*exp(-2*x²)*y*(y+2) , -2*(x-1)²*y*exp(-y²) + exp(-2*x²)*(2*y+2)
return map { $_ += $h ; (g(@x) - $g0) / $h }, @x
}
 
# Function for which minimum is to be found.
sub g(\x,\y) { (x[0]-1)² * exp(-x[1]y²) + x[1]y*(x[1]y+2) * exp(-2*x[0]²) }
 
 
my $tolerance = 0.0000006 ; my $alpha = 0.1;
Line 484 ⟶ 1,016:
say "Testing steepest descent method:";
say "The minimum is at x[0] = ", @x[0], ", x[1] = ", @x[1];
</syntaxhighlight>
</lang>
{{out}}
<pre>Testing steepest descent method:
The minimum is at x[0] = 0.10762682432947938, x[1] = -1.2232596548816097
</pre>
 
=={{header|REXX}}==
The &nbsp; ''tolerance'' &nbsp; can be much smaller; &nbsp; a tolerance of &nbsp; '''1e-200''' &nbsp; was tested. &nbsp; It works, but causes the program to execute a bit slower, though still with sub-second execution time. &nbsp; (The working precision is taken from the number of decimal digits in the value returned by the &nbsp; '''e''' &nbsp; function, so a far smaller tolerance needs a correspondingly longer value of &nbsp; '''e'''.)
<syntaxhighlight lang="rexx">/*REXX pgm searches for minimum values of the bi─variate function (AKA steepest descent)*/
numeric digits (length( e() ) - length(.) ) % 2  /*use half of number decimal digs in E.*/
tolerance= 1e-30                                 /*use a much smaller tolerance for REXX*/
alpha= 0.1
x.0= 0.1;    x.1= -1
say center(' testing for the steepest descent method ', 79, "═")
call steepestD                                   /* ┌──◄── # digs past dec. point ─►───┐*/
say 'The minimum is at:   x[0]=' format(x.0,,9)    "   x[1]=" format(x.1,,9)
exit                                             /*stick a fork in it,  we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
g: return (x.0-1)**2 * exp(- (x.1**2) )   +   x.1 * (x.1 + 2) * exp(-2 * x.0**2)
/*──────────────────────────────────────────────────────────────────────────────────────*/
gradG: x= x.0;   y= x.1                          /*define  X  and  Y  from the X array. */
       xm= x-1;  eny2= exp(-y*y);  enx2= exp(-2 * x**2)                     /*shortcuts.*/
       z.0=  2 * xm * eny2   -   4 * x * enx2 * y * (y+2)
       z.1= -2 * xm**2 * y * eny2   +   enx2 * (y+2)   +   enx2 * y
       return                                    /*a rough calculation of the gradient. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
steepestD: g0= g()                               /*the initial estimate of the result.  */
           call gradG                            /*calculate the initial gradient.      */
           delG= sqrt(z.0**2 + z.1**2)           /*    "      "     "    norm.          */
           b= alpha / delG

              do while delG>tolerance
              x.0= x.0 - b * z.0;    x.1= x.1 - b * z.1
              call gradG
              delG= sqrt(z.0**2 + z.1**2);       if delG=0  then return
              b= alpha / delG
              g1= g()                                             /*find minimum.*/
              if g1>g0  then alpha= alpha * .5                    /*adjust ALPHA.*/
                        else g0= g1                               /*   "    G0.  */
              end   /*while*/
           return
/*──────────────────────────────────────────────────────────────────────────────────────*/
e: e= 2.7182818284590452353602874713526624977572470936999595749669676277240766303 || ,
      535475945713821785;              return e
/*──────────────────────────────────────────────────────────────────────────────────────*/
exp: procedure; parse arg x;  ix= x%1;  if abs(x-ix)>.5 then ix= ix + sign(x);  x= x - ix
     z= 1;  _= 1;  w= z;    do j=1;  _= _*x/j;  z= (z+_)/1;  if z==w then leave;  w= z;  end
     if z\==0  then z= z * e()**ix;     return z/1
/*──────────────────────────────────────────────────────────────────────────────────────*/
sqrt: procedure; parse arg x;  if x=0 then return 0;  d= digits();  numeric digits;  h= d+6
      numeric form; m.=9; parse value format(x,2,1,,0) 'E0' with g "E" _ .; g=g *.5'e'_ %2
      do j=0 while h>9; m.j=h; h= h % 2 + 1; end /*j*/
      do k=j+5 to 0 by -1; numeric digits m.k; g=(g+x/g)*.5; end /*k*/; return g</syntaxhighlight>
{{out|output|text=&nbsp; when using the internal default inputs:}}
<pre>
═══════════════════ testing for the steepest descent method ═══════════════════
The minimum is at:   x[0]= 0.107626824    x[1]= -1.223259655
</pre>
 
=={{header|Scala}}==
{{trans|Go}}
<langsyntaxhighlight lang="scala">object GradientDescent {
 
/** Steepest descent method modifying input values*/
  // ...
}
}
</syntaxhighlight>
{{out}}
<pre>
</pre>

=={{header|TypeScript}}==
<br><br>
 
<syntaxhighlight lang="typescript">
<lang Typescript>
// Using the steepest-descent method to search
// for minimum values of a multi-variable function
// ...
gradientDescentMain();
 
</syntaxhighlight>
{{out}}
<pre>
</pre>
* &nbsp; [Linear Regression using Gradient Descent by Adarsh Menon]
<br><br>
<syntaxhighlight lang="typescript">
<lang Typescript>
let data: number[][] =
[[32.5023452694530, 31.70700584656990],
// ...
 
gradientDescentMain();
</syntaxhighlight>
 
=={{header|Wren}}==
{{trans|Go}}
{{libheader|Wren-fmt}}
<syntaxhighlight lang="wren">import "./fmt" for Fmt

// Function for which minimum is to be found.
var g = Fn.new { |x|
    return (x[0]-1)*(x[0]-1) * (-x[1]*x[1]).exp +
           x[1]*(x[1]+2) * (-2*x[0]*x[0]).exp
}

// Provides a rough calculation of gradient g(p).
var gradG = Fn.new { |p|
    var x = p[0]
    var y = p[1]
    return [2*(x-1)*(-y*y).exp - 4*x*(-2*x*x).exp*y*(y+2),
            -2*(x-1)*(x-1)*y*(-y*y).exp + (-2*x*x).exp*(y+2) + (-2*x*x).exp*y]
}

var steepestDescent = Fn.new { |x, alpha, tolerance|
    var n = x.count
    var g0 = g.call(x) // Initial estimate of result.

    // Calculate initial gradient.
    var fi = gradG.call(x)

    // Calculate initial norm.
    var delG = 0
    for (i in 0...n) delG = delG + fi[i]*fi[i]
    delG = delG.sqrt
    var b = alpha/delG

    // Iterate until value is <= tolerance.
    while (delG > tolerance) {
        // Calculate next value.
        for (i in 0...n) x[i] = x[i] - b*fi[i]

        // Calculate next gradient.
        fi = gradG.call(x)

        // Calculate next norm.
        delG = 0
        for (i in 0...n) delG = delG + fi[i]*fi[i]
        delG = delG.sqrt
        b = alpha/delG

        // Calculate next value.
        var g1 = g.call(x)

        // Adjust parameter.
        if (g1 > g0) {
            alpha = alpha / 2
        } else {
            g0 = g1
        }
    }
}

var tolerance = 0.0000006
var alpha = 0.1
var x = [0.1, -1] // Initial guess of location of minimum.
steepestDescent.call(x, alpha, tolerance)
System.print("Testing steepest descent method:")
Fmt.print("The minimum is at x = $f, y = $f for which f(x, y) = $f.", x[0], x[1], g.call(x))</syntaxhighlight>

{{out}}
<pre>
Testing steepest descent method:
The minimum is at x = 0.107627, y = -1.223260 for which f(x, y) = -0.750063.
</pre>
 
=={{header|zkl}}==
{{trans|Go}} with tweaked gradG
<langsyntaxhighlight lang="zkl">fcn steepestDescent(f, x,y, alpha, h){
g0:=f(x,y); # Initial estimate of result.
fix,fiy := gradG(f,x,y,h); # Calculate initial gradient
Line 993 ⟶ 1,512:
g0:=f(x,y);
return((f(x + h, y) - g0)/h, (f(x, y + h) - g0)/h)
}</langsyntaxhighlight>
<langsyntaxhighlight lang="zkl">fcn f(x,y){ # Function for which minimum is to be found.
(x - 1).pow(2)*(-y.pow(2)).exp() +
y*(y + 2)*(-2.0*x.pow(2)).exp()
Line 1,005 ⟶ 1,524:
println("Testing steepest descent method:");
println("The minimum is at (x,y) = (%f,%f). f(x,y) = %f".fmt(x,y,f(x,y)));</langsyntaxhighlight>
{{out}}
<pre>
</pre>