gonum.org/v1/gonum@v0.14.0/optimize/newton.go

// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/mat"
)

const maxNewtonModifications = 20

var (
	_ Method          = (*Newton)(nil)
	_ localMethod     = (*Newton)(nil)
	_ NextDirectioner = (*Newton)(nil)
)

// Newton implements a modified Newton's method for Hessian-based unconstrained
// minimization. It applies regularization when the Hessian is not positive
// definite, and it can converge to a local minimum from any starting point.
//
// Newton iteratively forms a quadratic model of the objective function f and
// tries to minimize this approximate model. It generates a sequence of
// locations x_k by means of
//
//	solve H_k d_k = -∇f_k for d_k,
//	x_{k+1} = x_k + α_k d_k,
//
// where H_k is the Hessian matrix of f at x_k and α_k is a step size found by
// a line search.
//
// Away from a minimizer H_k may not be positive definite and d_k may not be a
// descent direction. Newton implements a Hessian modification strategy that
// adds successively larger multiples of the identity to H_k until it becomes
// positive definite. Note that the repeated trial factorization of the
// modified Hessian involved in this process can be computationally expensive.
//
// If the Hessian matrix cannot be formed explicitly or if the computational
// cost of its factorization is prohibitive, the BFGS or L-BFGS quasi-Newton
// methods can be used instead.
type Newton struct {
	// Linesearcher is used for selecting suitable steps along the descent
	// direction d. Accepted steps should satisfy at least one of the Wolfe,
	// Goldstein or Armijo conditions.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher
	// Increase is the factor by which a scalar tau is successively increased
	// so that (H + tau*I) is positive definite. Larger values reduce the
	// number of trial Hessian factorizations, but also reduce the second-order
	// information in H.
	// Increase must be greater than 1. If Increase is 0, it is defaulted to 5.
	Increase float64
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	status Status
	err    error

	ls *LinesearchMethod

	hess *mat.SymDense // Storage for a copy of the Hessian matrix.
	chol mat.Cholesky  // Storage for the Cholesky factorization.
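	// tau is the scalar by which a multiple of the identity was most
	// recently added to the Hessian. It is kept between iterations and
	// reused as the starting guess for the next regularization.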
	tau float64
}

func (n *Newton) Status() (Status, error) {
	return n.status, n.err
}

func (*Newton) Uses(has Available) (uses Available, err error) {
	return has.hessian()
}

func (n *Newton) Init(dim, tasks int) int {
	n.status = NotTerminated
	n.err = nil
	return 1
}

func (n *Newton) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	n.status, n.err = localOptimizer{}.run(n, n.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (n *Newton) initLocal(loc *Location) (Operation, error) {
	if n.Increase == 0 {
		n.Increase = 5
	}
	if n.Increase <= 1 {
		panic("optimize: Newton.Increase must be greater than 1")
	}
	if n.Linesearcher == nil {
		n.Linesearcher = &Bisection{}
	}
	if n.ls == nil {
		n.ls = &LinesearchMethod{}
	}
	n.ls.Linesearcher = n.Linesearcher
	n.ls.NextDirectioner = n
	return n.ls.Init(loc)
}

func (n *Newton) iterateLocal(loc *Location) (Operation, error) {
	return n.ls.Iterate(loc)
}

func (n *Newton) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	n.hess = resizeSymDense(n.hess, dim)
	n.tau = 0
	return n.NextDirection(loc, dir)
}

func (n *Newton) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// This method implements Algorithm 3.3 (Cholesky with Added Multiple of
	// the Identity) from Nocedal, Wright (2006), 2nd edition.

	dim := len(loc.X)
	d := mat.NewVecDense(dim, dir)
	grad := mat.NewVecDense(dim, loc.Gradient)
	n.hess.CopySym(loc.Hessian)

	// Find the smallest diagonal entry of the Hessian.
	minA := n.hess.At(0, 0)
	for i := 1; i < dim; i++ {
		a := n.hess.At(i, i)
		if a < minA {
			minA = a
		}
	}
	// If the smallest diagonal entry is positive, the Hessian may be positive
	// definite, and so first attempt to apply the Cholesky factorization to
	// the un-modified Hessian. If the smallest entry is negative, use the
	// final tau from the last iteration if regularization was needed,
	// otherwise guess an appropriate value for tau.
	if minA > 0 {
		n.tau = 0
	} else if n.tau == 0 {
		n.tau = -minA + 0.001
	}

	for k := 0; k < maxNewtonModifications; k++ {
		if n.tau != 0 {
			// Add a multiple of the identity to the Hessian.
			for i := 0; i < dim; i++ {
				n.hess.SetSym(i, i, loc.Hessian.At(i, i)+n.tau)
			}
		}
		// Try to apply the Cholesky factorization.
		pd := n.chol.Factorize(n.hess)
		if pd {
			// Store the solution in d's backing array, dir.
			err := n.chol.SolveVecTo(d, grad)
			if err == nil {
				d.ScaleVec(-1, d)
				return 1
			}
		}
		// Modified Hessian is not PD, so increase tau.
		n.tau = math.Max(n.Increase*n.tau, 0.001)
	}

	// Hessian modification failed to get a PD matrix. Return the negative
	// gradient as the descent direction.
	d.ScaleVec(-1, grad)
	return 1
}

func (n *Newton) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, true}
}
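A minimal usage sketch (not part of this file), showing how Newton is typically driven through the package-level optimize.Minimize entry point; the quadratic objective, its derivatives, and the starting point below are illustrative assumptions, not values taken from this package:

	package main

	import (
		"fmt"
		"log"

		"gonum.org/v1/gonum/mat"
		"gonum.org/v1/gonum/optimize"
	)

	func main() {
		// Illustrative strictly convex quadratic:
		// f(x) = (x0-1)^2 + 4*(x1+2)^2, minimized at (1, -2).
		p := optimize.Problem{
			Func: func(x []float64) float64 {
				return (x[0]-1)*(x[0]-1) + 4*(x[1]+2)*(x[1]+2)
			},
			Grad: func(grad, x []float64) {
				grad[0] = 2 * (x[0] - 1)
				grad[1] = 8 * (x[1] + 2)
			},
			Hess: func(hess *mat.SymDense, x []float64) {
				// Constant, positive definite Hessian, so the tau
				// regularization in NextDirection never triggers here.
				hess.SetSym(0, 0, 2)
				hess.SetSym(0, 1, 0)
				hess.SetSym(1, 1, 8)
			},
		}

		result, err := optimize.Minimize(p, []float64{5, 5}, nil, &optimize.Newton{})
		if err != nil {
			log.Fatal(err)
		}
		fmt.Println(result.X, result.F) // expect approximately [1 -2] and 0
	}

For a non-convex objective whose Hessian is indefinite away from the minimizer, the same call works unchanged; Newton then adds tau*I to H internally, as described in the type documentation.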