github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/optimize/newton.go

// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"github.com/jingcheng-WU/gonum/mat"
)

const maxNewtonModifications = 20

var (
	_ Method          = (*Newton)(nil)
	_ localMethod     = (*Newton)(nil)
	_ NextDirectioner = (*Newton)(nil)
)

// Newton implements a modified Newton's method for Hessian-based unconstrained
// minimization. It applies regularization when the Hessian is not positive
// definite, and it can converge to a local minimum from any starting point.
//
// Newton iteratively forms a quadratic model of the objective function f and
// tries to minimize this approximate model. It generates a sequence of
// locations x_k by means of
//  solve H_k d_k = -∇f_k for d_k,
//  x_{k+1} = x_k + α_k d_k,
// where H_k is the Hessian matrix of f at x_k and α_k is a step size found by
// a line search.
//
// Away from a minimizer H_k may not be positive definite and d_k may not be a
// descent direction. Newton implements a Hessian modification strategy that
// adds successively larger multiples of the identity to H_k until it becomes
// positive definite. Note that the repeated trial factorization of the
// modified Hessian involved in this process can be computationally expensive.
//
// If the Hessian matrix cannot be formed explicitly or if the computational
// cost of its factorization is prohibitive, the BFGS or L-BFGS quasi-Newton
// methods can be used instead.
type Newton struct {
	// Linesearcher is used for selecting suitable steps along the descent
	// direction d. Accepted steps should satisfy at least one of the Wolfe,
	// Goldstein or Armijo conditions.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher
	// Increase is the factor by which a scalar tau is successively increased
	// so that (H + tau*I) is positive definite. Larger values reduce the
	// number of trial Hessian factorizations, but also reduce the second-order
	// information in H.
	// Increase must be greater than 1. If Increase is 0, it is defaulted to 5.
	Increase float64
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	status Status
	err    error

	ls *LinesearchMethod

	hess *mat.SymDense // Storage for a copy of the Hessian matrix.
	chol mat.Cholesky  // Storage for the Cholesky factorization.
	tau  float64
}

// Status returns the current status of the method and any error encountered
// during the optimization.
func (n *Newton) Status() (Status, error) {
	return n.status, n.err
}

// Uses reports the information Newton needs from the objective function; it
// requires both the gradient and the Hessian.
func (*Newton) Uses(has Available) (uses Available, err error) {
	return has.hessian()
}

// Init initializes the method for an optimization of the given dimension.
// Newton is a local method and always requests a single concurrent task.
func (n *Newton) Init(dim, tasks int) int {
	n.status = NotTerminated
	n.err = nil
	return 1
}

// Run runs the optimization loop, communicating with the caller over the
// operation and result channels until termination.
func (n *Newton) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	n.status, n.err = localOptimizer{}.run(n, n.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (n *Newton) initLocal(loc *Location) (Operation, error) {
	if n.Increase == 0 {
		n.Increase = 5
	}
	if n.Increase <= 1 {
		panic("optimize: Newton.Increase must be greater than 1")
	}
	if n.Linesearcher == nil {
		n.Linesearcher = &Bisection{}
	}
	if n.ls == nil {
		n.ls = &LinesearchMethod{}
	}
	n.ls.Linesearcher = n.Linesearcher
	n.ls.NextDirectioner = n
	return n.ls.Init(loc)
}

func (n *Newton) iterateLocal(loc *Location) (Operation, error) {
	return n.ls.Iterate(loc)
}

// InitDirection resizes the Hessian storage, resets the regularization
// parameter tau, and computes the first descent direction.
func (n *Newton) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	n.hess = resizeSymDense(n.hess, dim)
	n.tau = 0
	return n.NextDirection(loc, dir)
}

// NextDirection computes the next descent direction by solving the Newton
// system with a (possibly regularized) Cholesky factorization of the Hessian.
func (n *Newton) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// This method implements Algorithm 3.3 (Cholesky with Added Multiple of
	// the Identity) from Nocedal, Wright (2006), 2nd edition.

	dim := len(loc.X)
	d := mat.NewVecDense(dim, dir)
	grad := mat.NewVecDense(dim, loc.Gradient)
	n.hess.CopySym(loc.Hessian)

	// Find the smallest diagonal entry of the Hessian.
	minA := n.hess.At(0, 0)
	for i := 1; i < dim; i++ {
		a := n.hess.At(i, i)
		if a < minA {
			minA = a
		}
	}
	// If the smallest diagonal entry is positive, the Hessian may be positive
	// definite, and so first attempt to apply the Cholesky factorization to
	// the un-modified Hessian. If the smallest entry is negative, use the
	// final tau from the last iteration if regularization was needed,
	// otherwise guess an appropriate value for tau.
	if minA > 0 {
		n.tau = 0
	} else if n.tau == 0 {
		n.tau = -minA + 0.001
	}

	for k := 0; k < maxNewtonModifications; k++ {
		if n.tau != 0 {
			// Add a multiple of identity to the Hessian.
			for i := 0; i < dim; i++ {
				n.hess.SetSym(i, i, loc.Hessian.At(i, i)+n.tau)
			}
		}
		// Try to apply the Cholesky factorization.
		pd := n.chol.Factorize(n.hess)
		if pd {
			// Store the solution in d's backing array, dir.
			err := n.chol.SolveVecTo(d, grad)
			if err == nil {
				d.ScaleVec(-1, d)
				return 1
			}
		}
		// Modified Hessian is not PD, so increase tau.
		n.tau = math.Max(n.Increase*n.tau, 0.001)
	}

	// Hessian modification failed to get a PD matrix. Return the negative
	// gradient as the descent direction.
	d.ScaleVec(-1, grad)
	return 1
}

func (n *Newton) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, true}
}
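
For context, a minimal usage sketch follows; it is not part of newton.go. It assumes this fork mirrors upstream Gonum's v0.9 optimize API (optimize.Minimize, optimize.Problem with destination-first Grad and Hess signatures), and the Rosenbrock objective and starting point are purely illustrative.

// Hypothetical usage sketch, not part of the package. Assumes the fork keeps
// upstream Gonum's v0.9 API: optimize.Minimize and optimize.Problem with
// Grad(grad, x) and Hess(hess *mat.SymDense, x) destination-first signatures.
package main

import (
	"fmt"
	"log"

	"github.com/jingcheng-WU/gonum/mat"
	"github.com/jingcheng-WU/gonum/optimize"
)

func main() {
	// Rosenbrock function: f(x, y) = (1-x)^2 + 100(y-x^2)^2.
	p := optimize.Problem{
		Func: func(x []float64) float64 {
			a, b := 1-x[0], x[1]-x[0]*x[0]
			return a*a + 100*b*b
		},
		Grad: func(grad, x []float64) {
			grad[0] = -2*(1-x[0]) - 400*x[0]*(x[1]-x[0]*x[0])
			grad[1] = 200 * (x[1] - x[0]*x[0])
		},
		Hess: func(hess *mat.SymDense, x []float64) {
			hess.SetSym(0, 0, 2-400*x[1]+1200*x[0]*x[0])
			hess.SetSym(0, 1, -400*x[0])
			hess.SetSym(1, 1, 200)
		},
	}

	// A zero-valued Newton uses the defaults documented above: a Bisection
	// line search, Increase = 5, and GradStopThreshold = 1e-12.
	result, err := optimize.Minimize(p, []float64{-1.2, 1}, nil, &optimize.Newton{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("x = %v, f = %v\n", result.X, result.F)
}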
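
The regularization loop in NextDirection can also be seen in isolation. The standalone sketch below (hypothetical, using only the fork's mat package) applies the same Algorithm 3.3 idea to an indefinite 2x2 matrix, printing the tau at which the Cholesky factorization first succeeds.

// Standalone sketch of the Hessian regularization used by NextDirection
// (Algorithm 3.3, Nocedal & Wright 2006). Hypothetical demo, not part of
// the package.
package main

import (
	"fmt"
	"math"

	"github.com/jingcheng-WU/gonum/mat"
)

func main() {
	// An indefinite symmetric matrix: eigenvalues 3 and -1.
	h := mat.NewSymDense(2, []float64{1, 2, 2, 1})

	const increase = 5 // Newton's default Increase factor.
	var chol mat.Cholesky

	// The smallest diagonal entry of h is 1 > 0, so, as in NextDirection,
	// the first factorization attempt is on the unmodified matrix (tau = 0).
	tau := 0.0
	work := mat.NewSymDense(2, nil)
	for k := 0; k < 20; k++ {
		// Form h + tau*I.
		work.CopySym(h)
		for i := 0; i < 2; i++ {
			work.SetSym(i, i, h.At(i, i)+tau)
		}
		if chol.Factorize(work) {
			fmt.Printf("success after %d trials, tau = %g\n", k+1, tau)
			return
		}
		// Not positive definite: grow tau geometrically, as Newton does.
		tau = math.Max(increase*tau, 0.001)
	}
	fmt.Println("regularization failed")
}

Because the most negative eigenvalue of h is -1, the loop keeps failing until tau exceeds 1 (here at tau = 3.125), which illustrates why a larger Increase trades fewer trial factorizations for a cruder, more gradient-like step.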