gonum.org/v1/gonum@v0.14.0/optimize/bfgs.go

// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/mat"
)

var (
	_ Method          = (*BFGS)(nil)
	_ localMethod     = (*BFGS)(nil)
	_ NextDirectioner = (*BFGS)(nil)
)

// BFGS implements the Broyden–Fletcher–Goldfarb–Shanno optimization method. It
// is a quasi-Newton method that performs successive rank-two updates to an
// estimate of the inverse Hessian of the objective function. It exhibits
// super-linear convergence when in proximity to a local minimum, and its
// memory cost is O(n^2) in the input dimension.
type BFGS struct {
	// Linesearcher selects suitable steps along the descent direction.
	// Accepted steps should satisfy the strong Wolfe conditions.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	ls *LinesearchMethod

	status Status
	err    error

	dim  int
	x    mat.VecDense // Location of the last major iteration.
	grad mat.VecDense // Gradient at the last major iteration.
	s    mat.VecDense // Difference between locations in this and the previous iteration.
	y    mat.VecDense // Difference between gradients in this and the previous iteration.
	tmp  mat.VecDense

	invHess *mat.SymDense

	first bool // Indicator of the first iteration.
}

func (b *BFGS) Status() (Status, error) {
	return b.status, b.err
}

func (*BFGS) Uses(has Available) (uses Available, err error) {
	return has.gradient()
}

func (b *BFGS) Init(dim, tasks int) int {
	b.status = NotTerminated
	b.err = nil
	return 1
}

func (b *BFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	b.status, b.err = localOptimizer{}.run(b, b.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (b *BFGS) initLocal(loc *Location) (Operation, error) {
	if b.Linesearcher == nil {
		b.Linesearcher = &Bisection{}
	}
	if b.ls == nil {
		b.ls = &LinesearchMethod{}
	}
	b.ls.Linesearcher = b.Linesearcher
	b.ls.NextDirectioner = b

	return b.ls.Init(loc)
}

func (b *BFGS) iterateLocal(loc *Location) (Operation, error) {
	return b.ls.Iterate(loc)
}

func (b *BFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	b.dim = dim
	b.first = true

	x := mat.NewVecDense(dim, loc.X)
	grad := mat.NewVecDense(dim, loc.Gradient)
	b.x.CloneFromVec(x)
	b.grad.CloneFromVec(grad)

	b.y.Reset()
	b.s.Reset()
	b.tmp.Reset()

	if b.invHess == nil || cap(b.invHess.RawSymmetric().Data) < dim*dim {
		b.invHess = mat.NewSymDense(dim, nil)
	} else {
		b.invHess = mat.NewSymDense(dim, b.invHess.RawSymmetric().Data[:dim*dim])
	}
	// The values of the inverse Hessian are initialized in the first call to
	// NextDirection.

	// The initial direction is just the negative of the gradient because the
	// Hessian estimate is the identity matrix.
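	// The returned step size of 1/‖d‖ makes the first line-search trial
	// step have unit Euclidean length; with no curvature information
	// available yet, this guards against an excessively large or small
	// first step when the gradient is badly scaled.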
	d := mat.NewVecDense(dim, dir)
	d.ScaleVec(-1, grad)
	return 1 / mat.Norm(d, 2)
}

func (b *BFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := b.dim
	if len(loc.X) != dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != dim {
		panic("bfgs: unexpected size mismatch")
	}
	if len(dir) != dim {
		panic("bfgs: unexpected size mismatch")
	}

	x := mat.NewVecDense(dim, loc.X)
	grad := mat.NewVecDense(dim, loc.Gradient)

	// s = x_{k+1} - x_{k}
	b.s.SubVec(x, &b.x)
	// y = g_{k+1} - g_{k}
	b.y.SubVec(grad, &b.grad)

	sDotY := mat.Dot(&b.s, &b.y)

	if b.first {
		// Rescale the initial Hessian.
		// From: Nocedal, J., Wright, S.: Numerical Optimization (2nd ed).
		// Springer (2006), page 143, eq. 6.20.
		yDotY := mat.Dot(&b.y, &b.y)
		scale := sDotY / yDotY
		for i := 0; i < dim; i++ {
			for j := i; j < dim; j++ {
				if i == j {
					b.invHess.SetSym(i, i, scale)
				} else {
					b.invHess.SetSym(i, j, 0)
				}
			}
		}
		b.first = false
	}

	if math.Abs(sDotY) != 0 {
		// Update the inverse Hessian according to the formula
		//
		//	B_{k+1}^-1 = B_k^-1
		//	           + (s_kᵀ y_k + y_kᵀ B_k^-1 y_k) / (s_kᵀ y_k)^2 * (s_k s_kᵀ)
		//	           - (B_k^-1 y_k s_kᵀ + s_k y_kᵀ B_k^-1) / (s_kᵀ y_k).
		//
		// Note that y_kᵀ B_k^-1 y_k is a scalar, and that the third term is a
		// rank-two update where B_k^-1 y_k is one vector and s_k is the other.
		yBy := mat.Inner(&b.y, b.invHess, &b.y)
		b.tmp.MulVec(b.invHess, &b.y)
		scale := (1 + yBy/sDotY) / sDotY
		b.invHess.SymRankOne(b.invHess, scale, &b.s)
		b.invHess.RankTwo(b.invHess, -1/sDotY, &b.tmp, &b.s)
	}

	// Update the stored BFGS data.
	b.x.CopyVec(x)
	b.grad.CopyVec(grad)

	// The new direction is stored in dir.
	d := mat.NewVecDense(dim, dir)
	d.MulVec(b.invHess, grad)
	d.ScaleVec(-1, d)

	return 1
}

func (*BFGS) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, false}
}
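
// The sketch below (added for illustration; not part of the original file)
// shows one way to drive BFGS through the package-level Minimize entry point.
// It assumes the exported optimize.Problem and optimize.Minimize APIs and the
// functions.ExtendedRosenbrock test problem from
// gonum.org/v1/gonum/optimize/functions; treat it as a usage sketch rather
// than canonical documentation.
//
//	package main
//
//	import (
//		"fmt"
//		"log"
//
//		"gonum.org/v1/gonum/optimize"
//		"gonum.org/v1/gonum/optimize/functions"
//	)
//
//	func main() {
//		rosen := functions.ExtendedRosenbrock{}
//		problem := optimize.Problem{
//			Func: rosen.Func,
//			Grad: rosen.Grad,
//		}
//		x0 := []float64{-1.2, 1} // Classic Rosenbrock starting point.
//
//		// A nil *Settings and a zero-value method are fine here: BFGS
//		// falls back to its default line search when Linesearcher is nil.
//		result, err := optimize.Minimize(problem, x0, nil, &optimize.BFGS{})
//		if err != nil {
//			log.Fatal(err)
//		}
//		fmt.Printf("x = %v, f = %v\n", result.X, result.F)
//	}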