github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/optimize/lbfgs.go

// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"github.com/jingcheng-WU/gonum/floats"
)

var (
	_ Method          = (*LBFGS)(nil)
	_ localMethod     = (*LBFGS)(nil)
	_ NextDirectioner = (*LBFGS)(nil)
)

// LBFGS implements the limited-memory BFGS method for gradient-based
// unconstrained minimization.
//
// It stores a modified version of the inverse Hessian approximation H
// implicitly from the last Store iterations while the normal BFGS method
// stores and manipulates H directly as a dense matrix. Therefore LBFGS is more
// appropriate than BFGS for large problems as the cost of LBFGS scales as
// O(Store * dim) while BFGS scales as O(dim^2). The "forgetful" nature of
// LBFGS may also make it perform better than BFGS for functions with Hessians
// that vary rapidly spatially.
type LBFGS struct {
	// Linesearcher selects suitable steps along the descent direction.
	// Accepted steps should satisfy the strong Wolfe conditions.
	// If Linesearcher is nil, a reasonable default will be chosen.
	Linesearcher Linesearcher
	// Store is the size of the limited-memory storage.
	// If Store is 0, it will be defaulted to 15.
	Store int
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	status Status
	err    error

	ls *LinesearchMethod

	dim  int       // Dimension of the problem
	x    []float64 // Location at the last major iteration
	grad []float64 // Gradient at the last major iteration

	// History
	oldest int         // Index of the oldest element of the history
	y      [][]float64 // Last Store values of y
	s      [][]float64 // Last Store values of s
	rho    []float64   // Last Store values of rho
	a      []float64   // Cache of Hessian updates
}
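// A minimal usage sketch (illustrative, not part of this file): from a client
// package, an LBFGS value is passed as the Method to Minimize. The Rosenbrock
// problem, starting point, and nil Settings below are assumptions chosen for
// the example.
//
//	problem := optimize.Problem{
//		Func: func(x []float64) float64 {
//			a, b := 1-x[0], x[1]-x[0]*x[0]
//			return a*a + 100*b*b
//		},
//		Grad: func(grad, x []float64) {
//			grad[0] = -2*(1-x[0]) - 400*x[0]*(x[1]-x[0]*x[0])
//			grad[1] = 200 * (x[1] - x[0]*x[0])
//		},
//	}
//	result, err := optimize.Minimize(problem, []float64{-1.2, 1}, nil, &optimize.LBFGS{})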
func (l *LBFGS) Status() (Status, error) {
	return l.status, l.err
}

func (*LBFGS) Uses(has Available) (uses Available, err error) {
	return has.gradient()
}

func (l *LBFGS) Init(dim, tasks int) int {
	l.status = NotTerminated
	l.err = nil
	return 1
}

func (l *LBFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	l.status, l.err = localOptimizer{}.run(l, l.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (l *LBFGS) initLocal(loc *Location) (Operation, error) {
	if l.Linesearcher == nil {
		l.Linesearcher = &Bisection{}
	}
	if l.Store == 0 {
		l.Store = 15
	}

	if l.ls == nil {
		l.ls = &LinesearchMethod{}
	}
	l.ls.Linesearcher = l.Linesearcher
	l.ls.NextDirectioner = l

	return l.ls.Init(loc)
}

func (l *LBFGS) iterateLocal(loc *Location) (Operation, error) {
	return l.ls.Iterate(loc)
}

func (l *LBFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	l.dim = dim
	l.oldest = 0

	l.a = resize(l.a, l.Store)
	l.rho = resize(l.rho, l.Store)
	l.y = l.initHistory(l.y)
	l.s = l.initHistory(l.s)

	l.x = resize(l.x, dim)
	copy(l.x, loc.X)

	l.grad = resize(l.grad, dim)
	copy(l.grad, loc.Gradient)

	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)
	return 1 / floats.Norm(dir, 2)
}

func (l *LBFGS) initHistory(hist [][]float64) [][]float64 {
	c := cap(hist)
	if c < l.Store {
		n := make([][]float64, l.Store-c)
		hist = append(hist[:c], n...)
	}
	hist = hist[:l.Store]
	for i := range hist {
		hist[i] = resize(hist[i], l.dim)
		for j := range hist[i] {
			hist[i][j] = 0
		}
	}
	return hist
}

func (l *LBFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// Uses the two-loop recursion as described in
	// Nocedal, J., Wright, S.: Numerical Optimization (2nd ed). Springer (2006), chapter 7, page 178.

	if len(loc.X) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(loc.Gradient) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}
	if len(dir) != l.dim {
		panic("lbfgs: unexpected size mismatch")
	}

	y := l.y[l.oldest]
	floats.SubTo(y, loc.Gradient, l.grad)
	s := l.s[l.oldest]
	floats.SubTo(s, loc.X, l.x)
	sDotY := floats.Dot(s, y)
	l.rho[l.oldest] = 1 / sDotY

	l.oldest = (l.oldest + 1) % l.Store

	copy(l.x, loc.X)
	copy(l.grad, loc.Gradient)
	copy(dir, loc.Gradient)

	// Start with the most recent element and go backward.
	for i := 0; i < l.Store; i++ {
		idx := l.oldest - i - 1
		if idx < 0 {
			idx += l.Store
		}
		l.a[idx] = l.rho[idx] * floats.Dot(l.s[idx], dir)
		floats.AddScaled(dir, -l.a[idx], l.y[idx])
	}

	// Scale the initial inverse Hessian approximation.
	gamma := sDotY / floats.Dot(y, y)
	floats.Scale(gamma, dir)

	// Start with the oldest element and go forward.
	for i := 0; i < l.Store; i++ {
		idx := i + l.oldest
		if idx >= l.Store {
			idx -= l.Store
		}
		beta := l.rho[idx] * floats.Dot(l.y[idx], dir)
		floats.AddScaled(dir, l.a[idx]-beta, l.s[idx])
	}

	// dir contains H^{-1} * g, so flip the direction for minimization.
	floats.Scale(-1, dir)

	return 1
}

func (*LBFGS) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, false}
}
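// twoLoopSketch is an illustrative, self-contained sketch of the two-loop
// recursion used by NextDirection above. The name and signature are
// hypothetical; this function is not part of the package API and exists only
// to show the algorithm on plain slices, without the ring-buffer indexing.
// Given the k most recent correction pairs s[i], y[i] (ordered oldest to
// newest) and the current gradient g, it returns d = -H*g, where H is the
// implicit inverse Hessian approximation.
func twoLoopSketch(s, y [][]float64, g []float64) []float64 {
	k := len(s)
	rho := make([]float64, k)
	for i := range rho {
		rho[i] = 1 / floats.Dot(s[i], y[i])
	}

	d := make([]float64, len(g))
	copy(d, g)
	a := make([]float64, k)

	// First loop: newest to oldest, projecting out each correction pair.
	for i := k - 1; i >= 0; i-- {
		a[i] = rho[i] * floats.Dot(s[i], d)
		floats.AddScaled(d, -a[i], y[i])
	}

	// Scale by gamma = s·y / y·y from the newest pair; this plays the role
	// of the initial inverse Hessian approximation H0 = gamma*I.
	gamma := floats.Dot(s[k-1], y[k-1]) / floats.Dot(y[k-1], y[k-1])
	floats.Scale(gamma, d)

	// Second loop: oldest to newest, adding the corrections back.
	for i := 0; i < k; i++ {
		beta := rho[i] * floats.Dot(y[i], d)
		floats.AddScaled(d, a[i]-beta, s[i])
	}

	// d now holds H*g; negate to obtain a descent direction.
	floats.Scale(-1, d)
	return d
}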