github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/optimize/lbfgs.go

// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"github.com/jingcheng-WU/gonum/floats"
)

var (
	_ Method          = (*LBFGS)(nil)
	_ localMethod     = (*LBFGS)(nil)
	_ NextDirectioner = (*LBFGS)(nil)
)

// LBFGS implements the limited-memory BFGS method for gradient-based
// unconstrained minimization.
//
// It stores a modified version of the inverse Hessian approximation H
// implicitly from the last Store iterations, while the normal BFGS method
// stores and manipulates H directly as a dense matrix. LBFGS is therefore
// more appropriate than BFGS for large problems, as its memory and
// per-iteration cost scale as O(Store * dim) while those of BFGS scale as
// O(dim^2). The "forgetful" nature of LBFGS may also make it perform better
// than BFGS for functions whose Hessians vary rapidly across the domain.
type LBFGS struct {
	// Linesearcher selects suitable steps along the descent direction.
	// Accepted steps should satisfy the strong Wolfe conditions.
	// If Linesearcher is nil, a reasonable default will be chosen.
	Linesearcher Linesearcher
	// Store is the size of the limited-memory storage.
	// If Store is 0, it will be defaulted to 15.
	Store int
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	status Status
	err    error

	ls *LinesearchMethod

	dim  int       // Dimension of the problem
	x    []float64 // Location at the last major iteration
	grad []float64 // Gradient at the last major iteration

	// History
	oldest int         // Index of the oldest element of the history
	y      [][]float64 // Last Store values of y
	s      [][]float64 // Last Store values of s
	rho    []float64   // Last Store values of rho
	a      []float64   // Cache of Hessian updates
}
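
// The function below is an illustrative sketch, not part of the original
// file: it shows how LBFGS is typically driven through Minimize in this
// package. The two-dimensional Rosenbrock objective and starting point are
// chosen only for illustration, and the Problem, Result and Minimize APIs
// are assumed to match the rest of this package.
func lbfgsUsageSketch() (*Result, error) {
	problem := Problem{
		// f(x, y) = (1-x)^2 + 100*(y-x^2)^2, a standard smooth test function.
		Func: func(x []float64) float64 {
			return (1-x[0])*(1-x[0]) + 100*(x[1]-x[0]*x[0])*(x[1]-x[0]*x[0])
		},
		// Analytic gradient of f, written in place into grad.
		Grad: func(grad, x []float64) {
			grad[0] = -2*(1-x[0]) - 400*x[0]*(x[1]-x[0]*x[0])
			grad[1] = 200 * (x[1] - x[0]*x[0])
		},
	}
	// The zero-valued LBFGS uses the default Linesearcher and Store (15),
	// which are filled in by initLocal.
	return Minimize(problem, []float64{-1.2, 1}, nil, &LBFGS{})
}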

func (l *LBFGS) Status() (Status, error) {
	return l.status, l.err
}

func (*LBFGS) Uses(has Available) (uses Available, err error) {
	return has.gradient()
}

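// Init initializes the method for an optimization problem of the given
// dimension. LBFGS is a sequential method, so it requests a single
// concurrent task regardless of the tasks argument.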
func (l *LBFGS) Init(dim, tasks int) int {
	l.status = NotTerminated
	l.err = nil
	return 1
}

func (l *LBFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	l.status, l.err = localOptimizer{}.run(l, l.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (l *LBFGS) initLocal(loc *Location) (Operation, error) {
	if l.Linesearcher == nil {
		l.Linesearcher = &Bisection{}
	}
	if l.Store == 0 {
		l.Store = 15
	}

	if l.ls == nil {
		l.ls = &LinesearchMethod{}
	}
	l.ls.Linesearcher = l.Linesearcher
	l.ls.NextDirectioner = l

	return l.ls.Init(loc)
}

func (l *LBFGS) iterateLocal(loc *Location) (Operation, error) {
	return l.ls.Iterate(loc)
}

func (l *LBFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	l.dim = dim
	l.oldest = 0

	l.a = resize(l.a, l.Store)
	l.rho = resize(l.rho, l.Store)
	l.y = l.initHistory(l.y)
	l.s = l.initHistory(l.s)

	l.x = resize(l.x, dim)
	copy(l.x, loc.X)

	l.grad = resize(l.grad, dim)
	copy(l.grad, loc.Gradient)

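	// Use steepest descent as the first direction. The returned step size
	// 1/|g| makes the initial trial step have unit length in x.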
	copy(dir, loc.Gradient)
	floats.Scale(-1, dir)
	return 1 / floats.Norm(dir, 2)
}

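// initHistory returns hist resized to Store rows of length dim with all
// entries set to zero, reusing the existing backing storage where possible.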
func (l *LBFGS) initHistory(hist [][]float64) [][]float64 {
	c := cap(hist)
	if c < l.Store {
		n := make([][]float64, l.Store-c)
		hist = append(hist[:c], n...)
	}
	hist = hist[:l.Store]
	for i := range hist {
		hist[i] = resize(hist[i], l.dim)
		for j := range hist[i] {
			hist[i][j] = 0
		}
	}
	return hist
}

func (l *LBFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// Uses the two-loop recursion described in
	// Nocedal, J., Wright, S.: Numerical Optimization (2nd ed). Springer (2006), chapter 7, page 178.
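	// In that notation, with q initialized to the current gradient and
	// H_0 = gamma*I as the initial inverse Hessian approximation:
	//   for the Store most recent (s, y) pairs, newest first:
	//     a_i = rho_i * s_i^T q;  q = q - a_i*y_i
	//   r = gamma * q
	//   for the same pairs, oldest first:
	//     b_i = rho_i * y_i^T r;  r = r + (a_i - b_i)*s_i
	// On return r approximates H^{-1} g, and dir is set to -r.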

	if len(loc.X) != l.dim {
		panic("lbfgs: unexpected location size mismatch")
	}
	if len(loc.Gradient) != l.dim {
		panic("lbfgs: unexpected gradient size mismatch")
	}
	if len(dir) != l.dim {
		panic("lbfgs: unexpected direction size mismatch")
	}

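	// Record the newest curvature pair: y = g_{k+1} - g_k, s = x_{k+1} - x_k,
	// and rho = 1/(s^T y), overwriting the oldest stored pair.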
	y := l.y[l.oldest]
	floats.SubTo(y, loc.Gradient, l.grad)
	s := l.s[l.oldest]
	floats.SubTo(s, loc.X, l.x)
	sDotY := floats.Dot(s, y)
	l.rho[l.oldest] = 1 / sDotY

	l.oldest = (l.oldest + 1) % l.Store

	copy(l.x, loc.X)
	copy(l.grad, loc.Gradient)
	copy(dir, loc.Gradient)

	// Start with the most recent element and go backward.
	for i := 0; i < l.Store; i++ {
		idx := l.oldest - i - 1
		if idx < 0 {
			idx += l.Store
		}
		l.a[idx] = l.rho[idx] * floats.Dot(l.s[idx], dir)
		floats.AddScaled(dir, -l.a[idx], l.y[idx])
	}

	// Scale the initial Hessian.
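	// gamma = s^T y / y^T y sets the scale of the initial inverse Hessian
	// approximation H_0 = gamma*I, so that the resulting direction is well
	// scaled and a unit step is often acceptable (Nocedal & Wright, chapter 7).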
	gamma := sDotY / floats.Dot(y, y)
	floats.Scale(gamma, dir)

	// Start with the oldest element and go forward.
	for i := 0; i < l.Store; i++ {
		idx := i + l.oldest
		if idx >= l.Store {
			idx -= l.Store
		}
		beta := l.rho[idx] * floats.Dot(l.y[idx], dir)
		floats.AddScaled(dir, l.a[idx]-beta, l.s[idx])
	}

	// dir contains H^{-1} * g, so flip the direction for minimization.
	floats.Scale(-1, dir)

	return 1
}

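// needs reports that LBFGS requires gradient information and does not use the
// Hessian.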
func (*LBFGS) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, false}
}