gonum.org/v1/gonum@v0.14.0/optimize/bfgs.go (about)

     1  // Copyright ©2014 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package optimize
     6  
     7  import (
     8  	"math"
     9  
    10  	"gonum.org/v1/gonum/mat"
    11  )
    12  
    13  var (
    14  	_ Method          = (*BFGS)(nil)
    15  	_ localMethod     = (*BFGS)(nil)
    16  	_ NextDirectioner = (*BFGS)(nil)
    17  )
    18  
    19  // BFGS implements the Broyden–Fletcher–Goldfarb–Shanno optimization method. It
    20  // is a quasi-Newton method that performs successive rank-one updates to an
    21  // estimate of the inverse Hessian of the objective function. It exhibits
    22  // super-linear convergence when in proximity to a local minimum. It has memory
    23  // cost that is O(n^2) relative to the input dimension.
    24  type BFGS struct {
    25  	// Linesearcher selects suitable steps along the descent direction.
    26  	// Accepted steps should satisfy the strong Wolfe conditions.
    27  	// If Linesearcher == nil, an appropriate default is chosen.
    28  	Linesearcher Linesearcher
    29  	// GradStopThreshold sets the threshold for stopping if the gradient norm
    30  	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
    31  	// if it is NaN the setting is not used.
    32  	GradStopThreshold float64
    33  
    34  	ls *LinesearchMethod
    35  
    36  	status Status
    37  	err    error
    38  
    39  	dim  int
    40  	x    mat.VecDense // Location of the last major iteration.
    41  	grad mat.VecDense // Gradient at the last major iteration.
    42  	s    mat.VecDense // Difference between locations in this and the previous iteration.
    43  	y    mat.VecDense // Difference between gradients in this and the previous iteration.
    44  	tmp  mat.VecDense
    45  
    46  	invHess *mat.SymDense
    47  
    48  	first bool // Indicator of the first iteration.
    49  }
    50  
    51  func (b *BFGS) Status() (Status, error) {
    52  	return b.status, b.err
    53  }
    54  
    55  func (*BFGS) Uses(has Available) (uses Available, err error) {
    56  	return has.gradient()
    57  }
    58  
    59  func (b *BFGS) Init(dim, tasks int) int {
    60  	b.status = NotTerminated
    61  	b.err = nil
    62  	return 1
    63  }
    64  
    65  func (b *BFGS) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
    66  	b.status, b.err = localOptimizer{}.run(b, b.GradStopThreshold, operation, result, tasks)
    67  	close(operation)
    68  }
    69  
    70  func (b *BFGS) initLocal(loc *Location) (Operation, error) {
    71  	if b.Linesearcher == nil {
    72  		b.Linesearcher = &Bisection{}
    73  	}
    74  	if b.ls == nil {
    75  		b.ls = &LinesearchMethod{}
    76  	}
    77  	b.ls.Linesearcher = b.Linesearcher
    78  	b.ls.NextDirectioner = b
    79  
    80  	return b.ls.Init(loc)
    81  }
    82  
    83  func (b *BFGS) iterateLocal(loc *Location) (Operation, error) {
    84  	return b.ls.Iterate(loc)
    85  }
    86  
    87  func (b *BFGS) InitDirection(loc *Location, dir []float64) (stepSize float64) {
    88  	dim := len(loc.X)
    89  	b.dim = dim
    90  	b.first = true
    91  
    92  	x := mat.NewVecDense(dim, loc.X)
    93  	grad := mat.NewVecDense(dim, loc.Gradient)
    94  	b.x.CloneFromVec(x)
    95  	b.grad.CloneFromVec(grad)
    96  
    97  	b.y.Reset()
    98  	b.s.Reset()
    99  	b.tmp.Reset()
   100  
   101  	if b.invHess == nil || cap(b.invHess.RawSymmetric().Data) < dim*dim {
   102  		b.invHess = mat.NewSymDense(dim, nil)
   103  	} else {
   104  		b.invHess = mat.NewSymDense(dim, b.invHess.RawSymmetric().Data[:dim*dim])
   105  	}
   106  	// The values of the inverse Hessian are initialized in the first call to
   107  	// NextDirection.
   108  
   109  	// Initial direction is just negative of the gradient because the Hessian
   110  	// is an identity matrix.
   111  	d := mat.NewVecDense(dim, dir)
   112  	d.ScaleVec(-1, grad)
   113  	return 1 / mat.Norm(d, 2)
   114  }
   115  
   116  func (b *BFGS) NextDirection(loc *Location, dir []float64) (stepSize float64) {
   117  	dim := b.dim
   118  	if len(loc.X) != dim {
   119  		panic("bfgs: unexpected size mismatch")
   120  	}
   121  	if len(loc.Gradient) != dim {
   122  		panic("bfgs: unexpected size mismatch")
   123  	}
   124  	if len(dir) != dim {
   125  		panic("bfgs: unexpected size mismatch")
   126  	}
   127  
   128  	x := mat.NewVecDense(dim, loc.X)
   129  	grad := mat.NewVecDense(dim, loc.Gradient)
   130  
   131  	// s = x_{k+1} - x_{k}
   132  	b.s.SubVec(x, &b.x)
   133  	// y = g_{k+1} - g_{k}
   134  	b.y.SubVec(grad, &b.grad)
   135  
   136  	sDotY := mat.Dot(&b.s, &b.y)
   137  
   138  	if b.first {
   139  		// Rescale the initial Hessian.
   140  		// From: Nocedal, J., Wright, S.: Numerical Optimization (2nd ed).
   141  		//       Springer (2006), page 143, eq. 6.20.
   142  		yDotY := mat.Dot(&b.y, &b.y)
   143  		scale := sDotY / yDotY
   144  		for i := 0; i < dim; i++ {
   145  			for j := i; j < dim; j++ {
   146  				if i == j {
   147  					b.invHess.SetSym(i, i, scale)
   148  				} else {
   149  					b.invHess.SetSym(i, j, 0)
   150  				}
   151  			}
   152  		}
   153  		b.first = false
   154  	}
   155  
   156  	if math.Abs(sDotY) != 0 {
   157  		// Update the inverse Hessian according to the formula
   158  		//
   159  		//  B_{k+1}^-1 = B_k^-1
   160  		//             + (s_kᵀ y_k + y_kᵀ B_k^-1 y_k) / (s_kᵀ y_k)^2 * (s_k s_kᵀ)
   161  		//             - (B_k^-1 y_k s_kᵀ + s_k y_kᵀ B_k^-1) / (s_kᵀ y_k).
   162  		//
   163  		// Note that y_kᵀ B_k^-1 y_k is a scalar, and that the third term is a
   164  		// rank-two update where B_k^-1 y_k is one vector and s_k is the other.
   165  		yBy := mat.Inner(&b.y, b.invHess, &b.y)
   166  		b.tmp.MulVec(b.invHess, &b.y)
   167  		scale := (1 + yBy/sDotY) / sDotY
   168  		b.invHess.SymRankOne(b.invHess, scale, &b.s)
   169  		b.invHess.RankTwo(b.invHess, -1/sDotY, &b.tmp, &b.s)
   170  	}
   171  
   172  	// Update the stored BFGS data.
   173  	b.x.CopyVec(x)
   174  	b.grad.CopyVec(grad)
   175  
   176  	// New direction is stored in dir.
   177  	d := mat.NewVecDense(dim, dir)
   178  	d.MulVec(b.invHess, grad)
   179  	d.ScaleVec(-1, d)
   180  
   181  	return 1
   182  }
   183  
   184  func (*BFGS) needs() struct {
   185  	Gradient bool
   186  	Hessian  bool
   187  } {
   188  	return struct {
   189  		Gradient bool
   190  		Hessian  bool
   191  	}{true, false}
   192  }