gonum.org/v1/gonum@v0.14.0/optimize/newton.go

// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"gonum.org/v1/gonum/mat"
)

const maxNewtonModifications = 20

var (
	_ Method          = (*Newton)(nil)
	_ localMethod     = (*Newton)(nil)
	_ NextDirectioner = (*Newton)(nil)
)
// Newton implements a modified Newton's method for Hessian-based unconstrained
// minimization. It applies regularization when the Hessian is not positive
// definite, and it can converge to a local minimum from any starting point.
//
// Newton iteratively forms a quadratic model of the objective function f and
// tries to minimize this approximate model. It generates a sequence of
// locations x_k by means of
//
//	solve H_k d_k = -∇f_k for d_k,
//	x_{k+1} = x_k + α_k d_k,
//
// where H_k is the Hessian matrix of f at x_k and α_k is a step size found by
// a line search.
//
// Away from a minimizer, H_k may not be positive definite and d_k may not be a
// descent direction. Newton implements a Hessian modification strategy that
// adds successively larger multiples of the identity to H_k until it becomes
// positive definite. Note that the repeated trial factorizations of the
// modified Hessian involved in this process can be computationally expensive.
//
// If the Hessian matrix cannot be formed explicitly, or if the computational
// cost of its factorization is prohibitive, the BFGS or L-BFGS quasi-Newton
// method can be used instead.
type Newton struct {
	// Linesearcher is used for selecting suitable steps along the descent
	// direction d. Accepted steps should satisfy at least one of the Wolfe,
	// Goldstein or Armijo conditions.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher
	// Increase is the factor by which a scalar tau is successively increased
	// so that (H + tau*I) is positive definite. Larger values reduce the
	// number of trial Hessian factorizations, but also reduce the second-order
	// information in H.
	// Increase must be greater than 1. If Increase is 0, it is defaulted to 5.
	Increase float64
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	status Status
	err    error

	ls *LinesearchMethod

	hess *mat.SymDense // Storage for a copy of the Hessian matrix.
	chol mat.Cholesky  // Storage for the Cholesky factorization.
	tau  float64
}

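// A minimal usage sketch, not part of the original file: Newton is typically
// driven through this package's Minimize function with a Problem that
// supplies Func, Grad and Hess. The quadratic objective below is an
// illustrative assumption, and error handling is elided.
//
//	p := optimize.Problem{
//		Func: func(x []float64) float64 {
//			return (x[0]-1)*(x[0]-1) + (x[1]-2)*(x[1]-2)
//		},
//		Grad: func(grad, x []float64) {
//			grad[0] = 2 * (x[0] - 1)
//			grad[1] = 2 * (x[1] - 2)
//		},
//		Hess: func(hess *mat.SymDense, x []float64) {
//			hess.SetSym(0, 0, 2)
//			hess.SetSym(0, 1, 0)
//			hess.SetSym(1, 1, 2)
//		},
//	}
//	result, _ := optimize.Minimize(p, []float64{0, 0}, nil, &optimize.Newton{})
//	fmt.Println(result.X) // ≈ [1 2], the exact minimizer of this quadratic.
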
// Status returns the current status of the method and any error that has
// occurred.
func (n *Newton) Status() (Status, error) {
	return n.status, n.err
}

// Uses reports the problem information needed by Newton: gradient and Hessian
// evaluations in addition to function values.
func (*Newton) Uses(has Available) (uses Available, err error) {
	return has.hessian()
}

// Init resets the method's status and reports that Newton operates on a
// single task at a time.
func (n *Newton) Init(dim, tasks int) int {
	n.status = NotTerminated
	n.err = nil
	return 1
}

// Run delegates the optimization loop to the shared local optimizer driver
// and records the final status and error.
func (n *Newton) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	n.status, n.err = localOptimizer{}.run(n, n.GradStopThreshold, operation, result, tasks)
	close(operation)
}

// initLocal applies defaults for unset fields and wires Newton into a
// line-search based method, with Newton itself providing the directions.
func (n *Newton) initLocal(loc *Location) (Operation, error) {
	if n.Increase == 0 {
		n.Increase = 5
	}
	if n.Increase <= 1 {
		panic("optimize: Newton.Increase must be greater than 1")
	}
	if n.Linesearcher == nil {
		n.Linesearcher = &Bisection{}
	}
	if n.ls == nil {
		n.ls = &LinesearchMethod{}
	}
	n.ls.Linesearcher = n.Linesearcher
	n.ls.NextDirectioner = n
	return n.ls.Init(loc)
}

// iterateLocal performs one iteration by delegating to the line search
// method.
func (n *Newton) iterateLocal(loc *Location) (Operation, error) {
	return n.ls.Iterate(loc)
}

// InitDirection allocates storage for the Hessian copy, resets the
// regularization parameter tau, and computes the initial descent direction.
func (n *Newton) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	n.hess = resizeSymDense(n.hess, dim)
	n.tau = 0
	return n.NextDirection(loc, dir)
}

// NextDirection stores the next descent direction in dir and returns the
// suggested step size.
func (n *Newton) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// This method implements Algorithm 3.3 (Cholesky with Added Multiple of
	// the Identity) from Nocedal, Wright (2006), 2nd edition.

	dim := len(loc.X)
	d := mat.NewVecDense(dim, dir)
	grad := mat.NewVecDense(dim, loc.Gradient)
	n.hess.CopySym(loc.Hessian)

	// Find the smallest diagonal entry of the Hessian.
	minA := n.hess.At(0, 0)
	for i := 1; i < dim; i++ {
		a := n.hess.At(i, i)
		if a < minA {
			minA = a
		}
	}
	// If the smallest diagonal entry is positive, the Hessian may be positive
	// definite, and so first attempt to apply the Cholesky factorization to
	// the unmodified Hessian. If the smallest entry is negative, use the
	// final tau from the last iteration if regularization was needed,
	// otherwise guess an appropriate value for tau.
	if minA > 0 {
		n.tau = 0
	} else if n.tau == 0 {
		n.tau = -minA + 0.001
	}

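	// Illustrative note: if the Hessian diagonal entries are 4 and -2, then
	// minA is -2, so on a fresh start the loop below begins from
	// tau = -(-2) + 0.001 = 2.001, making every modified diagonal entry
	// positive before the first factorization attempt.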
	for k := 0; k < maxNewtonModifications; k++ {
		if n.tau != 0 {
			// Add a multiple of the identity to the Hessian.
			for i := 0; i < dim; i++ {
				n.hess.SetSym(i, i, loc.Hessian.At(i, i)+n.tau)
			}
		}
		// Try to apply the Cholesky factorization.
		pd := n.chol.Factorize(n.hess)
		if pd {
			// Store the solution in d's backing array, dir.
			err := n.chol.SolveVecTo(d, grad)
			if err == nil {
				d.ScaleVec(-1, d)
				return 1
			}
		}
		// The modified Hessian is not positive definite, so increase tau.
		n.tau = math.Max(n.Increase*n.tau, 0.001)
	}

	// Hessian modification failed to produce a positive definite matrix.
	// Return the negative gradient as the descent direction.
	d.ScaleVec(-1, grad)
	return 1
}

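// A standalone sketch of the modification loop above, using values made up
// for illustration: an indefinite matrix fails Cholesky factorization until a
// large enough multiple of the identity has been added to its diagonal,
// mirroring NextDirection with the default Increase of 5.
//
//	orig := mat.NewSymDense(2, []float64{-1, 2, 2, 3}) // eigenvalues ≈ -1.83, 3.83
//	hess := mat.NewSymDense(2, nil)
//	hess.CopySym(orig)
//	var chol mat.Cholesky
//	tau := 0.0
//	for !chol.Factorize(hess) {
//		tau = math.Max(5*tau, 0.001)
//		for i := 0; i < 2; i++ {
//			hess.SetSym(i, i, orig.At(i, i)+tau)
//		}
//	}
//	// The loop stops at tau = 3.125, the first value in the sequence
//	// 0.001, 0.005, 0.025, ... for which orig + tau*I is positive definite.
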
func (n *Newton) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, true}
}