github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/optimize/newton.go

// Copyright ©2015 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package optimize

import (
	"math"

	"github.com/jingcheng-WU/gonum/mat"
)

const maxNewtonModifications = 20

var (
	_ Method          = (*Newton)(nil)
	_ localMethod     = (*Newton)(nil)
	_ NextDirectioner = (*Newton)(nil)
)

// Newton implements a modified Newton's method for Hessian-based unconstrained
// minimization. It applies regularization when the Hessian is not positive
// definite, and it can converge to a local minimum from any starting point.
//
// Newton iteratively forms a quadratic model of the objective function f and
// tries to minimize this approximate model. It generates a sequence of
// locations x_k by means of
//  solve H_k d_k = -∇f_k for d_k,
//  x_{k+1} = x_k + α_k d_k,
// where H_k is the Hessian matrix of f at x_k and α_k is a step size found by
// a line search.
//
// Away from a minimizer H_k may not be positive definite and d_k may not be a
// descent direction. Newton implements a Hessian modification strategy that
// adds successively larger multiples of the identity to H_k until it becomes
// positive definite. Note that the repeated trial factorization of the
// modified Hessian involved in this process can be computationally expensive.
//
// If the Hessian matrix cannot be formed explicitly or if the computational
// cost of its factorization is prohibitive, the BFGS or L-BFGS quasi-Newton
// methods can be used instead.
type Newton struct {
	// Linesearcher is used for selecting suitable steps along the descent
	// direction d. Accepted steps should satisfy at least one of the Wolfe,
	// Goldstein or Armijo conditions.
	// If Linesearcher == nil, an appropriate default is chosen.
	Linesearcher Linesearcher
	// Increase is the factor by which a scalar tau is successively increased
	// so that (H + tau*I) is positive definite. Larger values reduce the
	// number of trial Hessian factorizations, but also reduce the second-order
	// information in H.
	// Increase must be greater than 1. If Increase is 0, it is defaulted to 5.
	Increase float64
	// GradStopThreshold sets the threshold for stopping if the gradient norm
	// gets too small. If GradStopThreshold is 0 it is defaulted to 1e-12, and
	// if it is NaN the setting is not used.
	GradStopThreshold float64

	status Status
	err    error

	ls *LinesearchMethod

	hess *mat.SymDense // Storage for a copy of the Hessian matrix.
	chol mat.Cholesky  // Storage for the Cholesky factorization.
	tau  float64
}

// Status returns the current status of the method and any error encountered
// during the optimization.
func (n *Newton) Status() (Status, error) {
	return n.status, n.err
}

// Uses reports the information Newton needs from the objective function; it
// requires both the gradient and the Hessian.
func (*Newton) Uses(has Available) (uses Available, err error) {
	return has.hessian()
}

// Init initializes the method for an optimization of the given dimension.
// Newton is a local method and always requests a single concurrent task.
func (n *Newton) Init(dim, tasks int) int {
	n.status = NotTerminated
	n.err = nil
	return 1
}

// Run runs the optimization loop, communicating with the caller over the
// operation and result channels until termination.
func (n *Newton) Run(operation chan<- Task, result <-chan Task, tasks []Task) {
	n.status, n.err = localOptimizer{}.run(n, n.GradStopThreshold, operation, result, tasks)
	close(operation)
}

func (n *Newton) initLocal(loc *Location) (Operation, error) {
	if n.Increase == 0 {
		n.Increase = 5
	}
	if n.Increase <= 1 {
		panic("optimize: Newton.Increase must be greater than 1")
	}
	if n.Linesearcher == nil {
		n.Linesearcher = &Bisection{}
	}
	if n.ls == nil {
		n.ls = &LinesearchMethod{}
	}
	n.ls.Linesearcher = n.Linesearcher
	n.ls.NextDirectioner = n
	return n.ls.Init(loc)
}

func (n *Newton) iterateLocal(loc *Location) (Operation, error) {
	return n.ls.Iterate(loc)
}

// InitDirection resizes the Hessian storage, resets the regularization
// parameter tau, and computes the first descent direction.
func (n *Newton) InitDirection(loc *Location, dir []float64) (stepSize float64) {
	dim := len(loc.X)
	n.hess = resizeSymDense(n.hess, dim)
	n.tau = 0
	return n.NextDirection(loc, dir)
}

// NextDirection computes the next descent direction by solving the Newton
// system with a (possibly regularized) Cholesky factorization of the Hessian.
func (n *Newton) NextDirection(loc *Location, dir []float64) (stepSize float64) {
	// This method implements Algorithm 3.3 (Cholesky with Added Multiple of
	// the Identity) from Nocedal, Wright (2006), 2nd edition.

	dim := len(loc.X)
	d := mat.NewVecDense(dim, dir)
	grad := mat.NewVecDense(dim, loc.Gradient)
	n.hess.CopySym(loc.Hessian)

	// Find the smallest diagonal entry of the Hessian.
	minA := n.hess.At(0, 0)
	for i := 1; i < dim; i++ {
		a := n.hess.At(i, i)
		if a < minA {
			minA = a
		}
	}
	// If the smallest diagonal entry is positive, the Hessian may be positive
	// definite, and so first attempt to apply the Cholesky factorization to
	// the un-modified Hessian. If the smallest entry is negative, use the
	// final tau from the last iteration if regularization was needed,
	// otherwise guess an appropriate value for tau.
	if minA > 0 {
		n.tau = 0
	} else if n.tau == 0 {
		n.tau = -minA + 0.001
	}

	for k := 0; k < maxNewtonModifications; k++ {
		if n.tau != 0 {
			// Add a multiple of identity to the Hessian.
			for i := 0; i < dim; i++ {
				n.hess.SetSym(i, i, loc.Hessian.At(i, i)+n.tau)
			}
		}
		// Try to apply the Cholesky factorization.
		pd := n.chol.Factorize(n.hess)
		if pd {
			// Store the solution in d's backing array, dir.
			err := n.chol.SolveVecTo(d, grad)
			if err == nil {
				d.ScaleVec(-1, d)
				return 1
			}
		}
		// Modified Hessian is not PD, so increase tau.
		n.tau = math.Max(n.Increase*n.tau, 0.001)
	}

	// Hessian modification failed to get a PD matrix. Return the negative
	// gradient as the descent direction.
	d.ScaleVec(-1, grad)
	return 1
}

func (n *Newton) needs() struct {
	Gradient bool
	Hessian  bool
} {
	return struct {
		Gradient bool
		Hessian  bool
	}{true, true}
}
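
For context, a minimal usage sketch follows; it is not part of newton.go. It assumes this fork mirrors upstream Gonum's v0.9 optimize API (optimize.Minimize, optimize.Problem with destination-first Grad and Hess signatures), and the Rosenbrock objective and starting point are purely illustrative.

// Hypothetical usage sketch, not part of the package. Assumes the fork keeps
// upstream Gonum's v0.9 API: optimize.Minimize and optimize.Problem with
// Grad(grad, x) and Hess(hess *mat.SymDense, x) destination-first signatures.
package main

import (
	"fmt"
	"log"

	"github.com/jingcheng-WU/gonum/mat"
	"github.com/jingcheng-WU/gonum/optimize"
)

func main() {
	// Rosenbrock function: f(x, y) = (1-x)^2 + 100(y-x^2)^2.
	p := optimize.Problem{
		Func: func(x []float64) float64 {
			a, b := 1-x[0], x[1]-x[0]*x[0]
			return a*a + 100*b*b
		},
		Grad: func(grad, x []float64) {
			grad[0] = -2*(1-x[0]) - 400*x[0]*(x[1]-x[0]*x[0])
			grad[1] = 200 * (x[1] - x[0]*x[0])
		},
		Hess: func(hess *mat.SymDense, x []float64) {
			hess.SetSym(0, 0, 2-400*x[1]+1200*x[0]*x[0])
			hess.SetSym(0, 1, -400*x[0])
			hess.SetSym(1, 1, 200)
		},
	}

	// A zero-valued Newton uses the defaults documented above: a Bisection
	// line search, Increase = 5, and GradStopThreshold = 1e-12.
	result, err := optimize.Minimize(p, []float64{-1.2, 1}, nil, &optimize.Newton{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("x = %v, f = %v\n", result.X, result.F)
}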
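
The regularization loop in NextDirection can also be seen in isolation. The standalone sketch below (hypothetical, using only the fork's mat package) applies the same Algorithm 3.3 idea to an indefinite 2x2 matrix, printing the tau at which the Cholesky factorization first succeeds.

// Standalone sketch of the Hessian regularization used by NextDirection
// (Algorithm 3.3, Nocedal & Wright 2006). Hypothetical demo, not part of
// the package.
package main

import (
	"fmt"
	"math"

	"github.com/jingcheng-WU/gonum/mat"
)

func main() {
	// An indefinite symmetric matrix: eigenvalues 3 and -1.
	h := mat.NewSymDense(2, []float64{1, 2, 2, 1})

	const increase = 5 // Newton's default Increase factor.
	var chol mat.Cholesky

	// The smallest diagonal entry of h is 1 > 0, so, as in NextDirection,
	// the first factorization attempt is on the unmodified matrix (tau = 0).
	tau := 0.0
	work := mat.NewSymDense(2, nil)
	for k := 0; k < 20; k++ {
		// Form h + tau*I.
		work.CopySym(h)
		for i := 0; i < 2; i++ {
			work.SetSym(i, i, h.At(i, i)+tau)
		}
		if chol.Factorize(work) {
			fmt.Printf("success after %d trials, tau = %g\n", k+1, tau)
			return
		}
		// Not positive definite: grow tau geometrically, as Newton does.
		tau = math.Max(increase*tau, 0.001)
	}
	fmt.Println("regularization failed")
}

Because the most negative eigenvalue of h is -1, the loop keeps failing until tau exceeds 1 (here at tau = 3.125), which illustrates why a larger Increase trades fewer trial factorizations for a cruder, more gradient-like step.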