github.com/gopherd/gonum@v0.0.4/optimize/stepsizers.go (about)

     1  // Copyright ©2014 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package optimize
     6  
     7  import (
     8  	"math"
     9  
    10  	"github.com/gopherd/gonum/floats"
    11  	"github.com/gopherd/gonum/floats/scalar"
    12  )
    13  
const (
	// initialStepFactor is the default numerator used when estimating the
	// very first step size as InitialStepFactor / |g|_∞.
	initialStepFactor = 1

	// Default bounds and threshold for QuadraticStepSize.
	quadraticMinimumStepSize = 1e-3
	quadraticMaximumStepSize = 1
	quadraticThreshold       = 1e-12

	// FirstOrderStepSize shares its default step-size bounds with
	// QuadraticStepSize.
	firstOrderMinimumStepSize = quadraticMinimumStepSize
	firstOrderMaximumStepSize = quadraticMaximumStepSize
)
    24  
// Compile-time checks that the concrete step sizers satisfy the StepSizer
// interface.
var (
	_ StepSizer = ConstantStepSize{}
	_ StepSizer = (*QuadraticStepSize)(nil)
	_ StepSizer = (*FirstOrderStepSize)(nil)
)
    30  
// ConstantStepSize is a StepSizer that returns the same step size for
// every iteration.
type ConstantStepSize struct {
	// Size is the fixed step size returned by both Init and StepSize.
	Size float64
}
    36  
    37  func (c ConstantStepSize) Init(_ *Location, _ []float64) float64 {
    38  	return c.Size
    39  }
    40  
    41  func (c ConstantStepSize) StepSize(_ *Location, _ []float64) float64 {
    42  	return c.Size
    43  }
    44  
// QuadraticStepSize estimates the initial line search step size as the minimum
// of a quadratic that interpolates f(x_{k-1}), f(x_k) and ∇f_k⋅p_k.
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
// The step size is bounded away from zero.
type QuadraticStepSize struct {
	// Threshold determines that the initial step size should be estimated by
	// quadratic interpolation when the relative change in the objective
	// function is larger than Threshold.  Otherwise the initial step size is
	// set to 2*previous step size.
	// If Threshold is zero, it will be set to 1e-12.
	Threshold float64
	// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
	// If InitialStepFactor is zero, it will be set to one.
	InitialStepFactor float64
	// MinStepSize is the lower bound on the estimated step size.
	// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
	// If MinStepSize is zero, it will be set to 1e-3.
	MinStepSize float64
	// MaxStepSize is the upper bound on the estimated step size.
	// If MaxStepSize is zero, it will be set to 1.
	MaxStepSize float64

	// fPrev is the objective value at the previous iterate.
	fPrev float64
	// dirPrevNorm is the Euclidean norm of the previous search direction,
	// used to recover the previously accepted step size from the distance
	// between consecutive iterates.
	dirPrevNorm float64
	// projGradPrev is ∇f_{k-1}⋅p_{k-1}, the directional derivative at the
	// previous iterate along the previous direction.
	projGradPrev float64
	// xPrev is a copy of the previous iterate.
	xPrev []float64
}
    73  
    74  func (q *QuadraticStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
    75  	if q.Threshold == 0 {
    76  		q.Threshold = quadraticThreshold
    77  	}
    78  	if q.InitialStepFactor == 0 {
    79  		q.InitialStepFactor = initialStepFactor
    80  	}
    81  	if q.MinStepSize == 0 {
    82  		q.MinStepSize = quadraticMinimumStepSize
    83  	}
    84  	if q.MaxStepSize == 0 {
    85  		q.MaxStepSize = quadraticMaximumStepSize
    86  	}
    87  	if q.MaxStepSize <= q.MinStepSize {
    88  		panic("optimize: MinStepSize not smaller than MaxStepSize")
    89  	}
    90  
    91  	gNorm := floats.Norm(loc.Gradient, math.Inf(1))
    92  	stepSize = math.Max(q.MinStepSize, math.Min(q.InitialStepFactor/gNorm, q.MaxStepSize))
    93  
    94  	q.fPrev = loc.F
    95  	q.dirPrevNorm = floats.Norm(dir, 2)
    96  	q.projGradPrev = floats.Dot(loc.Gradient, dir)
    97  	q.xPrev = resize(q.xPrev, len(loc.X))
    98  	copy(q.xPrev, loc.X)
    99  	return stepSize
   100  }
   101  
   102  func (q *QuadraticStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
   103  	stepSizePrev := floats.Distance(loc.X, q.xPrev, 2) / q.dirPrevNorm
   104  	projGrad := floats.Dot(loc.Gradient, dir)
   105  
   106  	stepSize = 2 * stepSizePrev
   107  	if !scalar.EqualWithinRel(q.fPrev, loc.F, q.Threshold) {
   108  		// Two consecutive function values are not relatively equal, so
   109  		// computing the minimum of a quadratic interpolant might make sense
   110  
   111  		df := (loc.F - q.fPrev) / stepSizePrev
   112  		quadTest := df - q.projGradPrev
   113  		if quadTest > 0 {
   114  			// There is a chance of approximating the function well by a
   115  			// quadratic only if the finite difference (f_k-f_{k-1})/stepSizePrev
   116  			// is larger than ∇f_{k-1}⋅p_{k-1}
   117  
   118  			// Set the step size to the minimizer of the quadratic function that
   119  			// interpolates f_{k-1}, ∇f_{k-1}⋅p_{k-1} and f_k
   120  			stepSize = -q.projGradPrev * stepSizePrev / quadTest / 2
   121  		}
   122  	}
   123  	// Bound the step size to lie in [MinStepSize, MaxStepSize]
   124  	stepSize = math.Max(q.MinStepSize, math.Min(stepSize, q.MaxStepSize))
   125  
   126  	q.fPrev = loc.F
   127  	q.dirPrevNorm = floats.Norm(dir, 2)
   128  	q.projGradPrev = projGrad
   129  	copy(q.xPrev, loc.X)
   130  	return stepSize
   131  }
   132  
// FirstOrderStepSize estimates the initial line search step size based on the
// assumption that the first-order change in the function will be the same as
// that obtained at the previous iteration. That is, the initial step size s^0_k
// is chosen so that
//   s^0_k ∇f_k⋅p_k = s_{k-1} ∇f_{k-1}⋅p_{k-1}
// This is useful for line search methods that do not produce well-scaled
// descent directions, such as gradient descent or conjugate gradient methods.
type FirstOrderStepSize struct {
	// InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞.
	// If InitialStepFactor is zero, it will be set to one.
	InitialStepFactor float64
	// MinStepSize is the lower bound on the estimated step size.
	// MinStepSize times GradientAbsTol should always be greater than machine epsilon.
	// If MinStepSize is zero, it will be set to 1e-3.
	MinStepSize float64
	// MaxStepSize is the upper bound on the estimated step size.
	// If MaxStepSize is zero, it will be set to 1.
	MaxStepSize float64

	// dirPrevNorm is the Euclidean norm of the previous search direction,
	// used to recover the previously accepted step size from the distance
	// between consecutive iterates.
	dirPrevNorm float64
	// projGradPrev is ∇f_{k-1}⋅p_{k-1}, the directional derivative at the
	// previous iterate along the previous direction.
	projGradPrev float64
	// xPrev is a copy of the previous iterate.
	xPrev []float64
}
   156  
   157  func (fo *FirstOrderStepSize) Init(loc *Location, dir []float64) (stepSize float64) {
   158  	if fo.InitialStepFactor == 0 {
   159  		fo.InitialStepFactor = initialStepFactor
   160  	}
   161  	if fo.MinStepSize == 0 {
   162  		fo.MinStepSize = firstOrderMinimumStepSize
   163  	}
   164  	if fo.MaxStepSize == 0 {
   165  		fo.MaxStepSize = firstOrderMaximumStepSize
   166  	}
   167  	if fo.MaxStepSize <= fo.MinStepSize {
   168  		panic("optimize: MinStepSize not smaller than MaxStepSize")
   169  	}
   170  
   171  	gNorm := floats.Norm(loc.Gradient, math.Inf(1))
   172  	stepSize = math.Max(fo.MinStepSize, math.Min(fo.InitialStepFactor/gNorm, fo.MaxStepSize))
   173  
   174  	fo.dirPrevNorm = floats.Norm(dir, 2)
   175  	fo.projGradPrev = floats.Dot(loc.Gradient, dir)
   176  	fo.xPrev = resize(fo.xPrev, len(loc.X))
   177  	copy(fo.xPrev, loc.X)
   178  	return stepSize
   179  }
   180  
   181  func (fo *FirstOrderStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) {
   182  	stepSizePrev := floats.Distance(loc.X, fo.xPrev, 2) / fo.dirPrevNorm
   183  	projGrad := floats.Dot(loc.Gradient, dir)
   184  
   185  	stepSize = stepSizePrev * fo.projGradPrev / projGrad
   186  	stepSize = math.Max(fo.MinStepSize, math.Min(stepSize, fo.MaxStepSize))
   187  
   188  	fo.dirPrevNorm = floats.Norm(dir, 2)
   189  	fo.projGradPrev = floats.Dot(loc.Gradient, dir)
   190  	copy(fo.xPrev, loc.X)
   191  	return stepSize
   192  }