github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/optimize/stepsizers.go (about) 1 // Copyright ©2014 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package optimize 6 7 import ( 8 "math" 9 10 "github.com/jingcheng-WU/gonum/floats" 11 "github.com/jingcheng-WU/gonum/floats/scalar" 12 ) 13 14 const ( 15 initialStepFactor = 1 16 17 quadraticMinimumStepSize = 1e-3 18 quadraticMaximumStepSize = 1 19 quadraticThreshold = 1e-12 20 21 firstOrderMinimumStepSize = quadraticMinimumStepSize 22 firstOrderMaximumStepSize = quadraticMaximumStepSize 23 ) 24 25 var ( 26 _ StepSizer = ConstantStepSize{} 27 _ StepSizer = (*QuadraticStepSize)(nil) 28 _ StepSizer = (*FirstOrderStepSize)(nil) 29 ) 30 31 // ConstantStepSize is a StepSizer that returns the same step size for 32 // every iteration. 33 type ConstantStepSize struct { 34 Size float64 35 } 36 37 func (c ConstantStepSize) Init(_ *Location, _ []float64) float64 { 38 return c.Size 39 } 40 41 func (c ConstantStepSize) StepSize(_ *Location, _ []float64) float64 { 42 return c.Size 43 } 44 45 // QuadraticStepSize estimates the initial line search step size as the minimum 46 // of a quadratic that interpolates f(x_{k-1}), f(x_k) and ∇f_k⋅p_k. 47 // This is useful for line search methods that do not produce well-scaled 48 // descent directions, such as gradient descent or conjugate gradient methods. 49 // The step size is bounded away from zero. 50 type QuadraticStepSize struct { 51 // Threshold determines that the initial step size should be estimated by 52 // quadratic interpolation when the relative change in the objective 53 // function is larger than Threshold. Otherwise the initial step size is 54 // set to 2*previous step size. 55 // If Threshold is zero, it will be set to 1e-12. 56 Threshold float64 57 // InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞. 58 // If InitialStepFactor is zero, it will be set to one. 59 InitialStepFactor float64 60 // MinStepSize is the lower bound on the estimated step size. 61 // MinStepSize times GradientAbsTol should always be greater than machine epsilon. 62 // If MinStepSize is zero, it will be set to 1e-3. 63 MinStepSize float64 64 // MaxStepSize is the upper bound on the estimated step size. 65 // If MaxStepSize is zero, it will be set to 1. 66 MaxStepSize float64 67 68 fPrev float64 69 dirPrevNorm float64 70 projGradPrev float64 71 xPrev []float64 72 } 73 74 func (q *QuadraticStepSize) Init(loc *Location, dir []float64) (stepSize float64) { 75 if q.Threshold == 0 { 76 q.Threshold = quadraticThreshold 77 } 78 if q.InitialStepFactor == 0 { 79 q.InitialStepFactor = initialStepFactor 80 } 81 if q.MinStepSize == 0 { 82 q.MinStepSize = quadraticMinimumStepSize 83 } 84 if q.MaxStepSize == 0 { 85 q.MaxStepSize = quadraticMaximumStepSize 86 } 87 if q.MaxStepSize <= q.MinStepSize { 88 panic("optimize: MinStepSize not smaller than MaxStepSize") 89 } 90 91 gNorm := floats.Norm(loc.Gradient, math.Inf(1)) 92 stepSize = math.Max(q.MinStepSize, math.Min(q.InitialStepFactor/gNorm, q.MaxStepSize)) 93 94 q.fPrev = loc.F 95 q.dirPrevNorm = floats.Norm(dir, 2) 96 q.projGradPrev = floats.Dot(loc.Gradient, dir) 97 q.xPrev = resize(q.xPrev, len(loc.X)) 98 copy(q.xPrev, loc.X) 99 return stepSize 100 } 101 102 func (q *QuadraticStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) { 103 stepSizePrev := floats.Distance(loc.X, q.xPrev, 2) / q.dirPrevNorm 104 projGrad := floats.Dot(loc.Gradient, dir) 105 106 stepSize = 2 * stepSizePrev 107 if !scalar.EqualWithinRel(q.fPrev, loc.F, q.Threshold) { 108 // Two consecutive function values are not relatively equal, so 109 // computing the minimum of a quadratic interpolant might make sense 110 111 df := (loc.F - q.fPrev) / stepSizePrev 112 quadTest := df - q.projGradPrev 113 if quadTest > 0 { 114 // There is a chance of approximating the function well by a 115 // quadratic only if the finite difference (f_k-f_{k-1})/stepSizePrev 116 // is larger than ∇f_{k-1}⋅p_{k-1} 117 118 // Set the step size to the minimizer of the quadratic function that 119 // interpolates f_{k-1}, ∇f_{k-1}⋅p_{k-1} and f_k 120 stepSize = -q.projGradPrev * stepSizePrev / quadTest / 2 121 } 122 } 123 // Bound the step size to lie in [MinStepSize, MaxStepSize] 124 stepSize = math.Max(q.MinStepSize, math.Min(stepSize, q.MaxStepSize)) 125 126 q.fPrev = loc.F 127 q.dirPrevNorm = floats.Norm(dir, 2) 128 q.projGradPrev = projGrad 129 copy(q.xPrev, loc.X) 130 return stepSize 131 } 132 133 // FirstOrderStepSize estimates the initial line search step size based on the 134 // assumption that the first-order change in the function will be the same as 135 // that obtained at the previous iteration. That is, the initial step size s^0_k 136 // is chosen so that 137 // s^0_k ∇f_k⋅p_k = s_{k-1} ∇f_{k-1}⋅p_{k-1} 138 // This is useful for line search methods that do not produce well-scaled 139 // descent directions, such as gradient descent or conjugate gradient methods. 140 type FirstOrderStepSize struct { 141 // InitialStepFactor sets the step size for the first iteration to be InitialStepFactor / |g|_∞. 142 // If InitialStepFactor is zero, it will be set to one. 143 InitialStepFactor float64 144 // MinStepSize is the lower bound on the estimated step size. 145 // MinStepSize times GradientAbsTol should always be greater than machine epsilon. 146 // If MinStepSize is zero, it will be set to 1e-3. 147 MinStepSize float64 148 // MaxStepSize is the upper bound on the estimated step size. 149 // If MaxStepSize is zero, it will be set to 1. 150 MaxStepSize float64 151 152 dirPrevNorm float64 153 projGradPrev float64 154 xPrev []float64 155 } 156 157 func (fo *FirstOrderStepSize) Init(loc *Location, dir []float64) (stepSize float64) { 158 if fo.InitialStepFactor == 0 { 159 fo.InitialStepFactor = initialStepFactor 160 } 161 if fo.MinStepSize == 0 { 162 fo.MinStepSize = firstOrderMinimumStepSize 163 } 164 if fo.MaxStepSize == 0 { 165 fo.MaxStepSize = firstOrderMaximumStepSize 166 } 167 if fo.MaxStepSize <= fo.MinStepSize { 168 panic("optimize: MinStepSize not smaller than MaxStepSize") 169 } 170 171 gNorm := floats.Norm(loc.Gradient, math.Inf(1)) 172 stepSize = math.Max(fo.MinStepSize, math.Min(fo.InitialStepFactor/gNorm, fo.MaxStepSize)) 173 174 fo.dirPrevNorm = floats.Norm(dir, 2) 175 fo.projGradPrev = floats.Dot(loc.Gradient, dir) 176 fo.xPrev = resize(fo.xPrev, len(loc.X)) 177 copy(fo.xPrev, loc.X) 178 return stepSize 179 } 180 181 func (fo *FirstOrderStepSize) StepSize(loc *Location, dir []float64) (stepSize float64) { 182 stepSizePrev := floats.Distance(loc.X, fo.xPrev, 2) / fo.dirPrevNorm 183 projGrad := floats.Dot(loc.Gradient, dir) 184 185 stepSize = stepSizePrev * fo.projGradPrev / projGrad 186 stepSize = math.Max(fo.MinStepSize, math.Min(stepSize, fo.MaxStepSize)) 187 188 fo.dirPrevNorm = floats.Norm(dir, 2) 189 fo.projGradPrev = floats.Dot(loc.Gradient, dir) 190 copy(fo.xPrev, loc.X) 191 return stepSize 192 }