// Source: github.com/letsencrypt/trillian@v1.1.2-0.20180615153820-ae375a99d36a/util/election/runner.go

// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package election

import (
	"context"
	"math/rand"
	"time"

	"github.com/golang/glog"
	"github.com/google/trillian/util"
)

// Minimum values for configuration intervals. fixupRunnerConfig raises any
// smaller configured value up to the corresponding minimum.
const (
	// MinPreElectionPause is the lower bound applied to RunnerConfig.PreElectionPause.
	MinPreElectionPause = 10 * time.Millisecond
	// MinMasterCheckInterval is the lower bound applied to RunnerConfig.MasterCheckInterval.
	MinMasterCheckInterval = 50 * time.Millisecond
	// MinMasterHoldInterval is the lower bound applied to RunnerConfig.MasterHoldInterval.
	MinMasterHoldInterval = 10 * time.Second
)

// RunnerConfig describes the parameters for an election Runner.
type RunnerConfig struct {
	// PreElectionPause is the maximum interval to wait before starting a
	// mastership election for a particular log.
	PreElectionPause time.Duration
	// MasterCheckInterval is the interval between checks that we still
	// hold mastership for a log.
	MasterCheckInterval time.Duration
	// MasterHoldInterval is the minimum interval to hold mastership for.
	MasterHoldInterval time.Duration
	// ResignOdds gives the chance of resigning mastership after each
	// check interval, as the N for 1-in-N. Values below 1 are raised to 1
	// by fixupRunnerConfig.
	ResignOdds int

	// TimeSource supplies the current time used to measure how long
	// mastership has been held. When nil, fixupRunnerConfig replaces it
	// with util.SystemTimeSource{}.
	TimeSource util.TimeSource
}

// fixupRunnerConfig ensures operation parameters have required minimum values.
// It modifies cfg in place.
51 func fixupRunnerConfig(cfg *RunnerConfig) { 52 if cfg.PreElectionPause < MinPreElectionPause { 53 cfg.PreElectionPause = MinPreElectionPause 54 } 55 if cfg.MasterCheckInterval < MinMasterCheckInterval { 56 cfg.MasterCheckInterval = MinMasterCheckInterval 57 } 58 if cfg.MasterHoldInterval < MinMasterHoldInterval { 59 cfg.MasterHoldInterval = MinMasterHoldInterval 60 } 61 if cfg.ResignOdds < 1 { 62 cfg.ResignOdds = 1 63 } 64 if cfg.TimeSource == nil { 65 cfg.TimeSource = util.SystemTimeSource{} 66 } 67 } 68 69 // Runner controls a continuous election process. 70 type Runner struct { 71 // Allow the user to store a Cancel function with the runner for convenience. 72 Cancel context.CancelFunc 73 id int64 74 cfg *RunnerConfig 75 tracker *MasterTracker 76 election MasterElection 77 } 78 79 // NewRunner builds a new election Runner instance with the given configuration. On calling 80 // Run(), the provided election will be continuously monitored and mastership changes will 81 // be notified to the provided MasterTracker instance. 82 func NewRunner(id int64, cfg *RunnerConfig, tracker *MasterTracker, cancel context.CancelFunc, el MasterElection) *Runner { 83 fixupRunnerConfig(cfg) 84 return &Runner{ 85 Cancel: cancel, 86 id: id, 87 cfg: cfg, 88 tracker: tracker, 89 election: el, 90 } 91 } 92 93 // Run performs a continuous election process. It runs continuously until the 94 // context is canceled or an internal error is encountered. 95 func (er *Runner) Run(ctx context.Context, pending chan<- Resignation) { 96 // Pause for a random interval so that if multiple instances start at the same 97 // time there is less of a thundering herd. 
98 pause := rand.Int63n(er.cfg.PreElectionPause.Nanoseconds()) 99 if err := util.SleepContext(ctx, time.Duration(pause)); err != nil { 100 return 101 } 102 103 glog.V(1).Infof("%d: start election-monitoring loop ", er.id) 104 if err := er.election.Start(ctx); err != nil { 105 glog.Errorf("%d: election.Start() failed: %v", er.id, err) 106 return 107 } 108 defer func(ctx context.Context, er *Runner) { 109 glog.Infof("%d: shutdown election-monitoring loop", er.id) 110 er.election.Close(ctx) 111 }(ctx, er) 112 113 for { 114 glog.V(1).Infof("%d: When I left you, I was but the learner", er.id) 115 if err := er.election.WaitForMastership(ctx); err != nil { 116 glog.Errorf("%d: er.election.WaitForMastership() failed: %v", er.id, err) 117 return 118 } 119 glog.V(1).Infof("%d: Now, I am the master", er.id) 120 er.tracker.Set(er.id, true) 121 masterSince := er.cfg.TimeSource.Now() 122 123 // While-master loop 124 for { 125 if err := util.SleepContext(ctx, er.cfg.MasterCheckInterval); err != nil { 126 glog.Infof("%d: termination requested", er.id) 127 return 128 } 129 master, err := er.election.IsMaster(ctx) 130 if err != nil { 131 glog.Errorf("%d: failed to check mastership status", er.id) 132 break 133 } 134 if !master { 135 glog.Errorf("%d: no longer the master!", er.id) 136 er.tracker.Set(er.id, false) 137 break 138 } 139 if er.ShouldResign(masterSince) { 140 glog.Infof("%d: queue up resignation of mastership", er.id) 141 er.tracker.Set(er.id, false) 142 143 done := make(chan bool) 144 r := Resignation{ID: er.id, er: er, done: done} 145 pending <- r 146 <-done // block until acted on 147 break // no longer master 148 } 149 } 150 } 151 } 152 153 // ShouldResign randomly decides whether this runner should resign mastership. 
154 func (er *Runner) ShouldResign(masterSince time.Time) bool { 155 now := er.cfg.TimeSource.Now() 156 duration := now.Sub(masterSince) 157 if duration < er.cfg.MasterHoldInterval { 158 // Always hold onto mastership for a minimum interval to prevent churn. 159 return false 160 } 161 // Roll the bones. 162 odds := er.cfg.ResignOdds 163 if odds <= 0 { 164 return true 165 } 166 return rand.Intn(er.cfg.ResignOdds) == 0 167 } 168 169 // Resignation indicates that a master should explicitly resign mastership, by invoking 170 // the Execute() method at a point where no master-related activity is ongoing. 171 type Resignation struct { 172 ID int64 173 er *Runner 174 done chan<- bool 175 } 176 177 // Execute performs the pending deliberate resignation for an election runner. 178 func (r *Resignation) Execute(ctx context.Context) { 179 glog.Infof("%d: deliberately resigning mastership", r.er.id) 180 if err := r.er.election.Resign(ctx); err != nil { 181 glog.Errorf("%d: failed to resign mastership: %v", r.er.id, err) 182 } 183 if err := r.er.election.Start(ctx); err != nil { 184 glog.Errorf("%d: failed to restart election: %v", r.er.id, err) 185 } 186 r.done <- true 187 }