github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/leadership/tracker.go (about) 1 // Copyright 2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package leadership 5 6 import ( 7 "time" 8 9 "github.com/juju/clock" 10 "github.com/juju/errors" 11 "github.com/juju/loggo" 12 "gopkg.in/juju/names.v2" 13 "gopkg.in/tomb.v2" 14 15 "github.com/juju/juju/core/leadership" 16 ) 17 18 var logger = loggo.GetLogger("juju.worker.leadership") 19 20 type Tracker struct { 21 tomb tomb.Tomb 22 claimer leadership.Claimer 23 unitName string 24 applicationName string 25 clock clock.Clock 26 duration time.Duration 27 isMinion bool 28 29 claimLease chan error 30 renewLease <-chan time.Time 31 claimTickets chan chan bool 32 waitLeaderTickets chan chan bool 33 waitMinionTickets chan chan bool 34 waitingLeader []chan bool 35 waitingMinion []chan bool 36 } 37 38 // NewTracker returns a *Tracker that attempts to claim and retain service 39 // leadership for the supplied unit. It will claim leadership for twice the 40 // supplied duration, and once it's leader it will renew leadership every 41 // time the duration elapses. 42 // Thus, successful leadership claims on the resulting Tracker will guarantee 43 // leadership for the duration supplied here without generating additional 44 // calls to the supplied manager (which may very well be on the other side of 45 // a network connection). 46 func NewTracker(tag names.UnitTag, claimer leadership.Claimer, clock clock.Clock, duration time.Duration) *Tracker { 47 unitName := tag.Id() 48 serviceName, _ := names.UnitApplication(unitName) 49 t := &Tracker{ 50 unitName: unitName, 51 applicationName: serviceName, 52 claimer: claimer, 53 clock: clock, 54 duration: duration, 55 claimTickets: make(chan chan bool), 56 waitLeaderTickets: make(chan chan bool), 57 waitMinionTickets: make(chan chan bool), 58 isMinion: true, 59 } 60 t.tomb.Go(func() error { 61 defer func() { 62 for _, ticketCh := range t.waitingLeader { 63 close(ticketCh) 64 } 65 for _, ticketCh := range t.waitingMinion { 66 close(ticketCh) 67 } 68 if t.claimLease != nil { 69 // wait for the goroutine started 70 // by setLeader to exit. 71 <-t.claimLease 72 } 73 }() 74 err := t.loop() 75 // TODO: jam 2015-04-02 is this the most elegant way to make 76 // sure we shutdown cleanly? Essentially the lowest level sees 77 // that we are dying, and propagates an ErrDying up to us so 78 // that we shut down, which we then are passing back into 79 // Tomb.Kill(). 80 // Tomb.Kill() special cases the exact object ErrDying, and has 81 // no idea about errors.Cause and the general errors.Trace 82 // mechanisms that we use. 83 // So we explicitly unwrap before calling tomb.Kill() else 84 // tomb.Stop() thinks that we have a genuine error. 85 switch cause := errors.Cause(err); cause { 86 case tomb.ErrDying: 87 err = cause 88 } 89 return err 90 }) 91 return t 92 } 93 94 // Kill is part of the worker.Worker interface. 95 func (t *Tracker) Kill() { 96 t.tomb.Kill(nil) 97 } 98 99 // Wait is part of the worker.Worker interface. 100 func (t *Tracker) Wait() error { 101 return t.tomb.Wait() 102 } 103 104 // ApplicationName is part of the leadership.Tracker interface. 105 func (t *Tracker) ApplicationName() string { 106 return t.applicationName 107 } 108 109 // ClaimDuration is part of the leadership.Tracker interface. 110 func (t *Tracker) ClaimDuration() time.Duration { 111 return t.duration 112 } 113 114 // ClaimLeader is part of the leadership.Tracker interface. 115 func (t *Tracker) ClaimLeader() leadership.Ticket { 116 return t.submit(t.claimTickets) 117 } 118 119 // WaitLeader is part of the leadership.Tracker interface. 120 func (t *Tracker) WaitLeader() leadership.Ticket { 121 return t.submit(t.waitLeaderTickets) 122 } 123 124 // WaitMinion is part of the leadership.Tracker interface. 125 func (t *Tracker) WaitMinion() leadership.Ticket { 126 return t.submit(t.waitMinionTickets) 127 } 128 129 func (t *Tracker) loop() error { 130 logger.Debugf("%s making initial claim for %s leadership", t.unitName, t.applicationName) 131 if err := t.refresh(); err != nil { 132 return errors.Trace(err) 133 } 134 for { 135 select { 136 case <-t.tomb.Dying(): 137 return tomb.ErrDying 138 case err := <-t.claimLease: 139 t.claimLease = nil 140 if errors.Cause(err) == leadership.ErrBlockCancelled { 141 // BlockUntilLeadershipReleased was cancelled, 142 // which means that the tracker is terminating. 143 continue 144 } else if err != nil { 145 return errors.Annotatef(err, 146 "error while %s waiting for %s leadership release", 147 t.unitName, t.applicationName, 148 ) 149 } 150 logger.Tracef("%s claiming lease for %s leadership", t.unitName, t.applicationName) 151 if err := t.refresh(); err != nil { 152 return errors.Trace(err) 153 } 154 case <-t.renewLease: 155 logger.Tracef("%s renewing lease for %s leadership", t.unitName, t.applicationName) 156 t.renewLease = nil 157 if err := t.refresh(); err != nil { 158 return errors.Trace(err) 159 } 160 case ticketCh := <-t.claimTickets: 161 logger.Tracef("%s got claim request for %s leadership", t.unitName, t.applicationName) 162 if err := t.resolveClaim(ticketCh); err != nil { 163 return errors.Trace(err) 164 } 165 case ticketCh := <-t.waitLeaderTickets: 166 logger.Tracef("%s got wait request for %s leadership", t.unitName, t.applicationName) 167 if err := t.resolveWaitLeader(ticketCh); err != nil { 168 return errors.Trace(err) 169 } 170 case ticketCh := <-t.waitMinionTickets: 171 logger.Tracef("%s got wait request for %s leadership loss", t.unitName, t.applicationName) 172 if err := t.resolveWaitMinion(ticketCh); err != nil { 173 return errors.Trace(err) 174 } 175 } 176 } 177 } 178 179 // refresh makes a leadership request, and updates Tracker state to conform to 180 // latest known reality. 181 func (t *Tracker) refresh() error { 182 logger.Tracef("checking %s for %s leadership", t.unitName, t.applicationName) 183 leaseDuration := 2 * t.duration 184 untilTime := t.clock.Now().Add(leaseDuration) 185 err := t.claimer.ClaimLeadership(t.applicationName, t.unitName, leaseDuration) 186 switch { 187 case err == nil: 188 return t.setLeader(untilTime) 189 case errors.Cause(err) == leadership.ErrClaimDenied: 190 return t.setMinion() 191 } 192 return errors.Annotatef(err, "leadership failure") 193 } 194 195 // setLeader arranges for lease renewal. 196 func (t *Tracker) setLeader(untilTime time.Time) error { 197 if t.isMinion { 198 // If we were a minion, we're now the leader, so we can record the transition. 199 logger.Infof("%s promoted to leadership of %s", t.unitName, t.applicationName) 200 } 201 logger.Tracef("%s confirmed for %s leadership until %s", t.unitName, t.applicationName, untilTime) 202 renewTime := untilTime.Add(-t.duration) 203 logger.Tracef("%s will renew %s leadership at %s", t.unitName, t.applicationName, renewTime) 204 t.isMinion = false 205 t.claimLease = nil 206 t.renewLease = t.clock.After(renewTime.Sub(t.clock.Now())) 207 208 for len(t.waitingLeader) > 0 { 209 logger.Tracef("notifying %s ticket of impending %s leadership", t.unitName, t.applicationName) 210 var ticketCh chan bool 211 ticketCh, t.waitingLeader = t.waitingLeader[0], t.waitingLeader[1:] 212 defer close(ticketCh) 213 if err := t.sendTrue(ticketCh); err != nil { 214 return errors.Trace(err) 215 } 216 } 217 return nil 218 } 219 220 // setMinion arranges for lease acquisition when there's an opportunity. 221 func (t *Tracker) setMinion() error { 222 logger.Infof("%s leadership for %s denied", t.applicationName, t.unitName) 223 t.isMinion = true 224 t.renewLease = nil 225 if t.claimLease == nil { 226 t.claimLease = make(chan error, 1) 227 go func() { 228 defer close(t.claimLease) 229 logger.Debugf("%s waiting for %s leadership release", t.unitName, t.applicationName) 230 err := t.claimer.BlockUntilLeadershipReleased(t.applicationName, t.tomb.Dying()) 231 t.claimLease <- err 232 }() 233 } 234 235 for len(t.waitingMinion) > 0 { 236 logger.Debugf("notifying %s ticket of impending loss of %s leadership", t.unitName, t.applicationName) 237 var ticketCh chan bool 238 ticketCh, t.waitingMinion = t.waitingMinion[0], t.waitingMinion[1:] 239 defer close(ticketCh) 240 if err := t.sendTrue(ticketCh); err != nil { 241 return errors.Trace(err) 242 } 243 } 244 return nil 245 } 246 247 // isLeader returns true if leadership is guaranteed for the Tracker's duration. 248 func (t *Tracker) isLeader() (bool, error) { 249 if !t.isMinion { 250 // Last time we looked, we were leader. 251 select { 252 case <-t.tomb.Dying(): 253 return false, errors.Trace(tomb.ErrDying) 254 case <-t.renewLease: 255 logger.Tracef("%s renewing lease for %s leadership", t.unitName, t.applicationName) 256 t.renewLease = nil 257 if err := t.refresh(); err != nil { 258 return false, errors.Trace(err) 259 } 260 default: 261 logger.Tracef("%s still has %s leadership", t.unitName, t.applicationName) 262 } 263 } 264 return !t.isMinion, nil 265 } 266 267 // resolveClaim will send true on the supplied channel if leadership can be 268 // successfully verified, and will always close it whether or not it sent. 269 func (t *Tracker) resolveClaim(ticketCh chan bool) error { 270 logger.Tracef("resolving %s leadership ticket for %s...", t.applicationName, t.unitName) 271 defer close(ticketCh) 272 if leader, err := t.isLeader(); err != nil { 273 return errors.Trace(err) 274 } else if !leader { 275 logger.Debugf("%s is not %s leader", t.unitName, t.applicationName) 276 return nil 277 } 278 logger.Tracef("confirming %s leadership for %s", t.applicationName, t.unitName) 279 return t.sendTrue(ticketCh) 280 } 281 282 // resolveWaitLeader will send true on the supplied channel if leadership can be 283 // guaranteed for the Tracker's duration. It will then close the channel. If 284 // leadership cannot be guaranteed, the channel is left untouched until either 285 // the termination of the Tracker or the next invocation of setLeader; at which 286 // point true is sent if applicable, and the channel is closed. 287 func (t *Tracker) resolveWaitLeader(ticketCh chan bool) error { 288 var dontClose bool 289 defer func() { 290 if !dontClose { 291 close(ticketCh) 292 } 293 }() 294 295 if leader, err := t.isLeader(); err != nil { 296 return errors.Trace(err) 297 } else if leader { 298 logger.Tracef("reporting %s leadership for %s", t.applicationName, t.unitName) 299 return t.sendTrue(ticketCh) 300 } 301 302 logger.Tracef("waiting for %s to attain %s leadership", t.unitName, t.applicationName) 303 t.waitingLeader = append(t.waitingLeader, ticketCh) 304 dontClose = true 305 return nil 306 } 307 308 // resolveWaitMinion will close the supplied channel as soon as leadership cannot 309 // be guaranteed beyond the Tracker's duration. 310 func (t *Tracker) resolveWaitMinion(ticketCh chan bool) error { 311 var dontClose bool 312 defer func() { 313 if !dontClose { 314 close(ticketCh) 315 } 316 }() 317 318 if leader, err := t.isLeader(); err != nil { 319 return errors.Trace(err) 320 } else if leader { 321 logger.Tracef("waiting for %s to lose %s leadership", t.unitName, t.applicationName) 322 t.waitingMinion = append(t.waitingMinion, ticketCh) 323 dontClose = true 324 } else { 325 logger.Tracef("reporting %s leadership loss for %s", t.applicationName, t.unitName) 326 } 327 return nil 328 329 } 330 331 func (t *Tracker) sendTrue(ticketCh chan bool) error { 332 select { 333 case <-t.tomb.Dying(): 334 return tomb.ErrDying 335 case ticketCh <- true: 336 return nil 337 } 338 } 339 340 func (t *Tracker) submit(tickets chan chan bool) leadership.Ticket { 341 ticketCh := make(chan bool, 1) 342 select { 343 case <-t.tomb.Dying(): 344 close(ticketCh) 345 case tickets <- ticketCh: 346 } 347 ticket := &ticket{ 348 ch: ticketCh, 349 ready: make(chan struct{}), 350 } 351 go ticket.run() 352 return ticket 353 } 354 355 // ticket is used by Tracker to communicate leadership status back to a client. 356 type ticket struct { 357 ch chan bool 358 ready chan struct{} 359 success bool 360 } 361 362 func (t *ticket) run() { 363 defer close(t.ready) 364 // This is only safe/sane because the Tracker promises to close all pending 365 // ticket channels when it shuts down. 366 if <-t.ch { 367 t.success = true 368 } 369 } 370 371 // Ready is part of the leadership.Ticket interface. 372 func (t *ticket) Ready() <-chan struct{} { 373 return t.ready 374 } 375 376 // Wait is part of the leadership.Ticket interface. 377 func (t *ticket) Wait() bool { 378 <-t.ready 379 return t.success 380 }