github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/worker/leadership/tracker.go (about) 1 // Copyright 2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package leadership 5 6 import ( 7 "time" 8 9 "github.com/juju/errors" 10 "github.com/juju/loggo" 11 "github.com/juju/names" 12 "launchpad.net/tomb" 13 14 "github.com/juju/juju/leadership" 15 ) 16 17 var logger = loggo.GetLogger("juju.worker.leadership") 18 19 // tracker implements TrackerWorker. 20 type tracker struct { 21 tomb tomb.Tomb 22 claimer leadership.Claimer 23 unitName string 24 serviceName string 25 duration time.Duration 26 isMinion bool 27 28 claimLease chan struct{} 29 renewLease <-chan time.Time 30 claimTickets chan chan bool 31 waitLeaderTickets chan chan bool 32 waitMinionTickets chan chan bool 33 waitingLeader []chan bool 34 waitingMinion []chan bool 35 } 36 37 // NewTrackerWorker returns a TrackerWorker that attempts to claim and retain 38 // service leadership for the supplied unit. It will claim leadership for twice 39 // the supplied duration, and once it's leader it will renew leadership every 40 // time the duration elapses. 41 // Thus, successful leadership claims on the resulting Tracker will guarantee 42 // leadership for the duration supplied here without generating additional calls 43 // to the supplied manager (which may very well be on the other side of a 44 // network connection). 
45 func NewTrackerWorker(tag names.UnitTag, claimer leadership.Claimer, duration time.Duration) TrackerWorker { 46 unitName := tag.Id() 47 serviceName, _ := names.UnitService(unitName) 48 t := &tracker{ 49 unitName: unitName, 50 serviceName: serviceName, 51 claimer: claimer, 52 duration: duration, 53 claimTickets: make(chan chan bool), 54 waitLeaderTickets: make(chan chan bool), 55 waitMinionTickets: make(chan chan bool), 56 } 57 go func() { 58 defer t.tomb.Done() 59 defer func() { 60 for _, ticketCh := range t.waitingLeader { 61 close(ticketCh) 62 } 63 for _, ticketCh := range t.waitingMinion { 64 close(ticketCh) 65 } 66 }() 67 err := t.loop() 68 // TODO: jam 2015-04-02 is this the most elegant way to make 69 // sure we shutdown cleanly? Essentially the lowest level sees 70 // that we are dying, and propagates an ErrDying up to us so 71 // that we shut down, which we then are passing back into 72 // Tomb.Kill(). 73 // Tomb.Kill() special cases the exact object ErrDying, and has 74 // no idea about errors.Cause and the general errors.Trace 75 // mechanisms that we use. 76 // So we explicitly unwrap before calling tomb.Kill() else 77 // tomb.Stop() thinks that we have a genuine error. 78 switch cause := errors.Cause(err); cause { 79 case tomb.ErrDying: 80 err = cause 81 } 82 t.tomb.Kill(err) 83 }() 84 return t 85 } 86 87 // Kill is part of the worker.Worker interface. 88 func (t *tracker) Kill() { 89 t.tomb.Kill(nil) 90 } 91 92 // Wait is part of the worker.Worker interface. 93 func (t *tracker) Wait() error { 94 return t.tomb.Wait() 95 } 96 97 // ServiceName is part of the Tracker interface. 98 func (t *tracker) ServiceName() string { 99 return t.serviceName 100 } 101 102 // ClaimDuration is part of the Tracker interface. 103 func (t *tracker) ClaimDuration() time.Duration { 104 return t.duration 105 } 106 107 // ClaimLeader is part of the Tracker interface. 
108 func (t *tracker) ClaimLeader() Ticket { 109 return t.submit(t.claimTickets) 110 } 111 112 // WaitLeader is part of the Tracker interface. 113 func (t *tracker) WaitLeader() Ticket { 114 return t.submit(t.waitLeaderTickets) 115 } 116 117 // WaitMinion is part of the Tracker interface. 118 func (t *tracker) WaitMinion() Ticket { 119 return t.submit(t.waitMinionTickets) 120 } 121 122 func (t *tracker) loop() error { 123 logger.Debugf("%s making initial claim for %s leadership", t.unitName, t.serviceName) 124 if err := t.refresh(); err != nil { 125 return errors.Trace(err) 126 } 127 for { 128 select { 129 case <-t.tomb.Dying(): 130 return tomb.ErrDying 131 case <-t.claimLease: 132 logger.Debugf("%s claiming lease for %s leadership", t.unitName, t.serviceName) 133 t.claimLease = nil 134 if err := t.refresh(); err != nil { 135 return errors.Trace(err) 136 } 137 case <-t.renewLease: 138 logger.Debugf("%s renewing lease for %s leadership", t.unitName, t.serviceName) 139 t.renewLease = nil 140 if err := t.refresh(); err != nil { 141 return errors.Trace(err) 142 } 143 case ticketCh := <-t.claimTickets: 144 logger.Debugf("%s got claim request for %s leadership", t.unitName, t.serviceName) 145 if err := t.resolveClaim(ticketCh); err != nil { 146 return errors.Trace(err) 147 } 148 case ticketCh := <-t.waitLeaderTickets: 149 logger.Debugf("%s got wait request for %s leadership", t.unitName, t.serviceName) 150 if err := t.resolveWaitLeader(ticketCh); err != nil { 151 return errors.Trace(err) 152 } 153 case ticketCh := <-t.waitMinionTickets: 154 logger.Debugf("%s got wait request for %s leadership loss", t.unitName, t.serviceName) 155 if err := t.resolveWaitMinion(ticketCh); err != nil { 156 return errors.Trace(err) 157 } 158 } 159 } 160 } 161 162 // refresh makes a leadership request, and updates tracker state to conform to 163 // latest known reality. 
164 func (t *tracker) refresh() error { 165 logger.Debugf("checking %s for %s leadership", t.unitName, t.serviceName) 166 leaseDuration := 2 * t.duration 167 untilTime := time.Now().Add(leaseDuration) 168 err := t.claimer.ClaimLeadership(t.serviceName, t.unitName, leaseDuration) 169 switch { 170 case err == nil: 171 return t.setLeader(untilTime) 172 case errors.Cause(err) == leadership.ErrClaimDenied: 173 return t.setMinion() 174 } 175 return errors.Annotatef(err, "leadership failure") 176 } 177 178 // setLeader arranges for lease renewal. 179 func (t *tracker) setLeader(untilTime time.Time) error { 180 logger.Debugf("%s confirmed for %s leadership until %s", t.unitName, t.serviceName, untilTime) 181 renewTime := untilTime.Add(-t.duration) 182 logger.Infof("%s will renew %s leadership at %s", t.unitName, t.serviceName, renewTime) 183 t.isMinion = false 184 t.claimLease = nil 185 t.renewLease = time.After(renewTime.Sub(time.Now())) 186 187 for len(t.waitingLeader) > 0 { 188 logger.Debugf("notifying %s ticket of impending %s leadership", t.unitName, t.serviceName) 189 var ticketCh chan bool 190 ticketCh, t.waitingLeader = t.waitingLeader[0], t.waitingLeader[1:] 191 defer close(ticketCh) 192 if err := t.sendTrue(ticketCh); err != nil { 193 return errors.Trace(err) 194 } 195 } 196 return nil 197 } 198 199 // setMinion arranges for lease acquisition when there's an opportunity. 
200 func (t *tracker) setMinion() error { 201 logger.Infof("%s leadership for %s denied", t.serviceName, t.unitName) 202 t.isMinion = true 203 t.renewLease = nil 204 if t.claimLease == nil { 205 t.claimLease = make(chan struct{}) 206 go func() { 207 defer close(t.claimLease) 208 logger.Debugf("%s waiting for %s leadership release", t.unitName, t.serviceName) 209 err := t.claimer.BlockUntilLeadershipReleased(t.serviceName) 210 if err != nil { 211 logger.Warningf("error while %s waiting for %s leadership release: %v", t.unitName, t.serviceName, err) 212 } 213 // We don't need to do anything else with the error, because we just 214 // close the claimLease channel and trigger a leadership claim on the 215 // main loop; if anything's gone seriously wrong we'll find out right 216 // away and shut down anyway. (And if this goroutine outlives the 217 // tracker, it keeps it around as a zombie, but I don't see a way 218 // around that...) 219 }() 220 } 221 222 for len(t.waitingMinion) > 0 { 223 logger.Debugf("notifying %s ticket of impending loss of %s leadership", t.unitName, t.serviceName) 224 var ticketCh chan bool 225 ticketCh, t.waitingMinion = t.waitingMinion[0], t.waitingMinion[1:] 226 defer close(ticketCh) 227 if err := t.sendTrue(ticketCh); err != nil { 228 return errors.Trace(err) 229 } 230 } 231 return nil 232 } 233 234 // isLeader returns true if leadership is guaranteed for the tracker's duration. 235 func (t *tracker) isLeader() (bool, error) { 236 if !t.isMinion { 237 // Last time we looked, we were leader. 
238 select { 239 case <-t.tomb.Dying(): 240 return false, errors.Trace(tomb.ErrDying) 241 case <-t.renewLease: 242 logger.Debugf("%s renewing lease for %s leadership", t.unitName, t.serviceName) 243 t.renewLease = nil 244 if err := t.refresh(); err != nil { 245 return false, errors.Trace(err) 246 } 247 default: 248 logger.Debugf("%s still has %s leadership", t.unitName, t.serviceName) 249 } 250 } 251 return !t.isMinion, nil 252 } 253 254 // resolveClaim will send true on the supplied channel if leadership can be 255 // successfully verified, and will always close it whether or not it sent. 256 func (t *tracker) resolveClaim(ticketCh chan bool) error { 257 logger.Debugf("resolving %s leadership ticket for %s...", t.serviceName, t.unitName) 258 defer close(ticketCh) 259 if leader, err := t.isLeader(); err != nil { 260 return errors.Trace(err) 261 } else if !leader { 262 logger.Debugf("%s is not %s leader", t.unitName, t.serviceName) 263 return nil 264 } 265 logger.Debugf("confirming %s leadership for %s", t.serviceName, t.unitName) 266 return t.sendTrue(ticketCh) 267 } 268 269 // resolveWaitLeader will send true on the supplied channel if leadership can be 270 // guaranteed for the tracker's duration. It will then close the channel. If 271 // leadership cannot be guaranteed, the channel is left untouched until either 272 // the termination of the tracker or the next invocation of setLeader; at which 273 // point true is sent if applicable, and the channel is closed. 
274 func (t *tracker) resolveWaitLeader(ticketCh chan bool) error { 275 var dontClose bool 276 defer func() { 277 if !dontClose { 278 close(ticketCh) 279 } 280 }() 281 282 if leader, err := t.isLeader(); err != nil { 283 return errors.Trace(err) 284 } else if leader { 285 logger.Debugf("reporting %s leadership for %s", t.serviceName, t.unitName) 286 return t.sendTrue(ticketCh) 287 } 288 289 logger.Debugf("waiting for %s to attain %s leadership", t.unitName, t.serviceName) 290 t.waitingLeader = append(t.waitingLeader, ticketCh) 291 dontClose = true 292 return nil 293 } 294 295 // resolveWaitMinion will close the supplied channel as soon as leadership cannot 296 // be guaranteed beyond the tracker's duration. 297 func (t *tracker) resolveWaitMinion(ticketCh chan bool) error { 298 var dontClose bool 299 defer func() { 300 if !dontClose { 301 close(ticketCh) 302 } 303 }() 304 305 if leader, err := t.isLeader(); err != nil { 306 return errors.Trace(err) 307 } else if leader { 308 logger.Debugf("waiting for %s to lose %s leadership", t.unitName, t.serviceName) 309 t.waitingMinion = append(t.waitingMinion, ticketCh) 310 dontClose = true 311 } else { 312 logger.Debugf("reporting %s leadership loss for %s", t.serviceName, t.unitName) 313 } 314 return nil 315 316 } 317 318 func (t *tracker) sendTrue(ticketCh chan bool) error { 319 select { 320 case <-t.tomb.Dying(): 321 return tomb.ErrDying 322 case ticketCh <- true: 323 return nil 324 } 325 } 326 327 func (t *tracker) submit(tickets chan chan bool) Ticket { 328 ticketCh := make(chan bool, 1) 329 select { 330 case <-t.tomb.Dying(): 331 close(ticketCh) 332 case tickets <- ticketCh: 333 } 334 ticket := &ticket{ 335 ch: ticketCh, 336 ready: make(chan struct{}), 337 } 338 go ticket.run() 339 return ticket 340 } 341 342 // ticket is used with tracker to communicate leadership status back to a client. 
type ticket struct {
	// ch is read exactly once by run: it yields true when sent true, and
	// false when sent false or closed without a value.
	ch chan bool

	// ready is closed by run once the value from ch has been recorded.
	ready chan struct{}

	// success is set by run before ready is closed, and must only be read
	// after ready is closed.
	success bool
}

// run receives a single value from the ticket's channel, records success if
// that value is true, and then closes ready.
func (t *ticket) run() {
	defer close(t.ready)
	// This is only safe/sane because the tracker promises to close all pending
	// ticket channels when it shuts down.
	granted := <-t.ch
	if !granted {
		return
	}
	t.success = true
}

// Ready is part of the Ticket interface.
// The returned channel is closed once the ticket has been resolved.
func (t *ticket) Ready() <-chan struct{} {
	return t.ready
}

// Wait is part of the Ticket interface.
// It blocks until the ticket has been resolved, then reports whether true was
// received.
func (t *ticket) Wait() bool {
	<-t.ready
	return t.success
}