// Copyright 2015 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package leadership

import (
	"time"

	"github.com/juju/clock"
	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/names/v5"
	"gopkg.in/tomb.v2"

	"github.com/juju/juju/core/leadership"
)

var logger = loggo.GetLogger("juju.worker.leadership")

// Tracker attempts to claim and retain application leadership for a single
// unit, and resolves the leadership tickets handed out by ClaimLeader,
// WaitLeader and WaitMinion. Its mutable fields are updated from the loop
// goroutine started in NewTracker.
type Tracker struct {
	// tomb tracks the lifetime of the goroutine started by NewTracker.
	tomb tomb.Tomb

	// claimer is used to claim leadership and to block until leadership is
	// released.
	claimer leadership.Claimer

	// unitName and applicationName identify the unit this Tracker acts for
	// and the application whose leadership it claims.
	unitName        string
	applicationName string

	// clock supplies the current time and the renewal timer.
	clock clock.Clock

	// duration is the value reported by ClaimDuration; refresh claims leases
	// of twice this length and setLeader schedules renewal one duration
	// before the lease is due to end.
	duration time.Duration

	// isMinion starts out true, is cleared by setLeader and set again by
	// setMinion.
	isMinion bool

	// claimLease delivers the result of waiting for leadership to be
	// released; it is nil unless a wait started by setMinion is outstanding.
	claimLease chan error

	// renewLease fires when the current lease should be renewed; it is set
	// by setLeader and cleared by setMinion.
	renewLease <-chan time.Time

	// claimTickets, waitLeaderTickets and waitMinionTickets carry ticket
	// channels from ClaimLeader, WaitLeader and WaitMinion to the loop.
	claimTickets      chan chan bool
	waitLeaderTickets chan chan bool
	waitMinionTickets chan chan bool

	// waitingLeader and waitingMinion hold tickets that could not be
	// resolved immediately; they are resolved by setLeader and setMinion
	// respectively, or closed when the Tracker stops.
	waitingLeader []chan bool
	waitingMinion []chan bool
}

// NewTracker returns a *Tracker that attempts to claim and retain application
// leadership for the supplied unit. It will claim leadership for twice the
// supplied duration, and once it's leader it will renew leadership every
// time the duration elapses.
// Thus, successful leadership claims on the resulting Tracker will guarantee
// leadership for the duration supplied here without generating additional
// calls to the supplied manager (which may very well be on the other side of
// a network connection).
46 func NewTracker(tag names.UnitTag, claimer leadership.Claimer, clock clock.Clock, duration time.Duration) *Tracker { 47 unitName := tag.Id() 48 applicationName, _ := names.UnitApplication(unitName) 49 t := &Tracker{ 50 unitName: unitName, 51 applicationName: applicationName, 52 claimer: claimer, 53 clock: clock, 54 duration: duration, 55 claimTickets: make(chan chan bool), 56 waitLeaderTickets: make(chan chan bool), 57 waitMinionTickets: make(chan chan bool), 58 isMinion: true, 59 } 60 t.tomb.Go(func() error { 61 defer func() { 62 for _, ticketCh := range t.waitingLeader { 63 close(ticketCh) 64 } 65 for _, ticketCh := range t.waitingMinion { 66 close(ticketCh) 67 } 68 }() 69 err := t.loop() 70 // TODO: jam 2015-04-02 is this the most elegant way to make 71 // sure we shutdown cleanly? Essentially the lowest level sees 72 // that we are dying, and propagates an ErrDying up to us so 73 // that we shut down, which we then are passing back into 74 // Tomb.Kill(). 75 // Tomb.Kill() special cases the exact object ErrDying, and has 76 // no idea about errors.Cause and the general errors.Trace 77 // mechanisms that we use. 78 // So we explicitly unwrap before calling tomb.Kill() else 79 // tomb.Stop() thinks that we have a genuine error. 80 switch cause := errors.Cause(err); cause { 81 case tomb.ErrDying: 82 err = cause 83 } 84 return err 85 }) 86 return t 87 } 88 89 // Kill is part of the worker.Worker interface. 90 func (t *Tracker) Kill() { 91 t.tomb.Kill(nil) 92 } 93 94 // Wait is part of the worker.Worker interface. 95 func (t *Tracker) Wait() error { 96 return t.tomb.Wait() 97 } 98 99 // ApplicationName is part of the leadership.Tracker interface. 100 func (t *Tracker) ApplicationName() string { 101 return t.applicationName 102 } 103 104 // ClaimDuration is part of the leadership.Tracker interface. 105 func (t *Tracker) ClaimDuration() time.Duration { 106 return t.duration 107 } 108 109 // ClaimLeader is part of the leadership.Tracker interface. 
110 func (t *Tracker) ClaimLeader() leadership.Ticket { 111 return t.submit(t.claimTickets) 112 } 113 114 // WaitLeader is part of the leadership.Tracker interface. 115 func (t *Tracker) WaitLeader() leadership.Ticket { 116 return t.submit(t.waitLeaderTickets) 117 } 118 119 // WaitMinion is part of the leadership.Tracker interface. 120 func (t *Tracker) WaitMinion() leadership.Ticket { 121 return t.submit(t.waitMinionTickets) 122 } 123 124 func (t *Tracker) loop() error { 125 logger.Debugf("%s making initial claim for %s leadership", t.unitName, t.applicationName) 126 if err := t.refresh(); err != nil { 127 return errors.Trace(err) 128 } 129 for { 130 select { 131 case <-t.tomb.Dying(): 132 return tomb.ErrDying 133 case err, ok := <-t.claimLease: 134 t.claimLease = nil 135 if errors.Cause(err) == leadership.ErrBlockCancelled || !ok { 136 // BlockUntilLeadershipReleased was cancelled, 137 // which means that the tracker is terminating. 138 continue 139 } else if err != nil { 140 return errors.Annotatef(err, 141 "error while %s waiting for %s leadership release", 142 t.unitName, t.applicationName, 143 ) 144 } 145 logger.Tracef("%s claiming lease for %s leadership", t.unitName, t.applicationName) 146 if err := t.refresh(); err != nil { 147 return errors.Trace(err) 148 } 149 case <-t.renewLease: 150 logger.Tracef("%s renewing lease for %s leadership", t.unitName, t.applicationName) 151 t.renewLease = nil 152 if err := t.refresh(); err != nil { 153 return errors.Trace(err) 154 } 155 case ticketCh := <-t.claimTickets: 156 logger.Tracef("%s got claim request for %s leadership", t.unitName, t.applicationName) 157 if err := t.resolveClaim(ticketCh); err != nil { 158 return errors.Trace(err) 159 } 160 case ticketCh := <-t.waitLeaderTickets: 161 logger.Tracef("%s got wait request for %s leadership", t.unitName, t.applicationName) 162 if err := t.resolveWaitLeader(ticketCh); err != nil { 163 return errors.Trace(err) 164 } 165 case ticketCh := <-t.waitMinionTickets: 166 
logger.Tracef("%s got wait request for %s leadership loss", t.unitName, t.applicationName) 167 if err := t.resolveWaitMinion(ticketCh); err != nil { 168 return errors.Trace(err) 169 } 170 } 171 } 172 } 173 174 // refresh makes a leadership request, and updates Tracker state to conform to 175 // latest known reality. 176 func (t *Tracker) refresh() error { 177 logger.Tracef("checking %s for %s leadership", t.unitName, t.applicationName) 178 leaseDuration := 2 * t.duration 179 untilTime := t.clock.Now().Add(leaseDuration) 180 err := t.claimer.ClaimLeadership(t.applicationName, t.unitName, leaseDuration) 181 switch { 182 case err == nil: 183 return t.setLeader(untilTime) 184 case errors.Cause(err) == leadership.ErrClaimDenied: 185 return t.setMinion() 186 } 187 return errors.Annotatef(err, "leadership failure") 188 } 189 190 // setLeader arranges for lease renewal. 191 func (t *Tracker) setLeader(untilTime time.Time) error { 192 if t.isMinion { 193 // If we were a minion, we're now the leader, so we can record the transition. 194 logger.Infof("%s promoted to leadership of %s", t.unitName, t.applicationName) 195 } 196 logger.Tracef("%s confirmed for %s leadership until %s", t.unitName, t.applicationName, untilTime) 197 renewTime := untilTime.Add(-t.duration) 198 logger.Tracef("%s will renew %s leadership at %s", t.unitName, t.applicationName, renewTime) 199 t.isMinion = false 200 t.claimLease = nil 201 t.renewLease = t.clock.After(renewTime.Sub(t.clock.Now())) 202 203 for len(t.waitingLeader) > 0 { 204 logger.Tracef("notifying %s ticket of impending %s leadership", t.unitName, t.applicationName) 205 var ticketCh chan bool 206 ticketCh, t.waitingLeader = t.waitingLeader[0], t.waitingLeader[1:] 207 defer close(ticketCh) 208 if err := t.sendTrue(ticketCh); err != nil { 209 return errors.Trace(err) 210 } 211 } 212 return nil 213 } 214 215 // setMinion arranges for lease acquisition when there's an opportunity. 
216 func (t *Tracker) setMinion() error { 217 logger.Infof("%s leadership for %s denied", t.applicationName, t.unitName) 218 t.isMinion = true 219 t.renewLease = nil 220 if t.claimLease == nil { 221 t.claimLease = make(chan error, 1) 222 go func() { 223 defer close(t.claimLease) 224 logger.Debugf("%s waiting for %s leadership release", t.unitName, t.applicationName) 225 err := t.claimer.BlockUntilLeadershipReleased(t.applicationName, t.tomb.Dying()) 226 if err != nil { 227 logger.Debugf("%s waiting for %s leadership release gave err: %s", t.unitName, t.applicationName, err) 228 } 229 select { 230 case t.claimLease <- err: 231 case <-t.tomb.Dying(): 232 } 233 }() 234 } 235 236 for len(t.waitingMinion) > 0 { 237 logger.Debugf("notifying %s ticket of impending loss of %s leadership", t.unitName, t.applicationName) 238 var ticketCh chan bool 239 ticketCh, t.waitingMinion = t.waitingMinion[0], t.waitingMinion[1:] 240 defer close(ticketCh) 241 if err := t.sendTrue(ticketCh); err != nil { 242 return errors.Trace(err) 243 } 244 } 245 return nil 246 } 247 248 // isLeader returns true if leadership is guaranteed for the Tracker's duration. 249 func (t *Tracker) isLeader() (bool, error) { 250 if !t.isMinion { 251 // Last time we looked, we were leader. 252 select { 253 case <-t.tomb.Dying(): 254 return false, errors.Trace(tomb.ErrDying) 255 case <-t.renewLease: 256 logger.Tracef("%s renewing lease for %s leadership", t.unitName, t.applicationName) 257 t.renewLease = nil 258 if err := t.refresh(); err != nil { 259 return false, errors.Trace(err) 260 } 261 default: 262 logger.Tracef("%s still has %s leadership", t.unitName, t.applicationName) 263 } 264 } 265 return !t.isMinion, nil 266 } 267 268 // resolveClaim will send true on the supplied channel if leadership can be 269 // successfully verified, and will always close it whether or not it sent. 
270 func (t *Tracker) resolveClaim(ticketCh chan bool) error { 271 logger.Tracef("resolving %s leadership ticket for %s...", t.applicationName, t.unitName) 272 defer close(ticketCh) 273 if leader, err := t.isLeader(); err != nil { 274 return errors.Trace(err) 275 } else if !leader { 276 logger.Debugf("%s is not %s leader", t.unitName, t.applicationName) 277 return nil 278 } 279 logger.Tracef("confirming %s leadership for %s", t.applicationName, t.unitName) 280 return t.sendTrue(ticketCh) 281 } 282 283 // resolveWaitLeader will send true on the supplied channel if leadership can be 284 // guaranteed for the Tracker's duration. It will then close the channel. If 285 // leadership cannot be guaranteed, the channel is left untouched until either 286 // the termination of the Tracker or the next invocation of setLeader; at which 287 // point true is sent if applicable, and the channel is closed. 288 func (t *Tracker) resolveWaitLeader(ticketCh chan bool) error { 289 var dontClose bool 290 defer func() { 291 if !dontClose { 292 close(ticketCh) 293 } 294 }() 295 296 if leader, err := t.isLeader(); err != nil { 297 return errors.Trace(err) 298 } else if leader { 299 logger.Tracef("reporting %s leadership for %s", t.applicationName, t.unitName) 300 return t.sendTrue(ticketCh) 301 } 302 303 logger.Tracef("waiting for %s to attain %s leadership", t.unitName, t.applicationName) 304 t.waitingLeader = append(t.waitingLeader, ticketCh) 305 dontClose = true 306 return nil 307 } 308 309 // resolveWaitMinion will close the supplied channel as soon as leadership cannot 310 // be guaranteed beyond the Tracker's duration. 
311 func (t *Tracker) resolveWaitMinion(ticketCh chan bool) error { 312 var dontClose bool 313 defer func() { 314 if !dontClose { 315 close(ticketCh) 316 } 317 }() 318 319 if leader, err := t.isLeader(); err != nil { 320 return errors.Trace(err) 321 } else if leader { 322 logger.Tracef("waiting for %s to lose %s leadership", t.unitName, t.applicationName) 323 t.waitingMinion = append(t.waitingMinion, ticketCh) 324 dontClose = true 325 } else { 326 logger.Tracef("reporting %s leadership loss for %s", t.applicationName, t.unitName) 327 } 328 return nil 329 330 } 331 332 func (t *Tracker) sendTrue(ticketCh chan bool) error { 333 select { 334 case <-t.tomb.Dying(): 335 return tomb.ErrDying 336 case ticketCh <- true: 337 return nil 338 } 339 } 340 341 func (t *Tracker) submit(tickets chan chan bool) leadership.Ticket { 342 ticketCh := make(chan bool, 1) 343 select { 344 case <-t.tomb.Dying(): 345 close(ticketCh) 346 case tickets <- ticketCh: 347 } 348 ticket := &ticket{ 349 ch: ticketCh, 350 ready: make(chan struct{}), 351 } 352 go ticket.run() 353 return ticket 354 } 355 356 // ticket is used by Tracker to communicate leadership status back to a client. 357 type ticket struct { 358 ch chan bool 359 ready chan struct{} 360 success bool 361 } 362 363 func (t *ticket) run() { 364 defer close(t.ready) 365 // This is only safe/sane because the Tracker promises to close all pending 366 // ticket channels when it shuts down. 367 if <-t.ch { 368 t.success = true 369 } 370 } 371 372 // Ready is part of the leadership.Ticket interface. 373 func (t *ticket) Ready() <-chan struct{} { 374 return t.ready 375 } 376 377 // Wait is part of the leadership.Ticket interface. 378 func (t *ticket) Wait() bool { 379 <-t.ready 380 return t.success 381 }