github.com/cloud-green/juju@v0.0.0-20151002100041-a00291338d3d/state/leadership/manager.go (about) 1 // Copyright 2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package leadership 5 6 import ( 7 "sort" 8 "time" 9 10 "github.com/juju/errors" 11 "github.com/juju/loggo" 12 "github.com/juju/utils/clock" 13 "launchpad.net/tomb" 14 15 "github.com/juju/juju/leadership" 16 "github.com/juju/juju/state/lease" 17 ) 18 19 var logger = loggo.GetLogger("juju.state.leadership") 20 21 // NewManager returns a Manager implementation, backed by a lease.Client, 22 // which (in addition to its exposed Manager capabilities) will expire all 23 // known leases as they run out. The caller takes responsibility for killing, 24 // and handling errors from, the returned Worker. 25 func NewManager(config ManagerConfig) (ManagerWorker, error) { 26 if err := config.Validate(); err != nil { 27 return nil, errors.Trace(err) 28 } 29 manager := &manager{ 30 config: config, 31 claims: make(chan claim), 32 checks: make(chan check), 33 blocks: make(chan block), 34 } 35 go func() { 36 defer manager.tomb.Done() 37 // note: we don't directly tomb.Kill, because we may need to 38 // unwrap tomb.ErrDying in order to function correctly. 39 manager.kill(manager.loop()) 40 }() 41 return manager, nil 42 } 43 44 // manager implements ManagerWorker. 45 type manager struct { 46 tomb tomb.Tomb 47 48 // config collects all external configuration and dependencies. 49 config ManagerConfig 50 51 // claims is used to deliver leadership claim requests to the loop. 52 claims chan claim 53 54 // checks is used to deliver leadership check requests to the loop. 55 checks chan check 56 57 // blocks is used to deliver leaderlessness block requests to the loop. 58 blocks chan block 59 } 60 61 // Kill is part of the worker.Worker interface. 62 func (manager *manager) Kill() { 63 manager.kill(nil) 64 } 65 66 // kill unwraps tomb.ErrDying before killing the tomb, thus allowing the worker 67 // to use errors.Trace liberally and still stop cleanly. 68 func (manager *manager) kill(err error) { 69 if errors.Cause(err) == tomb.ErrDying { 70 err = tomb.ErrDying 71 } else if err != nil { 72 logger.Errorf("stopping leadership manager with error: %v", err) 73 } 74 manager.tomb.Kill(err) 75 } 76 77 // Wait is part of the worker.Worker interface. 78 func (manager *manager) Wait() error { 79 return manager.tomb.Wait() 80 } 81 82 // loop runs until the manager is stopped. 83 func (manager *manager) loop() error { 84 blocks := make(blocks) 85 for { 86 if err := manager.choose(blocks); err != nil { 87 return errors.Trace(err) 88 } 89 90 leases := manager.config.Client.Leases() 91 for serviceName := range blocks { 92 if _, found := leases[serviceName]; !found { 93 blocks.unblock(serviceName) 94 } 95 } 96 } 97 } 98 99 // choose breaks the select out of loop to make the blocking logic clearer. 100 func (manager *manager) choose(blocks blocks) error { 101 select { 102 case <-manager.tomb.Dying(): 103 return tomb.ErrDying 104 case <-manager.nextExpiry(): 105 return manager.expire() 106 case claim := <-manager.claims: 107 return manager.handleClaim(claim) 108 case check := <-manager.checks: 109 return manager.handleCheck(check) 110 case block := <-manager.blocks: 111 blocks.add(block) 112 return nil 113 } 114 } 115 116 // ClaimLeadership is part of the leadership.Claimer interface. 117 func (manager *manager) ClaimLeadership(serviceName, unitName string, duration time.Duration) error { 118 return claim{ 119 serviceName: serviceName, 120 unitName: unitName, 121 duration: duration, 122 response: make(chan bool), 123 abort: manager.tomb.Dying(), 124 }.invoke(manager.claims) 125 } 126 127 // handleClaim processes and responds to the supplied claim. It will only return 128 // unrecoverable errors; mere failure to claim just indicates a bad request, and 129 // is communicated back to the claim's originator. 130 func (manager *manager) handleClaim(claim claim) error { 131 client := manager.config.Client 132 request := lease.Request{claim.unitName, claim.duration} 133 err := lease.ErrInvalid 134 for err == lease.ErrInvalid { 135 select { 136 case <-manager.tomb.Dying(): 137 return tomb.ErrDying 138 default: 139 info, found := client.Leases()[claim.serviceName] 140 switch { 141 case !found: 142 err = client.ClaimLease(claim.serviceName, request) 143 case info.Holder == claim.unitName: 144 err = client.ExtendLease(claim.serviceName, request) 145 default: 146 claim.respond(false) 147 return nil 148 } 149 } 150 } 151 if err != nil { 152 return errors.Trace(err) 153 } 154 claim.respond(true) 155 return nil 156 } 157 158 // LeadershipCheck is part of the leadership.Checker interface. 159 // 160 // The token returned will accept a `*[]txn.Op` passed to Check, and will 161 // populate it with transaction operations that will fail if the unit is 162 // not leader of the service. 163 func (manager *manager) LeadershipCheck(serviceName, unitName string) leadership.Token { 164 return token{ 165 serviceName: serviceName, 166 unitName: unitName, 167 checks: manager.checks, 168 abort: manager.tomb.Dying(), 169 } 170 } 171 172 // handleCheck processes and responds to the supplied check. It will only return 173 // unrecoverable errors; mere untruth of the assertion just indicates a bad 174 // request, and is communicated back to the check's originator. 175 func (manager *manager) handleCheck(check check) error { 176 client := manager.config.Client 177 info, found := client.Leases()[check.serviceName] 178 if !found || info.Holder != check.unitName { 179 if err := client.Refresh(); err != nil { 180 return errors.Trace(err) 181 } 182 info, found = client.Leases()[check.serviceName] 183 } 184 if found && info.Holder == check.unitName { 185 check.succeed(info.AssertOp) 186 } else { 187 check.fail() 188 } 189 return nil 190 } 191 192 // BlockUntilLeadershipReleased is part of the leadership.Claimer interface. 193 func (manager *manager) BlockUntilLeadershipReleased(serviceName string) error { 194 return block{ 195 serviceName: serviceName, 196 unblock: make(chan struct{}), 197 abort: manager.tomb.Dying(), 198 }.invoke(manager.blocks) 199 } 200 201 // nextExpiry returns a channel that will send a value at some point when we 202 // expect at least one lease to be ready to expire. If no leases are known, 203 // it will return nil. 204 func (manager *manager) nextExpiry() <-chan time.Time { 205 var nextExpiry *time.Time 206 for _, info := range manager.config.Client.Leases() { 207 if nextExpiry != nil { 208 if info.Expiry.After(*nextExpiry) { 209 continue 210 } 211 } 212 nextExpiry = &info.Expiry 213 } 214 if nextExpiry == nil { 215 logger.Tracef("no leases recorded; never waking for expiry") 216 return nil 217 } 218 logger.Tracef("waking to expire leases at %s", *nextExpiry) 219 return clock.Alarm(manager.config.Clock, *nextExpiry) 220 } 221 222 // expire will attempt to expire all leases that may have expired. There might 223 // be none; they might have been extended or expired already by someone else; so 224 // ErrInvalid is expected, and ignored, in the comfortable knowledge that the 225 // client will have been updated and we'll see fresh info when we scan for new 226 // expiries next time through the loop. It will return only unrecoverable errors. 227 func (manager *manager) expire() error { 228 logger.Tracef("expiring leases...") 229 client := manager.config.Client 230 leases := client.Leases() 231 232 // Sort lease names so we expire in a predictable order for the tests. 233 names := make([]string, 0, len(leases)) 234 for name := range leases { 235 names = append(names, name) 236 } 237 sort.Strings(names) 238 for _, name := range names { 239 now := manager.config.Clock.Now() 240 if leases[name].Expiry.After(now) { 241 continue 242 } 243 switch err := client.ExpireLease(name); err { 244 case nil, lease.ErrInvalid: 245 default: 246 return errors.Trace(err) 247 } 248 } 249 return nil 250 }