github.com/Pankov404/juju@v0.0.0-20150703034450-be266991dceb/state/leadership/manager.go (about) 1 // Copyright 2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package leadership 5 6 import ( 7 "sort" 8 "time" 9 10 "github.com/juju/errors" 11 "github.com/juju/loggo" 12 "launchpad.net/tomb" 13 14 "github.com/juju/juju/state/lease" 15 ) 16 17 var logger = loggo.GetLogger("juju.state.leadership") 18 19 // NewManager returns a Manager implementation, backed by a lease.Client, 20 // which (in addition to its exposed Manager capabilities) will expire all 21 // known leases as they run out. The caller takes responsibility for killing, 22 // and handling errors from, the returned Worker. 23 func NewManager(config ManagerConfig) (ManagerWorker, error) { 24 if err := config.Validate(); err != nil { 25 return nil, errors.Trace(err) 26 } 27 manager := &manager{ 28 config: config, 29 claims: make(chan claim), 30 checks: make(chan check), 31 blocks: make(chan block), 32 } 33 go func() { 34 defer manager.tomb.Done() 35 // note: we don't directly tomb.Kill, because we may need to 36 // unwrap tomb.ErrDying in order to function correctly. 37 manager.kill(manager.loop()) 38 }() 39 return manager, nil 40 } 41 42 // manager implements ManagerWorker. 43 type manager struct { 44 tomb tomb.Tomb 45 46 // config collects all external configuration and dependencies. 47 config ManagerConfig 48 49 // claims is used to deliver leadership claim requests to the loop. 50 claims chan claim 51 52 // checks is used to deliver leadership check requests to the loop. 53 checks chan check 54 55 // blocks is used to deliver leaderlessness block requests to the loop. 56 blocks chan block 57 } 58 59 // Kill is part of the worker.Worker interface. 60 func (manager *manager) Kill() { 61 manager.kill(nil) 62 } 63 64 // kill unwraps tomb.ErrDying before killing the tomb, thus allowing the worker 65 // to use errors.Trace liberally and still stop cleanly. 66 func (manager *manager) kill(err error) { 67 if errors.Cause(err) == tomb.ErrDying { 68 err = tomb.ErrDying 69 } else if err != nil { 70 logger.Errorf("stopping leadership manager with error: %v", err) 71 } 72 manager.tomb.Kill(err) 73 } 74 75 // Wait is part of the worker.Worker interface. 76 func (manager *manager) Wait() error { 77 return manager.tomb.Wait() 78 } 79 80 // loop runs until the manager is stopped. 81 func (manager *manager) loop() error { 82 blocks := make(blocks) 83 for { 84 if err := manager.choose(blocks); err != nil { 85 return errors.Trace(err) 86 } 87 88 leases := manager.config.Client.Leases() 89 for serviceName := range blocks { 90 if _, found := leases[serviceName]; !found { 91 blocks.unblock(serviceName) 92 } 93 } 94 } 95 } 96 97 // choose breaks the select out of loop to make the blocking logic clearer. 98 func (manager *manager) choose(blocks blocks) error { 99 select { 100 case <-manager.tomb.Dying(): 101 return tomb.ErrDying 102 case <-manager.nextExpiry(): 103 return manager.expire() 104 case claim := <-manager.claims: 105 return manager.handleClaim(claim) 106 case check := <-manager.checks: 107 return manager.handleCheck(check) 108 case block := <-manager.blocks: 109 blocks.add(block) 110 return nil 111 } 112 } 113 114 // ClaimLeadership is part of the leadership.Manager interface. 115 func (manager *manager) ClaimLeadership(serviceName, unitName string, duration time.Duration) error { 116 return claim{ 117 serviceName: serviceName, 118 unitName: unitName, 119 duration: duration, 120 response: make(chan bool), 121 abort: manager.tomb.Dying(), 122 }.invoke(manager.claims) 123 } 124 125 // handleClaim processes and responds to the supplied claim. It will only return 126 // unrecoverable errors; mere failure to claim just indicates a bad request, and 127 // is communicated back to the claim's originator. 128 func (manager *manager) handleClaim(claim claim) error { 129 client := manager.config.Client 130 request := lease.Request{claim.unitName, claim.duration} 131 err := lease.ErrInvalid 132 for err == lease.ErrInvalid { 133 select { 134 case <-manager.tomb.Dying(): 135 return tomb.ErrDying 136 default: 137 info, found := client.Leases()[claim.serviceName] 138 switch { 139 case !found: 140 err = client.ClaimLease(claim.serviceName, request) 141 case info.Holder == claim.unitName: 142 err = client.ExtendLease(claim.serviceName, request) 143 default: 144 claim.respond(false) 145 return nil 146 } 147 } 148 } 149 if err != nil { 150 return errors.Trace(err) 151 } 152 claim.respond(true) 153 return nil 154 } 155 156 // CheckLeadership is part of the leadership.Manager interface. 157 func (manager *manager) CheckLeadership(serviceName, unitName string) (Token, error) { 158 return check{ 159 serviceName: serviceName, 160 unitName: unitName, 161 response: make(chan Token), 162 abort: manager.tomb.Dying(), 163 }.invoke(manager.checks) 164 } 165 166 // handleCheck processes and responds to the supplied check. It will only return 167 // unrecoverable errors; mere untruth of the assertion just indicates a bad 168 // request, and is communicated back to the check's originator. 169 func (manager *manager) handleCheck(check check) error { 170 client := manager.config.Client 171 info, found := client.Leases()[check.serviceName] 172 if !found || info.Holder != check.unitName { 173 if err := client.Refresh(); err != nil { 174 return errors.Trace(err) 175 } 176 info, found = client.Leases()[check.serviceName] 177 } 178 var result Token 179 if found && info.Holder == check.unitName { 180 result = token{info.AssertOp} 181 } 182 check.respond(result) 183 return nil 184 } 185 186 // BlockUntilLeadershipReleased is part of the leadership.Manager interface. 187 func (manager *manager) BlockUntilLeadershipReleased(serviceName string) error { 188 return block{ 189 serviceName: serviceName, 190 unblock: make(chan struct{}), 191 abort: manager.tomb.Dying(), 192 }.invoke(manager.blocks) 193 } 194 195 // nextExpiry returns a channel that will send a value at some point when we 196 // expect at least one lease to be ready to expire. If no leases are known, 197 // it will return nil. 198 func (manager *manager) nextExpiry() <-chan time.Time { 199 var nextExpiry *time.Time 200 for _, info := range manager.config.Client.Leases() { 201 if nextExpiry != nil { 202 if info.Expiry.After(*nextExpiry) { 203 continue 204 } 205 } 206 nextExpiry = &info.Expiry 207 } 208 if nextExpiry == nil { 209 logger.Debugf("no leases recorded; never waking for expiry") 210 return nil 211 } 212 logger.Debugf("waking to expire leases at %s", *nextExpiry) 213 return manager.config.Clock.Alarm(*nextExpiry) 214 } 215 216 // expire will attempt to expire all leases that may have expired. There might 217 // be none; they might have been extended or expired already by someone else; so 218 // ErrInvalid is expected, and ignored, in the comfortable knowledge that the 219 // client will have been updated and we'll see fresh info when we scan for new 220 // expiries next time through the loop. It will return only unrecoverable errors. 221 func (manager *manager) expire() error { 222 client := manager.config.Client 223 leases := client.Leases() 224 225 // Sort lease names so we expire in a predictable order for the tests. 226 names := make([]string, 0, len(leases)) 227 for name := range leases { 228 names = append(names, name) 229 } 230 sort.Strings(names) 231 for _, name := range names { 232 now := manager.config.Clock.Now() 233 if leases[name].Expiry.After(now) { 234 continue 235 } 236 switch err := client.ExpireLease(name); err { 237 case nil, lease.ErrInvalid: 238 default: 239 return errors.Trace(err) 240 } 241 } 242 return nil 243 }