github.com/cloud-green/juju@v0.0.0-20151002100041-a00291338d3d/worker/peergrouper/worker.go

// Copyright 2014 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package peergrouper

import (
    "fmt"
    "sync"
    "time"

    "github.com/juju/errors"
    "github.com/juju/replicaset"
    "launchpad.net/tomb"

    "github.com/juju/juju/instance"
    "github.com/juju/juju/mongo"
    "github.com/juju/juju/network"
    "github.com/juju/juju/state"
    "github.com/juju/juju/worker"
)

type stateInterface interface {
    Machine(id string) (stateMachine, error)
    WatchStateServerInfo() state.NotifyWatcher
    StateServerInfo() (*state.StateServerInfo, error)
    MongoSession() mongoSession
}

type stateMachine interface {
    Id() string
    InstanceId() (instance.Id, error)
    Refresh() error
    Watch() state.NotifyWatcher
    WantsVote() bool
    HasVote() bool
    SetHasVote(hasVote bool) error
    APIHostPorts() []network.HostPort
    MongoHostPorts() []network.HostPort
}

type mongoSession interface {
    CurrentStatus() (*replicaset.Status, error)
    CurrentMembers() ([]replicaset.Member, error)
    Set([]replicaset.Member) error
}

type publisherInterface interface {
    // publishAPIServers publishes information about the given state
    // servers to whomsoever it may concern. When it is called there
    // is no guarantee that any of the information has actually changed.
    publishAPIServers(apiServers [][]network.HostPort, instanceIds []instance.Id) error
}

// notifyFunc holds a function that is sent
// to the main worker loop to fetch new information
// when something changes. It reports whether
// the information has actually changed (and by implication
// whether the replica set may need to be changed).
type notifyFunc func() (changed bool, err error)

var (
    // initialRetryInterval holds the interval at which we start
    // retrying after a failure to set the mongo replica set members,
    // before exponentially backing off with each further attempt.
    initialRetryInterval = 2 * time.Second

    // maxRetryInterval holds the maximum interval
    // between retry attempts.
    maxRetryInterval = 5 * time.Minute

    // pollInterval holds the interval at which the replica set
    // members will be updated even in the absence of changes
    // to State. This enables us to make changes to members
    // that are triggered by changes to member status.
    pollInterval = 1 * time.Minute
)

// pgWorker holds all the mutable state that we are watching.
// The only goroutine that is allowed to modify this
// is worker.loop - other watchers modify the
// current state by calling worker.notify instead of
// modifying it directly.
type pgWorker struct {
    tomb tomb.Tomb

    // wg represents all the currently running goroutines.
    // The worker main loop waits for all of these to exit
    // before finishing.
    wg sync.WaitGroup

    // st represents the State. It is an interface so we can swap
    // out the implementation during testing.
    st stateInterface

    // notifyCh receives functions from watchers when something
    // changes that might affect the peer group membership. Each
    // function is run inside the main worker goroutine to mutate
    // the state; it reports whether the state has actually changed.
    notifyCh chan notifyFunc

    // machines holds the set of machines we are currently
    // watching (all the state server machines).
    // Each one has an associated goroutine that
    // watches attributes of that machine.
    machines map[string]*machine

    // publisher holds the implementation of the API
    // address publisher.
    publisher publisherInterface
}

// New returns a new worker that maintains the mongo replica set
// with respect to the given state.
func New(st *state.State) (worker.Worker, error) {
    cfg, err := st.EnvironConfig()
    if err != nil {
        return nil, err
    }
    return newWorker(&stateShim{
        State:     st,
        mongoPort: cfg.StatePort(),
        apiPort:   cfg.APIPort(),
    }, newPublisher(st, cfg.PreferIPv6())), nil
}

func newWorker(st stateInterface, pub publisherInterface) worker.Worker {
    w := &pgWorker{
        st:        st,
        notifyCh:  make(chan notifyFunc),
        machines:  make(map[string]*machine),
        publisher: pub,
    }
    go func() {
        defer w.tomb.Done()
        if err := w.loop(); err != nil {
            logger.Errorf("peergrouper loop terminated: %v", err)
            w.tomb.Kill(err)
        }
        // Wait for the various goroutines to be killed.
        // N.B. we don't defer this call because
        // if we do and a bug causes a panic, Wait will deadlock
        // waiting for the unkilled goroutines to exit.
        w.wg.Wait()
    }()
    return w
}

// Kill is part of the worker.Worker interface.
func (w *pgWorker) Kill() {
    w.tomb.Kill(nil)
}

// Wait is part of the worker.Worker interface.
func (w *pgWorker) Wait() error {
    return w.tomb.Wait()
}

func (w *pgWorker) loop() error {
    infow := w.watchStateServerInfo()
    defer infow.stop()

    retry := time.NewTimer(0)
    retry.Stop()
    retryInterval := initialRetryInterval
    for {
        select {
        case f := <-w.notifyCh:
            // Update our current view of the state of affairs.
            changed, err := f()
            if err != nil {
                return err
            }
            if !changed {
                break
            }
            // Try to update the replica set immediately.
            retry.Reset(0)
        case <-retry.C:
            ok := true
            servers, instanceIds, err := w.apiPublishInfo()
            if err != nil {
                return fmt.Errorf("cannot get API server info: %v", err)
            }
            if err := w.publisher.publishAPIServers(servers, instanceIds); err != nil {
                logger.Errorf("cannot publish API server addresses: %v", err)
                ok = false
            }
            if err := w.updateReplicaset(); err != nil {
                if _, isReplicaSetError := err.(*replicaSetError); !isReplicaSetError {
                    return err
                }
                logger.Errorf("cannot set replicaset: %v", err)
                ok = false
            }
            if ok {
                // Update the replica set members occasionally
                // to keep them up to date with the current
                // replica set member statuses.
                retry.Reset(pollInterval)
                retryInterval = initialRetryInterval
            } else {
                // Back off exponentially before the next attempt,
                // up to maxRetryInterval.
                retry.Reset(retryInterval)
                retryInterval *= 2
                if retryInterval > maxRetryInterval {
                    retryInterval = maxRetryInterval
                }
            }
        case <-w.tomb.Dying():
            return tomb.ErrDying
        }
    }
}

func (w *pgWorker) apiPublishInfo() ([][]network.HostPort, []instance.Id, error) {
    servers := make([][]network.HostPort, 0, len(w.machines))
    instanceIds := make([]instance.Id, 0, len(w.machines))
    for _, m := range w.machines {
        if len(m.apiHostPorts) == 0 {
            continue
        }
        instanceId, err := m.stm.InstanceId()
        if err != nil {
            return nil, nil, err
        }
        instanceIds = append(instanceIds, instanceId)
        servers = append(servers, m.apiHostPorts)
    }
    return servers, instanceIds, nil
}

// notify sends the given notification function to
// the worker main loop to be executed. It reports whether
// the function was sent; it is not sent if the worker
// is shutting down.
func (w *pgWorker) notify(f notifyFunc) bool {
    select {
    case w.notifyCh <- f:
        return true
    case <-w.tomb.Dying():
        return false
    }
}

// peerGroupInfo collates current session information about the
// mongo peer group with information from state machines.
func (w *pgWorker) peerGroupInfo() (*peerGroupInfo, error) {
    session := w.st.MongoSession()
    info := &peerGroupInfo{}
    var err error
    status, err := session.CurrentStatus()
    if err != nil {
        return nil, fmt.Errorf("cannot get replica set status: %v", err)
    }
    info.statuses = status.Members
    info.members, err = session.CurrentMembers()
    if err != nil {
        return nil, fmt.Errorf("cannot get replica set members: %v", err)
    }
    info.machines = w.machines
    return info, nil
}

// replicaSetError holds an error returned as a result
// of calling replicaset.Set. As this is expected to fail
// in the normal course of things, it needs special treatment.
type replicaSetError struct {
    error
}

// updateReplicaset sets the current replica set members, and applies the
// resulting voting status to machines in the state.
func (w *pgWorker) updateReplicaset() error {
    info, err := w.peerGroupInfo()
    if err != nil {
        return err
    }
    members, voting, err := desiredPeerGroup(info)
    if err != nil {
        return fmt.Errorf("cannot compute desired peer group: %v", err)
    }
    if members != nil {
        logger.Debugf("desired peer group members: %#v", members)
    } else {
        logger.Debugf("no change in desired peer group (voting %#v)", voting)
    }

    // We cannot change the HasVote flag of a machine in state at exactly
    // the same moment as changing its voting status in the replica set.
    //
    // Thus we need to be careful that a machine which is actually a voting
    // member is not seen to not have a vote, because otherwise
    // there is nothing to prevent the machine being removed.
    //
    // To avoid this happening, we make sure, when we set the replica set
    // members, that the voting status of machines is the union of both old
    // and new voting machines - that is, the set of HasVote machines
    // is a superset of all the actual voting machines.
    //
    // Only after the call has taken place do we reset the voting status
    // of the machines that have lost their vote.
    //
    // If there's a crash, the voting status may not reflect the
    // actual voting status for a while, but when things come
    // back on line, it will be sorted out, as desiredPeerGroup
    // will return the actual voting status.
    //
    // Note that we potentially update the HasVote status of the machines
    // even if the members have not changed.
    var added, removed []*machine
    for m, hasVote := range voting {
        switch {
        case hasVote && !m.stm.HasVote():
            added = append(added, m)
        case !hasVote && m.stm.HasVote():
            removed = append(removed, m)
        }
    }
    if err := setHasVote(added, true); err != nil {
        return err
    }
    if members != nil {
        if err := w.st.MongoSession().Set(members); err != nil {
            // We've failed to set the replica set, so revert
            // to the previous voting settings.
            if err1 := setHasVote(added, false); err1 != nil {
                logger.Errorf("cannot revert machine voting after failure to change replica set: %v", err1)
            }
            return &replicaSetError{err}
        }
        logger.Infof("successfully changed replica set to %#v", members)
    }
    if err := setHasVote(removed, false); err != nil {
        return err
    }
    return nil
}

// start runs the given loop function until it returns.
// If the function returns a non-nil error, the receiving
// pgWorker is killed with that error.
func (w *pgWorker) start(loop func() error) {
    w.wg.Add(1)
    go func() {
        defer w.wg.Done()
        if err := loop(); err != nil {
            w.tomb.Kill(err)
        }
    }()
}

// setHasVote sets the HasVote status of all the given
// machines to hasVote.
func setHasVote(ms []*machine, hasVote bool) error {
    if len(ms) == 0 {
        return nil
    }
    logger.Infof("setting HasVote=%v on machines %v", hasVote, ms)
    for _, m := range ms {
        if err := m.stm.SetHasVote(hasVote); err != nil {
            return fmt.Errorf("cannot set voting status of %q to %v: %v", m.id, hasVote, err)
        }
    }
    return nil
}

// serverInfoWatcher watches the state server info and
// notifies the worker when it changes.
type serverInfoWatcher struct {
    worker  *pgWorker
    watcher state.NotifyWatcher
}

func (w *pgWorker) watchStateServerInfo() *serverInfoWatcher {
    infow := &serverInfoWatcher{
        worker:  w,
        watcher: w.st.WatchStateServerInfo(),
    }
    w.start(infow.loop)
    return infow
}

func (infow *serverInfoWatcher) loop() error {
    for {
        select {
        case _, ok := <-infow.watcher.Changes():
            if !ok {
                return infow.watcher.Err()
            }
            infow.worker.notify(infow.updateMachines)
        case <-infow.worker.tomb.Dying():
            return tomb.ErrDying
        }
    }
}

func (infow *serverInfoWatcher) stop() {
    infow.watcher.Stop()
}

// updateMachines is a notifyFunc that updates the current
// machines when the state server info has changed.
func (infow *serverInfoWatcher) updateMachines() (bool, error) {
    info, err := infow.worker.st.StateServerInfo()
    if err != nil {
        return false, fmt.Errorf("cannot get state server info: %v", err)
    }
    changed := false
    // Stop machine goroutines that no longer correspond to state server
    // machines.
    for _, m := range infow.worker.machines {
        if !inStrings(m.id, info.MachineIds) {
            m.stop()
            delete(infow.worker.machines, m.id)
            changed = true
        }
    }
    // Start machines with no watcher.
    for _, id := range info.MachineIds {
        if _, ok := infow.worker.machines[id]; ok {
            continue
        }
        logger.Debugf("found new machine %q", id)
        stm, err := infow.worker.st.Machine(id)
        if err != nil {
            if errors.IsNotFound(err) {
                // If the machine isn't found, it must have been
                // removed and will soon enough be removed
                // from the state server list. This will probably
                // never happen, but we'll code defensively anyway.
                logger.Warningf("machine %q from state server list not found", id)
                continue
            }
            return false, fmt.Errorf("cannot get machine %q: %v", id, err)
        }
        infow.worker.machines[id] = infow.worker.newMachine(stm)
        changed = true
    }
    return changed, nil
}

// machine represents a machine in State.
type machine struct {
    id             string
    wantsVote      bool
    apiHostPorts   []network.HostPort
    mongoHostPorts []network.HostPort

    worker         *pgWorker
    stm            stateMachine
    machineWatcher state.NotifyWatcher
}

func (m *machine) mongoHostPort() string {
    return mongo.SelectPeerHostPort(m.mongoHostPorts)
}

func (m *machine) String() string {
    return m.id
}

func (m *machine) GoString() string {
    return fmt.Sprintf("&peergrouper.machine{id: %q, wantsVote: %v, hostPort: %q}", m.id, m.wantsVote, m.mongoHostPort())
}

func (w *pgWorker) newMachine(stm stateMachine) *machine {
    m := &machine{
        worker:         w,
        id:             stm.Id(),
        stm:            stm,
        apiHostPorts:   stm.APIHostPorts(),
        mongoHostPorts: stm.MongoHostPorts(),
        wantsVote:      stm.WantsVote(),
        machineWatcher: stm.Watch(),
    }
    w.start(m.loop)
    return m
}

func (m *machine) loop() error {
    for {
        select {
        case _, ok := <-m.machineWatcher.Changes():
            if !ok {
                return m.machineWatcher.Err()
            }
            m.worker.notify(m.refresh)
        case <-m.worker.tomb.Dying():
            return nil
        }
    }
}

func (m *machine) stop() {
    m.machineWatcher.Stop()
}

func (m *machine) refresh() (bool, error) {
    if err := m.stm.Refresh(); err != nil {
        if errors.IsNotFound(err) {
            // We want to be robust when the machine
            // state is out of date with respect to the
            // state server info, so if the machine
            // has been removed, just assume that
            // no change has happened - the machine
            // loop will be stopped very soon anyway.
            return false, nil
        }
        return false, err
    }
    changed := false
    if wantsVote := m.stm.WantsVote(); wantsVote != m.wantsVote {
        m.wantsVote = wantsVote
        changed = true
    }
    if hps := m.stm.MongoHostPorts(); !hostPortsEqual(hps, m.mongoHostPorts) {
        m.mongoHostPorts = hps
        changed = true
    }
    if hps := m.stm.APIHostPorts(); !hostPortsEqual(hps, m.apiHostPorts) {
        m.apiHostPorts = hps
        changed = true
    }
    return changed, nil
}

func hostPortsEqual(hps1, hps2 []network.HostPort) bool {
    if len(hps1) != len(hps2) {
        return false
    }
    for i := range hps1 {
        if hps1[i] != hps2[i] {
            return false
        }
    }
    return true
}

func inStrings(t string, ss []string) bool {
    for _, s := range ss {
        if s == t {
            return true
        }
    }
    return false
}
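
// The sketch below is an editorial illustration and is not part of the
// original file. It shows one way a caller might run this worker using
// only the exported New function and the Kill/Wait methods defined
// above; the function name runPeergrouperExample and the stopping
// channel are hypothetical.
//
//    func runPeergrouperExample(st *state.State, stopping <-chan struct{}) error {
//        w, err := New(st)
//        if err != nil {
//            return err
//        }
//        // Ask the worker to stop once shutdown is requested, then wait
//        // for its main loop and watcher goroutines to finish.
//        go func() {
//            <-stopping
//            w.Kill()
//        }()
//        return w.Wait()
//    }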