github.com/mattyw/juju@v0.0.0-20140610034352-732aecd63861/worker/peergrouper/worker.go

// Copyright 2014 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package peergrouper

import (
	"fmt"
	"sync"
	"time"

	"github.com/juju/errors"
	"launchpad.net/tomb"

	"github.com/juju/juju/agent/mongo"
	"github.com/juju/juju/instance"
	"github.com/juju/juju/replicaset"
	"github.com/juju/juju/state"
	"github.com/juju/juju/worker"
)

type stateInterface interface {
	Machine(id string) (stateMachine, error)
	WatchStateServerInfo() state.NotifyWatcher
	StateServerInfo() (*state.StateServerInfo, error)
	MongoSession() mongoSession
}

type stateMachine interface {
	Id() string
	InstanceId() (instance.Id, error)
	Refresh() error
	Watch() state.NotifyWatcher
	WantsVote() bool
	HasVote() bool
	SetHasVote(hasVote bool) error
	APIHostPorts() []instance.HostPort
	MongoHostPorts() []instance.HostPort
}

type mongoSession interface {
	CurrentStatus() (*replicaset.Status, error)
	CurrentMembers() ([]replicaset.Member, error)
	Set([]replicaset.Member) error
}

type publisherInterface interface {
	// publishAPIServers publishes information about the given state servers
	// to whomsoever it may concern. When it is called there
	// is no guarantee that any of the information has actually changed.
	publishAPIServers(apiServers [][]instance.HostPort, instanceIds []instance.Id) error
}

// notifyFunc holds a function that is sent
// to the main worker loop to fetch new information
// when something changes. It reports whether
// the information has actually changed (and by implication
// whether the replica set may need to be changed).
type notifyFunc func() (changed bool, err error)

var (
	// If we fail to set the mongo replica set members,
	// we start retrying with the following interval,
	// before exponentially backing off with each further
	// attempt.
	initialRetryInterval = 2 * time.Second

	// maxRetryInterval holds the maximum interval
	// between retry attempts.
	maxRetryInterval = 5 * time.Minute

	// pollInterval holds the interval at which the replica set
	// members will be updated even in the absence of changes
	// to State. This enables us to make changes to members
	// that are triggered by changes to member status.
	pollInterval = 1 * time.Minute
)

// pgWorker holds all the mutable state that we are watching.
// The only goroutine that is allowed to modify this
// is worker.loop - other watchers modify the
// current state by calling worker.notify instead of
// modifying it directly.
type pgWorker struct {
	tomb tomb.Tomb

	// wg represents all the currently running goroutines.
	// The worker main loop waits for all of these to exit
	// before finishing.
	wg sync.WaitGroup

	// st represents the State. It is an interface so we can swap
	// out the implementation during testing.
	st stateInterface

	// notifyCh receives functions sent by the watchers when something
	// changes that might affect the peer group membership. Each function
	// is run inside the main worker goroutine to mutate the state and
	// reports whether the state has actually changed.
	notifyCh chan notifyFunc

	// machines holds the set of machines we are currently
	// watching (all the state server machines). Each one has
	// an associated goroutine that watches attributes of that
	// machine.
	machines map[string]*machine

	// publisher holds the implementation of the API
	// address publisher.
	publisher publisherInterface
}

// New returns a new worker that maintains the mongo replica set
// with respect to the given state.
func New(st *state.State) (worker.Worker, error) {
	cfg, err := st.EnvironConfig()
	if err != nil {
		return nil, err
	}
	return newWorker(&stateShim{
		State:     st,
		mongoPort: cfg.StatePort(),
		apiPort:   cfg.APIPort(),
	}, newPublisher(st)), nil
}

func newWorker(st stateInterface, pub publisherInterface) worker.Worker {
	w := &pgWorker{
		st:        st,
		notifyCh:  make(chan notifyFunc),
		machines:  make(map[string]*machine),
		publisher: pub,
	}
	go func() {
		defer w.tomb.Done()
		if err := w.loop(); err != nil {
			logger.Errorf("peergrouper loop terminated: %v", err)
			w.tomb.Kill(err)
		}
		// Wait for the various goroutines to be killed.
		// N.B. we don't defer this call because
		// if we do and a bug causes a panic, Wait will deadlock
		// waiting for the unkilled goroutines to exit.
		w.wg.Wait()
	}()
	return w
}

func (w *pgWorker) Kill() {
	w.tomb.Kill(nil)
}

func (w *pgWorker) Wait() error {
	return w.tomb.Wait()
}

func (w *pgWorker) loop() error {
	infow := w.watchStateServerInfo()
	defer infow.stop()

	retry := time.NewTimer(0)
	retry.Stop()
	retryInterval := initialRetryInterval
	for {
		select {
		case f := <-w.notifyCh:
			// Update our current view of the state of affairs.
			changed, err := f()
			if err != nil {
				return err
			}
			if !changed {
				break
			}
			// Try to update the replica set immediately.
			retry.Reset(0)
		case <-retry.C:
			ok := true
			servers, instanceIds, err := w.apiPublishInfo()
			if err != nil {
				return fmt.Errorf("cannot get API server info: %v", err)
			}
			if err := w.publisher.publishAPIServers(servers, instanceIds); err != nil {
				logger.Errorf("cannot publish API server addresses: %v", err)
				ok = false
			}
			if err := w.updateReplicaset(); err != nil {
				if _, isReplicaSetError := err.(*replicaSetError); !isReplicaSetError {
					return err
				}
				logger.Errorf("cannot set replicaset: %v", err)
				ok = false
			}
			if ok {
				// Update the replica set members occasionally
				// to keep them up to date with the current
				// replica set member statuses.
				retry.Reset(pollInterval)
				retryInterval = initialRetryInterval
			} else {
				retry.Reset(retryInterval)
				retryInterval *= 2
				if retryInterval > maxRetryInterval {
					retryInterval = maxRetryInterval
				}
			}

		case <-w.tomb.Dying():
			return tomb.ErrDying
		}
	}
}

// apiPublishInfo returns the API addresses and instance ids of all
// the machines currently being watched, for publishing to the API
// address publisher. Machines with no known API addresses are omitted.
func (w *pgWorker) apiPublishInfo() ([][]instance.HostPort, []instance.Id, error) {
	servers := make([][]instance.HostPort, 0, len(w.machines))
	instanceIds := make([]instance.Id, 0, len(w.machines))
	for _, m := range w.machines {
		if len(m.apiHostPorts) == 0 {
			continue
		}
		instanceId, err := m.stm.InstanceId()
		if err != nil {
			return nil, nil, err
		}
		instanceIds = append(instanceIds, instanceId)
		servers = append(servers, m.apiHostPorts)
	}
	return servers, instanceIds, nil
}

// notify sends the given notification function to
// the worker main loop to be executed.
func (w *pgWorker) notify(f notifyFunc) bool {
	select {
	case w.notifyCh <- f:
		return true
	case <-w.tomb.Dying():
		return false
	}
}

// peerGroupInfo collates current session information about the
// mongo peer group with information from state machines.
func (w *pgWorker) peerGroupInfo() (*peerGroupInfo, error) {
	session := w.st.MongoSession()
	info := &peerGroupInfo{}
	var err error
	status, err := session.CurrentStatus()
	if err != nil {
		return nil, fmt.Errorf("cannot get replica set status: %v", err)
	}
	info.statuses = status.Members
	info.members, err = session.CurrentMembers()
	if err != nil {
		return nil, fmt.Errorf("cannot get replica set members: %v", err)
	}
	info.machines = w.machines
	return info, nil
}

// replicaSetError holds an error returned as a result
// of calling replicaset.Set. As this is expected to fail
// in the normal course of things, it needs special treatment.
type replicaSetError struct {
	error
}

// updateReplicaset sets the current replica set members, and applies the
// desired voting status to machines in state.
func (w *pgWorker) updateReplicaset() error {
	info, err := w.peerGroupInfo()
	if err != nil {
		return err
	}
	members, voting, err := desiredPeerGroup(info)
	if err != nil {
		return fmt.Errorf("cannot compute desired peer group: %v", err)
	}
	if members != nil {
		logger.Debugf("desired peer group members: %#v", members)
	} else {
		logger.Debugf("no change in desired peer group (voting %#v)", voting)
	}

	// We cannot change the HasVote flag of a machine in state at exactly
	// the same moment as changing its voting status in the replica set.
	//
	// Thus we need to be careful that a machine which is actually a voting
	// member is not seen to not have a vote, because otherwise
	// there is nothing to prevent the machine being removed.
	//
	// To avoid this happening, we make sure when we call SetReplicaSet
	// that the voting status of machines is the union of both old
	// and new voting machines - that is, the set of HasVote machines
	// is a superset of all the actual voting machines.
	//
	// Only after the call has taken place do we reset the voting status
	// of the machines that have lost their vote.
	//
	// If there's a crash, the voting status may not reflect the
	// actual voting status for a while, but when things come
	// back online, it will be sorted out, as desiredPeerGroup
	// will return the actual voting status.
	//
	// Note that we potentially update the HasVote status of the machines even
	// if the members have not changed.
	var added, removed []*machine
	for m, hasVote := range voting {
		switch {
		case hasVote && !m.stm.HasVote():
			added = append(added, m)
		case !hasVote && m.stm.HasVote():
			removed = append(removed, m)
		}
	}
	if err := setHasVote(added, true); err != nil {
		return err
	}
	if members != nil {
		if err := w.st.MongoSession().Set(members); err != nil {
			// We've failed to set the replica set, so revert
			// to the previous settings.
			if err1 := setHasVote(added, false); err1 != nil {
				logger.Errorf("cannot revert machine voting after failure to change replica set: %v", err1)
			}
			return &replicaSetError{err}
		}
		logger.Infof("successfully changed replica set to %#v", members)
	}
	if err := setHasVote(removed, false); err != nil {
		return err
	}
	return nil
}

// start runs the given loop function in a new goroutine tracked by
// the worker's wait group. If the function returns a non-nil error,
// the receiving pgWorker is killed with that error.
func (w *pgWorker) start(loop func() error) {
	w.wg.Add(1)
	go func() {
		defer w.wg.Done()
		if err := loop(); err != nil {
			w.tomb.Kill(err)
		}
	}()
}

// setHasVote sets the HasVote status of all the given
// machines to hasVote.
func setHasVote(ms []*machine, hasVote bool) error {
	if len(ms) == 0 {
		return nil
	}
	logger.Infof("setting HasVote=%v on machines %v", hasVote, ms)
	for _, m := range ms {
		if err := m.stm.SetHasVote(hasVote); err != nil {
			return fmt.Errorf("cannot set voting status of %q to %v: %v", m.id, hasVote, err)
		}
	}
	return nil
}

// serverInfoWatcher watches the state server info and
// notifies the worker when it changes.
type serverInfoWatcher struct {
	worker  *pgWorker
	watcher state.NotifyWatcher
}

// watchStateServerInfo starts a goroutine that watches the state
// server info and notifies the worker when it changes.
func (w *pgWorker) watchStateServerInfo() *serverInfoWatcher {
	infow := &serverInfoWatcher{
		worker:  w,
		watcher: w.st.WatchStateServerInfo(),
	}
	w.start(infow.loop)
	return infow
}

func (infow *serverInfoWatcher) loop() error {
	for {
		select {
		case _, ok := <-infow.watcher.Changes():
			if !ok {
				return infow.watcher.Err()
			}
			infow.worker.notify(infow.updateMachines)
		case <-infow.worker.tomb.Dying():
			return tomb.ErrDying
		}
	}
}

func (infow *serverInfoWatcher) stop() {
	infow.watcher.Stop()
}

// updateMachines is a notifyFunc that updates the current
// machines when the state server info has changed.
func (infow *serverInfoWatcher) updateMachines() (bool, error) {
	info, err := infow.worker.st.StateServerInfo()
	if err != nil {
		return false, fmt.Errorf("cannot get state server info: %v", err)
	}
	changed := false
	// Stop machine goroutines that no longer correspond to state server
	// machines.
	for _, m := range infow.worker.machines {
		if !inStrings(m.id, info.MachineIds) {
			m.stop()
			delete(infow.worker.machines, m.id)
			changed = true
		}
	}
	// Start watchers for state server machines we are not yet watching.
	for _, id := range info.MachineIds {
		if _, ok := infow.worker.machines[id]; ok {
			continue
		}
		logger.Debugf("found new machine %q", id)
		stm, err := infow.worker.st.Machine(id)
		if err != nil {
			if errors.IsNotFound(err) {
				// If the machine isn't found, it must have been
				// removed and will soon enough be removed
				// from the state server list. This will probably
				// never happen, but we'll code defensively anyway.
				logger.Warningf("machine %q from state server list not found", id)
				continue
			}
			return false, fmt.Errorf("cannot get machine %q: %v", id, err)
		}
		infow.worker.machines[id] = infow.worker.newMachine(stm)
		changed = true
	}
	return changed, nil
}

// machine represents a machine in State.
type machine struct {
	id             string
	wantsVote      bool
	apiHostPorts   []instance.HostPort
	mongoHostPorts []instance.HostPort

	worker         *pgWorker
	stm            stateMachine
	machineWatcher state.NotifyWatcher
}

func (m *machine) mongoHostPort() string {
	return mongo.SelectPeerHostPort(m.mongoHostPorts)
}

func (m *machine) String() string {
	return m.id
}

func (m *machine) GoString() string {
	return fmt.Sprintf("&peergrouper.machine{id: %q, wantsVote: %v, hostPort: %q}", m.id, m.wantsVote, m.mongoHostPort())
}

func (w *pgWorker) newMachine(stm stateMachine) *machine {
	m := &machine{
		worker:         w,
		id:             stm.Id(),
		stm:            stm,
		apiHostPorts:   stm.APIHostPorts(),
		mongoHostPorts: stm.MongoHostPorts(),
		wantsVote:      stm.WantsVote(),
		machineWatcher: stm.Watch(),
	}
	w.start(m.loop)
	return m
}

func (m *machine) loop() error {
	for {
		select {
		case _, ok := <-m.machineWatcher.Changes():
			if !ok {
				return m.machineWatcher.Err()
			}
			m.worker.notify(m.refresh)
		case <-m.worker.tomb.Dying():
			return nil
		}
	}
}

func (m *machine) stop() {
	m.machineWatcher.Stop()
}

// refresh is a notifyFunc that updates the machine's attributes
// from state, reporting whether any of them have changed.
func (m *machine) refresh() (bool, error) {
	if err := m.stm.Refresh(); err != nil {
		if errors.IsNotFound(err) {
			// We want to be robust when the machine
			// state is out of date with respect to the
			// state server info, so if the machine
			// has been removed, just assume that
			// no change has happened - the machine
			// loop will be stopped very soon anyway.
			return false, nil
		}
		return false, err
	}
	changed := false
	if wantsVote := m.stm.WantsVote(); wantsVote != m.wantsVote {
		m.wantsVote = wantsVote
		changed = true
	}
	if hps := m.stm.MongoHostPorts(); !hostPortsEqual(hps, m.mongoHostPorts) {
		m.mongoHostPorts = hps
		changed = true
	}
	if hps := m.stm.APIHostPorts(); !hostPortsEqual(hps, m.apiHostPorts) {
		m.apiHostPorts = hps
		changed = true
	}
	return changed, nil
}

// hostPortsEqual reports whether the two HostPort slices are equal.
func hostPortsEqual(hps1, hps2 []instance.HostPort) bool {
	if len(hps1) != len(hps2) {
		return false
	}
	for i := range hps1 {
		if hps1[i] != hps2[i] {
			return false
		}
	}
	return true
}

// inStrings reports whether t is present in ss.
func inStrings(t string, ss []string) bool {
	for _, s := range ss {
		if s == t {
			return true
		}
	}
	return false
}
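
// The sketch below is illustrative only and is not part of the original
// source file: a minimal publisherInterface implementation that simply
// records and logs the addresses it is given. Something along these lines
// could be passed to newWorker when exercising the worker without a real
// API address publisher; the recordingPublisher type and its field names
// are assumptions for illustration, not existing juju code.
type recordingPublisher struct {
	mu         sync.Mutex
	apiServers [][]instance.HostPort
}

// publishAPIServers implements publisherInterface by remembering the most
// recently published addresses and logging them.
func (p *recordingPublisher) publishAPIServers(apiServers [][]instance.HostPort, instanceIds []instance.Id) error {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.apiServers = apiServers
	logger.Debugf("publishing API servers %v for instances %v", apiServers, instanceIds)
	return nil
}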