// Source: launchpad.net/~rogpeppe/juju-core/500-errgo-fix@v0.0.0-20140213181702-000000002356/worker/instancepoller/updater.go

// Copyright 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package instancepoller

import (
	"time"

	"github.com/loggo/loggo"

	errgo "launchpad.net/errgo/errors"
	"launchpad.net/juju-core/errors"
	"launchpad.net/juju-core/instance"
	"launchpad.net/juju-core/state"
	"launchpad.net/juju-core/state/api/params"
	"launchpad.net/juju-core/state/watcher"
)

// logger is the logger used throughout this file; it is obtained from
// loggo under the name "juju.worker.instanceupdater".
var logger = loggo.GetLogger("juju.worker.instanceupdater")

// mask is a package-local shorthand for errgo.Mask. The functions in this
// file pass errors through it before returning them to their callers.
var mask = errgo.Mask

// ShortPoll and LongPoll hold the polling intervals for the instance
// updater. When a machine has no address or is not started, it will be
// polled at ShortPoll intervals until it does, exponentially backing off
// with an exponent of ShortPollBackoff until a maximum(ish) of LongPoll.
//
// When a machine has an address and is started LongPoll will be used to
// check that the instance address or status has not changed.
30 var ( 31 ShortPoll = 1 * time.Second 32 ShortPollBackoff = 2.0 33 LongPoll = 15 * time.Minute 34 ) 35 36 type machine interface { 37 Id() string 38 InstanceId() (instance.Id, error) 39 Addresses() []instance.Address 40 SetAddresses([]instance.Address) error 41 InstanceStatus() (string, error) 42 SetInstanceStatus(status string) error 43 String() string 44 Refresh() error 45 Life() state.Life 46 Status() (status params.Status, info string, data params.StatusData, err error) 47 } 48 49 type instanceInfo struct { 50 addresses []instance.Address 51 status string 52 } 53 54 type machineContext interface { 55 killAll(err error) 56 instanceInfo(id instance.Id) (instanceInfo, error) 57 dying() <-chan struct{} 58 } 59 60 type machineAddress struct { 61 machine machine 62 addresses []instance.Address 63 } 64 65 var _ machine = (*state.Machine)(nil) 66 67 type machinesWatcher interface { 68 Changes() <-chan []string 69 Err() error 70 Stop() error 71 } 72 73 type updaterContext interface { 74 newMachineContext() machineContext 75 getMachine(id string) (machine, error) 76 dying() <-chan struct{} 77 } 78 79 type updater struct { 80 context updaterContext 81 machines map[string]chan struct{} 82 machineDead chan machine 83 } 84 85 // watchMachinesLoop watches for changes provided by the given 86 // machinesWatcher and starts machine goroutines to deal 87 // with them, using the provided newMachineContext 88 // function to create the appropriate context for each new machine id. 
89 func watchMachinesLoop(context updaterContext, w machinesWatcher) (err error) { 90 p := &updater{ 91 context: context, 92 machines: make(map[string]chan struct{}), 93 machineDead: make(chan machine), 94 } 95 defer func() { 96 if stopErr := w.Stop(); stopErr != nil { 97 if err == nil { 98 err = errgo.Newf("error stopping watcher: %v", stopErr) 99 } else { 100 logger.Warningf("ignoring error when stopping watcher: %v", stopErr) 101 } 102 } 103 for len(p.machines) > 0 { 104 delete(p.machines, (<-p.machineDead).Id()) 105 } 106 }() 107 for { 108 select { 109 case ids, ok := <-w.Changes(): 110 if !ok { 111 return watcher.MustErr(w) 112 } 113 if err := p.startMachines(ids); err != nil { 114 return mask(err) 115 } 116 case m := <-p.machineDead: 117 delete(p.machines, m.Id()) 118 case <-p.context.dying(): 119 return nil 120 } 121 } 122 } 123 124 func (p *updater) startMachines(ids []string) error { 125 for _, id := range ids { 126 if c := p.machines[id]; c == nil { 127 // We don't know about the machine - start 128 // a goroutine to deal with it. 129 m, err := p.context.getMachine(id) 130 if errors.IsNotFoundError(err) { 131 logger.Warningf("watcher gave notification of non-existent machine %q", id) 132 continue 133 } 134 if err != nil { 135 return mask(err) 136 } 137 c = make(chan struct{}) 138 p.machines[id] = c 139 go runMachine(p.context.newMachineContext(), m, c, p.machineDead) 140 } else { 141 c <- struct{}{} 142 } 143 } 144 return nil 145 } 146 147 // runMachine processes the address and status publishing for a given machine. 148 // We assume that the machine is alive when this is first called. 149 func runMachine(context machineContext, m machine, changed <-chan struct{}, died chan<- machine) { 150 defer func() { 151 // We can't just send on the died channel because the 152 // central loop might be trying to write to us on the 153 // changed channel. 
154 for { 155 select { 156 case died <- m: 157 return 158 case <-changed: 159 } 160 } 161 }() 162 if err := machineLoop(context, m, changed); err != nil { 163 context.killAll(err) 164 } 165 } 166 167 func machineLoop(context machineContext, m machine, changed <-chan struct{}) error { 168 // Use a short poll interval when initially waiting for 169 // a machine's address and machine agent to start, and a long one when it already 170 // has an address and the machine agent is started. 171 pollInterval := ShortPoll 172 pollInstance := true 173 for { 174 if pollInstance { 175 instInfo, err := pollInstanceInfo(context, m) 176 if err != nil { 177 // If the provider doesn't implement Addresses/Status now, 178 // it never will until we're upgraded, so don't bother 179 // asking any more. We could use less resources 180 // by taking down the entire worker, but this is easier for now 181 // (and hopefully the local provider will implement 182 // Addresses/Status in the not-too-distant future), 183 // so we won't need to worry about this case at all. 184 if errors.IsNotImplementedError(err) { 185 pollInterval = 365 * 24 * time.Hour 186 } else { 187 return err 188 } 189 } 190 machineStatus, _, _, err := m.Status() 191 if err != nil { 192 logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err) 193 } 194 if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == params.StatusStarted { 195 // We've got at least one address and a status and instance is started, so poll infrequently. 196 pollInterval = LongPoll 197 } else if pollInterval < LongPoll { 198 // We have no addresses or not started - poll increasingly rarely 199 // until we do. 
200 pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff) 201 } 202 pollInstance = false 203 } 204 select { 205 case <-time.After(pollInterval): 206 pollInstance = true 207 case <-context.dying(): 208 return nil 209 case <-changed: 210 if err := m.Refresh(); err != nil { 211 return mask(err) 212 } 213 if m.Life() == state.Dead { 214 return nil 215 } 216 } 217 } 218 } 219 220 // pollInstanceInfo checks the current provider addresses and status 221 // for the given machine's instance, and sets them on the machine if they've changed. 222 func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) { 223 instInfo = instanceInfo{} 224 instId, err := m.InstanceId() 225 if err != nil && !state.IsNotProvisionedError(err) { 226 return instInfo, errgo.Notef(err, "cannot get machine's instance id") 227 } 228 instInfo, err = context.instanceInfo(instId) 229 if err != nil { 230 if errors.IsNotImplementedError(err) { 231 return instInfo, err 232 } 233 logger.Warningf("cannot get instance info for instance %q: %v", instId, err) 234 return instInfo, nil 235 } 236 currentInstStatus, err := m.InstanceStatus() 237 if err != nil { 238 // This should never occur since the machine is provisioned. 239 // But just in case, we reset polled status so we try again next time. 
240 logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err) 241 instInfo.status = "" 242 } else { 243 if instInfo.status != currentInstStatus { 244 logger.Infof("machine %q has new instance status: %v", m.Id(), instInfo.status) 245 if err = m.SetInstanceStatus(instInfo.status); err != nil { 246 logger.Errorf("cannot set instance status on %q: %v", m, err) 247 } 248 } 249 } 250 if !addressesEqual(m.Addresses(), instInfo.addresses) { 251 logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses) 252 if err = m.SetAddresses(instInfo.addresses); err != nil { 253 logger.Errorf("cannot set addresses on %q: %v", m, err) 254 } 255 } 256 return instInfo, err 257 } 258 259 func addressesEqual(a0, a1 []instance.Address) bool { 260 if len(a0) != len(a1) { 261 return false 262 } 263 for i := range a0 { 264 if a0[i] != a1[i] { 265 return false 266 } 267 } 268 return true 269 }