github.com/altoros/juju-vmware@v0.0.0-20150312064031-f19ae857ccca/worker/instancepoller/updater.go (about) 1 // Copyright 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package instancepoller 5 6 import ( 7 "fmt" 8 "time" 9 10 "github.com/juju/errors" 11 "github.com/juju/loggo" 12 13 "github.com/juju/juju/instance" 14 "github.com/juju/juju/network" 15 "github.com/juju/juju/state" 16 "github.com/juju/juju/state/watcher" 17 ) 18 19 var logger = loggo.GetLogger("juju.worker.instanceupdater") 20 21 // ShortPoll and LongPoll hold the polling intervals for the instance 22 // updater. When a machine has no address or is not started, it will be 23 // polled at ShortPoll intervals until it does, exponentially backing off 24 // with an exponent of ShortPollBackoff until a maximum(ish) of LongPoll. 25 // 26 // When a machine has an address and is started LongPoll will be used to 27 // check that the instance address or status has not changed. 28 var ( 29 ShortPoll = 1 * time.Second 30 ShortPollBackoff = 2.0 31 LongPoll = 15 * time.Minute 32 ) 33 34 type machine interface { 35 Id() string 36 InstanceId() (instance.Id, error) 37 Addresses() []network.Address 38 SetAddresses(...network.Address) error 39 InstanceStatus() (string, error) 40 SetInstanceStatus(status string) error 41 String() string 42 Refresh() error 43 Life() state.Life 44 Status() (status state.Status, info string, data map[string]interface{}, err error) 45 IsManual() (bool, error) 46 } 47 48 type instanceInfo struct { 49 addresses []network.Address 50 status string 51 } 52 53 type machineContext interface { 54 killAll(err error) 55 instanceInfo(id instance.Id) (instanceInfo, error) 56 dying() <-chan struct{} 57 } 58 59 type machineAddress struct { 60 machine machine 61 addresses []network.Address 62 } 63 64 var _ machine = (*state.Machine)(nil) 65 66 type machinesWatcher interface { 67 Changes() <-chan []string 68 Err() error 69 Stop() error 70 } 71 72 type updaterContext interface { 73 newMachineContext() machineContext 74 getMachine(id string) (machine, error) 75 dying() <-chan struct{} 76 } 77 78 type updater struct { 79 context updaterContext 80 machines map[string]chan struct{} 81 machineDead chan machine 82 } 83 84 // watchMachinesLoop watches for changes provided by the given 85 // machinesWatcher and starts machine goroutines to deal 86 // with them, using the provided newMachineContext 87 // function to create the appropriate context for each new machine id. 88 func watchMachinesLoop(context updaterContext, w machinesWatcher) (err error) { 89 p := &updater{ 90 context: context, 91 machines: make(map[string]chan struct{}), 92 machineDead: make(chan machine), 93 } 94 defer func() { 95 if stopErr := w.Stop(); stopErr != nil { 96 if err == nil { 97 err = fmt.Errorf("error stopping watcher: %v", stopErr) 98 } else { 99 logger.Warningf("ignoring error when stopping watcher: %v", stopErr) 100 } 101 } 102 for len(p.machines) > 0 { 103 delete(p.machines, (<-p.machineDead).Id()) 104 } 105 }() 106 for { 107 select { 108 case ids, ok := <-w.Changes(): 109 if !ok { 110 return watcher.EnsureErr(w) 111 } 112 if err := p.startMachines(ids); err != nil { 113 return err 114 } 115 case m := <-p.machineDead: 116 delete(p.machines, m.Id()) 117 case <-p.context.dying(): 118 return nil 119 } 120 } 121 } 122 123 func (p *updater) startMachines(ids []string) error { 124 for _, id := range ids { 125 if c := p.machines[id]; c == nil { 126 // We don't know about the machine - start 127 // a goroutine to deal with it. 128 m, err := p.context.getMachine(id) 129 if errors.IsNotFound(err) { 130 logger.Warningf("watcher gave notification of non-existent machine %q", id) 131 continue 132 } 133 if err != nil { 134 return err 135 } 136 // We don't poll manual machines. 137 isManual, err := m.IsManual() 138 if err != nil { 139 return err 140 } 141 if isManual { 142 continue 143 } 144 c = make(chan struct{}) 145 p.machines[id] = c 146 go runMachine(p.context.newMachineContext(), m, c, p.machineDead) 147 } else { 148 c <- struct{}{} 149 } 150 } 151 return nil 152 } 153 154 // runMachine processes the address and status publishing for a given machine. 155 // We assume that the machine is alive when this is first called. 156 func runMachine(context machineContext, m machine, changed <-chan struct{}, died chan<- machine) { 157 defer func() { 158 // We can't just send on the died channel because the 159 // central loop might be trying to write to us on the 160 // changed channel. 161 for { 162 select { 163 case died <- m: 164 return 165 case <-changed: 166 } 167 } 168 }() 169 if err := machineLoop(context, m, changed); err != nil { 170 context.killAll(err) 171 } 172 } 173 174 func machineLoop(context machineContext, m machine, changed <-chan struct{}) error { 175 // Use a short poll interval when initially waiting for 176 // a machine's address and machine agent to start, and a long one when it already 177 // has an address and the machine agent is started. 178 pollInterval := ShortPoll 179 pollInstance := true 180 for { 181 if pollInstance { 182 instInfo, err := pollInstanceInfo(context, m) 183 if err != nil && !errors.IsNotProvisioned(err) { 184 // If the provider doesn't implement Addresses/Status now, 185 // it never will until we're upgraded, so don't bother 186 // asking any more. We could use less resources 187 // by taking down the entire worker, but this is easier for now 188 // (and hopefully the local provider will implement 189 // Addresses/Status in the not-too-distant future), 190 // so we won't need to worry about this case at all. 191 if errors.IsNotImplemented(err) { 192 pollInterval = 365 * 24 * time.Hour 193 } else { 194 return err 195 } 196 } 197 machineStatus := state.StatusPending 198 if err == nil { 199 if machineStatus, _, _, err = m.Status(); err != nil { 200 logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err) 201 } 202 } 203 if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == state.StatusStarted { 204 // We've got at least one address and a status and instance is started, so poll infrequently. 205 pollInterval = LongPoll 206 } else if pollInterval < LongPoll { 207 // We have no addresses or not started - poll increasingly rarely 208 // until we do. 209 pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff) 210 } 211 pollInstance = false 212 } 213 select { 214 case <-time.After(pollInterval): 215 pollInstance = true 216 case <-context.dying(): 217 return nil 218 case <-changed: 219 if err := m.Refresh(); err != nil { 220 return err 221 } 222 if m.Life() == state.Dead { 223 return nil 224 } 225 } 226 } 227 } 228 229 // pollInstanceInfo checks the current provider addresses and status 230 // for the given machine's instance, and sets them on the machine if they've changed. 231 func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) { 232 instInfo = instanceInfo{} 233 instId, err := m.InstanceId() 234 // We can't ask the machine for its addresses if it isn't provisioned yet. 235 if errors.IsNotProvisioned(err) { 236 return instInfo, err 237 } 238 if err != nil { 239 return instInfo, fmt.Errorf("cannot get machine's instance id: %v", err) 240 } 241 instInfo, err = context.instanceInfo(instId) 242 if err != nil { 243 if errors.IsNotImplemented(err) { 244 return instInfo, err 245 } 246 logger.Warningf("cannot get instance info for instance %q: %v", instId, err) 247 return instInfo, nil 248 } 249 currentInstStatus, err := m.InstanceStatus() 250 if err != nil { 251 // This should never occur since the machine is provisioned. 252 // But just in case, we reset polled status so we try again next time. 253 logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err) 254 instInfo.status = "" 255 } else { 256 if instInfo.status != currentInstStatus { 257 logger.Infof("machine %q instance status changed from %q to %q", m.Id(), currentInstStatus, instInfo.status) 258 if err = m.SetInstanceStatus(instInfo.status); err != nil { 259 logger.Errorf("cannot set instance status on %q: %v", m, err) 260 } 261 } 262 } 263 if !addressesEqual(m.Addresses(), instInfo.addresses) { 264 logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses) 265 if err = m.SetAddresses(instInfo.addresses...); err != nil { 266 logger.Errorf("cannot set addresses on %q: %v", m, err) 267 } 268 } 269 return instInfo, err 270 } 271 272 // addressesEqual compares the addresses of the machine and the instance information. 273 func addressesEqual(a0, a1 []network.Address) bool { 274 if len(a0) != len(a1) { 275 logger.Tracef("address lists have different lengths %d != %d for %v != %v", 276 len(a0), len(a1), a0, a1) 277 return false 278 } 279 280 ca0 := make([]network.Address, len(a0)) 281 copy(ca0, a0) 282 network.SortAddresses(ca0, true) 283 ca1 := make([]network.Address, len(a1)) 284 copy(ca1, a1) 285 network.SortAddresses(ca1, true) 286 287 for i := range ca0 { 288 if ca0[i] != ca1[i] { 289 logger.Tracef("address entry at offset %d has a different value for %v != %v", 290 i, ca0, ca1) 291 return false 292 } 293 } 294 return true 295 }