github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/worker/instancepoller/updater.go (about) 1 // Copyright 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package instancepoller 5 6 import ( 7 "fmt" 8 "time" 9 10 "github.com/juju/loggo" 11 "github.com/juju/names" 12 13 "github.com/juju/juju/apiserver/params" 14 "github.com/juju/juju/instance" 15 "github.com/juju/juju/network" 16 "github.com/juju/juju/state/watcher" 17 ) 18 19 var logger = loggo.GetLogger("juju.worker.instanceupdater") 20 21 // ShortPoll and LongPoll hold the polling intervals for the instance 22 // updater. When a machine has no address or is not started, it will be 23 // polled at ShortPoll intervals until it does, exponentially backing off 24 // with an exponent of ShortPollBackoff until a maximum(ish) of LongPoll. 25 // 26 // When a machine has an address and is started LongPoll will be used to 27 // check that the instance address or status has not changed. 28 var ( 29 ShortPoll = 1 * time.Second 30 ShortPollBackoff = 2.0 31 LongPoll = 15 * time.Minute 32 ) 33 34 type machine interface { 35 Id() string 36 Tag() names.MachineTag 37 InstanceId() (instance.Id, error) 38 ProviderAddresses() ([]network.Address, error) 39 SetProviderAddresses(...network.Address) error 40 InstanceStatus() (string, error) 41 SetInstanceStatus(status string) error 42 String() string 43 Refresh() error 44 Life() params.Life 45 Status() (params.StatusResult, error) 46 IsManual() (bool, error) 47 } 48 49 type instanceInfo struct { 50 addresses []network.Address 51 status string 52 } 53 54 type machineContext interface { 55 killAll(err error) 56 instanceInfo(id instance.Id) (instanceInfo, error) 57 dying() <-chan struct{} 58 } 59 60 type machineAddress struct { 61 machine machine 62 addresses []network.Address 63 } 64 65 type machinesWatcher interface { 66 Changes() <-chan []string 67 Err() error 68 Stop() error 69 } 70 71 type updaterContext interface { 72 newMachineContext() machineContext 73 getMachine(tag names.MachineTag) (machine, error) 74 dying() <-chan struct{} 75 } 76 77 type updater struct { 78 context updaterContext 79 machines map[names.MachineTag]chan struct{} 80 machineDead chan machine 81 } 82 83 // watchMachinesLoop watches for changes provided by the given 84 // machinesWatcher and starts machine goroutines to deal with them, 85 // using the provided newMachineContext function to create the 86 // appropriate context for each new machine tag. 87 func watchMachinesLoop(context updaterContext, w machinesWatcher) (err error) { 88 p := &updater{ 89 context: context, 90 machines: make(map[names.MachineTag]chan struct{}), 91 machineDead: make(chan machine), 92 } 93 defer func() { 94 if stopErr := w.Stop(); stopErr != nil { 95 if err == nil { 96 err = fmt.Errorf("error stopping watcher: %v", stopErr) 97 } else { 98 logger.Warningf("ignoring error when stopping watcher: %v", stopErr) 99 } 100 } 101 for len(p.machines) > 0 { 102 delete(p.machines, (<-p.machineDead).Tag()) 103 } 104 }() 105 for { 106 select { 107 case ids, ok := <-w.Changes(): 108 if !ok { 109 return watcher.EnsureErr(w) 110 } 111 tags := make([]names.MachineTag, len(ids)) 112 for i := range ids { 113 tags[i] = names.NewMachineTag(ids[i]) 114 } 115 if err := p.startMachines(tags); err != nil { 116 return err 117 } 118 case m := <-p.machineDead: 119 delete(p.machines, m.Tag()) 120 case <-p.context.dying(): 121 return nil 122 } 123 } 124 } 125 126 func (p *updater) startMachines(tags []names.MachineTag) error { 127 for _, tag := range tags { 128 if c := p.machines[tag]; c == nil { 129 // We don't know about the machine - start 130 // a goroutine to deal with it. 131 m, err := p.context.getMachine(tag) 132 if params.IsCodeNotFound(err) { 133 logger.Warningf("watcher gave notification of non-existent machine %q", tag.Id()) 134 continue 135 } 136 if err != nil { 137 return err 138 } 139 // We don't poll manual machines. 140 isManual, err := m.IsManual() 141 if err != nil { 142 return err 143 } 144 if isManual { 145 continue 146 } 147 c = make(chan struct{}) 148 p.machines[tag] = c 149 go runMachine(p.context.newMachineContext(), m, c, p.machineDead) 150 } else { 151 c <- struct{}{} 152 } 153 } 154 return nil 155 } 156 157 // runMachine processes the address and status publishing for a given machine. 158 // We assume that the machine is alive when this is first called. 159 func runMachine(context machineContext, m machine, changed <-chan struct{}, died chan<- machine) { 160 defer func() { 161 // We can't just send on the died channel because the 162 // central loop might be trying to write to us on the 163 // changed channel. 164 for { 165 select { 166 case died <- m: 167 return 168 case <-changed: 169 } 170 } 171 }() 172 if err := machineLoop(context, m, changed); err != nil { 173 context.killAll(err) 174 } 175 } 176 177 func machineLoop(context machineContext, m machine, changed <-chan struct{}) error { 178 // Use a short poll interval when initially waiting for 179 // a machine's address and machine agent to start, and a long one when it already 180 // has an address and the machine agent is started. 181 pollInterval := ShortPoll 182 pollInstance := true 183 for { 184 if pollInstance { 185 instInfo, err := pollInstanceInfo(context, m) 186 if err != nil && !params.IsCodeNotProvisioned(err) { 187 // If the provider doesn't implement Addresses/Status now, 188 // it never will until we're upgraded, so don't bother 189 // asking any more. We could use less resources 190 // by taking down the entire worker, but this is easier for now 191 // (and hopefully the local provider will implement 192 // Addresses/Status in the not-too-distant future), 193 // so we won't need to worry about this case at all. 194 if params.IsCodeNotImplemented(err) { 195 pollInterval = 365 * 24 * time.Hour 196 } else { 197 return err 198 } 199 } 200 machineStatus := params.StatusPending 201 if err == nil { 202 if statusInfo, err := m.Status(); err != nil { 203 logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err) 204 } else { 205 machineStatus = statusInfo.Status 206 } 207 } 208 if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == params.StatusStarted { 209 // We've got at least one address and a status and instance is started, so poll infrequently. 210 pollInterval = LongPoll 211 } else if pollInterval < LongPoll { 212 // We have no addresses or not started - poll increasingly rarely 213 // until we do. 214 pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff) 215 } 216 pollInstance = false 217 } 218 select { 219 case <-time.After(pollInterval): 220 pollInstance = true 221 case <-context.dying(): 222 return nil 223 case <-changed: 224 if err := m.Refresh(); err != nil { 225 return err 226 } 227 if m.Life() == params.Dead { 228 return nil 229 } 230 } 231 } 232 } 233 234 // pollInstanceInfo checks the current provider addresses and status 235 // for the given machine's instance, and sets them on the machine if they've changed. 236 func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) { 237 instInfo = instanceInfo{} 238 instId, err := m.InstanceId() 239 // We can't ask the machine for its addresses if it isn't provisioned yet. 240 if params.IsCodeNotProvisioned(err) { 241 return instInfo, err 242 } 243 if err != nil { 244 return instInfo, fmt.Errorf("cannot get machine's instance id: %v", err) 245 } 246 instInfo, err = context.instanceInfo(instId) 247 if err != nil { 248 if params.IsCodeNotImplemented(err) { 249 return instInfo, err 250 } 251 logger.Warningf("cannot get instance info for instance %q: %v", instId, err) 252 return instInfo, nil 253 } 254 currentInstStatus, err := m.InstanceStatus() 255 if err != nil { 256 // This should never occur since the machine is provisioned. 257 // But just in case, we reset polled status so we try again next time. 258 logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err) 259 instInfo.status = "" 260 } else { 261 if instInfo.status != currentInstStatus { 262 logger.Infof("machine %q instance status changed from %q to %q", m.Id(), currentInstStatus, instInfo.status) 263 if err = m.SetInstanceStatus(instInfo.status); err != nil { 264 logger.Errorf("cannot set instance status on %q: %v", m, err) 265 } 266 } 267 } 268 providerAddresses, err := m.ProviderAddresses() 269 if err != nil { 270 return instInfo, err 271 } 272 if !addressesEqual(providerAddresses, instInfo.addresses) { 273 logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses) 274 if err = m.SetProviderAddresses(instInfo.addresses...); err != nil { 275 logger.Errorf("cannot set addresses on %q: %v", m, err) 276 } 277 } 278 return instInfo, err 279 } 280 281 // addressesEqual compares the addresses of the machine and the instance information. 282 func addressesEqual(a0, a1 []network.Address) bool { 283 if len(a0) != len(a1) { 284 logger.Tracef("address lists have different lengths %d != %d for %v != %v", 285 len(a0), len(a1), a0, a1) 286 return false 287 } 288 289 ca0 := make([]network.Address, len(a0)) 290 copy(ca0, a0) 291 network.SortAddresses(ca0, true) 292 ca1 := make([]network.Address, len(a1)) 293 copy(ca1, a1) 294 network.SortAddresses(ca1, true) 295 296 for i := range ca0 { 297 if ca0[i] != ca1[i] { 298 logger.Tracef("address entry at offset %d has a different value for %v != %v", 299 i, ca0, ca1) 300 return false 301 } 302 } 303 return true 304 }