github.com/rogpeppe/juju@v0.0.0-20140613142852-6337964b789e/worker/instancepoller/updater.go (about) 1 // Copyright 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package instancepoller 5 6 import ( 7 "fmt" 8 "time" 9 10 "github.com/juju/errors" 11 "github.com/juju/loggo" 12 13 "github.com/juju/juju/instance" 14 "github.com/juju/juju/network" 15 "github.com/juju/juju/state" 16 "github.com/juju/juju/state/api/params" 17 "github.com/juju/juju/state/watcher" 18 ) 19 20 var logger = loggo.GetLogger("juju.worker.instanceupdater") 21 22 // ShortPoll and LongPoll hold the polling intervals for the instance 23 // updater. When a machine has no address or is not started, it will be 24 // polled at ShortPoll intervals until it does, exponentially backing off 25 // with an exponent of ShortPollBackoff until a maximum(ish) of LongPoll. 26 // 27 // When a machine has an address and is started LongPoll will be used to 28 // check that the instance address or status has not changed. 29 var ( 30 ShortPoll = 1 * time.Second 31 ShortPollBackoff = 2.0 32 LongPoll = 15 * time.Minute 33 ) 34 35 type machine interface { 36 Id() string 37 InstanceId() (instance.Id, error) 38 Addresses() []network.Address 39 SetAddresses(...network.Address) error 40 InstanceStatus() (string, error) 41 SetInstanceStatus(status string) error 42 String() string 43 Refresh() error 44 Life() state.Life 45 Status() (status params.Status, info string, data params.StatusData, err error) 46 IsManual() (bool, error) 47 } 48 49 type instanceInfo struct { 50 addresses []network.Address 51 status string 52 } 53 54 type machineContext interface { 55 killAll(err error) 56 instanceInfo(id instance.Id) (instanceInfo, error) 57 dying() <-chan struct{} 58 } 59 60 type machineAddress struct { 61 machine machine 62 addresses []network.Address 63 } 64 65 var _ machine = (*state.Machine)(nil) 66 67 type machinesWatcher interface { 68 Changes() <-chan []string 69 Err() error 70 Stop() error 71 } 72 73 type updaterContext interface { 74 newMachineContext() machineContext 75 getMachine(id string) (machine, error) 76 dying() <-chan struct{} 77 } 78 79 type updater struct { 80 context updaterContext 81 machines map[string]chan struct{} 82 machineDead chan machine 83 } 84 85 // watchMachinesLoop watches for changes provided by the given 86 // machinesWatcher and starts machine goroutines to deal 87 // with them, using the provided newMachineContext 88 // function to create the appropriate context for each new machine id. 89 func watchMachinesLoop(context updaterContext, w machinesWatcher) (err error) { 90 p := &updater{ 91 context: context, 92 machines: make(map[string]chan struct{}), 93 machineDead: make(chan machine), 94 } 95 defer func() { 96 if stopErr := w.Stop(); stopErr != nil { 97 if err == nil { 98 err = fmt.Errorf("error stopping watcher: %v", stopErr) 99 } else { 100 logger.Warningf("ignoring error when stopping watcher: %v", stopErr) 101 } 102 } 103 for len(p.machines) > 0 { 104 delete(p.machines, (<-p.machineDead).Id()) 105 } 106 }() 107 for { 108 select { 109 case ids, ok := <-w.Changes(): 110 if !ok { 111 return watcher.MustErr(w) 112 } 113 if err := p.startMachines(ids); err != nil { 114 return err 115 } 116 case m := <-p.machineDead: 117 delete(p.machines, m.Id()) 118 case <-p.context.dying(): 119 return nil 120 } 121 } 122 } 123 124 func (p *updater) startMachines(ids []string) error { 125 for _, id := range ids { 126 if c := p.machines[id]; c == nil { 127 // We don't know about the machine - start 128 // a goroutine to deal with it. 129 m, err := p.context.getMachine(id) 130 if errors.IsNotFound(err) { 131 logger.Warningf("watcher gave notification of non-existent machine %q", id) 132 continue 133 } 134 if err != nil { 135 return err 136 } 137 // We don't poll manual machines. 138 isManual, err := m.IsManual() 139 if err != nil { 140 return err 141 } 142 if isManual { 143 continue 144 } 145 c = make(chan struct{}) 146 p.machines[id] = c 147 go runMachine(p.context.newMachineContext(), m, c, p.machineDead) 148 } else { 149 c <- struct{}{} 150 } 151 } 152 return nil 153 } 154 155 // runMachine processes the address and status publishing for a given machine. 156 // We assume that the machine is alive when this is first called. 157 func runMachine(context machineContext, m machine, changed <-chan struct{}, died chan<- machine) { 158 defer func() { 159 // We can't just send on the died channel because the 160 // central loop might be trying to write to us on the 161 // changed channel. 162 for { 163 select { 164 case died <- m: 165 return 166 case <-changed: 167 } 168 } 169 }() 170 if err := machineLoop(context, m, changed); err != nil { 171 context.killAll(err) 172 } 173 } 174 175 func machineLoop(context machineContext, m machine, changed <-chan struct{}) error { 176 // Use a short poll interval when initially waiting for 177 // a machine's address and machine agent to start, and a long one when it already 178 // has an address and the machine agent is started. 179 pollInterval := ShortPoll 180 pollInstance := true 181 for { 182 if pollInstance { 183 instInfo, err := pollInstanceInfo(context, m) 184 if err != nil && !state.IsNotProvisionedError(err) { 185 // If the provider doesn't implement Addresses/Status now, 186 // it never will until we're upgraded, so don't bother 187 // asking any more. We could use less resources 188 // by taking down the entire worker, but this is easier for now 189 // (and hopefully the local provider will implement 190 // Addresses/Status in the not-too-distant future), 191 // so we won't need to worry about this case at all. 192 if errors.IsNotImplemented(err) { 193 pollInterval = 365 * 24 * time.Hour 194 } else { 195 return err 196 } 197 } 198 machineStatus := params.StatusPending 199 if err == nil { 200 if machineStatus, _, _, err = m.Status(); err != nil { 201 logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err) 202 } 203 } 204 if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == params.StatusStarted { 205 // We've got at least one address and a status and instance is started, so poll infrequently. 206 pollInterval = LongPoll 207 } else if pollInterval < LongPoll { 208 // We have no addresses or not started - poll increasingly rarely 209 // until we do. 210 pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff) 211 } 212 pollInstance = false 213 } 214 select { 215 case <-time.After(pollInterval): 216 pollInstance = true 217 case <-context.dying(): 218 return nil 219 case <-changed: 220 if err := m.Refresh(); err != nil { 221 return err 222 } 223 if m.Life() == state.Dead { 224 return nil 225 } 226 } 227 } 228 } 229 230 // pollInstanceInfo checks the current provider addresses and status 231 // for the given machine's instance, and sets them on the machine if they've changed. 232 func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) { 233 instInfo = instanceInfo{} 234 instId, err := m.InstanceId() 235 // We can't ask the machine for its addresses if it isn't provisioned yet. 236 if state.IsNotProvisionedError(err) { 237 return instInfo, err 238 } 239 if err != nil { 240 return instInfo, fmt.Errorf("cannot get machine's instance id: %v", err) 241 } 242 instInfo, err = context.instanceInfo(instId) 243 if err != nil { 244 if errors.IsNotImplemented(err) { 245 return instInfo, err 246 } 247 logger.Warningf("cannot get instance info for instance %q: %v", instId, err) 248 return instInfo, nil 249 } 250 currentInstStatus, err := m.InstanceStatus() 251 if err != nil { 252 // This should never occur since the machine is provisioned. 253 // But just in case, we reset polled status so we try again next time. 254 logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err) 255 instInfo.status = "" 256 } else { 257 if instInfo.status != currentInstStatus { 258 logger.Infof("machine %q has new instance status: %v", m.Id(), instInfo.status) 259 if err = m.SetInstanceStatus(instInfo.status); err != nil { 260 logger.Errorf("cannot set instance status on %q: %v", m, err) 261 } 262 } 263 } 264 if !addressesEqual(m.Addresses(), instInfo.addresses) { 265 logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses) 266 if err = m.SetAddresses(instInfo.addresses...); err != nil { 267 logger.Errorf("cannot set addresses on %q: %v", m, err) 268 } 269 } 270 return instInfo, err 271 } 272 273 func addressesEqual(a0, a1 []network.Address) bool { 274 if len(a0) != len(a1) { 275 return false 276 } 277 for i := range a0 { 278 if a0[i] != a1[i] { 279 return false 280 } 281 } 282 return true 283 }