github.com/mattyw/juju@v0.0.0-20140610034352-732aecd63861/worker/instancepoller/updater.go (about) 1 // Copyright 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package instancepoller 5 6 import ( 7 "fmt" 8 "time" 9 10 "github.com/juju/errors" 11 "github.com/juju/loggo" 12 13 "github.com/juju/juju/instance" 14 "github.com/juju/juju/state" 15 "github.com/juju/juju/state/api/params" 16 "github.com/juju/juju/state/watcher" 17 ) 18 19 var logger = loggo.GetLogger("juju.worker.instanceupdater") 20 21 // ShortPoll and LongPoll hold the polling intervals for the instance 22 // updater. When a machine has no address or is not started, it will be 23 // polled at ShortPoll intervals until it does, exponentially backing off 24 // with an exponent of ShortPollBackoff until a maximum(ish) of LongPoll. 25 // 26 // When a machine has an address and is started LongPoll will be used to 27 // check that the instance address or status has not changed. 28 var ( 29 ShortPoll = 1 * time.Second 30 ShortPollBackoff = 2.0 31 LongPoll = 15 * time.Minute 32 ) 33 34 type machine interface { 35 Id() string 36 InstanceId() (instance.Id, error) 37 Addresses() []instance.Address 38 SetAddresses(...instance.Address) error 39 InstanceStatus() (string, error) 40 SetInstanceStatus(status string) error 41 String() string 42 Refresh() error 43 Life() state.Life 44 Status() (status params.Status, info string, data params.StatusData, err error) 45 IsManual() (bool, error) 46 } 47 48 type instanceInfo struct { 49 addresses []instance.Address 50 status string 51 } 52 53 type machineContext interface { 54 killAll(err error) 55 instanceInfo(id instance.Id) (instanceInfo, error) 56 dying() <-chan struct{} 57 } 58 59 type machineAddress struct { 60 machine machine 61 addresses []instance.Address 62 } 63 64 var _ machine = (*state.Machine)(nil) 65 66 type machinesWatcher interface { 67 Changes() <-chan []string 68 Err() error 69 Stop() error 70 } 71 72 type updaterContext interface { 73 newMachineContext() machineContext 74 getMachine(id string) (machine, error) 75 dying() <-chan struct{} 76 } 77 78 type updater struct { 79 context updaterContext 80 machines map[string]chan struct{} 81 machineDead chan machine 82 } 83 84 // watchMachinesLoop watches for changes provided by the given 85 // machinesWatcher and starts machine goroutines to deal 86 // with them, using the provided newMachineContext 87 // function to create the appropriate context for each new machine id. 88 func watchMachinesLoop(context updaterContext, w machinesWatcher) (err error) { 89 p := &updater{ 90 context: context, 91 machines: make(map[string]chan struct{}), 92 machineDead: make(chan machine), 93 } 94 defer func() { 95 if stopErr := w.Stop(); stopErr != nil { 96 if err == nil { 97 err = fmt.Errorf("error stopping watcher: %v", stopErr) 98 } else { 99 logger.Warningf("ignoring error when stopping watcher: %v", stopErr) 100 } 101 } 102 for len(p.machines) > 0 { 103 delete(p.machines, (<-p.machineDead).Id()) 104 } 105 }() 106 for { 107 select { 108 case ids, ok := <-w.Changes(): 109 if !ok { 110 return watcher.MustErr(w) 111 } 112 if err := p.startMachines(ids); err != nil { 113 return err 114 } 115 case m := <-p.machineDead: 116 delete(p.machines, m.Id()) 117 case <-p.context.dying(): 118 return nil 119 } 120 } 121 } 122 123 func (p *updater) startMachines(ids []string) error { 124 for _, id := range ids { 125 if c := p.machines[id]; c == nil { 126 // We don't know about the machine - start 127 // a goroutine to deal with it. 128 m, err := p.context.getMachine(id) 129 if errors.IsNotFound(err) { 130 logger.Warningf("watcher gave notification of non-existent machine %q", id) 131 continue 132 } 133 if err != nil { 134 return err 135 } 136 // We don't poll manual machines. 137 isManual, err := m.IsManual() 138 if err != nil { 139 return err 140 } 141 if isManual { 142 continue 143 } 144 c = make(chan struct{}) 145 p.machines[id] = c 146 go runMachine(p.context.newMachineContext(), m, c, p.machineDead) 147 } else { 148 c <- struct{}{} 149 } 150 } 151 return nil 152 } 153 154 // runMachine processes the address and status publishing for a given machine. 155 // We assume that the machine is alive when this is first called. 156 func runMachine(context machineContext, m machine, changed <-chan struct{}, died chan<- machine) { 157 defer func() { 158 // We can't just send on the died channel because the 159 // central loop might be trying to write to us on the 160 // changed channel. 161 for { 162 select { 163 case died <- m: 164 return 165 case <-changed: 166 } 167 } 168 }() 169 if err := machineLoop(context, m, changed); err != nil { 170 context.killAll(err) 171 } 172 } 173 174 func machineLoop(context machineContext, m machine, changed <-chan struct{}) error { 175 // Use a short poll interval when initially waiting for 176 // a machine's address and machine agent to start, and a long one when it already 177 // has an address and the machine agent is started. 178 pollInterval := ShortPoll 179 pollInstance := true 180 for { 181 if pollInstance { 182 instInfo, err := pollInstanceInfo(context, m) 183 if err != nil && !state.IsNotProvisionedError(err) { 184 // If the provider doesn't implement Addresses/Status now, 185 // it never will until we're upgraded, so don't bother 186 // asking any more. We could use less resources 187 // by taking down the entire worker, but this is easier for now 188 // (and hopefully the local provider will implement 189 // Addresses/Status in the not-too-distant future), 190 // so we won't need to worry about this case at all. 191 if errors.IsNotImplemented(err) { 192 pollInterval = 365 * 24 * time.Hour 193 } else { 194 return err 195 } 196 } 197 machineStatus := params.StatusPending 198 if err == nil { 199 if machineStatus, _, _, err = m.Status(); err != nil { 200 logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err) 201 } 202 } 203 if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == params.StatusStarted { 204 // We've got at least one address and a status and instance is started, so poll infrequently. 205 pollInterval = LongPoll 206 } else if pollInterval < LongPoll { 207 // We have no addresses or not started - poll increasingly rarely 208 // until we do. 209 pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff) 210 } 211 pollInstance = false 212 } 213 select { 214 case <-time.After(pollInterval): 215 pollInstance = true 216 case <-context.dying(): 217 return nil 218 case <-changed: 219 if err := m.Refresh(); err != nil { 220 return err 221 } 222 if m.Life() == state.Dead { 223 return nil 224 } 225 } 226 } 227 } 228 229 // pollInstanceInfo checks the current provider addresses and status 230 // for the given machine's instance, and sets them on the machine if they've changed. 231 func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) { 232 instInfo = instanceInfo{} 233 instId, err := m.InstanceId() 234 // We can't ask the machine for its addresses if it isn't provisioned yet. 235 if state.IsNotProvisionedError(err) { 236 return instInfo, err 237 } 238 if err != nil { 239 return instInfo, fmt.Errorf("cannot get machine's instance id: %v", err) 240 } 241 instInfo, err = context.instanceInfo(instId) 242 if err != nil { 243 if errors.IsNotImplemented(err) { 244 return instInfo, err 245 } 246 logger.Warningf("cannot get instance info for instance %q: %v", instId, err) 247 return instInfo, nil 248 } 249 currentInstStatus, err := m.InstanceStatus() 250 if err != nil { 251 // This should never occur since the machine is provisioned. 252 // But just in case, we reset polled status so we try again next time. 253 logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err) 254 instInfo.status = "" 255 } else { 256 if instInfo.status != currentInstStatus { 257 logger.Infof("machine %q has new instance status: %v", m.Id(), instInfo.status) 258 if err = m.SetInstanceStatus(instInfo.status); err != nil { 259 logger.Errorf("cannot set instance status on %q: %v", m, err) 260 } 261 } 262 } 263 if !addressesEqual(m.Addresses(), instInfo.addresses) { 264 logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses) 265 if err = m.SetAddresses(instInfo.addresses...); err != nil { 266 logger.Errorf("cannot set addresses on %q: %v", m, err) 267 } 268 } 269 return instInfo, err 270 } 271 272 func addressesEqual(a0, a1 []instance.Address) bool { 273 if len(a0) != len(a1) { 274 return false 275 } 276 for i := range a0 { 277 if a0[i] != a1[i] { 278 return false 279 } 280 } 281 return true 282 }