github.com/cloudbase/juju-core@v0.0.0-20140504232958-a7271ac7912f/worker/instancepoller/updater.go (about) 1 // Copyright 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package instancepoller 5 6 import ( 7 "fmt" 8 "time" 9 10 "github.com/juju/loggo" 11 12 "launchpad.net/juju-core/errors" 13 "launchpad.net/juju-core/instance" 14 "launchpad.net/juju-core/state" 15 "launchpad.net/juju-core/state/api/params" 16 "launchpad.net/juju-core/state/watcher" 17 ) 18 19 var logger = loggo.GetLogger("juju.worker.instanceupdater") 20 21 // ShortPoll and LongPoll hold the polling intervals for the instance 22 // updater. When a machine has no address or is not started, it will be 23 // polled at ShortPoll intervals until it does, exponentially backing off 24 // with an exponent of ShortPollBackoff until a maximum(ish) of LongPoll. 25 // 26 // When a machine has an address and is started LongPoll will be used to 27 // check that the instance address or status has not changed. 28 var ( 29 ShortPoll = 1 * time.Second 30 ShortPollBackoff = 2.0 31 LongPoll = 15 * time.Minute 32 ) 33 34 type machine interface { 35 Id() string 36 InstanceId() (instance.Id, error) 37 Addresses() []instance.Address 38 SetAddresses([]instance.Address) error 39 InstanceStatus() (string, error) 40 SetInstanceStatus(status string) error 41 String() string 42 Refresh() error 43 Life() state.Life 44 Status() (status params.Status, info string, data params.StatusData, err error) 45 } 46 47 type instanceInfo struct { 48 addresses []instance.Address 49 status string 50 } 51 52 type machineContext interface { 53 killAll(err error) 54 instanceInfo(id instance.Id) (instanceInfo, error) 55 dying() <-chan struct{} 56 } 57 58 type machineAddress struct { 59 machine machine 60 addresses []instance.Address 61 } 62 63 var _ machine = (*state.Machine)(nil) 64 65 type machinesWatcher interface { 66 Changes() <-chan []string 67 Err() error 68 Stop() error 69 } 70 71 type updaterContext interface { 72 newMachineContext() machineContext 73 getMachine(id string) (machine, error) 74 dying() <-chan struct{} 75 } 76 77 type updater struct { 78 context updaterContext 79 machines map[string]chan struct{} 80 machineDead chan machine 81 } 82 83 // watchMachinesLoop watches for changes provided by the given 84 // machinesWatcher and starts machine goroutines to deal 85 // with them, using the provided newMachineContext 86 // function to create the appropriate context for each new machine id. 87 func watchMachinesLoop(context updaterContext, w machinesWatcher) (err error) { 88 p := &updater{ 89 context: context, 90 machines: make(map[string]chan struct{}), 91 machineDead: make(chan machine), 92 } 93 defer func() { 94 if stopErr := w.Stop(); stopErr != nil { 95 if err == nil { 96 err = fmt.Errorf("error stopping watcher: %v", stopErr) 97 } else { 98 logger.Warningf("ignoring error when stopping watcher: %v", stopErr) 99 } 100 } 101 for len(p.machines) > 0 { 102 delete(p.machines, (<-p.machineDead).Id()) 103 } 104 }() 105 for { 106 select { 107 case ids, ok := <-w.Changes(): 108 if !ok { 109 return watcher.MustErr(w) 110 } 111 if err := p.startMachines(ids); err != nil { 112 return err 113 } 114 case m := <-p.machineDead: 115 delete(p.machines, m.Id()) 116 case <-p.context.dying(): 117 return nil 118 } 119 } 120 } 121 122 func (p *updater) startMachines(ids []string) error { 123 for _, id := range ids { 124 if c := p.machines[id]; c == nil { 125 // We don't know about the machine - start 126 // a goroutine to deal with it. 127 m, err := p.context.getMachine(id) 128 if errors.IsNotFoundError(err) { 129 logger.Warningf("watcher gave notification of non-existent machine %q", id) 130 continue 131 } 132 if err != nil { 133 return err 134 } 135 c = make(chan struct{}) 136 p.machines[id] = c 137 go runMachine(p.context.newMachineContext(), m, c, p.machineDead) 138 } else { 139 c <- struct{}{} 140 } 141 } 142 return nil 143 } 144 145 // runMachine processes the address and status publishing for a given machine. 146 // We assume that the machine is alive when this is first called. 147 func runMachine(context machineContext, m machine, changed <-chan struct{}, died chan<- machine) { 148 defer func() { 149 // We can't just send on the died channel because the 150 // central loop might be trying to write to us on the 151 // changed channel. 152 for { 153 select { 154 case died <- m: 155 return 156 case <-changed: 157 } 158 } 159 }() 160 if err := machineLoop(context, m, changed); err != nil { 161 context.killAll(err) 162 } 163 } 164 165 func machineLoop(context machineContext, m machine, changed <-chan struct{}) error { 166 // Use a short poll interval when initially waiting for 167 // a machine's address and machine agent to start, and a long one when it already 168 // has an address and the machine agent is started. 169 pollInterval := ShortPoll 170 pollInstance := true 171 for { 172 if pollInstance { 173 instInfo, err := pollInstanceInfo(context, m) 174 if err != nil { 175 // If the provider doesn't implement Addresses/Status now, 176 // it never will until we're upgraded, so don't bother 177 // asking any more. We could use less resources 178 // by taking down the entire worker, but this is easier for now 179 // (and hopefully the local provider will implement 180 // Addresses/Status in the not-too-distant future), 181 // so we won't need to worry about this case at all. 182 if errors.IsNotImplementedError(err) { 183 pollInterval = 365 * 24 * time.Hour 184 } else { 185 return err 186 } 187 } 188 machineStatus, _, _, err := m.Status() 189 if err != nil { 190 logger.Warningf("cannot get current machine status for machine %v: %v", m.Id(), err) 191 } 192 if len(instInfo.addresses) > 0 && instInfo.status != "" && machineStatus == params.StatusStarted { 193 // We've got at least one address and a status and instance is started, so poll infrequently. 194 pollInterval = LongPoll 195 } else if pollInterval < LongPoll { 196 // We have no addresses or not started - poll increasingly rarely 197 // until we do. 198 pollInterval = time.Duration(float64(pollInterval) * ShortPollBackoff) 199 } 200 pollInstance = false 201 } 202 select { 203 case <-time.After(pollInterval): 204 pollInstance = true 205 case <-context.dying(): 206 return nil 207 case <-changed: 208 if err := m.Refresh(); err != nil { 209 return err 210 } 211 if m.Life() == state.Dead { 212 return nil 213 } 214 } 215 } 216 } 217 218 // pollInstanceInfo checks the current provider addresses and status 219 // for the given machine's instance, and sets them on the machine if they've changed. 220 func pollInstanceInfo(context machineContext, m machine) (instInfo instanceInfo, err error) { 221 instInfo = instanceInfo{} 222 instId, err := m.InstanceId() 223 if err != nil && !state.IsNotProvisionedError(err) { 224 return instInfo, fmt.Errorf("cannot get machine's instance id: %v", err) 225 } 226 instInfo, err = context.instanceInfo(instId) 227 if err != nil { 228 if errors.IsNotImplementedError(err) { 229 return instInfo, err 230 } 231 logger.Warningf("cannot get instance info for instance %q: %v", instId, err) 232 return instInfo, nil 233 } 234 currentInstStatus, err := m.InstanceStatus() 235 if err != nil { 236 // This should never occur since the machine is provisioned. 237 // But just in case, we reset polled status so we try again next time. 238 logger.Warningf("cannot get current instance status for machine %v: %v", m.Id(), err) 239 instInfo.status = "" 240 } else { 241 if instInfo.status != currentInstStatus { 242 logger.Infof("machine %q has new instance status: %v", m.Id(), instInfo.status) 243 if err = m.SetInstanceStatus(instInfo.status); err != nil { 244 logger.Errorf("cannot set instance status on %q: %v", m, err) 245 } 246 } 247 } 248 if !addressesEqual(m.Addresses(), instInfo.addresses) { 249 logger.Infof("machine %q has new addresses: %v", m.Id(), instInfo.addresses) 250 if err = m.SetAddresses(instInfo.addresses); err != nil { 251 logger.Errorf("cannot set addresses on %q: %v", m, err) 252 } 253 } 254 return instInfo, err 255 } 256 257 func addressesEqual(a0, a1 []instance.Address) bool { 258 if len(a0) != len(a1) { 259 return false 260 } 261 for i := range a0 { 262 if a0[i] != a1[i] { 263 return false 264 } 265 } 266 return true 267 }