// Copyright 2016 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package state

import (
	"sync"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/prometheus/client_golang/prometheus"
	"gopkg.in/juju/worker.v1"
	"gopkg.in/juju/worker.v1/catacomb"
	"gopkg.in/juju/worker.v1/dependency"
	"gopkg.in/tomb.v2"

	coreagent "github.com/juju/juju/agent"
	"github.com/juju/juju/state"
	"github.com/juju/juju/state/statemetrics"
	"github.com/juju/juju/wrench"
)

var logger = loggo.GetLogger("juju.worker.state")

// ManifoldConfig provides the dependencies for Manifold.
type ManifoldConfig struct {
	// AgentName is the name of the agent manifold this worker
	// obtains its *coreagent.Agent (and hence agent config) from.
	AgentName string

	// StateConfigWatcherName is the name of the manifold queried to
	// learn whether the agent holds state-serving config; the worker
	// refuses to start (dependency missing) when it does not.
	StateConfigWatcherName string

	// OpenStatePool opens the *state.StatePool from the agent's
	// current configuration.
	OpenStatePool func(coreagent.Config) (*state.StatePool, error)

	// PingInterval is how often each per-model worker pings its mongo
	// session; a zero value selects defaultPingInterval.
	PingInterval time.Duration

	// PrometheusRegisterer receives the state metrics collector
	// (unless disabled via the "disable-state-metrics" feature).
	PrometheusRegisterer prometheus.Registerer

	// SetStatePool is called with the state pool when it is created,
	// and called again with nil just before the state pool is closed.
	// This is used for publishing the state pool to the agent's
	// introspection worker, which runs outside of the dependency
	// engine; hence the manifold's Output cannot be relied upon.
	SetStatePool func(*state.StatePool)
}

// Validate validates the manifold configuration.
43 func (config ManifoldConfig) Validate() error { 44 if config.AgentName == "" { 45 return errors.NotValidf("empty AgentName") 46 } 47 if config.StateConfigWatcherName == "" { 48 return errors.NotValidf("empty StateConfigWatcherName") 49 } 50 if config.OpenStatePool == nil { 51 return errors.NotValidf("nil OpenStatePool") 52 } 53 if config.PrometheusRegisterer == nil { 54 return errors.NotValidf("nil PrometheusRegisterer") 55 } 56 if config.SetStatePool == nil { 57 return errors.NotValidf("nil SetStatePool") 58 } 59 return nil 60 } 61 62 const defaultPingInterval = 15 * time.Second 63 64 // Manifold returns a manifold whose worker which wraps a 65 // *state.State, which is in turn wrapper by a StateTracker. It will 66 // exit if the State's associated mongodb session dies. 67 func Manifold(config ManifoldConfig) dependency.Manifold { 68 return dependency.Manifold{ 69 Inputs: []string{ 70 config.AgentName, 71 config.StateConfigWatcherName, 72 }, 73 Start: func(context dependency.Context) (worker.Worker, error) { 74 if err := config.Validate(); err != nil { 75 return nil, errors.Trace(err) 76 } 77 78 // Get the agent. 79 var agent coreagent.Agent 80 if err := context.Get(config.AgentName, &agent); err != nil { 81 return nil, err 82 } 83 84 // Confirm we're running in a state server by asking the 85 // stateconfigwatcher manifold. 
86 var haveStateConfig bool 87 if err := context.Get(config.StateConfigWatcherName, &haveStateConfig); err != nil { 88 return nil, err 89 } 90 if !haveStateConfig { 91 return nil, errors.Annotate(dependency.ErrMissing, "no StateServingInfo in config") 92 } 93 94 pool, err := config.OpenStatePool(agent.CurrentConfig()) 95 if err != nil { 96 return nil, errors.Trace(err) 97 } 98 stTracker := newStateTracker(pool) 99 100 pingInterval := config.PingInterval 101 if pingInterval == 0 { 102 pingInterval = defaultPingInterval 103 } 104 105 w := &stateWorker{ 106 stTracker: stTracker, 107 pingInterval: pingInterval, 108 prometheusRegisterer: config.PrometheusRegisterer, 109 setStatePool: config.SetStatePool, 110 } 111 if err := catacomb.Invoke(catacomb.Plan{ 112 Site: &w.catacomb, 113 Work: w.loop, 114 }); err != nil { 115 if err := stTracker.Done(); err != nil { 116 logger.Warningf("error releasing state: %v", err) 117 } 118 return nil, errors.Trace(err) 119 } 120 return w, nil 121 }, 122 Output: outputFunc, 123 } 124 } 125 126 // outputFunc extracts a *StateTracker from a *stateWorker. 
127 func outputFunc(in worker.Worker, out interface{}) error { 128 inWorker, _ := in.(*stateWorker) 129 if inWorker == nil { 130 return errors.Errorf("in should be a %T; got %T", inWorker, in) 131 } 132 133 switch outPointer := out.(type) { 134 case *StateTracker: 135 *outPointer = inWorker.stTracker 136 default: 137 return errors.Errorf("out should be *StateTracker; got %T", out) 138 } 139 return nil 140 } 141 142 type stateWorker struct { 143 catacomb catacomb.Catacomb 144 stTracker StateTracker 145 pingInterval time.Duration 146 prometheusRegisterer prometheus.Registerer 147 setStatePool func(*state.StatePool) 148 cleanupOnce sync.Once 149 } 150 151 func (w *stateWorker) loop() error { 152 pool, err := w.stTracker.Use() 153 if err != nil { 154 return errors.Trace(err) 155 } 156 defer w.stTracker.Done() 157 158 // Due to the current speed issues around gathering the state metrics, 159 // we allow the controller admins to specify a feature flat to disable 160 // collection. This is a short term measure until we have the model 161 // cache in the apiserver. The state metrics are just counts of models, 162 // machines, and users along with their life and status. When we have 163 // the caching middle tier, this will be almost instant rather than hitting 164 // the database. 
165 systemState := pool.SystemState() 166 controllerConfig, err := systemState.ControllerConfig() 167 if err != nil { 168 return errors.Trace(err) 169 } 170 if !controllerConfig.Features().Contains("disable-state-metrics") { 171 collector := statemetrics.New(statemetrics.NewStatePool(pool)) 172 w.prometheusRegisterer.Register(collector) 173 defer w.prometheusRegisterer.Unregister(collector) 174 } 175 176 w.setStatePool(pool) 177 defer w.setStatePool(nil) 178 179 modelWatcher := systemState.WatchModelLives() 180 w.catacomb.Add(modelWatcher) 181 182 modelStateWorkers := make(map[string]worker.Worker) 183 for { 184 select { 185 case <-w.catacomb.Dying(): 186 return w.catacomb.ErrDying() 187 188 case modelUUIDs := <-modelWatcher.Changes(): 189 for _, modelUUID := range modelUUIDs { 190 if err := w.processModelLifeChange( 191 modelUUID, 192 modelStateWorkers, 193 pool, 194 ); err != nil { 195 return errors.Trace(err) 196 } 197 } 198 // Useful for tracking down some bugs that occur when 199 // mongo is overloaded. 200 case <-time.After(30 * time.Second): 201 if wrench.IsActive("state-worker", "io-timeout") { 202 return errors.Errorf("wrench simulating i/o timeout!") 203 } 204 } 205 } 206 } 207 208 // Report conforms to the Dependency Engine Report() interface, giving an opportunity to introspect 209 // what is going on at runtime. 210 func (w *stateWorker) Report() map[string]interface{} { 211 return w.stTracker.Report() 212 } 213 214 func (w *stateWorker) processModelLifeChange( 215 modelUUID string, 216 modelStateWorkers map[string]worker.Worker, 217 pool *state.StatePool, 218 ) error { 219 remove := func() { 220 if w, ok := modelStateWorkers[modelUUID]; ok { 221 w.Kill() 222 delete(modelStateWorkers, modelUUID) 223 } 224 pool.Remove(modelUUID) 225 } 226 227 model, hp, err := pool.GetModel(modelUUID) 228 if err != nil { 229 if errors.IsNotFound(err) { 230 // Model has been removed from state. 
231 logger.Debugf("model %q removed from state", modelUUID) 232 remove() 233 return nil 234 } 235 return errors.Trace(err) 236 } 237 defer hp.Release() 238 239 if model.Life() == state.Dead { 240 // Model is Dead, and will soon be removed from state. 241 logger.Debugf("model %q is dead", modelUUID) 242 remove() 243 return nil 244 } 245 246 if modelStateWorkers[modelUUID] == nil { 247 mw := newModelStateWorker(pool, modelUUID, w.pingInterval) 248 modelStateWorkers[modelUUID] = mw 249 w.catacomb.Add(mw) 250 } 251 252 return nil 253 } 254 255 // Kill is part of the worker.Worker interface. 256 func (w *stateWorker) Kill() { 257 w.catacomb.Kill(nil) 258 } 259 260 // Wait is part of the worker.Worker interface. 261 func (w *stateWorker) Wait() error { 262 err := w.catacomb.Wait() 263 w.cleanupOnce.Do(func() { 264 // Make sure the worker has exited before closing state. 265 if err := w.stTracker.Done(); err != nil { 266 logger.Warningf("error releasing state: %v", err) 267 } 268 }) 269 return err 270 } 271 272 type modelStateWorker struct { 273 tomb tomb.Tomb 274 pool *state.StatePool 275 modelUUID string 276 pingInterval time.Duration 277 } 278 279 func newModelStateWorker( 280 pool *state.StatePool, 281 modelUUID string, 282 pingInterval time.Duration, 283 ) worker.Worker { 284 w := &modelStateWorker{ 285 pool: pool, 286 modelUUID: modelUUID, 287 pingInterval: pingInterval, 288 } 289 w.tomb.Go(w.loop) 290 return w 291 } 292 293 func (w *modelStateWorker) loop() error { 294 st, err := w.pool.Get(w.modelUUID) 295 if err != nil { 296 if errors.IsNotFound(err) { 297 // ignore not found error here, because the pooledState has already been removed. 
298 return nil 299 } 300 return errors.Trace(err) 301 } 302 defer func() { 303 st.Release() 304 w.pool.Remove(w.modelUUID) 305 }() 306 307 for { 308 select { 309 case <-w.tomb.Dying(): 310 return tomb.ErrDying 311 case <-time.After(w.pingInterval): 312 if err := st.Ping(); err != nil { 313 return errors.Annotate(err, "state ping failed") 314 } 315 } 316 } 317 } 318 319 // Kill is part of the worker.Worker interface. 320 func (w *modelStateWorker) Kill() { 321 w.tomb.Kill(nil) 322 } 323 324 // Wait is part of the worker.Worker interface. 325 func (w *modelStateWorker) Wait() error { 326 return w.tomb.Wait() 327 }