github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/state/manifold.go (about)

     1  // Copyright 2016 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package state
     5  
     6  import (
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/juju/errors"
    11  	"github.com/juju/loggo"
    12  	"github.com/prometheus/client_golang/prometheus"
    13  	"gopkg.in/juju/worker.v1"
    14  	"gopkg.in/juju/worker.v1/catacomb"
    15  	"gopkg.in/juju/worker.v1/dependency"
    16  	"gopkg.in/tomb.v2"
    17  
    18  	coreagent "github.com/juju/juju/agent"
    19  	"github.com/juju/juju/state"
    20  	"github.com/juju/juju/state/statemetrics"
    21  	"github.com/juju/juju/wrench"
    22  )
    23  
// logger is the package-level logger for the state worker.
var logger = loggo.GetLogger("juju.worker.state")
    25  
// ManifoldConfig provides the dependencies for Manifold.
type ManifoldConfig struct {
	// AgentName is the name of the agent manifold, from which the
	// agent configuration is obtained.
	AgentName string
	// StateConfigWatcherName is the name of the manifold that reports
	// whether the agent's config contains state serving info.
	StateConfigWatcherName string
	// OpenStatePool opens a *state.StatePool using the agent's
	// current configuration.
	OpenStatePool func(coreagent.Config) (*state.StatePool, error)
	// PingInterval is how often each model's state connection is
	// pinged; if zero, defaultPingInterval is used.
	PingInterval time.Duration
	// PrometheusRegisterer is used to register (and unregister) the
	// state metrics collector.
	PrometheusRegisterer prometheus.Registerer

	// SetStatePool is called with the state pool when it is created,
	// and called again with nil just before the state pool is closed.
	// This is used for publishing the state pool to the agent's
	// introspection worker, which runs outside of the dependency
	// engine; hence the manifold's Output cannot be relied upon.
	SetStatePool func(*state.StatePool)
}
    41  
    42  // Validate validates the manifold configuration.
    43  func (config ManifoldConfig) Validate() error {
    44  	if config.AgentName == "" {
    45  		return errors.NotValidf("empty AgentName")
    46  	}
    47  	if config.StateConfigWatcherName == "" {
    48  		return errors.NotValidf("empty StateConfigWatcherName")
    49  	}
    50  	if config.OpenStatePool == nil {
    51  		return errors.NotValidf("nil OpenStatePool")
    52  	}
    53  	if config.PrometheusRegisterer == nil {
    54  		return errors.NotValidf("nil PrometheusRegisterer")
    55  	}
    56  	if config.SetStatePool == nil {
    57  		return errors.NotValidf("nil SetStatePool")
    58  	}
    59  	return nil
    60  }
    61  
// defaultPingInterval is the ping cadence used when
// ManifoldConfig.PingInterval is left as zero.
const defaultPingInterval = 15 * time.Second
    63  
// Manifold returns a manifold whose worker wraps a *state.StatePool,
// which is in turn wrapped by a StateTracker. It will exit if the
// State's associated mongodb session dies.
func Manifold(config ManifoldConfig) dependency.Manifold {
	return dependency.Manifold{
		Inputs: []string{
			config.AgentName,
			config.StateConfigWatcherName,
		},
		Start: func(context dependency.Context) (worker.Worker, error) {
			if err := config.Validate(); err != nil {
				return nil, errors.Trace(err)
			}

			// Get the agent.
			var agent coreagent.Agent
			if err := context.Get(config.AgentName, &agent); err != nil {
				return nil, err
			}

			// Confirm we're running in a state server by asking the
			// stateconfigwatcher manifold.
			var haveStateConfig bool
			if err := context.Get(config.StateConfigWatcherName, &haveStateConfig); err != nil {
				return nil, err
			}
			if !haveStateConfig {
				// ErrMissing tells the dependency engine to treat this
				// manifold's output as unavailable rather than broken.
				return nil, errors.Annotate(dependency.ErrMissing, "no StateServingInfo in config")
			}

			pool, err := config.OpenStatePool(agent.CurrentConfig())
			if err != nil {
				return nil, errors.Trace(err)
			}
			stTracker := newStateTracker(pool)

			// A zero PingInterval means "use the default".
			pingInterval := config.PingInterval
			if pingInterval == 0 {
				pingInterval = defaultPingInterval
			}

			w := &stateWorker{
				stTracker:            stTracker,
				pingInterval:         pingInterval,
				prometheusRegisterer: config.PrometheusRegisterer,
				setStatePool:         config.SetStatePool,
			}
			if err := catacomb.Invoke(catacomb.Plan{
				Site: &w.catacomb,
				Work: w.loop,
			}); err != nil {
				// The worker was never started, so release the
				// tracker's reference here to close the pool.
				if err := stTracker.Done(); err != nil {
					logger.Warningf("error releasing state: %v", err)
				}
				return nil, errors.Trace(err)
			}
			return w, nil
		},
		Output: outputFunc,
	}
}
   125  
   126  // outputFunc extracts a *StateTracker from a *stateWorker.
   127  func outputFunc(in worker.Worker, out interface{}) error {
   128  	inWorker, _ := in.(*stateWorker)
   129  	if inWorker == nil {
   130  		return errors.Errorf("in should be a %T; got %T", inWorker, in)
   131  	}
   132  
   133  	switch outPointer := out.(type) {
   134  	case *StateTracker:
   135  		*outPointer = inWorker.stTracker
   136  	default:
   137  		return errors.Errorf("out should be *StateTracker; got %T", out)
   138  	}
   139  	return nil
   140  }
   141  
// stateWorker manages the lifetime of the state pool: it publishes
// the pool via setStatePool, registers state metrics, and maintains a
// ping worker per model for as long as the model is alive.
type stateWorker struct {
	catacomb catacomb.Catacomb
	// stTracker ref-counts use of the state pool; the final Done
	// closes the pool.
	stTracker            StateTracker
	pingInterval         time.Duration
	prometheusRegisterer prometheus.Registerer
	setStatePool         func(*state.StatePool)
	// cleanupOnce guards the release of the tracker reference in
	// Wait, which may be called more than once.
	cleanupOnce sync.Once
}
   150  
   151  func (w *stateWorker) loop() error {
   152  	pool, err := w.stTracker.Use()
   153  	if err != nil {
   154  		return errors.Trace(err)
   155  	}
   156  	defer w.stTracker.Done()
   157  
   158  	// Due to the current speed issues around gathering the state metrics,
   159  	// we allow the controller admins to specify a feature flat to disable
   160  	// collection. This is a short term measure until we have the model
   161  	// cache in the apiserver. The state metrics are just counts of models,
   162  	// machines, and users along with their life and status. When we have
   163  	// the caching middle tier, this will be almost instant rather than hitting
   164  	// the database.
   165  	systemState := pool.SystemState()
   166  	controllerConfig, err := systemState.ControllerConfig()
   167  	if err != nil {
   168  		return errors.Trace(err)
   169  	}
   170  	if !controllerConfig.Features().Contains("disable-state-metrics") {
   171  		collector := statemetrics.New(statemetrics.NewStatePool(pool))
   172  		w.prometheusRegisterer.Register(collector)
   173  		defer w.prometheusRegisterer.Unregister(collector)
   174  	}
   175  
   176  	w.setStatePool(pool)
   177  	defer w.setStatePool(nil)
   178  
   179  	modelWatcher := systemState.WatchModelLives()
   180  	w.catacomb.Add(modelWatcher)
   181  
   182  	modelStateWorkers := make(map[string]worker.Worker)
   183  	for {
   184  		select {
   185  		case <-w.catacomb.Dying():
   186  			return w.catacomb.ErrDying()
   187  
   188  		case modelUUIDs := <-modelWatcher.Changes():
   189  			for _, modelUUID := range modelUUIDs {
   190  				if err := w.processModelLifeChange(
   191  					modelUUID,
   192  					modelStateWorkers,
   193  					pool,
   194  				); err != nil {
   195  					return errors.Trace(err)
   196  				}
   197  			}
   198  		// Useful for tracking down some bugs that occur when
   199  		// mongo is overloaded.
   200  		case <-time.After(30 * time.Second):
   201  			if wrench.IsActive("state-worker", "io-timeout") {
   202  				return errors.Errorf("wrench simulating i/o timeout!")
   203  			}
   204  		}
   205  	}
   206  }
   207  
// Report conforms to the Dependency Engine Report() interface, giving an opportunity to introspect
// what is going on at runtime. The report is delegated to the state
// tracker, which knows who currently holds references to the pool.
func (w *stateWorker) Report() map[string]interface{} {
	return w.stTracker.Report()
}
   213  
   214  func (w *stateWorker) processModelLifeChange(
   215  	modelUUID string,
   216  	modelStateWorkers map[string]worker.Worker,
   217  	pool *state.StatePool,
   218  ) error {
   219  	remove := func() {
   220  		if w, ok := modelStateWorkers[modelUUID]; ok {
   221  			w.Kill()
   222  			delete(modelStateWorkers, modelUUID)
   223  		}
   224  		pool.Remove(modelUUID)
   225  	}
   226  
   227  	model, hp, err := pool.GetModel(modelUUID)
   228  	if err != nil {
   229  		if errors.IsNotFound(err) {
   230  			// Model has been removed from state.
   231  			logger.Debugf("model %q removed from state", modelUUID)
   232  			remove()
   233  			return nil
   234  		}
   235  		return errors.Trace(err)
   236  	}
   237  	defer hp.Release()
   238  
   239  	if model.Life() == state.Dead {
   240  		// Model is Dead, and will soon be removed from state.
   241  		logger.Debugf("model %q is dead", modelUUID)
   242  		remove()
   243  		return nil
   244  	}
   245  
   246  	if modelStateWorkers[modelUUID] == nil {
   247  		mw := newModelStateWorker(pool, modelUUID, w.pingInterval)
   248  		modelStateWorkers[modelUUID] = mw
   249  		w.catacomb.Add(mw)
   250  	}
   251  
   252  	return nil
   253  }
   254  
// Kill is part of the worker.Worker interface. It asks the catacomb
// (and hence all added model workers) to shut down cleanly.
func (w *stateWorker) Kill() {
	w.catacomb.Kill(nil)
}
   259  
// Wait is part of the worker.Worker interface. After the catacomb has
// finished, it releases the worker's state tracker reference exactly
// once (cleanupOnce), even if Wait is called multiple times.
func (w *stateWorker) Wait() error {
	err := w.catacomb.Wait()
	w.cleanupOnce.Do(func() {
		// Make sure the worker has exited before closing state.
		if err := w.stTracker.Done(); err != nil {
			logger.Warningf("error releasing state: %v", err)
		}
	})
	return err
}
   271  
// modelStateWorker pings a single model's state connection on a fixed
// interval, dying (and removing the model from the pool) when the
// ping fails or the worker is killed.
type modelStateWorker struct {
	tomb         tomb.Tomb
	pool         *state.StatePool
	modelUUID    string
	pingInterval time.Duration
}
   278  
   279  func newModelStateWorker(
   280  	pool *state.StatePool,
   281  	modelUUID string,
   282  	pingInterval time.Duration,
   283  ) worker.Worker {
   284  	w := &modelStateWorker{
   285  		pool:         pool,
   286  		modelUUID:    modelUUID,
   287  		pingInterval: pingInterval,
   288  	}
   289  	w.tomb.Go(w.loop)
   290  	return w
   291  }
   292  
   293  func (w *modelStateWorker) loop() error {
   294  	st, err := w.pool.Get(w.modelUUID)
   295  	if err != nil {
   296  		if errors.IsNotFound(err) {
   297  			// ignore not found error here, because the pooledState has already been removed.
   298  			return nil
   299  		}
   300  		return errors.Trace(err)
   301  	}
   302  	defer func() {
   303  		st.Release()
   304  		w.pool.Remove(w.modelUUID)
   305  	}()
   306  
   307  	for {
   308  		select {
   309  		case <-w.tomb.Dying():
   310  			return tomb.ErrDying
   311  		case <-time.After(w.pingInterval):
   312  			if err := st.Ping(); err != nil {
   313  				return errors.Annotate(err, "state ping failed")
   314  			}
   315  		}
   316  	}
   317  }
   318  
// Kill is part of the worker.Worker interface. It signals the tomb so
// loop exits via tomb.ErrDying.
func (w *modelStateWorker) Kill() {
	w.tomb.Kill(nil)
}
   323  
// Wait is part of the worker.Worker interface. It blocks until loop
// has returned and reports its final error.
func (w *modelStateWorker) Wait() error {
	return w.tomb.Wait()
}