github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/state/statemetrics/statemetrics.go (about)

     1  // Copyright 2016 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package statemetrics
     5  
     6  import (
     7  	"github.com/juju/errors"
     8  	"github.com/juju/loggo"
     9  	"github.com/prometheus/client_golang/prometheus"
    10  )
    11  
    12  const (
    13  	metricsNamespace = "juju_state"
    14  
    15  	statusLabel           = "status"
    16  	lifeLabel             = "life"
    17  	disabledLabel         = "disabled"
    18  	deletedLabel          = "deleted"
    19  	controllerAccessLabel = "controller_access"
    20  	domainLabel           = "domain"
    21  	agentStatusLabel      = "agent_status"
    22  	machineStatusLabel    = "machine_status"
    23  )
    24  
    25  var (
    26  	machineLabelNames = []string{
    27  		agentStatusLabel,
    28  		lifeLabel,
    29  		machineStatusLabel,
    30  	}
    31  
    32  	modelLabelNames = []string{
    33  		lifeLabel,
    34  		statusLabel,
    35  	}
    36  
    37  	userLabelNames = []string{
    38  		controllerAccessLabel,
    39  		deletedLabel,
    40  		disabledLabel,
    41  		domainLabel,
    42  	}
    43  
    44  	logger = loggo.GetLogger("juju.state.statemetrics")
    45  )
    46  
    47  // Collector is a prometheus.Collector that collects metrics about
    48  // the Juju global state.
    49  type Collector struct {
    50  	pool StatePool
    51  
    52  	scrapeDuration prometheus.Gauge
    53  	scrapeErrors   prometheus.Gauge
    54  
    55  	models   *prometheus.GaugeVec
    56  	machines *prometheus.GaugeVec
    57  	users    *prometheus.GaugeVec
    58  }
    59  
    60  // New returns a new Collector.
    61  func New(pool StatePool) *Collector {
    62  	return &Collector{
    63  		pool: pool,
    64  		scrapeDuration: prometheus.NewGauge(
    65  			prometheus.GaugeOpts{
    66  				Namespace: metricsNamespace,
    67  				Name:      "scrape_duration_seconds",
    68  				Help:      "Amount of time taken to collect state metrics.",
    69  			},
    70  		),
    71  		scrapeErrors: prometheus.NewGauge(
    72  			prometheus.GaugeOpts{
    73  				Namespace: metricsNamespace,
    74  				Name:      "scrape_errors",
    75  				Help:      "Number of errors observed while collecting state metrics.",
    76  			},
    77  		),
    78  
    79  		models: prometheus.NewGaugeVec(
    80  			prometheus.GaugeOpts{
    81  				Namespace: metricsNamespace,
    82  				Name:      "models",
    83  				Help:      "Number of models in the controller.",
    84  			},
    85  			modelLabelNames,
    86  		),
    87  		machines: prometheus.NewGaugeVec(
    88  			prometheus.GaugeOpts{
    89  				Namespace: metricsNamespace,
    90  				Name:      "machines",
    91  				Help:      "Number of machines managed by the controller.",
    92  			},
    93  			machineLabelNames,
    94  		),
    95  		users: prometheus.NewGaugeVec(
    96  			prometheus.GaugeOpts{
    97  				Namespace: metricsNamespace,
    98  				Name:      "users",
    99  				Help:      "Number of local users in the controller.",
   100  			},
   101  			userLabelNames,
   102  		),
   103  	}
   104  }
   105  
   106  // Describe is part of the prometheus.Collector interface.
   107  func (c *Collector) Describe(ch chan<- *prometheus.Desc) {
   108  	c.machines.Describe(ch)
   109  	c.models.Describe(ch)
   110  	c.users.Describe(ch)
   111  
   112  	c.scrapeErrors.Describe(ch)
   113  	c.scrapeDuration.Describe(ch)
   114  }
   115  
   116  // Collect is part of the prometheus.Collector interface.
   117  func (c *Collector) Collect(ch chan<- prometheus.Metric) {
   118  	timer := prometheus.NewTimer(prometheus.ObserverFunc(c.scrapeDuration.Set))
   119  	defer c.scrapeDuration.Collect(ch)
   120  	defer timer.ObserveDuration()
   121  	c.scrapeErrors.Set(0)
   122  	defer c.scrapeErrors.Collect(ch)
   123  
   124  	c.machines.Reset()
   125  	c.models.Reset()
   126  	c.users.Reset()
   127  
   128  	c.updateMetrics()
   129  
   130  	c.machines.Collect(ch)
   131  	c.models.Collect(ch)
   132  	c.users.Collect(ch)
   133  }
   134  
   135  func (c *Collector) updateMetrics() {
   136  	logger.Tracef("updating state metrics")
   137  	defer logger.Tracef("updated state metrics")
   138  
   139  	st := c.pool.SystemState()
   140  	modelUUIDs, err := st.AllModelUUIDs()
   141  	if err != nil {
   142  		logger.Debugf("error getting models: %v", err)
   143  		c.scrapeErrors.Inc()
   144  	}
   145  	for _, m := range modelUUIDs {
   146  		c.updateModelMetrics(m)
   147  	}
   148  
   149  	// TODO(axw) AllUsers only returns *local* users. We do not have User
   150  	// records for external users. To obtain external users, we will need
   151  	// to get all of the controller and model-level access documents.
   152  	controllerTag := st.ControllerTag()
   153  	localUsers, err := st.AllUsers()
   154  	if err != nil {
   155  		logger.Debugf("error getting local users: %v", err)
   156  		c.scrapeErrors.Inc()
   157  		localUsers = nil
   158  	}
   159  	for _, u := range localUsers {
   160  		userTag := u.UserTag()
   161  		access, err := st.UserAccess(userTag, controllerTag)
   162  		if err != nil && !errors.IsNotFound(err) {
   163  			logger.Debugf("error getting controller user access: %v", err)
   164  			c.scrapeErrors.Inc()
   165  			continue
   166  		}
   167  		var deleted, disabled string
   168  		if u.IsDeleted() {
   169  			deleted = "true"
   170  		}
   171  		if u.IsDisabled() {
   172  			disabled = "true"
   173  		}
   174  		c.users.With(prometheus.Labels{
   175  			controllerAccessLabel: string(access.Access),
   176  			deletedLabel:          deleted,
   177  			disabledLabel:         disabled,
   178  			domainLabel:           userTag.Domain(),
   179  		}).Inc()
   180  	}
   181  }
   182  
   183  func (c *Collector) updateModelMetrics(modelUUID string) {
   184  	model, ph, err := c.pool.GetModel(modelUUID)
   185  	if err != nil {
   186  		logger.Debugf("error getting model: %v", err)
   187  		return
   188  	}
   189  	defer ph.Release()
   190  
   191  	modelStatus, err := model.Status()
   192  	if err != nil {
   193  		if errors.IsNotFound(err) {
   194  			return // Model removed
   195  		}
   196  		c.scrapeErrors.Inc()
   197  		logger.Debugf("error getting model status: %v", err)
   198  		return
   199  	}
   200  
   201  	modelTag := model.ModelTag()
   202  	st, err := c.pool.Get(modelTag.Id())
   203  	if err != nil {
   204  		if errors.IsNotFound(err) {
   205  			return // Model removed
   206  		}
   207  		c.scrapeErrors.Inc()
   208  		logger.Debugf("error getting model state: %v", err)
   209  		return
   210  	}
   211  	defer st.Release()
   212  
   213  	machines, err := st.AllMachines()
   214  	if err != nil {
   215  		c.scrapeErrors.Inc()
   216  		logger.Debugf("error getting machines: %v", err)
   217  		machines = nil
   218  	}
   219  	for _, m := range machines {
   220  		agentStatus, err := m.Status()
   221  		if errors.IsNotFound(err) {
   222  			continue // Machine removed
   223  		} else if err != nil {
   224  			c.scrapeErrors.Inc()
   225  			logger.Debugf("error getting machine status: %v", err)
   226  			continue
   227  		}
   228  
   229  		machineStatus, err := m.InstanceStatus()
   230  		if errors.IsNotFound(err) {
   231  			continue // Machine removed
   232  		} else if errors.IsNotProvisioned(err) {
   233  			machineStatus.Status = ""
   234  		} else if err != nil {
   235  			c.scrapeErrors.Inc()
   236  			logger.Debugf("error getting machine status: %v", err)
   237  			continue
   238  		}
   239  
   240  		c.machines.With(prometheus.Labels{
   241  			agentStatusLabel:   string(agentStatus.Status),
   242  			lifeLabel:          m.Life().String(),
   243  			machineStatusLabel: string(machineStatus.Status),
   244  		}).Inc()
   245  	}
   246  
   247  	c.models.With(prometheus.Labels{
   248  		lifeLabel:   model.Life().String(),
   249  		statusLabel: string(modelStatus.Status),
   250  	}).Inc()
   251  }