github.com/wallyworld/juju@v0.0.0-20161013125918-6cf1bc9d917a/state/metrics.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package state
     5  
     6  import (
     7  	"encoding/json"
     8  	"sort"
     9  	"time"
    10  
    11  	"github.com/juju/errors"
    12  	"github.com/juju/loggo"
    13  	"gopkg.in/juju/charm.v6-unstable"
    14  	"gopkg.in/juju/names.v2"
    15  	"gopkg.in/mgo.v2"
    16  	"gopkg.in/mgo.v2/bson"
    17  	"gopkg.in/mgo.v2/txn"
    18  )
    19  
// metricsLogger is the logger used by the metrics code in this file.
var metricsLogger = loggo.GetLogger("juju.state.metrics")

const (
	// CleanupAge is the period added to the time a metric batch is marked
	// as sent in order to compute its delete-time. CleanupOldMetrics removes
	// sent batches whose delete-time is not after the current time.
	CleanupAge = time.Hour * 24
)
    25  
// MetricBatch represents a batch of metrics reported from a unit.
// These will be received from the unit in batches.
// The main contents of the metric (key, value) is defined
// by the charm author and sent from the unit via a call to
// add-metric
type MetricBatch struct {
	// st is the State the batch was created by or loaded from; it is used
	// for charm lookups and for running transactions.
	st  *State
	// doc is the stored form of the batch.
	doc metricBatchDoc
}
    35  
// metricBatchDoc is the document stored in the metrics collection for a
// single metric batch.
type metricBatchDoc struct {
	// UUID identifies the batch and is used as the document id.
	UUID        string    `bson:"_id"`
	// ModelUUID is the UUID of the model the batch was recorded in.
	ModelUUID   string    `bson:"model-uuid"`
	// Unit is the id of the unit that reported the batch.
	Unit        string    `bson:"unit"`
	// CharmURL is the string form of the charm URL the batch was reported for.
	CharmURL    string    `bson:"charmurl"`
	// Sent is false when the batch is added and set to true by setSentOps.
	Sent        bool      `bson:"sent"`
	// DeleteTime is written together with Sent: the time the batch was
	// marked as sent plus CleanupAge.
	DeleteTime  time.Time `bson:"delete-time"`
	// Created is the creation time supplied by the caller of AddMetrics.
	Created     time.Time `bson:"created"`
	// Metrics holds the individual metrics of the batch.
	Metrics     []Metric  `bson:"metrics"`
	// Credentials holds the metric credentials of the unit's application
	// as they were when the batch was added.
	Credentials []byte    `bson:"credentials"`
}
    47  
    48  // Metric represents a single Metric.
    49  type Metric struct {
    50  	Key   string    `bson:"key"`
    51  	Value string    `bson:"value"`
    52  	Time  time.Time `bson:"time"`
    53  }
    54  
    55  type byTime []Metric
    56  
    57  func (t byTime) Len() int      { return len(t) }
    58  func (t byTime) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
    59  func (t byTime) Less(i, j int) bool {
    60  	return t[i].Time.Before(t[j].Time)
    61  }
    62  
    63  // validate checks that the MetricBatch contains valid metrics.
    64  func (m *MetricBatch) validate() error {
    65  	charmURL, err := charm.ParseURL(m.doc.CharmURL)
    66  	if err != nil {
    67  		return errors.Trace(err)
    68  	}
    69  	chrm, err := m.st.Charm(charmURL)
    70  	if err != nil {
    71  		return errors.Trace(err)
    72  	}
    73  	chrmMetrics := chrm.Metrics()
    74  	if chrmMetrics == nil {
    75  		return errors.Errorf("charm doesn't implement metrics")
    76  	}
    77  	for _, m := range m.doc.Metrics {
    78  		if err := chrmMetrics.ValidateMetric(m.Key, m.Value); err != nil {
    79  			return errors.Trace(err)
    80  		}
    81  	}
    82  	return nil
    83  }
    84  
// BatchParam contains the properties of the metrics batch used when creating a metrics
// batch.
type BatchParam struct {
	// UUID becomes the id of the stored batch document.
	UUID     string
	// CharmURL is parsed by AddMetrics; an unparseable value is rejected.
	CharmURL string
	// Created is stored as the batch's creation time.
	Created  time.Time
	// Metrics are the metrics in the batch; AddMetrics rejects an empty list.
	Metrics  []Metric
	// Unit is the tag of the unit reporting the batch.
	Unit     names.UnitTag
}
    94  
    95  // AddMetrics adds a new batch of metrics to the database.
    96  func (st *State) AddMetrics(batch BatchParam) (*MetricBatch, error) {
    97  	if len(batch.Metrics) == 0 {
    98  		return nil, errors.New("cannot add a batch of 0 metrics")
    99  	}
   100  	charmURL, err := charm.ParseURL(batch.CharmURL)
   101  	if err != nil {
   102  		return nil, errors.NewNotValid(err, "could not parse charm URL")
   103  	}
   104  
   105  	unit, err := st.Unit(batch.Unit.Id())
   106  	if err != nil {
   107  		return nil, errors.Trace(err)
   108  	}
   109  	application, err := unit.Application()
   110  	if err != nil {
   111  		return nil, errors.Trace(err)
   112  	}
   113  
   114  	metric := &MetricBatch{
   115  		st: st,
   116  		doc: metricBatchDoc{
   117  			UUID:        batch.UUID,
   118  			ModelUUID:   st.ModelUUID(),
   119  			Unit:        batch.Unit.Id(),
   120  			CharmURL:    charmURL.String(),
   121  			Sent:        false,
   122  			Created:     batch.Created,
   123  			Metrics:     batch.Metrics,
   124  			Credentials: application.MetricCredentials(),
   125  		},
   126  	}
   127  	if err := metric.validate(); err != nil {
   128  		return nil, err
   129  	}
   130  	buildTxn := func(attempt int) ([]txn.Op, error) {
   131  		if attempt > 0 {
   132  			notDead, err := isNotDead(st, unitsC, batch.Unit.Id())
   133  			if err != nil || !notDead {
   134  				return nil, errors.NotFoundf(batch.Unit.Id())
   135  			}
   136  			exists, err := st.MetricBatch(batch.UUID)
   137  			if exists != nil && err == nil {
   138  				return nil, errors.AlreadyExistsf("metrics batch UUID %q", batch.UUID)
   139  			}
   140  			if !errors.IsNotFound(err) {
   141  				return nil, errors.Trace(err)
   142  			}
   143  		}
   144  		ops := []txn.Op{{
   145  			C:      unitsC,
   146  			Id:     st.docID(batch.Unit.Id()),
   147  			Assert: notDeadDoc,
   148  		}, {
   149  			C:      metricsC,
   150  			Id:     metric.UUID(),
   151  			Assert: txn.DocMissing,
   152  			Insert: &metric.doc,
   153  		}}
   154  		return ops, nil
   155  	}
   156  	err = st.run(buildTxn)
   157  	if err != nil {
   158  		return nil, errors.Trace(err)
   159  	}
   160  
   161  	return metric, nil
   162  }
   163  
   164  // AllMetricBatches returns all metric batches currently stored in state.
   165  // TODO (tasdomas): this method is currently only used in the uniter worker test -
   166  //                  it needs to be modified to restrict the scope of the values it
   167  //                  returns if it is to be used outside of tests.
   168  func (st *State) AllMetricBatches() ([]MetricBatch, error) {
   169  	c, closer := st.getCollection(metricsC)
   170  	defer closer()
   171  	docs := []metricBatchDoc{}
   172  	err := c.Find(nil).All(&docs)
   173  	if err != nil {
   174  		return nil, errors.Trace(err)
   175  	}
   176  	results := make([]MetricBatch, len(docs))
   177  	for i, doc := range docs {
   178  		results[i] = MetricBatch{st: st, doc: doc}
   179  	}
   180  	return results, nil
   181  }
   182  
   183  func (st *State) queryMetricBatches(query bson.M) ([]MetricBatch, error) {
   184  	c, closer := st.getCollection(metricsC)
   185  	defer closer()
   186  	docs := []metricBatchDoc{}
   187  	err := c.Find(query).Sort("created").All(&docs)
   188  	if err != nil {
   189  		return nil, errors.Trace(err)
   190  	}
   191  	results := make([]MetricBatch, len(docs))
   192  	for i, doc := range docs {
   193  		results[i] = MetricBatch{st: st, doc: doc}
   194  	}
   195  	return results, nil
   196  }
   197  
   198  // MetricBatchesForUnit returns metric batches for the given unit.
   199  func (st *State) MetricBatchesForUnit(unit string) ([]MetricBatch, error) {
   200  	_, err := st.Unit(unit)
   201  	if err != nil {
   202  		return nil, errors.Trace(err)
   203  	}
   204  	return st.queryMetricBatches(bson.M{"unit": unit})
   205  }
   206  
   207  // MetricBatchesForModel returns metric batches for all the units in the model.
   208  func (st *State) MetricBatchesForModel() ([]MetricBatch, error) {
   209  	return st.queryMetricBatches(bson.M{"model-uuid": st.ModelUUID()})
   210  }
   211  
   212  // MetricBatchesForApplication returns metric batches for the given application.
   213  func (st *State) MetricBatchesForApplication(application string) ([]MetricBatch, error) {
   214  	svc, err := st.Application(application)
   215  	if err != nil {
   216  		return nil, errors.Trace(err)
   217  	}
   218  	units, err := svc.AllUnits()
   219  	if err != nil {
   220  		return nil, errors.Trace(err)
   221  	}
   222  	unitNames := make([]bson.M, len(units))
   223  	for i, u := range units {
   224  		unitNames[i] = bson.M{"unit": u.Name()}
   225  	}
   226  	return st.queryMetricBatches(bson.M{"$or": unitNames})
   227  }
   228  
   229  // MetricBatch returns the metric batch with the given id.
   230  func (st *State) MetricBatch(id string) (*MetricBatch, error) {
   231  	c, closer := st.getCollection(metricsC)
   232  	defer closer()
   233  	doc := metricBatchDoc{}
   234  	err := c.Find(bson.M{"_id": id}).One(&doc)
   235  	if err == mgo.ErrNotFound {
   236  		return nil, errors.NotFoundf("metric %v", id)
   237  	}
   238  	if err != nil {
   239  		return nil, err
   240  	}
   241  	return &MetricBatch{st: st, doc: doc}, nil
   242  }
   243  
   244  // CleanupOldMetrics looks for metrics that are 24 hours old (or older)
   245  // and have been sent. Any metrics it finds are deleted.
   246  func (st *State) CleanupOldMetrics() error {
   247  	now := st.clock.Now()
   248  	metrics, closer := st.getCollection(metricsC)
   249  	defer closer()
   250  	// Nothing else in the system will interact with sent metrics, and nothing needs
   251  	// to watch them either; so in this instance it's safe to do an end run around the
   252  	// mgo/txn package. See State.cleanupRelationSettings for a similar situation.
   253  	metricsW := metrics.Writeable()
   254  	// TODO (mattyw) iter over this.
   255  	info, err := metricsW.RemoveAll(bson.M{
   256  		"model-uuid":  st.ModelUUID(),
   257  		"sent":        true,
   258  		"delete-time": bson.M{"$lte": now},
   259  	})
   260  	if err == nil {
   261  		metricsLogger.Tracef("cleanup removed %d metrics", info.Removed)
   262  	}
   263  	return errors.Trace(err)
   264  }
   265  
   266  // MetricsToSend returns batchSize metrics that need to be sent
   267  // to the collector
   268  func (st *State) MetricsToSend(batchSize int) ([]*MetricBatch, error) {
   269  	var docs []metricBatchDoc
   270  	c, closer := st.getCollection(metricsC)
   271  	defer closer()
   272  
   273  	q := bson.M{
   274  		"model-uuid": st.ModelUUID(),
   275  		"sent":       false,
   276  	}
   277  	err := c.Find(q).Limit(batchSize).All(&docs)
   278  	if err != nil {
   279  		return nil, errors.Trace(err)
   280  	}
   281  
   282  	batch := make([]*MetricBatch, len(docs))
   283  	for i, doc := range docs {
   284  		batch[i] = &MetricBatch{st: st, doc: doc}
   285  
   286  	}
   287  
   288  	return batch, nil
   289  }
   290  
   291  // CountOfUnsentMetrics returns the number of metrics that
   292  // haven't been sent to the collection service.
   293  func (st *State) CountOfUnsentMetrics() (int, error) {
   294  	c, closer := st.getCollection(metricsC)
   295  	defer closer()
   296  	return c.Find(bson.M{
   297  		"model-uuid": st.ModelUUID(),
   298  		"sent":       false,
   299  	}).Count()
   300  }
   301  
   302  // CountOfSentMetrics returns the number of metrics that
   303  // have been sent to the collection service and have not
   304  // been removed by the cleanup worker.
   305  func (st *State) CountOfSentMetrics() (int, error) {
   306  	c, closer := st.getCollection(metricsC)
   307  	defer closer()
   308  	return c.Find(bson.M{
   309  		"model-uuid": st.ModelUUID(),
   310  		"sent":       true,
   311  	}).Count()
   312  }
   313  
// MarshalJSON defines how the MetricBatch type should be
// converted to json: the batch's underlying document is marshalled.
// The document declares only bson tags, so the JSON keys are the Go
// field names.
func (m *MetricBatch) MarshalJSON() ([]byte, error) {
	return json.Marshal(m.doc)
}
   319  
// UUID returns the UUID of the metric batch, which is also the id of its
// stored document.
func (m *MetricBatch) UUID() string {
	return m.doc.UUID
}
   324  
// ModelUUID returns the UUID of the model this metric batch applies to.
func (m *MetricBatch) ModelUUID() string {
	return m.doc.ModelUUID
}
   329  
// Unit returns the name of the unit this metric batch was generated in.
func (m *MetricBatch) Unit() string {
	return m.doc.Unit
}
   334  
// CharmURL returns, in string form, the URL of the charm this metric batch
// was generated in.
func (m *MetricBatch) CharmURL() string {
	return m.doc.CharmURL
}
   339  
// Created returns the time this metric batch was created, as recorded in
// its document.
func (m *MetricBatch) Created() time.Time {
	return m.doc.Created
}
   344  
// Sent reports whether this metric batch has been marked as sent to the
// metric collection service. The value comes from this in-memory copy of the
// document, which SetSent updates after a successful transaction.
func (m *MetricBatch) Sent() bool {
	return m.doc.Sent
}
   350  
   351  // Metrics returns the metrics in this batch.
   352  func (m *MetricBatch) Metrics() []Metric {
   353  	result := make([]Metric, len(m.doc.Metrics))
   354  	copy(result, m.doc.Metrics)
   355  	return result
   356  }
   357  
   358  // UniqueMetrics returns only the last value for each
   359  // metric key in this batch.
   360  func (m *MetricBatch) UniqueMetrics() []Metric {
   361  	metrics := m.Metrics()
   362  	sort.Sort(byTime(metrics))
   363  	uniq := map[string]Metric{}
   364  	for _, m := range metrics {
   365  		uniq[m.Key] = m
   366  	}
   367  	results := make([]Metric, len(uniq))
   368  	i := 0
   369  	for _, m := range uniq {
   370  		results[i] = m
   371  		i++
   372  	}
   373  	return results
   374  }
   375  
   376  // SetSent marks the metric has having been sent at
   377  // the specified time.
   378  func (m *MetricBatch) SetSent(t time.Time) error {
   379  	deleteTime := t.UTC().Add(CleanupAge)
   380  	ops := setSentOps([]string{m.UUID()}, deleteTime)
   381  	if err := m.st.runTransaction(ops); err != nil {
   382  		return errors.Annotatef(err, "cannot set metric sent for metric %q", m.UUID())
   383  	}
   384  
   385  	m.doc.Sent = true
   386  	m.doc.DeleteTime = deleteTime
   387  	return nil
   388  }
   389  
// Credentials returns any credentials associated with the metric batch.
// The slice held by the batch is returned directly, without copying.
func (m *MetricBatch) Credentials() []byte {
	return m.doc.Credentials
}
   394  
   395  func setSentOps(batchUUIDs []string, deleteTime time.Time) []txn.Op {
   396  	ops := make([]txn.Op, len(batchUUIDs))
   397  	for i, u := range batchUUIDs {
   398  		ops[i] = txn.Op{
   399  			C:      metricsC,
   400  			Id:     u,
   401  			Assert: txn.DocExists,
   402  			Update: bson.M{"$set": bson.M{"sent": true, "delete-time": deleteTime}},
   403  		}
   404  	}
   405  	return ops
   406  }
   407  
   408  // SetMetricBatchesSent sets sent on each MetricBatch corresponding to the uuids provided.
   409  func (st *State) SetMetricBatchesSent(batchUUIDs []string) error {
   410  	deleteTime := st.clock.Now().UTC().Add(CleanupAge)
   411  	ops := setSentOps(batchUUIDs, deleteTime)
   412  	if err := st.runTransaction(ops); err != nil {
   413  		return errors.Annotatef(err, "cannot set metric sent in bulk call")
   414  	}
   415  	return nil
   416  }