// github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/state/metrics.go (about)

// Copyright 2014 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package state

import (
	"encoding/json"
	"sort"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"gopkg.in/juju/charm.v6-unstable"
	"gopkg.in/juju/names.v2"
	"gopkg.in/mgo.v2"
	"gopkg.in/mgo.v2/bson"
	"gopkg.in/mgo.v2/txn"
)

// metricsLogger is the logger used by the metrics code in this file.
var metricsLogger = loggo.GetLogger("juju.state.metrics")

const (
	// CleanupAge is the interval added to the time a batch is marked as
	// sent in order to compute its delete-time. CleanupOldMetrics removes
	// sent batches whose delete-time has passed.
	CleanupAge = time.Hour * 24
)

// MetricBatch represents a batch of metrics reported from a unit.
// These will be received from the unit in batches.
// The main contents of the metric (key, value) is defined
// by the charm author and sent from the unit via a call to
// add-metric
type MetricBatch struct {
	// st is the State the batch was created through or loaded from.
	st *State
	// doc is the document persisted in the metrics collection.
	doc metricBatchDoc
}

// metricBatchDoc is the document inserted into the metrics collection
// for each MetricBatch.
type metricBatchDoc struct {
	// UUID identifies the batch; it is stored as the document _id.
	UUID string `bson:"_id"`
	// ModelUUID is the UUID of the model the batch was added to.
	ModelUUID string `bson:"model-uuid"`
	// Unit is the id of the unit that reported the batch.
	Unit string `bson:"unit"`
	// CharmURL is the string form of the parsed charm URL supplied
	// when the batch was added.
	CharmURL string `bson:"charmurl"`
	// Sent is false when the batch is added and set to true once the
	// batch has been marked as sent.
	Sent bool `bson:"sent"`
	// DeleteTime is written when the batch is marked as sent
	// (sent time plus CleanupAge).
	DeleteTime time.Time `bson:"delete-time"`
	// Created is the creation time supplied by the caller of AddMetrics.
	Created time.Time `bson:"created"`
	// Metrics holds the individual metrics that make up the batch.
	Metrics []Metric `bson:"metrics"`
	// Credentials holds the metric credentials of the unit's application
	// as they were when the batch was added.
	Credentials []byte `bson:"credentials"`
}

// Metric represents a single Metric.
type Metric struct {
	// Key is the metric name; it is validated against the charm's
	// declared metrics when a batch is added.
	Key string `bson:"key"`
	// Value is the metric value in string form; it is validated
	// together with Key.
	Value string `bson:"value"`
	// Time is the timestamp associated with the metric.
	Time time.Time `bson:"time"`
}

// byTime implements sort.Interface, ordering metrics by ascending Time.
type byTime []Metric

func (t byTime) Len() int      { return len(t) }
func (t byTime) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
func (t byTime) Less(i, j int) bool {
	return t[i].Time.Before(t[j].Time)
}

// validate checks that the MetricBatch contains valid metrics.
64 func (m *MetricBatch) validate() error { 65 charmURL, err := charm.ParseURL(m.doc.CharmURL) 66 if err != nil { 67 return errors.Trace(err) 68 } 69 chrm, err := m.st.Charm(charmURL) 70 if err != nil { 71 return errors.Trace(err) 72 } 73 chrmMetrics := chrm.Metrics() 74 if chrmMetrics == nil { 75 return errors.Errorf("charm doesn't implement metrics") 76 } 77 for _, m := range m.doc.Metrics { 78 if err := chrmMetrics.ValidateMetric(m.Key, m.Value); err != nil { 79 return errors.Trace(err) 80 } 81 } 82 return nil 83 } 84 85 // BatchParam contains the properties of the metrics batch used when creating a metrics 86 // batch. 87 type BatchParam struct { 88 UUID string 89 CharmURL string 90 Created time.Time 91 Metrics []Metric 92 Unit names.UnitTag 93 } 94 95 // AddMetrics adds a new batch of metrics to the database. 96 func (st *State) AddMetrics(batch BatchParam) (*MetricBatch, error) { 97 if len(batch.Metrics) == 0 { 98 return nil, errors.New("cannot add a batch of 0 metrics") 99 } 100 charmURL, err := charm.ParseURL(batch.CharmURL) 101 if err != nil { 102 return nil, errors.NewNotValid(err, "could not parse charm URL") 103 } 104 105 unit, err := st.Unit(batch.Unit.Id()) 106 if err != nil { 107 return nil, errors.Trace(err) 108 } 109 application, err := unit.Application() 110 if err != nil { 111 return nil, errors.Trace(err) 112 } 113 114 metric := &MetricBatch{ 115 st: st, 116 doc: metricBatchDoc{ 117 UUID: batch.UUID, 118 ModelUUID: st.ModelUUID(), 119 Unit: batch.Unit.Id(), 120 CharmURL: charmURL.String(), 121 Sent: false, 122 Created: batch.Created, 123 Metrics: batch.Metrics, 124 Credentials: application.MetricCredentials(), 125 }, 126 } 127 if err := metric.validate(); err != nil { 128 return nil, err 129 } 130 buildTxn := func(attempt int) ([]txn.Op, error) { 131 if attempt > 0 { 132 notDead, err := isNotDead(st, unitsC, batch.Unit.Id()) 133 if err != nil || !notDead { 134 return nil, errors.NotFoundf(batch.Unit.Id()) 135 } 136 exists, err := 
st.MetricBatch(batch.UUID) 137 if exists != nil && err == nil { 138 return nil, errors.AlreadyExistsf("metrics batch UUID %q", batch.UUID) 139 } 140 if !errors.IsNotFound(err) { 141 return nil, errors.Trace(err) 142 } 143 } 144 ops := []txn.Op{{ 145 C: unitsC, 146 Id: st.docID(batch.Unit.Id()), 147 Assert: notDeadDoc, 148 }, { 149 C: metricsC, 150 Id: metric.UUID(), 151 Assert: txn.DocMissing, 152 Insert: &metric.doc, 153 }} 154 return ops, nil 155 } 156 err = st.run(buildTxn) 157 if err != nil { 158 return nil, errors.Trace(err) 159 } 160 161 return metric, nil 162 } 163 164 // AllMetricBatches returns all metric batches currently stored in state. 165 // TODO (tasdomas): this method is currently only used in the uniter worker test - 166 // it needs to be modified to restrict the scope of the values it 167 // returns if it is to be used outside of tests. 168 func (st *State) AllMetricBatches() ([]MetricBatch, error) { 169 c, closer := st.getCollection(metricsC) 170 defer closer() 171 docs := []metricBatchDoc{} 172 err := c.Find(nil).All(&docs) 173 if err != nil { 174 return nil, errors.Trace(err) 175 } 176 results := make([]MetricBatch, len(docs)) 177 for i, doc := range docs { 178 results[i] = MetricBatch{st: st, doc: doc} 179 } 180 return results, nil 181 } 182 183 func (st *State) queryMetricBatches(query bson.M) ([]MetricBatch, error) { 184 c, closer := st.getCollection(metricsC) 185 defer closer() 186 docs := []metricBatchDoc{} 187 err := c.Find(query).Sort("created").All(&docs) 188 if err != nil { 189 return nil, errors.Trace(err) 190 } 191 results := make([]MetricBatch, len(docs)) 192 for i, doc := range docs { 193 results[i] = MetricBatch{st: st, doc: doc} 194 } 195 return results, nil 196 } 197 198 // MetricBatchesForUnit returns metric batches for the given unit. 
199 func (st *State) MetricBatchesForUnit(unit string) ([]MetricBatch, error) { 200 _, err := st.Unit(unit) 201 if err != nil { 202 return nil, errors.Trace(err) 203 } 204 return st.queryMetricBatches(bson.M{"unit": unit}) 205 } 206 207 // MetricBatchesForModel returns metric batches for all the units in the model. 208 func (st *State) MetricBatchesForModel() ([]MetricBatch, error) { 209 return st.queryMetricBatches(bson.M{"model-uuid": st.ModelUUID()}) 210 } 211 212 // MetricBatchesForApplication returns metric batches for the given application. 213 func (st *State) MetricBatchesForApplication(application string) ([]MetricBatch, error) { 214 svc, err := st.Application(application) 215 if err != nil { 216 return nil, errors.Trace(err) 217 } 218 units, err := svc.AllUnits() 219 if err != nil { 220 return nil, errors.Trace(err) 221 } 222 unitNames := make([]bson.M, len(units)) 223 for i, u := range units { 224 unitNames[i] = bson.M{"unit": u.Name()} 225 } 226 return st.queryMetricBatches(bson.M{"$or": unitNames}) 227 } 228 229 // MetricBatch returns the metric batch with the given id. 230 func (st *State) MetricBatch(id string) (*MetricBatch, error) { 231 c, closer := st.getCollection(metricsC) 232 defer closer() 233 doc := metricBatchDoc{} 234 err := c.Find(bson.M{"_id": id}).One(&doc) 235 if err == mgo.ErrNotFound { 236 return nil, errors.NotFoundf("metric %v", id) 237 } 238 if err != nil { 239 return nil, err 240 } 241 return &MetricBatch{st: st, doc: doc}, nil 242 } 243 244 // CleanupOldMetrics looks for metrics that are 24 hours old (or older) 245 // and have been sent. Any metrics it finds are deleted. 246 func (st *State) CleanupOldMetrics() error { 247 now := st.clock.Now() 248 metrics, closer := st.getCollection(metricsC) 249 defer closer() 250 // Nothing else in the system will interact with sent metrics, and nothing needs 251 // to watch them either; so in this instance it's safe to do an end run around the 252 // mgo/txn package. 
See State.cleanupRelationSettings for a similar situation. 253 metricsW := metrics.Writeable() 254 // TODO (mattyw) iter over this. 255 info, err := metricsW.RemoveAll(bson.M{ 256 "model-uuid": st.ModelUUID(), 257 "sent": true, 258 "delete-time": bson.M{"$lte": now}, 259 }) 260 if err == nil { 261 metricsLogger.Tracef("cleanup removed %d metrics", info.Removed) 262 } 263 return errors.Trace(err) 264 } 265 266 // MetricsToSend returns batchSize metrics that need to be sent 267 // to the collector 268 func (st *State) MetricsToSend(batchSize int) ([]*MetricBatch, error) { 269 var docs []metricBatchDoc 270 c, closer := st.getCollection(metricsC) 271 defer closer() 272 273 q := bson.M{ 274 "model-uuid": st.ModelUUID(), 275 "sent": false, 276 } 277 err := c.Find(q).Limit(batchSize).All(&docs) 278 if err != nil { 279 return nil, errors.Trace(err) 280 } 281 282 batch := make([]*MetricBatch, len(docs)) 283 for i, doc := range docs { 284 batch[i] = &MetricBatch{st: st, doc: doc} 285 286 } 287 288 return batch, nil 289 } 290 291 // CountOfUnsentMetrics returns the number of metrics that 292 // haven't been sent to the collection service. 293 func (st *State) CountOfUnsentMetrics() (int, error) { 294 c, closer := st.getCollection(metricsC) 295 defer closer() 296 return c.Find(bson.M{ 297 "model-uuid": st.ModelUUID(), 298 "sent": false, 299 }).Count() 300 } 301 302 // CountOfSentMetrics returns the number of metrics that 303 // have been sent to the collection service and have not 304 // been removed by the cleanup worker. 305 func (st *State) CountOfSentMetrics() (int, error) { 306 c, closer := st.getCollection(metricsC) 307 defer closer() 308 return c.Find(bson.M{ 309 "model-uuid": st.ModelUUID(), 310 "sent": true, 311 }).Count() 312 } 313 314 // MarshalJSON defines how the MetricBatch type should be 315 // converted to json. 316 func (m *MetricBatch) MarshalJSON() ([]byte, error) { 317 return json.Marshal(m.doc) 318 } 319 320 // UUID returns to uuid of the metric. 
321 func (m *MetricBatch) UUID() string { 322 return m.doc.UUID 323 } 324 325 // ModelUUID returns the model UUID this metric applies to. 326 func (m *MetricBatch) ModelUUID() string { 327 return m.doc.ModelUUID 328 } 329 330 // Unit returns the name of the unit this metric was generated in. 331 func (m *MetricBatch) Unit() string { 332 return m.doc.Unit 333 } 334 335 // CharmURL returns the charm url for the charm this metric was generated in. 336 func (m *MetricBatch) CharmURL() string { 337 return m.doc.CharmURL 338 } 339 340 // Created returns the time this metric batch was created. 341 func (m *MetricBatch) Created() time.Time { 342 return m.doc.Created 343 } 344 345 // Sent returns a flag to tell us if this metric has been sent to the metric 346 // collection service 347 func (m *MetricBatch) Sent() bool { 348 return m.doc.Sent 349 } 350 351 // Metrics returns the metrics in this batch. 352 func (m *MetricBatch) Metrics() []Metric { 353 result := make([]Metric, len(m.doc.Metrics)) 354 copy(result, m.doc.Metrics) 355 return result 356 } 357 358 // UniqueMetrics returns only the last value for each 359 // metric key in this batch. 360 func (m *MetricBatch) UniqueMetrics() []Metric { 361 metrics := m.Metrics() 362 sort.Sort(byTime(metrics)) 363 uniq := map[string]Metric{} 364 for _, m := range metrics { 365 uniq[m.Key] = m 366 } 367 results := make([]Metric, len(uniq)) 368 i := 0 369 for _, m := range uniq { 370 results[i] = m 371 i++ 372 } 373 return results 374 } 375 376 // SetSent marks the metric has having been sent at 377 // the specified time. 
378 func (m *MetricBatch) SetSent(t time.Time) error { 379 deleteTime := t.UTC().Add(CleanupAge) 380 ops := setSentOps([]string{m.UUID()}, deleteTime) 381 if err := m.st.runTransaction(ops); err != nil { 382 return errors.Annotatef(err, "cannot set metric sent for metric %q", m.UUID()) 383 } 384 385 m.doc.Sent = true 386 m.doc.DeleteTime = deleteTime 387 return nil 388 } 389 390 // Credentials returns any credentials associated with the metric batch. 391 func (m *MetricBatch) Credentials() []byte { 392 return m.doc.Credentials 393 } 394 395 func setSentOps(batchUUIDs []string, deleteTime time.Time) []txn.Op { 396 ops := make([]txn.Op, len(batchUUIDs)) 397 for i, u := range batchUUIDs { 398 ops[i] = txn.Op{ 399 C: metricsC, 400 Id: u, 401 Assert: txn.DocExists, 402 Update: bson.M{"$set": bson.M{"sent": true, "delete-time": deleteTime}}, 403 } 404 } 405 return ops 406 } 407 408 // SetMetricBatchesSent sets sent on each MetricBatch corresponding to the uuids provided. 409 func (st *State) SetMetricBatchesSent(batchUUIDs []string) error { 410 deleteTime := st.clock.Now().UTC().Add(CleanupAge) 411 ops := setSentOps(batchUUIDs, deleteTime) 412 if err := st.runTransaction(ops); err != nil { 413 return errors.Annotatef(err, "cannot set metric sent in bulk call") 414 } 415 return nil 416 }