// Source: github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/state/metrics.go

// Copyright 2014 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package state

import (
	"encoding/json"
	"fmt"
	"sort"
	"strings"
	"time"

	"github.com/juju/charm/v12"
	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/mgo/v3"
	"github.com/juju/mgo/v3/bson"
	"github.com/juju/mgo/v3/txn"
	"github.com/juju/names/v5"
)

// metricsLogger is the logger used by the metrics code in this file.
var metricsLogger = loggo.GetLogger("juju.state.metrics")

const (
	// CleanupAge is how long a metric batch is retained after it has been
	// marked as sent. SetSent and SetMetricBatchesSent record a delete time
	// of the sent time plus CleanupAge, and CleanupOldMetrics removes sent
	// batches whose delete time has passed.
	CleanupAge = time.Hour * 24
)

// MetricBatch represents a batch of metrics reported from a unit.
// These will be received from the unit in batches.
// The main contents of a metric (key, value) are defined
// by the charm author and sent from the unit via a call to
// add-metric.
type MetricBatch struct {
	st  *State
	doc metricBatchDoc
}

// metricBatchDoc is the document stored in the metrics collection for a
// single batch.
//
// Sent and DeleteTime are written together when the batch is marked as sent.
// Credentials holds the owning application's metric credentials (unset for
// batches added through AddModelMetrics), and SLACredentials holds the
// model's SLA credential at the time the batch was added.
type metricBatchDoc struct {
	UUID           string    `bson:"_id"`
	ModelUUID      string    `bson:"model-uuid"`
	Unit           string    `bson:"unit"`
	CharmURL       string    `bson:"charmurl"`
	Sent           bool      `bson:"sent"`
	DeleteTime     time.Time `bson:"delete-time"`
	Created        time.Time `bson:"created"`
	Metrics        []Metric  `bson:"metrics"`
	Credentials    []byte    `bson:"credentials"`
	SLACredentials []byte    `bson:"sla-credentials,omitempty"`
}

// Metric represents a single Metric.
type Metric struct {
	Key    string            `bson:"key"`
	Value  string            `bson:"value"`
	Time   time.Time         `bson:"time"`
	Labels map[string]string `bson:"labels,omitempty"`
}

// byTime sorts metrics in ascending order of their Time field.
type byTime []Metric

// Len implements sort.Interface.
func (t byTime) Len() int { return len(t) }

// Swap implements sort.Interface.
func (t byTime) Swap(i, j int) { t[i], t[j] = t[j], t[i] }

// Less implements sort.Interface.
func (t byTime) Less(i, j int) bool {
	return t[i].Time.Before(t[j].Time)
}

// byKey sorts metrics by Key, and by their rendered labels when the keys
// are equal.
type byKey []Metric

// Len implements sort.Interface.
75 func (t byKey) Len() int { return len(t) } 76 77 // Swap implements sort.Interface. 78 func (t byKey) Swap(i, j int) { t[i], t[j] = t[j], t[i] } 79 80 // Less implements sort.Interface. 81 func (t byKey) Less(i, j int) bool { 82 if t[i].Key == t[j].Key { 83 return labelsKey(t[i].Labels) < labelsKey(t[j].Labels) 84 } 85 return t[i].Key < t[j].Key 86 } 87 88 func labelsKey(m map[string]string) string { 89 var result []string 90 for k, v := range m { 91 result = append(result, fmt.Sprintf("%s=%s", k, v)) 92 } 93 sort.Strings(result) 94 return strings.Join(result, ",") 95 } 96 97 // validate checks that the MetricBatch contains valid metrics. 98 func (m *MetricBatch) validate() error { 99 ch, err := m.st.Charm(m.doc.CharmURL) 100 if err != nil { 101 return errors.Trace(err) 102 } 103 chMetrics := ch.Metrics() 104 if chMetrics == nil { 105 return errors.Errorf("charm doesn't implement metrics") 106 } 107 for _, m := range m.doc.Metrics { 108 if err := chMetrics.ValidateMetric(m.Key, m.Value); err != nil { 109 return errors.Trace(err) 110 } 111 } 112 return nil 113 } 114 115 // BatchParam contains the properties of the metrics batch used when creating a metrics 116 // batch. 117 type BatchParam struct { 118 UUID string 119 CharmURL string 120 Created time.Time 121 Metrics []Metric 122 Unit names.UnitTag 123 } 124 125 // ModelBatchParam contains the properties of a metric batch for a model 126 // The model uuid will be attenuated in the call to AddModelMetrics. 127 type ModelBatchParam struct { 128 UUID string 129 Created time.Time 130 Metrics []Metric 131 } 132 133 // AddMetrics adds a new batch of metrics to the database. 
134 func (st *State) AddMetrics(batch BatchParam) (*MetricBatch, error) { 135 if len(batch.Metrics) == 0 { 136 return nil, errors.New("cannot add a batch of 0 metrics") 137 } 138 charmURL, err := charm.ParseURL(batch.CharmURL) 139 if err != nil { 140 return nil, errors.NewNotValid(err, "could not parse charm URL") 141 } 142 143 unit, err := st.Unit(batch.Unit.Id()) 144 if err != nil { 145 return nil, errors.Trace(err) 146 } 147 application, err := unit.Application() 148 if err != nil { 149 return nil, errors.Trace(err) 150 } 151 152 slaCreds, err := st.SLACredential() 153 if err != nil { 154 return nil, errors.Trace(err) 155 } 156 157 metric := &MetricBatch{ 158 st: st, 159 doc: metricBatchDoc{ 160 UUID: batch.UUID, 161 ModelUUID: st.ModelUUID(), 162 Unit: batch.Unit.Id(), 163 CharmURL: charmURL.String(), 164 Sent: false, 165 Created: batch.Created, 166 Metrics: batch.Metrics, 167 Credentials: application.MetricCredentials(), 168 SLACredentials: slaCreds, 169 }, 170 } 171 if err := metric.validate(); err != nil { 172 return nil, err 173 } 174 buildTxn := func(attempt int) ([]txn.Op, error) { 175 if attempt > 0 { 176 notDead, err := isNotDead(st, unitsC, batch.Unit.Id()) 177 if err != nil || !notDead { 178 return nil, errors.NotFoundf(batch.Unit.Id()) 179 } 180 exists, err := st.MetricBatch(batch.UUID) 181 if exists != nil && err == nil { 182 return nil, errors.AlreadyExistsf("metrics batch UUID %q", batch.UUID) 183 } 184 if !errors.IsNotFound(err) { 185 return nil, errors.Trace(err) 186 } 187 } 188 ops := []txn.Op{{ 189 C: unitsC, 190 Id: st.docID(batch.Unit.Id()), 191 Assert: notDeadDoc, 192 }, { 193 C: metricsC, 194 Id: metric.UUID(), 195 Assert: txn.DocMissing, 196 Insert: &metric.doc, 197 }} 198 return ops, nil 199 } 200 err = st.db().Run(buildTxn) 201 if err != nil { 202 return nil, errors.Trace(err) 203 } 204 205 return metric, nil 206 } 207 208 // AddModelMetrics adds a new model-centric batch of metrics to the database. 
209 func (st *State) AddModelMetrics(batch ModelBatchParam) (*MetricBatch, error) { 210 if len(batch.Metrics) == 0 { 211 return nil, errors.New("cannot add a batch of 0 metrics") 212 } 213 slaCreds, err := st.SLACredential() 214 if err != nil { 215 return nil, errors.Trace(err) 216 } 217 metric := &MetricBatch{ 218 st: st, 219 doc: metricBatchDoc{ 220 UUID: batch.UUID, 221 ModelUUID: st.ModelUUID(), 222 Sent: false, 223 Created: batch.Created, 224 Metrics: batch.Metrics, 225 SLACredentials: slaCreds, 226 }, 227 } 228 buildTxn := func(attempt int) ([]txn.Op, error) { 229 if attempt > 0 { 230 exists, err := st.MetricBatch(batch.UUID) 231 if exists != nil && err == nil { 232 return nil, errors.AlreadyExistsf("metrics batch UUID %q", batch.UUID) 233 } 234 if !errors.IsNotFound(err) { 235 return nil, errors.Trace(err) 236 } 237 } 238 ops := []txn.Op{{ 239 C: metricsC, 240 Id: metric.UUID(), 241 Assert: txn.DocMissing, 242 Insert: &metric.doc, 243 }} 244 return ops, nil 245 } 246 err = st.db().Run(buildTxn) 247 if err != nil { 248 return nil, errors.Trace(err) 249 } 250 251 return metric, nil 252 } 253 254 // AllMetricBatches returns all metric batches currently stored in state. 255 // TODO (tasdomas): this method is currently only used in the uniter worker test - 256 // 257 // it needs to be modified to restrict the scope of the values it 258 // returns if it is to be used outside of tests. 
259 func (st *State) AllMetricBatches() ([]MetricBatch, error) { 260 c, closer := st.db().GetCollection(metricsC) 261 defer closer() 262 docs := []metricBatchDoc{} 263 err := c.Find(nil).All(&docs) 264 if err != nil { 265 return nil, errors.Trace(err) 266 } 267 results := make([]MetricBatch, len(docs)) 268 for i, doc := range docs { 269 results[i] = MetricBatch{st: st, doc: doc} 270 } 271 return results, nil 272 } 273 274 func (st *State) queryMetricBatches(query bson.M) ([]MetricBatch, error) { 275 c, closer := st.db().GetCollection(metricsC) 276 defer closer() 277 docs := []metricBatchDoc{} 278 err := c.Find(query).Sort("created").All(&docs) 279 if err != nil { 280 return nil, errors.Trace(err) 281 } 282 results := make([]MetricBatch, len(docs)) 283 for i, doc := range docs { 284 results[i] = MetricBatch{st: st, doc: doc} 285 } 286 return results, nil 287 } 288 289 // MetricBatchesForUnit returns metric batches for the given unit. 290 func (st *State) MetricBatchesForUnit(unit string) ([]MetricBatch, error) { 291 _, err := st.Unit(unit) 292 if err != nil { 293 return nil, errors.Trace(err) 294 } 295 return st.queryMetricBatches(bson.M{"unit": unit}) 296 } 297 298 // MetricBatchesForModel returns metric batches for all the units in the model. 299 func (st *State) MetricBatchesForModel() ([]MetricBatch, error) { 300 return st.queryMetricBatches(bson.M{"model-uuid": st.ModelUUID()}) 301 } 302 303 // MetricBatchesForApplication returns metric batches for the given application. 
304 func (st *State) MetricBatchesForApplication(application string) ([]MetricBatch, error) { 305 app, err := st.Application(application) 306 if err != nil { 307 return nil, errors.Trace(err) 308 } 309 units, err := app.AllUnits() 310 if err != nil { 311 return nil, errors.Trace(err) 312 } 313 unitNames := make([]bson.M, len(units)) 314 for i, u := range units { 315 unitNames[i] = bson.M{"unit": u.Name()} 316 } 317 return st.queryMetricBatches(bson.M{"$or": unitNames}) 318 } 319 320 // MetricBatch returns the metric batch with the given id. 321 func (st *State) MetricBatch(id string) (*MetricBatch, error) { 322 c, closer := st.db().GetCollection(metricsC) 323 defer closer() 324 doc := metricBatchDoc{} 325 err := c.Find(bson.M{"_id": id}).One(&doc) 326 if err == mgo.ErrNotFound { 327 return nil, errors.NotFoundf("metric %v", id) 328 } 329 if err != nil { 330 return nil, err 331 } 332 return &MetricBatch{st: st, doc: doc}, nil 333 } 334 335 // CleanupOldMetrics looks for metrics that are 24 hours old (or older) 336 // and have been sent. Any metrics it finds are deleted. 337 func (st *State) CleanupOldMetrics() error { 338 now := st.clock().Now() 339 metrics, closer := st.db().GetCollection(metricsC) 340 defer closer() 341 // Nothing else in the system will interact with sent metrics, and nothing needs 342 // to watch them either; so in this instance it's safe to do an end run around the 343 // mgo/txn package. See State.cleanupRelationSettings for a similar situation. 344 metricsW := metrics.Writeable() 345 // TODO (mattyw) iter over this. 
346 info, err := metricsW.RemoveAll(bson.M{ 347 "model-uuid": st.ModelUUID(), 348 "sent": true, 349 "delete-time": bson.M{"$lte": now}, 350 }) 351 if err == nil { 352 metricsLogger.Tracef("cleanup removed %d metrics", info.Removed) 353 } 354 return errors.Trace(err) 355 } 356 357 // MetricsToSend returns batchSize metrics that need to be sent 358 // to the collector 359 func (st *State) MetricsToSend(batchSize int) ([]*MetricBatch, error) { 360 var docs []metricBatchDoc 361 c, closer := st.db().GetCollection(metricsC) 362 defer closer() 363 364 q := bson.M{ 365 "model-uuid": st.ModelUUID(), 366 "sent": false, 367 } 368 err := c.Find(q).Limit(batchSize).All(&docs) 369 if err != nil { 370 return nil, errors.Trace(err) 371 } 372 373 batch := make([]*MetricBatch, len(docs)) 374 for i, doc := range docs { 375 batch[i] = &MetricBatch{st: st, doc: doc} 376 377 } 378 379 return batch, nil 380 } 381 382 // CountOfUnsentMetrics returns the number of metrics that 383 // haven't been sent to the collection service. 384 func (st *State) CountOfUnsentMetrics() (int, error) { 385 c, closer := st.db().GetCollection(metricsC) 386 defer closer() 387 return c.Find(bson.M{ 388 "model-uuid": st.ModelUUID(), 389 "sent": false, 390 }).Count() 391 } 392 393 // CountOfSentMetrics returns the number of metrics that 394 // have been sent to the collection service and have not 395 // been removed by the cleanup worker. 396 func (st *State) CountOfSentMetrics() (int, error) { 397 c, closer := st.db().GetCollection(metricsC) 398 defer closer() 399 return c.Find(bson.M{ 400 "model-uuid": st.ModelUUID(), 401 "sent": true, 402 }).Count() 403 } 404 405 // MarshalJSON defines how the MetricBatch type should be 406 // converted to json. 407 func (m *MetricBatch) MarshalJSON() ([]byte, error) { 408 return json.Marshal(m.doc) 409 } 410 411 // UUID returns to uuid of the metric. 
412 func (m *MetricBatch) UUID() string { 413 return m.doc.UUID 414 } 415 416 // ModelUUID returns the model UUID this metric applies to. 417 func (m *MetricBatch) ModelUUID() string { 418 return m.doc.ModelUUID 419 } 420 421 // Unit returns the name of the unit this metric was generated in. 422 func (m *MetricBatch) Unit() string { 423 return m.doc.Unit 424 } 425 426 // CharmURL returns the charm url for the charm this metric was generated in. 427 func (m *MetricBatch) CharmURL() string { 428 return m.doc.CharmURL 429 } 430 431 // Created returns the time this metric batch was created. 432 func (m *MetricBatch) Created() time.Time { 433 return m.doc.Created 434 } 435 436 // Sent returns a flag to tell us if this metric has been sent to the metric 437 // collection service 438 func (m *MetricBatch) Sent() bool { 439 return m.doc.Sent 440 } 441 442 // Metrics returns the metrics in this batch. 443 func (m *MetricBatch) Metrics() []Metric { 444 result := make([]Metric, len(m.doc.Metrics)) 445 copy(result, m.doc.Metrics) 446 return result 447 } 448 449 // UniqueMetrics returns only the last value for each 450 // metric key in this batch. 451 func (m *MetricBatch) UniqueMetrics() []Metric { 452 metrics := m.Metrics() 453 sort.Sort(byTime(metrics)) 454 uniq := map[string]Metric{} 455 for _, m := range metrics { 456 uniq[fmt.Sprintf("%s-%s", m.Key, labelsKey(m.Labels))] = m 457 } 458 results := make([]Metric, len(uniq)) 459 i := 0 460 for _, m := range uniq { 461 results[i] = m 462 i++ 463 } 464 sort.Sort(byKey(results)) 465 return results 466 } 467 468 // SetSent marks the metric has having been sent at 469 // the specified time. 
470 func (m *MetricBatch) SetSent(t time.Time) error { 471 deleteTime := t.UTC().Add(CleanupAge) 472 ops := setSentOps([]string{m.UUID()}, deleteTime) 473 if err := m.st.db().RunTransaction(ops); err != nil { 474 return errors.Annotatef(err, "cannot set metric sent for metric %q", m.UUID()) 475 } 476 477 m.doc.Sent = true 478 m.doc.DeleteTime = deleteTime 479 return nil 480 } 481 482 // Credentials returns any credentials associated with the metric batch. 483 func (m *MetricBatch) Credentials() []byte { 484 return m.doc.Credentials 485 } 486 487 // SLACredentials returns any sla credentials associated with the metric batch. 488 func (m *MetricBatch) SLACredentials() []byte { 489 return m.doc.SLACredentials 490 } 491 492 func setSentOps(batchUUIDs []string, deleteTime time.Time) []txn.Op { 493 ops := make([]txn.Op, len(batchUUIDs)) 494 for i, u := range batchUUIDs { 495 ops[i] = txn.Op{ 496 C: metricsC, 497 Id: u, 498 Assert: txn.DocExists, 499 Update: bson.M{"$set": bson.M{"sent": true, "delete-time": deleteTime}}, 500 } 501 } 502 return ops 503 } 504 505 // SetMetricBatchesSent sets sent on each MetricBatch corresponding to the uuids provided. 506 func (st *State) SetMetricBatchesSent(batchUUIDs []string) error { 507 deleteTime := st.clock().Now().UTC().Add(CleanupAge) 508 ops := setSentOps(batchUUIDs, deleteTime) 509 if err := st.db().RunTransaction(ops); err != nil { 510 return errors.Annotatef(err, "cannot set metric sent in bulk call") 511 } 512 return nil 513 }