// Source: github.com/Jeffail/benthos/v3@v3.65.0/lib/metrics/cloudwatch.go

package metrics

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/Jeffail/benthos/v3/internal/docs"
	"github.com/Jeffail/benthos/v3/lib/log"
	"github.com/Jeffail/benthos/v3/lib/types"
	"github.com/Jeffail/benthos/v3/lib/util/aws/session"
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/request"
	"github.com/aws/aws-sdk-go/service/cloudwatch"
	"github.com/aws/aws-sdk-go/service/cloudwatch/cloudwatchiface"
)

//------------------------------------------------------------------------------

// init registers the CloudWatch metrics type under two names: the current
// TypeAWSCloudWatch and the deprecated TypeCloudWatch. Both entries document
// the same fields; they differ only in constructor, status and the rename
// notice in the description.
//
// NOTE(review): the indentation inside the YAML examples below is runtime
// text (it is rendered into the component docs) — confirm it against the
// upstream file.
func init() {
	Constructors[TypeAWSCloudWatch] = TypeSpec{
		constructor: NewAWSCloudWatch,
		Version:     "3.36.0",
		Summary: `
Send metrics to AWS CloudWatch using the PutMetricData endpoint.`,
		Description: `
It is STRONGLY recommended that you reduce the metrics that are exposed with a
` + "`path_mapping`" + ` like this:

` + "```yaml" + `
metrics:
  aws_cloudwatch:
    namespace: Foo
    path_mapping: |
      if ![
        "input.received",
        "input.latency",
        "output.sent",
      ].contains(this) { deleted() }
` + "```" + ``,
		// Component-specific fields first, followed by the shared AWS session
		// fields.
		FieldSpecs: append(docs.FieldSpecs{
			docs.FieldCommon("namespace", "The namespace used to distinguish metrics from other services."),
			docs.FieldAdvanced("flush_period", "The period of time between PutMetricData requests."),
			pathMappingDocs(true, false),
		}, session.FieldSpecs()...),
	}

	// Deprecated name for the same component, kept so existing configs that
	// still reference it are recognised.
	Constructors[TypeCloudWatch] = TypeSpec{
		constructor: NewCloudWatch,
		Status:      docs.StatusDeprecated,
		Summary: `
Send metrics to AWS CloudWatch using the PutMetricData endpoint.`,
		Description: `
## Alternatives

This metrics type has been renamed to ` + "[`aws_cloudwatch`](/docs/components/metrics/aws_cloudwatch)" + `.

It is STRONGLY recommended that you reduce the metrics that are exposed with a
` + "`path_mapping`" + ` like this:

` + "```yaml" + `
metrics:
  aws_cloudwatch:
    namespace: Foo
    path_mapping: |
      if ![
        "input.received",
        "input.latency",
        "output.sent",
      ].contains(this) { deleted() }
` + "```" + ``,
		FieldSpecs: append(docs.FieldSpecs{
			docs.FieldCommon("namespace", "The namespace used to distinguish metrics from other services."),
			docs.FieldAdvanced("flush_period", "The period of time between PutMetricData requests."),
			pathMappingDocs(true, false),
		}, session.FieldSpecs()...),
	}
}

//------------------------------------------------------------------------------

// CloudWatchConfig contains config fields for the CloudWatch metrics type.
type CloudWatchConfig struct {
	// AWS session settings, inlined into the same config object.
	session.Config `json:",inline" yaml:",inline"`
	// Namespace is sent as the Namespace of every PutMetricData request.
	Namespace string `json:"namespace" yaml:"namespace"`
	// FlushPeriod is a duration string (parsed with time.ParseDuration) that
	// sets the interval between flushes.
	FlushPeriod string `json:"flush_period" yaml:"flush_period"`
	// PathMapping is an optional mapping applied to metric paths before they
	// are turned into metric names and labels.
	PathMapping string `json:"path_mapping" yaml:"path_mapping"`
}

// NewCloudWatchConfig creates a CloudWatchConfig struct with default values.
func NewCloudWatchConfig() CloudWatchConfig {
	return CloudWatchConfig{
		Config:      session.NewConfig(),
		Namespace:   "Benthos",
		FlushPeriod: "100ms",
		PathMapping: "",
	}
}

//------------------------------------------------------------------------------

// maxCloudWatchMetrics is the largest number of datums placed in a single
// PutMetricData request by flush.
const maxCloudWatchMetrics = 20

// maxCloudWatchValues is the ceiling on distinct values reported for a single
// datum.
const maxCloudWatchValues = 150

// maxCloudWatchDimensions is the largest number of dimensions attached to a
// single stat.
const maxCloudWatchDimensions = 10

// cloudWatchDatum is the in-memory aggregate for one metric between flushes.
type cloudWatchDatum struct {
	MetricName string
	Unit       string
	Dimensions []*cloudwatch.Dimension
	Timestamp  time.Time
	// Value is the running sum maintained by addValue (counters).
	Value int64
	// Values maps an observed value to the number of times it was seen, as
	// maintained by appendValue (timers and gauges).
	Values map[int64]int64
}

// cloudWatchStat is a handle on a single metric. It implements the counter,
// timer and gauge operations by writing into the datum map of its root.
type cloudWatchStat struct {
	root *CloudWatch
	// id is the key of this stat's datum in root.datumses.
	id         string
	name       string
	unit       string
	dimensions []*cloudwatch.Dimension
}

// Trims a map of datum values to a ceiling.
The primary goal here is to be fast 125 // and efficient rather than accurately preserving the most common values. 126 func trimValuesMap(m map[int64]int64) { 127 ceiling := maxCloudWatchValues 128 129 // Start off by randomly removing values that have been seen only once. 130 for k, v := range m { 131 if len(m) <= ceiling { 132 // If we reach our ceiling already then we're done. 133 return 134 } 135 if v == 1 { 136 delete(m, k) 137 } 138 } 139 140 // Next, randomly remove any values until ceiling is hit. 141 for k := range m { 142 if len(m) <= ceiling { 143 return 144 } 145 delete(m, k) 146 } 147 } 148 149 func (c *cloudWatchStat) appendValue(v int64) { 150 c.root.datumLock.Lock() 151 existing := c.root.datumses[c.id] 152 if existing == nil { 153 existing = &cloudWatchDatum{ 154 MetricName: c.name, 155 Unit: c.unit, 156 Dimensions: c.dimensions, 157 Timestamp: time.Now(), 158 Values: map[int64]int64{v: 1}, 159 } 160 c.root.datumses[c.id] = existing 161 } else { 162 tally := existing.Values[v] 163 existing.Values[v] = tally + 1 164 if len(existing.Values) > maxCloudWatchValues*5 { 165 trimValuesMap(existing.Values) 166 } 167 } 168 c.root.datumLock.Unlock() 169 } 170 171 func (c *cloudWatchStat) addValue(v int64) { 172 c.root.datumLock.Lock() 173 existing := c.root.datumses[c.id] 174 if existing == nil { 175 existing = &cloudWatchDatum{ 176 MetricName: c.name, 177 Unit: c.unit, 178 Dimensions: c.dimensions, 179 Timestamp: time.Now(), 180 Value: v, 181 } 182 c.root.datumses[c.id] = existing 183 } else { 184 existing.Value += v 185 } 186 c.root.datumLock.Unlock() 187 } 188 189 // Incr increments a metric by an amount. 190 func (c *cloudWatchStat) Incr(count int64) error { 191 c.addValue(count) 192 return nil 193 } 194 195 // Decr decrements a metric by an amount. 196 func (c *cloudWatchStat) Decr(count int64) error { 197 c.addValue(-count) 198 return nil 199 } 200 201 // Timing sets a timing metric. 
202 func (c *cloudWatchStat) Timing(delta int64) error { 203 // Most granular value for timing metrics in cloudwatch is microseconds 204 // versus nanoseconds. 205 c.appendValue(delta / 1000) 206 return nil 207 } 208 209 // Set sets a gauge metric. 210 func (c *cloudWatchStat) Set(value int64) error { 211 c.appendValue(value) 212 return nil 213 } 214 215 type cloudWatchStatVec struct { 216 root *CloudWatch 217 name string 218 unit string 219 labelNames []string 220 } 221 222 func (c *cloudWatchStatVec) with(labelValues ...string) *cloudWatchStat { 223 lDim := len(c.labelNames) 224 if lDim >= maxCloudWatchDimensions { 225 lDim = maxCloudWatchDimensions 226 } 227 dimensions := make([]*cloudwatch.Dimension, lDim) 228 for i, k := range c.labelNames { 229 if len(labelValues) <= i || i >= maxCloudWatchDimensions { 230 break 231 } 232 dimensions[i] = &cloudwatch.Dimension{ 233 Name: aws.String(k), 234 Value: aws.String(labelValues[i]), 235 } 236 } 237 return &cloudWatchStat{ 238 root: c.root, 239 id: c.name + fmt.Sprintf("%v", labelValues), 240 name: c.name, 241 unit: c.unit, 242 dimensions: dimensions, 243 } 244 } 245 246 type cloudWatchCounterVec struct { 247 cloudWatchStatVec 248 } 249 250 func (c *cloudWatchCounterVec) With(labelValues ...string) StatCounter { 251 return c.with(labelValues...) 252 } 253 254 type cloudWatchTimerVec struct { 255 cloudWatchStatVec 256 } 257 258 func (c *cloudWatchTimerVec) With(labelValues ...string) StatTimer { 259 return c.with(labelValues...) 260 } 261 262 type cloudWatchGaugeVec struct { 263 cloudWatchStatVec 264 } 265 266 func (c *cloudWatchGaugeVec) With(labelValues ...string) StatGauge { 267 return c.with(labelValues...) 268 } 269 270 //------------------------------------------------------------------------------ 271 272 // CloudWatch is a stats object with capability to hold internal stats as a JSON 273 // endpoint. 
274 type CloudWatch struct { 275 client cloudwatchiface.CloudWatchAPI 276 277 datumses map[string]*cloudWatchDatum 278 datumLock *sync.Mutex 279 280 flushPeriod time.Duration 281 282 ctx context.Context 283 cancel func() 284 285 pathMapping *pathMapping 286 config CloudWatchConfig 287 log log.Modular 288 } 289 290 // NewAWSCloudWatch creates and returns a new CloudWatch object. 291 func NewAWSCloudWatch(config Config, opts ...func(Type)) (Type, error) { 292 return newCloudWatch(config.AWSCloudWatch, opts...) 293 } 294 295 // NewCloudWatch creates and returns a new CloudWatch object. 296 func NewCloudWatch(config Config, opts ...func(Type)) (Type, error) { 297 return newCloudWatch(config.CloudWatch, opts...) 298 } 299 300 func newCloudWatch(config CloudWatchConfig, opts ...func(Type)) (Type, error) { 301 c := &CloudWatch{ 302 config: config, 303 datumses: map[string]*cloudWatchDatum{}, 304 datumLock: &sync.Mutex{}, 305 log: log.Noop(), 306 } 307 308 c.ctx, c.cancel = context.WithCancel(context.Background()) 309 for _, opt := range opts { 310 opt(c) 311 } 312 313 var err error 314 if c.pathMapping, err = newPathMapping(config.PathMapping, c.log); err != nil { 315 return nil, fmt.Errorf("failed to init path mapping: %v", err) 316 } 317 318 sess, err := config.GetSession() 319 if err != nil { 320 return nil, err 321 } 322 323 if c.flushPeriod, err = time.ParseDuration(config.FlushPeriod); err != nil { 324 return nil, fmt.Errorf("failed to parse flush period: %v", err) 325 } 326 327 c.client = cloudwatch.New(sess) 328 go c.loop() 329 return c, nil 330 } 331 332 //------------------------------------------------------------------------------ 333 334 func (c *CloudWatch) toCMName(dotSepName string) (outPath string, labelNames, labelValues []string) { 335 return c.pathMapping.mapPathWithTags(dotSepName) 336 } 337 338 // GetCounter returns a stat counter object for a path. 
//
// The path is first passed through the path mapping. An empty mapped name
// yields a DudStat; labels produced by the mapping become dimensions of the
// returned stat.
func (c *CloudWatch) GetCounter(path string) StatCounter {
	name, labels, values := c.toCMName(path)
	if name == "" {
		return DudStat{}
	}
	if len(labels) == 0 {
		return &cloudWatchStat{
			root: c,
			id:   name,
			name: name,
			unit: cloudwatch.StandardUnitCount,
		}
	}
	return (&cloudWatchCounterVec{
		cloudWatchStatVec: cloudWatchStatVec{
			root:       c,
			name:       name,
			unit:       cloudwatch.StandardUnitCount,
			labelNames: labels,
		},
	}).With(values...)
}

// GetCounterVec returns a stat counter object for a path with the labels
// n. Labels produced by the path mapping are placed ahead of n, and their
// values are prepended to the values supplied to With.
func (c *CloudWatch) GetCounterVec(path string, n []string) StatCounterVec {
	name, labels, values := c.toCMName(path)
	if name == "" {
		return fakeCounterVec(func([]string) StatCounter {
			return DudStat{}
		})
	}
	if len(labels) > 0 {
		labels = append(labels, n...)
		return fakeCounterVec(func(vs []string) StatCounter {
			// Copy the mapped values so each call builds its own slice.
			fvs := append([]string{}, values...)
			fvs = append(fvs, vs...)
			return (&cloudWatchCounterVec{
				cloudWatchStatVec: cloudWatchStatVec{
					root:       c,
					name:       name,
					unit:       cloudwatch.StandardUnitCount,
					labelNames: labels,
				},
			}).With(fvs...)
		})
	}
	return &cloudWatchCounterVec{
		cloudWatchStatVec: cloudWatchStatVec{
			root:       c,
			name:       name,
			unit:       cloudwatch.StandardUnitCount,
			labelNames: n,
		},
	}
}

// GetTimer returns a stat timer object for a path. Timers are reported with
// the microseconds unit; name and label handling matches GetCounter.
func (c *CloudWatch) GetTimer(path string) StatTimer {
	name, labels, values := c.toCMName(path)
	if name == "" {
		return DudStat{}
	}
	if len(labels) == 0 {
		return &cloudWatchStat{
			root: c,
			id:   name,
			name: name,
			unit: cloudwatch.StandardUnitMicroseconds,
		}
	}
	return (&cloudWatchTimerVec{
		cloudWatchStatVec: cloudWatchStatVec{
			root:       c,
			name:       name,
			unit:       cloudwatch.StandardUnitMicroseconds,
			labelNames: labels,
		},
	}).With(values...)
}

// GetTimerVec returns a stat timer object for a path with the labels n.
// Labels produced by the path mapping are placed ahead of n, and their values
// are prepended to the values supplied to With.
func (c *CloudWatch) GetTimerVec(path string, n []string) StatTimerVec {
	name, labels, values := c.toCMName(path)
	if name == "" {
		return fakeTimerVec(func([]string) StatTimer {
			return DudStat{}
		})
	}
	if len(labels) > 0 {
		labels = append(labels, n...)
		return fakeTimerVec(func(vs []string) StatTimer {
			// Copy the mapped values so each call builds its own slice.
			fvs := append([]string{}, values...)
			fvs = append(fvs, vs...)
			return (&cloudWatchTimerVec{
				cloudWatchStatVec: cloudWatchStatVec{
					root:       c,
					name:       name,
					unit:       cloudwatch.StandardUnitMicroseconds,
					labelNames: labels,
				},
			}).With(fvs...)
		})
	}
	return &cloudWatchTimerVec{
		cloudWatchStatVec: cloudWatchStatVec{
			root:       c,
			name:       name,
			unit:       cloudwatch.StandardUnitMicroseconds,
			labelNames: n,
		},
	}
}

// GetGauge returns a stat gauge object for a path. Gauges are reported with
// the "None" unit; name and label handling matches GetCounter.
func (c *CloudWatch) GetGauge(path string) StatGauge {
	name, labels, values := c.toCMName(path)
	if name == "" {
		return DudStat{}
	}
	if len(labels) == 0 {
		return &cloudWatchStat{
			root: c,
			id:   name,
			name: name,
			unit: cloudwatch.StandardUnitNone,
		}
	}
	return (&cloudWatchGaugeVec{
		cloudWatchStatVec: cloudWatchStatVec{
			root:       c,
			name:       name,
			unit:       cloudwatch.StandardUnitNone,
			labelNames: labels,
		},
	}).With(values...)
}

// GetGaugeVec returns a stat gauge object for a path with the labels n.
// Labels produced by the path mapping are placed ahead of n, and their values
// are prepended to the values supplied to With.
func (c *CloudWatch) GetGaugeVec(path string, n []string) StatGaugeVec {
	name, labels, values := c.toCMName(path)
	if name == "" {
		return fakeGaugeVec(func([]string) StatGauge {
			return DudStat{}
		})
	}
	if len(labels) > 0 {
		labels = append(labels, n...)
		return fakeGaugeVec(func(vs []string) StatGauge {
			// Copy the mapped values so each call builds its own slice.
			fvs := append([]string{}, values...)
			fvs = append(fvs, vs...)
			return (&cloudWatchGaugeVec{
				cloudWatchStatVec: cloudWatchStatVec{
					root:       c,
					name:       name,
					unit:       cloudwatch.StandardUnitNone,
					labelNames: labels,
				},
			}).With(fvs...)
		})
	}
	return &cloudWatchGaugeVec{
		cloudWatchStatVec: cloudWatchStatVec{
			root:       c,
			name:       name,
			unit:       cloudwatch.StandardUnitNone,
			labelNames: n,
		},
	}
}

//------------------------------------------------------------------------------

// loop flushes pending datums once per flushPeriod until the context is
// cancelled. It is run as a goroutine by the constructor.
func (c *CloudWatch) loop() {
	ticker := time.NewTicker(c.flushPeriod)
	defer ticker.Stop()
	for {
		select {
		case <-c.ctx.Done():
			return
		case <-ticker.C:
			c.flush()
		}
	}
}

// valuesMapToSlices converts a value->count histogram into the parallel
// values/counts slices used by a MetricDatum.
//
// At most maxCloudWatchValues entries are returned. When the map is at or
// above that size, values seen more than once are taken first and the rest is
// filled in map iteration order. counts is nil when every returned count is 1.
//
// Note that the selection pass deletes the entries it takes from m, so the
// caller's map is modified.
func valuesMapToSlices(m map[int64]int64) (values, counts []*float64) {
	ceiling := maxCloudWatchValues
	lM := len(m)

	useCounts := false
	// Fast path: everything fits, copy it all across.
	if lM < ceiling {
		values = make([]*float64, 0, lM)
		counts = make([]*float64, 0, lM)

		for k, v := range m {
			values = append(values, aws.Float64(float64(k)))
			counts = append(counts, aws.Float64(float64(v)))
			if v > 1 {
				useCounts = true
			}
		}

		if !useCounts {
			counts = nil
		}
		return
	}

	values = make([]*float64, 0, ceiling)
	counts = make([]*float64, 0, ceiling)

	// Try and make our target without taking values with one count.
	for k, v := range m {
		if len(values) == ceiling {
			// Every entry taken so far has a count above one, so counts is
			// returned as is.
			return
		}
		if v > 1 {
			values = append(values, aws.Float64(float64(k)))
			counts = append(counts, aws.Float64(float64(v)))
			useCounts = true
			// Remove it so the second pass does not take it again.
			delete(m, k)
		}
	}

	// Otherwise take randomly.
	for k, v := range m {
		if len(values) == ceiling {
			break
		}
		values = append(values, aws.Float64(float64(k)))
		counts = append(counts, aws.Float64(float64(v)))
	}

	if !useCounts {
		counts = nil
	}
	return
}

// flush sends every datum accumulated since the previous flush to CloudWatch.
//
// The pending map is swapped for a fresh one under the lock, so stats can keep
// writing while requests are in flight. Datums are sent in batches of at most
// maxCloudWatchMetrics. A throttled batch is retried after one second; a batch
// that fails for any other reason is logged and, after the same pause,
// skipped. If the context is cancelled during that pause the remaining datums
// are abandoned and types.ErrTimeout is returned.
func (c *CloudWatch) flush() error {
	c.datumLock.Lock()
	datumMap := c.datumses
	c.datumses = map[string]*cloudWatchDatum{}
	c.datumLock.Unlock()

	datums := []*cloudwatch.MetricDatum{}
	for _, v := range datumMap {
		if v != nil {
			d := cloudwatch.MetricDatum{
				MetricName: &v.MetricName,
				Dimensions: v.Dimensions,
				Unit:       &v.Unit,
				Timestamp:  &v.Timestamp,
			}
			// A non-empty histogram takes precedence over the summed value.
			if len(v.Values) > 0 {
				d.Values, d.Counts = valuesMapToSlices(v.Values)
			} else {
				d.Value = aws.Float64(float64(v.Value))
			}
			datums = append(datums, &d)
		}
	}

	input := cloudwatch.PutMetricDataInput{
		Namespace:  &c.config.Namespace,
		MetricData: datums,
	}

	// Invariant at the top of each iteration: when throttled is false,
	// datums holds everything not yet attempted; when true, input.MetricData
	// is the batch to retry and datums is what follows it.
	throttled := false
	for len(input.MetricData) > 0 {
		if !throttled {
			// Carve the next batch off the front of the remaining datums.
			if len(datums) > maxCloudWatchMetrics {
				input.MetricData, datums = datums[:maxCloudWatchMetrics], datums[maxCloudWatchMetrics:]
			} else {
				datums = nil
			}
		}
		throttled = false

		if _, err := c.client.PutMetricData(&input); err != nil {
			if request.IsErrorThrottle(err) {
				throttled = true
				c.log.Warnln("Metrics request was throttled. Either increase flush period or reduce number of services sending metrics.")
			} else {
				c.log.Errorf("Failed to send metric data: %v\n", err)
			}
			// Back off before the next request, unless we are shutting down.
			select {
			case <-time.After(time.Second):
			case <-c.ctx.Done():
				return types.ErrTimeout
			}
		}

		if !throttled {
			// Advance to whatever is left; an empty remainder ends the loop.
			input.MetricData = datums
		}
	}

	return nil
}

//------------------------------------------------------------------------------

// SetLogger sets the logger used to print connection errors.
643 func (c *CloudWatch) SetLogger(log log.Modular) { 644 c.log = log 645 } 646 647 // Close stops the CloudWatch object from aggregating metrics and cleans up 648 // resources. 649 func (c *CloudWatch) Close() error { 650 c.cancel() 651 c.flush() 652 return nil 653 } 654 655 //------------------------------------------------------------------------------