github.com/Jeffail/benthos/v3@v3.65.0/lib/metrics/cloudwatch.go (about)

     1  package metrics
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync"
     7  	"time"
     8  
     9  	"github.com/Jeffail/benthos/v3/internal/docs"
    10  	"github.com/Jeffail/benthos/v3/lib/log"
    11  	"github.com/Jeffail/benthos/v3/lib/types"
    12  	"github.com/Jeffail/benthos/v3/lib/util/aws/session"
    13  	"github.com/aws/aws-sdk-go/aws"
    14  	"github.com/aws/aws-sdk-go/aws/request"
    15  	"github.com/aws/aws-sdk-go/service/cloudwatch"
    16  	"github.com/aws/aws-sdk-go/service/cloudwatch/cloudwatchiface"
    17  )
    18  
    19  //------------------------------------------------------------------------------
    20  
    21  func init() {
    22  	Constructors[TypeAWSCloudWatch] = TypeSpec{
    23  		constructor: NewAWSCloudWatch,
    24  		Version:     "3.36.0",
    25  		Summary: `
    26  Send metrics to AWS CloudWatch using the PutMetricData endpoint.`,
    27  		Description: `
    28  It is STRONGLY recommended that you reduce the metrics that are exposed with a
    29  ` + "`path_mapping`" + ` like this:
    30  
    31  ` + "```yaml" + `
    32  metrics:
    33    aws_cloudwatch:
    34      namespace: Foo
    35      path_mapping: |
    36        if ![
    37          "input.received",
    38          "input.latency",
    39          "output.sent",
    40        ].contains(this) { deleted() }
    41  ` + "```" + ``,
    42  		FieldSpecs: append(docs.FieldSpecs{
    43  			docs.FieldCommon("namespace", "The namespace used to distinguish metrics from other services."),
    44  			docs.FieldAdvanced("flush_period", "The period of time between PutMetricData requests."),
    45  			pathMappingDocs(true, false),
    46  		}, session.FieldSpecs()...),
    47  	}
    48  
    49  	Constructors[TypeCloudWatch] = TypeSpec{
    50  		constructor: NewCloudWatch,
    51  		Status:      docs.StatusDeprecated,
    52  		Summary: `
    53  Send metrics to AWS CloudWatch using the PutMetricData endpoint.`,
    54  		Description: `
    55  ## Alternatives
    56  
    57  This metrics type has been renamed to ` + "[`aws_cloudwatch`](/docs/components/metrics/aws_cloudwatch)" + `.
    58  
    59  It is STRONGLY recommended that you reduce the metrics that are exposed with a
    60  ` + "`path_mapping`" + ` like this:
    61  
    62  ` + "```yaml" + `
    63  metrics:
    64    aws_cloudwatch:
    65      namespace: Foo
    66      path_mapping: |
    67        if ![
    68          "input.received",
    69          "input.latency",
    70          "output.sent",
    71        ].contains(this) { deleted() }
    72  ` + "```" + ``,
    73  		FieldSpecs: append(docs.FieldSpecs{
    74  			docs.FieldCommon("namespace", "The namespace used to distinguish metrics from other services."),
    75  			docs.FieldAdvanced("flush_period", "The period of time between PutMetricData requests."),
    76  			pathMappingDocs(true, false),
    77  		}, session.FieldSpecs()...),
    78  	}
    79  }
    80  
    81  //------------------------------------------------------------------------------
    82  
// CloudWatchConfig contains config fields for the CloudWatch metrics type.
//
// The embedded session.Config is inlined into the same JSON/YAML object and is
// used to obtain the AWS session. Namespace is sent as the namespace of every
// PutMetricData request. FlushPeriod is a duration string (parsed with
// time.ParseDuration) that sets the interval between flushes. PathMapping is
// the mapping handed to newPathMapping to rename or drop metric paths.
type CloudWatchConfig struct {
	session.Config `json:",inline" yaml:",inline"`
	Namespace      string `json:"namespace" yaml:"namespace"`
	FlushPeriod    string `json:"flush_period" yaml:"flush_period"`
	PathMapping    string `json:"path_mapping" yaml:"path_mapping"`
}
    90  
    91  // NewCloudWatchConfig creates an CloudWatchConfig struct with default values.
    92  func NewCloudWatchConfig() CloudWatchConfig {
    93  	return CloudWatchConfig{
    94  		Config:      session.NewConfig(),
    95  		Namespace:   "Benthos",
    96  		FlushPeriod: "100ms",
    97  		PathMapping: "",
    98  	}
    99  }
   100  
   101  //------------------------------------------------------------------------------
   102  
   103  const maxCloudWatchMetrics = 20
   104  const maxCloudWatchValues = 150
   105  const maxCloudWatchDimensions = 10
   106  
// cloudWatchDatum accumulates the data collected for one metric between two
// flushes.
//
// Value is the running sum maintained by addValue. Values maps each observed
// value to the number of times it was seen and is maintained by appendValue.
// When flushed, Values is sent if it is non-empty, otherwise Value is sent.
// Timestamp is set when the datum is first created.
type cloudWatchDatum struct {
	MetricName string
	Unit       string
	Dimensions []*cloudwatch.Dimension
	Timestamp  time.Time
	Value      int64
	Values     map[int64]int64
}
   115  
// cloudWatchStat is a handle for writing to a single metric. It holds no data
// itself: every write goes to the datum stored under id in root.datumses,
// guarded by root.datumLock. name, unit and dimensions are copied onto the
// datum when it is first created.
type cloudWatchStat struct {
	root       *CloudWatch
	id         string
	name       string
	unit       string
	dimensions []*cloudwatch.Dimension
}
   123  
   124  // Trims a map of datum values to a ceiling. The primary goal here is to be fast
   125  // and efficient rather than accurately preserving the most common values.
   126  func trimValuesMap(m map[int64]int64) {
   127  	ceiling := maxCloudWatchValues
   128  
   129  	// Start off by randomly removing values that have been seen only once.
   130  	for k, v := range m {
   131  		if len(m) <= ceiling {
   132  			// If we reach our ceiling already then we're done.
   133  			return
   134  		}
   135  		if v == 1 {
   136  			delete(m, k)
   137  		}
   138  	}
   139  
   140  	// Next, randomly remove any values until ceiling is hit.
   141  	for k := range m {
   142  		if len(m) <= ceiling {
   143  			return
   144  		}
   145  		delete(m, k)
   146  	}
   147  }
   148  
   149  func (c *cloudWatchStat) appendValue(v int64) {
   150  	c.root.datumLock.Lock()
   151  	existing := c.root.datumses[c.id]
   152  	if existing == nil {
   153  		existing = &cloudWatchDatum{
   154  			MetricName: c.name,
   155  			Unit:       c.unit,
   156  			Dimensions: c.dimensions,
   157  			Timestamp:  time.Now(),
   158  			Values:     map[int64]int64{v: 1},
   159  		}
   160  		c.root.datumses[c.id] = existing
   161  	} else {
   162  		tally := existing.Values[v]
   163  		existing.Values[v] = tally + 1
   164  		if len(existing.Values) > maxCloudWatchValues*5 {
   165  			trimValuesMap(existing.Values)
   166  		}
   167  	}
   168  	c.root.datumLock.Unlock()
   169  }
   170  
   171  func (c *cloudWatchStat) addValue(v int64) {
   172  	c.root.datumLock.Lock()
   173  	existing := c.root.datumses[c.id]
   174  	if existing == nil {
   175  		existing = &cloudWatchDatum{
   176  			MetricName: c.name,
   177  			Unit:       c.unit,
   178  			Dimensions: c.dimensions,
   179  			Timestamp:  time.Now(),
   180  			Value:      v,
   181  		}
   182  		c.root.datumses[c.id] = existing
   183  	} else {
   184  		existing.Value += v
   185  	}
   186  	c.root.datumLock.Unlock()
   187  }
   188  
// Incr increments a metric by an amount. The count is added to the running sum
// held for this stat until the next flush. The returned error is always nil.
func (c *cloudWatchStat) Incr(count int64) error {
	c.addValue(count)
	return nil
}
   194  
   195  // Decr decrements a metric by an amount.
   196  func (c *cloudWatchStat) Decr(count int64) error {
   197  	c.addValue(-count)
   198  	return nil
   199  }
   200  
   201  // Timing sets a timing metric.
   202  func (c *cloudWatchStat) Timing(delta int64) error {
   203  	// Most granular value for timing metrics in cloudwatch is microseconds
   204  	// versus nanoseconds.
   205  	c.appendValue(delta / 1000)
   206  	return nil
   207  }
   208  
// Set sets a gauge metric. The value is recorded as one observation via
// appendValue, so every value set since the last flush is tallied rather than
// only the latest one being kept. The returned error is always nil.
func (c *cloudWatchStat) Set(value int64) error {
	c.appendValue(value)
	return nil
}
   214  
// cloudWatchStatVec describes a labelled metric: a name and unit plus the
// ordered label names that become dimension names. Concrete cloudWatchStat
// handles are derived from it by supplying label values to with.
type cloudWatchStatVec struct {
	root       *CloudWatch
	name       string
	unit       string
	labelNames []string
}
   221  
   222  func (c *cloudWatchStatVec) with(labelValues ...string) *cloudWatchStat {
   223  	lDim := len(c.labelNames)
   224  	if lDim >= maxCloudWatchDimensions {
   225  		lDim = maxCloudWatchDimensions
   226  	}
   227  	dimensions := make([]*cloudwatch.Dimension, lDim)
   228  	for i, k := range c.labelNames {
   229  		if len(labelValues) <= i || i >= maxCloudWatchDimensions {
   230  			break
   231  		}
   232  		dimensions[i] = &cloudwatch.Dimension{
   233  			Name:  aws.String(k),
   234  			Value: aws.String(labelValues[i]),
   235  		}
   236  	}
   237  	return &cloudWatchStat{
   238  		root:       c.root,
   239  		id:         c.name + fmt.Sprintf("%v", labelValues),
   240  		name:       c.name,
   241  		unit:       c.unit,
   242  		dimensions: dimensions,
   243  	}
   244  }
   245  
   246  type cloudWatchCounterVec struct {
   247  	cloudWatchStatVec
   248  }
   249  
   250  func (c *cloudWatchCounterVec) With(labelValues ...string) StatCounter {
   251  	return c.with(labelValues...)
   252  }
   253  
   254  type cloudWatchTimerVec struct {
   255  	cloudWatchStatVec
   256  }
   257  
   258  func (c *cloudWatchTimerVec) With(labelValues ...string) StatTimer {
   259  	return c.with(labelValues...)
   260  }
   261  
   262  type cloudWatchGaugeVec struct {
   263  	cloudWatchStatVec
   264  }
   265  
   266  func (c *cloudWatchGaugeVec) With(labelValues ...string) StatGauge {
   267  	return c.with(labelValues...)
   268  }
   269  
   270  //------------------------------------------------------------------------------
   271  
// CloudWatch is a metrics type that aggregates stats in memory and
// periodically sends them to AWS CloudWatch with PutMetricData requests.
//
// datumses holds the data collected since the last flush, keyed by stat id,
// and is guarded by datumLock. A background loop flushes it every flushPeriod
// until ctx is cancelled through cancel, which Close does.
type CloudWatch struct {
	client cloudwatchiface.CloudWatchAPI

	datumses  map[string]*cloudWatchDatum
	datumLock *sync.Mutex

	flushPeriod time.Duration

	ctx    context.Context
	cancel func()

	pathMapping *pathMapping
	config      CloudWatchConfig
	log         log.Modular
}
   289  
   290  // NewAWSCloudWatch creates and returns a new CloudWatch object.
   291  func NewAWSCloudWatch(config Config, opts ...func(Type)) (Type, error) {
   292  	return newCloudWatch(config.AWSCloudWatch, opts...)
   293  }
   294  
   295  // NewCloudWatch creates and returns a new CloudWatch object.
   296  func NewCloudWatch(config Config, opts ...func(Type)) (Type, error) {
   297  	return newCloudWatch(config.CloudWatch, opts...)
   298  }
   299  
   300  func newCloudWatch(config CloudWatchConfig, opts ...func(Type)) (Type, error) {
   301  	c := &CloudWatch{
   302  		config:    config,
   303  		datumses:  map[string]*cloudWatchDatum{},
   304  		datumLock: &sync.Mutex{},
   305  		log:       log.Noop(),
   306  	}
   307  
   308  	c.ctx, c.cancel = context.WithCancel(context.Background())
   309  	for _, opt := range opts {
   310  		opt(c)
   311  	}
   312  
   313  	var err error
   314  	if c.pathMapping, err = newPathMapping(config.PathMapping, c.log); err != nil {
   315  		return nil, fmt.Errorf("failed to init path mapping: %v", err)
   316  	}
   317  
   318  	sess, err := config.GetSession()
   319  	if err != nil {
   320  		return nil, err
   321  	}
   322  
   323  	if c.flushPeriod, err = time.ParseDuration(config.FlushPeriod); err != nil {
   324  		return nil, fmt.Errorf("failed to parse flush period: %v", err)
   325  	}
   326  
   327  	c.client = cloudwatch.New(sess)
   328  	go c.loop()
   329  	return c, nil
   330  }
   331  
   332  //------------------------------------------------------------------------------
   333  
   334  func (c *CloudWatch) toCMName(dotSepName string) (outPath string, labelNames, labelValues []string) {
   335  	return c.pathMapping.mapPathWithTags(dotSepName)
   336  }
   337  
   338  // GetCounter returns a stat counter object for a path.
   339  func (c *CloudWatch) GetCounter(path string) StatCounter {
   340  	name, labels, values := c.toCMName(path)
   341  	if name == "" {
   342  		return DudStat{}
   343  	}
   344  	if len(labels) == 0 {
   345  		return &cloudWatchStat{
   346  			root: c,
   347  			id:   name,
   348  			name: name,
   349  			unit: cloudwatch.StandardUnitCount,
   350  		}
   351  	}
   352  	return (&cloudWatchCounterVec{
   353  		cloudWatchStatVec: cloudWatchStatVec{
   354  			root:       c,
   355  			name:       name,
   356  			unit:       cloudwatch.StandardUnitCount,
   357  			labelNames: labels,
   358  		},
   359  	}).With(values...)
   360  }
   361  
   362  // GetCounterVec returns a stat counter object for a path with the labels
   363  func (c *CloudWatch) GetCounterVec(path string, n []string) StatCounterVec {
   364  	name, labels, values := c.toCMName(path)
   365  	if name == "" {
   366  		return fakeCounterVec(func([]string) StatCounter {
   367  			return DudStat{}
   368  		})
   369  	}
   370  	if len(labels) > 0 {
   371  		labels = append(labels, n...)
   372  		return fakeCounterVec(func(vs []string) StatCounter {
   373  			fvs := append([]string{}, values...)
   374  			fvs = append(fvs, vs...)
   375  			return (&cloudWatchCounterVec{
   376  				cloudWatchStatVec: cloudWatchStatVec{
   377  					root:       c,
   378  					name:       name,
   379  					unit:       cloudwatch.StandardUnitCount,
   380  					labelNames: labels,
   381  				},
   382  			}).With(fvs...)
   383  		})
   384  	}
   385  	return &cloudWatchCounterVec{
   386  		cloudWatchStatVec: cloudWatchStatVec{
   387  			root:       c,
   388  			name:       name,
   389  			unit:       cloudwatch.StandardUnitCount,
   390  			labelNames: n,
   391  		},
   392  	}
   393  }
   394  
   395  // GetTimer returns a stat timer object for a path.
   396  func (c *CloudWatch) GetTimer(path string) StatTimer {
   397  	name, labels, values := c.toCMName(path)
   398  	if name == "" {
   399  		return DudStat{}
   400  	}
   401  	if len(labels) == 0 {
   402  		return &cloudWatchStat{
   403  			root: c,
   404  			id:   name,
   405  			name: name,
   406  			unit: cloudwatch.StandardUnitMicroseconds,
   407  		}
   408  	}
   409  	return (&cloudWatchTimerVec{
   410  		cloudWatchStatVec: cloudWatchStatVec{
   411  			root:       c,
   412  			name:       name,
   413  			unit:       cloudwatch.StandardUnitMicroseconds,
   414  			labelNames: labels,
   415  		},
   416  	}).With(values...)
   417  }
   418  
   419  // GetTimerVec returns a stat timer object for a path with the labels
   420  func (c *CloudWatch) GetTimerVec(path string, n []string) StatTimerVec {
   421  	name, labels, values := c.toCMName(path)
   422  	if name == "" {
   423  		return fakeTimerVec(func([]string) StatTimer {
   424  			return DudStat{}
   425  		})
   426  	}
   427  	if len(labels) > 0 {
   428  		labels = append(labels, n...)
   429  		return fakeTimerVec(func(vs []string) StatTimer {
   430  			fvs := append([]string{}, values...)
   431  			fvs = append(fvs, vs...)
   432  			return (&cloudWatchTimerVec{
   433  				cloudWatchStatVec: cloudWatchStatVec{
   434  					root:       c,
   435  					name:       name,
   436  					unit:       cloudwatch.StandardUnitMicroseconds,
   437  					labelNames: labels,
   438  				},
   439  			}).With(fvs...)
   440  		})
   441  	}
   442  	return &cloudWatchTimerVec{
   443  		cloudWatchStatVec: cloudWatchStatVec{
   444  			root:       c,
   445  			name:       name,
   446  			unit:       cloudwatch.StandardUnitMicroseconds,
   447  			labelNames: n,
   448  		},
   449  	}
   450  }
   451  
   452  // GetGauge returns a stat gauge object for a path.
   453  func (c *CloudWatch) GetGauge(path string) StatGauge {
   454  	name, labels, values := c.toCMName(path)
   455  	if name == "" {
   456  		return DudStat{}
   457  	}
   458  	if len(labels) == 0 {
   459  		return &cloudWatchStat{
   460  			root: c,
   461  			id:   name,
   462  			name: name,
   463  			unit: cloudwatch.StandardUnitNone,
   464  		}
   465  	}
   466  	return (&cloudWatchGaugeVec{
   467  		cloudWatchStatVec: cloudWatchStatVec{
   468  			root:       c,
   469  			name:       name,
   470  			unit:       cloudwatch.StandardUnitNone,
   471  			labelNames: labels,
   472  		},
   473  	}).With(values...)
   474  }
   475  
   476  // GetGaugeVec returns a stat timer object for a path with the labels
   477  func (c *CloudWatch) GetGaugeVec(path string, n []string) StatGaugeVec {
   478  	name, labels, values := c.toCMName(path)
   479  	if name == "" {
   480  		return fakeGaugeVec(func([]string) StatGauge {
   481  			return DudStat{}
   482  		})
   483  	}
   484  	if len(labels) > 0 {
   485  		labels = append(labels, n...)
   486  		return fakeGaugeVec(func(vs []string) StatGauge {
   487  			fvs := append([]string{}, values...)
   488  			fvs = append(fvs, vs...)
   489  			return (&cloudWatchGaugeVec{
   490  				cloudWatchStatVec: cloudWatchStatVec{
   491  					root:       c,
   492  					name:       name,
   493  					unit:       cloudwatch.StandardUnitNone,
   494  					labelNames: labels,
   495  				},
   496  			}).With(fvs...)
   497  		})
   498  	}
   499  	return &cloudWatchGaugeVec{
   500  		cloudWatchStatVec: cloudWatchStatVec{
   501  			root:       c,
   502  			name:       name,
   503  			unit:       cloudwatch.StandardUnitNone,
   504  			labelNames: n,
   505  		},
   506  	}
   507  }
   508  
   509  //------------------------------------------------------------------------------
   510  
   511  func (c *CloudWatch) loop() {
   512  	ticker := time.NewTicker(c.flushPeriod)
   513  	defer ticker.Stop()
   514  	for {
   515  		select {
   516  		case <-c.ctx.Done():
   517  			return
   518  		case <-ticker.C:
   519  			c.flush()
   520  		}
   521  	}
   522  }
   523  
   524  func valuesMapToSlices(m map[int64]int64) (values, counts []*float64) {
   525  	ceiling := maxCloudWatchValues
   526  	lM := len(m)
   527  
   528  	useCounts := false
   529  	if lM < ceiling {
   530  		values = make([]*float64, 0, lM)
   531  		counts = make([]*float64, 0, lM)
   532  
   533  		for k, v := range m {
   534  			values = append(values, aws.Float64(float64(k)))
   535  			counts = append(counts, aws.Float64(float64(v)))
   536  			if v > 1 {
   537  				useCounts = true
   538  			}
   539  		}
   540  
   541  		if !useCounts {
   542  			counts = nil
   543  		}
   544  		return
   545  	}
   546  
   547  	values = make([]*float64, 0, ceiling)
   548  	counts = make([]*float64, 0, ceiling)
   549  
   550  	// Try and make our target without taking values with one count.
   551  	for k, v := range m {
   552  		if len(values) == ceiling {
   553  			return
   554  		}
   555  		if v > 1 {
   556  			values = append(values, aws.Float64(float64(k)))
   557  			counts = append(counts, aws.Float64(float64(v)))
   558  			useCounts = true
   559  			delete(m, k)
   560  		}
   561  	}
   562  
   563  	// Otherwise take randomly.
   564  	for k, v := range m {
   565  		if len(values) == ceiling {
   566  			break
   567  		}
   568  		values = append(values, aws.Float64(float64(k)))
   569  		counts = append(counts, aws.Float64(float64(v)))
   570  	}
   571  
   572  	if !useCounts {
   573  		counts = nil
   574  	}
   575  	return
   576  }
   577  
   578  func (c *CloudWatch) flush() error {
   579  	c.datumLock.Lock()
   580  	datumMap := c.datumses
   581  	c.datumses = map[string]*cloudWatchDatum{}
   582  	c.datumLock.Unlock()
   583  
   584  	datums := []*cloudwatch.MetricDatum{}
   585  	for _, v := range datumMap {
   586  		if v != nil {
   587  			d := cloudwatch.MetricDatum{
   588  				MetricName: &v.MetricName,
   589  				Dimensions: v.Dimensions,
   590  				Unit:       &v.Unit,
   591  				Timestamp:  &v.Timestamp,
   592  			}
   593  			if len(v.Values) > 0 {
   594  				d.Values, d.Counts = valuesMapToSlices(v.Values)
   595  			} else {
   596  				d.Value = aws.Float64(float64(v.Value))
   597  			}
   598  			datums = append(datums, &d)
   599  		}
   600  	}
   601  
   602  	input := cloudwatch.PutMetricDataInput{
   603  		Namespace:  &c.config.Namespace,
   604  		MetricData: datums,
   605  	}
   606  
   607  	throttled := false
   608  	for len(input.MetricData) > 0 {
   609  		if !throttled {
   610  			if len(datums) > maxCloudWatchMetrics {
   611  				input.MetricData, datums = datums[:maxCloudWatchMetrics], datums[maxCloudWatchMetrics:]
   612  			} else {
   613  				datums = nil
   614  			}
   615  		}
   616  		throttled = false
   617  
   618  		if _, err := c.client.PutMetricData(&input); err != nil {
   619  			if request.IsErrorThrottle(err) {
   620  				throttled = true
   621  				c.log.Warnln("Metrics request was throttled. Either increase flush period or reduce number of services sending metrics.")
   622  			} else {
   623  				c.log.Errorf("Failed to send metric data: %v\n", err)
   624  			}
   625  			select {
   626  			case <-time.After(time.Second):
   627  			case <-c.ctx.Done():
   628  				return types.ErrTimeout
   629  			}
   630  		}
   631  
   632  		if !throttled {
   633  			input.MetricData = datums
   634  		}
   635  	}
   636  
   637  	return nil
   638  }
   639  
   640  //------------------------------------------------------------------------------
   641  
// SetLogger sets the logger used to print connection errors, i.e. the warnings
// and errors emitted while sending metric data during a flush.
func (c *CloudWatch) SetLogger(log log.Modular) {
	c.log = log
}
   646  
   647  // Close stops the CloudWatch object from aggregating metrics and cleans up
   648  // resources.
   649  func (c *CloudWatch) Close() error {
   650  	c.cancel()
   651  	c.flush()
   652  	return nil
   653  }
   654  
   655  //------------------------------------------------------------------------------