github.com/google/cloudprober@v0.11.3/surfacers/cloudwatch/cloudwatch.go (about)

     1  // Copyright 2021 The Cloudprober Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  /*
    16  Package cloudwatch implements a surfacer to export metrics to AWS Cloudwatch.
    17  */
    18  package cloudwatch
    19  
    20  import (
    21  	"context"
    22  	"strconv"
    23  	"time"
    24  
    25  	"github.com/aws/aws-sdk-go/aws"
    26  	"github.com/aws/aws-sdk-go/aws/session"
    27  	"github.com/aws/aws-sdk-go/service/cloudwatch"
    28  	"github.com/google/cloudprober/logger"
    29  	"github.com/google/cloudprober/metrics"
    30  
    31  	configpb "github.com/google/cloudprober/surfacers/cloudwatch/proto"
    32  	"github.com/google/cloudprober/surfacers/common/options"
    33  )
    34  
    35  // Cloudwatch API limit for metrics included in a PutMetricData call
    36  const maxMetricDatums int = 20
    37  
    38  // The dimension named used to identify distributions
    39  const distributionDimensionName string = "le"
    40  
    41  // CWSurfacer implements AWS Cloudwatch surfacer.
    42  type CWSurfacer struct {
    43  	c         *configpb.SurfacerConf
    44  	opts      *options.Options
    45  	writeChan chan *metrics.EventMetrics
    46  	session   *cloudwatch.CloudWatch
    47  	l         *logger.Logger
    48  
    49  	// A cache of []*cloudwatch.MetricDatum's, used for batch writing to the
    50  	// cloudwatch api.
    51  	cwMetricDatumCache []*cloudwatch.MetricDatum
    52  }
    53  
    54  func (cw *CWSurfacer) processIncomingMetrics(ctx context.Context) {
    55  	for {
    56  		select {
    57  		case <-ctx.Done():
    58  			cw.l.Infof("Context canceled, stopping the surfacer write loop")
    59  			return
    60  		case em := <-cw.writeChan:
    61  			cw.recordEventMetrics(em)
    62  		}
    63  	}
    64  }
    65  
    66  // recordEventMetrics takes an EventMetric, which can contain multiple metrics
    67  // of varying types, and loops through each metric in the EventMetric, parsing
    68  // each metric into a structure that is supported by Cloudwatch
    69  func (cw *CWSurfacer) recordEventMetrics(em *metrics.EventMetrics) {
    70  	for _, metricKey := range em.MetricsKeys() {
    71  		if !cw.opts.AllowMetric(metricKey) {
    72  			continue
    73  		}
    74  
    75  		switch value := em.Metric(metricKey).(type) {
    76  		case metrics.NumValue:
    77  			cw.publishMetrics(cw.newCWMetricDatum(metricKey, value.Float64(), emLabelsToDimensions(em), em.Timestamp, em.LatencyUnit))
    78  
    79  		case *metrics.Map:
    80  			for _, mapKey := range value.Keys() {
    81  				dimensions := emLabelsToDimensions(em)
    82  				dimensions = append(dimensions, &cloudwatch.Dimension{
    83  					Name:  aws.String(value.MapName),
    84  					Value: aws.String(mapKey),
    85  				})
    86  				cw.publishMetrics(cw.newCWMetricDatum(metricKey, value.GetKey(mapKey).Float64(), dimensions, em.Timestamp, em.LatencyUnit))
    87  			}
    88  
    89  		case *metrics.Distribution:
    90  			for i, distributionBound := range value.Data().LowerBounds {
    91  				dimensions := append(emLabelsToDimensions(em), &cloudwatch.Dimension{
    92  					Name:  aws.String(distributionDimensionName),
    93  					Value: aws.String(strconv.FormatFloat(distributionBound, 'f', -1, 64)),
    94  				})
    95  
    96  				cw.publishMetrics(cw.newCWMetricDatum(metricKey, float64(value.Data().BucketCounts[i]), dimensions, em.Timestamp, em.LatencyUnit))
    97  			}
    98  		}
    99  	}
   100  }
   101  
   102  // Publish the metrics to cloudwatch, using the namespace provided from
   103  // configuration.
   104  func (cw *CWSurfacer) publishMetrics(md *cloudwatch.MetricDatum) {
   105  	if len(cw.cwMetricDatumCache) >= maxMetricDatums {
   106  		_, err := cw.session.PutMetricData(&cloudwatch.PutMetricDataInput{
   107  			Namespace:  aws.String(cw.c.GetNamespace()),
   108  			MetricData: cw.cwMetricDatumCache,
   109  		})
   110  
   111  		if err != nil {
   112  			cw.l.Errorf("Failed to publish metrics to cloudwatch: %s", err)
   113  		}
   114  
   115  		cw.cwMetricDatumCache = cw.cwMetricDatumCache[:0]
   116  	}
   117  
   118  	cw.cwMetricDatumCache = append(cw.cwMetricDatumCache, md)
   119  }
   120  
   121  // Create a new cloudwatch metriddatum using the values passed in.
   122  func (cw *CWSurfacer) newCWMetricDatum(metricname string, value float64, dimensions []*cloudwatch.Dimension, timestamp time.Time, latencyUnit time.Duration) *cloudwatch.MetricDatum {
   123  	// define the metric datum with default values
   124  	metricDatum := cloudwatch.MetricDatum{
   125  		Dimensions:        dimensions,
   126  		MetricName:        aws.String(metricname),
   127  		Value:             aws.Float64(value),
   128  		StorageResolution: aws.Int64(cw.c.GetResolution()),
   129  		Timestamp:         aws.Time(timestamp),
   130  		Unit:              aws.String(cloudwatch.StandardUnitCount),
   131  	}
   132  
   133  	// the cloudwatch api will throw warnings when a timeseries has multiple
   134  	// units, to avoid this always calculate the value as milliseconds.
   135  	if metricname == "latency" {
   136  		metricDatum.Unit = aws.String(cloudwatch.StandardUnitMilliseconds)
   137  		metricDatum.Value = aws.Float64(value * float64(latencyUnit) / float64(time.Millisecond))
   138  	}
   139  
   140  	return &metricDatum
   141  }
   142  
   143  // Take metric labels from an event metric and parse them into a Cloudwatch Dimension struct.
   144  func emLabelsToDimensions(em *metrics.EventMetrics) []*cloudwatch.Dimension {
   145  	dimensions := []*cloudwatch.Dimension{}
   146  
   147  	for _, k := range em.LabelsKeys() {
   148  		dimensions = append(dimensions, &cloudwatch.Dimension{
   149  			Name:  aws.String(k),
   150  			Value: aws.String(em.Label(k)),
   151  		})
   152  	}
   153  
   154  	return dimensions
   155  }
   156  
   157  // New creates a new instance of a cloudwatch surfacer, based on the config
   158  // passed in. It then hands off to a goroutine to surface metrics to cloudwatch
   159  // across a buffered channel.
   160  func New(ctx context.Context, config *configpb.SurfacerConf, opts *options.Options, l *logger.Logger) (*CWSurfacer, error) {
   161  
   162  	sess := session.Must(session.NewSessionWithOptions(session.Options{
   163  		SharedConfigState: session.SharedConfigEnable,
   164  	}))
   165  
   166  	cw := &CWSurfacer{
   167  		c:         config,
   168  		opts:      opts,
   169  		writeChan: make(chan *metrics.EventMetrics, opts.MetricsBufferSize),
   170  		session:   cloudwatch.New(sess),
   171  		l:         l,
   172  	}
   173  
   174  	// Set the capacity of this slice to the max metric value, to avoid having to
   175  	// grow the slice.
   176  	cw.cwMetricDatumCache = make([]*cloudwatch.MetricDatum, 0, maxMetricDatums)
   177  
   178  	go cw.processIncomingMetrics(ctx)
   179  
   180  	cw.l.Info("Initialised Cloudwatch surfacer")
   181  	return cw, nil
   182  }
   183  
   184  // Write is a function defined to comply with the surfacer interface, and enables the
   185  // cloudwatch surfacer to receive EventMetrics over the buffered channel.
   186  func (cw *CWSurfacer) Write(ctx context.Context, em *metrics.EventMetrics) {
   187  	select {
   188  	case cw.writeChan <- em:
   189  	default:
   190  		cw.l.Error("Surfacer's write channel is full, dropping new data.")
   191  	}
   192  }