github.com/google/cloudprober@v0.11.3/surfacers/cloudwatch/cloudwatch.go (about) 1 // Copyright 2021 The Cloudprober Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 /* 16 Package cloudwatch implements a surfacer to export metrics to AWS Cloudwatch. 17 */ 18 package cloudwatch 19 20 import ( 21 "context" 22 "strconv" 23 "time" 24 25 "github.com/aws/aws-sdk-go/aws" 26 "github.com/aws/aws-sdk-go/aws/session" 27 "github.com/aws/aws-sdk-go/service/cloudwatch" 28 "github.com/google/cloudprober/logger" 29 "github.com/google/cloudprober/metrics" 30 31 configpb "github.com/google/cloudprober/surfacers/cloudwatch/proto" 32 "github.com/google/cloudprober/surfacers/common/options" 33 ) 34 35 // Cloudwatch API limit for metrics included in a PutMetricData call 36 const maxMetricDatums int = 20 37 38 // The dimension named used to identify distributions 39 const distributionDimensionName string = "le" 40 41 // CWSurfacer implements AWS Cloudwatch surfacer. 42 type CWSurfacer struct { 43 c *configpb.SurfacerConf 44 opts *options.Options 45 writeChan chan *metrics.EventMetrics 46 session *cloudwatch.CloudWatch 47 l *logger.Logger 48 49 // A cache of []*cloudwatch.MetricDatum's, used for batch writing to the 50 // cloudwatch api. 51 cwMetricDatumCache []*cloudwatch.MetricDatum 52 } 53 54 func (cw *CWSurfacer) processIncomingMetrics(ctx context.Context) { 55 for { 56 select { 57 case <-ctx.Done(): 58 cw.l.Infof("Context canceled, stopping the surfacer write loop") 59 return 60 case em := <-cw.writeChan: 61 cw.recordEventMetrics(em) 62 } 63 } 64 } 65 66 // recordEventMetrics takes an EventMetric, which can contain multiple metrics 67 // of varying types, and loops through each metric in the EventMetric, parsing 68 // each metric into a structure that is supported by Cloudwatch 69 func (cw *CWSurfacer) recordEventMetrics(em *metrics.EventMetrics) { 70 for _, metricKey := range em.MetricsKeys() { 71 if !cw.opts.AllowMetric(metricKey) { 72 continue 73 } 74 75 switch value := em.Metric(metricKey).(type) { 76 case metrics.NumValue: 77 cw.publishMetrics(cw.newCWMetricDatum(metricKey, value.Float64(), emLabelsToDimensions(em), em.Timestamp, em.LatencyUnit)) 78 79 case *metrics.Map: 80 for _, mapKey := range value.Keys() { 81 dimensions := emLabelsToDimensions(em) 82 dimensions = append(dimensions, &cloudwatch.Dimension{ 83 Name: aws.String(value.MapName), 84 Value: aws.String(mapKey), 85 }) 86 cw.publishMetrics(cw.newCWMetricDatum(metricKey, value.GetKey(mapKey).Float64(), dimensions, em.Timestamp, em.LatencyUnit)) 87 } 88 89 case *metrics.Distribution: 90 for i, distributionBound := range value.Data().LowerBounds { 91 dimensions := append(emLabelsToDimensions(em), &cloudwatch.Dimension{ 92 Name: aws.String(distributionDimensionName), 93 Value: aws.String(strconv.FormatFloat(distributionBound, 'f', -1, 64)), 94 }) 95 96 cw.publishMetrics(cw.newCWMetricDatum(metricKey, float64(value.Data().BucketCounts[i]), dimensions, em.Timestamp, em.LatencyUnit)) 97 } 98 } 99 } 100 } 101 102 // Publish the metrics to cloudwatch, using the namespace provided from 103 // configuration. 104 func (cw *CWSurfacer) publishMetrics(md *cloudwatch.MetricDatum) { 105 if len(cw.cwMetricDatumCache) >= maxMetricDatums { 106 _, err := cw.session.PutMetricData(&cloudwatch.PutMetricDataInput{ 107 Namespace: aws.String(cw.c.GetNamespace()), 108 MetricData: cw.cwMetricDatumCache, 109 }) 110 111 if err != nil { 112 cw.l.Errorf("Failed to publish metrics to cloudwatch: %s", err) 113 } 114 115 cw.cwMetricDatumCache = cw.cwMetricDatumCache[:0] 116 } 117 118 cw.cwMetricDatumCache = append(cw.cwMetricDatumCache, md) 119 } 120 121 // Create a new cloudwatch metriddatum using the values passed in. 122 func (cw *CWSurfacer) newCWMetricDatum(metricname string, value float64, dimensions []*cloudwatch.Dimension, timestamp time.Time, latencyUnit time.Duration) *cloudwatch.MetricDatum { 123 // define the metric datum with default values 124 metricDatum := cloudwatch.MetricDatum{ 125 Dimensions: dimensions, 126 MetricName: aws.String(metricname), 127 Value: aws.Float64(value), 128 StorageResolution: aws.Int64(cw.c.GetResolution()), 129 Timestamp: aws.Time(timestamp), 130 Unit: aws.String(cloudwatch.StandardUnitCount), 131 } 132 133 // the cloudwatch api will throw warnings when a timeseries has multiple 134 // units, to avoid this always calculate the value as milliseconds. 135 if metricname == "latency" { 136 metricDatum.Unit = aws.String(cloudwatch.StandardUnitMilliseconds) 137 metricDatum.Value = aws.Float64(value * float64(latencyUnit) / float64(time.Millisecond)) 138 } 139 140 return &metricDatum 141 } 142 143 // Take metric labels from an event metric and parse them into a Cloudwatch Dimension struct. 144 func emLabelsToDimensions(em *metrics.EventMetrics) []*cloudwatch.Dimension { 145 dimensions := []*cloudwatch.Dimension{} 146 147 for _, k := range em.LabelsKeys() { 148 dimensions = append(dimensions, &cloudwatch.Dimension{ 149 Name: aws.String(k), 150 Value: aws.String(em.Label(k)), 151 }) 152 } 153 154 return dimensions 155 } 156 157 // New creates a new instance of a cloudwatch surfacer, based on the config 158 // passed in. It then hands off to a goroutine to surface metrics to cloudwatch 159 // across a buffered channel. 160 func New(ctx context.Context, config *configpb.SurfacerConf, opts *options.Options, l *logger.Logger) (*CWSurfacer, error) { 161 162 sess := session.Must(session.NewSessionWithOptions(session.Options{ 163 SharedConfigState: session.SharedConfigEnable, 164 })) 165 166 cw := &CWSurfacer{ 167 c: config, 168 opts: opts, 169 writeChan: make(chan *metrics.EventMetrics, opts.MetricsBufferSize), 170 session: cloudwatch.New(sess), 171 l: l, 172 } 173 174 // Set the capacity of this slice to the max metric value, to avoid having to 175 // grow the slice. 176 cw.cwMetricDatumCache = make([]*cloudwatch.MetricDatum, 0, maxMetricDatums) 177 178 go cw.processIncomingMetrics(ctx) 179 180 cw.l.Info("Initialised Cloudwatch surfacer") 181 return cw, nil 182 } 183 184 // Write is a function defined to comply with the surfacer interface, and enables the 185 // cloudwatch surfacer to receive EventMetrics over the buffered channel. 186 func (cw *CWSurfacer) Write(ctx context.Context, em *metrics.EventMetrics) { 187 select { 188 case cw.writeChan <- em: 189 default: 190 cw.l.Error("Surfacer's write channel is full, dropping new data.") 191 } 192 }