bosun.org@v0.0.0-20250213104149-b8d3e981f37d/cloudwatch/cloudwatch.go (about) 1 // Package cloudwatch defines structures for interacting with Cloudwatch Metrics. 2 package cloudwatch // import "bosun.org/cloudwatch" 3 4 import ( 5 "errors" 6 "fmt" 7 "strings" 8 "sync" 9 "time" 10 11 "bosun.org/opentsdb" 12 "bosun.org/slog" 13 "github.com/aws/aws-sdk-go/aws" 14 "github.com/aws/aws-sdk-go/aws/session" 15 cw "github.com/aws/aws-sdk-go/service/cloudwatch" 16 cwi "github.com/aws/aws-sdk-go/service/cloudwatch/cloudwatchiface" 17 "github.com/ryanuber/go-glob" 18 ) 19 20 var ( 21 once sync.Once 22 context Context 23 ) 24 25 const DefaultConcurrency = 4 26 const DefaultPageLimit = 10 27 const DefaultExpansionLimit = 500 28 29 var ErrExpansionLimit = errors.New("Hit dimension expansion limit") 30 var ErrPagingLimit = errors.New("Hit the page limit when retrieving metrics") 31 var ErrInvalidPeriod = errors.New("Period must be greater than 0") 32 33 // Request holds query objects. Currently only absolute times are supported. 34 type Request struct { 35 Start *time.Time 36 End *time.Time 37 Region string 38 Namespace string 39 Metric string 40 Period int64 41 Statistic string 42 DimensionString string 43 Dimensions [][]Dimension 44 Profile string 45 } 46 type LookupRequest struct { 47 Region string 48 Namespace string 49 Metric string 50 Dimensions [][]Dimension 51 Profile string 52 } 53 54 type Response struct { 55 Raw cw.GetMetricDataOutput 56 TagSet map[string]opentsdb.TagSet 57 } 58 59 type Series struct { 60 Datapoints []DataPoint 61 Label string 62 } 63 64 type DataPoint struct { 65 Aggregator string 66 Timestamp string 67 Unit string 68 } 69 70 type Dimension struct { 71 Name string 72 Value string 73 } 74 75 type Wildcards map[string]string 76 77 type DimensionSet map[string]bool 78 79 func (d Dimension) String() string { 80 return fmt.Sprintf("%s:%s", d.Name, d.Value) 81 } 82 83 type DimensionList struct { 84 Groups [][]Dimension 85 } 86 87 func (r *Request) CacheKey() string { 88 return fmt.Sprintf("cloudwatch-%d-%d-%s-%s-%s-%d-%s-%s-%s", 89 r.Start.Unix(), 90 r.End.Unix(), 91 r.Region, 92 r.Namespace, 93 r.Metric, 94 r.Period, 95 r.Statistic, 96 r.DimensionString, 97 r.Profile, 98 ) 99 } 100 101 // Context is the interface for querying CloudWatch. 102 type Context interface { 103 Query(*Request) (Response, error) 104 LookupDimensions(request *LookupRequest) ([][]Dimension, error) 105 GetExpansionLimit() int 106 GetPagesLimit() int 107 GetConcurrency() int 108 } 109 110 type cloudWatchContext struct { 111 profileProvider ProfileProvider 112 profiles map[string]cwi.CloudWatchAPI 113 profilesLock sync.RWMutex 114 ExpansionLimit int 115 PagesLimit int 116 Concurrency int 117 } 118 119 type ProfileProvider interface { 120 NewProfile(name, region string) cwi.CloudWatchAPI 121 } 122 123 type profileProvider struct{} 124 125 func (p profileProvider) NewProfile(name, region string) cwi.CloudWatchAPI { 126 enableVerboseLogging := true 127 conf := aws.Config{ 128 CredentialsChainVerboseErrors: &enableVerboseLogging, 129 Region: aws.String(region), 130 } 131 132 sess, err := session.NewSessionWithOptions(session.Options{ 133 Profile: name, 134 Config: conf, 135 // Force enable Shared Config support 136 SharedConfigState: session.SharedConfigEnable, 137 }) 138 139 if err != nil { 140 slog.Error(err.Error()) 141 } 142 143 return cw.New(sess) 144 } 145 146 // getProfile returns a previously created profile or creates a new one for the given profile name and region 147 func (c *cloudWatchContext) getProfile(awsProfileName, region string) cwi.CloudWatchAPI { 148 var fullProfileName string 149 150 if awsProfileName == "default" { 151 fullProfileName = "bosun-default" 152 } else { 153 fullProfileName = fmt.Sprintf("user-%s", awsProfileName) 154 } 155 156 fullProfileName = fmt.Sprintf("%s-%s", fullProfileName, region) 157 158 // We don't want to concurrently modify the c.profiles map 159 c.profilesLock.Lock() 160 defer c.profilesLock.Unlock() 161 162 if cwAPI, ok := c.profiles[fullProfileName]; ok { 163 return cwAPI 164 } 165 166 cwAPI := c.profileProvider.NewProfile(awsProfileName, region) 167 c.profiles[fullProfileName] = cwAPI 168 169 return cwAPI 170 } 171 172 func (c *cloudWatchContext) GetPagesLimit() int { 173 if c.PagesLimit == 0 { 174 return DefaultPageLimit 175 } else { 176 return c.PagesLimit 177 178 } 179 } 180 181 func (c *cloudWatchContext) GetExpansionLimit() int { 182 if c.ExpansionLimit == 0 { 183 return DefaultExpansionLimit 184 } else { 185 return c.ExpansionLimit 186 187 } 188 } 189 190 func (c *cloudWatchContext) GetConcurrency() int { 191 if c.Concurrency == 0 { 192 return DefaultConcurrency 193 } else { 194 return c.Concurrency 195 196 } 197 } 198 199 func GetContext() Context { 200 return GetContextWithProvider(profileProvider{}) 201 } 202 203 func GetContextWithProvider(p ProfileProvider) Context { 204 once.Do(func() { 205 context = &cloudWatchContext{ 206 profileProvider: p, 207 profiles: make(map[string]cwi.CloudWatchAPI), 208 } 209 }) 210 return context 211 } 212 213 func buildQuery(r *Request, id string, dimensions []Dimension) cw.MetricDataQuery { 214 awsPeriod := r.Period 215 d := make([]*cw.Dimension, 0) 216 217 for _, i := range dimensions { 218 n := i.Name 219 v := i.Value 220 d = append(d, &cw.Dimension{Name: &n, Value: &v}) 221 } 222 223 metric := cw.Metric{ 224 Dimensions: d, 225 MetricName: &r.Metric, 226 Namespace: &r.Namespace, 227 } 228 stat := cw.MetricStat{ 229 Metric: &metric, 230 Period: &awsPeriod, 231 Stat: &r.Statistic, 232 Unit: nil, 233 } 234 235 returndata := true 236 dq := cw.MetricDataQuery{ 237 Expression: nil, 238 Id: &id, 239 Label: nil, 240 MetricStat: &stat, 241 Period: nil, 242 ReturnData: &returndata, 243 } 244 return dq 245 } 246 247 func buildTags(dims []Dimension) opentsdb.TagSet { 248 var tags opentsdb.TagSet 249 250 tags = make(opentsdb.TagSet) 251 for _, d := range dims { 252 tags[d.Name] = d.Value 253 } 254 255 return tags 256 } 257 258 // Query performs a CloudWatch request to aws. 259 func (c *cloudWatchContext) Query(r *Request) (Response, error) { 260 var response Response 261 var dqs []*cw.MetricDataQuery 262 var tagSet = make(map[string]opentsdb.TagSet) 263 var id string 264 265 api := c.getProfile(r.Profile, r.Region) 266 267 if r.Period <= 0 { 268 return response, ErrInvalidPeriod 269 } 270 // custom metrics can have no dimensions 271 if len(r.Dimensions) == 0 { 272 id = fmt.Sprintf("q0") 273 dq := buildQuery(r, id, nil) 274 dqs = append(dqs, &dq) 275 tagSet[id] = buildTags(nil) 276 } else { 277 for i, j := range r.Dimensions { 278 id = fmt.Sprintf("q%d", i) 279 dq := buildQuery(r, id, j) 280 dqs = append(dqs, &dq) 281 tagSet[id] = buildTags(j) 282 } 283 } 284 285 q := &cw.GetMetricDataInput{ 286 EndTime: aws.Time(*r.End), 287 MaxDatapoints: nil, 288 MetricDataQueries: dqs, 289 NextToken: nil, 290 ScanBy: nil, 291 StartTime: aws.Time(*r.Start), 292 } 293 294 resp, err := api.GetMetricData(q) 295 if err != nil { 296 // Print the error, cast err to awserr.Error to get the Code and 297 // Message from an error. 298 slog.Error(err.Error()) 299 return response, err 300 } 301 response.Raw = *resp 302 response.TagSet = tagSet 303 return response, nil 304 305 } 306 307 func match(d *cw.Dimension, wc Wildcards) bool { 308 if len(*d.Value) == 0 { 309 return false 310 } 311 return glob.Glob(wc[*d.Name], *d.Value) 312 } 313 314 func filter(metric *cw.Metric, wildcard Wildcards, dimSet DimensionSet) (matches int, dims []Dimension) { 315 matches = 0 316 dl := make([]Dimension, 0) 317 318 for _, dim := range metric.Dimensions { 319 // if the metric contains a dimension that isn't in the list 320 // we searched for we should skip it. 321 if !dimSet[*dim.Name] { 322 return 0, nil 323 } 324 325 if wildcard[*dim.Name] != "" { 326 if !match(dim, wildcard) { 327 return 0, nil 328 } 329 matches++ 330 } 331 332 d := Dimension{ 333 Name: *dim.Name, 334 Value: *dim.Value, 335 } 336 dl = append(dl, d) 337 } 338 return matches, dl 339 340 } 341 342 func filterDimensions(metrics []*cw.Metric, wildcard Wildcards, ds DimensionSet, limit int) ([][]Dimension, error) { 343 dimensions := make([][]Dimension, 0) 344 345 for _, m := range metrics { 346 if len(m.Dimensions) == 0 { 347 continue 348 } 349 matches, dl := filter(m, wildcard, ds) 350 // all wildcard dimensions need to be present for it to count as match 351 if matches < len(wildcard) { 352 continue 353 } 354 dimensions = append(dimensions, dl) 355 if len(dimensions) >= limit { 356 return nil, ErrExpansionLimit 357 } 358 } 359 return dimensions, nil 360 } 361 362 // Query performs a CloudWatch request to aws. 363 func (c *cloudWatchContext) LookupDimensions(lr *LookupRequest) ([][]Dimension, error) { 364 api := c.getProfile(lr.Profile, lr.Region) 365 var metrics []*cw.Metric 366 var literal []*cw.DimensionFilter 367 var wildcard = make(Wildcards) 368 var dimensionSet = make(DimensionSet) 369 370 for _, i := range lr.Dimensions { 371 for _, j := range i { 372 dimensionSet[j.Name] = true 373 374 if strings.Contains(j.Value, "*") { 375 wildcard[j.Name] = j.Value 376 } else { 377 name := j.Name 378 value := j.Value 379 literal = append(literal, &cw.DimensionFilter{ 380 Name: &name, 381 Value: &value, 382 }) 383 } 384 } 385 } 386 387 mi := cw.ListMetricsInput{ 388 Dimensions: literal, 389 MetricName: &lr.Metric, 390 Namespace: &lr.Namespace, 391 NextToken: nil, 392 } 393 pages := 0 394 limitHit := false 395 err := api.ListMetricsPages(&mi, func(mo *cw.ListMetricsOutput, lastPage bool) bool { 396 metrics = append(metrics, mo.Metrics...) 397 pages++ 398 if pages > c.GetPagesLimit() { 399 limitHit = true 400 return false 401 } 402 return !lastPage 403 }) 404 405 if limitHit { 406 return nil, ErrPagingLimit 407 } 408 if err != nil { 409 return nil, err 410 } 411 412 return filterDimensions(metrics, wildcard, dimensionSet, c.GetExpansionLimit()) 413 }