bosun.org@v0.0.0-20250213104149-b8d3e981f37d/cloudwatch/cloudwatch.go (about)

     1  // Package cloudwatch defines structures for interacting with Cloudwatch Metrics.
     2  package cloudwatch // import "bosun.org/cloudwatch"
     3  
     4  import (
     5  	"errors"
     6  	"fmt"
     7  	"strings"
     8  	"sync"
     9  	"time"
    10  
    11  	"bosun.org/opentsdb"
    12  	"bosun.org/slog"
    13  	"github.com/aws/aws-sdk-go/aws"
    14  	"github.com/aws/aws-sdk-go/aws/session"
    15  	cw "github.com/aws/aws-sdk-go/service/cloudwatch"
    16  	cwi "github.com/aws/aws-sdk-go/service/cloudwatch/cloudwatchiface"
    17  	"github.com/ryanuber/go-glob"
    18  )
    19  
    20  var (
    21  	once    sync.Once
    22  	context Context
    23  )
    24  
    25  const DefaultConcurrency = 4
    26  const DefaultPageLimit = 10
    27  const DefaultExpansionLimit = 500
    28  
    29  var ErrExpansionLimit = errors.New("Hit dimension expansion limit")
    30  var ErrPagingLimit = errors.New("Hit the page limit when retrieving metrics")
    31  var ErrInvalidPeriod = errors.New("Period must be greater than 0")
    32  
// Request holds query objects. Currently only absolute times are supported.
type Request struct {
	// Start and End bound the queried time window. Both are dereferenced by
	// Query and CacheKey.
	Start *time.Time
	End   *time.Time
	// Region and Profile select the CloudWatch client (see getProfile).
	Region string
	// Namespace and Metric identify the CloudWatch metric being queried.
	Namespace string
	Metric    string
	// Period is sent as the MetricStat period; Query rejects values <= 0.
	// NOTE(review): presumably seconds, as in the CloudWatch API — confirm.
	Period int64
	// Statistic is sent as the MetricStat statistic name.
	Statistic string
	// DimensionString is only used as part of the cache key in this file.
	// NOTE(review): presumably the unparsed form of Dimensions — confirm
	// against callers.
	DimensionString string
	// Dimensions holds one dimension set per metric query; Query issues one
	// MetricDataQuery for each inner slice.
	Dimensions [][]Dimension
	Profile    string
}
// LookupRequest describes a dimension lookup performed by LookupDimensions.
type LookupRequest struct {
	// Region and Profile select the CloudWatch client (see getProfile).
	Region string
	// Namespace and Metric are passed to ListMetrics to narrow the listing.
	Namespace string
	Metric    string
	// Dimensions lists the dimensions to look up. Values containing "*" are
	// treated as wildcard patterns; all other values are sent to CloudWatch
	// as literal dimension filters.
	Dimensions [][]Dimension
	Profile    string
}
    53  
// Response is the result of Context.Query.
type Response struct {
	// Raw is a copy of the GetMetricData output returned by the AWS SDK.
	Raw cw.GetMetricDataOutput
	// TagSet maps each query id used in the request ("q0", "q1", ...) to the
	// tags built from that query's dimensions.
	TagSet map[string]opentsdb.TagSet
}
    58  
// Series pairs a label with a list of data points.
// It is not referenced elsewhere in the visible part of this file.
type Series struct {
	Datapoints []DataPoint
	Label      string
}
    63  
// DataPoint is a single entry of a Series; all fields are kept as strings.
// It is not referenced elsewhere in the visible part of this file.
type DataPoint struct {
	Aggregator string
	Timestamp  string
	Unit       string
}
    69  
    70  type Dimension struct {
    71  	Name  string
    72  	Value string
    73  }
    74  
    75  type Wildcards map[string]string
    76  
    77  type DimensionSet map[string]bool
    78  
    79  func (d Dimension) String() string {
    80  	return fmt.Sprintf("%s:%s", d.Name, d.Value)
    81  }
    82  
// DimensionList wraps groups of dimensions, one inner slice per group.
// It is not referenced elsewhere in the visible part of this file.
type DimensionList struct {
	Groups [][]Dimension
}
    86  
    87  func (r *Request) CacheKey() string {
    88  	return fmt.Sprintf("cloudwatch-%d-%d-%s-%s-%s-%d-%s-%s-%s",
    89  		r.Start.Unix(),
    90  		r.End.Unix(),
    91  		r.Region,
    92  		r.Namespace,
    93  		r.Metric,
    94  		r.Period,
    95  		r.Statistic,
    96  		r.DimensionString,
    97  		r.Profile,
    98  	)
    99  }
   100  
// Context is the interface for querying CloudWatch.
type Context interface {
	// Query executes the request and returns the raw metric data together
	// with the tag set of every query it contained.
	Query(*Request) (Response, error)
	// LookupDimensions resolves the requested dimensions, wildcards
	// included, into the concrete dimension sets that exist for the metric.
	LookupDimensions(request *LookupRequest) ([][]Dimension, error)
	// GetExpansionLimit reports the maximum number of dimension sets a
	// lookup may produce.
	GetExpansionLimit() int
	// GetPagesLimit reports the maximum number of metric-listing pages a
	// lookup may read.
	GetPagesLimit() int
	// GetConcurrency reports the configured concurrency.
	// NOTE(review): not consumed within this file; presumably used by
	// callers to bound parallel queries — confirm.
	GetConcurrency() int
}
   109  
// cloudWatchContext is the Context implementation backed by the AWS SDK.
type cloudWatchContext struct {
	// profileProvider creates clients for profiles that are not cached yet.
	profileProvider ProfileProvider
	// profiles caches clients, keyed by a name derived from the AWS profile
	// name and region (see getProfile).
	profiles map[string]cwi.CloudWatchAPI
	// profilesLock guards profiles.
	profilesLock sync.RWMutex
	// ExpansionLimit, PagesLimit and Concurrency override the package
	// defaults; a zero value means "use the default".
	ExpansionLimit int
	PagesLimit     int
	Concurrency    int
}
   118  
// ProfileProvider creates CloudWatch API clients. It exists so that the
// client construction can be replaced through GetContextWithProvider.
type ProfileProvider interface {
	// NewProfile returns a client for the given AWS profile name and region.
	NewProfile(name, region string) cwi.CloudWatchAPI
}
   122  
   123  type profileProvider struct{}
   124  
   125  func (p profileProvider) NewProfile(name, region string) cwi.CloudWatchAPI {
   126  	enableVerboseLogging := true
   127  	conf := aws.Config{
   128  		CredentialsChainVerboseErrors: &enableVerboseLogging,
   129  		Region:                        aws.String(region),
   130  	}
   131  
   132  	sess, err := session.NewSessionWithOptions(session.Options{
   133  		Profile: name,
   134  		Config:  conf,
   135  		// Force enable Shared Config support
   136  		SharedConfigState: session.SharedConfigEnable,
   137  	})
   138  
   139  	if err != nil {
   140  		slog.Error(err.Error())
   141  	}
   142  
   143  	return cw.New(sess)
   144  }
   145  
   146  // getProfile returns a previously created profile or creates a new one for the given profile name and region
   147  func (c *cloudWatchContext) getProfile(awsProfileName, region string) cwi.CloudWatchAPI {
   148  	var fullProfileName string
   149  
   150  	if awsProfileName == "default" {
   151  		fullProfileName = "bosun-default"
   152  	} else {
   153  		fullProfileName = fmt.Sprintf("user-%s", awsProfileName)
   154  	}
   155  
   156  	fullProfileName = fmt.Sprintf("%s-%s", fullProfileName, region)
   157  
   158  	// We don't want to concurrently modify the c.profiles map
   159  	c.profilesLock.Lock()
   160  	defer c.profilesLock.Unlock()
   161  
   162  	if cwAPI, ok := c.profiles[fullProfileName]; ok {
   163  		return cwAPI
   164  	}
   165  
   166  	cwAPI := c.profileProvider.NewProfile(awsProfileName, region)
   167  	c.profiles[fullProfileName] = cwAPI
   168  
   169  	return cwAPI
   170  }
   171  
   172  func (c *cloudWatchContext) GetPagesLimit() int {
   173  	if c.PagesLimit == 0 {
   174  		return DefaultPageLimit
   175  	} else {
   176  		return c.PagesLimit
   177  
   178  	}
   179  }
   180  
   181  func (c *cloudWatchContext) GetExpansionLimit() int {
   182  	if c.ExpansionLimit == 0 {
   183  		return DefaultExpansionLimit
   184  	} else {
   185  		return c.ExpansionLimit
   186  
   187  	}
   188  }
   189  
   190  func (c *cloudWatchContext) GetConcurrency() int {
   191  	if c.Concurrency == 0 {
   192  		return DefaultConcurrency
   193  	} else {
   194  		return c.Concurrency
   195  
   196  	}
   197  }
   198  
   199  func GetContext() Context {
   200  	return GetContextWithProvider(profileProvider{})
   201  }
   202  
   203  func GetContextWithProvider(p ProfileProvider) Context {
   204  	once.Do(func() {
   205  		context = &cloudWatchContext{
   206  			profileProvider: p,
   207  			profiles:        make(map[string]cwi.CloudWatchAPI),
   208  		}
   209  	})
   210  	return context
   211  }
   212  
   213  func buildQuery(r *Request, id string, dimensions []Dimension) cw.MetricDataQuery {
   214  	awsPeriod := r.Period
   215  	d := make([]*cw.Dimension, 0)
   216  
   217  	for _, i := range dimensions {
   218  		n := i.Name
   219  		v := i.Value
   220  		d = append(d, &cw.Dimension{Name: &n, Value: &v})
   221  	}
   222  
   223  	metric := cw.Metric{
   224  		Dimensions: d,
   225  		MetricName: &r.Metric,
   226  		Namespace:  &r.Namespace,
   227  	}
   228  	stat := cw.MetricStat{
   229  		Metric: &metric,
   230  		Period: &awsPeriod,
   231  		Stat:   &r.Statistic,
   232  		Unit:   nil,
   233  	}
   234  
   235  	returndata := true
   236  	dq := cw.MetricDataQuery{
   237  		Expression: nil,
   238  		Id:         &id,
   239  		Label:      nil,
   240  		MetricStat: &stat,
   241  		Period:     nil,
   242  		ReturnData: &returndata,
   243  	}
   244  	return dq
   245  }
   246  
   247  func buildTags(dims []Dimension) opentsdb.TagSet {
   248  	var tags opentsdb.TagSet
   249  
   250  	tags = make(opentsdb.TagSet)
   251  	for _, d := range dims {
   252  		tags[d.Name] = d.Value
   253  	}
   254  
   255  	return tags
   256  }
   257  
   258  // Query performs a CloudWatch request to aws.
   259  func (c *cloudWatchContext) Query(r *Request) (Response, error) {
   260  	var response Response
   261  	var dqs []*cw.MetricDataQuery
   262  	var tagSet = make(map[string]opentsdb.TagSet)
   263  	var id string
   264  
   265  	api := c.getProfile(r.Profile, r.Region)
   266  
   267  	if r.Period <= 0 {
   268  		return response, ErrInvalidPeriod
   269  	}
   270  	// custom metrics can have no dimensions
   271  	if len(r.Dimensions) == 0 {
   272  		id = fmt.Sprintf("q0")
   273  		dq := buildQuery(r, id, nil)
   274  		dqs = append(dqs, &dq)
   275  		tagSet[id] = buildTags(nil)
   276  	} else {
   277  		for i, j := range r.Dimensions {
   278  			id = fmt.Sprintf("q%d", i)
   279  			dq := buildQuery(r, id, j)
   280  			dqs = append(dqs, &dq)
   281  			tagSet[id] = buildTags(j)
   282  		}
   283  	}
   284  
   285  	q := &cw.GetMetricDataInput{
   286  		EndTime:           aws.Time(*r.End),
   287  		MaxDatapoints:     nil,
   288  		MetricDataQueries: dqs,
   289  		NextToken:         nil,
   290  		ScanBy:            nil,
   291  		StartTime:         aws.Time(*r.Start),
   292  	}
   293  
   294  	resp, err := api.GetMetricData(q)
   295  	if err != nil {
   296  		// Print the error, cast err to awserr.Error to get the Code and
   297  		// Message from an error.
   298  		slog.Error(err.Error())
   299  		return response, err
   300  	}
   301  	response.Raw = *resp
   302  	response.TagSet = tagSet
   303  	return response, nil
   304  
   305  }
   306  
   307  func match(d *cw.Dimension, wc Wildcards) bool {
   308  	if len(*d.Value) == 0 {
   309  		return false
   310  	}
   311  	return glob.Glob(wc[*d.Name], *d.Value)
   312  }
   313  
   314  func filter(metric *cw.Metric, wildcard Wildcards, dimSet DimensionSet) (matches int, dims []Dimension) {
   315  	matches = 0
   316  	dl := make([]Dimension, 0)
   317  
   318  	for _, dim := range metric.Dimensions {
   319  		// if the metric contains a dimension that isn't in the list
   320  		// we searched for we should skip it.
   321  		if !dimSet[*dim.Name] {
   322  			return 0, nil
   323  		}
   324  
   325  		if wildcard[*dim.Name] != "" {
   326  			if !match(dim, wildcard) {
   327  				return 0, nil
   328  			}
   329  			matches++
   330  		}
   331  
   332  		d := Dimension{
   333  			Name:  *dim.Name,
   334  			Value: *dim.Value,
   335  		}
   336  		dl = append(dl, d)
   337  	}
   338  	return matches, dl
   339  
   340  }
   341  
   342  func filterDimensions(metrics []*cw.Metric, wildcard Wildcards, ds DimensionSet, limit int) ([][]Dimension, error) {
   343  	dimensions := make([][]Dimension, 0)
   344  
   345  	for _, m := range metrics {
   346  		if len(m.Dimensions) == 0 {
   347  			continue
   348  		}
   349  		matches, dl := filter(m, wildcard, ds)
   350  		// all wildcard dimensions need to be present for it to count as match
   351  		if matches < len(wildcard) {
   352  			continue
   353  		}
   354  		dimensions = append(dimensions, dl)
   355  		if len(dimensions) >= limit {
   356  			return nil, ErrExpansionLimit
   357  		}
   358  	}
   359  	return dimensions, nil
   360  }
   361  
   362  // Query performs a CloudWatch request to aws.
   363  func (c *cloudWatchContext) LookupDimensions(lr *LookupRequest) ([][]Dimension, error) {
   364  	api := c.getProfile(lr.Profile, lr.Region)
   365  	var metrics []*cw.Metric
   366  	var literal []*cw.DimensionFilter
   367  	var wildcard = make(Wildcards)
   368  	var dimensionSet = make(DimensionSet)
   369  
   370  	for _, i := range lr.Dimensions {
   371  		for _, j := range i {
   372  			dimensionSet[j.Name] = true
   373  
   374  			if strings.Contains(j.Value, "*") {
   375  				wildcard[j.Name] = j.Value
   376  			} else {
   377  				name := j.Name
   378  				value := j.Value
   379  				literal = append(literal, &cw.DimensionFilter{
   380  					Name:  &name,
   381  					Value: &value,
   382  				})
   383  			}
   384  		}
   385  	}
   386  
   387  	mi := cw.ListMetricsInput{
   388  		Dimensions: literal,
   389  		MetricName: &lr.Metric,
   390  		Namespace:  &lr.Namespace,
   391  		NextToken:  nil,
   392  	}
   393  	pages := 0
   394  	limitHit := false
   395  	err := api.ListMetricsPages(&mi, func(mo *cw.ListMetricsOutput, lastPage bool) bool {
   396  		metrics = append(metrics, mo.Metrics...)
   397  		pages++
   398  		if pages > c.GetPagesLimit() {
   399  			limitHit = true
   400  			return false
   401  		}
   402  		return !lastPage
   403  	})
   404  
   405  	if limitHit {
   406  		return nil, ErrPagingLimit
   407  	}
   408  	if err != nil {
   409  		return nil, err
   410  	}
   411  
   412  	return filterDimensions(metrics, wildcard, dimensionSet, c.GetExpansionLimit())
   413  }