bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cloudwatch/cloudwatch_test.go

bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cloudwatch/cloudwatch_test.go (about)

     1  package cloudwatch
     2  
     3  import (
     4  	"fmt"
     5  	"strconv"
     6  	"sync"
     7  	"testing"
     8  	"time"
     9  
    10  	"github.com/aws/aws-sdk-go/aws/awserr"
    11  	"github.com/aws/aws-sdk-go/service/cloudwatch"
    12  	"github.com/aws/aws-sdk-go/service/cloudwatch/cloudwatchiface"
    13  )
    14  
    15  const namespace = "AWS/Kafka"
    16  const metric = "CpuSystem"
    17  const region = "eu-west-1"
    18  const profile = "default"
    19  const largeCluster = 95
    20  const smallCluster = 15
    21  const expansionLimit = 100
    22  const pagesLimit = 10
    23  
    24  // Singleton in real function prevents injection of appropriate mocks
    25  func MockGetContextWithProvider(p ProfileProvider) Context {
    26  	context = &cloudWatchContext{
    27  		profileProvider: p,
    28  		profiles:        make(map[string]cloudwatchiface.CloudWatchAPI),
    29  		ExpansionLimit:  expansionLimit,
    30  		PagesLimit:      pagesLimit,
    31  	}
    32  	return context
    33  }
    34  
    35  type slowProfileProvider struct {
    36  	callCount int
    37  }
    38  
    39  func (s *slowProfileProvider) NewProfile(name, region string) cloudwatchiface.CloudWatchAPI {
    40  	s.callCount += 1
    41  	time.Sleep(3 * time.Second)
    42  	return &cloudwatch.CloudWatch{}
    43  }
    44  
    45  func TestGetProfilOnlyCalledOnce(t *testing.T) {
    46  	wg := sync.WaitGroup{}
    47  	provider := &slowProfileProvider{}
    48  
    49  	for i := 0; i < 3; i++ {
    50  		wg.Add(1)
    51  		go func() {
    52  			defer wg.Done()
    53  			ctx, _ := GetContextWithProvider(provider).(*cloudWatchContext)
    54  			ctx.getProfile("fake-profile", "fake-region")
    55  		}()
    56  	}
    57  
    58  	wg.Wait()
    59  
    60  	if provider.callCount != 1 {
    61  		t.Errorf("Expected one call to NewProfile, got %d", provider.callCount)
    62  	}
    63  }
    64  
    65  type mockProfileProvider struct {
    66  }
    67  
    68  type mockCloudWatchClient struct {
    69  	cloudwatchiface.CloudWatchAPI
    70  }
    71  
    72  func (m *mockProfileProvider) NewProfile(name, region string) cloudwatchiface.CloudWatchAPI {
    73  	return &mockCloudWatchClient{}
    74  }
    75  
    76  func (c mockCloudWatchClient) ListMetricsPages(li *cloudwatch.ListMetricsInput, callback func(*cloudwatch.ListMetricsOutput, bool) bool) error {
    77  	var metrics []*cloudwatch.Metric
    78  	var n = metric
    79  	var ns = namespace
    80  
    81  	clusters := make(map[string]int)
    82  	if li.Dimensions == nil || (li.Dimensions != nil && *li.Dimensions[0].Value == "big") {
    83  		clusters["big"] = largeCluster
    84  	}
    85  
    86  	if li.Dimensions == nil || (li.Dimensions != nil && *li.Dimensions[0].Value == "small") {
    87  		clusters["small"] = smallCluster
    88  	}
    89  
    90  	for name, size := range clusters {
    91  		for i := 0; i < size; i++ {
    92  			dn := "Broker ID"
    93  			dv := strconv.Itoa(i)
    94  			dim := cloudwatch.Dimension{
    95  				Name:  &dn,
    96  				Value: &dv,
    97  			}
    98  			dimensions := []*cloudwatch.Dimension{&dim}
    99  
   100  			cn := "Cluster Name"
   101  			cv := name
   102  			cdim := cloudwatch.Dimension{
   103  				Name:  &cn,
   104  				Value: &cv,
   105  			}
   106  			dimensions = append(dimensions, &cdim)
   107  			metric := cloudwatch.Metric{
   108  				Dimensions: dimensions,
   109  				MetricName: &n,
   110  				Namespace:  &ns,
   111  			}
   112  			metrics = append(metrics, &metric)
   113  		}
   114  
   115  		// Some aws metrics are logged with varying number of dimensions, to differentiate between cluster
   116  		// level and node level values. The below adds a cluster only metric to test this case
   117  		cn := "Cluster Name"
   118  		cv := name
   119  		dimensions := []*cloudwatch.Dimension{{
   120  			Name:  &cn,
   121  			Value: &cv,
   122  		}}
   123  		metric := cloudwatch.Metric{
   124  			Dimensions: dimensions,
   125  			MetricName: &n,
   126  			Namespace:  &ns,
   127  		}
   128  		metrics = append(metrics, &metric)
   129  	}
   130  
   131  	lmo := &cloudwatch.ListMetricsOutput{
   132  		Metrics:   metrics,
   133  		NextToken: nil,
   134  	}
   135  	callback(lmo, true)
   136  	return nil
   137  }
   138  
   139  func (c mockCloudWatchClient) GetMetricData(input *cloudwatch.GetMetricDataInput) (*cloudwatch.GetMetricDataOutput, error) {
   140  	var mdr []*cloudwatch.MetricDataResult
   141  	cwo := &cloudwatch.GetMetricDataOutput{
   142  		Messages:          nil,
   143  		MetricDataResults: mdr,
   144  		NextToken:         nil,
   145  	}
   146  
   147  	if len(input.MetricDataQueries) == 0 {
   148  		return cwo, nil
   149  	}
   150  
   151  	for i := 0; i < 10; i++ {
   152  		id := fmt.Sprintf("q{i}")
   153  		m := cloudwatch.MetricDataResult{
   154  			Id:         &id,
   155  			Label:      nil,
   156  			Messages:   nil,
   157  			StatusCode: nil,
   158  			Timestamps: nil,
   159  			Values:     nil,
   160  		}
   161  
   162  		mdr = append(mdr, &m)
   163  	}
   164  
   165  	cwo.MetricDataResults = mdr
   166  	return cwo, nil
   167  }
   168  
   169  // Mocks to simulate being rate limited and test error handling
   170  
   171  type rateLimitedProfileProvider struct {
   172  }
   173  type mockCloudWatchRateLimitedClient struct {
   174  	cloudwatchiface.CloudWatchAPI
   175  }
   176  
   177  func (m *rateLimitedProfileProvider) NewProfile(name, region string) cloudwatchiface.CloudWatchAPI {
   178  	return &mockCloudWatchRateLimitedClient{}
   179  }
   180  
   181  func (m *mockCloudWatchRateLimitedClient) GetMetricData(input *cloudwatch.GetMetricDataInput) (*cloudwatch.GetMetricDataOutput, error) {
   182  	e := fmt.Errorf("Rate Limit exceeded")
   183  	ae := awserr.New("429", "Rate Limited Exceeded", e)
   184  	return nil, awserr.NewRequestFailure(ae, 429, "a5442de54s5454")
   185  }
   186  
   187  func (c mockCloudWatchRateLimitedClient) ListMetricsPages(li *cloudwatch.ListMetricsInput, callback func(*cloudwatch.ListMetricsOutput, bool) bool) error {
   188  	e := fmt.Errorf("Rate Limit exceeded")
   189  	ae := awserr.New("429", "Rate Limited Exceeded", e)
   190  	return awserr.NewRequestFailure(ae, 429, "a5442de54s5454")
   191  }
   192  
   193  // -----
   194  
   195  // Mocks for checking paging behaviour
   196  
   197  type pagingProfileProvider struct {
   198  }
   199  type mockCloudWatchPagingClient struct {
   200  	cloudwatchiface.CloudWatchAPI
   201  }
   202  
   203  func (m *pagingProfileProvider) NewProfile(name, region string) cloudwatchiface.CloudWatchAPI {
   204  	return &mockCloudWatchPagingClient{}
   205  }
   206  
   207  func (c mockCloudWatchPagingClient) ListMetricsPages(li *cloudwatch.ListMetricsInput, callback func(*cloudwatch.ListMetricsOutput, bool) bool) error {
   208  	var metrics []*cloudwatch.Metric
   209  	lmo := &cloudwatch.ListMetricsOutput{
   210  		Metrics:   metrics,
   211  		NextToken: nil,
   212  	}
   213  	p := 0
   214  	for callback(lmo, p == pagesLimit) {
   215  		p++
   216  	}
   217  	return nil
   218  }
   219  
   220  // ----------------------------
   221  
   222  func TestLookupDimensions(t *testing.T) {
   223  	c := MockGetContextWithProvider(&mockProfileProvider{})
   224  
   225  	lr := LookupRequest{
   226  		Region:     region,
   227  		Namespace:  namespace,
   228  		Metric:     metric,
   229  		Dimensions: nil,
   230  		Profile:    profile,
   231  	}
   232  
   233  	var tests = []struct {
   234  		dims  [][]Dimension
   235  		count int
   236  		e     error
   237  	}{
   238  		{[][]Dimension{{
   239  			Dimension{
   240  				Name:  "Broker ID",
   241  				Value: "*",
   242  			}, Dimension{
   243  				Name:  "Cluster Name",
   244  				Value: "*",
   245  			},
   246  		}}, 0, ErrExpansionLimit},
   247  		{[][]Dimension{{
   248  			Dimension{
   249  				Name:  "Broker ID",
   250  				Value: "*",
   251  			}, Dimension{
   252  				Name:  "Cluster Name",
   253  				Value: "big",
   254  			},
   255  		}}, largeCluster, nil},
   256  		{[][]Dimension{{
   257  			Dimension{
   258  				Name:  "Broker ID",
   259  				Value: "*",
   260  			}, Dimension{
   261  				Name:  "Cluster Name",
   262  				Value: "small",
   263  			},
   264  		}}, smallCluster, nil},
   265  		{[][]Dimension{{
   266  			Dimension{
   267  				Name:  "Irrelevant Dimension",
   268  				Value: "1234",
   269  			}, Dimension{
   270  				Name:  "Cluster Name",
   271  				Value: "small",
   272  			},
   273  		}}, 0, nil},
   274  	}
   275  	for _, test := range tests {
   276  		lr.Dimensions = test.dims
   277  		res, err := c.LookupDimensions(&lr)
   278  
   279  		if err != test.e {
   280  			t.Error(err)
   281  		}
   282  		if len(res) != test.count {
   283  			t.Errorf("Did not get expected count, wanted %d got %d", test.count, len(res))
   284  		}
   285  	}
   286  
   287  }
   288  
   289  func TestLookupPageLimit(t *testing.T) {
   290  	c := MockGetContextWithProvider(&pagingProfileProvider{})
   291  
   292  	lr := LookupRequest{
   293  		Region:     region,
   294  		Namespace:  namespace,
   295  		Metric:     metric,
   296  		Dimensions: nil,
   297  		Profile:    profile,
   298  	}
   299  
   300  	_, err := c.LookupDimensions(&lr)
   301  	if err != ErrPagingLimit {
   302  		t.Error("Should have failed from hitting expansion limit")
   303  	}
   304  }
   305  
   306  func TestLookupDimensionsError(t *testing.T) {
   307  	c := MockGetContextWithProvider(&rateLimitedProfileProvider{})
   308  	dims := [][]Dimension{{
   309  		Dimension{
   310  			Name:  "Broker ID",
   311  			Value: "*",
   312  		}, Dimension{
   313  			Name:  "Cluster Name",
   314  			Value: "*",
   315  		}}}
   316  
   317  	lr := LookupRequest{
   318  		Region:     region,
   319  		Namespace:  namespace,
   320  		Metric:     metric,
   321  		Dimensions: dims,
   322  		Profile:    profile,
   323  	}
   324  	_, err := c.LookupDimensions(&lr)
   325  	if err == nil {
   326  		t.Error("Error did not bubble up correctly")
   327  	}
   328  }
   329  
   330  func TestQuery(t *testing.T) {
   331  	c := MockGetContextWithProvider(&mockProfileProvider{})
   332  	start := time.Date(2018, time.January, 1, 0, 0, 0, 0, time.UTC)
   333  	end := time.Date(2018, time.January, 1, 1, 0, 0, 0, time.UTC)
   334  
   335  	dims := [][]Dimension{{
   336  		Dimension{
   337  			Name:  "Broker ID",
   338  			Value: "*",
   339  		}, Dimension{
   340  			Name:  "Cluster Name",
   341  			Value: "grappler-msk-A",
   342  		}}}
   343  
   344  	tests := []struct {
   345  		r    Request
   346  		err  error
   347  		size int
   348  	}{
   349  		{
   350  			r: Request{
   351  				Start:      &start,
   352  				End:        &end,
   353  				Region:     region,
   354  				Namespace:  namespace,
   355  				Metric:     metric,
   356  				Period:     60,
   357  				Statistic:  "Sum",
   358  				Dimensions: dims,
   359  				Profile:    profile,
   360  			},
   361  			err:  nil,
   362  			size: 10,
   363  		},
   364  		{
   365  			r: Request{
   366  				Start:      &start,
   367  				End:        &end,
   368  				Region:     region,
   369  				Namespace:  namespace,
   370  				Metric:     metric,
   371  				Period:     60,
   372  				Statistic:  "Sum",
   373  				Dimensions: nil,
   374  				Profile:    profile,
   375  			},
   376  			err:  nil,
   377  			size: 10,
   378  		},
   379  		{
   380  			r: Request{
   381  				Start:      &start,
   382  				End:        &end,
   383  				Region:     region,
   384  				Namespace:  namespace,
   385  				Metric:     metric,
   386  				Period:     0,
   387  				Statistic:  "Sum",
   388  				Dimensions: nil,
   389  				Profile:    profile,
   390  			},
   391  			err:  ErrInvalidPeriod,
   392  			size: 0,
   393  		},
   394  	}
   395  	for _, test := range tests {
   396  		res, err := c.Query(&test.r)
   397  		if err != test.err {
   398  			t.Errorf("Query failed, expect error to be %v, got %v", test.err, err)
   399  		}
   400  		if len(res.Raw.MetricDataResults) != test.size {
   401  			t.Errorf("Query returned wrong number of results, expected %d, got %d", test.size, len(res.Raw.MetricDataResults))
   402  		}
   403  	}
   404  
   405  }
   406  
   407  func TestQueryError(t *testing.T) {
   408  	c := MockGetContextWithProvider(&rateLimitedProfileProvider{})
   409  	start := time.Date(2018, time.January, 1, 0, 0, 0, 0, time.UTC)
   410  	end := time.Date(2018, time.January, 1, 1, 0, 0, 0, time.UTC)
   411  
   412  	dims := [][]Dimension{{
   413  		Dimension{
   414  			Name:  "Broker ID",
   415  			Value: "*",
   416  		}, Dimension{
   417  			Name:  "Cluster Name",
   418  			Value: "grappler-msk-A",
   419  		}}}
   420  
   421  	r := Request{
   422  		Start:      &start,
   423  		End:        &end,
   424  		Region:     region,
   425  		Namespace:  namespace,
   426  		Metric:     metric,
   427  		Period:     60,
   428  		Statistic:  "Sum",
   429  		Dimensions: dims,
   430  		Profile:    profile,
   431  	}
   432  	_, err := c.Query(&r)
   433  	if err == nil {
   434  		t.Error("Error did not bubble properly", err)
   435  	}
   436  
   437  }
   438  
   439  func TestFilterDimensions(t *testing.T) {
   440  
   441  	metric := "FreeableMemory"
   442  	namespace := "AWS/ElastiCache"
   443  
   444  	d1 := "CacheClusterId"
   445  	v1 := "grappler-cluster-1"
   446  
   447  	d2 := "CacheNodeId"
   448  	v2 := "0001"
   449  
   450  	v3 := "not-cluster-1"
   451  
   452  	wildcards := make(Wildcards)
   453  	wildcards[d1] = "grappler-cluster-1"
   454  	wildcards[d2] = "0*"
   455  
   456  	// set dimensions that are present in the query and we expect to be in results set
   457  	ds := make(DimensionSet)
   458  	ds[d1] = true
   459  	ds[d2] = true
   460  
   461  	// example of elasticache node level metric
   462  	metric1 := cloudwatch.Metric{
   463  		Dimensions: []*cloudwatch.Dimension{
   464  			{
   465  				Name:  &d1,
   466  				Value: &v1,
   467  			},
   468  			{
   469  				Name:  &d2,
   470  				Value: &v2,
   471  			},
   472  		},
   473  		MetricName: &metric,
   474  		Namespace:  &namespace,
   475  	}
   476  
   477  	// cluster level metric
   478  	metric2 := cloudwatch.Metric{
   479  
   480  		Dimensions: []*cloudwatch.Dimension{
   481  			{
   482  				Name:  &d1,
   483  				Value: &v1,
   484  			}},
   485  		MetricName: &metric,
   486  		Namespace:  &namespace,
   487  	}
   488  
   489  	// account level metric
   490  	metric3 := cloudwatch.Metric{
   491  
   492  		Dimensions: nil,
   493  		MetricName: &metric,
   494  		Namespace:  &namespace,
   495  	}
   496  
   497  	// different cluster than the one we're searching for
   498  	metric4 := cloudwatch.Metric{
   499  		Dimensions: []*cloudwatch.Dimension{
   500  			{
   501  				Name:  &d1,
   502  				Value: &v3,
   503  			},
   504  			{
   505  				Name:  &d2,
   506  				Value: &v2,
   507  			},
   508  		},
   509  		MetricName: &metric,
   510  		Namespace:  &namespace,
   511  	}
   512  
   513  	metrics := []*cloudwatch.Metric{&metric1, &metric2, &metric3, &metric4}
   514  
   515  	m, err := filterDimensions(metrics, wildcards, ds, expansionLimit)
   516  	if err != nil {
   517  		t.Error(err)
   518  	}
   519  	// only  the node level metric should match the filter criteria
   520  	if len(m) != 1 || m[0][0].Value != v1 || m[0][1].Value != v2 {
   521  		t.Error("Filter didn't select correct metric")
   522  	}
   523  
   524  }
   525  
   526  func TestCacheKeyMatch(t *testing.T) {
   527  	start := time.Date(2018, 7, 4, 17, 0, 0, 0, time.UTC)
   528  	end := time.Date(2018, 7, 4, 18, 0, 0, 0, time.UTC)
   529  	var tests = []struct {
   530  		req Request
   531  		key string
   532  	}{
   533  		{req: Request{
   534  			Start:     &start,
   535  			End:       &end,
   536  			Region:    "eu-west-1",
   537  			Namespace: "AWS/EC2",
   538  			Metric:    "CPUUtilization",
   539  			Period:    60, Statistic: "Sum",
   540  			DimensionString: "InstanceId:i-0106b4d25c54baac7",
   541  			Profile:         "prod",
   542  		},
   543  			key: "cloudwatch-1530723600-1530727200-eu-west-1-AWS/EC2-CPUUtilization-60-Sum-InstanceId:i-0106b4d25c54baac7-prod"},
   544  	}
   545  
   546  	for _, u := range tests {
   547  		calculatedKey := u.req.CacheKey()
   548  		if u.key != calculatedKey {
   549  			t.Errorf("Cache key doesn't match, expected '%s' got '%s' ", u.key, calculatedKey)
   550  		}
   551  	}
   552  
   553  }
   554  
   555  func TestCacheKeyMisMatch(t *testing.T) {
   556  
   557  	start := time.Date(2018, 7, 4, 17, 0, 0, 0, time.UTC)
   558  	end := time.Date(2018, 7, 4, 18, 0, 0, 0, time.UTC)
   559  	exampleRequest := Request{
   560  		Start:           &start,
   561  		End:             &end,
   562  		Region:          "eu-west-1",
   563  		Namespace:       "AWS/EC2",
   564  		Metric:          "CPUUtilization",
   565  		Period:          60,
   566  		Statistic:       "Sum",
   567  		DimensionString: "InstanceId:i-0106b4d25c54baac7",
   568  		Profile:         "prod",
   569  	}
   570  
   571  	exampleKey := exampleRequest.CacheKey()
   572  
   573  	variantStart := time.Date(2018, 7, 4, 17, 30, 0, 0, time.UTC)
   574  	variantEnd := time.Date(2018, 7, 4, 18, 30, 0, 0, time.UTC)
   575  	var tests = []Request{
   576  		{
   577  			Start:           &start,
   578  			End:             &end,
   579  			Region:          "eu-west-1",
   580  			Namespace:       "AWS/EC2",
   581  			Metric:          "CPUUtilization",
   582  			Period:          60,
   583  			Statistic:       "Sum",
   584  			DimensionString: "InstanceId:i-0106b4d25*",
   585  			Profile:         "prod",
   586  		},
   587  		{
   588  			Start:           &variantStart,
   589  			End:             &end,
   590  			Region:          "eu-west-1",
   591  			Namespace:       "AWS/EC2",
   592  			Metric:          "CPUUtilization",
   593  			Period:          60,
   594  			Statistic:       "Sum",
   595  			DimensionString: "InstanceId:i-0106b4d25c54baac7",
   596  			Profile:         "prod",
   597  		},
   598  		{
   599  			Start:           &start,
   600  			End:             &variantEnd,
   601  			Region:          "eu-west-1",
   602  			Namespace:       "AWS/EC2",
   603  			Metric:          "CPUUtilization",
   604  			Period:          60,
   605  			Statistic:       "Sum",
   606  			DimensionString: "InstanceId:i-0106b4d25c54baac7",
   607  			Profile:         "prod",
   608  		},
   609  		{
   610  			Start:           &start,
   611  			End:             &end,
   612  			Region:          "eu-central-1",
   613  			Namespace:       "AWS/EC2",
   614  			Metric:          "CPUUtilization",
   615  			Period:          60,
   616  			Statistic:       "Sum",
   617  			DimensionString: "InstanceId:i-0106b4d25c54baac7",
   618  			Profile:         "prod",
   619  		},
   620  		{
   621  			Start:           &start,
   622  			End:             &end,
   623  			Region:          "eu-west-1",
   624  			Namespace:       "AWS/ECS",
   625  			Metric:          "CPUUtilization",
   626  			Period:          60,
   627  			Statistic:       "Sum",
   628  			DimensionString: "InstanceId:i-0106b4d25c54baac7",
   629  			Profile:         "prod",
   630  		},
   631  		{
   632  			Start:           &start,
   633  			End:             &end,
   634  			Region:          "eu-west-1",
   635  			Namespace:       "AWS/EC2",
   636  			Metric:          "MemoryUsage",
   637  			Period:          60,
   638  			Statistic:       "Sum",
   639  			DimensionString: "InstanceId:i-0106b4d25c54baac7",
   640  			Profile:         "prod",
   641  		},
   642  		{
   643  			Start:           &start,
   644  			End:             &end,
   645  			Region:          "eu-west-1",
   646  			Namespace:       "AWS/EC2",
   647  			Metric:          "CPUUtilization",
   648  			Period:          300,
   649  			Statistic:       "Sum",
   650  			DimensionString: "InstanceId:i-0106b4d25c54baac7",
   651  			Profile:         "prod",
   652  		},
   653  		{
   654  			Start:           &start,
   655  			End:             &end,
   656  			Region:          "eu-west-1",
   657  			Namespace:       "AWS/EC2",
   658  			Metric:          "CPUUtilization",
   659  			Period:          60,
   660  			Statistic:       "Avg",
   661  			DimensionString: "InstanceId:i-0106b4d25c54baac7",
   662  			Profile:         "prod",
   663  		},
   664  		{
   665  			Start:           &start,
   666  			End:             &end,
   667  			Region:          "eu-west-1",
   668  			Namespace:       "AWS/EC2",
   669  			Metric:          "CPUUtilization",
   670  			Period:          300,
   671  			Statistic:       "Sum",
   672  			DimensionString: "InstanceId:i-01064646d6d6baac7",
   673  			Profile:         "prod",
   674  		},
   675  		{
   676  			Start:           &start,
   677  			End:             &end,
   678  			Region:          "eu-west-1",
   679  			Namespace:       "AWS/EC2",
   680  			Metric:          "CPUUtilization",
   681  			Period:          60,
   682  			Statistic:       "Sum",
   683  			DimensionString: "InstanceId:i-0106b4d25c54baac7",
   684  			Profile:         "sandbox",
   685  		},
   686  	}
   687  	for _, u := range tests {
   688  		calculatedKey := u.CacheKey()
   689  		if exampleKey == calculatedKey {
   690  			t.Errorf("Calculated key shouldn't match example but does. '%s' == '%s' ", calculatedKey, exampleKey)
   691  		}
   692  	}
   693  }