bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/expr/cloudwatch.go (about)

     1  package expr
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"regexp"
     7  	"strings"
     8  	"sync"
     9  	"time"
    10  
    11  	"bosun.org/cloudwatch"
    12  	"bosun.org/cmd/bosun/expr/parse"
    13  	"bosun.org/models"
    14  	"bosun.org/opentsdb"
    15  )
    16  
    17  // cloudwatch defines functions for use with amazon cloudwatch api
    18  var CloudWatch = map[string]parse.Func{
    19  
    20  	"cw": {
    21  		Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString, models.TypeString,
    22  			models.TypeString, models.TypeString, models.TypeString, models.TypeString},
    23  		Return:        models.TypeSeriesSet,
    24  		Tags:          cloudwatchTagQuery,
    25  		F:             CloudWatchQuery,
    26  		PrefixEnabled: true,
    27  	},
    28  }
    29  
    30  var PeriodParseError = errors.New("Could not parse the period value")
    31  var StartParseError = errors.New("Could not parse the start value")
    32  var EndParseError = errors.New("Could not parse the end value")
    33  var DimensionParseError = errors.New("dimensions must be in format key:value")
    34  
    35  var isNumber = regexp.MustCompile("^\\d+$")
    36  
    37  func parseCloudWatchResponse(req *cloudwatch.Request, s *cloudwatch.Response, multiRegion bool) ([]*Result, error) {
    38  	const parseErrFmt = "cloudwatch ParseError (%s): %s"
    39  	var dps Series
    40  	if s == nil {
    41  		return nil, fmt.Errorf(parseErrFmt, req.Metric, "empty response")
    42  	}
    43  	results := make([]*Result, 0)
    44  
    45  	for _, result := range s.Raw.MetricDataResults {
    46  		if len(result.Timestamps) == 0 {
    47  			continue
    48  		}
    49  		tags := make(opentsdb.TagSet)
    50  		for k, v := range s.TagSet[*result.Id] {
    51  			tags[k] = v
    52  		}
    53  		dps = make(Series)
    54  		for x, t := range result.Timestamps {
    55  			dps[*t] = *result.Values[x]
    56  		}
    57  
    58  		r := Result{
    59  			Value: dps,
    60  			Group: tags,
    61  		}
    62  
    63  		if multiRegion {
    64  			r.Group["bosun-region"] = req.Region
    65  		}
    66  
    67  		results = append(results, &r)
    68  	}
    69  
    70  	return results, nil
    71  }
    72  
    73  func hasWildcardDimension(dimensions string) bool {
    74  	return strings.Contains(dimensions, "*")
    75  }
    76  
    77  func parseDimensions(dimensions string) ([][]cloudwatch.Dimension, error) {
    78  	dl := make([][]cloudwatch.Dimension, 0)
    79  	if len(strings.TrimSpace(dimensions)) == 0 {
    80  		return dl, nil
    81  	}
    82  	dims := strings.Split(dimensions, ",")
    83  
    84  	l := make([]cloudwatch.Dimension, 0)
    85  	for _, row := range dims {
    86  		dim := strings.Split(row, ":")
    87  		if len(dim) != 2 {
    88  			return nil, DimensionParseError
    89  		}
    90  		l = append(l, cloudwatch.Dimension{Name: dim[0], Value: dim[1]})
    91  	}
    92  	dl = append(dl, l)
    93  
    94  	return dl, nil
    95  }
    96  
    97  func parseDurations(s, e, p string) (start, end, period opentsdb.Duration, err error) {
    98  
    99  	start, err = opentsdb.ParseDuration(s)
   100  	if err != nil {
   101  		return start, end, period, StartParseError
   102  	}
   103  	end = opentsdb.Duration(0)
   104  	if e != "" {
   105  		end, err = opentsdb.ParseDuration(e)
   106  		if err != nil {
   107  			return start, end, period, EndParseError
   108  		}
   109  	}
   110  
   111  	// to maintain backwards compatibility assume that period without time unit is seconds
   112  	if isNumber.MatchString(p) {
   113  		p += "s"
   114  	}
   115  	period, err = opentsdb.ParseDuration(p)
   116  	if err != nil {
   117  		return start, end, period, PeriodParseError
   118  	}
   119  	return
   120  }
   121  
   122  func CloudWatchQuery(prefix string, e *State, region, namespace, metric, period, statistic, dimensions, sduration, eduration string) (*Results, error) {
   123  
   124  	r := new(Results)
   125  
   126  	regions := strings.Split(region, ",")
   127  	if len(regions) == 0 {
   128  		return r, nil
   129  	}
   130  
   131  	var wg sync.WaitGroup
   132  	queryResults := []*Results{}
   133  
   134  	// reqCh (Request Channel) is populated with cloudwatch requests for each region
   135  	reqCh := make(chan cloudwatch.Request, len(regions))
   136  	// resCh (Result Channel) contains the timeseries responses for requests for region
   137  	resCh := make(chan *Results, len(regions))
   138  	// errCh (Error Channel) contains any request errors
   139  	errCh := make(chan error, len(regions))
   140  
   141  	// a worker makes a getMetricData request for a region
   142  	worker := func() {
   143  		for req := range reqCh {
   144  			res := []*Result{}
   145  			data, err := getCloudwatchData(e, &req)
   146  			if err == nil {
   147  				res, err = parseCloudWatchResponse(&req, &data, len(regions) > 1)
   148  				resCh <- &Results{Results: res}
   149  			}
   150  			errCh <- err
   151  		}
   152  		defer wg.Done()
   153  	}
   154  
   155  	// Create N workers to parallelize multiple requests at once since each region requires an HTTP request
   156  	for i := 0; i < e.CloudWatchContext.GetConcurrency(); i++ {
   157  		wg.Add(1)
   158  		go worker()
   159  	}
   160  
   161  	sd, ed, p, err := parseDurations(sduration, eduration, period)
   162  	if err != nil {
   163  		return r, err
   164  	}
   165  
   166  	timingString := fmt.Sprintf(`querying %d regions for metric:"%v"`, len(regions), metric)
   167  	e.Timer.StepCustomTiming("cloudwatch", "query", timingString, func() {
   168  		// Feed region queries into the request channel which the workers will consume
   169  
   170  		for _, r := range regions {
   171  
   172  			// The times are rounded to a whole period. This improves
   173  			// both the caching of the query results as well as the query performance for
   174  			// the reasons outlined in the aws sdk docs here
   175  			// https://docs.aws.amazon.com/AmazonCloudWatch/latest/APIReference/API_GetMetricData.html
   176  
   177  			// round down start time
   178  			st := e.now.Add(-time.Duration(sd)).Truncate(time.Duration(p))
   179  			// round up end time
   180  			et := e.now.Add(-time.Duration(ed)).Truncate(time.Duration(p)).Add(time.Duration(p))
   181  
   182  			req := cloudwatch.Request{
   183  				Start:           &st,
   184  				End:             &et,
   185  				Region:          r,
   186  				Namespace:       namespace,
   187  				Metric:          metric,
   188  				Period:          int64(p.Seconds()),
   189  				Statistic:       statistic,
   190  				DimensionString: dimensions,
   191  				Profile:         prefix,
   192  			}
   193  			reqCh <- req
   194  		}
   195  		close(reqCh)
   196  		wg.Wait() // Wait for all the workers to finish
   197  	})
   198  	close(resCh)
   199  	close(errCh)
   200  
   201  	// Gather errors from the request and return an error if any of the requests failled
   202  	errs := []string{}
   203  	for err := range errCh {
   204  		if err == nil {
   205  			continue
   206  		}
   207  		errs = append(errs, err.Error())
   208  	}
   209  	if len(errs) > 0 {
   210  		return r, fmt.Errorf(strings.Join(errs, " :: "))
   211  	}
   212  	// Gather all the query results
   213  	for res := range resCh {
   214  		queryResults = append(queryResults, res)
   215  	}
   216  	if len(queryResults) == 1 { // no need to merge if there is only one item
   217  		return queryResults[0], nil
   218  	}
   219  	// Merge the query results into a single seriesSet
   220  	r, err = Merge(e, queryResults...)
   221  	return r, err
   222  
   223  }
   224  
   225  func getCloudwatchData(e *State, req *cloudwatch.Request) (resp cloudwatch.Response, err error) {
   226  	e.cloudwatchQueries = append(e.cloudwatchQueries, *req)
   227  
   228  	key := req.CacheKey()
   229  	getFn := func() (interface{}, error) {
   230  
   231  		d, err := parseDimensions(req.DimensionString)
   232  
   233  		if hasWildcardDimension(req.DimensionString) {
   234  			lr := cloudwatch.LookupRequest{
   235  				Region:     req.Region,
   236  				Namespace:  req.Namespace,
   237  				Metric:     req.Metric,
   238  				Dimensions: d,
   239  				Profile:    req.Profile,
   240  			}
   241  			d, err = e.CloudWatchContext.LookupDimensions(&lr)
   242  			if err != nil {
   243  				return resp, err
   244  			}
   245  			if len(d) == 0 {
   246  				return resp, fmt.Errorf("Wildcard dimensionString did not match any cloudwatch metrics in region %s", req.Region)
   247  			}
   248  		}
   249  		req.Dimensions = d
   250  		return e.CloudWatchContext.Query(req)
   251  	}
   252  
   253  	var val interface{}
   254  	var hit bool
   255  	val, err, hit = e.Cache.Get(key, getFn)
   256  
   257  	collectCacheHit(e.Cache, "cloudwatch", hit)
   258  	resp = val.(cloudwatch.Response)
   259  
   260  	return
   261  }
   262  
   263  func cloudwatchTagQuery(args []parse.Node) (parse.Tags, error) {
   264  	t := make(parse.Tags)
   265  	n := args[5].(*parse.StringNode)
   266  	for _, s := range strings.Split(n.Text, ",") {
   267  		if s != "" {
   268  			g := strings.Split(s, ":")
   269  			if g[0] != "" {
   270  				t[g[0]] = struct{}{}
   271  			}
   272  		}
   273  	}
   274  	return t, nil
   275  }