bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/expr/prom.go (about)

     1  package expr
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/json"
     7  	"fmt"
     8  	"sort"
     9  	"strings"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/prometheus/prometheus/promql"
    14  
    15  	"bosun.org/cmd/bosun/conf/template"
    16  	"bosun.org/cmd/bosun/expr/parse"
    17  	"bosun.org/models"
    18  	"bosun.org/opentsdb"
    19  	promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
    20  	promModels "github.com/prometheus/common/model"
    21  )
    22  
// PromClients is a collection of Prometheus API v1 clients (connections)
// keyed by a string name.
// NOTE(review): the key is presumably the backend prefix used to select a
// client in expressions — confirm against where the collection is populated.
type PromClients map[string]promv1.API
    25  
// Prom is a map of expression function names to the functions that query
// Prometheus. Every entry is prefix enabled, and every argument is passed as a
// string (durations included).
var Prom = map[string]parse.Func{
	// prom builds an aggregation query from its parts and runs it via PromQuery.
	"prom": {
		Args: []models.FuncType{
			models.TypeString, // metric
			models.TypeString, // groupby tags
			models.TypeString, // filter string
			models.TypeString, // aggregation type
			models.TypeString, // step interval duration
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeSeriesSet,
		Tags:          promGroupTags,
		F:             PromQuery,
		PrefixEnabled: true,
	},
	// promm is the multi backend variant of prom; its tags also include promMultiKey.
	"promm": {
		Args: []models.FuncType{
			models.TypeString, // metric
			models.TypeString, // groupby tags
			models.TypeString, // filter string
			models.TypeString, // aggregation type
			models.TypeString, // step interval duration
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeSeriesSet,
		Tags:          promMGroupTags,
		F:             PromMQuery,
		PrefixEnabled: true,
	},
	// promrate is like prom but takes an additional rate duration argument.
	"promrate": {
		Args: []models.FuncType{
			models.TypeString, // metric
			models.TypeString, // groupby tags
			models.TypeString, // filter string
			models.TypeString, // aggregation type
			models.TypeString, // rate step interval duration
			models.TypeString, // step interval duration
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeSeriesSet,
		Tags:          promGroupTags,
		F:             PromRate,
		PrefixEnabled: true,
	},
	// promratem is the multi backend variant of promrate.
	"promratem": {
		Args: []models.FuncType{
			models.TypeString, // metric
			models.TypeString, // groupby tags
			models.TypeString, // filter string
			models.TypeString, // aggregation type
			models.TypeString, // rate step interval duration
			models.TypeString, // step interval duration
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeSeriesSet,
		Tags:          promMGroupTags,
		F:             PromMRate,
		PrefixEnabled: true,
	},
	// promras takes a raw PromQL query; its tags are derived from the query's
	// top level aggregation (see promAggRawTags).
	"promras": { // prom raw aggregated series
		Args: []models.FuncType{
			models.TypeString, // promql query
			models.TypeString, // step interval duration
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeSeriesSet,
		Tags:          promAggRawTags,
		F:             PromRawAggSeriesQuery,
		PrefixEnabled: true,
	},
	// prommras is the multi backend variant of promras.
	"prommras": { // prom multi raw aggregated series
		Args: []models.FuncType{
			models.TypeString, // promql query
			models.TypeString, // step interval duration
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeSeriesSet,
		Tags:          promMAggRawTags,
		F:             PromMRawAggSeriesQuery,
		PrefixEnabled: true,
	},
	// prommetrics takes no arguments and returns info about the available metrics.
	"prommetrics": {
		Args:          []models.FuncType{},
		Return:        models.TypeInfo,
		F:             PromMetricList,
		PrefixEnabled: true,
	},
	// promtags returns info about the tags seen for a metric in a time frame.
	"promtags": {
		Args: []models.FuncType{
			models.TypeString, // metric
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeInfo,
		F:             PromTagInfo,
		PrefixEnabled: true,
	},
}
   131  
// promMultiKey is the tag key that is added to the series of multibackend
// queries. The tag's value is the prefix of the backend the series came from.
const promMultiKey = "bosun_prefix"
   134  
   135  // promGroupTags parses the csv tags argument of the prom based functions
   136  func promGroupTags(args []parse.Node) (parse.Tags, error) {
   137  	tags := make(parse.Tags)
   138  	csvTags := strings.Split(args[1].(*parse.StringNode).Text, ",")
   139  	for _, k := range csvTags {
   140  		tags[k] = struct{}{}
   141  	}
   142  	return tags, nil
   143  }
   144  
   145  // promMGroupTags parses the csv tags argument of the prom based functions
   146  // and also adds the promMultiKey tag
   147  func promMGroupTags(args []parse.Node) (parse.Tags, error) {
   148  	tags, err := promGroupTags(args)
   149  	if err != nil {
   150  		return nil, err
   151  	}
   152  	tags[promMultiKey] = struct{}{}
   153  	return tags, nil
   154  }
   155  
   156  // promAggRawTags parses the promql argument to get the expected
   157  // grouping tags from an aggregated series
   158  func promAggRawTags(args []parse.Node) (parse.Tags, error) {
   159  	tags := make(parse.Tags)
   160  	pq := args[0].(*parse.StringNode).Text
   161  	parsedPromExpr, err := promql.ParseExpr(pq)
   162  	if err != nil {
   163  		return nil, fmt.Errorf("failed to extract tags from promql query due to invalid promql expression: %v", err)
   164  	}
   165  	promAgExprNode, ok := parsedPromExpr.(*promql.AggregateExpr)
   166  	if !ok || promAgExprNode == nil {
   167  		return nil, fmt.Errorf("failed to extract tags from promql query, top level expression is not aggregation operation: %v", err)
   168  	}
   169  	for _, k := range promAgExprNode.Grouping {
   170  		tags[k] = struct{}{}
   171  	}
   172  	return tags, nil
   173  }
   174  
   175  // promMAggRawTags is a wrapper for promAggRawTags but adds the promMultiKey tag.
   176  func promMAggRawTags(args []parse.Node) (parse.Tags, error) {
   177  	tags, err := promAggRawTags(args)
   178  	if err != nil {
   179  		return nil, err
   180  	}
   181  	tags[promMultiKey] = struct{}{}
   182  	return tags, nil
   183  }
   184  
   185  // PromRawAggSeriesQuery is wrapper for promRawAggSeriesQuery setting the multi argument to false.
   186  func PromRawAggSeriesQuery(prefix string, e *State, query, stepDuration, sdur, edur string) (*Results, error) {
   187  	return promRawAggSeriesQuery(prefix, e, query, stepDuration, sdur, edur, false)
   188  }
   189  
   190  // PromMRawAggSeriesQuery is wrapper for promRawAggSeriesQuery setting the multi argument to true.
   191  func PromMRawAggSeriesQuery(prefix string, e *State, query, stepDuration, sdur, edur string) (*Results, error) {
   192  	return promRawAggSeriesQuery(prefix, e, query, stepDuration, sdur, edur, true)
   193  }
   194  
   195  // promRawAggSeriesQuery takes a raw promql query that has a top level promql aggregation function
   196  // and returns a seriesSet. If multi is true then the promMultiKey is added to each series in the result
   197  // and multiple prometheus tsdbs are queried.
   198  func promRawAggSeriesQuery(prefix string, e *State, query, stepDuration, sdur, edur string, multi bool) (r *Results, err error) {
   199  	r = new(Results)
   200  	parsedPromExpr, err := promql.ParseExpr(query)
   201  	if err != nil {
   202  		return nil, fmt.Errorf("failed to parse invalid promql expression: %v", err)
   203  	}
   204  	promAgExprNode, ok := parsedPromExpr.(*promql.AggregateExpr)
   205  	if !ok || promAgExprNode == nil {
   206  		return nil, fmt.Errorf("top level expression is not aggregation operation")
   207  	}
   208  	start, end, err := parseDurationPair(e, sdur, edur)
   209  	if err != nil {
   210  		return
   211  	}
   212  	st, err := opentsdb.ParseDuration(stepDuration)
   213  	if err != nil {
   214  		return
   215  	}
   216  	step := time.Duration(st)
   217  	tagLen := len(promAgExprNode.Grouping)
   218  
   219  	prefixes := strings.Split(prefix, ",")
   220  
   221  	// Single prom backend case
   222  	if !multi || (len(prefixes) == 1 && prefixes[0] == "") {
   223  		qRes, err := timePromRequest(e, prefix, query, start, end, step)
   224  		if err != nil {
   225  			return nil, err
   226  		}
   227  		err = promMatrixToResults(prefix, e, qRes, tagLen, false, r)
   228  		return r, err
   229  	}
   230  
   231  	// Multibackend case
   232  	wg := sync.WaitGroup{}
   233  	wg.Add(len(prefixes))
   234  	resCh := make(chan struct {
   235  		prefix  string
   236  		promVal promModels.Value
   237  	}, len(prefixes))
   238  	errCh := make(chan error, len(prefixes))
   239  
   240  	for _, prefix := range prefixes {
   241  		go func(prefix string) {
   242  			defer wg.Done()
   243  			res, err := timePromRequest(e, prefix, query, start, end, step)
   244  			resCh <- struct {
   245  				prefix  string
   246  				promVal promModels.Value
   247  			}{prefix, res}
   248  			errCh <- err
   249  		}(prefix)
   250  	}
   251  
   252  	wg.Wait()
   253  	close(resCh)
   254  	close(errCh)
   255  	errors := []string{}
   256  	for err := range errCh {
   257  		if err == nil {
   258  			continue
   259  		}
   260  		errors = append(errors, err.Error())
   261  	}
   262  	if len(errors) > 0 {
   263  		return r, fmt.Errorf(strings.Join(errors, " :: "))
   264  	}
   265  
   266  	for promRes := range resCh {
   267  		err = promMatrixToResults(promRes.prefix, e, promRes.promVal, tagLen, true, r)
   268  		if err != nil {
   269  			return
   270  		}
   271  	}
   272  
   273  	return
   274  }
   275  
   276  // PromQuery is a wrapper for promQuery so there is a function signature that doesn't require the rate argument in the expr language.
   277  // It also sets promQuery's addPrefixTag argument to false since this only queries one backend.
   278  func PromQuery(prefix string, e *State, metric, groupBy, filter, agType, stepDuration, sdur, edur string) (r *Results, err error) {
   279  	return promQuery(prefix, e, metric, groupBy, filter, agType, "", stepDuration, sdur, edur, false)
   280  }
   281  
   282  // PromRate is a wrapper for promQuery like PromQuery except that it has a rateDuration argument for the step of the rate calculation.
   283  // This enables rate calculation for counters.
   284  func PromRate(prefix string, e *State, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur string) (r *Results, err error) {
   285  	return promQuery(prefix, e, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur, false)
   286  }
   287  
   288  // PromMQuery is a wrapper from promMQuery in the way that PromQuery is a wrapper from promQuery.
   289  func PromMQuery(prefix string, e *State, metric, groupBy, filter, agType, stepDuration, sdur, edur string) (r *Results, err error) {
   290  	return promMQuery(prefix, e, metric, groupBy, filter, agType, "", stepDuration, sdur, edur)
   291  }
   292  
   293  // PromMRate is a wrapper from promMQuery in the way that PromRate is a wrapper from promQuery. It has a stepDuration argument
   294  // for rate calculation.
   295  func PromMRate(prefix string, e *State, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur string) (r *Results, err error) {
   296  	return promMQuery(prefix, e, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur)
   297  }
   298  
   299  // promMQuery makes call to multiple prometheus TSDBs and combines the results into a single series set.
   300  // It adds the promMultiKey tag key with the value of prefix label to the results. Queries are executed in parallel.
   301  func promMQuery(prefix string, e *State, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur string) (r *Results, err error) {
   302  	r = new(Results)
   303  	prefixes := strings.Split(prefix, ",")
   304  	if len(prefixes) == 1 && prefixes[0] == "" {
   305  		return promQuery("default", e, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur, true)
   306  	}
   307  
   308  	wg := sync.WaitGroup{}
   309  	wg.Add(len(prefixes))
   310  	resCh := make(chan *Results, len(prefixes))
   311  	errCh := make(chan error, len(prefixes))
   312  
   313  	for _, prefix := range prefixes {
   314  		go func(prefix string) {
   315  			defer wg.Done()
   316  			res, err := promQuery(prefix, e, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur, true)
   317  			resCh <- res
   318  			errCh <- err
   319  		}(prefix)
   320  	}
   321  
   322  	wg.Wait()
   323  	close(resCh)
   324  	close(errCh)
   325  	// Gather errors from the request and return an error if any of the requests failled
   326  	errors := []string{}
   327  	for err := range errCh {
   328  		if err == nil {
   329  			continue
   330  		}
   331  		errors = append(errors, err.Error())
   332  	}
   333  	if len(errors) > 0 {
   334  		return r, fmt.Errorf(strings.Join(errors, " :: "))
   335  	}
   336  	resultCollection := []*Results{}
   337  	for res := range resCh {
   338  		resultCollection = append(resultCollection, res)
   339  	}
   340  	if len(resultCollection) == 1 { // no need to merge if there is only one item
   341  		return resultCollection[0], nil
   342  	}
   343  	// Merge the query results into a single seriesSet
   344  	r, err = Merge(e, resultCollection...)
   345  	return
   346  }
   347  
   348  // promQuery uses the information passed to it to generate an PromQL query using the promQueryTemplate.
   349  // It then calls timePromRequest to execute the query and process that results in to a Bosun Results object.
   350  func promQuery(prefix string, e *State, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur string, addPrefixTag bool) (r *Results, err error) {
   351  	r = new(Results)
   352  	start, end, err := parseDurationPair(e, sdur, edur)
   353  	if err != nil {
   354  		return
   355  	}
   356  	st, err := opentsdb.ParseDuration(stepDuration)
   357  	if err != nil {
   358  		return
   359  	}
   360  	step := time.Duration(st)
   361  	qd := promQueryTemplateData{
   362  		Metric:       metric,
   363  		AgFunc:       agType,
   364  		Tags:         groupBy,
   365  		Filter:       filter,
   366  		RateDuration: rateDuration,
   367  	}
   368  	query, err := qd.RenderString()
   369  	qRes, err := timePromRequest(e, prefix, query, start, end, step)
   370  	if err != nil {
   371  		return
   372  	}
   373  	groupByTagSet := make(opentsdb.TagSet)
   374  	for _, v := range strings.Split(groupBy, ",") {
   375  		if v != "" {
   376  			groupByTagSet[v] = ""
   377  		}
   378  	}
   379  	err = promMatrixToResults(prefix, e, qRes, len(groupByTagSet), addPrefixTag, r)
   380  	return r, err
   381  }
   382  
// promQueryTemplate is a template for PromQL time series queries. It supports
// filtering and aggregation.
//
// It is executed with a promQueryTemplateData value. The rendered query has the
// shape AgFunc( Metric ) by ( Tags ). A non-empty Filter is appended to the
// metric inside curly braces, and a non-empty RateDuration wraps the selector
// in rate( ... [RateDuration] ).
var promQueryTemplate = template.Must(template.New("promQueryTemplate").Parse(`
{{ .AgFunc }}(
{{- if ne .RateDuration "" }}rate({{ end }} {{ .Metric -}}
{{- if ne .Filter "" }} {{ .Filter | printf "{%v} " -}} {{- end -}}
{{- if ne .RateDuration "" -}} {{ .RateDuration | printf " [%v] )"  }} {{- end -}}
) by ( {{ .Tags }} )`))
   391  
// promQueryTemplateData is the struct that contains the fields to render the promQueryTemplate.
type promQueryTemplateData struct {
	Metric       string // metric name placed inside the aggregation
	AgFunc       string // aggregation function that wraps the query
	Tags         string // rendered verbatim inside the "by ( ... )" clause
	Filter       string // when non-empty, rendered inside curly braces after the metric
	RateDuration string // when non-empty, the metric is wrapped in rate() with this range
}
   400  
   401  // RenderString creates a query string using promQueryTemplate.
   402  func (pq promQueryTemplateData) RenderString() (string, error) {
   403  	buf := new(bytes.Buffer)
   404  	err := promQueryTemplate.Execute(buf, pq)
   405  	if err != nil {
   406  		return "", err
   407  	}
   408  	return buf.String(), nil
   409  }
   410  
   411  // timePromRequest takes a PromQL query string with the given time frame and step duration. The result
   412  // type of the PromQL query must be a Prometheus Matrix.
   413  func timePromRequest(e *State, prefix, query string, start, end time.Time, step time.Duration) (s promModels.Value, err error) {
   414  	client, found := e.PromConfig[prefix]
   415  	if !found {
   416  		return s, fmt.Errorf(`prometheus client with name "%v" not defined`, prefix)
   417  	}
   418  	r := promv1.Range{Start: start, End: end, Step: step}
   419  	cacheKey := struct {
   420  		Query  string
   421  		Range  promv1.Range
   422  		Step   time.Duration
   423  		Prefix string
   424  	}{
   425  		query,
   426  		r,
   427  		step,
   428  		prefix,
   429  	}
   430  	cacheKeyBytes, _ := json.MarshalIndent(cacheKey, "", "  ")
   431  	e.Timer.StepCustomTiming("prom", fmt.Sprintf("query (%v)", prefix), query, func() {
   432  		getFn := func() (interface{}, error) {
   433  			res, err := client.QueryRange(context.Background(), query, r)
   434  			if err != nil {
   435  				return nil, err
   436  			}
   437  			m, ok := res.(promModels.Matrix)
   438  			if !ok {
   439  				return nil, fmt.Errorf("prom: expected matrix result")
   440  			}
   441  			return m, nil
   442  		}
   443  		val, err, hit := e.Cache.Get(string(cacheKeyBytes), getFn)
   444  		collectCacheHit(e.Cache, "prom_ts", hit)
   445  		var ok bool
   446  		if s, ok = val.(promModels.Matrix); !ok {
   447  			err = fmt.Errorf("prom: did not get valid result from prometheus, %v", err)
   448  		}
   449  	})
   450  	return
   451  }
   452  
   453  // promMatrixToResults takes the Value result of a prometheus response and
   454  // updates the Results property of the passed Results object
   455  func promMatrixToResults(prefix string, e *State, res promModels.Value, expectedTagLen int, addPrefix bool, r *Results) (err error) {
   456  	matrix, ok := res.(promModels.Matrix)
   457  	if !ok {
   458  		return fmt.Errorf("result not of type matrix")
   459  	}
   460  	for _, row := range matrix {
   461  		tags := make(opentsdb.TagSet)
   462  		for tagK, tagV := range row.Metric {
   463  			tags[string(tagK)] = string(tagV)
   464  		}
   465  		// Remove results with less tag keys than those requests
   466  		if len(tags) < expectedTagLen {
   467  			continue
   468  		}
   469  		if addPrefix {
   470  			tags[promMultiKey] = prefix
   471  		}
   472  		if e.Squelched(tags) {
   473  			continue
   474  		}
   475  		values := make(Series, len(row.Values))
   476  		for _, v := range row.Values {
   477  			values[v.Timestamp.Time()] = float64(v.Value)
   478  		}
   479  		r.Results = append(r.Results, &Result{
   480  			Value: values,
   481  			Group: tags,
   482  		})
   483  	}
   484  	return
   485  }
   486  
   487  // PromMetricList returns a list of available metrics for the prometheus backend
   488  // by using querying the Prometheus Label Values API for "__name__"
   489  func PromMetricList(prefix string, e *State) (r *Results, err error) {
   490  	r = new(Results)
   491  	client, found := e.PromConfig[prefix]
   492  	if !found {
   493  		return r, fmt.Errorf(`prometheus client with name "%v" not defined`, prefix)
   494  	}
   495  	getFn := func() (interface{}, error) {
   496  		var metrics promModels.LabelValues
   497  		e.Timer.StepCustomTiming("prom", "metriclist", "", func() {
   498  			metrics, err = client.LabelValues(context.Background(), "__name__")
   499  		})
   500  		if err != nil {
   501  			return nil, err
   502  		}
   503  		return metrics, nil
   504  	}
   505  	val, err, hit := e.Cache.Get(fmt.Sprintf("%v:metriclist", prefix), getFn)
   506  	collectCacheHit(e.Cache, "prom_metrics", hit)
   507  	if err != nil {
   508  		return nil, err
   509  	}
   510  	metrics := val.(promModels.LabelValues)
   511  	r.Results = append(r.Results, &Result{Value: Info{metrics}})
   512  	return
   513  }
   514  
   515  // PromTagInfo does a range query for the given metric and returns info about the
   516  // tags and labels for the metric based on the data from the queried timeframe
   517  func PromTagInfo(prefix string, e *State, metric, sdur, edur string) (r *Results, err error) {
   518  	r = new(Results)
   519  	client, found := e.PromConfig[prefix]
   520  	if !found {
   521  		return r, fmt.Errorf(`prometheus client with name "%v" not defined`, prefix)
   522  	}
   523  	start, end, err := parseDurationPair(e, sdur, edur)
   524  	if err != nil {
   525  		return
   526  	}
   527  
   528  	qRange := promv1.Range{Start: start, End: end, Step: time.Minute}
   529  
   530  	getFn := func() (interface{}, error) {
   531  		var res promModels.Value
   532  		e.Timer.StepCustomTiming("prom", "taginfo", metric, func() {
   533  			res, err = client.QueryRange(context.Background(), metric, qRange)
   534  		})
   535  		if err != nil {
   536  			return nil, err
   537  		}
   538  		m, ok := res.(promModels.Matrix)
   539  		if !ok {
   540  			return nil, fmt.Errorf("prom: expected a prometheus matrix type in result but got %v", res.Type().String())
   541  		}
   542  		return m, nil
   543  	}
   544  	val, err, hit := e.Cache.Get(fmt.Sprintf("%v:%v:taginfo", prefix, metric), getFn)
   545  	collectCacheHit(e.Cache, "prom_metrics", hit)
   546  	if err != nil {
   547  		return nil, err
   548  	}
   549  	matrix, ok := val.(promModels.Matrix)
   550  	if !ok {
   551  		err = fmt.Errorf("prom: did not get valid result from prometheus, %v", err)
   552  	}
   553  	tagInfo := struct {
   554  		Metric       string
   555  		Keys         []string
   556  		KeysToValues map[string][]string
   557  		UniqueSets   []string
   558  	}{}
   559  	tagInfo.Metric = metric
   560  	tagInfo.KeysToValues = make(map[string][]string)
   561  	sets := make(map[string]struct{})
   562  	keysToValues := make(map[string]map[string]struct{})
   563  	for _, row := range matrix {
   564  		tags := make(opentsdb.TagSet)
   565  		for rawTagK, rawTagV := range row.Metric {
   566  			tagK := string(rawTagK)
   567  			tagV := string(rawTagV)
   568  			if tagK == "__name__" {
   569  				continue
   570  			}
   571  			tags[tagK] = tagV
   572  			if _, ok := keysToValues[tagK]; !ok {
   573  				keysToValues[tagK] = make(map[string]struct{})
   574  			}
   575  			keysToValues[tagK][tagV] = struct{}{}
   576  		}
   577  		sets[tags.String()] = struct{}{}
   578  	}
   579  	for k, values := range keysToValues {
   580  		tagInfo.Keys = append(tagInfo.Keys, k)
   581  		for val := range values {
   582  			tagInfo.KeysToValues[k] = append(tagInfo.KeysToValues[k], val)
   583  		}
   584  	}
   585  	sort.Strings(tagInfo.Keys)
   586  	for s := range sets {
   587  		tagInfo.UniqueSets = append(tagInfo.UniqueSets, s)
   588  	}
   589  	sort.Strings(tagInfo.UniqueSets)
   590  	r.Results = append(r.Results, &Result{
   591  		Value: Info{tagInfo},
   592  	})
   593  	return
   594  }