// bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/expr/prom.go (about)

package expr

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/prometheus/prometheus/promql"

	"bosun.org/cmd/bosun/conf/template"
	"bosun.org/cmd/bosun/expr/parse"
	"bosun.org/models"
	"bosun.org/opentsdb"
	promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
	promModels "github.com/prometheus/common/model"
)

// PromClients is a collection of Prometheus API v1 client APIs (connections),
// keyed by name. The functions in this file look a client up by the prefix
// string they are called with.
type PromClients map[string]promv1.API

// Prom is a map of functions to query Prometheus.
//
// Each entry maps an expression-language function name to its argument
// types, return type, implementation (F) and, for the series-returning
// functions, the helper (Tags) that reports the expected group tags.
var Prom = map[string]parse.Func{
	// prom: aggregated query against a single backend (see PromQuery).
	"prom": {
		Args: []models.FuncType{
			models.TypeString, // metric
			models.TypeString, // groupby tags
			models.TypeString, // filter string
			models.TypeString, // aggregation type
			models.TypeString, // step interval duration
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeSeriesSet,
		Tags:          promGroupTags,
		F:             PromQuery,
		PrefixEnabled: true,
	},
	// promm: like prom, but implemented by PromMQuery; its tag helper also
	// reports the promMultiKey tag.
	"promm": {
		Args: []models.FuncType{
			models.TypeString, // metric
			models.TypeString, // groupby tags
			models.TypeString, // filter string
			models.TypeString, // aggregation type
			models.TypeString, // step interval duration
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeSeriesSet,
		Tags:          promMGroupTags,
		F:             PromMQuery,
		PrefixEnabled: true,
	},
	// promrate: like prom, with an extra rate step duration argument
	// (see PromRate).
	"promrate": {
		Args: []models.FuncType{
			models.TypeString, // metric
			models.TypeString, // groupby tags
			models.TypeString, // filter string
			models.TypeString, // aggregation type
			models.TypeString, // rate step interval duration
			models.TypeString, // step interval duration
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeSeriesSet,
		Tags:          promGroupTags,
		F:             PromRate,
		PrefixEnabled: true,
	},
	// promratem: like promrate, but implemented by PromMRate; its tag helper
	// also reports the promMultiKey tag.
	"promratem": {
		Args: []models.FuncType{
			models.TypeString, // metric
			models.TypeString, // groupby tags
			models.TypeString, // filter string
			models.TypeString, // aggregation type
			models.TypeString, // rate step interval duration
			models.TypeString, // step interval duration
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeSeriesSet,
		Tags:          promMGroupTags,
		F:             PromMRate,
		PrefixEnabled: true,
	},
	"promras": { // prom raw aggregated series
		Args: []models.FuncType{
			models.TypeString, // promql query
			models.TypeString, // step interval duration
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeSeriesSet,
		Tags:          promAggRawTags,
		F:             PromRawAggSeriesQuery,
		PrefixEnabled: true,
	},
	"prommras": { // prom multi raw aggregated series
		Args: []models.FuncType{
			models.TypeString, // promql query
			models.TypeString, // step interval duration
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeSeriesSet,
		Tags:          promMAggRawTags,
		F:             PromMRawAggSeriesQuery,
		PrefixEnabled: true,
	},
	// prommetrics: takes no arguments and returns info (see PromMetricList).
	"prommetrics": {
		Args:          []models.FuncType{},
		Return:        models.TypeInfo,
		F:             PromMetricList,
		PrefixEnabled: true,
	},
	// promtags: returns info about a metric's tags (see PromTagInfo).
	"promtags": {
		Args: []models.FuncType{
			models.TypeString, // metric
			models.TypeString, // start duration
			models.TypeString, // end duration
		},
		Return:        models.TypeInfo,
		F:             PromTagInfo,
		PrefixEnabled: true,
	},
}

// promMultiKey is the value for the tag key that is added to multibackend queries.
133 const promMultiKey = "bosun_prefix" 134 135 // promGroupTags parses the csv tags argument of the prom based functions 136 func promGroupTags(args []parse.Node) (parse.Tags, error) { 137 tags := make(parse.Tags) 138 csvTags := strings.Split(args[1].(*parse.StringNode).Text, ",") 139 for _, k := range csvTags { 140 tags[k] = struct{}{} 141 } 142 return tags, nil 143 } 144 145 // promMGroupTags parses the csv tags argument of the prom based functions 146 // and also adds the promMultiKey tag 147 func promMGroupTags(args []parse.Node) (parse.Tags, error) { 148 tags, err := promGroupTags(args) 149 if err != nil { 150 return nil, err 151 } 152 tags[promMultiKey] = struct{}{} 153 return tags, nil 154 } 155 156 // promAggRawTags parses the promql argument to get the expected 157 // grouping tags from an aggregated series 158 func promAggRawTags(args []parse.Node) (parse.Tags, error) { 159 tags := make(parse.Tags) 160 pq := args[0].(*parse.StringNode).Text 161 parsedPromExpr, err := promql.ParseExpr(pq) 162 if err != nil { 163 return nil, fmt.Errorf("failed to extract tags from promql query due to invalid promql expression: %v", err) 164 } 165 promAgExprNode, ok := parsedPromExpr.(*promql.AggregateExpr) 166 if !ok || promAgExprNode == nil { 167 return nil, fmt.Errorf("failed to extract tags from promql query, top level expression is not aggregation operation: %v", err) 168 } 169 for _, k := range promAgExprNode.Grouping { 170 tags[k] = struct{}{} 171 } 172 return tags, nil 173 } 174 175 // promMAggRawTags is a wrapper for promAggRawTags but adds the promMultiKey tag. 176 func promMAggRawTags(args []parse.Node) (parse.Tags, error) { 177 tags, err := promAggRawTags(args) 178 if err != nil { 179 return nil, err 180 } 181 tags[promMultiKey] = struct{}{} 182 return tags, nil 183 } 184 185 // PromRawAggSeriesQuery is wrapper for promRawAggSeriesQuery setting the multi argument to false. 
186 func PromRawAggSeriesQuery(prefix string, e *State, query, stepDuration, sdur, edur string) (*Results, error) { 187 return promRawAggSeriesQuery(prefix, e, query, stepDuration, sdur, edur, false) 188 } 189 190 // PromMRawAggSeriesQuery is wrapper for promRawAggSeriesQuery setting the multi argument to true. 191 func PromMRawAggSeriesQuery(prefix string, e *State, query, stepDuration, sdur, edur string) (*Results, error) { 192 return promRawAggSeriesQuery(prefix, e, query, stepDuration, sdur, edur, true) 193 } 194 195 // promRawAggSeriesQuery takes a raw promql query that has a top level promql aggregation function 196 // and returns a seriesSet. If multi is true then the promMultiKey is added to each series in the result 197 // and multiple prometheus tsdbs are queried. 198 func promRawAggSeriesQuery(prefix string, e *State, query, stepDuration, sdur, edur string, multi bool) (r *Results, err error) { 199 r = new(Results) 200 parsedPromExpr, err := promql.ParseExpr(query) 201 if err != nil { 202 return nil, fmt.Errorf("failed to parse invalid promql expression: %v", err) 203 } 204 promAgExprNode, ok := parsedPromExpr.(*promql.AggregateExpr) 205 if !ok || promAgExprNode == nil { 206 return nil, fmt.Errorf("top level expression is not aggregation operation") 207 } 208 start, end, err := parseDurationPair(e, sdur, edur) 209 if err != nil { 210 return 211 } 212 st, err := opentsdb.ParseDuration(stepDuration) 213 if err != nil { 214 return 215 } 216 step := time.Duration(st) 217 tagLen := len(promAgExprNode.Grouping) 218 219 prefixes := strings.Split(prefix, ",") 220 221 // Single prom backend case 222 if !multi || (len(prefixes) == 1 && prefixes[0] == "") { 223 qRes, err := timePromRequest(e, prefix, query, start, end, step) 224 if err != nil { 225 return nil, err 226 } 227 err = promMatrixToResults(prefix, e, qRes, tagLen, false, r) 228 return r, err 229 } 230 231 // Multibackend case 232 wg := sync.WaitGroup{} 233 wg.Add(len(prefixes)) 234 resCh := make(chan 
struct { 235 prefix string 236 promVal promModels.Value 237 }, len(prefixes)) 238 errCh := make(chan error, len(prefixes)) 239 240 for _, prefix := range prefixes { 241 go func(prefix string) { 242 defer wg.Done() 243 res, err := timePromRequest(e, prefix, query, start, end, step) 244 resCh <- struct { 245 prefix string 246 promVal promModels.Value 247 }{prefix, res} 248 errCh <- err 249 }(prefix) 250 } 251 252 wg.Wait() 253 close(resCh) 254 close(errCh) 255 errors := []string{} 256 for err := range errCh { 257 if err == nil { 258 continue 259 } 260 errors = append(errors, err.Error()) 261 } 262 if len(errors) > 0 { 263 return r, fmt.Errorf(strings.Join(errors, " :: ")) 264 } 265 266 for promRes := range resCh { 267 err = promMatrixToResults(promRes.prefix, e, promRes.promVal, tagLen, true, r) 268 if err != nil { 269 return 270 } 271 } 272 273 return 274 } 275 276 // PromQuery is a wrapper for promQuery so there is a function signature that doesn't require the rate argument in the expr language. 277 // It also sets promQuery's addPrefixTag argument to false since this only queries one backend. 278 func PromQuery(prefix string, e *State, metric, groupBy, filter, agType, stepDuration, sdur, edur string) (r *Results, err error) { 279 return promQuery(prefix, e, metric, groupBy, filter, agType, "", stepDuration, sdur, edur, false) 280 } 281 282 // PromRate is a wrapper for promQuery like PromQuery except that it has a rateDuration argument for the step of the rate calculation. 283 // This enables rate calculation for counters. 284 func PromRate(prefix string, e *State, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur string) (r *Results, err error) { 285 return promQuery(prefix, e, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur, false) 286 } 287 288 // PromMQuery is a wrapper from promMQuery in the way that PromQuery is a wrapper from promQuery. 
289 func PromMQuery(prefix string, e *State, metric, groupBy, filter, agType, stepDuration, sdur, edur string) (r *Results, err error) { 290 return promMQuery(prefix, e, metric, groupBy, filter, agType, "", stepDuration, sdur, edur) 291 } 292 293 // PromMRate is a wrapper from promMQuery in the way that PromRate is a wrapper from promQuery. It has a stepDuration argument 294 // for rate calculation. 295 func PromMRate(prefix string, e *State, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur string) (r *Results, err error) { 296 return promMQuery(prefix, e, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur) 297 } 298 299 // promMQuery makes call to multiple prometheus TSDBs and combines the results into a single series set. 300 // It adds the promMultiKey tag key with the value of prefix label to the results. Queries are executed in parallel. 301 func promMQuery(prefix string, e *State, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur string) (r *Results, err error) { 302 r = new(Results) 303 prefixes := strings.Split(prefix, ",") 304 if len(prefixes) == 1 && prefixes[0] == "" { 305 return promQuery("default", e, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur, true) 306 } 307 308 wg := sync.WaitGroup{} 309 wg.Add(len(prefixes)) 310 resCh := make(chan *Results, len(prefixes)) 311 errCh := make(chan error, len(prefixes)) 312 313 for _, prefix := range prefixes { 314 go func(prefix string) { 315 defer wg.Done() 316 res, err := promQuery(prefix, e, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur, true) 317 resCh <- res 318 errCh <- err 319 }(prefix) 320 } 321 322 wg.Wait() 323 close(resCh) 324 close(errCh) 325 // Gather errors from the request and return an error if any of the requests failled 326 errors := []string{} 327 for err := range errCh { 328 if err == nil { 329 continue 330 } 331 errors = append(errors, err.Error()) 332 } 333 if len(errors) > 0 { 
334 return r, fmt.Errorf(strings.Join(errors, " :: ")) 335 } 336 resultCollection := []*Results{} 337 for res := range resCh { 338 resultCollection = append(resultCollection, res) 339 } 340 if len(resultCollection) == 1 { // no need to merge if there is only one item 341 return resultCollection[0], nil 342 } 343 // Merge the query results into a single seriesSet 344 r, err = Merge(e, resultCollection...) 345 return 346 } 347 348 // promQuery uses the information passed to it to generate an PromQL query using the promQueryTemplate. 349 // It then calls timePromRequest to execute the query and process that results in to a Bosun Results object. 350 func promQuery(prefix string, e *State, metric, groupBy, filter, agType, rateDuration, stepDuration, sdur, edur string, addPrefixTag bool) (r *Results, err error) { 351 r = new(Results) 352 start, end, err := parseDurationPair(e, sdur, edur) 353 if err != nil { 354 return 355 } 356 st, err := opentsdb.ParseDuration(stepDuration) 357 if err != nil { 358 return 359 } 360 step := time.Duration(st) 361 qd := promQueryTemplateData{ 362 Metric: metric, 363 AgFunc: agType, 364 Tags: groupBy, 365 Filter: filter, 366 RateDuration: rateDuration, 367 } 368 query, err := qd.RenderString() 369 qRes, err := timePromRequest(e, prefix, query, start, end, step) 370 if err != nil { 371 return 372 } 373 groupByTagSet := make(opentsdb.TagSet) 374 for _, v := range strings.Split(groupBy, ",") { 375 if v != "" { 376 groupByTagSet[v] = "" 377 } 378 } 379 err = promMatrixToResults(prefix, e, qRes, len(groupByTagSet), addPrefixTag, r) 380 return r, err 381 } 382 383 // promQueryTemplate is a template for PromQL time series queries. It supports 384 // filtering and aggregation. 
385 var promQueryTemplate = template.Must(template.New("promQueryTemplate").Parse(` 386 {{ .AgFunc }}( 387 {{- if ne .RateDuration "" }}rate({{ end }} {{ .Metric -}} 388 {{- if ne .Filter "" }} {{ .Filter | printf "{%v} " -}} {{- end -}} 389 {{- if ne .RateDuration "" -}} {{ .RateDuration | printf " [%v] )" }} {{- end -}} 390 ) by ( {{ .Tags }} )`)) 391 392 // promQueryTemplateData is the struct the contains the fields to render the promQueryTemplate. 393 type promQueryTemplateData struct { 394 Metric string 395 AgFunc string 396 Tags string 397 Filter string 398 RateDuration string 399 } 400 401 // RenderString creates a query string using promQueryTemplate. 402 func (pq promQueryTemplateData) RenderString() (string, error) { 403 buf := new(bytes.Buffer) 404 err := promQueryTemplate.Execute(buf, pq) 405 if err != nil { 406 return "", err 407 } 408 return buf.String(), nil 409 } 410 411 // timePromRequest takes a PromQL query string with the given time frame and step duration. The result 412 // type of the PromQL query must be a Prometheus Matrix. 
413 func timePromRequest(e *State, prefix, query string, start, end time.Time, step time.Duration) (s promModels.Value, err error) { 414 client, found := e.PromConfig[prefix] 415 if !found { 416 return s, fmt.Errorf(`prometheus client with name "%v" not defined`, prefix) 417 } 418 r := promv1.Range{Start: start, End: end, Step: step} 419 cacheKey := struct { 420 Query string 421 Range promv1.Range 422 Step time.Duration 423 Prefix string 424 }{ 425 query, 426 r, 427 step, 428 prefix, 429 } 430 cacheKeyBytes, _ := json.MarshalIndent(cacheKey, "", " ") 431 e.Timer.StepCustomTiming("prom", fmt.Sprintf("query (%v)", prefix), query, func() { 432 getFn := func() (interface{}, error) { 433 res, err := client.QueryRange(context.Background(), query, r) 434 if err != nil { 435 return nil, err 436 } 437 m, ok := res.(promModels.Matrix) 438 if !ok { 439 return nil, fmt.Errorf("prom: expected matrix result") 440 } 441 return m, nil 442 } 443 val, err, hit := e.Cache.Get(string(cacheKeyBytes), getFn) 444 collectCacheHit(e.Cache, "prom_ts", hit) 445 var ok bool 446 if s, ok = val.(promModels.Matrix); !ok { 447 err = fmt.Errorf("prom: did not get valid result from prometheus, %v", err) 448 } 449 }) 450 return 451 } 452 453 // promMatrixToResults takes the Value result of a prometheus response and 454 // updates the Results property of the passed Results object 455 func promMatrixToResults(prefix string, e *State, res promModels.Value, expectedTagLen int, addPrefix bool, r *Results) (err error) { 456 matrix, ok := res.(promModels.Matrix) 457 if !ok { 458 return fmt.Errorf("result not of type matrix") 459 } 460 for _, row := range matrix { 461 tags := make(opentsdb.TagSet) 462 for tagK, tagV := range row.Metric { 463 tags[string(tagK)] = string(tagV) 464 } 465 // Remove results with less tag keys than those requests 466 if len(tags) < expectedTagLen { 467 continue 468 } 469 if addPrefix { 470 tags[promMultiKey] = prefix 471 } 472 if e.Squelched(tags) { 473 continue 474 } 475 values 
:= make(Series, len(row.Values)) 476 for _, v := range row.Values { 477 values[v.Timestamp.Time()] = float64(v.Value) 478 } 479 r.Results = append(r.Results, &Result{ 480 Value: values, 481 Group: tags, 482 }) 483 } 484 return 485 } 486 487 // PromMetricList returns a list of available metrics for the prometheus backend 488 // by using querying the Prometheus Label Values API for "__name__" 489 func PromMetricList(prefix string, e *State) (r *Results, err error) { 490 r = new(Results) 491 client, found := e.PromConfig[prefix] 492 if !found { 493 return r, fmt.Errorf(`prometheus client with name "%v" not defined`, prefix) 494 } 495 getFn := func() (interface{}, error) { 496 var metrics promModels.LabelValues 497 e.Timer.StepCustomTiming("prom", "metriclist", "", func() { 498 metrics, err = client.LabelValues(context.Background(), "__name__") 499 }) 500 if err != nil { 501 return nil, err 502 } 503 return metrics, nil 504 } 505 val, err, hit := e.Cache.Get(fmt.Sprintf("%v:metriclist", prefix), getFn) 506 collectCacheHit(e.Cache, "prom_metrics", hit) 507 if err != nil { 508 return nil, err 509 } 510 metrics := val.(promModels.LabelValues) 511 r.Results = append(r.Results, &Result{Value: Info{metrics}}) 512 return 513 } 514 515 // PromTagInfo does a range query for the given metric and returns info about the 516 // tags and labels for the metric based on the data from the queried timeframe 517 func PromTagInfo(prefix string, e *State, metric, sdur, edur string) (r *Results, err error) { 518 r = new(Results) 519 client, found := e.PromConfig[prefix] 520 if !found { 521 return r, fmt.Errorf(`prometheus client with name "%v" not defined`, prefix) 522 } 523 start, end, err := parseDurationPair(e, sdur, edur) 524 if err != nil { 525 return 526 } 527 528 qRange := promv1.Range{Start: start, End: end, Step: time.Minute} 529 530 getFn := func() (interface{}, error) { 531 var res promModels.Value 532 e.Timer.StepCustomTiming("prom", "taginfo", metric, func() { 533 res, err = 
client.QueryRange(context.Background(), metric, qRange) 534 }) 535 if err != nil { 536 return nil, err 537 } 538 m, ok := res.(promModels.Matrix) 539 if !ok { 540 return nil, fmt.Errorf("prom: expected a prometheus matrix type in result but got %v", res.Type().String()) 541 } 542 return m, nil 543 } 544 val, err, hit := e.Cache.Get(fmt.Sprintf("%v:%v:taginfo", prefix, metric), getFn) 545 collectCacheHit(e.Cache, "prom_metrics", hit) 546 if err != nil { 547 return nil, err 548 } 549 matrix, ok := val.(promModels.Matrix) 550 if !ok { 551 err = fmt.Errorf("prom: did not get valid result from prometheus, %v", err) 552 } 553 tagInfo := struct { 554 Metric string 555 Keys []string 556 KeysToValues map[string][]string 557 UniqueSets []string 558 }{} 559 tagInfo.Metric = metric 560 tagInfo.KeysToValues = make(map[string][]string) 561 sets := make(map[string]struct{}) 562 keysToValues := make(map[string]map[string]struct{}) 563 for _, row := range matrix { 564 tags := make(opentsdb.TagSet) 565 for rawTagK, rawTagV := range row.Metric { 566 tagK := string(rawTagK) 567 tagV := string(rawTagV) 568 if tagK == "__name__" { 569 continue 570 } 571 tags[tagK] = tagV 572 if _, ok := keysToValues[tagK]; !ok { 573 keysToValues[tagK] = make(map[string]struct{}) 574 } 575 keysToValues[tagK][tagV] = struct{}{} 576 } 577 sets[tags.String()] = struct{}{} 578 } 579 for k, values := range keysToValues { 580 tagInfo.Keys = append(tagInfo.Keys, k) 581 for val := range values { 582 tagInfo.KeysToValues[k] = append(tagInfo.KeysToValues[k], val) 583 } 584 } 585 sort.Strings(tagInfo.Keys) 586 for s := range sets { 587 tagInfo.UniqueSets = append(tagInfo.UniqueSets, s) 588 } 589 sort.Strings(tagInfo.UniqueSets) 590 r.Results = append(r.Results, &Result{ 591 Value: Info{tagInfo}, 592 }) 593 return 594 }