bosun.org@v0.0.0-20210513094433-e25bc3e69a1f/cmd/bosun/expr/azure.go (about) 1 package expr 2 3 import ( 4 "context" 5 "fmt" 6 "regexp" 7 "strconv" 8 "strings" 9 "sync" 10 "time" 11 12 "bosun.org/metadata" 13 14 "bosun.org/collect" 15 16 "bosun.org/slog" 17 18 "bosun.org/cmd/bosun/expr/parse" 19 "bosun.org/models" 20 "bosun.org/opentsdb" 21 ainsightsmgmt "github.com/Azure/azure-sdk-for-go/services/appinsights/mgmt/2015-05-01/insights" 22 ainsights "github.com/Azure/azure-sdk-for-go/services/appinsights/v1/insights" 23 "github.com/Azure/azure-sdk-for-go/services/preview/monitor/mgmt/2018-03-01/insights" 24 "github.com/Azure/azure-sdk-for-go/services/resources/mgmt/2018-02-01/resources" 25 "github.com/kylebrandt/boolq" 26 ) 27 28 // AzureMonitor is the collection of functions for the Azure monitor datasource 29 var AzureMonitor = map[string]parse.Func{ 30 "az": { 31 Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeString}, 32 Return: models.TypeSeriesSet, 33 Tags: azTags, 34 F: AzureQuery, 35 PrefixEnabled: true, 36 }, 37 "azmulti": { 38 Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeAzureResourceList, models.TypeString, models.TypeString, models.TypeString, models.TypeString}, 39 Return: models.TypeSeriesSet, 40 Tags: azMultiTags, 41 F: AzureMultiQuery, 42 PrefixEnabled: true, 43 }, 44 "azmd": { // TODO Finish and document this func 45 Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString, models.TypeString}, 46 Return: models.TypeSeriesSet, // TODO return type 47 Tags: tagFirst, //TODO: Appropriate tags func 48 F: AzureMetricDefinitions, 49 PrefixEnabled: true, 50 }, 51 "azrt": { 52 Args: []models.FuncType{models.TypeString}, 53 Return: models.TypeAzureResourceList, 54 F: AzureResourcesByType, 55 PrefixEnabled: true, 56 }, 57 "azrf": { 58 Args: []models.FuncType{models.TypeAzureResourceList, models.TypeString}, 59 Return: models.TypeAzureResourceList, 60 F: AzureFilterResources, 61 }, 62 // Azure function for application insights, See azureai.go 63 "aiapp": { 64 Args: []models.FuncType{}, 65 Return: models.TypeAzureAIApps, 66 F: AzureAIListApps, 67 PrefixEnabled: true, 68 }, 69 "aiappf": { 70 Args: []models.FuncType{models.TypeAzureAIApps, models.TypeString}, 71 Return: models.TypeAzureAIApps, 72 F: AzureAIFilterApps, 73 PrefixEnabled: true, 74 }, 75 "aimd": { 76 Args: []models.FuncType{models.TypeAzureAIApps}, 77 Return: models.TypeInfo, 78 F: AzureAIMetricMD, 79 PrefixEnabled: true, 80 }, 81 "ai": { 82 Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString, models.TypeAzureAIApps, models.TypeString, models.TypeString, models.TypeString, models.TypeString}, 83 Return: models.TypeSeriesSet, 84 Tags: azAITags, 85 F: AzureAIQuery, 86 PrefixEnabled: true, 87 }, 88 } 89 90 // azTags is the tag function for the "az" expression function 91 func azTags(args []parse.Node) (parse.Tags, error) { 92 return azureTags(args[2]) 93 } 94 95 // azMultiTag function for the "azmulti" expression function 96 func azMultiTags(args []parse.Node) (parse.Tags, error) { 97 return azureTags(args[1]) 98 } 99 100 // azureTags adds tags for the csv argument along with the "name" and "rsg" tags 101 func azureTags(arg parse.Node) (parse.Tags, error) { 102 tags := parse.Tags{azureTagName: struct{}{}, azureTagRSG: struct{}{}} 103 csvTags := strings.Split(arg.(*parse.StringNode).Text, ",") 104 for _, k := range csvTags { 105 tags[k] = struct{}{} 106 } 107 return tags, nil 108 } 109 110 // Azure API References 111 // - https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-supported-metrics 112 // - https://docs.microsoft.com/en-us/azure/monitoring-and-diagnostics/monitoring-data-sources 113 114 // TODO 115 // - Finish up azmd info function 116 117 const azTimeFmt = "2006-01-02T15:04:05" 118 119 // azResourceURI builds a resource uri appropriate for an Azure API request based on the arguments 120 func azResourceURI(subscription, resourceGrp, Namespace, Resource string) string { 121 return fmt.Sprintf("/subscriptions/%s/resourceGroups/%s/providers/%s/%s", subscription, resourceGrp, Namespace, Resource) 122 } 123 124 // AzureMetricDefinitions fetches metric information for a specific resource and metric tuple 125 // TODO make this return and not fmt.Printf 126 func AzureMetricDefinitions(prefix string, e *State, namespace, metric, rsg, resource string) (r *Results, err error) { 127 r = new(Results) 128 cc, clientFound := e.Backends.AzureMonitor[prefix] 129 if !clientFound { 130 return r, fmt.Errorf("azure client with name %v not defined", prefix) 131 } 132 c := cc.MetricDefinitionsClient 133 defs, err := c.List(context.Background(), azResourceURI(c.SubscriptionID, rsg, namespace, resource), namespace) 134 if err != nil { 135 return 136 } 137 if defs.Value == nil { 138 return r, fmt.Errorf("No metric definitions in response") 139 } 140 for _, def := range *defs.Value { 141 agtypes := []string{} 142 for _, x := range *def.SupportedAggregationTypes { 143 agtypes = append(agtypes, fmt.Sprintf("%s", x)) 144 } 145 dims := []string{} 146 if def.Dimensions != nil { 147 for _, x := range *def.Dimensions { 148 dims = append(dims, fmt.Sprintf("%s", *x.Value)) 149 } 150 } 151 fmt.Println(*def.Name.LocalizedValue, strings.Join(dims, ", "), strings.Join(agtypes, ", ")) 152 } 153 return 154 } 155 156 func azureTimeSpan(e *State, sdur, edur string) (span string, err error) { 157 sd, err := opentsdb.ParseDuration(sdur) 158 if err != nil { 159 return 160 } 161 var ed opentsdb.Duration 162 if edur != "" { 163 ed, err = opentsdb.ParseDuration(edur) 164 if err != nil { 165 return 166 } 167 } 168 st := e.now.Add(time.Duration(-sd)).Format(azTimeFmt) 169 en := e.now.Add(time.Duration(-ed)).Format(azTimeFmt) 170 return fmt.Sprintf("%s/%s", st, en), nil 171 } 172 173 // azureQuery queries Azure metrics for time series data based on the resourceUri 174 func azureQuery(prefix string, e *State, metric, tagKeysCSV, rsg, resName, resourceUri, agtype, interval, sdur, edur string) (r *Results, err error) { 175 r = new(Results) 176 // Verify prefix is a defined resource and fetch the collection of clients 177 cc, clientFound := e.Backends.AzureMonitor[prefix] 178 if !clientFound { 179 return r, fmt.Errorf(`azure client with name "%v" not defined`, prefix) 180 } 181 c := cc.MetricsClient 182 r = new(Results) 183 // Parse Relative Time to absolute time 184 timespan, err := azureTimeSpan(e, sdur, edur) 185 if err != nil { 186 return nil, err 187 } 188 189 // Set Dimensions (tag) keys for metrics that support them by building an Azure filter 190 // expression in form of "tagKey eq '*' and tagKey eq ..." 191 // reference: https://docs.microsoft.com/en-us/rest/api/monitor/filter-syntax 192 filter := "" 193 if tagKeysCSV != "" { 194 filters := []string{} 195 tagKeys := strings.Split(tagKeysCSV, ",") 196 for _, k := range tagKeys { 197 filters = append(filters, fmt.Sprintf("%s eq '*'", k)) 198 } 199 filter = strings.Join(filters, " and ") 200 } 201 202 // Set the Interval/Timegrain (Azure metric downsampling) 203 var tg *string 204 if interval != "" { 205 tg = azureIntervalToTimegrain(interval) 206 } 207 208 // Set Azure aggregation method 209 aggLong, err := azureShortAggToLong(agtype) 210 if err != nil { 211 return 212 } 213 cacheKey := strings.Join([]string{metric, filter, resourceUri, aggLong, interval, timespan}, ":") 214 getFn := func() (interface{}, error) { 215 req, err := c.ListPreparer(context.Background(), resourceUri, 216 timespan, 217 tg, 218 metric, 219 aggLong, 220 nil, 221 "", 222 filter, 223 insights.Data, 224 "") 225 if err != nil { 226 return nil, err 227 } 228 var resp insights.Response 229 e.Timer.StepCustomTiming("azure", "query", req.URL.String(), func() { 230 hr, sendErr := c.ListSender(req) 231 if sendErr == nil { 232 resp, err = c.ListResponder(hr) 233 } else { 234 err = sendErr 235 } 236 }) 237 return resp, err 238 } 239 // Get Azure metric values by calling the Azure API or via cache if available 240 val, err, hit := e.Cache.Get(cacheKey, getFn) 241 if err != nil { 242 return r, err 243 } 244 collectCacheHit(e.Cache, "azure_ts", hit) 245 resp := val.(insights.Response) 246 rawReadsRemaining := resp.Header.Get("X-Ms-Ratelimit-Remaining-Subscription-Reads") 247 readsRemaining, err := strconv.ParseInt(rawReadsRemaining, 10, 64) 248 if err != nil { 249 slog.Errorf("failure to parse remaning reads from Azure response") 250 } else { 251 // Since we may be hitting different Azure Resource Manager servers on Azure's side the rate limit 252 // may have a high variance therefore we sample 253 // see https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits 254 collect.Sample("azure.remaining_reads", opentsdb.TagSet{"prefix": prefix}, float64(readsRemaining)) 255 if readsRemaining < 100 { 256 slog.Warningf("less than 100 reads detected for the Azure api on client %v", prefix) 257 } 258 } 259 if resp.Value != nil { 260 for _, tsContainer := range *resp.Value { 261 if tsContainer.Timeseries == nil { 262 continue // If the container doesn't have a time series object then skip 263 } 264 for _, dataContainer := range *tsContainer.Timeseries { 265 if dataContainer.Data == nil { 266 continue // The timeseries has no data in it - then skip 267 } 268 series := make(Series) 269 tags := make(opentsdb.TagSet) 270 tags[azureTagRSG] = rsg 271 tags[azureTagName] = resName 272 // Get the Key/Values that make up the Azure dimension and turn them into tags 273 if dataContainer.Metadatavalues != nil { 274 for _, md := range *dataContainer.Metadatavalues { 275 if md.Name != nil && md.Name.Value != nil && md.Value != nil { 276 tags[*md.Name.Value] = *md.Value 277 } 278 } 279 } 280 for _, mValue := range *dataContainer.Data { 281 // extract the value that corresponds the the request aggregation 282 exValue := azureExtractMetricValue(&mValue, aggLong) 283 if exValue != nil && mValue.TimeStamp != nil { 284 series[mValue.TimeStamp.ToTime()] = *exValue 285 } 286 } 287 if len(series) == 0 { 288 continue // If we end up with an empty series then skip 289 } 290 r.Results = append(r.Results, &Result{ 291 Value: series, 292 Group: tags, 293 }) 294 } 295 } 296 } 297 return r, nil 298 } 299 300 // AzureQuery queries an Azure monitor metric for the given resource and returns a series set tagged by 301 // rsg (resource group), name (resource name), and any tag keys parsed from the tagKeysCSV argument 302 func AzureQuery(prefix string, e *State, namespace, metric, tagKeysCSV, rsg, resName, agtype, interval, sdur, edur string) (r *Results, err error) { 303 r = new(Results) 304 // Verify prefix is a defined resource and fetch the collection of clients 305 cc, clientFound := e.Backends.AzureMonitor[prefix] 306 if !clientFound { 307 return r, fmt.Errorf(`azure client with name "%v" not defined`, prefix) 308 } 309 c := cc.MetricsClient 310 resourceURI := azResourceURI(c.SubscriptionID, rsg, namespace, resName) 311 return azureQuery(prefix, e, metric, tagKeysCSV, rsg, resName, resourceURI, agtype, interval, sdur, edur) 312 } 313 314 // AzureMultiQuery queries multiple Azure resources and returns them as a single result set 315 // It makes one HTTP request per resource and parallelizes the requests 316 func AzureMultiQuery(prefix string, e *State, metric, tagKeysCSV string, resources AzureResources, agtype string, interval, sdur, edur string) (r *Results, err error) { 317 r = new(Results) 318 if resources.Prefix != prefix { 319 return r, fmt.Errorf(`mismatched Azure clients: attempting to use resources from client "%v" on a query with client "%v"`, resources.Prefix, prefix) 320 } 321 nResources := len(resources.Resources) 322 if nResources == 0 { 323 return r, nil 324 } 325 queryResults := []*Results{} 326 var wg sync.WaitGroup 327 // reqCh (Request Channel) is populated with Azure resources, and resources are pulled from channel to make a time series request per resource 328 reqCh := make(chan AzureResource, nResources) 329 // resCh (Result Channel) contains the timeseries responses for requests for resource 330 resCh := make(chan *Results, nResources) 331 // errCh (Error Channel) contains any request errors 332 errCh := make(chan error, nResources) 333 // a worker makes a time series request for a resource 334 worker := func() { 335 for resource := range reqCh { 336 res, err := azureQuery(prefix, e, metric, tagKeysCSV, resource.ResourceGroup, resource.Name, resource.ID, agtype, interval, sdur, edur) 337 resCh <- res 338 errCh <- err 339 } 340 defer wg.Done() 341 } 342 // Create N workers to parallelize multiple requests at once since he resource requires an HTTP request 343 for i := 0; i < e.AzureMonitor[prefix].Concurrency; i++ { 344 wg.Add(1) 345 go worker() 346 } 347 timingString := fmt.Sprintf(`%v queries for metric:"%v" using client "%v"`, nResources, metric, prefix) 348 e.Timer.StepCustomTiming("azure", "query-multi", timingString, func() { 349 // Feed resources into the request channel which the workers will consume 350 for _, resource := range resources.Resources { 351 reqCh <- resource 352 } 353 close(reqCh) 354 wg.Wait() // Wait for all the workers to finish 355 }) 356 close(resCh) 357 close(errCh) 358 359 // Gather errors from the request and return an error if any of the requests failled 360 errors := []string{} 361 for err := range errCh { 362 if err == nil { 363 continue 364 } 365 errors = append(errors, err.Error()) 366 } 367 if len(errors) > 0 { 368 return r, fmt.Errorf(strings.Join(errors, " :: ")) 369 } 370 // Gather all the query results 371 for res := range resCh { 372 queryResults = append(queryResults, res) 373 } 374 if len(queryResults) == 1 { // no need to merge if there is only one item 375 return queryResults[0], nil 376 } 377 // Merge the query results into a single seriesSet 378 r, err = Merge(e, queryResults...) 379 return 380 } 381 382 // azureListResources fetches all resources for the tenant/subscription and caches them for 383 // up to one minute. 384 func azureListResources(prefix string, e *State) (AzureResources, error) { 385 // Cache will only last for one minute. In practice this will only apply for web sessions since a 386 // new cache is created for each check cycle in the cache 387 key := fmt.Sprintf("AzureResourceCache:%s:%s", prefix, time.Now().Truncate(time.Minute*1)) // https://github.com/golang/groupcache/issues/92 388 // getFn is a cacheable function for listing Azure resources 389 getFn := func() (interface{}, error) { 390 r := AzureResources{Prefix: prefix} 391 cc, clientFound := e.Backends.AzureMonitor[prefix] 392 if !clientFound { 393 return r, fmt.Errorf("Azure client with name %v not defined", prefix) 394 } 395 c := cc.ResourcesClient 396 // Page through all resources 397 for rList, err := c.ListComplete(context.Background(), "", "", nil); rList.NotDone(); err = rList.Next() { 398 // TODO not catching auth error here for some reason, err is nil when error!! 399 if err != nil { 400 return r, err 401 } 402 val := rList.Value() 403 if val.Name != nil && val.Type != nil && val.ID != nil { 404 // Extract out the resource group name from the Id 405 splitID := strings.Split(*val.ID, "/") 406 if len(splitID) < 5 { 407 return r, fmt.Errorf("unexpected ID for resource: %s", *val.ID) 408 } 409 // Add Azure tags to the resource 410 azTags := make(map[string]string) 411 for k, v := range val.Tags { 412 if v != nil { 413 azTags[k] = *v 414 } 415 } 416 r.Resources = append(r.Resources, AzureResource{ 417 Name: *val.Name, 418 Type: *val.Type, 419 ResourceGroup: splitID[4], 420 Tags: azTags, 421 ID: *val.ID, 422 }) 423 } 424 } 425 return r, nil 426 } 427 val, err, hit := e.Cache.Get(key, getFn) 428 collectCacheHit(e.Cache, "azure_resource", hit) 429 if err != nil { 430 return AzureResources{}, err 431 } 432 return val.(AzureResources), nil 433 } 434 435 // AzureResourcesByType returns all resources of the specified type 436 // It fetches the complete list resources and then filters them relying on a Cache of that resource list 437 func AzureResourcesByType(prefix string, e *State, tp string) (r *Results, err error) { 438 resources := AzureResources{Prefix: prefix} 439 r = new(Results) 440 allResources, err := azureListResources(prefix, e) 441 if err != nil { 442 return 443 } 444 for _, res := range allResources.Resources { 445 if res.Type == tp { 446 resources.Resources = append(resources.Resources, res) 447 } 448 } 449 r.Results = append(r.Results, &Result{Value: resources}) 450 return 451 } 452 453 // AzureFilterResources filters a list of resources based on the value of the name, resource group 454 // or tags associated with that resource 455 func AzureFilterResources(e *State, resources AzureResources, filter string) (r *Results, err error) { 456 r = new(Results) 457 // Parse the filter once and then apply it to each item in the loop 458 bqf, err := boolq.Parse(filter) 459 if err != nil { 460 return r, err 461 } 462 filteredResources := AzureResources{Prefix: resources.Prefix} 463 for _, res := range resources.Resources { 464 match, err := boolq.AskParsedExpr(bqf, res) 465 if err != nil { 466 return r, err 467 } 468 if match { 469 filteredResources.Resources = append(filteredResources.Resources, res) 470 } 471 } 472 r.Results = append(r.Results, &Result{Value: filteredResources}) 473 return 474 } 475 476 // AzureResource is a container for Azure resource information that Bosun can interact with 477 type AzureResource struct { 478 Name string 479 Type string 480 ResourceGroup string 481 Tags map[string]string 482 ID string 483 } 484 485 // AzureResources is a slice of AzureResource 486 //type AzureResources []AzureResource 487 type AzureResources struct { 488 Resources []AzureResource 489 Prefix string 490 } 491 492 // Get Returns an AzureResource from AzureResources based on the resource type, group, and name 493 // If no matching resource is found, an AzureResource object will be returned but found will be 494 // false. 495 func (resources AzureResources) Get(rType, rsg, name string) (az AzureResource, found bool) { 496 for _, res := range resources.Resources { 497 if res.Type == rType && res.ResourceGroup == rsg && res.Name == name { 498 return res, true 499 } 500 } 501 return 502 } 503 504 // Ask makes an AzureResource a github.com/kylebrandt/boolq Asker, which allows it 505 // to take boolean expressions to create true/false conditions for filtering 506 func (ar AzureResource) Ask(filter string) (bool, error) { 507 sp := strings.SplitN(filter, ":", 2) 508 if len(sp) != 2 { 509 return false, fmt.Errorf("bad filter, filter must be in k:v format, got %v", filter) 510 } 511 key := strings.ToLower(sp[0]) // Make key case insensitive 512 value := sp[1] 513 switch key { 514 case azureTagName: 515 re, err := regexp.Compile(value) 516 if err != nil { 517 return false, err 518 } 519 if re.MatchString(ar.Name) { 520 return true, nil 521 } 522 case azureTagRSG: 523 re, err := regexp.Compile(value) 524 if err != nil { 525 return false, err 526 } 527 if re.MatchString(ar.ResourceGroup) { 528 return true, nil 529 } 530 default: 531 // Does not support tags that have a tag key of rsg, resourcegroup, or name. If it is a problem at some point 532 // we can do something like "\name" to mean the tag "name" if such thing is even allowed 533 if tagV, ok := ar.Tags[key]; ok { 534 re, err := regexp.Compile(value) 535 if err != nil { 536 return false, err 537 } 538 if re.MatchString(tagV) { 539 return true, nil 540 } 541 } 542 543 } 544 return false, nil 545 } 546 547 // AzureMonitorClientCollection is a collection of Azure SDK clients since 548 // the SDK provides different clients to access different sorts of resources 549 type AzureMonitorClientCollection struct { 550 MetricsClient insights.MetricsClient 551 MetricDefinitionsClient insights.MetricDefinitionsClient 552 ResourcesClient resources.Client 553 AIComponentsClient ainsightsmgmt.ComponentsClient 554 AIMetricsClient ainsights.MetricsClient 555 Concurrency int 556 TenantId string 557 } 558 559 // AzureMonitorClients is map of all the AzureMonitorClientCollections that 560 // have been configured. This is so multiple subscription/tenant/clients 561 // can be queries from the same Bosun instance using the prefix syntax 562 type AzureMonitorClients map[string]AzureMonitorClientCollection 563 564 // AzureExtractMetricValue is a helper for fetching the value of the requested 565 // aggregation for the metric 566 func azureExtractMetricValue(mv *insights.MetricValue, field string) (v *float64) { 567 switch field { 568 case string(insights.Average), "": 569 v = mv.Average 570 case string(insights.Minimum): 571 v = mv.Minimum 572 case string(insights.Maximum): 573 v = mv.Maximum 574 case string(insights.Total): 575 v = mv.Total 576 } 577 return 578 } 579 580 // azureShortAggToLong coverts bosun style names for aggregations (like the reduction functions) 581 // to the string that is expected for Azure queries 582 func azureShortAggToLong(agtype string) (string, error) { 583 switch agtype { 584 case "avg", "": 585 return string(insights.Average), nil 586 case "min": 587 return string(insights.Minimum), nil 588 case "max": 589 return string(insights.Maximum), nil 590 case "total": 591 return string(insights.Total), nil 592 case "count": 593 return string(insights.Count), nil 594 } 595 return "", fmt.Errorf("unrecognized aggregation type %s, must be avg, min, max, or total", agtype) 596 } 597 598 // azureIntervalToTimegrain adds a PT prefix and upper cases the argument to 599 // make the string in the format of Azure Timegrain 600 func azureIntervalToTimegrain(s string) *string { 601 tg := fmt.Sprintf("PT%v", strings.ToUpper(s)) 602 return &tg 603 } 604 605 func init() { 606 metadata.AddMetricMeta("bosun.azure.remaining_reads", metadata.Gauge, metadata.Operation, 607 "A sampling of the number of remaining reads to the Azure API before being ratelimited.") 608 } 609 610 const ( 611 // constants for tag keys 612 azureTagName = "name" 613 azureTagRSG = "rsg" 614 )