github.com/thanos-io/thanos@v0.32.5/pkg/query/endpointset.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package query 5 6 import ( 7 "context" 8 "encoding/json" 9 "fmt" 10 "math" 11 "sort" 12 "sync" 13 "time" 14 "unicode/utf8" 15 16 "github.com/thanos-io/thanos/pkg/api/query/querypb" 17 18 "github.com/go-kit/log" 19 "github.com/go-kit/log/level" 20 "github.com/pkg/errors" 21 "github.com/prometheus/client_golang/prometheus" 22 "github.com/prometheus/prometheus/model/labels" 23 "google.golang.org/grpc" 24 25 "github.com/thanos-io/thanos/pkg/component" 26 "github.com/thanos-io/thanos/pkg/exemplars/exemplarspb" 27 "github.com/thanos-io/thanos/pkg/info/infopb" 28 "github.com/thanos-io/thanos/pkg/metadata/metadatapb" 29 "github.com/thanos-io/thanos/pkg/rules/rulespb" 30 "github.com/thanos-io/thanos/pkg/runutil" 31 "github.com/thanos-io/thanos/pkg/store" 32 "github.com/thanos-io/thanos/pkg/store/labelpb" 33 "github.com/thanos-io/thanos/pkg/store/storepb" 34 "github.com/thanos-io/thanos/pkg/targets/targetspb" 35 ) 36 37 const ( 38 unhealthyEndpointMessage = "removing endpoint because it's unhealthy or does not exist" 39 noMetadataEndpointMessage = "cannot obtain metadata: neither info nor store client found" 40 ) 41 42 type queryConnMetricLabel string 43 44 const ( 45 ExternalLabels queryConnMetricLabel = "external_labels" 46 StoreType queryConnMetricLabel = "store_type" 47 ) 48 49 type GRPCEndpointSpec struct { 50 addr string 51 isStrictStatic bool 52 dialOpts []grpc.DialOption 53 } 54 55 const externalLabelLimit = 1000 56 57 // NewGRPCEndpointSpec creates gRPC endpoint spec. 58 // It uses InfoAPI to get Metadata. 59 func NewGRPCEndpointSpec(addr string, isStrictStatic bool, dialOpts ...grpc.DialOption) *GRPCEndpointSpec { 60 return &GRPCEndpointSpec{ 61 addr: addr, 62 isStrictStatic: isStrictStatic, 63 dialOpts: dialOpts, 64 } 65 } 66 67 func (es *GRPCEndpointSpec) Addr() string { 68 // API address should not change between state changes. 69 return es.addr 70 } 71 72 // Metadata method for gRPC endpoint tries to call InfoAPI exposed by Thanos components until context timeout. If we are unable to get metadata after 73 // that time, we assume that the host is unhealthy and return error. 74 func (es *endpointRef) Metadata(ctx context.Context, infoClient infopb.InfoClient, storeClient storepb.StoreClient) (*endpointMetadata, error) { 75 if infoClient != nil { 76 resp, err := infoClient.Info(ctx, &infopb.InfoRequest{}, grpc.WaitForReady(true)) 77 if err == nil { 78 return &endpointMetadata{resp}, nil 79 } 80 } 81 82 // Call Info method of StoreAPI, this way querier will be able to discovery old components not exposing InfoAPI. 83 if storeClient != nil { 84 metadata, err := es.getMetadataUsingStoreAPI(ctx, storeClient) 85 if err != nil { 86 return nil, errors.Wrapf(err, "fallback fetching info from %s", es.addr) 87 } 88 return metadata, nil 89 } 90 91 return nil, errors.New(noMetadataEndpointMessage) 92 } 93 94 func (es *endpointRef) getMetadataUsingStoreAPI(ctx context.Context, client storepb.StoreClient) (*endpointMetadata, error) { 95 resp, err := client.Info(ctx, &storepb.InfoRequest{}) 96 if err != nil { 97 return nil, err 98 } 99 100 infoResp := fillExpectedAPIs(component.FromProto(resp.StoreType), resp.MinTime, resp.MaxTime) 101 infoResp.LabelSets = resp.LabelSets 102 infoResp.ComponentType = component.FromProto(resp.StoreType).String() 103 104 return &endpointMetadata{ 105 &infoResp, 106 }, nil 107 } 108 109 func fillExpectedAPIs(componentType component.Component, mintime, maxTime int64) infopb.InfoResponse { 110 switch componentType { 111 case component.Sidecar: 112 return infopb.InfoResponse{ 113 Store: &infopb.StoreInfo{ 114 MinTime: mintime, 115 MaxTime: maxTime, 116 }, 117 Rules: &infopb.RulesInfo{}, 118 Targets: &infopb.TargetsInfo{}, 119 MetricMetadata: &infopb.MetricMetadataInfo{}, 120 Exemplars: &infopb.ExemplarsInfo{}, 121 } 122 case component.Query: 123 { 124 return infopb.InfoResponse{ 125 Store: &infopb.StoreInfo{ 126 MinTime: mintime, 127 MaxTime: maxTime, 128 }, 129 Rules: &infopb.RulesInfo{}, 130 Targets: &infopb.TargetsInfo{}, 131 MetricMetadata: &infopb.MetricMetadataInfo{}, 132 Exemplars: &infopb.ExemplarsInfo{}, 133 Query: &infopb.QueryAPIInfo{}, 134 } 135 } 136 case component.Receive: 137 { 138 return infopb.InfoResponse{ 139 Store: &infopb.StoreInfo{ 140 MinTime: mintime, 141 MaxTime: maxTime, 142 }, 143 Exemplars: &infopb.ExemplarsInfo{}, 144 } 145 } 146 case component.Store: 147 return infopb.InfoResponse{ 148 Store: &infopb.StoreInfo{ 149 MinTime: mintime, 150 MaxTime: maxTime, 151 }, 152 } 153 case component.Rule: 154 return infopb.InfoResponse{ 155 Store: &infopb.StoreInfo{ 156 MinTime: mintime, 157 MaxTime: maxTime, 158 }, 159 Rules: &infopb.RulesInfo{}, 160 } 161 default: 162 return infopb.InfoResponse{} 163 } 164 } 165 166 // stringError forces the error to be a string 167 // when marshaled into a JSON. 168 type stringError struct { 169 originalErr error 170 } 171 172 // MarshalJSON marshals the error into a string form. 173 func (e *stringError) MarshalJSON() ([]byte, error) { 174 return json.Marshal(e.originalErr.Error()) 175 } 176 177 // Error returns the original underlying error. 178 func (e *stringError) Error() string { 179 return e.originalErr.Error() 180 } 181 182 type EndpointStatus struct { 183 Name string `json:"name"` 184 LastCheck time.Time `json:"lastCheck"` 185 LastError *stringError `json:"lastError"` 186 LabelSets []labels.Labels `json:"labelSets"` 187 ComponentType component.Component `json:"-"` 188 MinTime int64 `json:"minTime"` 189 MaxTime int64 `json:"maxTime"` 190 } 191 192 // endpointSetNodeCollector is a metric collector reporting the number of available storeAPIs for Querier. 193 // A Collector is required as we want atomic updates for all 'thanos_store_nodes_grpc_connections' series. 194 // TODO(hitanshu-mehta) Currently,only collecting metrics of storeEndpoints. Make this struct generic. 195 type endpointSetNodeCollector struct { 196 mtx sync.Mutex 197 storeNodes map[component.Component]map[string]int 198 storePerExtLset map[string]int 199 200 connectionsDesc *prometheus.Desc 201 labels []string 202 } 203 204 func newEndpointSetNodeCollector(labels ...string) *endpointSetNodeCollector { 205 if len(labels) == 0 { 206 labels = []string{string(ExternalLabels), string(StoreType)} 207 } 208 return &endpointSetNodeCollector{ 209 storeNodes: map[component.Component]map[string]int{}, 210 connectionsDesc: prometheus.NewDesc( 211 "thanos_store_nodes_grpc_connections", 212 "Number of gRPC connection to Store APIs. Opened connection means healthy store APIs available for Querier.", 213 labels, nil, 214 ), 215 labels: labels, 216 } 217 } 218 219 // truncateExtLabels truncates the stringify external labels with the format of {labels..}. 220 func truncateExtLabels(s string, threshold int) string { 221 if len(s) > threshold { 222 for cut := 1; cut < 4; cut++ { 223 for cap := 1; cap < 4; cap++ { 224 if utf8.ValidString(s[threshold-cut-cap : threshold-cut]) { 225 return fmt.Sprintf("%s}", s[:threshold-cut]) 226 } 227 } 228 } 229 } 230 return s 231 } 232 func (c *endpointSetNodeCollector) Update(nodes map[component.Component]map[string]int) { 233 storeNodes := make(map[component.Component]map[string]int, len(nodes)) 234 storePerExtLset := map[string]int{} 235 236 for storeType, occurrencesPerExtLset := range nodes { 237 storeNodes[storeType] = make(map[string]int, len(occurrencesPerExtLset)) 238 for externalLabels, occurrences := range occurrencesPerExtLset { 239 externalLabels = truncateExtLabels(externalLabels, externalLabelLimit) 240 storePerExtLset[externalLabels] += occurrences 241 storeNodes[storeType][externalLabels] = occurrences 242 } 243 } 244 245 c.mtx.Lock() 246 defer c.mtx.Unlock() 247 c.storeNodes = storeNodes 248 c.storePerExtLset = storePerExtLset 249 } 250 251 func (c *endpointSetNodeCollector) Describe(ch chan<- *prometheus.Desc) { 252 ch <- c.connectionsDesc 253 } 254 255 func (c *endpointSetNodeCollector) Collect(ch chan<- prometheus.Metric) { 256 c.mtx.Lock() 257 defer c.mtx.Unlock() 258 259 for storeType, occurrencesPerExtLset := range c.storeNodes { 260 for externalLabels, occurrences := range occurrencesPerExtLset { 261 var storeTypeStr string 262 if storeType != nil { 263 storeTypeStr = storeType.String() 264 } 265 // Select only required labels. 266 lbls := []string{} 267 for _, lbl := range c.labels { 268 switch lbl { 269 case string(ExternalLabels): 270 lbls = append(lbls, externalLabels) 271 case string(StoreType): 272 lbls = append(lbls, storeTypeStr) 273 } 274 } 275 ch <- prometheus.MustNewConstMetric(c.connectionsDesc, prometheus.GaugeValue, float64(occurrences), lbls...) 276 } 277 } 278 } 279 280 // EndpointSet maintains a set of active Thanos endpoints. It is backed up by Endpoint Specifications that are dynamically fetched on 281 // every Update() call. 282 type EndpointSet struct { 283 now nowFunc 284 logger log.Logger 285 286 // Endpoint specifications can change dynamically. If some component is missing from the list, we assume it is no longer 287 // accessible and we close gRPC client for it, unless it is strict. 288 endpointSpec func() map[string]*GRPCEndpointSpec 289 dialOpts []grpc.DialOption 290 endpointInfoTimeout time.Duration 291 unhealthyEndpointTimeout time.Duration 292 293 updateMtx sync.Mutex 294 295 endpointsMtx sync.RWMutex 296 endpoints map[string]*endpointRef 297 endpointsMetric *endpointSetNodeCollector 298 } 299 300 // nowFunc is a function that returns time.Time. 301 // Test code can inject a function through which 302 // time can be modified before updating the EndpointSet. 303 // Production code can use time.Time. 304 type nowFunc func() time.Time 305 306 // NewEndpointSet returns a new set of Thanos APIs. 307 func NewEndpointSet( 308 now nowFunc, 309 logger log.Logger, 310 reg *prometheus.Registry, 311 endpointSpecs func() []*GRPCEndpointSpec, 312 dialOpts []grpc.DialOption, 313 unhealthyEndpointTimeout time.Duration, 314 endpointInfoTimeout time.Duration, 315 endpointMetricLabels ...string, 316 ) *EndpointSet { 317 endpointsMetric := newEndpointSetNodeCollector(endpointMetricLabels...) 318 if reg != nil { 319 reg.MustRegister(endpointsMetric) 320 } 321 322 if logger == nil { 323 logger = log.NewNopLogger() 324 } 325 326 if endpointSpecs == nil { 327 endpointSpecs = func() []*GRPCEndpointSpec { return nil } 328 } 329 330 return &EndpointSet{ 331 now: now, 332 logger: log.With(logger, "component", "endpointset"), 333 endpointsMetric: endpointsMetric, 334 335 dialOpts: dialOpts, 336 endpointInfoTimeout: endpointInfoTimeout, 337 unhealthyEndpointTimeout: unhealthyEndpointTimeout, 338 endpointSpec: func() map[string]*GRPCEndpointSpec { 339 specs := make(map[string]*GRPCEndpointSpec) 340 for _, s := range endpointSpecs() { 341 specs[s.addr] = s 342 } 343 return specs 344 }, 345 endpoints: make(map[string]*endpointRef), 346 } 347 } 348 349 // Update updates the endpoint set. It fetches current list of endpoint specs from function and updates the fresh metadata 350 // from all endpoints. Keeps around statically defined nodes that were defined with the strict mode. 351 func (e *EndpointSet) Update(ctx context.Context) { 352 e.updateMtx.Lock() 353 defer e.updateMtx.Unlock() 354 level.Debug(e.logger).Log("msg", "starting to update API endpoints", "cachedEndpoints", len(e.endpoints)) 355 356 var ( 357 newRefs = make(map[string]*endpointRef) 358 existingRefs = make(map[string]*endpointRef) 359 staleRefs = make(map[string]*endpointRef) 360 361 wg sync.WaitGroup 362 mu sync.Mutex 363 ) 364 365 for _, spec := range e.endpointSpec() { 366 spec := spec 367 368 if er, existingRef := e.endpoints[spec.Addr()]; existingRef { 369 wg.Add(1) 370 go func(spec *GRPCEndpointSpec) { 371 defer wg.Done() 372 ctx, cancel := context.WithTimeout(ctx, e.endpointInfoTimeout) 373 defer cancel() 374 e.updateEndpoint(ctx, spec, er) 375 376 mu.Lock() 377 defer mu.Unlock() 378 existingRefs[spec.Addr()] = er 379 }(spec) 380 381 continue 382 } 383 384 wg.Add(1) 385 go func(spec *GRPCEndpointSpec) { 386 defer wg.Done() 387 ctx, cancel := context.WithTimeout(ctx, e.endpointInfoTimeout) 388 defer cancel() 389 390 newRef, err := e.newEndpointRef(ctx, spec) 391 if err != nil { 392 level.Warn(e.logger).Log("msg", "new endpoint creation failed", "err", err, "address", spec.Addr()) 393 return 394 } 395 396 e.updateEndpoint(ctx, spec, newRef) 397 if !newRef.isQueryable() { 398 newRef.Close() 399 return 400 } 401 402 mu.Lock() 403 defer mu.Unlock() 404 newRefs[spec.Addr()] = newRef 405 }(spec) 406 } 407 wg.Wait() 408 409 timedOutRefs := e.getTimedOutRefs() 410 e.endpointsMtx.RLock() 411 for addr, er := range e.endpoints { 412 _, isNew := newRefs[addr] 413 _, isExisting := existingRefs[addr] 414 _, isTimedOut := timedOutRefs[addr] 415 if !isNew && !isExisting || isTimedOut { 416 staleRefs[addr] = er 417 } 418 } 419 e.endpointsMtx.RUnlock() 420 421 e.endpointsMtx.Lock() 422 defer e.endpointsMtx.Unlock() 423 for addr, er := range newRefs { 424 extLset := labelpb.PromLabelSetsToString(er.LabelSets()) 425 level.Info(e.logger).Log("msg", fmt.Sprintf("adding new %v with %+v", er.ComponentType(), er.apisPresent()), "address", addr, "extLset", extLset) 426 e.endpoints[addr] = er 427 } 428 for addr, er := range staleRefs { 429 level.Info(er.logger).Log("msg", unhealthyEndpointMessage, "address", er.addr, "extLset", labelpb.PromLabelSetsToString(er.LabelSets())) 430 er.Close() 431 delete(e.endpoints, addr) 432 } 433 level.Debug(e.logger).Log("msg", "updated endpoints", "activeEndpoints", len(e.endpoints)) 434 435 // Update stats. 436 stats := newEndpointAPIStats() 437 for addr, er := range e.endpoints { 438 if !er.isQueryable() { 439 continue 440 } 441 442 extLset := labelpb.PromLabelSetsToString(er.LabelSets()) 443 444 // All producers that expose StoreAPI should have unique external labels. Check all which connect to our Querier. 445 if er.HasStoreAPI() && (er.ComponentType() == component.Sidecar || er.ComponentType() == component.Rule) && 446 stats[component.Sidecar][extLset]+stats[component.Rule][extLset] > 0 { 447 448 level.Warn(e.logger).Log("msg", "found duplicate storeEndpoints producer (sidecar or ruler). This is not advices as it will malform data in in the same bucket", 449 "address", addr, "extLset", extLset, "duplicates", fmt.Sprintf("%v", stats[component.Sidecar][extLset]+stats[component.Rule][extLset]+1)) 450 } 451 stats[er.ComponentType()][extLset]++ 452 } 453 454 e.endpointsMetric.Update(stats) 455 } 456 457 func (e *EndpointSet) updateEndpoint(ctx context.Context, spec *GRPCEndpointSpec, er *endpointRef) { 458 metadata, err := er.Metadata(ctx, infopb.NewInfoClient(er.cc), storepb.NewStoreClient(er.cc)) 459 if err != nil { 460 level.Warn(e.logger).Log("msg", "update of endpoint failed", "err", errors.Wrap(err, "getting metadata"), "address", spec.Addr()) 461 } 462 er.update(e.now, metadata, err) 463 } 464 465 // getTimedOutRefs returns unhealthy endpoints for which the last 466 // successful health check is older than the unhealthyEndpointTimeout. 467 // Strict endpoints are never considered as timed out. 468 func (e *EndpointSet) getTimedOutRefs() map[string]*endpointRef { 469 e.endpointsMtx.RLock() 470 defer e.endpointsMtx.RUnlock() 471 result := make(map[string]*endpointRef) 472 473 endpoints := e.endpoints 474 now := e.now() 475 for _, er := range endpoints { 476 if er.isStrict { 477 continue 478 } 479 480 if now.Sub(er.created) < e.unhealthyEndpointTimeout { 481 continue 482 } 483 484 er.mtx.RLock() 485 lastCheck := er.status.LastCheck 486 er.mtx.RUnlock() 487 488 if now.Sub(lastCheck) >= e.unhealthyEndpointTimeout { 489 result[er.addr] = er 490 } 491 } 492 493 return result 494 } 495 496 func (e *EndpointSet) getQueryableRefs() map[string]*endpointRef { 497 e.endpointsMtx.RLock() 498 defer e.endpointsMtx.RUnlock() 499 500 endpoints := make(map[string]*endpointRef) 501 for addr, er := range e.endpoints { 502 if er.isQueryable() { 503 endpoints[addr] = er 504 } 505 } 506 507 return endpoints 508 } 509 510 // GetStoreClients returns a list of all active stores. 511 func (e *EndpointSet) GetStoreClients() []store.Client { 512 endpoints := e.getQueryableRefs() 513 514 stores := make([]store.Client, 0, len(endpoints)) 515 for _, er := range endpoints { 516 if er.HasStoreAPI() { 517 er.mtx.RLock() 518 // Make a new endpointRef with store client. 519 stores = append(stores, &endpointRef{ 520 StoreClient: storepb.NewStoreClient(er.cc), 521 addr: er.addr, 522 metadata: er.metadata, 523 }) 524 er.mtx.RUnlock() 525 } 526 } 527 return stores 528 } 529 530 // GetQueryAPIClients returns a list of all active query API clients. 531 func (e *EndpointSet) GetQueryAPIClients() []Client { 532 endpoints := e.getQueryableRefs() 533 534 queryClients := make([]Client, 0, len(endpoints)) 535 for _, er := range endpoints { 536 if er.HasQueryAPI() { 537 client := querypb.NewQueryClient(er.cc) 538 queryClients = append(queryClients, NewClient(client, er.addr, er.TSDBInfos())) 539 } 540 } 541 return queryClients 542 } 543 544 // GetRulesClients returns a list of all active rules clients. 545 func (e *EndpointSet) GetRulesClients() []rulespb.RulesClient { 546 endpoints := e.getQueryableRefs() 547 548 rules := make([]rulespb.RulesClient, 0, len(endpoints)) 549 for _, er := range endpoints { 550 if er.HasRulesAPI() { 551 rules = append(rules, rulespb.NewRulesClient(er.cc)) 552 } 553 } 554 return rules 555 } 556 557 // GetTargetsClients returns a list of all active targets clients. 558 func (e *EndpointSet) GetTargetsClients() []targetspb.TargetsClient { 559 endpoints := e.getQueryableRefs() 560 561 targets := make([]targetspb.TargetsClient, 0, len(endpoints)) 562 for _, er := range endpoints { 563 if er.HasTargetsAPI() { 564 targets = append(targets, targetspb.NewTargetsClient(er.cc)) 565 } 566 } 567 return targets 568 } 569 570 // GetMetricMetadataClients returns a list of all active metadata clients. 571 func (e *EndpointSet) GetMetricMetadataClients() []metadatapb.MetadataClient { 572 endpoints := e.getQueryableRefs() 573 574 metadataClients := make([]metadatapb.MetadataClient, 0, len(endpoints)) 575 for _, er := range endpoints { 576 if er.HasMetricMetadataAPI() { 577 metadataClients = append(metadataClients, metadatapb.NewMetadataClient(er.cc)) 578 } 579 } 580 return metadataClients 581 } 582 583 // GetExemplarsStores returns a list of all active exemplars stores. 584 func (e *EndpointSet) GetExemplarsStores() []*exemplarspb.ExemplarStore { 585 endpoints := e.getQueryableRefs() 586 587 exemplarStores := make([]*exemplarspb.ExemplarStore, 0, len(endpoints)) 588 for _, er := range endpoints { 589 if er.HasExemplarsAPI() { 590 exemplarStores = append(exemplarStores, &exemplarspb.ExemplarStore{ 591 ExemplarsClient: exemplarspb.NewExemplarsClient(er.cc), 592 LabelSets: labelpb.ZLabelSetsToPromLabelSets(er.metadata.LabelSets...), 593 }) 594 } 595 } 596 return exemplarStores 597 } 598 599 func (e *EndpointSet) Close() { 600 e.endpointsMtx.Lock() 601 defer e.endpointsMtx.Unlock() 602 603 for _, ef := range e.endpoints { 604 ef.Close() 605 } 606 e.endpoints = map[string]*endpointRef{} 607 } 608 609 func (e *EndpointSet) GetEndpointStatus() []EndpointStatus { 610 e.endpointsMtx.RLock() 611 defer e.endpointsMtx.RUnlock() 612 613 statuses := make([]EndpointStatus, 0, len(e.endpoints)) 614 for _, v := range e.endpoints { 615 v.mtx.RLock() 616 defer v.mtx.RUnlock() 617 618 status := v.status 619 if status != nil { 620 statuses = append(statuses, *status) 621 } 622 } 623 624 sort.Slice(statuses, func(i, j int) bool { 625 return statuses[i].Name < statuses[j].Name 626 }) 627 return statuses 628 } 629 630 type endpointRef struct { 631 storepb.StoreClient 632 633 mtx sync.RWMutex 634 cc *grpc.ClientConn 635 addr string 636 isStrict bool 637 638 created time.Time 639 metadata *endpointMetadata 640 status *EndpointStatus 641 642 logger log.Logger 643 } 644 645 // newEndpointRef creates a new endpointRef with a gRPC channel to the given the IP address. 646 // The call to newEndpointRef will return an error if establishing the channel fails. 647 func (e *EndpointSet) newEndpointRef(ctx context.Context, spec *GRPCEndpointSpec) (*endpointRef, error) { 648 var dialOpts []grpc.DialOption 649 650 dialOpts = append(dialOpts, e.dialOpts...) 651 dialOpts = append(dialOpts, spec.dialOpts...) 652 // By default DialContext is non-blocking which means that any connection 653 // failure won't be reported/logged. Instead block until the connection is 654 // successfully established and return the details of the connection error 655 // if any. 656 dialOpts = append(dialOpts, grpc.WithReturnConnectionError()) 657 conn, err := grpc.DialContext(ctx, spec.Addr(), dialOpts...) 658 if err != nil { 659 return nil, errors.Wrap(err, "dialing connection") 660 } 661 662 return &endpointRef{ 663 logger: e.logger, 664 created: e.now(), 665 addr: spec.Addr(), 666 isStrict: spec.isStrictStatic, 667 cc: conn, 668 }, nil 669 } 670 671 // update sets the metadata and status of the endpoint ref based on the info response value and error. 672 func (er *endpointRef) update(now nowFunc, metadata *endpointMetadata, err error) { 673 er.mtx.Lock() 674 defer er.mtx.Unlock() 675 676 er.updateMetadata(metadata, err) 677 er.updateStatus(now, err) 678 } 679 680 // updateStatus updates the endpointRef status based on the info call error. 681 func (er *endpointRef) updateStatus(now nowFunc, err error) { 682 mint, maxt := er.timeRange() 683 if er.status == nil { 684 er.status = &EndpointStatus{Name: er.addr} 685 } 686 687 if err == nil { 688 er.status.LastCheck = now() 689 er.status.LabelSets = er.labelSets() 690 er.status.ComponentType = er.componentType() 691 er.status.MinTime = mint 692 er.status.MaxTime = maxt 693 er.status.LastError = nil 694 } else { 695 er.status.LastError = &stringError{originalErr: err} 696 } 697 } 698 699 // updateMetadata sets the metadata for an endpoint ref based on the info call result and the info call error. 700 // When an info call for an endpoint fails, we preserve metadata from the previous state. 701 // If the is new and has no previous state, we assume it is a Store covering the complete time range. 702 func (er *endpointRef) updateMetadata(metadata *endpointMetadata, err error) { 703 if err == nil { 704 er.metadata = metadata 705 } 706 707 if err != nil && er.metadata == nil { 708 er.metadata = maxRangeStoreMetadata() 709 } 710 } 711 712 // isQueryable returns true if an endpointRef should be used for querying. 713 // A strict endpointRef is always queriable. A non-strict endpointRef 714 // is queryable if the last health check (info call) succeeded. 715 func (er *endpointRef) isQueryable() bool { 716 er.mtx.RLock() 717 defer er.mtx.RUnlock() 718 719 return er.isStrict || er.status.LastError == nil 720 } 721 722 func (er *endpointRef) ComponentType() component.Component { 723 er.mtx.RLock() 724 defer er.mtx.RUnlock() 725 726 return er.componentType() 727 } 728 729 func (er *endpointRef) componentType() component.Component { 730 if er.metadata == nil { 731 return component.UnknownStoreAPI 732 } 733 734 return component.FromString(er.metadata.ComponentType) 735 } 736 737 func (er *endpointRef) HasStoreAPI() bool { 738 er.mtx.RLock() 739 defer er.mtx.RUnlock() 740 741 return er.metadata != nil && er.metadata.Store != nil 742 } 743 744 func (er *endpointRef) HasQueryAPI() bool { 745 er.mtx.RLock() 746 defer er.mtx.RUnlock() 747 748 return er.metadata != nil && er.metadata.Query != nil 749 } 750 751 func (er *endpointRef) HasRulesAPI() bool { 752 er.mtx.RLock() 753 defer er.mtx.RUnlock() 754 755 return er.metadata != nil && er.metadata.Rules != nil 756 } 757 758 func (er *endpointRef) HasTargetsAPI() bool { 759 er.mtx.RLock() 760 defer er.mtx.RUnlock() 761 762 return er.metadata != nil && er.metadata.Targets != nil 763 } 764 765 func (er *endpointRef) HasMetricMetadataAPI() bool { 766 er.mtx.RLock() 767 defer er.mtx.RUnlock() 768 769 return er.metadata != nil && er.metadata.MetricMetadata != nil 770 } 771 772 func (er *endpointRef) HasExemplarsAPI() bool { 773 er.mtx.RLock() 774 defer er.mtx.RUnlock() 775 776 return er.metadata != nil && er.metadata.Exemplars != nil 777 } 778 779 func (er *endpointRef) LabelSets() []labels.Labels { 780 er.mtx.RLock() 781 defer er.mtx.RUnlock() 782 783 return er.labelSets() 784 } 785 786 func (er *endpointRef) labelSets() []labels.Labels { 787 if er.metadata == nil { 788 return make([]labels.Labels, 0) 789 } 790 791 labelSet := make([]labels.Labels, 0, len(er.metadata.LabelSets)) 792 for _, ls := range labelpb.ZLabelSetsToPromLabelSets(er.metadata.LabelSets...) { 793 if len(ls) == 0 { 794 continue 795 } 796 // Compatibility label for Queriers pre 0.8.1. Filter it out now. 797 if ls[0].Name == store.CompatibilityTypeLabelName { 798 continue 799 } 800 labelSet = append(labelSet, ls.Copy()) 801 } 802 return labelSet 803 } 804 805 func (er *endpointRef) TimeRange() (mint, maxt int64) { 806 er.mtx.RLock() 807 defer er.mtx.RUnlock() 808 809 return er.timeRange() 810 } 811 812 func (er *endpointRef) TSDBInfos() []infopb.TSDBInfo { 813 er.mtx.RLock() 814 defer er.mtx.RUnlock() 815 816 if er.metadata == nil || er.metadata.Store == nil { 817 return nil 818 } 819 820 // Currently, min/max time of only StoreAPI is being updated by all components. 821 return er.metadata.Store.TsdbInfos 822 } 823 824 func (er *endpointRef) timeRange() (int64, int64) { 825 if er.metadata == nil || er.metadata.Store == nil { 826 return math.MinInt64, math.MaxInt64 827 } 828 829 // Currently, min/max time of only StoreAPI is being updated by all components. 830 return er.metadata.Store.MinTime, er.metadata.Store.MaxTime 831 } 832 833 func (er *endpointRef) SupportsSharding() bool { 834 er.mtx.RLock() 835 defer er.mtx.RUnlock() 836 837 if er.metadata == nil || er.metadata.Store == nil { 838 return false 839 } 840 841 return er.metadata.Store.SupportsSharding 842 } 843 844 func (er *endpointRef) SupportsWithoutReplicaLabels() bool { 845 er.mtx.RLock() 846 defer er.mtx.RUnlock() 847 848 if er.metadata == nil || er.metadata.Store == nil { 849 return false 850 } 851 852 return er.metadata.Store.SupportsWithoutReplicaLabels 853 } 854 855 func (er *endpointRef) String() string { 856 mint, maxt := er.TimeRange() 857 return fmt.Sprintf( 858 "Addr: %s LabelSets: %v MinTime: %d MaxTime: %d", 859 er.addr, labelpb.PromLabelSetsToString(er.LabelSets()), mint, maxt, 860 ) 861 } 862 863 func (er *endpointRef) Addr() (string, bool) { 864 return er.addr, false 865 } 866 867 func (er *endpointRef) Close() { 868 runutil.CloseWithLogOnErr(er.logger, er.cc, fmt.Sprintf("endpoint %v connection closed", er.addr)) 869 } 870 871 func (er *endpointRef) apisPresent() []string { 872 var apisPresent []string 873 874 if er.HasStoreAPI() { 875 apisPresent = append(apisPresent, "storeEndpoints") 876 } 877 878 if er.HasRulesAPI() { 879 apisPresent = append(apisPresent, "rulesAPI") 880 } 881 882 if er.HasExemplarsAPI() { 883 apisPresent = append(apisPresent, "exemplarsAPI") 884 } 885 886 if er.HasTargetsAPI() { 887 apisPresent = append(apisPresent, "targetsAPI") 888 } 889 890 if er.HasMetricMetadataAPI() { 891 apisPresent = append(apisPresent, "MetricMetadataAPI") 892 } 893 894 if er.HasQueryAPI() { 895 apisPresent = append(apisPresent, "QueryAPI") 896 } 897 898 return apisPresent 899 } 900 901 type endpointMetadata struct { 902 *infopb.InfoResponse 903 } 904 905 func newEndpointAPIStats() map[component.Component]map[string]int { 906 nodes := make(map[component.Component]map[string]int, len(storepb.StoreType_name)) 907 for i := range storepb.StoreType_name { 908 nodes[component.FromProto(storepb.StoreType(i))] = map[string]int{} 909 } 910 return nodes 911 } 912 913 func maxRangeStoreMetadata() *endpointMetadata { 914 return &endpointMetadata{ 915 InfoResponse: &infopb.InfoResponse{ 916 Store: &infopb.StoreInfo{ 917 MinTime: math.MinInt64, 918 MaxTime: math.MaxInt64, 919 }, 920 }, 921 } 922 }