github.com/thanos-io/thanos@v0.32.5/pkg/query/endpointset.go (about)

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package query
     5  
     6  import (
     7  	"context"
     8  	"encoding/json"
     9  	"fmt"
    10  	"math"
    11  	"sort"
    12  	"sync"
    13  	"time"
    14  	"unicode/utf8"
    15  
    16  	"github.com/thanos-io/thanos/pkg/api/query/querypb"
    17  
    18  	"github.com/go-kit/log"
    19  	"github.com/go-kit/log/level"
    20  	"github.com/pkg/errors"
    21  	"github.com/prometheus/client_golang/prometheus"
    22  	"github.com/prometheus/prometheus/model/labels"
    23  	"google.golang.org/grpc"
    24  
    25  	"github.com/thanos-io/thanos/pkg/component"
    26  	"github.com/thanos-io/thanos/pkg/exemplars/exemplarspb"
    27  	"github.com/thanos-io/thanos/pkg/info/infopb"
    28  	"github.com/thanos-io/thanos/pkg/metadata/metadatapb"
    29  	"github.com/thanos-io/thanos/pkg/rules/rulespb"
    30  	"github.com/thanos-io/thanos/pkg/runutil"
    31  	"github.com/thanos-io/thanos/pkg/store"
    32  	"github.com/thanos-io/thanos/pkg/store/labelpb"
    33  	"github.com/thanos-io/thanos/pkg/store/storepb"
    34  	"github.com/thanos-io/thanos/pkg/targets/targetspb"
    35  )
    36  
// Fixed messages used by the endpoint set.
//
// unhealthyEndpointMessage is logged by EndpointSet.Update for every stale
// endpoint it closes and removes. noMetadataEndpointMessage is the error text
// used by Metadata when it cannot obtain metadata from any client.
const (
	unhealthyEndpointMessage  = "removing endpoint because it's unhealthy or does not exist"
	noMetadataEndpointMessage = "cannot obtain metadata: neither info nor store client found"
)
    41  
// queryConnMetricLabel is the name of a label that can be attached to the
// gRPC connections metric exposed by endpointSetNodeCollector.
type queryConnMetricLabel string

// Label names understood by endpointSetNodeCollector.Collect. ExternalLabels
// carries the stringified external label sets of an endpoint and StoreType
// carries the component type string.
const (
	ExternalLabels queryConnMetricLabel = "external_labels"
	StoreType      queryConnMetricLabel = "store_type"
)
    48  
// GRPCEndpointSpec describes one gRPC endpoint the EndpointSet should dial.
//
// addr is the dial target and also the key under which the endpoint is
// tracked. isStrictStatic marks the endpoint as strict: it is copied into
// endpointRef.isStrict, which makes the endpoint always queryable and exempt
// from the unhealthy timeout. dialOpts are appended after the set-wide dial
// options when the connection is created.
type GRPCEndpointSpec struct {
	addr           string
	isStrictStatic bool
	dialOpts       []grpc.DialOption
}
    54  
// externalLabelLimit is the byte-length threshold passed to truncateExtLabels
// for the external-labels value of the connections metric.
const externalLabelLimit = 1000
    56  
    57  // NewGRPCEndpointSpec creates gRPC endpoint spec.
    58  // It uses InfoAPI to get Metadata.
    59  func NewGRPCEndpointSpec(addr string, isStrictStatic bool, dialOpts ...grpc.DialOption) *GRPCEndpointSpec {
    60  	return &GRPCEndpointSpec{
    61  		addr:           addr,
    62  		isStrictStatic: isStrictStatic,
    63  		dialOpts:       dialOpts,
    64  	}
    65  }
    66  
    67  func (es *GRPCEndpointSpec) Addr() string {
    68  	// API address should not change between state changes.
    69  	return es.addr
    70  }
    71  
    72  // Metadata method for gRPC endpoint tries to call InfoAPI exposed by Thanos components until context timeout. If we are unable to get metadata after
    73  // that time, we assume that the host is unhealthy and return error.
    74  func (es *endpointRef) Metadata(ctx context.Context, infoClient infopb.InfoClient, storeClient storepb.StoreClient) (*endpointMetadata, error) {
    75  	if infoClient != nil {
    76  		resp, err := infoClient.Info(ctx, &infopb.InfoRequest{}, grpc.WaitForReady(true))
    77  		if err == nil {
    78  			return &endpointMetadata{resp}, nil
    79  		}
    80  	}
    81  
    82  	// Call Info method of StoreAPI, this way querier will be able to discovery old components not exposing InfoAPI.
    83  	if storeClient != nil {
    84  		metadata, err := es.getMetadataUsingStoreAPI(ctx, storeClient)
    85  		if err != nil {
    86  			return nil, errors.Wrapf(err, "fallback fetching info from %s", es.addr)
    87  		}
    88  		return metadata, nil
    89  	}
    90  
    91  	return nil, errors.New(noMetadataEndpointMessage)
    92  }
    93  
    94  func (es *endpointRef) getMetadataUsingStoreAPI(ctx context.Context, client storepb.StoreClient) (*endpointMetadata, error) {
    95  	resp, err := client.Info(ctx, &storepb.InfoRequest{})
    96  	if err != nil {
    97  		return nil, err
    98  	}
    99  
   100  	infoResp := fillExpectedAPIs(component.FromProto(resp.StoreType), resp.MinTime, resp.MaxTime)
   101  	infoResp.LabelSets = resp.LabelSets
   102  	infoResp.ComponentType = component.FromProto(resp.StoreType).String()
   103  
   104  	return &endpointMetadata{
   105  		&infoResp,
   106  	}, nil
   107  }
   108  
   109  func fillExpectedAPIs(componentType component.Component, mintime, maxTime int64) infopb.InfoResponse {
   110  	switch componentType {
   111  	case component.Sidecar:
   112  		return infopb.InfoResponse{
   113  			Store: &infopb.StoreInfo{
   114  				MinTime: mintime,
   115  				MaxTime: maxTime,
   116  			},
   117  			Rules:          &infopb.RulesInfo{},
   118  			Targets:        &infopb.TargetsInfo{},
   119  			MetricMetadata: &infopb.MetricMetadataInfo{},
   120  			Exemplars:      &infopb.ExemplarsInfo{},
   121  		}
   122  	case component.Query:
   123  		{
   124  			return infopb.InfoResponse{
   125  				Store: &infopb.StoreInfo{
   126  					MinTime: mintime,
   127  					MaxTime: maxTime,
   128  				},
   129  				Rules:          &infopb.RulesInfo{},
   130  				Targets:        &infopb.TargetsInfo{},
   131  				MetricMetadata: &infopb.MetricMetadataInfo{},
   132  				Exemplars:      &infopb.ExemplarsInfo{},
   133  				Query:          &infopb.QueryAPIInfo{},
   134  			}
   135  		}
   136  	case component.Receive:
   137  		{
   138  			return infopb.InfoResponse{
   139  				Store: &infopb.StoreInfo{
   140  					MinTime: mintime,
   141  					MaxTime: maxTime,
   142  				},
   143  				Exemplars: &infopb.ExemplarsInfo{},
   144  			}
   145  		}
   146  	case component.Store:
   147  		return infopb.InfoResponse{
   148  			Store: &infopb.StoreInfo{
   149  				MinTime: mintime,
   150  				MaxTime: maxTime,
   151  			},
   152  		}
   153  	case component.Rule:
   154  		return infopb.InfoResponse{
   155  			Store: &infopb.StoreInfo{
   156  				MinTime: mintime,
   157  				MaxTime: maxTime,
   158  			},
   159  			Rules: &infopb.RulesInfo{},
   160  		}
   161  	default:
   162  		return infopb.InfoResponse{}
   163  	}
   164  }
   165  
   166  // stringError forces the error to be a string
   167  // when marshaled into a JSON.
   168  type stringError struct {
   169  	originalErr error
   170  }
   171  
   172  // MarshalJSON marshals the error into a string form.
   173  func (e *stringError) MarshalJSON() ([]byte, error) {
   174  	return json.Marshal(e.originalErr.Error())
   175  }
   176  
   177  // Error returns the original underlying error.
   178  func (e *stringError) Error() string {
   179  	return e.originalErr.Error()
   180  }
   181  
// EndpointStatus is the health snapshot of a single endpoint as returned by
// EndpointSet.GetEndpointStatus.
//
// Name is the endpoint address. LastCheck, LabelSets, ComponentType, MinTime
// and MaxTime are refreshed only when a metadata update succeeds; LastError
// holds the error of the most recent failed update and is reset to nil on
// success. ComponentType is excluded from the JSON encoding.
type EndpointStatus struct {
	Name          string              `json:"name"`
	LastCheck     time.Time           `json:"lastCheck"`
	LastError     *stringError        `json:"lastError"`
	LabelSets     []labels.Labels     `json:"labelSets"`
	ComponentType component.Component `json:"-"`
	MinTime       int64               `json:"minTime"`
	MaxTime       int64               `json:"maxTime"`
}
   191  
// endpointSetNodeCollector is a metric collector reporting the number of available storeAPIs for Querier.
// A Collector is required as we want atomic updates for all 'thanos_store_nodes_grpc_connections' series.
// TODO(hitanshu-mehta) Currently, only collecting metrics of storeEndpoints. Make this struct generic.
//
// mtx guards storeNodes and storePerExtLset, which are swapped as a whole by
// Update. storeNodes maps component type -> (truncated) external labels
// string -> number of endpoints; storePerExtLset holds the same counts summed
// over all component types. labels lists the label names of connectionsDesc in
// the order in which Collect emits their values.
type endpointSetNodeCollector struct {
	mtx             sync.Mutex
	storeNodes      map[component.Component]map[string]int
	storePerExtLset map[string]int

	connectionsDesc *prometheus.Desc
	labels          []string
}
   203  
   204  func newEndpointSetNodeCollector(labels ...string) *endpointSetNodeCollector {
   205  	if len(labels) == 0 {
   206  		labels = []string{string(ExternalLabels), string(StoreType)}
   207  	}
   208  	return &endpointSetNodeCollector{
   209  		storeNodes: map[component.Component]map[string]int{},
   210  		connectionsDesc: prometheus.NewDesc(
   211  			"thanos_store_nodes_grpc_connections",
   212  			"Number of gRPC connection to Store APIs. Opened connection means healthy store APIs available for Querier.",
   213  			labels, nil,
   214  		),
   215  		labels: labels,
   216  	}
   217  }
   218  
   219  // truncateExtLabels truncates the stringify external labels with the format of {labels..}.
   220  func truncateExtLabels(s string, threshold int) string {
   221  	if len(s) > threshold {
   222  		for cut := 1; cut < 4; cut++ {
   223  			for cap := 1; cap < 4; cap++ {
   224  				if utf8.ValidString(s[threshold-cut-cap : threshold-cut]) {
   225  					return fmt.Sprintf("%s}", s[:threshold-cut])
   226  				}
   227  			}
   228  		}
   229  	}
   230  	return s
   231  }
   232  func (c *endpointSetNodeCollector) Update(nodes map[component.Component]map[string]int) {
   233  	storeNodes := make(map[component.Component]map[string]int, len(nodes))
   234  	storePerExtLset := map[string]int{}
   235  
   236  	for storeType, occurrencesPerExtLset := range nodes {
   237  		storeNodes[storeType] = make(map[string]int, len(occurrencesPerExtLset))
   238  		for externalLabels, occurrences := range occurrencesPerExtLset {
   239  			externalLabels = truncateExtLabels(externalLabels, externalLabelLimit)
   240  			storePerExtLset[externalLabels] += occurrences
   241  			storeNodes[storeType][externalLabels] = occurrences
   242  		}
   243  	}
   244  
   245  	c.mtx.Lock()
   246  	defer c.mtx.Unlock()
   247  	c.storeNodes = storeNodes
   248  	c.storePerExtLset = storePerExtLset
   249  }
   250  
   251  func (c *endpointSetNodeCollector) Describe(ch chan<- *prometheus.Desc) {
   252  	ch <- c.connectionsDesc
   253  }
   254  
   255  func (c *endpointSetNodeCollector) Collect(ch chan<- prometheus.Metric) {
   256  	c.mtx.Lock()
   257  	defer c.mtx.Unlock()
   258  
   259  	for storeType, occurrencesPerExtLset := range c.storeNodes {
   260  		for externalLabels, occurrences := range occurrencesPerExtLset {
   261  			var storeTypeStr string
   262  			if storeType != nil {
   263  				storeTypeStr = storeType.String()
   264  			}
   265  			// Select only required labels.
   266  			lbls := []string{}
   267  			for _, lbl := range c.labels {
   268  				switch lbl {
   269  				case string(ExternalLabels):
   270  					lbls = append(lbls, externalLabels)
   271  				case string(StoreType):
   272  					lbls = append(lbls, storeTypeStr)
   273  				}
   274  			}
   275  			ch <- prometheus.MustNewConstMetric(c.connectionsDesc, prometheus.GaugeValue, float64(occurrences), lbls...)
   276  		}
   277  	}
   278  }
   279  
// EndpointSet maintains a set of active Thanos endpoints. It is backed by Endpoint Specifications that are dynamically
// fetched on every Update() call.
//
// updateMtx serializes Update calls. endpointsMtx guards the endpoints map,
// which is keyed by endpoint address. endpointsMetric receives the per
// component counts computed at the end of every Update.
type EndpointSet struct {
	now    nowFunc
	logger log.Logger

	// Endpoint specifications can change dynamically. If some component is missing from the list, we assume it is no longer
	// accessible and we close gRPC client for it, unless it is strict.
	endpointSpec             func() map[string]*GRPCEndpointSpec
	dialOpts                 []grpc.DialOption
	endpointInfoTimeout      time.Duration
	unhealthyEndpointTimeout time.Duration

	updateMtx sync.Mutex

	endpointsMtx    sync.RWMutex
	endpoints       map[string]*endpointRef
	endpointsMetric *endpointSetNodeCollector
}
   299  
// nowFunc is a function that returns the current time as a time.Time.
// Test code can inject a function through which
// time can be modified before updating the EndpointSet.
// Production code can use time.Now.
type nowFunc func() time.Time
   305  
   306  // NewEndpointSet returns a new set of Thanos APIs.
   307  func NewEndpointSet(
   308  	now nowFunc,
   309  	logger log.Logger,
   310  	reg *prometheus.Registry,
   311  	endpointSpecs func() []*GRPCEndpointSpec,
   312  	dialOpts []grpc.DialOption,
   313  	unhealthyEndpointTimeout time.Duration,
   314  	endpointInfoTimeout time.Duration,
   315  	endpointMetricLabels ...string,
   316  ) *EndpointSet {
   317  	endpointsMetric := newEndpointSetNodeCollector(endpointMetricLabels...)
   318  	if reg != nil {
   319  		reg.MustRegister(endpointsMetric)
   320  	}
   321  
   322  	if logger == nil {
   323  		logger = log.NewNopLogger()
   324  	}
   325  
   326  	if endpointSpecs == nil {
   327  		endpointSpecs = func() []*GRPCEndpointSpec { return nil }
   328  	}
   329  
   330  	return &EndpointSet{
   331  		now:             now,
   332  		logger:          log.With(logger, "component", "endpointset"),
   333  		endpointsMetric: endpointsMetric,
   334  
   335  		dialOpts:                 dialOpts,
   336  		endpointInfoTimeout:      endpointInfoTimeout,
   337  		unhealthyEndpointTimeout: unhealthyEndpointTimeout,
   338  		endpointSpec: func() map[string]*GRPCEndpointSpec {
   339  			specs := make(map[string]*GRPCEndpointSpec)
   340  			for _, s := range endpointSpecs() {
   341  				specs[s.addr] = s
   342  			}
   343  			return specs
   344  		},
   345  		endpoints: make(map[string]*endpointRef),
   346  	}
   347  }
   348  
   349  // Update updates the endpoint set. It fetches current list of endpoint specs from function and updates the fresh metadata
   350  // from all endpoints. Keeps around statically defined nodes that were defined with the strict mode.
   351  func (e *EndpointSet) Update(ctx context.Context) {
   352  	e.updateMtx.Lock()
   353  	defer e.updateMtx.Unlock()
   354  	level.Debug(e.logger).Log("msg", "starting to update API endpoints", "cachedEndpoints", len(e.endpoints))
   355  
   356  	var (
   357  		newRefs      = make(map[string]*endpointRef)
   358  		existingRefs = make(map[string]*endpointRef)
   359  		staleRefs    = make(map[string]*endpointRef)
   360  
   361  		wg sync.WaitGroup
   362  		mu sync.Mutex
   363  	)
   364  
   365  	for _, spec := range e.endpointSpec() {
   366  		spec := spec
   367  
   368  		if er, existingRef := e.endpoints[spec.Addr()]; existingRef {
   369  			wg.Add(1)
   370  			go func(spec *GRPCEndpointSpec) {
   371  				defer wg.Done()
   372  				ctx, cancel := context.WithTimeout(ctx, e.endpointInfoTimeout)
   373  				defer cancel()
   374  				e.updateEndpoint(ctx, spec, er)
   375  
   376  				mu.Lock()
   377  				defer mu.Unlock()
   378  				existingRefs[spec.Addr()] = er
   379  			}(spec)
   380  
   381  			continue
   382  		}
   383  
   384  		wg.Add(1)
   385  		go func(spec *GRPCEndpointSpec) {
   386  			defer wg.Done()
   387  			ctx, cancel := context.WithTimeout(ctx, e.endpointInfoTimeout)
   388  			defer cancel()
   389  
   390  			newRef, err := e.newEndpointRef(ctx, spec)
   391  			if err != nil {
   392  				level.Warn(e.logger).Log("msg", "new endpoint creation failed", "err", err, "address", spec.Addr())
   393  				return
   394  			}
   395  
   396  			e.updateEndpoint(ctx, spec, newRef)
   397  			if !newRef.isQueryable() {
   398  				newRef.Close()
   399  				return
   400  			}
   401  
   402  			mu.Lock()
   403  			defer mu.Unlock()
   404  			newRefs[spec.Addr()] = newRef
   405  		}(spec)
   406  	}
   407  	wg.Wait()
   408  
   409  	timedOutRefs := e.getTimedOutRefs()
   410  	e.endpointsMtx.RLock()
   411  	for addr, er := range e.endpoints {
   412  		_, isNew := newRefs[addr]
   413  		_, isExisting := existingRefs[addr]
   414  		_, isTimedOut := timedOutRefs[addr]
   415  		if !isNew && !isExisting || isTimedOut {
   416  			staleRefs[addr] = er
   417  		}
   418  	}
   419  	e.endpointsMtx.RUnlock()
   420  
   421  	e.endpointsMtx.Lock()
   422  	defer e.endpointsMtx.Unlock()
   423  	for addr, er := range newRefs {
   424  		extLset := labelpb.PromLabelSetsToString(er.LabelSets())
   425  		level.Info(e.logger).Log("msg", fmt.Sprintf("adding new %v with %+v", er.ComponentType(), er.apisPresent()), "address", addr, "extLset", extLset)
   426  		e.endpoints[addr] = er
   427  	}
   428  	for addr, er := range staleRefs {
   429  		level.Info(er.logger).Log("msg", unhealthyEndpointMessage, "address", er.addr, "extLset", labelpb.PromLabelSetsToString(er.LabelSets()))
   430  		er.Close()
   431  		delete(e.endpoints, addr)
   432  	}
   433  	level.Debug(e.logger).Log("msg", "updated endpoints", "activeEndpoints", len(e.endpoints))
   434  
   435  	// Update stats.
   436  	stats := newEndpointAPIStats()
   437  	for addr, er := range e.endpoints {
   438  		if !er.isQueryable() {
   439  			continue
   440  		}
   441  
   442  		extLset := labelpb.PromLabelSetsToString(er.LabelSets())
   443  
   444  		// All producers that expose StoreAPI should have unique external labels. Check all which connect to our Querier.
   445  		if er.HasStoreAPI() && (er.ComponentType() == component.Sidecar || er.ComponentType() == component.Rule) &&
   446  			stats[component.Sidecar][extLset]+stats[component.Rule][extLset] > 0 {
   447  
   448  			level.Warn(e.logger).Log("msg", "found duplicate storeEndpoints producer (sidecar or ruler). This is not advices as it will malform data in in the same bucket",
   449  				"address", addr, "extLset", extLset, "duplicates", fmt.Sprintf("%v", stats[component.Sidecar][extLset]+stats[component.Rule][extLset]+1))
   450  		}
   451  		stats[er.ComponentType()][extLset]++
   452  	}
   453  
   454  	e.endpointsMetric.Update(stats)
   455  }
   456  
   457  func (e *EndpointSet) updateEndpoint(ctx context.Context, spec *GRPCEndpointSpec, er *endpointRef) {
   458  	metadata, err := er.Metadata(ctx, infopb.NewInfoClient(er.cc), storepb.NewStoreClient(er.cc))
   459  	if err != nil {
   460  		level.Warn(e.logger).Log("msg", "update of endpoint failed", "err", errors.Wrap(err, "getting metadata"), "address", spec.Addr())
   461  	}
   462  	er.update(e.now, metadata, err)
   463  }
   464  
   465  // getTimedOutRefs returns unhealthy endpoints for which the last
   466  // successful health check is older than the unhealthyEndpointTimeout.
   467  // Strict endpoints are never considered as timed out.
   468  func (e *EndpointSet) getTimedOutRefs() map[string]*endpointRef {
   469  	e.endpointsMtx.RLock()
   470  	defer e.endpointsMtx.RUnlock()
   471  	result := make(map[string]*endpointRef)
   472  
   473  	endpoints := e.endpoints
   474  	now := e.now()
   475  	for _, er := range endpoints {
   476  		if er.isStrict {
   477  			continue
   478  		}
   479  
   480  		if now.Sub(er.created) < e.unhealthyEndpointTimeout {
   481  			continue
   482  		}
   483  
   484  		er.mtx.RLock()
   485  		lastCheck := er.status.LastCheck
   486  		er.mtx.RUnlock()
   487  
   488  		if now.Sub(lastCheck) >= e.unhealthyEndpointTimeout {
   489  			result[er.addr] = er
   490  		}
   491  	}
   492  
   493  	return result
   494  }
   495  
   496  func (e *EndpointSet) getQueryableRefs() map[string]*endpointRef {
   497  	e.endpointsMtx.RLock()
   498  	defer e.endpointsMtx.RUnlock()
   499  
   500  	endpoints := make(map[string]*endpointRef)
   501  	for addr, er := range e.endpoints {
   502  		if er.isQueryable() {
   503  			endpoints[addr] = er
   504  		}
   505  	}
   506  
   507  	return endpoints
   508  }
   509  
   510  // GetStoreClients returns a list of all active stores.
   511  func (e *EndpointSet) GetStoreClients() []store.Client {
   512  	endpoints := e.getQueryableRefs()
   513  
   514  	stores := make([]store.Client, 0, len(endpoints))
   515  	for _, er := range endpoints {
   516  		if er.HasStoreAPI() {
   517  			er.mtx.RLock()
   518  			// Make a new endpointRef with store client.
   519  			stores = append(stores, &endpointRef{
   520  				StoreClient: storepb.NewStoreClient(er.cc),
   521  				addr:        er.addr,
   522  				metadata:    er.metadata,
   523  			})
   524  			er.mtx.RUnlock()
   525  		}
   526  	}
   527  	return stores
   528  }
   529  
   530  // GetQueryAPIClients returns a list of all active query API clients.
   531  func (e *EndpointSet) GetQueryAPIClients() []Client {
   532  	endpoints := e.getQueryableRefs()
   533  
   534  	queryClients := make([]Client, 0, len(endpoints))
   535  	for _, er := range endpoints {
   536  		if er.HasQueryAPI() {
   537  			client := querypb.NewQueryClient(er.cc)
   538  			queryClients = append(queryClients, NewClient(client, er.addr, er.TSDBInfos()))
   539  		}
   540  	}
   541  	return queryClients
   542  }
   543  
   544  // GetRulesClients returns a list of all active rules clients.
   545  func (e *EndpointSet) GetRulesClients() []rulespb.RulesClient {
   546  	endpoints := e.getQueryableRefs()
   547  
   548  	rules := make([]rulespb.RulesClient, 0, len(endpoints))
   549  	for _, er := range endpoints {
   550  		if er.HasRulesAPI() {
   551  			rules = append(rules, rulespb.NewRulesClient(er.cc))
   552  		}
   553  	}
   554  	return rules
   555  }
   556  
   557  // GetTargetsClients returns a list of all active targets clients.
   558  func (e *EndpointSet) GetTargetsClients() []targetspb.TargetsClient {
   559  	endpoints := e.getQueryableRefs()
   560  
   561  	targets := make([]targetspb.TargetsClient, 0, len(endpoints))
   562  	for _, er := range endpoints {
   563  		if er.HasTargetsAPI() {
   564  			targets = append(targets, targetspb.NewTargetsClient(er.cc))
   565  		}
   566  	}
   567  	return targets
   568  }
   569  
   570  // GetMetricMetadataClients returns a list of all active metadata clients.
   571  func (e *EndpointSet) GetMetricMetadataClients() []metadatapb.MetadataClient {
   572  	endpoints := e.getQueryableRefs()
   573  
   574  	metadataClients := make([]metadatapb.MetadataClient, 0, len(endpoints))
   575  	for _, er := range endpoints {
   576  		if er.HasMetricMetadataAPI() {
   577  			metadataClients = append(metadataClients, metadatapb.NewMetadataClient(er.cc))
   578  		}
   579  	}
   580  	return metadataClients
   581  }
   582  
   583  // GetExemplarsStores returns a list of all active exemplars stores.
   584  func (e *EndpointSet) GetExemplarsStores() []*exemplarspb.ExemplarStore {
   585  	endpoints := e.getQueryableRefs()
   586  
   587  	exemplarStores := make([]*exemplarspb.ExemplarStore, 0, len(endpoints))
   588  	for _, er := range endpoints {
   589  		if er.HasExemplarsAPI() {
   590  			exemplarStores = append(exemplarStores, &exemplarspb.ExemplarStore{
   591  				ExemplarsClient: exemplarspb.NewExemplarsClient(er.cc),
   592  				LabelSets:       labelpb.ZLabelSetsToPromLabelSets(er.metadata.LabelSets...),
   593  			})
   594  		}
   595  	}
   596  	return exemplarStores
   597  }
   598  
   599  func (e *EndpointSet) Close() {
   600  	e.endpointsMtx.Lock()
   601  	defer e.endpointsMtx.Unlock()
   602  
   603  	for _, ef := range e.endpoints {
   604  		ef.Close()
   605  	}
   606  	e.endpoints = map[string]*endpointRef{}
   607  }
   608  
   609  func (e *EndpointSet) GetEndpointStatus() []EndpointStatus {
   610  	e.endpointsMtx.RLock()
   611  	defer e.endpointsMtx.RUnlock()
   612  
   613  	statuses := make([]EndpointStatus, 0, len(e.endpoints))
   614  	for _, v := range e.endpoints {
   615  		v.mtx.RLock()
   616  		defer v.mtx.RUnlock()
   617  
   618  		status := v.status
   619  		if status != nil {
   620  			statuses = append(statuses, *status)
   621  		}
   622  	}
   623  
   624  	sort.Slice(statuses, func(i, j int) bool {
   625  		return statuses[i].Name < statuses[j].Name
   626  	})
   627  	return statuses
   628  }
   629  
// endpointRef is a single endpoint tracked by EndpointSet.
//
// The embedded StoreClient is only populated on the copies handed out by
// GetStoreClients. mtx guards metadata and status, which are rewritten by
// update. cc is the gRPC connection to addr, isStrict is taken from the
// endpoint spec and created is the time the ref was constructed.
type endpointRef struct {
	storepb.StoreClient

	mtx      sync.RWMutex
	cc       *grpc.ClientConn
	addr     string
	isStrict bool

	created  time.Time
	metadata *endpointMetadata
	status   *EndpointStatus

	logger log.Logger
}
   644  
   645  // newEndpointRef creates a new endpointRef with a gRPC channel to the given the IP address.
   646  // The call to newEndpointRef will return an error if establishing the channel fails.
   647  func (e *EndpointSet) newEndpointRef(ctx context.Context, spec *GRPCEndpointSpec) (*endpointRef, error) {
   648  	var dialOpts []grpc.DialOption
   649  
   650  	dialOpts = append(dialOpts, e.dialOpts...)
   651  	dialOpts = append(dialOpts, spec.dialOpts...)
   652  	// By default DialContext is non-blocking which means that any connection
   653  	// failure won't be reported/logged. Instead block until the connection is
   654  	// successfully established and return the details of the connection error
   655  	// if any.
   656  	dialOpts = append(dialOpts, grpc.WithReturnConnectionError())
   657  	conn, err := grpc.DialContext(ctx, spec.Addr(), dialOpts...)
   658  	if err != nil {
   659  		return nil, errors.Wrap(err, "dialing connection")
   660  	}
   661  
   662  	return &endpointRef{
   663  		logger:   e.logger,
   664  		created:  e.now(),
   665  		addr:     spec.Addr(),
   666  		isStrict: spec.isStrictStatic,
   667  		cc:       conn,
   668  	}, nil
   669  }
   670  
   671  // update sets the metadata and status of the endpoint ref based on the info response value and error.
   672  func (er *endpointRef) update(now nowFunc, metadata *endpointMetadata, err error) {
   673  	er.mtx.Lock()
   674  	defer er.mtx.Unlock()
   675  
   676  	er.updateMetadata(metadata, err)
   677  	er.updateStatus(now, err)
   678  }
   679  
   680  // updateStatus updates the endpointRef status based on the info call error.
   681  func (er *endpointRef) updateStatus(now nowFunc, err error) {
   682  	mint, maxt := er.timeRange()
   683  	if er.status == nil {
   684  		er.status = &EndpointStatus{Name: er.addr}
   685  	}
   686  
   687  	if err == nil {
   688  		er.status.LastCheck = now()
   689  		er.status.LabelSets = er.labelSets()
   690  		er.status.ComponentType = er.componentType()
   691  		er.status.MinTime = mint
   692  		er.status.MaxTime = maxt
   693  		er.status.LastError = nil
   694  	} else {
   695  		er.status.LastError = &stringError{originalErr: err}
   696  	}
   697  }
   698  
   699  // updateMetadata sets the metadata for an endpoint ref based on the info call result and the info call error.
   700  // When an info call for an endpoint fails, we preserve metadata from the previous state.
   701  // If the is new and has no previous state, we assume it is a Store covering the complete time range.
   702  func (er *endpointRef) updateMetadata(metadata *endpointMetadata, err error) {
   703  	if err == nil {
   704  		er.metadata = metadata
   705  	}
   706  
   707  	if err != nil && er.metadata == nil {
   708  		er.metadata = maxRangeStoreMetadata()
   709  	}
   710  }
   711  
   712  // isQueryable returns true if an endpointRef should be used for querying.
   713  // A strict endpointRef is always queriable. A non-strict endpointRef
   714  // is queryable if the last health check (info call) succeeded.
   715  func (er *endpointRef) isQueryable() bool {
   716  	er.mtx.RLock()
   717  	defer er.mtx.RUnlock()
   718  
   719  	return er.isStrict || er.status.LastError == nil
   720  }
   721  
   722  func (er *endpointRef) ComponentType() component.Component {
   723  	er.mtx.RLock()
   724  	defer er.mtx.RUnlock()
   725  
   726  	return er.componentType()
   727  }
   728  
   729  func (er *endpointRef) componentType() component.Component {
   730  	if er.metadata == nil {
   731  		return component.UnknownStoreAPI
   732  	}
   733  
   734  	return component.FromString(er.metadata.ComponentType)
   735  }
   736  
   737  func (er *endpointRef) HasStoreAPI() bool {
   738  	er.mtx.RLock()
   739  	defer er.mtx.RUnlock()
   740  
   741  	return er.metadata != nil && er.metadata.Store != nil
   742  }
   743  
   744  func (er *endpointRef) HasQueryAPI() bool {
   745  	er.mtx.RLock()
   746  	defer er.mtx.RUnlock()
   747  
   748  	return er.metadata != nil && er.metadata.Query != nil
   749  }
   750  
   751  func (er *endpointRef) HasRulesAPI() bool {
   752  	er.mtx.RLock()
   753  	defer er.mtx.RUnlock()
   754  
   755  	return er.metadata != nil && er.metadata.Rules != nil
   756  }
   757  
   758  func (er *endpointRef) HasTargetsAPI() bool {
   759  	er.mtx.RLock()
   760  	defer er.mtx.RUnlock()
   761  
   762  	return er.metadata != nil && er.metadata.Targets != nil
   763  }
   764  
   765  func (er *endpointRef) HasMetricMetadataAPI() bool {
   766  	er.mtx.RLock()
   767  	defer er.mtx.RUnlock()
   768  
   769  	return er.metadata != nil && er.metadata.MetricMetadata != nil
   770  }
   771  
   772  func (er *endpointRef) HasExemplarsAPI() bool {
   773  	er.mtx.RLock()
   774  	defer er.mtx.RUnlock()
   775  
   776  	return er.metadata != nil && er.metadata.Exemplars != nil
   777  }
   778  
   779  func (er *endpointRef) LabelSets() []labels.Labels {
   780  	er.mtx.RLock()
   781  	defer er.mtx.RUnlock()
   782  
   783  	return er.labelSets()
   784  }
   785  
   786  func (er *endpointRef) labelSets() []labels.Labels {
   787  	if er.metadata == nil {
   788  		return make([]labels.Labels, 0)
   789  	}
   790  
   791  	labelSet := make([]labels.Labels, 0, len(er.metadata.LabelSets))
   792  	for _, ls := range labelpb.ZLabelSetsToPromLabelSets(er.metadata.LabelSets...) {
   793  		if len(ls) == 0 {
   794  			continue
   795  		}
   796  		// Compatibility label for Queriers pre 0.8.1. Filter it out now.
   797  		if ls[0].Name == store.CompatibilityTypeLabelName {
   798  			continue
   799  		}
   800  		labelSet = append(labelSet, ls.Copy())
   801  	}
   802  	return labelSet
   803  }
   804  
   805  func (er *endpointRef) TimeRange() (mint, maxt int64) {
   806  	er.mtx.RLock()
   807  	defer er.mtx.RUnlock()
   808  
   809  	return er.timeRange()
   810  }
   811  
   812  func (er *endpointRef) TSDBInfos() []infopb.TSDBInfo {
   813  	er.mtx.RLock()
   814  	defer er.mtx.RUnlock()
   815  
   816  	if er.metadata == nil || er.metadata.Store == nil {
   817  		return nil
   818  	}
   819  
   820  	// Currently, min/max time of only StoreAPI is being updated by all components.
   821  	return er.metadata.Store.TsdbInfos
   822  }
   823  
   824  func (er *endpointRef) timeRange() (int64, int64) {
   825  	if er.metadata == nil || er.metadata.Store == nil {
   826  		return math.MinInt64, math.MaxInt64
   827  	}
   828  
   829  	// Currently, min/max time of only StoreAPI is being updated by all components.
   830  	return er.metadata.Store.MinTime, er.metadata.Store.MaxTime
   831  }
   832  
   833  func (er *endpointRef) SupportsSharding() bool {
   834  	er.mtx.RLock()
   835  	defer er.mtx.RUnlock()
   836  
   837  	if er.metadata == nil || er.metadata.Store == nil {
   838  		return false
   839  	}
   840  
   841  	return er.metadata.Store.SupportsSharding
   842  }
   843  
   844  func (er *endpointRef) SupportsWithoutReplicaLabels() bool {
   845  	er.mtx.RLock()
   846  	defer er.mtx.RUnlock()
   847  
   848  	if er.metadata == nil || er.metadata.Store == nil {
   849  		return false
   850  	}
   851  
   852  	return er.metadata.Store.SupportsWithoutReplicaLabels
   853  }
   854  
   855  func (er *endpointRef) String() string {
   856  	mint, maxt := er.TimeRange()
   857  	return fmt.Sprintf(
   858  		"Addr: %s LabelSets: %v MinTime: %d MaxTime: %d",
   859  		er.addr, labelpb.PromLabelSetsToString(er.LabelSets()), mint, maxt,
   860  	)
   861  }
   862  
   863  func (er *endpointRef) Addr() (string, bool) {
   864  	return er.addr, false
   865  }
   866  
   867  func (er *endpointRef) Close() {
   868  	runutil.CloseWithLogOnErr(er.logger, er.cc, fmt.Sprintf("endpoint %v connection closed", er.addr))
   869  }
   870  
   871  func (er *endpointRef) apisPresent() []string {
   872  	var apisPresent []string
   873  
   874  	if er.HasStoreAPI() {
   875  		apisPresent = append(apisPresent, "storeEndpoints")
   876  	}
   877  
   878  	if er.HasRulesAPI() {
   879  		apisPresent = append(apisPresent, "rulesAPI")
   880  	}
   881  
   882  	if er.HasExemplarsAPI() {
   883  		apisPresent = append(apisPresent, "exemplarsAPI")
   884  	}
   885  
   886  	if er.HasTargetsAPI() {
   887  		apisPresent = append(apisPresent, "targetsAPI")
   888  	}
   889  
   890  	if er.HasMetricMetadataAPI() {
   891  		apisPresent = append(apisPresent, "MetricMetadataAPI")
   892  	}
   893  
   894  	if er.HasQueryAPI() {
   895  		apisPresent = append(apisPresent, "QueryAPI")
   896  	}
   897  
   898  	return apisPresent
   899  }
   900  
// endpointMetadata is the metadata kept for an endpoint. It embeds the
// InfoResponse obtained from the InfoAPI, or one synthesized from the StoreAPI
// Info call, so its fields are accessible directly.
type endpointMetadata struct {
	*infopb.InfoResponse
}
   904  
   905  func newEndpointAPIStats() map[component.Component]map[string]int {
   906  	nodes := make(map[component.Component]map[string]int, len(storepb.StoreType_name))
   907  	for i := range storepb.StoreType_name {
   908  		nodes[component.FromProto(storepb.StoreType(i))] = map[string]int{}
   909  	}
   910  	return nodes
   911  }
   912  
   913  func maxRangeStoreMetadata() *endpointMetadata {
   914  	return &endpointMetadata{
   915  		InfoResponse: &infopb.InfoResponse{
   916  			Store: &infopb.StoreInfo{
   917  				MinTime: math.MinInt64,
   918  				MaxTime: math.MaxInt64,
   919  			},
   920  		},
   921  	}
   922  }