github.com/thanos-io/thanos@v0.32.5/pkg/query/endpointset_test.go (about)

     1  // Copyright (c) The Thanos Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package query
     5  
     6  import (
     7  	"context"
     8  	"encoding/json"
     9  	"fmt"
    10  	"math"
    11  	"net"
    12  	"strings"
    13  	"sync"
    14  	"testing"
    15  	"time"
    16  
    17  	"github.com/stretchr/testify/require"
    18  
    19  	"github.com/prometheus/prometheus/model/labels"
    20  	"github.com/thanos-io/thanos/pkg/store"
    21  
    22  	"golang.org/x/sync/errgroup"
    23  	"google.golang.org/grpc"
    24  	"google.golang.org/grpc/credentials/insecure"
    25  
    26  	"github.com/efficientgo/core/testutil"
    27  	"github.com/pkg/errors"
    28  	promtestutil "github.com/prometheus/client_golang/prometheus/testutil"
    29  	"github.com/thanos-io/thanos/pkg/component"
    30  	"github.com/thanos-io/thanos/pkg/info/infopb"
    31  	"github.com/thanos-io/thanos/pkg/store/labelpb"
    32  	"github.com/thanos-io/thanos/pkg/store/storepb"
    33  )
    34  
// testGRPCOpts are the gRPC dial options used when tests in this file build an
// endpoint set directly: the per-call receive size limit is raised to
// math.MaxInt32 and insecure transport credentials are used.
var testGRPCOpts = []grpc.DialOption{
	grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)),
	grpc.WithTransportCredentials(insecure.NewCredentials()),
}
    39  
// Canned Info responses, one per component type, used as fixtures by the tests
// below. Every fixture advertises the widest possible store time range
// (math.MinInt64..math.MaxInt64); they differ in the component type and in
// which optional API sections are populated.
//
// These are package-level pointers shared between tests, so a test that needs
// different values should work on its own copy rather than mutate them.
var (
	// sidecarInfo populates Store, Exemplars, Rules, MetricMetadata and Targets.
	sidecarInfo = &infopb.InfoResponse{
		ComponentType: component.Sidecar.String(),
		Store: &infopb.StoreInfo{
			MinTime: math.MinInt64,
			MaxTime: math.MaxInt64,
		},
		Exemplars:      &infopb.ExemplarsInfo{},
		Rules:          &infopb.RulesInfo{},
		MetricMetadata: &infopb.MetricMetadataInfo{},
		Targets:        &infopb.TargetsInfo{},
	}
	// queryInfo populates the same sections as sidecarInfo plus Query.
	queryInfo = &infopb.InfoResponse{
		ComponentType: component.Query.String(),
		Store: &infopb.StoreInfo{
			MinTime: math.MinInt64,
			MaxTime: math.MaxInt64,
		},
		Exemplars:      &infopb.ExemplarsInfo{},
		Rules:          &infopb.RulesInfo{},
		MetricMetadata: &infopb.MetricMetadataInfo{},
		Targets:        &infopb.TargetsInfo{},
		Query:          &infopb.QueryAPIInfo{},
	}
	// ruleInfo populates Store and Rules only.
	ruleInfo = &infopb.InfoResponse{
		ComponentType: component.Rule.String(),
		Store: &infopb.StoreInfo{
			MinTime: math.MinInt64,
			MaxTime: math.MaxInt64,
		},
		Rules: &infopb.RulesInfo{},
	}
	// storeGWInfo populates Store only.
	storeGWInfo = &infopb.InfoResponse{
		ComponentType: component.Store.String(),
		Store: &infopb.StoreInfo{
			MinTime: math.MinInt64,
			MaxTime: math.MaxInt64,
		},
	}
	// receiveInfo populates Store and Exemplars only.
	receiveInfo = &infopb.InfoResponse{
		ComponentType: component.Receive.String(),
		Store: &infopb.StoreInfo{
			MinTime: math.MinInt64,
			MaxTime: math.MaxInt64,
		},
		Exemplars: &infopb.ExemplarsInfo{},
	}
)
    88  
// mockedEndpoint is a test double for the Info gRPC service. Its Info method
// answers with a fixed response, optionally after a delay, or with a
// configured error.
type mockedEndpoint struct {
	// infoDelay is how long Info waits before returning the response.
	infoDelay time.Duration
	// info is the response returned by Info on success.
	info infopb.InfoResponse
	// err, when non-nil, is returned by Info instead of info.
	err error
}
    94  
// setResponseError sets the error that subsequent Info calls return. Passing
// nil makes Info return the canned response again.
func (c *mockedEndpoint) setResponseError(err error) {
	c.err = err
}
    98  
    99  func (c *mockedEndpoint) Info(ctx context.Context, r *infopb.InfoRequest) (*infopb.InfoResponse, error) {
   100  	if c.err != nil {
   101  		return nil, c.err
   102  	}
   103  
   104  	select {
   105  	case <-ctx.Done():
   106  		return nil, context.Canceled
   107  	case <-time.After(c.infoDelay):
   108  	}
   109  
   110  	return &c.info, nil
   111  }
   112  
// mockedStoreSrv is a test double for the Store gRPC service. Only Info has
// behavior: it answers with a fixed response, optionally after a delay, or
// with a configured error. The remaining methods are no-op stubs.
type mockedStoreSrv struct {
	// infoDelay is how long Info waits before returning the response.
	infoDelay time.Duration
	// info is the response returned by Info on success.
	info storepb.InfoResponse
	// err, when non-nil, is returned by Info instead of info.
	err error
}
   118  
// setResponseError sets the error that subsequent Info calls return. Passing
// nil makes Info return the canned response again.
func (s *mockedStoreSrv) setResponseError(err error) {
	s.err = err
}
   122  
   123  func (s *mockedStoreSrv) Info(ctx context.Context, _ *storepb.InfoRequest) (*storepb.InfoResponse, error) {
   124  	if s.err != nil {
   125  		return nil, s.err
   126  	}
   127  
   128  	select {
   129  	case <-ctx.Done():
   130  		return nil, context.Canceled
   131  	case <-time.After(s.infoDelay):
   132  	}
   133  
   134  	return &s.info, nil
   135  }
// Series is a no-op stub: it sends nothing on the stream and reports success.
func (s *mockedStoreSrv) Series(*storepb.SeriesRequest, storepb.Store_SeriesServer) error {
	return nil
}
// LabelNames is a no-op stub that returns a nil response and a nil error.
func (s *mockedStoreSrv) LabelNames(context.Context, *storepb.LabelNamesRequest) (*storepb.LabelNamesResponse, error) {
	return nil, nil
}
// LabelValues is a no-op stub that returns a nil response and a nil error.
func (s *mockedStoreSrv) LabelValues(context.Context, *storepb.LabelValuesRequest) (*storepb.LabelValuesResponse, error) {
	return nil, nil
}
   145  
// APIs holds one boolean flag per API kind for a test endpoint. Values are
// produced by exposedAPIs and checked by assertRegisteredAPIs, both defined
// outside this part of the file.
// NOTE(review): presumably true means the endpoint is expected to expose that
// API — confirm against exposedAPIs.
type APIs struct {
	store          bool
	metricMetadata bool
	rules          bool
	target         bool
	exemplars      bool
}
   153  
// testEndpointMeta describes one mocked endpoint for startTestEndpoints to
// start.
type testEndpointMeta struct {
	// InfoResponse supplies the component type and the per-API info sections
	// that the mocked Info service returns.
	*infopb.InfoResponse
	// extlsetFn builds the endpoint's label sets from its listen address. It
	// is called without a nil check, so it must be set.
	extlsetFn func(addr string) []labelpb.ZLabelSet
	// infoDelay is how long the mocked Info calls wait before answering.
	infoDelay time.Duration
	// err, when non-nil, is the error the mocked Info calls start out returning.
	err error
}
   160  
// testEndpoints is the set of mocked gRPC servers started by
// startTestEndpoints. All maps are keyed by the server's listen address.
type testEndpoints struct {
	// srvs holds the running servers; Close and CloseOne stop them.
	srvs map[string]*grpc.Server
	// endpoints holds the mocked Info service registered on each server.
	endpoints map[string]*mockedEndpoint
	// stores holds the mocked Store service registered on each server.
	stores map[string]*mockedStoreSrv
	// orderAddrs lists the listen addresses in the order servers were started.
	orderAddrs []string
	// exposedAPIs holds the result of exposedAPIs for each server's component type.
	exposedAPIs map[string]*APIs
}
   168  
   169  func componentTypeToStoreType(componentType string) storepb.StoreType {
   170  	switch componentType {
   171  	case component.Query.String():
   172  		return storepb.StoreType_QUERY
   173  	case component.Rule.String():
   174  		return storepb.StoreType_RULE
   175  	case component.Sidecar.String():
   176  		return storepb.StoreType_SIDECAR
   177  	case component.Store.String():
   178  		return storepb.StoreType_STORE
   179  	case component.Receive.String():
   180  		return storepb.StoreType_RECEIVE
   181  	case component.Debug.String():
   182  		return storepb.StoreType_DEBUG
   183  	default:
   184  		return storepb.StoreType_STORE
   185  	}
   186  }
   187  
   188  func startTestEndpoints(testEndpointMeta []testEndpointMeta) (*testEndpoints, error) {
   189  	e := &testEndpoints{
   190  		srvs:        map[string]*grpc.Server{},
   191  		endpoints:   map[string]*mockedEndpoint{},
   192  		stores:      map[string]*mockedStoreSrv{},
   193  		exposedAPIs: map[string]*APIs{},
   194  	}
   195  
   196  	for _, meta := range testEndpointMeta {
   197  		listener, err := net.Listen("tcp", "127.0.0.1:0")
   198  		if err != nil {
   199  			// Close so far started servers.
   200  			e.Close()
   201  			return nil, err
   202  		}
   203  
   204  		srv := grpc.NewServer()
   205  		addr := listener.Addr().String()
   206  
   207  		storeSrv := &mockedStoreSrv{
   208  			err: meta.err,
   209  			info: storepb.InfoResponse{
   210  				LabelSets: meta.extlsetFn(listener.Addr().String()),
   211  				StoreType: componentTypeToStoreType(meta.ComponentType),
   212  			},
   213  			infoDelay: meta.infoDelay,
   214  		}
   215  
   216  		if meta.Store != nil {
   217  			storeSrv.info.MinTime = meta.Store.MinTime
   218  			storeSrv.info.MaxTime = meta.Store.MaxTime
   219  		}
   220  
   221  		endpointSrv := &mockedEndpoint{
   222  			err: meta.err,
   223  			info: infopb.InfoResponse{
   224  				LabelSets:      meta.extlsetFn(listener.Addr().String()),
   225  				Store:          meta.Store,
   226  				MetricMetadata: meta.MetricMetadata,
   227  				Rules:          meta.Rules,
   228  				Targets:        meta.Targets,
   229  				Exemplars:      meta.Exemplars,
   230  				Query:          meta.Query,
   231  				ComponentType:  meta.ComponentType,
   232  			},
   233  			infoDelay: meta.infoDelay,
   234  		}
   235  		infopb.RegisterInfoServer(srv, endpointSrv)
   236  		storepb.RegisterStoreServer(srv, storeSrv)
   237  		go func() {
   238  			_ = srv.Serve(listener)
   239  		}()
   240  
   241  		e.exposedAPIs[addr] = exposedAPIs(meta.ComponentType)
   242  		e.srvs[addr] = srv
   243  		e.endpoints[addr] = endpointSrv
   244  		e.stores[addr] = storeSrv
   245  		e.orderAddrs = append(e.orderAddrs, listener.Addr().String())
   246  	}
   247  
   248  	return e, nil
   249  }
   250  
   251  func (e *testEndpoints) EndpointAddresses() []string {
   252  	var endpoints []string
   253  	endpoints = append(endpoints, e.orderAddrs...)
   254  	return endpoints
   255  }
   256  
   257  func (e *testEndpoints) Close() {
   258  	for _, srv := range e.srvs {
   259  		srv.Stop()
   260  	}
   261  	e.srvs = nil
   262  }
   263  
   264  func (e *testEndpoints) CloseOne(addr string) {
   265  	srv, ok := e.srvs[addr]
   266  	if !ok {
   267  		return
   268  	}
   269  
   270  	srv.Stop()
   271  	delete(e.srvs, addr)
   272  }
   273  
   274  func TestTruncateExtLabels(t *testing.T) {
   275  	const testLength = 10
   276  
   277  	for _, tc := range []struct {
   278  		labelToTruncate string
   279  		expectedOutput  string
   280  	}{
   281  		{
   282  			labelToTruncate: "{abc}",
   283  			expectedOutput:  "{abc}",
   284  		},
   285  		{
   286  			labelToTruncate: "{abcdefgh}",
   287  			expectedOutput:  "{abcdefgh}",
   288  		},
   289  		{
   290  			labelToTruncate: "{abcdefghij}",
   291  			expectedOutput:  "{abcdefgh}",
   292  		},
   293  		{
   294  			labelToTruncate: "{abcde花}",
   295  			expectedOutput:  "{abcde花}",
   296  		},
   297  		{
   298  			labelToTruncate: "{abcde花朵}",
   299  			expectedOutput:  "{abcde花}",
   300  		},
   301  		{
   302  			labelToTruncate: "{abcde花fghij}",
   303  			expectedOutput:  "{abcde花}",
   304  		},
   305  	} {
   306  		t.Run(tc.labelToTruncate, func(t *testing.T) {
   307  			got := truncateExtLabels(tc.labelToTruncate, testLength)
   308  			testutil.Equals(t, tc.expectedOutput, got)
   309  			testutil.Assert(t, len(got) <= testLength)
   310  		})
   311  	}
   312  }
   313  
   314  func TestEndpointSetUpdate(t *testing.T) {
   315  	const metricsMeta = `
   316  	# HELP thanos_store_nodes_grpc_connections Number of gRPC connection to Store APIs. Opened connection means healthy store APIs available for Querier.
   317  	# TYPE thanos_store_nodes_grpc_connections gauge
   318  	`
   319  	testCases := []struct {
   320  		name       string
   321  		endpoints  []testEndpointMeta
   322  		strict     bool
   323  		connLabels []string
   324  
   325  		expectedEndpoints   int
   326  		expectedConnMetrics string
   327  	}{
   328  		{
   329  			name: "available endpoint",
   330  			endpoints: []testEndpointMeta{
   331  				{
   332  					InfoResponse: sidecarInfo,
   333  					extlsetFn: func(addr string) []labelpb.ZLabelSet {
   334  						return labelpb.ZLabelSetsFromPromLabels(
   335  							labels.FromStrings("addr", addr, "a", "b"),
   336  						)
   337  					},
   338  				},
   339  			},
   340  			connLabels: []string{"store_type"},
   341  
   342  			expectedEndpoints: 1,
   343  			expectedConnMetrics: metricsMeta +
   344  				`
   345  			thanos_store_nodes_grpc_connections{store_type="sidecar"} 1
   346  			`,
   347  		},
   348  		{
   349  			name: "unavailable endpoint",
   350  			endpoints: []testEndpointMeta{
   351  				{
   352  					err:          fmt.Errorf("endpoint unavailable"),
   353  					InfoResponse: sidecarInfo,
   354  					extlsetFn: func(addr string) []labelpb.ZLabelSet {
   355  						return labelpb.ZLabelSetsFromPromLabels(
   356  							labels.FromStrings("addr", addr, "a", "b"),
   357  						)
   358  					},
   359  				},
   360  			},
   361  
   362  			expectedEndpoints:   0,
   363  			expectedConnMetrics: "",
   364  		},
   365  		{
   366  			name: "slow endpoint",
   367  			endpoints: []testEndpointMeta{
   368  				{
   369  					infoDelay:    5 * time.Second,
   370  					InfoResponse: sidecarInfo,
   371  					extlsetFn: func(addr string) []labelpb.ZLabelSet {
   372  						return labelpb.ZLabelSetsFromPromLabels(
   373  							labels.FromStrings("addr", addr, "a", "b"),
   374  						)
   375  					},
   376  				},
   377  			},
   378  
   379  			expectedEndpoints:   0,
   380  			expectedConnMetrics: "",
   381  		},
   382  		{
   383  			name: "strict endpoint",
   384  			endpoints: []testEndpointMeta{
   385  				{
   386  					InfoResponse: sidecarInfo,
   387  					extlsetFn: func(addr string) []labelpb.ZLabelSet {
   388  						return labelpb.ZLabelSetsFromPromLabels(
   389  							labels.FromStrings("addr", addr, "a", "b"),
   390  						)
   391  					},
   392  				},
   393  			},
   394  			strict:            true,
   395  			connLabels:        []string{"store_type"},
   396  			expectedEndpoints: 1,
   397  			expectedConnMetrics: metricsMeta +
   398  				`
   399  			thanos_store_nodes_grpc_connections{store_type="sidecar"} 1
   400  			`,
   401  		},
   402  		{
   403  			name: "long external labels",
   404  			endpoints: []testEndpointMeta{
   405  				{
   406  					InfoResponse: sidecarInfo,
   407  					// Simulate very long external labels.
   408  					extlsetFn: func(addr string) []labelpb.ZLabelSet {
   409  						sLabel := []string{}
   410  						for i := 0; i < 1000; i++ {
   411  							sLabel = append(sLabel, "lbl")
   412  							sLabel = append(sLabel, "val")
   413  						}
   414  						return labelpb.ZLabelSetsFromPromLabels(
   415  							labels.FromStrings(sLabel...),
   416  						)
   417  					},
   418  				},
   419  			},
   420  			expectedEndpoints: 1,
   421  			expectedConnMetrics: metricsMeta + `
   422  			thanos_store_nodes_grpc_connections{external_labels="{lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val}",store_type="sidecar"} 1
   423  			`,
   424  		},
   425  	}
   426  
   427  	for _, tc := range testCases {
   428  		t.Run(tc.name, func(t *testing.T) {
   429  			endpoints, err := startTestEndpoints(tc.endpoints)
   430  			testutil.Ok(t, err)
   431  			defer endpoints.Close()
   432  
   433  			discoveredEndpointAddr := endpoints.EndpointAddresses()
   434  			// Specify only "store_type" to exclude "external_labels".
   435  			endpointSet := makeEndpointSet(discoveredEndpointAddr, tc.strict, time.Now, tc.connLabels...)
   436  			defer endpointSet.Close()
   437  
   438  			endpointSet.Update(context.Background())
   439  			testutil.Equals(t, tc.expectedEndpoints, len(endpointSet.GetEndpointStatus()))
   440  			testutil.Equals(t, tc.expectedEndpoints, len(endpointSet.GetStoreClients()))
   441  
   442  			testutil.Ok(t, promtestutil.CollectAndCompare(endpointSet.endpointsMetric, strings.NewReader(tc.expectedConnMetrics)))
   443  		})
   444  	}
   445  }
   446  
   447  func TestEndpointSetUpdate_DuplicateSpecs(t *testing.T) {
   448  	endpoints, err := startTestEndpoints([]testEndpointMeta{
   449  		{
   450  			InfoResponse: sidecarInfo,
   451  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   452  				return labelpb.ZLabelSetsFromPromLabels(
   453  					labels.FromStrings("addr", addr, "a", "b"),
   454  				)
   455  			},
   456  		},
   457  	})
   458  	testutil.Ok(t, err)
   459  	defer endpoints.Close()
   460  
   461  	discoveredEndpointAddr := endpoints.EndpointAddresses()
   462  	discoveredEndpointAddr = append(discoveredEndpointAddr, discoveredEndpointAddr[0])
   463  
   464  	endpointSet := makeEndpointSet(discoveredEndpointAddr, false, time.Now)
   465  	defer endpointSet.Close()
   466  
   467  	endpointSet.Update(context.Background())
   468  	testutil.Equals(t, 1, len(endpointSet.endpoints))
   469  }
   470  
   471  func TestEndpointSetUpdate_EndpointGoingAway(t *testing.T) {
   472  	endpoints, err := startTestEndpoints([]testEndpointMeta{
   473  		{
   474  			InfoResponse: sidecarInfo,
   475  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   476  				return labelpb.ZLabelSetsFromPromLabels(
   477  					labels.FromStrings("addr", addr, "a", "b"),
   478  				)
   479  			},
   480  		},
   481  	})
   482  	testutil.Ok(t, err)
   483  	defer endpoints.Close()
   484  
   485  	discoveredEndpointAddr := endpoints.EndpointAddresses()
   486  	endpointSet := makeEndpointSet(discoveredEndpointAddr, false, time.Now)
   487  	defer endpointSet.Close()
   488  
   489  	// Initial update.
   490  	endpointSet.Update(context.Background())
   491  	testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus()))
   492  	testutil.Equals(t, 1, len(endpointSet.GetStoreClients()))
   493  
   494  	endpoints.CloseOne(discoveredEndpointAddr[0])
   495  	endpointSet.Update(context.Background())
   496  	testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus()))
   497  	testutil.Equals(t, 0, len(endpointSet.GetStoreClients()))
   498  }
   499  
   500  func TestEndpointSetUpdate_EndpointComingOnline(t *testing.T) {
   501  	endpoints, err := startTestEndpoints([]testEndpointMeta{
   502  		{
   503  			err:          fmt.Errorf("endpoint unavailable"),
   504  			InfoResponse: sidecarInfo,
   505  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   506  				return nil
   507  			},
   508  		},
   509  	})
   510  	testutil.Ok(t, err)
   511  	defer endpoints.Close()
   512  
   513  	discoveredEndpointAddr := endpoints.EndpointAddresses()
   514  	endpointSet := makeEndpointSet(discoveredEndpointAddr, false, time.Now)
   515  	defer endpointSet.Close()
   516  
   517  	// Initial update.
   518  	endpointSet.Update(context.Background())
   519  	testutil.Equals(t, 0, len(endpointSet.GetEndpointStatus()))
   520  	testutil.Equals(t, 0, len(endpointSet.GetStoreClients()))
   521  
   522  	srvAddr := discoveredEndpointAddr[0]
   523  	endpoints.endpoints[srvAddr].setResponseError(nil)
   524  	endpointSet.Update(context.Background())
   525  	testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus()))
   526  	testutil.Equals(t, 1, len(endpointSet.GetStoreClients()))
   527  }
   528  
   529  func TestEndpointSetUpdate_StrictEndpointMetadata(t *testing.T) {
   530  	info := sidecarInfo
   531  	info.Store.MinTime = 111
   532  	info.Store.MaxTime = 222
   533  	endpoints, err := startTestEndpoints([]testEndpointMeta{
   534  		{
   535  			err:          fmt.Errorf("endpoint unavailable"),
   536  			InfoResponse: info,
   537  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   538  				return nil
   539  			},
   540  		},
   541  	})
   542  	testutil.Ok(t, err)
   543  	defer endpoints.Close()
   544  
   545  	discoveredEndpointAddr := endpoints.EndpointAddresses()
   546  	endpointSet := makeEndpointSet(discoveredEndpointAddr, true, time.Now)
   547  	defer endpointSet.Close()
   548  
   549  	addr := discoveredEndpointAddr[0]
   550  	// Initial update.
   551  	endpointSet.Update(context.Background())
   552  	testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus()))
   553  	testutil.Equals(t, int64(math.MinInt64), endpointSet.endpoints[addr].metadata.Store.MinTime)
   554  	testutil.Equals(t, int64(math.MaxInt64), endpointSet.endpoints[addr].metadata.Store.MaxTime)
   555  
   556  	endpoints.endpoints[addr].setResponseError(nil)
   557  	endpointSet.Update(context.Background())
   558  	testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus()))
   559  	testutil.Equals(t, info.Store.MinTime, endpointSet.endpoints[addr].metadata.Store.MinTime)
   560  	testutil.Equals(t, info.Store.MaxTime, endpointSet.endpoints[addr].metadata.Store.MaxTime)
   561  
   562  	endpoints.CloseOne(addr)
   563  	endpointSet.Update(context.Background())
   564  	testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus()))
   565  	testutil.Equals(t, info.Store.MinTime, endpointSet.endpoints[addr].metadata.Store.MinTime)
   566  	testutil.Equals(t, info.Store.MaxTime, endpointSet.endpoints[addr].metadata.Store.MaxTime)
   567  }
   568  
   569  func TestEndpointSetUpdate_PruneInactiveEndpoints(t *testing.T) {
   570  	testCases := []struct {
   571  		name      string
   572  		endpoints []testEndpointMeta
   573  		strict    bool
   574  
   575  		expectedEndpoints int
   576  	}{
   577  		{
   578  			name:   "non-strict endpoint",
   579  			strict: false,
   580  			endpoints: []testEndpointMeta{
   581  				{
   582  					InfoResponse: sidecarInfo,
   583  					extlsetFn: func(addr string) []labelpb.ZLabelSet {
   584  						return labelpb.ZLabelSetsFromPromLabels(
   585  							labels.FromStrings("addr", addr, "a", "b"),
   586  						)
   587  					},
   588  				},
   589  			},
   590  			expectedEndpoints: 0,
   591  		},
   592  		{
   593  			name:   "strict endpoint",
   594  			strict: true,
   595  			endpoints: []testEndpointMeta{
   596  				{
   597  					InfoResponse: sidecarInfo,
   598  					extlsetFn: func(addr string) []labelpb.ZLabelSet {
   599  						return labelpb.ZLabelSetsFromPromLabels(
   600  							labels.FromStrings("addr", addr, "a", "b"),
   601  						)
   602  					},
   603  				},
   604  			},
   605  			expectedEndpoints: 1,
   606  		},
   607  	}
   608  
   609  	for _, tc := range testCases {
   610  		t.Run(tc.name, func(t *testing.T) {
   611  			endpoints, err := startTestEndpoints(tc.endpoints)
   612  			testutil.Ok(t, err)
   613  			defer endpoints.Close()
   614  
   615  			updateTime := time.Now()
   616  			discoveredEndpointAddr := endpoints.EndpointAddresses()
   617  			endpointSet := makeEndpointSet(discoveredEndpointAddr, tc.strict, func() time.Time { return updateTime })
   618  			defer endpointSet.Close()
   619  
   620  			endpointSet.Update(context.Background())
   621  			testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus()))
   622  			testutil.Equals(t, 1, len(endpointSet.GetStoreClients()))
   623  
   624  			addr := discoveredEndpointAddr[0]
   625  			endpoints.endpoints[addr].setResponseError(errors.New("failed info request"))
   626  			endpoints.stores[addr].setResponseError(errors.New("failed info request"))
   627  			endpointSet.Update(context.Background())
   628  
   629  			updateTime = updateTime.Add(10 * time.Minute)
   630  			endpointSet.Update(context.Background())
   631  			testutil.Equals(t, tc.expectedEndpoints, len(endpointSet.GetEndpointStatus()))
   632  			testutil.Equals(t, tc.expectedEndpoints, len(endpointSet.GetStoreClients()))
   633  		})
   634  	}
   635  }
   636  
   637  func TestEndpointSetUpdate_AtomicEndpointAdditions(t *testing.T) {
   638  	numResponses := 4
   639  	metas := makeInfoResponses(numResponses)
   640  	metas[1].infoDelay = 2 * time.Second
   641  
   642  	endpoints, err := startTestEndpoints(metas)
   643  	testutil.Ok(t, err)
   644  	defer endpoints.Close()
   645  
   646  	updateTime := time.Now()
   647  	discoveredEndpointAddr := endpoints.EndpointAddresses()
   648  	endpointSet := makeEndpointSet(discoveredEndpointAddr, false, func() time.Time { return updateTime })
   649  	endpointSet.endpointInfoTimeout = 3 * time.Second
   650  	defer endpointSet.Close()
   651  
   652  	var wg sync.WaitGroup
   653  	wg.Add(1)
   654  	go func() {
   655  		defer wg.Done()
   656  		require.Never(t, func() bool {
   657  			numStatuses := len(endpointSet.GetStoreClients())
   658  			return numStatuses != numResponses && numStatuses != 0
   659  		}, 3*time.Second, 100*time.Millisecond)
   660  	}()
   661  
   662  	endpointSet.Update(context.Background())
   663  	testutil.Equals(t, numResponses, len(endpointSet.GetEndpointStatus()))
   664  	testutil.Equals(t, numResponses, len(endpointSet.GetStoreClients()))
   665  	wg.Wait()
   666  }
   667  
   668  func TestEndpointSetUpdate_AvailabilityScenarios(t *testing.T) {
   669  	endpoints, err := startTestEndpoints([]testEndpointMeta{
   670  		{
   671  			InfoResponse: sidecarInfo,
   672  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   673  				return []labelpb.ZLabelSet{
   674  					{
   675  						Labels: []labelpb.ZLabel{
   676  							{Name: "addr", Value: addr},
   677  						},
   678  					},
   679  					{
   680  						Labels: []labelpb.ZLabel{
   681  							{Name: "a", Value: "b"},
   682  						},
   683  					},
   684  				}
   685  			},
   686  		},
   687  		{
   688  			InfoResponse: sidecarInfo,
   689  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   690  				return []labelpb.ZLabelSet{
   691  					{
   692  						Labels: []labelpb.ZLabel{
   693  							{Name: "addr", Value: addr},
   694  						},
   695  					},
   696  					{
   697  						Labels: []labelpb.ZLabel{
   698  							{Name: "a", Value: "b"},
   699  						},
   700  					},
   701  				}
   702  			},
   703  		},
   704  		{
   705  			InfoResponse: queryInfo,
   706  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   707  				return []labelpb.ZLabelSet{
   708  					{
   709  						Labels: []labelpb.ZLabel{
   710  							{Name: "addr", Value: addr},
   711  						},
   712  					},
   713  					{
   714  						Labels: []labelpb.ZLabel{
   715  							{Name: "a", Value: "b"},
   716  						},
   717  					},
   718  				}
   719  			},
   720  		},
   721  	})
   722  	testutil.Ok(t, err)
   723  	defer endpoints.Close()
   724  
   725  	discoveredEndpointAddr := endpoints.EndpointAddresses()
   726  
   727  	now := time.Now()
   728  	nowFunc := func() time.Time { return now }
   729  	// Testing if duplicates can cause weird results.
   730  	discoveredEndpointAddr = append(discoveredEndpointAddr, discoveredEndpointAddr[0])
   731  	endpointSet := NewEndpointSet(nowFunc, nil, nil,
   732  		func() (specs []*GRPCEndpointSpec) {
   733  			for _, addr := range discoveredEndpointAddr {
   734  				specs = append(specs, NewGRPCEndpointSpec(addr, false))
   735  			}
   736  			return specs
   737  		},
   738  		testGRPCOpts, time.Minute, 2*time.Second)
   739  	defer endpointSet.Close()
   740  
   741  	// Initial update.
   742  	endpointSet.Update(context.Background())
   743  	testutil.Equals(t, 3, len(endpointSet.endpoints))
   744  
   745  	// Start with one not available.
   746  	endpoints.CloseOne(discoveredEndpointAddr[2])
   747  
   748  	// Should not matter how many of these we run.
   749  	endpointSet.Update(context.Background())
   750  	endpointSet.Update(context.Background())
   751  	testutil.Equals(t, 2, len(endpointSet.GetStoreClients()))
   752  	testutil.Equals(t, 3, len(endpointSet.GetEndpointStatus()))
   753  
   754  	for addr, e := range endpointSet.endpoints {
   755  		testutil.Equals(t, addr, e.addr)
   756  
   757  		lset := e.LabelSets()
   758  		testutil.Equals(t, 2, len(lset))
   759  		testutil.Equals(t, "addr", lset[0][0].Name)
   760  		testutil.Equals(t, addr, lset[0][0].Value)
   761  		testutil.Equals(t, "a", lset[1][0].Name)
   762  		testutil.Equals(t, "b", lset[1][0].Value)
   763  		assertRegisteredAPIs(t, endpoints.exposedAPIs[addr], e)
   764  	}
   765  
   766  	// Check stats.
   767  	expected := newEndpointAPIStats()
   768  	expected[component.Sidecar] = map[string]int{
   769  		fmt.Sprintf("{a=\"b\"},{addr=\"%s\"}", discoveredEndpointAddr[0]): 1,
   770  		fmt.Sprintf("{a=\"b\"},{addr=\"%s\"}", discoveredEndpointAddr[1]): 1,
   771  	}
   772  	testutil.Equals(t, expected, endpointSet.endpointsMetric.storeNodes)
   773  
   774  	// Remove address from discovered and reset last check, which should ensure cleanup of status on next update.
   775  	now = now.Add(3 * time.Minute)
   776  	discoveredEndpointAddr = discoveredEndpointAddr[:len(discoveredEndpointAddr)-2]
   777  	endpointSet.Update(context.Background())
   778  	testutil.Equals(t, 2, len(endpointSet.endpoints))
   779  
   780  	endpoints.CloseOne(discoveredEndpointAddr[0])
   781  	delete(expected[component.Sidecar], fmt.Sprintf("{a=\"b\"},{addr=\"%s\"}", discoveredEndpointAddr[0]))
   782  
   783  	// We expect Update to tear down store client for closed store server.
   784  	endpointSet.Update(context.Background())
   785  	testutil.Equals(t, 1, len(endpointSet.GetStoreClients()), "only one service should respond just fine, so we expect one client to be ready.")
   786  
   787  	addr := discoveredEndpointAddr[1]
   788  	st, ok := endpointSet.endpoints[addr]
   789  	testutil.Assert(t, ok, "addr exist")
   790  	testutil.Equals(t, addr, st.addr)
   791  
   792  	lset := st.LabelSets()
   793  	testutil.Equals(t, 2, len(lset))
   794  	testutil.Equals(t, "addr", lset[0][0].Name)
   795  	testutil.Equals(t, addr, lset[0][0].Value)
   796  	testutil.Equals(t, "a", lset[1][0].Name)
   797  	testutil.Equals(t, "b", lset[1][0].Value)
   798  	testutil.Equals(t, expected, endpointSet.endpointsMetric.storeNodes)
   799  
   800  	// New big batch of endpoints.
   801  	endpoint2, err := startTestEndpoints([]testEndpointMeta{
   802  		{
   803  			InfoResponse: queryInfo,
   804  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   805  				return []labelpb.ZLabelSet{
   806  					{
   807  						Labels: []labelpb.ZLabel{
   808  							{Name: "l1", Value: "v2"},
   809  							{Name: "l2", Value: "v3"},
   810  						},
   811  					},
   812  					{
   813  						Labels: []labelpb.ZLabel{
   814  							{Name: "l3", Value: "v4"},
   815  						},
   816  					},
   817  				}
   818  			},
   819  		},
   820  		{
   821  			// Duplicated Querier, in previous versions it would be deduplicated. Now it should be not.
   822  			InfoResponse: queryInfo,
   823  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   824  				return []labelpb.ZLabelSet{
   825  					{
   826  						Labels: []labelpb.ZLabel{
   827  							{Name: "l1", Value: "v2"},
   828  							{Name: "l2", Value: "v3"},
   829  						},
   830  					},
   831  					{
   832  						Labels: []labelpb.ZLabel{
   833  							{Name: "l3", Value: "v4"},
   834  						},
   835  					},
   836  				}
   837  			},
   838  		},
   839  		{
   840  			InfoResponse: sidecarInfo,
   841  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   842  				return []labelpb.ZLabelSet{
   843  					{
   844  						Labels: []labelpb.ZLabel{
   845  							{Name: "l1", Value: "v2"},
   846  							{Name: "l2", Value: "v3"},
   847  						},
   848  					},
   849  				}
   850  			},
   851  		},
   852  		{
   853  			// Duplicated Sidecar, in previous versions it would be deduplicated. Now it should be not.
   854  			InfoResponse: sidecarInfo,
   855  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   856  				return []labelpb.ZLabelSet{
   857  					{
   858  						Labels: []labelpb.ZLabel{
   859  							{Name: "l1", Value: "v2"},
   860  							{Name: "l2", Value: "v3"},
   861  						},
   862  					},
   863  				}
   864  			},
   865  		},
   866  		{
   867  			// Querier that duplicates with sidecar, in previous versions it would be deduplicated. Now it should be not.
   868  			InfoResponse: queryInfo,
   869  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   870  				return []labelpb.ZLabelSet{
   871  					{
   872  						Labels: []labelpb.ZLabel{
   873  							{Name: "l1", Value: "v2"},
   874  							{Name: "l2", Value: "v3"},
   875  						},
   876  					},
   877  				}
   878  			},
   879  		},
   880  		{
   881  			// Ruler that duplicates with sidecar, in previous versions it would be deduplicated. Now it should be not.
   882  			// Warning should be produced.
   883  			InfoResponse: ruleInfo,
   884  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   885  				return []labelpb.ZLabelSet{
   886  					{
   887  						Labels: []labelpb.ZLabel{
   888  							{Name: "l1", Value: "v2"},
   889  							{Name: "l2", Value: "v3"},
   890  						},
   891  					},
   892  				}
   893  			},
   894  		},
   895  		{
   896  			// Duplicated Rule, in previous versions it would be deduplicated. Now it should be not. Warning should be produced.
   897  			InfoResponse: ruleInfo,
   898  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   899  				return []labelpb.ZLabelSet{
   900  					{
   901  						Labels: []labelpb.ZLabel{
   902  							{Name: "l1", Value: "v2"},
   903  							{Name: "l2", Value: "v3"},
   904  						},
   905  					},
   906  				}
   907  			},
   908  		},
   909  		// Two pre v0.8.0 store gateway nodes, they don't have ext labels set.
   910  		{
   911  			InfoResponse: storeGWInfo,
   912  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   913  				return []labelpb.ZLabelSet{}
   914  			},
   915  		},
   916  		{
   917  			InfoResponse: storeGWInfo,
   918  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   919  				return []labelpb.ZLabelSet{}
   920  			},
   921  		},
   922  		// Regression tests against https://github.com/thanos-io/thanos/issues/1632: From v0.8.0 stores advertise labels.
   923  		// If the object storage handled by store gateway has only one sidecar we used to hitting issue.
   924  		{
   925  			InfoResponse: storeGWInfo,
   926  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   927  				return []labelpb.ZLabelSet{
   928  					{
   929  						Labels: []labelpb.ZLabel{
   930  							{Name: "l1", Value: "v2"},
   931  							{Name: "l2", Value: "v3"},
   932  						},
   933  					},
   934  					{
   935  						Labels: []labelpb.ZLabel{
   936  							{Name: "l3", Value: "v4"},
   937  						},
   938  					},
   939  				}
   940  			},
   941  		},
   942  		// Stores v0.8.1 has compatibility labels. Check if they are correctly removed.
   943  		{
   944  			InfoResponse: storeGWInfo,
   945  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   946  				return []labelpb.ZLabelSet{
   947  					{
   948  						Labels: []labelpb.ZLabel{
   949  							{Name: "l1", Value: "v2"},
   950  							{Name: "l2", Value: "v3"},
   951  						},
   952  					},
   953  					{
   954  						Labels: []labelpb.ZLabel{
   955  							{Name: "l3", Value: "v4"},
   956  						},
   957  					},
   958  					{
   959  						Labels: []labelpb.ZLabel{
   960  							{Name: store.CompatibilityTypeLabelName, Value: "store"},
   961  						},
   962  					},
   963  				}
   964  			},
   965  		},
   966  		// Duplicated store, in previous versions it would be deduplicated. Now it should be not.
   967  		{
   968  			InfoResponse: storeGWInfo,
   969  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   970  				return []labelpb.ZLabelSet{
   971  					{
   972  						Labels: []labelpb.ZLabel{
   973  							{Name: "l1", Value: "v2"},
   974  							{Name: "l2", Value: "v3"},
   975  						},
   976  					},
   977  					{
   978  						Labels: []labelpb.ZLabel{
   979  							{Name: "l3", Value: "v4"},
   980  						},
   981  					},
   982  					{
   983  						Labels: []labelpb.ZLabel{
   984  							{Name: store.CompatibilityTypeLabelName, Value: "store"},
   985  						},
   986  					},
   987  				}
   988  			},
   989  		},
   990  		{
   991  			InfoResponse: receiveInfo,
   992  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
   993  				return []labelpb.ZLabelSet{
   994  					{
   995  						Labels: []labelpb.ZLabel{
   996  							{Name: "l1", Value: "v2"},
   997  							{Name: "l2", Value: "v3"},
   998  						},
   999  					},
  1000  					{
  1001  						Labels: []labelpb.ZLabel{
  1002  							{Name: "l3", Value: "v4"},
  1003  						},
  1004  					},
  1005  				}
  1006  			},
  1007  		},
  1008  		// Duplicate receiver
  1009  		{
  1010  			InfoResponse: receiveInfo,
  1011  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
  1012  				return []labelpb.ZLabelSet{
  1013  					{
  1014  						Labels: []labelpb.ZLabel{
  1015  							{Name: "l1", Value: "v2"},
  1016  							{Name: "l2", Value: "v3"},
  1017  						},
  1018  					},
  1019  					{
  1020  						Labels: []labelpb.ZLabel{
  1021  							{Name: "l3", Value: "v4"},
  1022  						},
  1023  					},
  1024  				}
  1025  			},
  1026  		},
  1027  	})
  1028  	testutil.Ok(t, err)
  1029  	defer endpoint2.Close()
  1030  
  1031  	discoveredEndpointAddr = append(discoveredEndpointAddr, endpoint2.EndpointAddresses()...)
  1032  
  1033  	// New stores should be loaded.
  1034  	endpointSet.Update(context.Background())
  1035  	testutil.Equals(t, 1+len(endpoint2.srvs), len(endpointSet.GetStoreClients()))
  1036  
  1037  	// Check stats.
  1038  	expected = newEndpointAPIStats()
  1039  	expected[component.Query] = map[string]int{
  1040  		"{l1=\"v2\", l2=\"v3\"}":             1,
  1041  		"{l1=\"v2\", l2=\"v3\"},{l3=\"v4\"}": 2,
  1042  	}
  1043  	expected[component.Rule] = map[string]int{
  1044  		"{l1=\"v2\", l2=\"v3\"}": 2,
  1045  	}
  1046  	expected[component.Sidecar] = map[string]int{
  1047  		fmt.Sprintf("{a=\"b\"},{addr=\"%s\"}", discoveredEndpointAddr[1]): 1,
  1048  		"{l1=\"v2\", l2=\"v3\"}": 2,
  1049  	}
  1050  	expected[component.Store] = map[string]int{
  1051  		"":                                   2,
  1052  		"{l1=\"v2\", l2=\"v3\"},{l3=\"v4\"}": 3,
  1053  	}
  1054  	expected[component.Receive] = map[string]int{
  1055  		"{l1=\"v2\", l2=\"v3\"},{l3=\"v4\"}": 2,
  1056  	}
  1057  	testutil.Equals(t, expected, endpointSet.endpointsMetric.storeNodes)
  1058  
  1059  	// Close remaining endpoint from previous batch
  1060  	endpoints.CloseOne(discoveredEndpointAddr[1])
  1061  	endpointSet.Update(context.Background())
  1062  
  1063  	for addr, e := range endpointSet.getQueryableRefs() {
  1064  		testutil.Equals(t, addr, e.addr)
  1065  		assertRegisteredAPIs(t, endpoint2.exposedAPIs[addr], e)
  1066  	}
  1067  
  1068  	// Check statuses.
  1069  	testutil.Equals(t, 2+len(endpoint2.srvs), len(endpointSet.GetEndpointStatus()))
  1070  }
  1071  
  1072  func TestEndpointSet_Update_NoneAvailable(t *testing.T) {
  1073  	endpoints, err := startTestEndpoints([]testEndpointMeta{
  1074  		{
  1075  			InfoResponse: sidecarInfo,
  1076  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
  1077  				return []labelpb.ZLabelSet{
  1078  					{
  1079  						Labels: []labelpb.ZLabel{
  1080  							{
  1081  								Name:  "addr",
  1082  								Value: addr,
  1083  							},
  1084  						},
  1085  					},
  1086  				}
  1087  			},
  1088  		},
  1089  		{
  1090  			InfoResponse: sidecarInfo,
  1091  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
  1092  				return []labelpb.ZLabelSet{
  1093  					{
  1094  						Labels: []labelpb.ZLabel{
  1095  							{
  1096  								Name:  "addr",
  1097  								Value: addr,
  1098  							},
  1099  						},
  1100  					},
  1101  				}
  1102  			},
  1103  		},
  1104  	})
  1105  	testutil.Ok(t, err)
  1106  	defer endpoints.Close()
  1107  
  1108  	initialEndpointAddr := endpoints.EndpointAddresses()
  1109  	endpoints.CloseOne(initialEndpointAddr[0])
  1110  	endpoints.CloseOne(initialEndpointAddr[1])
  1111  
  1112  	endpointSet := NewEndpointSet(time.Now, nil, nil,
  1113  		func() (specs []*GRPCEndpointSpec) {
  1114  			for _, addr := range initialEndpointAddr {
  1115  				specs = append(specs, NewGRPCEndpointSpec(addr, false))
  1116  			}
  1117  			return specs
  1118  		},
  1119  		testGRPCOpts, time.Minute, 2*time.Second)
  1120  	defer endpointSet.Close()
  1121  
  1122  	// Should not matter how many of these we run.
  1123  	endpointSet.Update(context.Background())
  1124  	endpointSet.Update(context.Background())
  1125  	testutil.Equals(t, 0, len(endpointSet.GetStoreClients()), "none of services should respond just fine, so we expect no client to be ready.")
  1126  
  1127  	// Leak test will ensure that we don't keep client connection around.
  1128  	expected := newEndpointAPIStats()
  1129  	testutil.Equals(t, expected, endpointSet.endpointsMetric.storeNodes)
  1130  
  1131  }
  1132  
  1133  // TestEndpoint_Update_QuerierStrict tests what happens when the strict mode is enabled/disabled.
  1134  func TestEndpoint_Update_QuerierStrict(t *testing.T) {
  1135  	endpoints, err := startTestEndpoints([]testEndpointMeta{
  1136  		{
  1137  			InfoResponse: &infopb.InfoResponse{
  1138  				ComponentType: component.Sidecar.String(),
  1139  				Store: &infopb.StoreInfo{
  1140  					MinTime: 12345,
  1141  					MaxTime: 54321,
  1142  				},
  1143  				Exemplars:      &infopb.ExemplarsInfo{},
  1144  				Rules:          &infopb.RulesInfo{},
  1145  				MetricMetadata: &infopb.MetricMetadataInfo{},
  1146  				Targets:        &infopb.TargetsInfo{},
  1147  			},
  1148  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
  1149  				return []labelpb.ZLabelSet{
  1150  					{
  1151  						Labels: []labelpb.ZLabel{
  1152  							{
  1153  								Name:  "addr",
  1154  								Value: addr,
  1155  							},
  1156  						},
  1157  					},
  1158  				}
  1159  			},
  1160  		},
  1161  		{
  1162  			InfoResponse: &infopb.InfoResponse{
  1163  				ComponentType: component.Sidecar.String(),
  1164  				Store: &infopb.StoreInfo{
  1165  					MinTime: 66666,
  1166  					MaxTime: 77777,
  1167  				},
  1168  				Exemplars:      &infopb.ExemplarsInfo{},
  1169  				Rules:          &infopb.RulesInfo{},
  1170  				MetricMetadata: &infopb.MetricMetadataInfo{},
  1171  				Targets:        &infopb.TargetsInfo{},
  1172  			},
  1173  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
  1174  				return []labelpb.ZLabelSet{
  1175  					{
  1176  						Labels: []labelpb.ZLabel{
  1177  							{
  1178  								Name:  "addr",
  1179  								Value: addr,
  1180  							},
  1181  						},
  1182  					},
  1183  				}
  1184  			},
  1185  		},
  1186  		// Slow store.
  1187  		{
  1188  			InfoResponse: &infopb.InfoResponse{
  1189  				ComponentType: component.Sidecar.String(),
  1190  				Store: &infopb.StoreInfo{
  1191  					MinTime: 65644,
  1192  					MaxTime: 77777,
  1193  				},
  1194  				Exemplars:      &infopb.ExemplarsInfo{},
  1195  				Rules:          &infopb.RulesInfo{},
  1196  				MetricMetadata: &infopb.MetricMetadataInfo{},
  1197  				Targets:        &infopb.TargetsInfo{},
  1198  			},
  1199  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
  1200  				return []labelpb.ZLabelSet{
  1201  					{
  1202  						Labels: []labelpb.ZLabel{
  1203  							{
  1204  								Name:  "addr",
  1205  								Value: addr,
  1206  							},
  1207  						},
  1208  					},
  1209  				}
  1210  			},
  1211  			infoDelay: 2 * time.Second,
  1212  		},
  1213  	})
  1214  
  1215  	testutil.Ok(t, err)
  1216  	defer endpoints.Close()
  1217  
  1218  	discoveredEndpointAddr := endpoints.EndpointAddresses()
  1219  
  1220  	staticEndpointAddr := discoveredEndpointAddr[0]
  1221  	slowStaticEndpointAddr := discoveredEndpointAddr[2]
  1222  	endpointSet := NewEndpointSet(time.Now, nil, nil, func() (specs []*GRPCEndpointSpec) {
  1223  		return []*GRPCEndpointSpec{
  1224  			NewGRPCEndpointSpec(discoveredEndpointAddr[0], true),
  1225  			NewGRPCEndpointSpec(discoveredEndpointAddr[1], false),
  1226  			NewGRPCEndpointSpec(discoveredEndpointAddr[2], true),
  1227  		}
  1228  	}, testGRPCOpts, time.Minute, 1*time.Second)
  1229  	defer endpointSet.Close()
  1230  
  1231  	// Initial update.
  1232  	endpointSet.Update(context.Background())
  1233  	testutil.Equals(t, 3, len(endpointSet.endpoints), "three clients must be available for running nodes")
  1234  
  1235  	// The endpoint has not responded to the info call and is assumed to cover everything.
  1236  	curMin, curMax := endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MinTime, endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MaxTime
  1237  	testutil.Assert(t, endpointSet.endpoints[slowStaticEndpointAddr].cc.GetState().String() != "SHUTDOWN", "slow store's connection should not be closed")
  1238  	testutil.Equals(t, int64(math.MinInt64), curMin)
  1239  	testutil.Equals(t, int64(math.MaxInt64), curMax)
  1240  
  1241  	// The endpoint is statically defined + strict mode is enabled
  1242  	// so its client + information must be retained.
  1243  	curMin, curMax = endpointSet.endpoints[staticEndpointAddr].metadata.Store.MinTime, endpointSet.endpoints[staticEndpointAddr].metadata.Store.MaxTime
  1244  	testutil.Equals(t, int64(12345), curMin, "got incorrect minimum time")
  1245  	testutil.Equals(t, int64(54321), curMax, "got incorrect minimum time")
  1246  
  1247  	// Successfully retrieve the information and observe minTime/maxTime updating.
  1248  	endpointSet.endpointInfoTimeout = 3 * time.Second
  1249  	endpointSet.Update(context.Background())
  1250  	updatedCurMin, updatedCurMax := endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MinTime, endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MaxTime
  1251  	testutil.Equals(t, int64(65644), updatedCurMin)
  1252  	testutil.Equals(t, int64(77777), updatedCurMax)
  1253  	endpointSet.endpointInfoTimeout = 1 * time.Second
  1254  
  1255  	// Turn off the endpoints.
  1256  	endpoints.Close()
  1257  
  1258  	// Update again many times. Should not matter WRT the static one.
  1259  	endpointSet.Update(context.Background())
  1260  	endpointSet.Update(context.Background())
  1261  	endpointSet.Update(context.Background())
  1262  
  1263  	// Check that the information is the same.
  1264  	testutil.Equals(t, 2, len(endpointSet.GetStoreClients()), "two static clients must remain available")
  1265  	testutil.Equals(t, curMin, endpointSet.endpoints[staticEndpointAddr].metadata.Store.MinTime, "minimum time reported by the store node is different")
  1266  	testutil.Equals(t, curMax, endpointSet.endpoints[staticEndpointAddr].metadata.Store.MaxTime, "minimum time reported by the store node is different")
  1267  	testutil.NotOk(t, endpointSet.endpoints[staticEndpointAddr].status.LastError.originalErr)
  1268  
  1269  	testutil.Equals(t, updatedCurMin, endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MinTime, "minimum time reported by the store node is different")
  1270  	testutil.Equals(t, updatedCurMax, endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MaxTime, "minimum time reported by the store node is different")
  1271  }
  1272  
  1273  func TestEndpointSet_APIs_Discovery(t *testing.T) {
  1274  	endpoints, err := startTestEndpoints([]testEndpointMeta{
  1275  		{
  1276  			InfoResponse: sidecarInfo,
  1277  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
  1278  				return []labelpb.ZLabelSet{}
  1279  			},
  1280  		},
  1281  		{
  1282  			InfoResponse: ruleInfo,
  1283  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
  1284  				return []labelpb.ZLabelSet{}
  1285  			},
  1286  		},
  1287  		{
  1288  			InfoResponse: receiveInfo,
  1289  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
  1290  				return []labelpb.ZLabelSet{}
  1291  			},
  1292  		},
  1293  		{
  1294  			InfoResponse: storeGWInfo,
  1295  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
  1296  				return []labelpb.ZLabelSet{}
  1297  			},
  1298  		},
  1299  		{
  1300  			InfoResponse: queryInfo,
  1301  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
  1302  				return []labelpb.ZLabelSet{}
  1303  			},
  1304  		},
  1305  	})
  1306  	testutil.Ok(t, err)
  1307  	defer endpoints.Close()
  1308  
  1309  	type discoveryState struct {
  1310  		name                   string
  1311  		endpointSpec           func() []*GRPCEndpointSpec
  1312  		expectedStores         int
  1313  		expectedRules          int
  1314  		expectedTarget         int
  1315  		expectedMetricMetadata int
  1316  		expectedExemplars      int
  1317  		expectedQueryAPIs      int
  1318  	}
  1319  
  1320  	for _, tc := range []struct {
  1321  		states []discoveryState
  1322  		name   string
  1323  	}{
  1324  		{
  1325  			name: "All endpoints discovered concurrently",
  1326  			states: []discoveryState{
  1327  				{
  1328  					name:         "no endpoints",
  1329  					endpointSpec: nil,
  1330  				},
  1331  				{
  1332  					name: "Sidecar, Ruler, Querier, Receiver and StoreGW discovered",
  1333  					endpointSpec: func() []*GRPCEndpointSpec {
  1334  						endpointSpec := make([]*GRPCEndpointSpec, 0, len(endpoints.orderAddrs))
  1335  						for _, addr := range endpoints.orderAddrs {
  1336  							endpointSpec = append(endpointSpec, NewGRPCEndpointSpec(addr, false))
  1337  						}
  1338  						return endpointSpec
  1339  					},
  1340  					expectedStores:         5, // sidecar + querier + receiver + storeGW + ruler
  1341  					expectedRules:          3, // sidecar + querier + ruler
  1342  					expectedTarget:         2, // sidecar + querier
  1343  					expectedMetricMetadata: 2, // sidecar + querier
  1344  					expectedExemplars:      3, // sidecar + querier + receiver
  1345  					expectedQueryAPIs:      1, // querier
  1346  				},
  1347  			},
  1348  		},
  1349  		{
  1350  			name: "Sidecar discovery first, eventually Ruler discovered and then Sidecar removed",
  1351  			states: []discoveryState{
  1352  				{
  1353  					name:         "no stores",
  1354  					endpointSpec: nil,
  1355  				},
  1356  				{
  1357  					name: "Sidecar discovered, no Ruler discovered",
  1358  					endpointSpec: func() []*GRPCEndpointSpec {
  1359  						return []*GRPCEndpointSpec{
  1360  							NewGRPCEndpointSpec(endpoints.orderAddrs[0], false),
  1361  						}
  1362  					},
  1363  					expectedStores:         1, // sidecar
  1364  					expectedRules:          1, // sidecar
  1365  					expectedTarget:         1, // sidecar
  1366  					expectedMetricMetadata: 1, // sidecar
  1367  					expectedExemplars:      1, // sidecar
  1368  				},
  1369  				{
  1370  					name: "Ruler discovered",
  1371  					endpointSpec: func() []*GRPCEndpointSpec {
  1372  						return []*GRPCEndpointSpec{
  1373  							NewGRPCEndpointSpec(endpoints.orderAddrs[0], false),
  1374  							NewGRPCEndpointSpec(endpoints.orderAddrs[1], false),
  1375  						}
  1376  					},
  1377  					expectedStores:         2, // sidecar + ruler
  1378  					expectedRules:          2, // sidecar + ruler
  1379  					expectedTarget:         1, // sidecar
  1380  					expectedMetricMetadata: 1, // sidecar
  1381  					expectedExemplars:      1, // sidecar
  1382  				},
  1383  				{
  1384  					name: "Sidecar removed",
  1385  					endpointSpec: func() []*GRPCEndpointSpec {
  1386  						return []*GRPCEndpointSpec{
  1387  							NewGRPCEndpointSpec(endpoints.orderAddrs[1], false),
  1388  						}
  1389  					},
  1390  					expectedStores: 1, // ruler
  1391  					expectedRules:  1, // ruler
  1392  				},
  1393  			},
  1394  		},
  1395  	} {
  1396  		t.Run(tc.name, func(t *testing.T) {
  1397  			currentState := 0
  1398  
  1399  			endpointSet := NewEndpointSet(time.Now, nil, nil,
  1400  				func() []*GRPCEndpointSpec {
  1401  					if tc.states[currentState].endpointSpec == nil {
  1402  						return nil
  1403  					}
  1404  
  1405  					return tc.states[currentState].endpointSpec()
  1406  				},
  1407  				testGRPCOpts, time.Minute, 2*time.Second)
  1408  
  1409  			defer endpointSet.Close()
  1410  
  1411  			for {
  1412  				endpointSet.Update(context.Background())
  1413  
  1414  				gotStores := 0
  1415  				gotRules := 0
  1416  				gotTarget := 0
  1417  				gotExemplars := 0
  1418  				gotMetricMetadata := 0
  1419  				gotQueryAPIs := 0
  1420  
  1421  				for _, er := range endpointSet.endpoints {
  1422  					if er.HasStoreAPI() {
  1423  						gotStores += 1
  1424  					}
  1425  					if er.HasRulesAPI() {
  1426  						gotRules += 1
  1427  					}
  1428  					if er.HasTargetsAPI() {
  1429  						gotTarget += 1
  1430  					}
  1431  					if er.HasExemplarsAPI() {
  1432  						gotExemplars += 1
  1433  					}
  1434  					if er.HasMetricMetadataAPI() {
  1435  						gotMetricMetadata += 1
  1436  					}
  1437  					if er.HasQueryAPI() {
  1438  						gotQueryAPIs += 1
  1439  					}
  1440  				}
  1441  				testutil.Equals(
  1442  					t,
  1443  					tc.states[currentState].expectedStores,
  1444  					gotStores,
  1445  					"unexepected discovered storeAPIs in state %q",
  1446  					tc.states[currentState].name)
  1447  				testutil.Equals(
  1448  					t,
  1449  					tc.states[currentState].expectedRules,
  1450  					gotRules,
  1451  					"unexepected discovered rulesAPIs in state %q",
  1452  					tc.states[currentState].name)
  1453  				testutil.Equals(
  1454  					t,
  1455  					tc.states[currentState].expectedTarget,
  1456  					gotTarget,
  1457  					"unexepected discovered targetAPIs in state %q",
  1458  					tc.states[currentState].name,
  1459  				)
  1460  				testutil.Equals(
  1461  					t,
  1462  					tc.states[currentState].expectedMetricMetadata,
  1463  					gotMetricMetadata,
  1464  					"unexepected discovered metricMetadataAPIs in state %q",
  1465  					tc.states[currentState].name,
  1466  				)
  1467  				testutil.Equals(
  1468  					t,
  1469  					tc.states[currentState].expectedExemplars,
  1470  					gotExemplars,
  1471  					"unexepected discovered ExemplarsAPIs in state %q",
  1472  					tc.states[currentState].name,
  1473  				)
  1474  				testutil.Equals(
  1475  					t,
  1476  					tc.states[currentState].expectedQueryAPIs,
  1477  					gotQueryAPIs,
  1478  					"unexepected discovered QueryAPIs in state %q",
  1479  					tc.states[currentState].name,
  1480  				)
  1481  
  1482  				currentState = currentState + 1
  1483  				if len(tc.states) == currentState {
  1484  					break
  1485  				}
  1486  			}
  1487  		})
  1488  	}
  1489  }
  1490  
  1491  func makeInfoResponses(n int) []testEndpointMeta {
  1492  	responses := make([]testEndpointMeta, 0, n)
  1493  	for i := 0; i < n; i++ {
  1494  		responses = append(responses, testEndpointMeta{
  1495  			InfoResponse: sidecarInfo,
  1496  			extlsetFn: func(addr string) []labelpb.ZLabelSet {
  1497  				return labelpb.ZLabelSetsFromPromLabels(
  1498  					labels.FromStrings("addr", addr, "a", "b"),
  1499  				)
  1500  			},
  1501  		})
  1502  	}
  1503  
  1504  	return responses
  1505  }
  1506  
  1507  type errThatMarshalsToEmptyDict struct {
  1508  	msg string
  1509  }
  1510  
  1511  // MarshalJSON marshals the error and returns and empty dict, not the error string.
  1512  func (e *errThatMarshalsToEmptyDict) MarshalJSON() ([]byte, error) {
  1513  	return json.Marshal(map[string]string{})
  1514  }
  1515  
  1516  // Error returns the original, underlying string.
  1517  func (e *errThatMarshalsToEmptyDict) Error() string {
  1518  	return e.msg
  1519  }
  1520  
  1521  // Test highlights that without wrapping the error, it is marshaled to empty dict {}, not its message.
  1522  func TestEndpointStringError(t *testing.T) {
  1523  	dictErr := &errThatMarshalsToEmptyDict{msg: "Error message"}
  1524  	stringErr := &stringError{originalErr: dictErr}
  1525  
  1526  	endpointstatusMock := map[string]error{}
  1527  	endpointstatusMock["dictErr"] = dictErr
  1528  	endpointstatusMock["stringErr"] = stringErr
  1529  
  1530  	b, err := json.Marshal(endpointstatusMock)
  1531  
  1532  	testutil.Ok(t, err)
  1533  	testutil.Equals(t, []byte(`{"dictErr":{},"stringErr":"Error message"}`), b, "expected to get proper results")
  1534  }
  1535  
  1536  // Errors that usually marshal to empty dict should return the original error string.
  1537  func TestUpdateEndpointStateLastError(t *testing.T) {
  1538  	tcs := []struct {
  1539  		InputError      error
  1540  		ExpectedLastErr string
  1541  	}{
  1542  		{errors.New("normal_err"), `"normal_err"`},
  1543  		{nil, `null`},
  1544  		{&errThatMarshalsToEmptyDict{"the error message"}, `"the error message"`},
  1545  	}
  1546  
  1547  	for _, tc := range tcs {
  1548  		mockEndpointRef := &endpointRef{
  1549  			addr: "mockedStore",
  1550  			metadata: &endpointMetadata{
  1551  				&infopb.InfoResponse{},
  1552  			},
  1553  		}
  1554  
  1555  		mockEndpointRef.update(time.Now, mockEndpointRef.metadata, tc.InputError)
  1556  
  1557  		b, err := json.Marshal(mockEndpointRef.status.LastError)
  1558  		testutil.Ok(t, err)
  1559  		testutil.Equals(t, tc.ExpectedLastErr, string(b))
  1560  	}
  1561  }
  1562  
  1563  func TestUpdateEndpointStateForgetsPreviousErrors(t *testing.T) {
  1564  	mockEndpointRef := &endpointRef{
  1565  		addr: "mockedStore",
  1566  		metadata: &endpointMetadata{
  1567  			&infopb.InfoResponse{},
  1568  		},
  1569  	}
  1570  
  1571  	mockEndpointRef.update(time.Now, mockEndpointRef.metadata, errors.New("test err"))
  1572  
  1573  	b, err := json.Marshal(mockEndpointRef.status.LastError)
  1574  	testutil.Ok(t, err)
  1575  	testutil.Equals(t, `"test err"`, string(b))
  1576  
  1577  	// updating status without and error should clear the previous one.
  1578  	mockEndpointRef.update(time.Now, mockEndpointRef.metadata, nil)
  1579  
  1580  	b, err = json.Marshal(mockEndpointRef.status.LastError)
  1581  	testutil.Ok(t, err)
  1582  	testutil.Equals(t, `null`, string(b))
  1583  }
  1584  
  1585  func makeEndpointSet(discoveredEndpointAddr []string, strict bool, now nowFunc, metricLabels ...string) *EndpointSet {
  1586  	endpointSet := NewEndpointSet(now, nil, nil,
  1587  		func() (specs []*GRPCEndpointSpec) {
  1588  			for _, addr := range discoveredEndpointAddr {
  1589  				specs = append(specs, NewGRPCEndpointSpec(addr, strict))
  1590  			}
  1591  			return specs
  1592  		},
  1593  		testGRPCOpts, time.Minute, time.Second, metricLabels...)
  1594  	return endpointSet
  1595  }
  1596  
  1597  func exposedAPIs(c string) *APIs {
  1598  	switch c {
  1599  	case component.Sidecar.String():
  1600  		return &APIs{
  1601  			store:          true,
  1602  			target:         true,
  1603  			rules:          true,
  1604  			metricMetadata: true,
  1605  			exemplars:      true,
  1606  		}
  1607  	case component.Query.String():
  1608  		return &APIs{
  1609  			store:          true,
  1610  			target:         true,
  1611  			rules:          true,
  1612  			metricMetadata: true,
  1613  			exemplars:      true,
  1614  		}
  1615  	case component.Receive.String():
  1616  		return &APIs{
  1617  			store:     true,
  1618  			exemplars: true,
  1619  		}
  1620  	case component.Rule.String():
  1621  		return &APIs{
  1622  			store: true,
  1623  			rules: true,
  1624  		}
  1625  	case component.Store.String():
  1626  		return &APIs{
  1627  			store: true,
  1628  		}
  1629  	}
  1630  	return &APIs{}
  1631  }
  1632  
  1633  func assertRegisteredAPIs(t *testing.T, expectedAPIs *APIs, er *endpointRef) {
  1634  	testutil.Equals(t, expectedAPIs.store, er.HasStoreAPI())
  1635  	testutil.Equals(t, expectedAPIs.rules, er.HasRulesAPI())
  1636  	testutil.Equals(t, expectedAPIs.target, er.HasTargetsAPI())
  1637  	testutil.Equals(t, expectedAPIs.metricMetadata, er.HasMetricMetadataAPI())
  1638  	testutil.Equals(t, expectedAPIs.exemplars, er.HasExemplarsAPI())
  1639  }
  1640  
  1641  // Regression test for: https://github.com/thanos-io/thanos/issues/4766.
  1642  func TestDeadlockLocking(t *testing.T) {
  1643  	t.Parallel()
  1644  
  1645  	mockEndpointRef := &endpointRef{
  1646  		addr: "mockedStore",
  1647  		metadata: &endpointMetadata{
  1648  			&infopb.InfoResponse{},
  1649  		},
  1650  	}
  1651  
  1652  	g := &errgroup.Group{}
  1653  	deadline := time.Now().Add(3 * time.Second)
  1654  
  1655  	g.Go(func() error {
  1656  		for {
  1657  			if time.Now().After(deadline) {
  1658  				break
  1659  			}
  1660  			mockEndpointRef.update(time.Now, &endpointMetadata{
  1661  				InfoResponse: &infopb.InfoResponse{},
  1662  			}, nil)
  1663  		}
  1664  		return nil
  1665  	})
  1666  
  1667  	g.Go(func() error {
  1668  		for {
  1669  			if time.Now().After(deadline) {
  1670  				break
  1671  			}
  1672  			mockEndpointRef.HasStoreAPI()
  1673  			mockEndpointRef.HasExemplarsAPI()
  1674  			mockEndpointRef.HasMetricMetadataAPI()
  1675  			mockEndpointRef.HasRulesAPI()
  1676  			mockEndpointRef.HasTargetsAPI()
  1677  		}
  1678  		return nil
  1679  	})
  1680  
  1681  	testutil.Ok(t, g.Wait())
  1682  }