github.com/thanos-io/thanos@v0.32.5/pkg/query/endpointset_test.go (about) 1 // Copyright (c) The Thanos Authors. 2 // Licensed under the Apache License 2.0. 3 4 package query 5 6 import ( 7 "context" 8 "encoding/json" 9 "fmt" 10 "math" 11 "net" 12 "strings" 13 "sync" 14 "testing" 15 "time" 16 17 "github.com/stretchr/testify/require" 18 19 "github.com/prometheus/prometheus/model/labels" 20 "github.com/thanos-io/thanos/pkg/store" 21 22 "golang.org/x/sync/errgroup" 23 "google.golang.org/grpc" 24 "google.golang.org/grpc/credentials/insecure" 25 26 "github.com/efficientgo/core/testutil" 27 "github.com/pkg/errors" 28 promtestutil "github.com/prometheus/client_golang/prometheus/testutil" 29 "github.com/thanos-io/thanos/pkg/component" 30 "github.com/thanos-io/thanos/pkg/info/infopb" 31 "github.com/thanos-io/thanos/pkg/store/labelpb" 32 "github.com/thanos-io/thanos/pkg/store/storepb" 33 ) 34 35 var testGRPCOpts = []grpc.DialOption{ 36 grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(math.MaxInt32)), 37 grpc.WithTransportCredentials(insecure.NewCredentials()), 38 } 39 40 var ( 41 sidecarInfo = &infopb.InfoResponse{ 42 ComponentType: component.Sidecar.String(), 43 Store: &infopb.StoreInfo{ 44 MinTime: math.MinInt64, 45 MaxTime: math.MaxInt64, 46 }, 47 Exemplars: &infopb.ExemplarsInfo{}, 48 Rules: &infopb.RulesInfo{}, 49 MetricMetadata: &infopb.MetricMetadataInfo{}, 50 Targets: &infopb.TargetsInfo{}, 51 } 52 queryInfo = &infopb.InfoResponse{ 53 ComponentType: component.Query.String(), 54 Store: &infopb.StoreInfo{ 55 MinTime: math.MinInt64, 56 MaxTime: math.MaxInt64, 57 }, 58 Exemplars: &infopb.ExemplarsInfo{}, 59 Rules: &infopb.RulesInfo{}, 60 MetricMetadata: &infopb.MetricMetadataInfo{}, 61 Targets: &infopb.TargetsInfo{}, 62 Query: &infopb.QueryAPIInfo{}, 63 } 64 ruleInfo = &infopb.InfoResponse{ 65 ComponentType: component.Rule.String(), 66 Store: &infopb.StoreInfo{ 67 MinTime: math.MinInt64, 68 MaxTime: math.MaxInt64, 69 }, 70 Rules: &infopb.RulesInfo{}, 71 } 72 storeGWInfo = &infopb.InfoResponse{ 73 ComponentType: component.Store.String(), 74 Store: &infopb.StoreInfo{ 75 MinTime: math.MinInt64, 76 MaxTime: math.MaxInt64, 77 }, 78 } 79 receiveInfo = &infopb.InfoResponse{ 80 ComponentType: component.Receive.String(), 81 Store: &infopb.StoreInfo{ 82 MinTime: math.MinInt64, 83 MaxTime: math.MaxInt64, 84 }, 85 Exemplars: &infopb.ExemplarsInfo{}, 86 } 87 ) 88 89 type mockedEndpoint struct { 90 infoDelay time.Duration 91 info infopb.InfoResponse 92 err error 93 } 94 95 func (c *mockedEndpoint) setResponseError(err error) { 96 c.err = err 97 } 98 99 func (c *mockedEndpoint) Info(ctx context.Context, r *infopb.InfoRequest) (*infopb.InfoResponse, error) { 100 if c.err != nil { 101 return nil, c.err 102 } 103 104 select { 105 case <-ctx.Done(): 106 return nil, context.Canceled 107 case <-time.After(c.infoDelay): 108 } 109 110 return &c.info, nil 111 } 112 113 type mockedStoreSrv struct { 114 infoDelay time.Duration 115 info storepb.InfoResponse 116 err error 117 } 118 119 func (s *mockedStoreSrv) setResponseError(err error) { 120 s.err = err 121 } 122 123 func (s *mockedStoreSrv) Info(ctx context.Context, _ *storepb.InfoRequest) (*storepb.InfoResponse, error) { 124 if s.err != nil { 125 return nil, s.err 126 } 127 128 select { 129 case <-ctx.Done(): 130 return nil, context.Canceled 131 case <-time.After(s.infoDelay): 132 } 133 134 return &s.info, nil 135 } 136 func (s *mockedStoreSrv) Series(*storepb.SeriesRequest, storepb.Store_SeriesServer) error { 137 return nil 138 } 139 func (s *mockedStoreSrv) LabelNames(context.Context, *storepb.LabelNamesRequest) (*storepb.LabelNamesResponse, error) { 140 return nil, nil 141 } 142 func (s *mockedStoreSrv) LabelValues(context.Context, *storepb.LabelValuesRequest) (*storepb.LabelValuesResponse, error) { 143 return nil, nil 144 } 145 146 type APIs struct { 147 store bool 148 metricMetadata bool 149 rules bool 150 target bool 151 exemplars bool 152 } 153 154 type testEndpointMeta struct { 155 *infopb.InfoResponse 156 extlsetFn func(addr string) []labelpb.ZLabelSet 157 infoDelay time.Duration 158 err error 159 } 160 161 type testEndpoints struct { 162 srvs map[string]*grpc.Server 163 endpoints map[string]*mockedEndpoint 164 stores map[string]*mockedStoreSrv 165 orderAddrs []string 166 exposedAPIs map[string]*APIs 167 } 168 169 func componentTypeToStoreType(componentType string) storepb.StoreType { 170 switch componentType { 171 case component.Query.String(): 172 return storepb.StoreType_QUERY 173 case component.Rule.String(): 174 return storepb.StoreType_RULE 175 case component.Sidecar.String(): 176 return storepb.StoreType_SIDECAR 177 case component.Store.String(): 178 return storepb.StoreType_STORE 179 case component.Receive.String(): 180 return storepb.StoreType_RECEIVE 181 case component.Debug.String(): 182 return storepb.StoreType_DEBUG 183 default: 184 return storepb.StoreType_STORE 185 } 186 } 187 188 func startTestEndpoints(testEndpointMeta []testEndpointMeta) (*testEndpoints, error) { 189 e := &testEndpoints{ 190 srvs: map[string]*grpc.Server{}, 191 endpoints: map[string]*mockedEndpoint{}, 192 stores: map[string]*mockedStoreSrv{}, 193 exposedAPIs: map[string]*APIs{}, 194 } 195 196 for _, meta := range testEndpointMeta { 197 listener, err := net.Listen("tcp", "127.0.0.1:0") 198 if err != nil { 199 // Close so far started servers. 200 e.Close() 201 return nil, err 202 } 203 204 srv := grpc.NewServer() 205 addr := listener.Addr().String() 206 207 storeSrv := &mockedStoreSrv{ 208 err: meta.err, 209 info: storepb.InfoResponse{ 210 LabelSets: meta.extlsetFn(listener.Addr().String()), 211 StoreType: componentTypeToStoreType(meta.ComponentType), 212 }, 213 infoDelay: meta.infoDelay, 214 } 215 216 if meta.Store != nil { 217 storeSrv.info.MinTime = meta.Store.MinTime 218 storeSrv.info.MaxTime = meta.Store.MaxTime 219 } 220 221 endpointSrv := &mockedEndpoint{ 222 err: meta.err, 223 info: infopb.InfoResponse{ 224 LabelSets: meta.extlsetFn(listener.Addr().String()), 225 Store: meta.Store, 226 MetricMetadata: meta.MetricMetadata, 227 Rules: meta.Rules, 228 Targets: meta.Targets, 229 Exemplars: meta.Exemplars, 230 Query: meta.Query, 231 ComponentType: meta.ComponentType, 232 }, 233 infoDelay: meta.infoDelay, 234 } 235 infopb.RegisterInfoServer(srv, endpointSrv) 236 storepb.RegisterStoreServer(srv, storeSrv) 237 go func() { 238 _ = srv.Serve(listener) 239 }() 240 241 e.exposedAPIs[addr] = exposedAPIs(meta.ComponentType) 242 e.srvs[addr] = srv 243 e.endpoints[addr] = endpointSrv 244 e.stores[addr] = storeSrv 245 e.orderAddrs = append(e.orderAddrs, listener.Addr().String()) 246 } 247 248 return e, nil 249 } 250 251 func (e *testEndpoints) EndpointAddresses() []string { 252 var endpoints []string 253 endpoints = append(endpoints, e.orderAddrs...) 254 return endpoints 255 } 256 257 func (e *testEndpoints) Close() { 258 for _, srv := range e.srvs { 259 srv.Stop() 260 } 261 e.srvs = nil 262 } 263 264 func (e *testEndpoints) CloseOne(addr string) { 265 srv, ok := e.srvs[addr] 266 if !ok { 267 return 268 } 269 270 srv.Stop() 271 delete(e.srvs, addr) 272 } 273 274 func TestTruncateExtLabels(t *testing.T) { 275 const testLength = 10 276 277 for _, tc := range []struct { 278 labelToTruncate string 279 expectedOutput string 280 }{ 281 { 282 labelToTruncate: "{abc}", 283 expectedOutput: "{abc}", 284 }, 285 { 286 labelToTruncate: "{abcdefgh}", 287 expectedOutput: "{abcdefgh}", 288 }, 289 { 290 labelToTruncate: "{abcdefghij}", 291 expectedOutput: "{abcdefgh}", 292 }, 293 { 294 labelToTruncate: "{abcde花}", 295 expectedOutput: "{abcde花}", 296 }, 297 { 298 labelToTruncate: "{abcde花朵}", 299 expectedOutput: "{abcde花}", 300 }, 301 { 302 labelToTruncate: "{abcde花fghij}", 303 expectedOutput: "{abcde花}", 304 }, 305 } { 306 t.Run(tc.labelToTruncate, func(t *testing.T) { 307 got := truncateExtLabels(tc.labelToTruncate, testLength) 308 testutil.Equals(t, tc.expectedOutput, got) 309 testutil.Assert(t, len(got) <= testLength) 310 }) 311 } 312 } 313 314 func TestEndpointSetUpdate(t *testing.T) { 315 const metricsMeta = ` 316 # HELP thanos_store_nodes_grpc_connections Number of gRPC connection to Store APIs. Opened connection means healthy store APIs available for Querier. 317 # TYPE thanos_store_nodes_grpc_connections gauge 318 ` 319 testCases := []struct { 320 name string 321 endpoints []testEndpointMeta 322 strict bool 323 connLabels []string 324 325 expectedEndpoints int 326 expectedConnMetrics string 327 }{ 328 { 329 name: "available endpoint", 330 endpoints: []testEndpointMeta{ 331 { 332 InfoResponse: sidecarInfo, 333 extlsetFn: func(addr string) []labelpb.ZLabelSet { 334 return labelpb.ZLabelSetsFromPromLabels( 335 labels.FromStrings("addr", addr, "a", "b"), 336 ) 337 }, 338 }, 339 }, 340 connLabels: []string{"store_type"}, 341 342 expectedEndpoints: 1, 343 expectedConnMetrics: metricsMeta + 344 ` 345 thanos_store_nodes_grpc_connections{store_type="sidecar"} 1 346 `, 347 }, 348 { 349 name: "unavailable endpoint", 350 endpoints: []testEndpointMeta{ 351 { 352 err: fmt.Errorf("endpoint unavailable"), 353 InfoResponse: sidecarInfo, 354 extlsetFn: func(addr string) []labelpb.ZLabelSet { 355 return labelpb.ZLabelSetsFromPromLabels( 356 labels.FromStrings("addr", addr, "a", "b"), 357 ) 358 }, 359 }, 360 }, 361 362 expectedEndpoints: 0, 363 expectedConnMetrics: "", 364 }, 365 { 366 name: "slow endpoint", 367 endpoints: []testEndpointMeta{ 368 { 369 infoDelay: 5 * time.Second, 370 InfoResponse: sidecarInfo, 371 extlsetFn: func(addr string) []labelpb.ZLabelSet { 372 return labelpb.ZLabelSetsFromPromLabels( 373 labels.FromStrings("addr", addr, "a", "b"), 374 ) 375 }, 376 }, 377 }, 378 379 expectedEndpoints: 0, 380 expectedConnMetrics: "", 381 }, 382 { 383 name: "strict endpoint", 384 endpoints: []testEndpointMeta{ 385 { 386 InfoResponse: sidecarInfo, 387 extlsetFn: func(addr string) []labelpb.ZLabelSet { 388 return labelpb.ZLabelSetsFromPromLabels( 389 labels.FromStrings("addr", addr, "a", "b"), 390 ) 391 }, 392 }, 393 }, 394 strict: true, 395 connLabels: []string{"store_type"}, 396 expectedEndpoints: 1, 397 expectedConnMetrics: metricsMeta + 398 ` 399 thanos_store_nodes_grpc_connections{store_type="sidecar"} 1 400 `, 401 }, 402 { 403 name: "long external labels", 404 endpoints: []testEndpointMeta{ 405 { 406 InfoResponse: sidecarInfo, 407 // Simulate very long external labels. 408 extlsetFn: func(addr string) []labelpb.ZLabelSet { 409 sLabel := []string{} 410 for i := 0; i < 1000; i++ { 411 sLabel = append(sLabel, "lbl") 412 sLabel = append(sLabel, "val") 413 } 414 return labelpb.ZLabelSetsFromPromLabels( 415 labels.FromStrings(sLabel...), 416 ) 417 }, 418 }, 419 }, 420 expectedEndpoints: 1, 421 expectedConnMetrics: metricsMeta + ` 422 thanos_store_nodes_grpc_connections{external_labels="{lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val\", lbl=\"val}",store_type="sidecar"} 1 423 `, 424 }, 425 } 426 427 for _, tc := range testCases { 428 t.Run(tc.name, func(t *testing.T) { 429 endpoints, err := startTestEndpoints(tc.endpoints) 430 testutil.Ok(t, err) 431 defer endpoints.Close() 432 433 discoveredEndpointAddr := endpoints.EndpointAddresses() 434 // Specify only "store_type" to exclude "external_labels". 435 endpointSet := makeEndpointSet(discoveredEndpointAddr, tc.strict, time.Now, tc.connLabels...) 436 defer endpointSet.Close() 437 438 endpointSet.Update(context.Background()) 439 testutil.Equals(t, tc.expectedEndpoints, len(endpointSet.GetEndpointStatus())) 440 testutil.Equals(t, tc.expectedEndpoints, len(endpointSet.GetStoreClients())) 441 442 testutil.Ok(t, promtestutil.CollectAndCompare(endpointSet.endpointsMetric, strings.NewReader(tc.expectedConnMetrics))) 443 }) 444 } 445 } 446 447 func TestEndpointSetUpdate_DuplicateSpecs(t *testing.T) { 448 endpoints, err := startTestEndpoints([]testEndpointMeta{ 449 { 450 InfoResponse: sidecarInfo, 451 extlsetFn: func(addr string) []labelpb.ZLabelSet { 452 return labelpb.ZLabelSetsFromPromLabels( 453 labels.FromStrings("addr", addr, "a", "b"), 454 ) 455 }, 456 }, 457 }) 458 testutil.Ok(t, err) 459 defer endpoints.Close() 460 461 discoveredEndpointAddr := endpoints.EndpointAddresses() 462 discoveredEndpointAddr = append(discoveredEndpointAddr, discoveredEndpointAddr[0]) 463 464 endpointSet := makeEndpointSet(discoveredEndpointAddr, false, time.Now) 465 defer endpointSet.Close() 466 467 endpointSet.Update(context.Background()) 468 testutil.Equals(t, 1, len(endpointSet.endpoints)) 469 } 470 471 func TestEndpointSetUpdate_EndpointGoingAway(t *testing.T) { 472 endpoints, err := startTestEndpoints([]testEndpointMeta{ 473 { 474 InfoResponse: sidecarInfo, 475 extlsetFn: func(addr string) []labelpb.ZLabelSet { 476 return labelpb.ZLabelSetsFromPromLabels( 477 labels.FromStrings("addr", addr, "a", "b"), 478 ) 479 }, 480 }, 481 }) 482 testutil.Ok(t, err) 483 defer endpoints.Close() 484 485 discoveredEndpointAddr := endpoints.EndpointAddresses() 486 endpointSet := makeEndpointSet(discoveredEndpointAddr, false, time.Now) 487 defer endpointSet.Close() 488 489 // Initial update. 490 endpointSet.Update(context.Background()) 491 testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus())) 492 testutil.Equals(t, 1, len(endpointSet.GetStoreClients())) 493 494 endpoints.CloseOne(discoveredEndpointAddr[0]) 495 endpointSet.Update(context.Background()) 496 testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus())) 497 testutil.Equals(t, 0, len(endpointSet.GetStoreClients())) 498 } 499 500 func TestEndpointSetUpdate_EndpointComingOnline(t *testing.T) { 501 endpoints, err := startTestEndpoints([]testEndpointMeta{ 502 { 503 err: fmt.Errorf("endpoint unavailable"), 504 InfoResponse: sidecarInfo, 505 extlsetFn: func(addr string) []labelpb.ZLabelSet { 506 return nil 507 }, 508 }, 509 }) 510 testutil.Ok(t, err) 511 defer endpoints.Close() 512 513 discoveredEndpointAddr := endpoints.EndpointAddresses() 514 endpointSet := makeEndpointSet(discoveredEndpointAddr, false, time.Now) 515 defer endpointSet.Close() 516 517 // Initial update. 518 endpointSet.Update(context.Background()) 519 testutil.Equals(t, 0, len(endpointSet.GetEndpointStatus())) 520 testutil.Equals(t, 0, len(endpointSet.GetStoreClients())) 521 522 srvAddr := discoveredEndpointAddr[0] 523 endpoints.endpoints[srvAddr].setResponseError(nil) 524 endpointSet.Update(context.Background()) 525 testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus())) 526 testutil.Equals(t, 1, len(endpointSet.GetStoreClients())) 527 } 528 529 func TestEndpointSetUpdate_StrictEndpointMetadata(t *testing.T) { 530 info := sidecarInfo 531 info.Store.MinTime = 111 532 info.Store.MaxTime = 222 533 endpoints, err := startTestEndpoints([]testEndpointMeta{ 534 { 535 err: fmt.Errorf("endpoint unavailable"), 536 InfoResponse: info, 537 extlsetFn: func(addr string) []labelpb.ZLabelSet { 538 return nil 539 }, 540 }, 541 }) 542 testutil.Ok(t, err) 543 defer endpoints.Close() 544 545 discoveredEndpointAddr := endpoints.EndpointAddresses() 546 endpointSet := makeEndpointSet(discoveredEndpointAddr, true, time.Now) 547 defer endpointSet.Close() 548 549 addr := discoveredEndpointAddr[0] 550 // Initial update. 551 endpointSet.Update(context.Background()) 552 testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus())) 553 testutil.Equals(t, int64(math.MinInt64), endpointSet.endpoints[addr].metadata.Store.MinTime) 554 testutil.Equals(t, int64(math.MaxInt64), endpointSet.endpoints[addr].metadata.Store.MaxTime) 555 556 endpoints.endpoints[addr].setResponseError(nil) 557 endpointSet.Update(context.Background()) 558 testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus())) 559 testutil.Equals(t, info.Store.MinTime, endpointSet.endpoints[addr].metadata.Store.MinTime) 560 testutil.Equals(t, info.Store.MaxTime, endpointSet.endpoints[addr].metadata.Store.MaxTime) 561 562 endpoints.CloseOne(addr) 563 endpointSet.Update(context.Background()) 564 testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus())) 565 testutil.Equals(t, info.Store.MinTime, endpointSet.endpoints[addr].metadata.Store.MinTime) 566 testutil.Equals(t, info.Store.MaxTime, endpointSet.endpoints[addr].metadata.Store.MaxTime) 567 } 568 569 func TestEndpointSetUpdate_PruneInactiveEndpoints(t *testing.T) { 570 testCases := []struct { 571 name string 572 endpoints []testEndpointMeta 573 strict bool 574 575 expectedEndpoints int 576 }{ 577 { 578 name: "non-strict endpoint", 579 strict: false, 580 endpoints: []testEndpointMeta{ 581 { 582 InfoResponse: sidecarInfo, 583 extlsetFn: func(addr string) []labelpb.ZLabelSet { 584 return labelpb.ZLabelSetsFromPromLabels( 585 labels.FromStrings("addr", addr, "a", "b"), 586 ) 587 }, 588 }, 589 }, 590 expectedEndpoints: 0, 591 }, 592 { 593 name: "strict endpoint", 594 strict: true, 595 endpoints: []testEndpointMeta{ 596 { 597 InfoResponse: sidecarInfo, 598 extlsetFn: func(addr string) []labelpb.ZLabelSet { 599 return labelpb.ZLabelSetsFromPromLabels( 600 labels.FromStrings("addr", addr, "a", "b"), 601 ) 602 }, 603 }, 604 }, 605 expectedEndpoints: 1, 606 }, 607 } 608 609 for _, tc := range testCases { 610 t.Run(tc.name, func(t *testing.T) { 611 endpoints, err := startTestEndpoints(tc.endpoints) 612 testutil.Ok(t, err) 613 defer endpoints.Close() 614 615 updateTime := time.Now() 616 discoveredEndpointAddr := endpoints.EndpointAddresses() 617 endpointSet := makeEndpointSet(discoveredEndpointAddr, tc.strict, func() time.Time { return updateTime }) 618 defer endpointSet.Close() 619 620 endpointSet.Update(context.Background()) 621 testutil.Equals(t, 1, len(endpointSet.GetEndpointStatus())) 622 testutil.Equals(t, 1, len(endpointSet.GetStoreClients())) 623 624 addr := discoveredEndpointAddr[0] 625 endpoints.endpoints[addr].setResponseError(errors.New("failed info request")) 626 endpoints.stores[addr].setResponseError(errors.New("failed info request")) 627 endpointSet.Update(context.Background()) 628 629 updateTime = updateTime.Add(10 * time.Minute) 630 endpointSet.Update(context.Background()) 631 testutil.Equals(t, tc.expectedEndpoints, len(endpointSet.GetEndpointStatus())) 632 testutil.Equals(t, tc.expectedEndpoints, len(endpointSet.GetStoreClients())) 633 }) 634 } 635 } 636 637 func TestEndpointSetUpdate_AtomicEndpointAdditions(t *testing.T) { 638 numResponses := 4 639 metas := makeInfoResponses(numResponses) 640 metas[1].infoDelay = 2 * time.Second 641 642 endpoints, err := startTestEndpoints(metas) 643 testutil.Ok(t, err) 644 defer endpoints.Close() 645 646 updateTime := time.Now() 647 discoveredEndpointAddr := endpoints.EndpointAddresses() 648 endpointSet := makeEndpointSet(discoveredEndpointAddr, false, func() time.Time { return updateTime }) 649 endpointSet.endpointInfoTimeout = 3 * time.Second 650 defer endpointSet.Close() 651 652 var wg sync.WaitGroup 653 wg.Add(1) 654 go func() { 655 defer wg.Done() 656 require.Never(t, func() bool { 657 numStatuses := len(endpointSet.GetStoreClients()) 658 return numStatuses != numResponses && numStatuses != 0 659 }, 3*time.Second, 100*time.Millisecond) 660 }() 661 662 endpointSet.Update(context.Background()) 663 testutil.Equals(t, numResponses, len(endpointSet.GetEndpointStatus())) 664 testutil.Equals(t, numResponses, len(endpointSet.GetStoreClients())) 665 wg.Wait() 666 } 667 668 func TestEndpointSetUpdate_AvailabilityScenarios(t *testing.T) { 669 endpoints, err := startTestEndpoints([]testEndpointMeta{ 670 { 671 InfoResponse: sidecarInfo, 672 extlsetFn: func(addr string) []labelpb.ZLabelSet { 673 return []labelpb.ZLabelSet{ 674 { 675 Labels: []labelpb.ZLabel{ 676 {Name: "addr", Value: addr}, 677 }, 678 }, 679 { 680 Labels: []labelpb.ZLabel{ 681 {Name: "a", Value: "b"}, 682 }, 683 }, 684 } 685 }, 686 }, 687 { 688 InfoResponse: sidecarInfo, 689 extlsetFn: func(addr string) []labelpb.ZLabelSet { 690 return []labelpb.ZLabelSet{ 691 { 692 Labels: []labelpb.ZLabel{ 693 {Name: "addr", Value: addr}, 694 }, 695 }, 696 { 697 Labels: []labelpb.ZLabel{ 698 {Name: "a", Value: "b"}, 699 }, 700 }, 701 } 702 }, 703 }, 704 { 705 InfoResponse: queryInfo, 706 extlsetFn: func(addr string) []labelpb.ZLabelSet { 707 return []labelpb.ZLabelSet{ 708 { 709 Labels: []labelpb.ZLabel{ 710 {Name: "addr", Value: addr}, 711 }, 712 }, 713 { 714 Labels: []labelpb.ZLabel{ 715 {Name: "a", Value: "b"}, 716 }, 717 }, 718 } 719 }, 720 }, 721 }) 722 testutil.Ok(t, err) 723 defer endpoints.Close() 724 725 discoveredEndpointAddr := endpoints.EndpointAddresses() 726 727 now := time.Now() 728 nowFunc := func() time.Time { return now } 729 // Testing if duplicates can cause weird results. 730 discoveredEndpointAddr = append(discoveredEndpointAddr, discoveredEndpointAddr[0]) 731 endpointSet := NewEndpointSet(nowFunc, nil, nil, 732 func() (specs []*GRPCEndpointSpec) { 733 for _, addr := range discoveredEndpointAddr { 734 specs = append(specs, NewGRPCEndpointSpec(addr, false)) 735 } 736 return specs 737 }, 738 testGRPCOpts, time.Minute, 2*time.Second) 739 defer endpointSet.Close() 740 741 // Initial update. 742 endpointSet.Update(context.Background()) 743 testutil.Equals(t, 3, len(endpointSet.endpoints)) 744 745 // Start with one not available. 746 endpoints.CloseOne(discoveredEndpointAddr[2]) 747 748 // Should not matter how many of these we run. 749 endpointSet.Update(context.Background()) 750 endpointSet.Update(context.Background()) 751 testutil.Equals(t, 2, len(endpointSet.GetStoreClients())) 752 testutil.Equals(t, 3, len(endpointSet.GetEndpointStatus())) 753 754 for addr, e := range endpointSet.endpoints { 755 testutil.Equals(t, addr, e.addr) 756 757 lset := e.LabelSets() 758 testutil.Equals(t, 2, len(lset)) 759 testutil.Equals(t, "addr", lset[0][0].Name) 760 testutil.Equals(t, addr, lset[0][0].Value) 761 testutil.Equals(t, "a", lset[1][0].Name) 762 testutil.Equals(t, "b", lset[1][0].Value) 763 assertRegisteredAPIs(t, endpoints.exposedAPIs[addr], e) 764 } 765 766 // Check stats. 767 expected := newEndpointAPIStats() 768 expected[component.Sidecar] = map[string]int{ 769 fmt.Sprintf("{a=\"b\"},{addr=\"%s\"}", discoveredEndpointAddr[0]): 1, 770 fmt.Sprintf("{a=\"b\"},{addr=\"%s\"}", discoveredEndpointAddr[1]): 1, 771 } 772 testutil.Equals(t, expected, endpointSet.endpointsMetric.storeNodes) 773 774 // Remove address from discovered and reset last check, which should ensure cleanup of status on next update. 775 now = now.Add(3 * time.Minute) 776 discoveredEndpointAddr = discoveredEndpointAddr[:len(discoveredEndpointAddr)-2] 777 endpointSet.Update(context.Background()) 778 testutil.Equals(t, 2, len(endpointSet.endpoints)) 779 780 endpoints.CloseOne(discoveredEndpointAddr[0]) 781 delete(expected[component.Sidecar], fmt.Sprintf("{a=\"b\"},{addr=\"%s\"}", discoveredEndpointAddr[0])) 782 783 // We expect Update to tear down store client for closed store server. 784 endpointSet.Update(context.Background()) 785 testutil.Equals(t, 1, len(endpointSet.GetStoreClients()), "only one service should respond just fine, so we expect one client to be ready.") 786 787 addr := discoveredEndpointAddr[1] 788 st, ok := endpointSet.endpoints[addr] 789 testutil.Assert(t, ok, "addr exist") 790 testutil.Equals(t, addr, st.addr) 791 792 lset := st.LabelSets() 793 testutil.Equals(t, 2, len(lset)) 794 testutil.Equals(t, "addr", lset[0][0].Name) 795 testutil.Equals(t, addr, lset[0][0].Value) 796 testutil.Equals(t, "a", lset[1][0].Name) 797 testutil.Equals(t, "b", lset[1][0].Value) 798 testutil.Equals(t, expected, endpointSet.endpointsMetric.storeNodes) 799 800 // New big batch of endpoints. 801 endpoint2, err := startTestEndpoints([]testEndpointMeta{ 802 { 803 InfoResponse: queryInfo, 804 extlsetFn: func(addr string) []labelpb.ZLabelSet { 805 return []labelpb.ZLabelSet{ 806 { 807 Labels: []labelpb.ZLabel{ 808 {Name: "l1", Value: "v2"}, 809 {Name: "l2", Value: "v3"}, 810 }, 811 }, 812 { 813 Labels: []labelpb.ZLabel{ 814 {Name: "l3", Value: "v4"}, 815 }, 816 }, 817 } 818 }, 819 }, 820 { 821 // Duplicated Querier, in previous versions it would be deduplicated. Now it should be not. 822 InfoResponse: queryInfo, 823 extlsetFn: func(addr string) []labelpb.ZLabelSet { 824 return []labelpb.ZLabelSet{ 825 { 826 Labels: []labelpb.ZLabel{ 827 {Name: "l1", Value: "v2"}, 828 {Name: "l2", Value: "v3"}, 829 }, 830 }, 831 { 832 Labels: []labelpb.ZLabel{ 833 {Name: "l3", Value: "v4"}, 834 }, 835 }, 836 } 837 }, 838 }, 839 { 840 InfoResponse: sidecarInfo, 841 extlsetFn: func(addr string) []labelpb.ZLabelSet { 842 return []labelpb.ZLabelSet{ 843 { 844 Labels: []labelpb.ZLabel{ 845 {Name: "l1", Value: "v2"}, 846 {Name: "l2", Value: "v3"}, 847 }, 848 }, 849 } 850 }, 851 }, 852 { 853 // Duplicated Sidecar, in previous versions it would be deduplicated. Now it should be not. 854 InfoResponse: sidecarInfo, 855 extlsetFn: func(addr string) []labelpb.ZLabelSet { 856 return []labelpb.ZLabelSet{ 857 { 858 Labels: []labelpb.ZLabel{ 859 {Name: "l1", Value: "v2"}, 860 {Name: "l2", Value: "v3"}, 861 }, 862 }, 863 } 864 }, 865 }, 866 { 867 // Querier that duplicates with sidecar, in previous versions it would be deduplicated. Now it should be not. 868 InfoResponse: queryInfo, 869 extlsetFn: func(addr string) []labelpb.ZLabelSet { 870 return []labelpb.ZLabelSet{ 871 { 872 Labels: []labelpb.ZLabel{ 873 {Name: "l1", Value: "v2"}, 874 {Name: "l2", Value: "v3"}, 875 }, 876 }, 877 } 878 }, 879 }, 880 { 881 // Ruler that duplicates with sidecar, in previous versions it would be deduplicated. Now it should be not. 882 // Warning should be produced. 883 InfoResponse: ruleInfo, 884 extlsetFn: func(addr string) []labelpb.ZLabelSet { 885 return []labelpb.ZLabelSet{ 886 { 887 Labels: []labelpb.ZLabel{ 888 {Name: "l1", Value: "v2"}, 889 {Name: "l2", Value: "v3"}, 890 }, 891 }, 892 } 893 }, 894 }, 895 { 896 // Duplicated Rule, in previous versions it would be deduplicated. Now it should be not. Warning should be produced. 897 InfoResponse: ruleInfo, 898 extlsetFn: func(addr string) []labelpb.ZLabelSet { 899 return []labelpb.ZLabelSet{ 900 { 901 Labels: []labelpb.ZLabel{ 902 {Name: "l1", Value: "v2"}, 903 {Name: "l2", Value: "v3"}, 904 }, 905 }, 906 } 907 }, 908 }, 909 // Two pre v0.8.0 store gateway nodes, they don't have ext labels set. 910 { 911 InfoResponse: storeGWInfo, 912 extlsetFn: func(addr string) []labelpb.ZLabelSet { 913 return []labelpb.ZLabelSet{} 914 }, 915 }, 916 { 917 InfoResponse: storeGWInfo, 918 extlsetFn: func(addr string) []labelpb.ZLabelSet { 919 return []labelpb.ZLabelSet{} 920 }, 921 }, 922 // Regression tests against https://github.com/thanos-io/thanos/issues/1632: From v0.8.0 stores advertise labels. 923 // If the object storage handled by store gateway has only one sidecar we used to hitting issue. 924 { 925 InfoResponse: storeGWInfo, 926 extlsetFn: func(addr string) []labelpb.ZLabelSet { 927 return []labelpb.ZLabelSet{ 928 { 929 Labels: []labelpb.ZLabel{ 930 {Name: "l1", Value: "v2"}, 931 {Name: "l2", Value: "v3"}, 932 }, 933 }, 934 { 935 Labels: []labelpb.ZLabel{ 936 {Name: "l3", Value: "v4"}, 937 }, 938 }, 939 } 940 }, 941 }, 942 // Stores v0.8.1 has compatibility labels. Check if they are correctly removed. 943 { 944 InfoResponse: storeGWInfo, 945 extlsetFn: func(addr string) []labelpb.ZLabelSet { 946 return []labelpb.ZLabelSet{ 947 { 948 Labels: []labelpb.ZLabel{ 949 {Name: "l1", Value: "v2"}, 950 {Name: "l2", Value: "v3"}, 951 }, 952 }, 953 { 954 Labels: []labelpb.ZLabel{ 955 {Name: "l3", Value: "v4"}, 956 }, 957 }, 958 { 959 Labels: []labelpb.ZLabel{ 960 {Name: store.CompatibilityTypeLabelName, Value: "store"}, 961 }, 962 }, 963 } 964 }, 965 }, 966 // Duplicated store, in previous versions it would be deduplicated. Now it should be not. 967 { 968 InfoResponse: storeGWInfo, 969 extlsetFn: func(addr string) []labelpb.ZLabelSet { 970 return []labelpb.ZLabelSet{ 971 { 972 Labels: []labelpb.ZLabel{ 973 {Name: "l1", Value: "v2"}, 974 {Name: "l2", Value: "v3"}, 975 }, 976 }, 977 { 978 Labels: []labelpb.ZLabel{ 979 {Name: "l3", Value: "v4"}, 980 }, 981 }, 982 { 983 Labels: []labelpb.ZLabel{ 984 {Name: store.CompatibilityTypeLabelName, Value: "store"}, 985 }, 986 }, 987 } 988 }, 989 }, 990 { 991 InfoResponse: receiveInfo, 992 extlsetFn: func(addr string) []labelpb.ZLabelSet { 993 return []labelpb.ZLabelSet{ 994 { 995 Labels: []labelpb.ZLabel{ 996 {Name: "l1", Value: "v2"}, 997 {Name: "l2", Value: "v3"}, 998 }, 999 }, 1000 { 1001 Labels: []labelpb.ZLabel{ 1002 {Name: "l3", Value: "v4"}, 1003 }, 1004 }, 1005 } 1006 }, 1007 }, 1008 // Duplicate receiver 1009 { 1010 InfoResponse: receiveInfo, 1011 extlsetFn: func(addr string) []labelpb.ZLabelSet { 1012 return []labelpb.ZLabelSet{ 1013 { 1014 Labels: []labelpb.ZLabel{ 1015 {Name: "l1", Value: "v2"}, 1016 {Name: "l2", Value: "v3"}, 1017 }, 1018 }, 1019 { 1020 Labels: []labelpb.ZLabel{ 1021 {Name: "l3", Value: "v4"}, 1022 }, 1023 }, 1024 } 1025 }, 1026 }, 1027 }) 1028 testutil.Ok(t, err) 1029 defer endpoint2.Close() 1030 1031 discoveredEndpointAddr = append(discoveredEndpointAddr, endpoint2.EndpointAddresses()...) 1032 1033 // New stores should be loaded. 1034 endpointSet.Update(context.Background()) 1035 testutil.Equals(t, 1+len(endpoint2.srvs), len(endpointSet.GetStoreClients())) 1036 1037 // Check stats. 1038 expected = newEndpointAPIStats() 1039 expected[component.Query] = map[string]int{ 1040 "{l1=\"v2\", l2=\"v3\"}": 1, 1041 "{l1=\"v2\", l2=\"v3\"},{l3=\"v4\"}": 2, 1042 } 1043 expected[component.Rule] = map[string]int{ 1044 "{l1=\"v2\", l2=\"v3\"}": 2, 1045 } 1046 expected[component.Sidecar] = map[string]int{ 1047 fmt.Sprintf("{a=\"b\"},{addr=\"%s\"}", discoveredEndpointAddr[1]): 1, 1048 "{l1=\"v2\", l2=\"v3\"}": 2, 1049 } 1050 expected[component.Store] = map[string]int{ 1051 "": 2, 1052 "{l1=\"v2\", l2=\"v3\"},{l3=\"v4\"}": 3, 1053 } 1054 expected[component.Receive] = map[string]int{ 1055 "{l1=\"v2\", l2=\"v3\"},{l3=\"v4\"}": 2, 1056 } 1057 testutil.Equals(t, expected, endpointSet.endpointsMetric.storeNodes) 1058 1059 // Close remaining endpoint from previous batch 1060 endpoints.CloseOne(discoveredEndpointAddr[1]) 1061 endpointSet.Update(context.Background()) 1062 1063 for addr, e := range endpointSet.getQueryableRefs() { 1064 testutil.Equals(t, addr, e.addr) 1065 assertRegisteredAPIs(t, endpoint2.exposedAPIs[addr], e) 1066 } 1067 1068 // Check statuses. 1069 testutil.Equals(t, 2+len(endpoint2.srvs), len(endpointSet.GetEndpointStatus())) 1070 } 1071 1072 func TestEndpointSet_Update_NoneAvailable(t *testing.T) { 1073 endpoints, err := startTestEndpoints([]testEndpointMeta{ 1074 { 1075 InfoResponse: sidecarInfo, 1076 extlsetFn: func(addr string) []labelpb.ZLabelSet { 1077 return []labelpb.ZLabelSet{ 1078 { 1079 Labels: []labelpb.ZLabel{ 1080 { 1081 Name: "addr", 1082 Value: addr, 1083 }, 1084 }, 1085 }, 1086 } 1087 }, 1088 }, 1089 { 1090 InfoResponse: sidecarInfo, 1091 extlsetFn: func(addr string) []labelpb.ZLabelSet { 1092 return []labelpb.ZLabelSet{ 1093 { 1094 Labels: []labelpb.ZLabel{ 1095 { 1096 Name: "addr", 1097 Value: addr, 1098 }, 1099 }, 1100 }, 1101 } 1102 }, 1103 }, 1104 }) 1105 testutil.Ok(t, err) 1106 defer endpoints.Close() 1107 1108 initialEndpointAddr := endpoints.EndpointAddresses() 1109 endpoints.CloseOne(initialEndpointAddr[0]) 1110 endpoints.CloseOne(initialEndpointAddr[1]) 1111 1112 endpointSet := NewEndpointSet(time.Now, nil, nil, 1113 func() (specs []*GRPCEndpointSpec) { 1114 for _, addr := range initialEndpointAddr { 1115 specs = append(specs, NewGRPCEndpointSpec(addr, false)) 1116 } 1117 return specs 1118 }, 1119 testGRPCOpts, time.Minute, 2*time.Second) 1120 defer endpointSet.Close() 1121 1122 // Should not matter how many of these we run. 1123 endpointSet.Update(context.Background()) 1124 endpointSet.Update(context.Background()) 1125 testutil.Equals(t, 0, len(endpointSet.GetStoreClients()), "none of services should respond just fine, so we expect no client to be ready.") 1126 1127 // Leak test will ensure that we don't keep client connection around. 1128 expected := newEndpointAPIStats() 1129 testutil.Equals(t, expected, endpointSet.endpointsMetric.storeNodes) 1130 1131 } 1132 1133 // TestEndpoint_Update_QuerierStrict tests what happens when the strict mode is enabled/disabled. 1134 func TestEndpoint_Update_QuerierStrict(t *testing.T) { 1135 endpoints, err := startTestEndpoints([]testEndpointMeta{ 1136 { 1137 InfoResponse: &infopb.InfoResponse{ 1138 ComponentType: component.Sidecar.String(), 1139 Store: &infopb.StoreInfo{ 1140 MinTime: 12345, 1141 MaxTime: 54321, 1142 }, 1143 Exemplars: &infopb.ExemplarsInfo{}, 1144 Rules: &infopb.RulesInfo{}, 1145 MetricMetadata: &infopb.MetricMetadataInfo{}, 1146 Targets: &infopb.TargetsInfo{}, 1147 }, 1148 extlsetFn: func(addr string) []labelpb.ZLabelSet { 1149 return []labelpb.ZLabelSet{ 1150 { 1151 Labels: []labelpb.ZLabel{ 1152 { 1153 Name: "addr", 1154 Value: addr, 1155 }, 1156 }, 1157 }, 1158 } 1159 }, 1160 }, 1161 { 1162 InfoResponse: &infopb.InfoResponse{ 1163 ComponentType: component.Sidecar.String(), 1164 Store: &infopb.StoreInfo{ 1165 MinTime: 66666, 1166 MaxTime: 77777, 1167 }, 1168 Exemplars: &infopb.ExemplarsInfo{}, 1169 Rules: &infopb.RulesInfo{}, 1170 MetricMetadata: &infopb.MetricMetadataInfo{}, 1171 Targets: &infopb.TargetsInfo{}, 1172 }, 1173 extlsetFn: func(addr string) []labelpb.ZLabelSet { 1174 return []labelpb.ZLabelSet{ 1175 { 1176 Labels: []labelpb.ZLabel{ 1177 { 1178 Name: "addr", 1179 Value: addr, 1180 }, 1181 }, 1182 }, 1183 } 1184 }, 1185 }, 1186 // Slow store. 1187 { 1188 InfoResponse: &infopb.InfoResponse{ 1189 ComponentType: component.Sidecar.String(), 1190 Store: &infopb.StoreInfo{ 1191 MinTime: 65644, 1192 MaxTime: 77777, 1193 }, 1194 Exemplars: &infopb.ExemplarsInfo{}, 1195 Rules: &infopb.RulesInfo{}, 1196 MetricMetadata: &infopb.MetricMetadataInfo{}, 1197 Targets: &infopb.TargetsInfo{}, 1198 }, 1199 extlsetFn: func(addr string) []labelpb.ZLabelSet { 1200 return []labelpb.ZLabelSet{ 1201 { 1202 Labels: []labelpb.ZLabel{ 1203 { 1204 Name: "addr", 1205 Value: addr, 1206 }, 1207 }, 1208 }, 1209 } 1210 }, 1211 infoDelay: 2 * time.Second, 1212 }, 1213 }) 1214 1215 testutil.Ok(t, err) 1216 defer endpoints.Close() 1217 1218 discoveredEndpointAddr := endpoints.EndpointAddresses() 1219 1220 staticEndpointAddr := discoveredEndpointAddr[0] 1221 slowStaticEndpointAddr := discoveredEndpointAddr[2] 1222 endpointSet := NewEndpointSet(time.Now, nil, nil, func() (specs []*GRPCEndpointSpec) { 1223 return []*GRPCEndpointSpec{ 1224 NewGRPCEndpointSpec(discoveredEndpointAddr[0], true), 1225 NewGRPCEndpointSpec(discoveredEndpointAddr[1], false), 1226 NewGRPCEndpointSpec(discoveredEndpointAddr[2], true), 1227 } 1228 }, testGRPCOpts, time.Minute, 1*time.Second) 1229 defer endpointSet.Close() 1230 1231 // Initial update. 1232 endpointSet.Update(context.Background()) 1233 testutil.Equals(t, 3, len(endpointSet.endpoints), "three clients must be available for running nodes") 1234 1235 // The endpoint has not responded to the info call and is assumed to cover everything. 1236 curMin, curMax := endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MinTime, endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MaxTime 1237 testutil.Assert(t, endpointSet.endpoints[slowStaticEndpointAddr].cc.GetState().String() != "SHUTDOWN", "slow store's connection should not be closed") 1238 testutil.Equals(t, int64(math.MinInt64), curMin) 1239 testutil.Equals(t, int64(math.MaxInt64), curMax) 1240 1241 // The endpoint is statically defined + strict mode is enabled 1242 // so its client + information must be retained. 1243 curMin, curMax = endpointSet.endpoints[staticEndpointAddr].metadata.Store.MinTime, endpointSet.endpoints[staticEndpointAddr].metadata.Store.MaxTime 1244 testutil.Equals(t, int64(12345), curMin, "got incorrect minimum time") 1245 testutil.Equals(t, int64(54321), curMax, "got incorrect minimum time") 1246 1247 // Successfully retrieve the information and observe minTime/maxTime updating. 1248 endpointSet.endpointInfoTimeout = 3 * time.Second 1249 endpointSet.Update(context.Background()) 1250 updatedCurMin, updatedCurMax := endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MinTime, endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MaxTime 1251 testutil.Equals(t, int64(65644), updatedCurMin) 1252 testutil.Equals(t, int64(77777), updatedCurMax) 1253 endpointSet.endpointInfoTimeout = 1 * time.Second 1254 1255 // Turn off the endpoints. 1256 endpoints.Close() 1257 1258 // Update again many times. Should not matter WRT the static one. 1259 endpointSet.Update(context.Background()) 1260 endpointSet.Update(context.Background()) 1261 endpointSet.Update(context.Background()) 1262 1263 // Check that the information is the same. 1264 testutil.Equals(t, 2, len(endpointSet.GetStoreClients()), "two static clients must remain available") 1265 testutil.Equals(t, curMin, endpointSet.endpoints[staticEndpointAddr].metadata.Store.MinTime, "minimum time reported by the store node is different") 1266 testutil.Equals(t, curMax, endpointSet.endpoints[staticEndpointAddr].metadata.Store.MaxTime, "minimum time reported by the store node is different") 1267 testutil.NotOk(t, endpointSet.endpoints[staticEndpointAddr].status.LastError.originalErr) 1268 1269 testutil.Equals(t, updatedCurMin, endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MinTime, "minimum time reported by the store node is different") 1270 testutil.Equals(t, updatedCurMax, endpointSet.endpoints[slowStaticEndpointAddr].metadata.Store.MaxTime, "minimum time reported by the store node is different") 1271 } 1272 1273 func TestEndpointSet_APIs_Discovery(t *testing.T) { 1274 endpoints, err := startTestEndpoints([]testEndpointMeta{ 1275 { 1276 InfoResponse: sidecarInfo, 1277 extlsetFn: func(addr string) []labelpb.ZLabelSet { 1278 return []labelpb.ZLabelSet{} 1279 }, 1280 }, 1281 { 1282 InfoResponse: ruleInfo, 1283 extlsetFn: func(addr string) []labelpb.ZLabelSet { 1284 return []labelpb.ZLabelSet{} 1285 }, 1286 }, 1287 { 1288 InfoResponse: receiveInfo, 1289 extlsetFn: func(addr string) []labelpb.ZLabelSet { 1290 return []labelpb.ZLabelSet{} 1291 }, 1292 }, 1293 { 1294 InfoResponse: storeGWInfo, 1295 extlsetFn: func(addr string) []labelpb.ZLabelSet { 1296 return []labelpb.ZLabelSet{} 1297 }, 1298 }, 1299 { 1300 InfoResponse: queryInfo, 1301 extlsetFn: func(addr string) []labelpb.ZLabelSet { 1302 return []labelpb.ZLabelSet{} 1303 }, 1304 }, 1305 }) 1306 testutil.Ok(t, err) 1307 defer endpoints.Close() 1308 1309 type discoveryState struct { 1310 name string 1311 endpointSpec func() []*GRPCEndpointSpec 1312 expectedStores int 1313 expectedRules int 1314 expectedTarget int 1315 expectedMetricMetadata int 1316 expectedExemplars int 1317 expectedQueryAPIs int 1318 } 1319 1320 for _, tc := range []struct { 1321 states []discoveryState 1322 name string 1323 }{ 1324 { 1325 name: "All endpoints discovered concurrently", 1326 states: []discoveryState{ 1327 { 1328 name: "no endpoints", 1329 endpointSpec: nil, 1330 }, 1331 { 1332 name: "Sidecar, Ruler, Querier, Receiver and StoreGW discovered", 1333 endpointSpec: func() []*GRPCEndpointSpec { 1334 endpointSpec := make([]*GRPCEndpointSpec, 0, len(endpoints.orderAddrs)) 1335 for _, addr := range endpoints.orderAddrs { 1336 endpointSpec = append(endpointSpec, NewGRPCEndpointSpec(addr, false)) 1337 } 1338 return endpointSpec 1339 }, 1340 expectedStores: 5, // sidecar + querier + receiver + storeGW + ruler 1341 expectedRules: 3, // sidecar + querier + ruler 1342 expectedTarget: 2, // sidecar + querier 1343 expectedMetricMetadata: 2, // sidecar + querier 1344 expectedExemplars: 3, // sidecar + querier + receiver 1345 expectedQueryAPIs: 1, // querier 1346 }, 1347 }, 1348 }, 1349 { 1350 name: "Sidecar discovery first, eventually Ruler discovered and then Sidecar removed", 1351 states: []discoveryState{ 1352 { 1353 name: "no stores", 1354 endpointSpec: nil, 1355 }, 1356 { 1357 name: "Sidecar discovered, no Ruler discovered", 1358 endpointSpec: func() []*GRPCEndpointSpec { 1359 return []*GRPCEndpointSpec{ 1360 NewGRPCEndpointSpec(endpoints.orderAddrs[0], false), 1361 } 1362 }, 1363 expectedStores: 1, // sidecar 1364 expectedRules: 1, // sidecar 1365 expectedTarget: 1, // sidecar 1366 expectedMetricMetadata: 1, // sidecar 1367 expectedExemplars: 1, // sidecar 1368 }, 1369 { 1370 name: "Ruler discovered", 1371 endpointSpec: func() []*GRPCEndpointSpec { 1372 return []*GRPCEndpointSpec{ 1373 NewGRPCEndpointSpec(endpoints.orderAddrs[0], false), 1374 NewGRPCEndpointSpec(endpoints.orderAddrs[1], false), 1375 } 1376 }, 1377 expectedStores: 2, // sidecar + ruler 1378 expectedRules: 2, // sidecar + ruler 1379 expectedTarget: 1, // sidecar 1380 expectedMetricMetadata: 1, // sidecar 1381 expectedExemplars: 1, // sidecar 1382 }, 1383 { 1384 name: "Sidecar removed", 1385 endpointSpec: func() []*GRPCEndpointSpec { 1386 return []*GRPCEndpointSpec{ 1387 NewGRPCEndpointSpec(endpoints.orderAddrs[1], false), 1388 } 1389 }, 1390 expectedStores: 1, // ruler 1391 expectedRules: 1, // ruler 1392 }, 1393 }, 1394 }, 1395 } { 1396 t.Run(tc.name, func(t *testing.T) { 1397 currentState := 0 1398 1399 endpointSet := NewEndpointSet(time.Now, nil, nil, 1400 func() []*GRPCEndpointSpec { 1401 if tc.states[currentState].endpointSpec == nil { 1402 return nil 1403 } 1404 1405 return tc.states[currentState].endpointSpec() 1406 }, 1407 testGRPCOpts, time.Minute, 2*time.Second) 1408 1409 defer endpointSet.Close() 1410 1411 for { 1412 endpointSet.Update(context.Background()) 1413 1414 gotStores := 0 1415 gotRules := 0 1416 gotTarget := 0 1417 gotExemplars := 0 1418 gotMetricMetadata := 0 1419 gotQueryAPIs := 0 1420 1421 for _, er := range endpointSet.endpoints { 1422 if er.HasStoreAPI() { 1423 gotStores += 1 1424 } 1425 if er.HasRulesAPI() { 1426 gotRules += 1 1427 } 1428 if er.HasTargetsAPI() { 1429 gotTarget += 1 1430 } 1431 if er.HasExemplarsAPI() { 1432 gotExemplars += 1 1433 } 1434 if er.HasMetricMetadataAPI() { 1435 gotMetricMetadata += 1 1436 } 1437 if er.HasQueryAPI() { 1438 gotQueryAPIs += 1 1439 } 1440 } 1441 testutil.Equals( 1442 t, 1443 tc.states[currentState].expectedStores, 1444 gotStores, 1445 "unexepected discovered storeAPIs in state %q", 1446 tc.states[currentState].name) 1447 testutil.Equals( 1448 t, 1449 tc.states[currentState].expectedRules, 1450 gotRules, 1451 "unexepected discovered rulesAPIs in state %q", 1452 tc.states[currentState].name) 1453 testutil.Equals( 1454 t, 1455 tc.states[currentState].expectedTarget, 1456 gotTarget, 1457 "unexepected discovered targetAPIs in state %q", 1458 tc.states[currentState].name, 1459 ) 1460 testutil.Equals( 1461 t, 1462 tc.states[currentState].expectedMetricMetadata, 1463 gotMetricMetadata, 1464 "unexepected discovered metricMetadataAPIs in state %q", 1465 tc.states[currentState].name, 1466 ) 1467 testutil.Equals( 1468 t, 1469 tc.states[currentState].expectedExemplars, 1470 gotExemplars, 1471 "unexepected discovered ExemplarsAPIs in state %q", 1472 tc.states[currentState].name, 1473 ) 1474 testutil.Equals( 1475 t, 1476 tc.states[currentState].expectedQueryAPIs, 1477 gotQueryAPIs, 1478 "unexepected discovered QueryAPIs in state %q", 1479 tc.states[currentState].name, 1480 ) 1481 1482 currentState = currentState + 1 1483 if len(tc.states) == currentState { 1484 break 1485 } 1486 } 1487 }) 1488 } 1489 } 1490 1491 func makeInfoResponses(n int) []testEndpointMeta { 1492 responses := make([]testEndpointMeta, 0, n) 1493 for i := 0; i < n; i++ { 1494 responses = append(responses, testEndpointMeta{ 1495 InfoResponse: sidecarInfo, 1496 extlsetFn: func(addr string) []labelpb.ZLabelSet { 1497 return labelpb.ZLabelSetsFromPromLabels( 1498 labels.FromStrings("addr", addr, "a", "b"), 1499 ) 1500 }, 1501 }) 1502 } 1503 1504 return responses 1505 } 1506 1507 type errThatMarshalsToEmptyDict struct { 1508 msg string 1509 } 1510 1511 // MarshalJSON marshals the error and returns and empty dict, not the error string. 1512 func (e *errThatMarshalsToEmptyDict) MarshalJSON() ([]byte, error) { 1513 return json.Marshal(map[string]string{}) 1514 } 1515 1516 // Error returns the original, underlying string. 1517 func (e *errThatMarshalsToEmptyDict) Error() string { 1518 return e.msg 1519 } 1520 1521 // Test highlights that without wrapping the error, it is marshaled to empty dict {}, not its message. 1522 func TestEndpointStringError(t *testing.T) { 1523 dictErr := &errThatMarshalsToEmptyDict{msg: "Error message"} 1524 stringErr := &stringError{originalErr: dictErr} 1525 1526 endpointstatusMock := map[string]error{} 1527 endpointstatusMock["dictErr"] = dictErr 1528 endpointstatusMock["stringErr"] = stringErr 1529 1530 b, err := json.Marshal(endpointstatusMock) 1531 1532 testutil.Ok(t, err) 1533 testutil.Equals(t, []byte(`{"dictErr":{},"stringErr":"Error message"}`), b, "expected to get proper results") 1534 } 1535 1536 // Errors that usually marshal to empty dict should return the original error string. 1537 func TestUpdateEndpointStateLastError(t *testing.T) { 1538 tcs := []struct { 1539 InputError error 1540 ExpectedLastErr string 1541 }{ 1542 {errors.New("normal_err"), `"normal_err"`}, 1543 {nil, `null`}, 1544 {&errThatMarshalsToEmptyDict{"the error message"}, `"the error message"`}, 1545 } 1546 1547 for _, tc := range tcs { 1548 mockEndpointRef := &endpointRef{ 1549 addr: "mockedStore", 1550 metadata: &endpointMetadata{ 1551 &infopb.InfoResponse{}, 1552 }, 1553 } 1554 1555 mockEndpointRef.update(time.Now, mockEndpointRef.metadata, tc.InputError) 1556 1557 b, err := json.Marshal(mockEndpointRef.status.LastError) 1558 testutil.Ok(t, err) 1559 testutil.Equals(t, tc.ExpectedLastErr, string(b)) 1560 } 1561 } 1562 1563 func TestUpdateEndpointStateForgetsPreviousErrors(t *testing.T) { 1564 mockEndpointRef := &endpointRef{ 1565 addr: "mockedStore", 1566 metadata: &endpointMetadata{ 1567 &infopb.InfoResponse{}, 1568 }, 1569 } 1570 1571 mockEndpointRef.update(time.Now, mockEndpointRef.metadata, errors.New("test err")) 1572 1573 b, err := json.Marshal(mockEndpointRef.status.LastError) 1574 testutil.Ok(t, err) 1575 testutil.Equals(t, `"test err"`, string(b)) 1576 1577 // updating status without and error should clear the previous one. 1578 mockEndpointRef.update(time.Now, mockEndpointRef.metadata, nil) 1579 1580 b, err = json.Marshal(mockEndpointRef.status.LastError) 1581 testutil.Ok(t, err) 1582 testutil.Equals(t, `null`, string(b)) 1583 } 1584 1585 func makeEndpointSet(discoveredEndpointAddr []string, strict bool, now nowFunc, metricLabels ...string) *EndpointSet { 1586 endpointSet := NewEndpointSet(now, nil, nil, 1587 func() (specs []*GRPCEndpointSpec) { 1588 for _, addr := range discoveredEndpointAddr { 1589 specs = append(specs, NewGRPCEndpointSpec(addr, strict)) 1590 } 1591 return specs 1592 }, 1593 testGRPCOpts, time.Minute, time.Second, metricLabels...) 1594 return endpointSet 1595 } 1596 1597 func exposedAPIs(c string) *APIs { 1598 switch c { 1599 case component.Sidecar.String(): 1600 return &APIs{ 1601 store: true, 1602 target: true, 1603 rules: true, 1604 metricMetadata: true, 1605 exemplars: true, 1606 } 1607 case component.Query.String(): 1608 return &APIs{ 1609 store: true, 1610 target: true, 1611 rules: true, 1612 metricMetadata: true, 1613 exemplars: true, 1614 } 1615 case component.Receive.String(): 1616 return &APIs{ 1617 store: true, 1618 exemplars: true, 1619 } 1620 case component.Rule.String(): 1621 return &APIs{ 1622 store: true, 1623 rules: true, 1624 } 1625 case component.Store.String(): 1626 return &APIs{ 1627 store: true, 1628 } 1629 } 1630 return &APIs{} 1631 } 1632 1633 func assertRegisteredAPIs(t *testing.T, expectedAPIs *APIs, er *endpointRef) { 1634 testutil.Equals(t, expectedAPIs.store, er.HasStoreAPI()) 1635 testutil.Equals(t, expectedAPIs.rules, er.HasRulesAPI()) 1636 testutil.Equals(t, expectedAPIs.target, er.HasTargetsAPI()) 1637 testutil.Equals(t, expectedAPIs.metricMetadata, er.HasMetricMetadataAPI()) 1638 testutil.Equals(t, expectedAPIs.exemplars, er.HasExemplarsAPI()) 1639 } 1640 1641 // Regression test for: https://github.com/thanos-io/thanos/issues/4766. 1642 func TestDeadlockLocking(t *testing.T) { 1643 t.Parallel() 1644 1645 mockEndpointRef := &endpointRef{ 1646 addr: "mockedStore", 1647 metadata: &endpointMetadata{ 1648 &infopb.InfoResponse{}, 1649 }, 1650 } 1651 1652 g := &errgroup.Group{} 1653 deadline := time.Now().Add(3 * time.Second) 1654 1655 g.Go(func() error { 1656 for { 1657 if time.Now().After(deadline) { 1658 break 1659 } 1660 mockEndpointRef.update(time.Now, &endpointMetadata{ 1661 InfoResponse: &infopb.InfoResponse{}, 1662 }, nil) 1663 } 1664 return nil 1665 }) 1666 1667 g.Go(func() error { 1668 for { 1669 if time.Now().After(deadline) { 1670 break 1671 } 1672 mockEndpointRef.HasStoreAPI() 1673 mockEndpointRef.HasExemplarsAPI() 1674 mockEndpointRef.HasMetricMetadataAPI() 1675 mockEndpointRef.HasRulesAPI() 1676 mockEndpointRef.HasTargetsAPI() 1677 } 1678 return nil 1679 }) 1680 1681 testutil.Ok(t, g.Wait()) 1682 }