// Origin: git.frostfs.info/TrueCloudLab/frostfs-sdk-go@v0.0.0-20241022124111-5361f0ecebd3/pool/tree/pool.go

package tree

import (
	"context"
	"errors"
	"fmt"
	"io"
	"sort"
	"strings"
	"sync"
	"time"

	cid "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/container/id"
	"git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pool"
	grpcService "git.frostfs.info/TrueCloudLab/frostfs-sdk-go/pool/tree/service"
	"github.com/nspcc-dev/neo-go/pkg/crypto/keys"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	"google.golang.org/grpc"
)

// Defaults applied by fillDefaultInitParams when the corresponding
// InitParameters value is not positive.
const (
	defaultRebalanceInterval  = 15 * time.Second
	defaultHealthcheckTimeout = 4 * time.Second
	defaultDialTimeout        = 5 * time.Second
	defaultStreamTimeout      = 10 * time.Second
)

// SubTreeSort defines an order of nodes returned from GetSubTree RPC.
type SubTreeSort int32

const (
	// NoneOrder does not specify order of nodes returned in GetSubTree RPC.
	NoneOrder SubTreeSort = iota
	// AscendingOrder specifies ascending alphabetical order of nodes based on FilePath attribute.
	AscendingOrder
)

var (
	// ErrNodeNotFound is returned from Tree service in case of not found error.
	ErrNodeNotFound = errors.New("not found")

	// ErrNodeAccessDenied is returned from Tree service in case of access denied error.
	ErrNodeAccessDenied = errors.New("access denied")

	// errNodeEmptyResult is used to trigger a retry when 'GetNodeByPath' returns an empty result.
	// Pool.GetNodes filters it out before returning to the caller.
	errNodeEmptyResult = errors.New("empty result")
)

// client represents virtual connection to the single FrostFS tree service from which Pool is formed.
// This interface is expected to have exactly one production implementation - treeClient.
// Others are expected to be for test purposes only.
type client interface {
	// serviceClient returns the gRPC client used to perform a request.
	// Pool.requestWithRetry treats a returned error like a failed request.
	serviceClient() (grpcService.TreeServiceClient, error)
	// endpoint returns the node address; it is used in log records and error messages.
	endpoint() string
	// NOTE(review): isHealthy and setHealthy are not called anywhere in the
	// visible part of this file; presumably they are used by the implementation.
	isHealthy() bool
	setHealthy(bool)
	// dial is called once per node from Pool.Dial.
	dial(ctx context.Context) error
	// redialIfNecessary is called on every rebalance tick; the boolean result is
	// logged as "health has changed", a nil error marks the node as healthy.
	redialIfNecessary(context.Context) (bool, error)
	// close is called for every client from Pool.Close.
	close() error
}

// InitParameters contains values used to initialize connection Pool.
//
// Non-positive durations and a non-positive maxRequestAttempts are replaced
// with defaults by fillDefaultInitParams.
type InitParameters struct {
	// key is mandatory: NewPool fails when it is nil.
	key *keys.PrivateKey
	// logger is optional: Pool.log is a no-op when it is nil.
	logger *zap.Logger
	// NOTE(review): nodeDialTimeout and nodeStreamTimeout are only set and
	// defaulted in the visible code; NewPool does not copy them into Pool.
	nodeDialTimeout   time.Duration
	nodeStreamTimeout time.Duration
	// healthcheckTimeout becomes rebalanceParameters.nodeRequestTimeout.
	healthcheckTimeout      time.Duration
	clientRebalanceInterval time.Duration
	// nodeParams are grouped by priority in NewPool (see adjustNodeParams).
	nodeParams  []pool.NodeParam
	dialOptions []grpc.DialOption
	// maxRequestAttempts defaults to len(nodeParams) when not positive.
	maxRequestAttempts int
}

// Pool represents virtual connection to the FrostFS tree services network to communicate
// with multiple FrostFS tree services without thinking about switching between servers
// due to their unavailability.
//
// Pool can be created and initialized using NewPool function.
// Before executing the FrostFS tree operations using the Pool, connection to the
// servers MUST BE correctly established (see Dial method).
type Pool struct {
	// innerPools holds one group of clients per priority; it is filled by Dial.
	innerPools []*innerPool
	key        *keys.PrivateKey
	// cancel stops the rebalance routine started by Dial.
	cancel context.CancelFunc
	// closedCh is closed by the rebalance routine when it exits; Close waits for it.
	closedCh        chan struct{}
	rebalanceParams rebalanceParameters
	dialOptions     []grpc.DialOption
	logger          *zap.Logger
	// methods stores per-method statistics indexed by MethodIndex.
	methods []*pool.MethodStatus

	maxRequestAttempts int

	startIndicesMtx sync.RWMutex
	// startIndices points to the client from which the next request will be executed.
	// Since clients are stored in innerPool field we have to use two indices.
	// These indices being changed during:
	// * rebalance procedure (see Pool.startRebalance)
	// * retry in case of request failure (see Pool.requestWithRetry)
	startIndices [2]int
}

// innerPool is a group of clients that share the same priority.
type innerPool struct {
	clients []client
}

// rebalanceParameters groups settings of the health-check (rebalance) routine.
type rebalanceParameters struct {
	// nodesGroup contains node parameters grouped by priority in ascending order.
	nodesGroup [][]pool.NodeParam
	// nodeRequestTimeout bounds a single redialIfNecessary call.
	nodeRequestTimeout      time.Duration
	clientRebalanceInterval time.Duration
}

// GetNodesParams groups parameters of Pool.GetNodes operation.
//
// Meta is sent as the 'Attributes' field and AllAttrs as the 'AllAttributes'
// field of the GetNodeByPath request body.
type GetNodesParams struct {
	CID           cid.ID
	TreeID        string
	Path          []string
	Meta          []string
	PathAttribute string
	LatestOnly    bool
	AllAttrs      bool
	BearerToken   []byte
}

// GetSubTreeParams groups parameters of Pool.GetSubTree operation.
//
// Any Order value other than AscendingOrder is sent as "no order".
type GetSubTreeParams struct {
	CID         cid.ID
	TreeID      string
	RootID      []uint64
	Depth       uint32
	BearerToken []byte
	Order       SubTreeSort
}

// AddNodeParams groups parameters of Pool.AddNode operation.
type AddNodeParams struct {
	CID         cid.ID
	TreeID      string
	Parent      uint64
	Meta        map[string]string
	BearerToken []byte
}

// AddNodeByPathParams groups parameters of Pool.AddNodeByPath operation.
type AddNodeByPathParams struct {
	CID           cid.ID
	TreeID        string
	Path          []string
	Meta          map[string]string
	PathAttribute string
	BearerToken   []byte
}

// MoveNodeParams groups parameters of Pool.MoveNode operation.
type MoveNodeParams struct {
	CID         cid.ID
	TreeID      string
	NodeID      uint64
	ParentID    uint64
	Meta        map[string]string
	BearerToken []byte
}

// RemoveNodeParams groups parameters of Pool.RemoveNode operation.
type RemoveNodeParams struct {
	CID         cid.ID
	TreeID      string
	NodeID      uint64
	BearerToken []byte
}

// MethodIndex is the index of a method in the list of statuses in Pool.
174 type MethodIndex int 175 176 const ( 177 methodGetNodes MethodIndex = iota 178 methodGetSubTree 179 methodAddNode 180 methodAddNodeByPath 181 methodMoveNode 182 methodRemoveNode 183 methodLast 184 ) 185 186 // String implements fmt.Stringer. 187 func (m MethodIndex) String() string { 188 switch m { 189 case methodGetNodes: 190 return "getNodes" 191 case methodAddNode: 192 return "addNode" 193 case methodGetSubTree: 194 return "getSubTree" 195 case methodAddNodeByPath: 196 return "addNodeByPath" 197 case methodMoveNode: 198 return "moveNode" 199 case methodRemoveNode: 200 return "removeNode" 201 default: 202 return "unknown" 203 } 204 } 205 206 // NewPool creates connection pool using parameters. 207 func NewPool(options InitParameters) (*Pool, error) { 208 if options.key == nil { 209 return nil, fmt.Errorf("missed required parameter 'Key'") 210 } 211 212 nodesParams, err := adjustNodeParams(options.nodeParams) 213 if err != nil { 214 return nil, err 215 } 216 217 fillDefaultInitParams(&options) 218 219 methods := make([]*pool.MethodStatus, methodLast) 220 for i := methodGetNodes; i < methodLast; i++ { 221 methods[i] = pool.NewMethodStatus(i.String()) 222 } 223 224 p := &Pool{ 225 key: options.key, 226 logger: options.logger, 227 dialOptions: options.dialOptions, 228 rebalanceParams: rebalanceParameters{ 229 nodesGroup: nodesParams, 230 nodeRequestTimeout: options.healthcheckTimeout, 231 clientRebalanceInterval: options.clientRebalanceInterval, 232 }, 233 maxRequestAttempts: options.maxRequestAttempts, 234 methods: methods, 235 } 236 237 return p, nil 238 } 239 240 // Dial establishes a connection to the tree servers from the FrostFS network. 241 // It also starts a routine that checks the health of the nodes and 242 // updates the weights of the nodes for balancing. 243 // Returns an error describing failure reason. 244 // 245 // If failed, the Pool SHOULD NOT be used. 246 // 247 // See also InitParameters.SetClientRebalanceInterval. 
248 func (p *Pool) Dial(ctx context.Context) error { 249 inner := make([]*innerPool, len(p.rebalanceParams.nodesGroup)) 250 var atLeastOneHealthy bool 251 252 for i, nodes := range p.rebalanceParams.nodesGroup { 253 clients := make([]client, len(nodes)) 254 for j, node := range nodes { 255 clients[j] = newTreeClient(node.Address(), p.dialOptions...) 256 if err := clients[j].dial(ctx); err != nil { 257 p.log(zap.WarnLevel, "failed to dial tree client", zap.String("address", node.Address()), zap.Error(err)) 258 continue 259 } 260 261 atLeastOneHealthy = true 262 } 263 264 inner[i] = &innerPool{ 265 clients: clients, 266 } 267 } 268 269 if !atLeastOneHealthy { 270 return fmt.Errorf("at least one node must be healthy") 271 } 272 273 ctx, cancel := context.WithCancel(ctx) 274 p.cancel = cancel 275 p.closedCh = make(chan struct{}) 276 p.innerPools = inner 277 278 go p.startRebalance(ctx) 279 return nil 280 } 281 282 // SetKey specifies default key to be used for the protocol communication by default. 283 func (x *InitParameters) SetKey(key *keys.PrivateKey) { 284 x.key = key 285 } 286 287 // SetLogger specifies logger. 288 func (x *InitParameters) SetLogger(logger *zap.Logger) { 289 x.logger = logger 290 } 291 292 // SetNodeDialTimeout specifies the timeout for connection to be established. 293 func (x *InitParameters) SetNodeDialTimeout(timeout time.Duration) { 294 x.nodeDialTimeout = timeout 295 } 296 297 // SetNodeStreamTimeout specifies the timeout for individual operations in streaming RPC. 298 func (x *InitParameters) SetNodeStreamTimeout(timeout time.Duration) { 299 x.nodeStreamTimeout = timeout 300 } 301 302 // SetHealthcheckTimeout specifies the timeout for request to node to decide if it is alive. 303 // 304 // See also Pool.Dial. 305 func (x *InitParameters) SetHealthcheckTimeout(timeout time.Duration) { 306 x.healthcheckTimeout = timeout 307 } 308 309 // SetClientRebalanceInterval specifies the interval for updating nodes health status. 
310 // 311 // See also Pool.Dial. 312 func (x *InitParameters) SetClientRebalanceInterval(interval time.Duration) { 313 x.clientRebalanceInterval = interval 314 } 315 316 // AddNode append information about the node to which you want to connect. 317 func (x *InitParameters) AddNode(nodeParam pool.NodeParam) { 318 x.nodeParams = append(x.nodeParams, nodeParam) 319 } 320 321 // SetGRPCDialOptions sets the gRPC dial options for new gRPC tree client connection. 322 func (x *InitParameters) SetGRPCDialOptions(opts ...grpc.DialOption) { 323 x.dialOptions = opts 324 } 325 326 // SetMaxRequestAttempts sets the max attempt to make successful request. 327 // Default value is 0 that means the number of attempts equals to number of nodes in pool. 328 func (x *InitParameters) SetMaxRequestAttempts(maxAttempts int) { 329 x.maxRequestAttempts = maxAttempts 330 } 331 332 // GetNodes invokes eponymous method from TreeServiceClient. 333 // 334 // Can return predefined errors: 335 // * ErrNodeNotFound 336 // * ErrNodeAccessDenied. 337 func (p *Pool) GetNodes(ctx context.Context, prm GetNodesParams) ([]*grpcService.GetNodeByPathResponse_Info, error) { 338 request := &grpcService.GetNodeByPathRequest{ 339 Body: &grpcService.GetNodeByPathRequest_Body{ 340 ContainerId: prm.CID[:], 341 TreeId: prm.TreeID, 342 Path: prm.Path, 343 Attributes: prm.Meta, 344 PathAttribute: prm.PathAttribute, 345 LatestOnly: prm.LatestOnly, 346 AllAttributes: prm.AllAttrs, 347 BearerToken: prm.BearerToken, 348 }, 349 } 350 351 start := time.Now() 352 if err := p.signRequest(request); err != nil { 353 return nil, err 354 } 355 356 var resp *grpcService.GetNodeByPathResponse 357 err := p.requestWithRetry(ctx, func(client grpcService.TreeServiceClient) (inErr error) { 358 resp, inErr = client.GetNodeByPath(ctx, request) 359 // Pool wants to do retry 'GetNodeByPath' request if result is empty. 360 // Empty result is expected due to delayed tree service sync. 
361 // Return an error there to trigger retry and ignore it after, 362 // to keep compatibility with 'GetNodeByPath' implementation. 363 if inErr == nil && len(resp.GetBody().GetNodes()) == 0 { 364 return errNodeEmptyResult 365 } 366 return handleError("failed to get node by path", inErr) 367 }) 368 p.methods[methodGetNodes].IncRequests(time.Since(start)) 369 if err != nil && !errors.Is(err, errNodeEmptyResult) { 370 return nil, err 371 } 372 373 return resp.GetBody().GetNodes(), nil 374 } 375 376 // SubTreeReader is designed to read list of subtree nodes FrostFS tree service. 377 // 378 // Must be initialized using Pool.GetSubTree, any other usage is unsafe. 379 type SubTreeReader struct { 380 cli grpcService.TreeService_GetSubTreeClient 381 } 382 383 // Read reads another list of the subtree nodes. 384 func (x *SubTreeReader) Read(buf []*grpcService.GetSubTreeResponse_Body) (int, error) { 385 for i := range len(buf) { 386 resp, err := x.cli.Recv() 387 if err == io.EOF { 388 return i, io.EOF 389 } else if err != nil { 390 return i, handleError("failed to get sub tree", err) 391 } 392 buf[i] = resp.GetBody() 393 } 394 395 return len(buf), nil 396 } 397 398 // ReadAll reads all nodes subtree nodes. 399 func (x *SubTreeReader) ReadAll() ([]*grpcService.GetSubTreeResponse_Body, error) { 400 var res []*grpcService.GetSubTreeResponse_Body 401 for { 402 resp, err := x.cli.Recv() 403 if err == io.EOF { 404 break 405 } else if err != nil { 406 return nil, handleError("failed to get sub tree", err) 407 } 408 res = append(res, resp.GetBody()) 409 } 410 411 return res, nil 412 } 413 414 // Next gets the next node from subtree. 
415 func (x *SubTreeReader) Next() (*grpcService.GetSubTreeResponse_Body, error) { 416 resp, err := x.cli.Recv() 417 if err == io.EOF { 418 return nil, io.EOF 419 } 420 if err != nil { 421 return nil, handleError("failed to get sub tree", err) 422 } 423 424 return resp.GetBody(), nil 425 } 426 427 // GetSubTree invokes eponymous method from TreeServiceClient. 428 // 429 // Can return predefined errors: 430 // * ErrNodeNotFound 431 // * ErrNodeAccessDenied. 432 func (p *Pool) GetSubTree(ctx context.Context, prm GetSubTreeParams) (*SubTreeReader, error) { 433 request := &grpcService.GetSubTreeRequest{ 434 Body: &grpcService.GetSubTreeRequest_Body{ 435 ContainerId: prm.CID[:], 436 TreeId: prm.TreeID, 437 RootId: prm.RootID, 438 Depth: prm.Depth, 439 BearerToken: prm.BearerToken, 440 OrderBy: new(grpcService.GetSubTreeRequest_Body_Order), 441 }, 442 } 443 444 switch prm.Order { 445 case AscendingOrder: 446 request.Body.OrderBy.Direction = grpcService.GetSubTreeRequest_Body_Order_Asc 447 default: 448 request.Body.OrderBy.Direction = grpcService.GetSubTreeRequest_Body_Order_None 449 } 450 451 start := time.Now() 452 if err := p.signRequest(request); err != nil { 453 return nil, err 454 } 455 456 var cli grpcService.TreeService_GetSubTreeClient 457 err := p.requestWithRetry(ctx, func(client grpcService.TreeServiceClient) (inErr error) { 458 cli, inErr = client.GetSubTree(ctx, request) 459 return handleError("failed to get sub tree client", inErr) 460 }) 461 p.methods[methodGetSubTree].IncRequests(time.Since(start)) 462 if err != nil { 463 return nil, err 464 } 465 466 return &SubTreeReader{cli: cli}, nil 467 } 468 469 // AddNode invokes eponymous method from TreeServiceClient. 470 // 471 // Can return predefined errors: 472 // * ErrNodeNotFound 473 // * ErrNodeAccessDenied. 
474 func (p *Pool) AddNode(ctx context.Context, prm AddNodeParams) (uint64, error) { 475 request := &grpcService.AddRequest{ 476 Body: &grpcService.AddRequest_Body{ 477 ContainerId: prm.CID[:], 478 TreeId: prm.TreeID, 479 ParentId: prm.Parent, 480 Meta: metaToKV(prm.Meta), 481 BearerToken: prm.BearerToken, 482 }, 483 } 484 485 start := time.Now() 486 if err := p.signRequest(request); err != nil { 487 return 0, err 488 } 489 490 var resp *grpcService.AddResponse 491 err := p.requestWithRetry(ctx, func(client grpcService.TreeServiceClient) (inErr error) { 492 resp, inErr = client.Add(ctx, request) 493 return handleError("failed to add node", inErr) 494 }) 495 p.methods[methodAddNode].IncRequests(time.Since(start)) 496 if err != nil { 497 return 0, err 498 } 499 500 return resp.GetBody().GetNodeId(), nil 501 } 502 503 // AddNodeByPath invokes eponymous method from TreeServiceClient. 504 // 505 // Can return predefined errors: 506 // * ErrNodeNotFound 507 // * ErrNodeAccessDenied. 508 func (p *Pool) AddNodeByPath(ctx context.Context, prm AddNodeByPathParams) (uint64, error) { 509 request := &grpcService.AddByPathRequest{ 510 Body: &grpcService.AddByPathRequest_Body{ 511 ContainerId: prm.CID[:], 512 TreeId: prm.TreeID, 513 Path: prm.Path, 514 Meta: metaToKV(prm.Meta), 515 PathAttribute: prm.PathAttribute, 516 BearerToken: prm.BearerToken, 517 }, 518 } 519 520 start := time.Now() 521 if err := p.signRequest(request); err != nil { 522 return 0, err 523 } 524 525 var resp *grpcService.AddByPathResponse 526 err := p.requestWithRetry(ctx, func(client grpcService.TreeServiceClient) (inErr error) { 527 resp, inErr = client.AddByPath(ctx, request) 528 return handleError("failed to add node by path", inErr) 529 }) 530 p.methods[methodAddNodeByPath].IncRequests(time.Since(start)) 531 if err != nil { 532 return 0, err 533 } 534 535 body := resp.GetBody() 536 if body == nil { 537 return 0, errors.New("nil body in tree service response") 538 } else if len(body.GetNodes()) == 0 { 539 
return 0, errors.New("empty list of added nodes in tree service response") 540 } 541 542 // The first node is the leaf that we add, according to tree service docs. 543 return body.GetNodes()[0], nil 544 } 545 546 // MoveNode invokes eponymous method from TreeServiceClient. 547 // 548 // Can return predefined errors: 549 // * ErrNodeNotFound 550 // * ErrNodeAccessDenied. 551 func (p *Pool) MoveNode(ctx context.Context, prm MoveNodeParams) error { 552 request := &grpcService.MoveRequest{ 553 Body: &grpcService.MoveRequest_Body{ 554 ContainerId: prm.CID[:], 555 TreeId: prm.TreeID, 556 NodeId: prm.NodeID, 557 ParentId: prm.ParentID, 558 Meta: metaToKV(prm.Meta), 559 BearerToken: prm.BearerToken, 560 }, 561 } 562 563 start := time.Now() 564 if err := p.signRequest(request); err != nil { 565 return err 566 } 567 568 err := p.requestWithRetry(ctx, func(client grpcService.TreeServiceClient) error { 569 if _, err := client.Move(ctx, request); err != nil { 570 return handleError("failed to move node", err) 571 } 572 return nil 573 }) 574 p.methods[methodMoveNode].IncRequests(time.Since(start)) 575 576 return err 577 } 578 579 // RemoveNode invokes eponymous method from TreeServiceClient. 580 // 581 // Can return predefined errors: 582 // * ErrNodeNotFound 583 // * ErrNodeAccessDenied. 
584 func (p *Pool) RemoveNode(ctx context.Context, prm RemoveNodeParams) error { 585 request := &grpcService.RemoveRequest{ 586 Body: &grpcService.RemoveRequest_Body{ 587 ContainerId: prm.CID[:], 588 TreeId: prm.TreeID, 589 NodeId: prm.NodeID, 590 BearerToken: prm.BearerToken, 591 }, 592 } 593 594 start := time.Now() 595 if err := p.signRequest(request); err != nil { 596 return err 597 } 598 599 err := p.requestWithRetry(ctx, func(client grpcService.TreeServiceClient) error { 600 if _, err := client.Remove(ctx, request); err != nil { 601 return handleError("failed to remove node", err) 602 } 603 return nil 604 }) 605 p.methods[methodRemoveNode].IncRequests(time.Since(start)) 606 607 return err 608 } 609 610 // Close closes the Pool and releases all the associated resources. 611 func (p *Pool) Close() error { 612 p.cancel() 613 <-p.closedCh 614 615 var err error 616 for _, group := range p.innerPools { 617 for _, cl := range group.clients { 618 if closeErr := cl.close(); closeErr != nil { 619 p.log(zapcore.ErrorLevel, "close client connection", zap.Error(closeErr)) 620 err = closeErr 621 } 622 } 623 } 624 625 return err 626 } 627 628 // Statistic returns tree pool statistics. 
629 func (p *Pool) Statistic() Statistic { 630 stat := Statistic{make([]pool.StatusSnapshot, len(p.methods))} 631 632 for i, method := range p.methods { 633 stat.methods[i] = method.Snapshot() 634 method.Reset() 635 } 636 637 return stat 638 } 639 640 func handleError(msg string, err error) error { 641 if err == nil { 642 return nil 643 } 644 if strings.Contains(err.Error(), "not found") { 645 return fmt.Errorf("%w: %s", ErrNodeNotFound, err.Error()) 646 } else if strings.Contains(err.Error(), "denied") { 647 return fmt.Errorf("%w: %s", ErrNodeAccessDenied, err.Error()) 648 } 649 return fmt.Errorf("%s: %w", msg, err) 650 } 651 652 func metaToKV(meta map[string]string) []*grpcService.KeyValue { 653 result := make([]*grpcService.KeyValue, 0, len(meta)) 654 655 for key, value := range meta { 656 result = append(result, &grpcService.KeyValue{Key: key, Value: []byte(value)}) 657 } 658 659 return result 660 } 661 662 func adjustNodeParams(nodeParams []pool.NodeParam) ([][]pool.NodeParam, error) { 663 if len(nodeParams) == 0 { 664 return nil, errors.New("no FrostFS peers configured") 665 } 666 667 nodeParamsMap := make(map[int][]pool.NodeParam) 668 for _, param := range nodeParams { 669 nodes := nodeParamsMap[param.Priority()] 670 nodeParamsMap[param.Priority()] = append(nodes, param) 671 } 672 673 res := make([][]pool.NodeParam, 0, len(nodeParamsMap)) 674 for _, nodes := range nodeParamsMap { 675 res = append(res, nodes) 676 } 677 678 sort.Slice(res, func(i, j int) bool { 679 return res[i][0].Priority() < res[j][0].Priority() 680 }) 681 682 return res, nil 683 } 684 685 func fillDefaultInitParams(params *InitParameters) { 686 if params.clientRebalanceInterval <= 0 { 687 params.clientRebalanceInterval = defaultRebalanceInterval 688 } 689 690 if params.healthcheckTimeout <= 0 { 691 params.healthcheckTimeout = defaultHealthcheckTimeout 692 } 693 694 if params.nodeDialTimeout <= 0 { 695 params.nodeDialTimeout = defaultDialTimeout 696 } 697 698 if params.nodeStreamTimeout <= 
0 { 699 params.nodeStreamTimeout = defaultStreamTimeout 700 } 701 702 if params.maxRequestAttempts <= 0 { 703 params.maxRequestAttempts = len(params.nodeParams) 704 } 705 } 706 707 func (p *Pool) log(level zapcore.Level, msg string, fields ...zap.Field) { 708 if p.logger == nil { 709 return 710 } 711 712 p.logger.Log(level, msg, fields...) 713 } 714 715 // startRebalance runs loop to monitor tree client healthy status. 716 func (p *Pool) startRebalance(ctx context.Context) { 717 ticker := time.NewTimer(p.rebalanceParams.clientRebalanceInterval) 718 buffers := make([][]bool, len(p.rebalanceParams.nodesGroup)) 719 for i, nodes := range p.rebalanceParams.nodesGroup { 720 buffers[i] = make([]bool, len(nodes)) 721 } 722 723 for { 724 select { 725 case <-ctx.Done(): 726 close(p.closedCh) 727 return 728 case <-ticker.C: 729 p.updateNodesHealth(ctx, buffers) 730 ticker.Reset(p.rebalanceParams.clientRebalanceInterval) 731 } 732 } 733 } 734 735 func (p *Pool) updateNodesHealth(ctx context.Context, buffers [][]bool) { 736 wg := sync.WaitGroup{} 737 for i, inner := range p.innerPools { 738 wg.Add(1) 739 740 go func(i int, _ *innerPool) { 741 defer wg.Done() 742 p.updateInnerNodesHealth(ctx, i, buffers[i]) 743 }(i, inner) 744 } 745 wg.Wait() 746 747 LOOP: 748 for i, buffer := range buffers { 749 for j, healthy := range buffer { 750 if healthy { 751 p.setStartIndices(i, j) 752 break LOOP 753 } 754 } 755 } 756 } 757 758 func (p *Pool) updateInnerNodesHealth(ctx context.Context, i int, buffer []bool) { 759 if i > len(p.innerPools)-1 { 760 return 761 } 762 nodesByPriority := p.innerPools[i] 763 options := p.rebalanceParams 764 765 var wg sync.WaitGroup 766 for j, cli := range nodesByPriority.clients { 767 wg.Add(1) 768 go func(j int, cli client) { 769 defer wg.Done() 770 771 tctx, c := context.WithTimeout(ctx, options.nodeRequestTimeout) 772 defer c() 773 774 changed, err := cli.redialIfNecessary(tctx) 775 healthy := err == nil 776 if changed { 777 fields := 
[]zap.Field{zap.String("address", cli.endpoint()), zap.Bool("healthy", healthy)} 778 if err != nil { 779 fields = append(fields, zap.Error(err)) 780 } 781 p.log(zap.DebugLevel, "tree health has changed", fields...) 782 } else if err != nil { 783 p.log(zap.DebugLevel, "tree redial error", zap.String("address", cli.endpoint()), zap.Error(err)) 784 } 785 buffer[j] = healthy 786 }(j, cli) 787 } 788 wg.Wait() 789 } 790 791 func (p *Pool) getStartIndices() (int, int) { 792 p.startIndicesMtx.RLock() 793 defer p.startIndicesMtx.RUnlock() 794 795 return p.startIndices[0], p.startIndices[1] 796 } 797 798 func (p *Pool) setStartIndices(i, j int) { 799 p.startIndicesMtx.Lock() 800 p.startIndices[0] = i 801 p.startIndices[1] = j 802 p.startIndicesMtx.Unlock() 803 } 804 805 func (p *Pool) requestWithRetry(ctx context.Context, fn func(client grpcService.TreeServiceClient) error) error { 806 var ( 807 err, finErr error 808 cl grpcService.TreeServiceClient 809 ) 810 811 reqID := GetRequestID(ctx) 812 813 startI, startJ := p.getStartIndices() 814 groupsLen := len(p.innerPools) 815 attempts := p.maxRequestAttempts 816 817 LOOP: 818 for i := startI; i < startI+groupsLen; i++ { 819 indexI := i % groupsLen 820 clientsLen := len(p.innerPools[indexI].clients) 821 for j := startJ; j < startJ+clientsLen; j++ { 822 indexJ := j % clientsLen 823 824 if attempts == 0 { 825 if startI != indexI || startJ != indexJ { 826 p.setStartIndices(indexI, indexJ) 827 } 828 break LOOP 829 } 830 attempts-- 831 832 if cl, err = p.innerPools[indexI].clients[indexJ].serviceClient(); err == nil { 833 err = fn(cl) 834 } 835 if !shouldTryAgain(err) { 836 if startI != indexI || startJ != indexJ { 837 p.setStartIndices(indexI, indexJ) 838 } 839 840 if err != nil { 841 err = fmt.Errorf("address %s: %w", p.innerPools[indexI].clients[indexJ].endpoint(), err) 842 } 843 844 return err 845 } 846 847 finErr = finalError(finErr, err) 848 p.log(zap.DebugLevel, "tree request error", zap.String("request_id", reqID), 
zap.Int("remaining attempts", attempts), 849 zap.String("address", p.innerPools[indexI].clients[indexJ].endpoint()), zap.Error(err)) 850 } 851 startJ = 0 852 } 853 854 return finErr 855 } 856 857 func shouldTryAgain(err error) bool { 858 return !(err == nil || errors.Is(err, ErrNodeAccessDenied)) 859 } 860 861 func prioErr(err error) int { 862 switch { 863 case err == nil: 864 return -1 865 case errors.Is(err, ErrNodeAccessDenied): 866 return 100 867 case errors.Is(err, ErrNodeNotFound) || 868 errors.Is(err, errNodeEmptyResult): 869 return 200 870 case errors.Is(err, ErrUnhealthyEndpoint): 871 return 300 872 default: 873 return 500 874 } 875 } 876 877 func finalError(current, candidate error) error { 878 if current == nil || candidate == nil { 879 return candidate 880 } 881 882 // lower priority error is more desirable 883 if prioErr(candidate) < prioErr(current) { 884 return candidate 885 } 886 887 return current 888 } 889 890 type reqKeyType struct{} 891 892 // SetRequestID sets request identifier to context so when some operations are logged in tree pool 893 // this identifier also be logged. 894 func SetRequestID(ctx context.Context, reqID string) context.Context { 895 return context.WithValue(ctx, reqKeyType{}, reqID) 896 } 897 898 // GetRequestID fetch tree pool request identifier from context. 899 func GetRequestID(ctx context.Context) string { 900 reqID, _ := ctx.Value(reqKeyType{}).(string) 901 return reqID 902 }