github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/chaos/cases/member.go (about)

     1  // Copyright 2020 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package main
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"time"
    20  
    21  	"github.com/pingcap/tiflow/dm/master/scheduler"
    22  	"github.com/pingcap/tiflow/dm/pb"
    23  )
    24  
const (
	// checkMemberTimes is the maximum number of readiness checks
	// checkMembersReadyLoop performs before giving up.
	checkMemberTimes = 5
	// checkMemberInterval is how long checkMembersReadyLoop waits before
	// each readiness check (including the first one).
	checkMemberInterval = 10 * time.Second
)
    29  
    30  // checkMembersReadyLoop checks whether all DM-master and DM-worker members have been ready.
    31  // NOTE: in this chaos case, we ensure 3 DM-master and 3 DM-worker started.
    32  func checkMembersReadyLoop(ctx context.Context, cli pb.MasterClient, masterCount, workerCount int) (err error) {
    33  	for i := 0; i < checkMemberTimes; i++ {
    34  		select {
    35  		case <-ctx.Done():
    36  			return nil
    37  		case <-time.After(checkMemberInterval):
    38  			err = checkMembersReady(ctx, cli, masterCount, workerCount)
    39  			if err == nil {
    40  				return nil
    41  			}
    42  		}
    43  	}
    44  	return err
    45  }
    46  
    47  func checkMembersReady(ctx context.Context, cli pb.MasterClient, masterCount, workerCount int) error {
    48  	resp, err := cli.ListMember(ctx, &pb.ListMemberRequest{})
    49  	if err != nil {
    50  		return err
    51  	} else if !resp.Result {
    52  		return fmt.Errorf("fail to list member: %s", resp.Msg)
    53  	}
    54  
    55  	var (
    56  		hasLeader       bool
    57  		allMasterAlive  bool
    58  		allWorkerOnline bool
    59  	)
    60  
    61  	for _, m := range resp.Members {
    62  		if m.GetLeader() != nil {
    63  			hasLeader = true
    64  		} else if lm := m.GetMaster(); lm != nil {
    65  			var aliveCount int
    66  			for _, master := range lm.Masters {
    67  				if master.Alive {
    68  					aliveCount++
    69  				}
    70  			}
    71  			allMasterAlive = aliveCount == masterCount
    72  		} else if lw := m.GetWorker(); lw != nil {
    73  			var onlineCount int
    74  			for _, worker := range lw.Workers {
    75  				if worker.Stage != string(scheduler.WorkerOffline) {
    76  					onlineCount++
    77  				}
    78  			}
    79  			allWorkerOnline = onlineCount == workerCount
    80  		}
    81  	}
    82  
    83  	if !hasLeader || !allMasterAlive || !allWorkerOnline {
    84  		return fmt.Errorf("not all members are ready: %s", resp.String())
    85  	}
    86  	return nil
    87  }