github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/chaos/cases/member.go (about) 1 // Copyright 2020 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package main 15 16 import ( 17 "context" 18 "fmt" 19 "time" 20 21 "github.com/pingcap/tiflow/dm/master/scheduler" 22 "github.com/pingcap/tiflow/dm/pb" 23 ) 24 25 const ( 26 checkMemberTimes = 5 27 checkMemberInterval = 10 * time.Second 28 ) 29 30 // checkMembersReadyLoop checks whether all DM-master and DM-worker members have been ready. 31 // NOTE: in this chaos case, we ensure 3 DM-master and 3 DM-worker started. 32 func checkMembersReadyLoop(ctx context.Context, cli pb.MasterClient, masterCount, workerCount int) (err error) { 33 for i := 0; i < checkMemberTimes; i++ { 34 select { 35 case <-ctx.Done(): 36 return nil 37 case <-time.After(checkMemberInterval): 38 err = checkMembersReady(ctx, cli, masterCount, workerCount) 39 if err == nil { 40 return nil 41 } 42 } 43 } 44 return err 45 } 46 47 func checkMembersReady(ctx context.Context, cli pb.MasterClient, masterCount, workerCount int) error { 48 resp, err := cli.ListMember(ctx, &pb.ListMemberRequest{}) 49 if err != nil { 50 return err 51 } else if !resp.Result { 52 return fmt.Errorf("fail to list member: %s", resp.Msg) 53 } 54 55 var ( 56 hasLeader bool 57 allMasterAlive bool 58 allWorkerOnline bool 59 ) 60 61 for _, m := range resp.Members { 62 if m.GetLeader() != nil { 63 hasLeader = true 64 } else if lm := m.GetMaster(); lm != nil { 65 var aliveCount int 66 for _, master := range lm.Masters { 67 if master.Alive { 68 aliveCount++ 69 } 70 } 71 allMasterAlive = aliveCount == masterCount 72 } else if lw := m.GetWorker(); lw != nil { 73 var onlineCount int 74 for _, worker := range lw.Workers { 75 if worker.Stage != string(scheduler.WorkerOffline) { 76 onlineCount++ 77 } 78 } 79 allWorkerOnline = onlineCount == workerCount 80 } 81 } 82 83 if !hasLeader || !allMasterAlive || !allWorkerOnline { 84 return fmt.Errorf("not all members are ready: %s", resp.String()) 85 } 86 return nil 87 }