github.com/matrixorigin/matrixone@v0.7.0/pkg/hakeeper/checkers/coordinator.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package checkers 16 17 import ( 18 "github.com/matrixorigin/matrixone/pkg/common/runtime" 19 "github.com/matrixorigin/matrixone/pkg/hakeeper" 20 "github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/cnservice" 21 "github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/dnservice" 22 "github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/logservice" 23 "github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/syshealth" 24 "github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/util" 25 "github.com/matrixorigin/matrixone/pkg/hakeeper/operator" 26 pb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 27 "go.uber.org/zap" 28 ) 29 30 // Coordinator is assumed to be used in synchronous, single-threaded context. 31 type Coordinator struct { 32 OperatorController *operator.Controller 33 34 // Considering the context of `Coordinator`, 35 // there is no need for a mutext to protect. 36 teardown bool 37 teardownOps []*operator.Operator 38 39 cfg hakeeper.Config 40 } 41 42 func NewCoordinator(cfg hakeeper.Config) *Coordinator { 43 cfg.Fill() 44 return &Coordinator{ 45 OperatorController: operator.NewController(), 46 cfg: cfg, 47 } 48 } 49 50 func (c *Coordinator) Check(alloc util.IDAllocator, state pb.CheckerState) []pb.ScheduleCommand { 51 logState := state.LogState 52 dnState := state.DNState 53 cnState := state.CNState 54 cluster := state.ClusterInfo 55 currentTick := state.Tick 56 user := state.TaskTableUser 57 runtime.ProcessLevelRuntime().Logger().Debug("hakeeper checker state", 58 zap.Any("cluster information", cluster), 59 zap.Any("log state", logState), 60 zap.Any("dn state", dnState), 61 zap.Any("cn state", cnState), 62 zap.Uint64("current tick", currentTick), 63 ) 64 65 defer func() { 66 if !c.teardown { 67 runtime.ProcessLevelRuntime().Logger().Debug("MO is working.") 68 } 69 }() 70 71 c.OperatorController.RemoveFinishedOperator(logState, dnState, cnState) 72 73 // if we've discovered unhealthy already, no need to keep alive anymore. 74 if c.teardown { 75 return c.OperatorController.Dispatch(c.teardownOps, logState, dnState, cnState) 76 } 77 78 // check whether system health or not. 79 if operators, health := syshealth.Check(c.cfg, cluster, dnState, logState, currentTick); !health { 80 c.teardown = true 81 c.teardownOps = operators 82 return c.OperatorController.Dispatch(c.teardownOps, logState, dnState, cnState) 83 } 84 85 // system health, try to keep alive. 86 executing := c.OperatorController.GetExecutingReplicas() 87 88 operators := make([]*operator.Operator, 0) 89 operators = append(operators, logservice.Check(alloc, c.cfg, cluster, logState, executing, user, currentTick)...) 90 operators = append(operators, dnservice.Check(alloc, c.cfg, cluster, dnState, user, currentTick)...) 91 operators = append(operators, cnservice.Check(c.cfg, cnState, user, currentTick)...) 92 93 return c.OperatorController.Dispatch(operators, logState, dnState, cnState) 94 }