github.com/matrixorigin/matrixone@v1.2.0/pkg/hakeeper/checkers/coordinator.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package checkers 16 17 import ( 18 "github.com/matrixorigin/matrixone/pkg/common/runtime" 19 "github.com/matrixorigin/matrixone/pkg/hakeeper" 20 "github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/cnservice" 21 "github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/dnservice" 22 "github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/logservice" 23 "github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/proxy" 24 "github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/syshealth" 25 "github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/util" 26 "github.com/matrixorigin/matrixone/pkg/hakeeper/operator" 27 pb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 28 "go.uber.org/zap" 29 ) 30 31 // Coordinator is assumed to be used in synchronous, single-threaded context. 32 type Coordinator struct { 33 OperatorController *operator.Controller 34 35 // Considering the context of `Coordinator`, 36 // there is no need for a mutext to protect. 37 teardown bool 38 teardownOps []*operator.Operator 39 40 cfg hakeeper.Config 41 } 42 43 func NewCoordinator(cfg hakeeper.Config) *Coordinator { 44 cfg.Fill() 45 return &Coordinator{ 46 OperatorController: operator.NewController(), 47 cfg: cfg, 48 } 49 } 50 51 func (c *Coordinator) Check(alloc util.IDAllocator, state pb.CheckerState) []pb.ScheduleCommand { 52 logState := state.LogState 53 tnState := state.TNState 54 cnState := state.CNState 55 proxyState := state.ProxyState 56 cluster := state.ClusterInfo 57 currentTick := state.Tick 58 user := state.TaskTableUser 59 runtime.ProcessLevelRuntime().Logger().Debug("hakeeper checker state", 60 zap.Any("cluster information", cluster), 61 zap.Any("log state", logState), 62 zap.Any("dn state", tnState), 63 zap.Any("cn state", cnState), 64 zap.Uint64("current tick", currentTick), 65 ) 66 67 defer func() { 68 if !c.teardown { 69 runtime.ProcessLevelRuntime().Logger().Debug("MO is working.") 70 } 71 }() 72 73 c.OperatorController.RemoveFinishedOperator(logState, tnState, cnState, proxyState) 74 75 // if we've discovered unhealthy already, no need to keep alive anymore. 76 if c.teardown { 77 return c.OperatorController.Dispatch(c.teardownOps, logState, tnState, cnState, proxyState) 78 } 79 80 // check whether system health or not. 81 if operators, health := syshealth.Check(c.cfg, cluster, tnState, logState, currentTick); !health { 82 c.teardown = true 83 c.teardownOps = operators 84 return c.OperatorController.Dispatch(c.teardownOps, logState, tnState, cnState, proxyState) 85 } 86 87 // system health, try to keep alive. 88 executing := c.OperatorController.GetExecutingReplicas() 89 90 operators := make([]*operator.Operator, 0) 91 operators = append(operators, logservice.Check(alloc, c.cfg, cluster, logState, executing, user, currentTick)...) 92 operators = append(operators, dnservice.Check(alloc, c.cfg, cluster, tnState, user, currentTick)...) 93 operators = append(operators, cnservice.Check(c.cfg, cnState, user, currentTick)...) 94 operators = append(operators, proxy.Check(c.cfg, proxyState, currentTick)...) 95 96 return c.OperatorController.Dispatch(operators, logState, tnState, cnState, proxyState) 97 }