github.com/matrixorigin/matrixone@v1.2.0/pkg/hakeeper/checkers/coordinator.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package checkers
    16  
    17  import (
    18  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    19  	"github.com/matrixorigin/matrixone/pkg/hakeeper"
    20  	"github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/cnservice"
    21  	"github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/dnservice"
    22  	"github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/logservice"
    23  	"github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/proxy"
    24  	"github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/syshealth"
    25  	"github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/util"
    26  	"github.com/matrixorigin/matrixone/pkg/hakeeper/operator"
    27  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    28  	"go.uber.org/zap"
    29  )
    30  
    31  // Coordinator runs the hakeeper checkers via Check; it is assumed to be used in a synchronous, single-threaded context.
    32  type Coordinator struct {
    33  	OperatorController *operator.Controller // used by Check to drop finished operators and dispatch new ones
    34  
    35  	// Because Coordinator is only used from a synchronous, single-threaded
    36  	// context, the fields below need no mutex to protect them.
    37  	teardown    bool // set by Check once syshealth.Check reports the system unhealthy; never reset in this file
    38  	teardownOps []*operator.Operator // operators returned by that health check; dispatched again on every later Check
    39  
    40  	cfg hakeeper.Config // configuration handed to each checker; Fill() is called on it in NewCoordinator
    41  }
    42  
    43  func NewCoordinator(cfg hakeeper.Config) *Coordinator {
    44  	cfg.Fill()
    45  	return &Coordinator{
    46  		OperatorController: operator.NewController(),
    47  		cfg:                cfg,
    48  	}
    49  }
    50  
    51  func (c *Coordinator) Check(alloc util.IDAllocator, state pb.CheckerState) []pb.ScheduleCommand {
    52  	logState := state.LogState
    53  	tnState := state.TNState
    54  	cnState := state.CNState
    55  	proxyState := state.ProxyState
    56  	cluster := state.ClusterInfo
    57  	currentTick := state.Tick
    58  	user := state.TaskTableUser
    59  	runtime.ProcessLevelRuntime().Logger().Debug("hakeeper checker state",
    60  		zap.Any("cluster information", cluster),
    61  		zap.Any("log state", logState),
    62  		zap.Any("dn state", tnState),
    63  		zap.Any("cn state", cnState),
    64  		zap.Uint64("current tick", currentTick),
    65  	)
    66  
    67  	defer func() {
    68  		if !c.teardown {
    69  			runtime.ProcessLevelRuntime().Logger().Debug("MO is working.")
    70  		}
    71  	}()
    72  
    73  	c.OperatorController.RemoveFinishedOperator(logState, tnState, cnState, proxyState)
    74  
    75  	// if we've discovered unhealthy already, no need to keep alive anymore.
    76  	if c.teardown {
    77  		return c.OperatorController.Dispatch(c.teardownOps, logState, tnState, cnState, proxyState)
    78  	}
    79  
    80  	// check whether system health or not.
    81  	if operators, health := syshealth.Check(c.cfg, cluster, tnState, logState, currentTick); !health {
    82  		c.teardown = true
    83  		c.teardownOps = operators
    84  		return c.OperatorController.Dispatch(c.teardownOps, logState, tnState, cnState, proxyState)
    85  	}
    86  
    87  	// system health, try to keep alive.
    88  	executing := c.OperatorController.GetExecutingReplicas()
    89  
    90  	operators := make([]*operator.Operator, 0)
    91  	operators = append(operators, logservice.Check(alloc, c.cfg, cluster, logState, executing, user, currentTick)...)
    92  	operators = append(operators, dnservice.Check(alloc, c.cfg, cluster, tnState, user, currentTick)...)
    93  	operators = append(operators, cnservice.Check(c.cfg, cnState, user, currentTick)...)
    94  	operators = append(operators, proxy.Check(c.cfg, proxyState, currentTick)...)
    95  
    96  	return c.OperatorController.Dispatch(operators, logState, tnState, cnState, proxyState)
    97  }