github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/servermaster/service_util.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package servermaster
    15  
    16  import (
    17  	"fmt"
    18  	"math/rand"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/pingcap/log"
    23  	pb "github.com/pingcap/tiflow/engine/enginepb"
    24  	"github.com/pingcap/tiflow/engine/pkg/rpcutil"
    25  	"github.com/pingcap/tiflow/pkg/election"
    26  	"github.com/pingcap/tiflow/pkg/errors"
    27  	"go.uber.org/atomic"
    28  	"go.uber.org/zap"
    29  	"google.golang.org/grpc"
    30  	"google.golang.org/grpc/credentials/insecure"
    31  )
    32  
    33  func init() {
    34  	rand.Seed(time.Now().UnixNano())
    35  }
    36  
    37  func generateNodeID(name string) string {
    38  	val := rand.Uint32()
    39  	id := fmt.Sprintf("%s-%08x", name, val)
    40  	return id
    41  }
    42  
    43  // multiClient is an interface that implements all the Client interfaces
    44  // for the individual services running on the server masters.
    45  type multiClient interface {
    46  	pb.DiscoveryClient
    47  	pb.ResourceManagerClient
    48  	pb.TaskSchedulerClient
    49  	pb.JobManagerClient
    50  }
    51  
    52  type multiClientImpl struct {
    53  	pb.DiscoveryClient
    54  	pb.ResourceManagerClient
    55  	pb.TaskSchedulerClient
    56  	pb.JobManagerClient
    57  }
    58  
    59  func newMultiClient(conn *grpc.ClientConn) multiClient {
    60  	return &multiClientImpl{
    61  		DiscoveryClient:       pb.NewDiscoveryClient(conn),
    62  		ResourceManagerClient: pb.NewResourceManagerClient(conn),
    63  		TaskSchedulerClient:   pb.NewTaskSchedulerClient(conn),
    64  		JobManagerClient:      pb.NewJobManagerClient(conn),
    65  	}
    66  }
    67  
    68  var leaderOnlyMethods = map[string]struct{}{
    69  	"ListExecutors":    {},
    70  	"RegisterExecutor": {},
    71  	"Heartbeat":        {},
    72  	"CreateJob":        {},
    73  	"GetJob":           {},
    74  	"ListJobs":         {},
    75  	"CancelJob":        {},
    76  	"DeleteJob":        {},
    77  	"ScheduleTask":     {},
    78  }
    79  
    80  var _ rpcutil.ForwardChecker[multiClient] = &forwardChecker{}
    81  
    82  type forwardChecker struct {
    83  	elector election.Elector
    84  
    85  	rwm       sync.RWMutex
    86  	conn      *grpc.ClientConn
    87  	leaderCli multiClient
    88  }
    89  
    90  func newForwardChecker(elector election.Elector) *forwardChecker {
    91  	return &forwardChecker{
    92  		elector: elector,
    93  	}
    94  }
    95  
    96  func (f *forwardChecker) LeaderOnly(method string) bool {
    97  	_, ok := leaderOnlyMethods[method]
    98  	return ok
    99  }
   100  
   101  func (f *forwardChecker) IsLeader() bool {
   102  	return f.elector.IsLeader()
   103  }
   104  
   105  func (f *forwardChecker) LeaderClient() (multiClient, error) {
   106  	leader, ok := f.elector.GetLeader()
   107  	if !ok {
   108  		return nil, errors.ErrMasterNoLeader.GenWithStackByArgs()
   109  	}
   110  	return f.getOrCreateLeaderClient(leader.Address)
   111  }
   112  
   113  func (f *forwardChecker) getOrCreateLeaderClient(leaderAddr string) (multiClient, error) {
   114  	f.rwm.RLock()
   115  	if f.conn != nil && f.conn.Target() == leaderAddr {
   116  		f.rwm.RUnlock()
   117  		return f.leaderCli, nil
   118  	}
   119  	f.rwm.RUnlock()
   120  
   121  	f.rwm.Lock()
   122  	defer f.rwm.Unlock()
   123  
   124  	if f.conn != nil {
   125  		if f.conn.Target() == leaderAddr {
   126  			return f.leaderCli, nil
   127  		}
   128  		if err := f.conn.Close(); err != nil {
   129  			log.Warn("failed to close grpc connection", zap.Error(err))
   130  		}
   131  		f.conn = nil
   132  	}
   133  
   134  	conn, err := grpc.Dial(leaderAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
   135  	if err != nil {
   136  		return nil, errors.Cause(err)
   137  	}
   138  	f.conn = conn
   139  	f.leaderCli = newMultiClient(conn)
   140  	return f.leaderCli, nil
   141  }
   142  
   143  func (f *forwardChecker) Close() error {
   144  	f.rwm.Lock()
   145  	defer f.rwm.Unlock()
   146  
   147  	var err error
   148  	if f.conn != nil {
   149  		err = f.conn.Close()
   150  		f.conn = nil
   151  	}
   152  	f.leaderCli = nil
   153  	return err
   154  }
   155  
   156  // ensure featureDegrader implements rpcutil.FeatureChecker
   157  var _ rpcutil.FeatureChecker = &featureDegrader{}
   158  
   159  // featureDegrader is used to record whether a feature is available or degradation
   160  // in server master.
   161  type featureDegrader struct {
   162  	executorManager     atomic.Bool
   163  	masterWorkerManager atomic.Bool
   164  }
   165  
   166  func newFeatureDegrader() *featureDegrader {
   167  	fd := &featureDegrader{}
   168  	fd.reset()
   169  	return fd
   170  }
   171  
   172  func (d *featureDegrader) updateExecutorManager(val bool) {
   173  	d.executorManager.Store(val)
   174  }
   175  
   176  func (d *featureDegrader) updateMasterWorkerManager(val bool) {
   177  	d.masterWorkerManager.Store(val)
   178  }
   179  
   180  func (d *featureDegrader) reset() {
   181  	d.executorManager.Store(false)
   182  	d.masterWorkerManager.Store(false)
   183  }
   184  
   185  // Available implements rpcutil.FeatureChecker
   186  func (d *featureDegrader) Available(method string) bool {
   187  	switch method {
   188  	case "ListExecutors", "RegisterExecutor", "Heartbeat":
   189  		return d.executorManager.Load()
   190  	case "CreateJob", "GetJob", "ListJobs", "CancelJob", "DeleteJob",
   191  		"ScheduleTask":
   192  		return d.masterWorkerManager.Load()
   193  	}
   194  	return true
   195  }