github.com/matrixorigin/matrixone@v1.2.0/pkg/logservice/hakeeper_client.go

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package logservice
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"math/rand"
    21  	"sync"
    22  	"time"
    23  
    24  	"go.uber.org/zap"
    25  
    26  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    27  	"github.com/matrixorigin/matrixone/pkg/common/morpc"
    28  	"github.com/matrixorigin/matrixone/pkg/hakeeper"
    29  	"github.com/matrixorigin/matrixone/pkg/logutil"
    30  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    31  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    32  )
    33  
    34  const (
    35  	defaultBackendReadTimeout = time.Second * 8
    36  )
    37  
    38  type basicHAKeeperClient interface {
    39  	// Close closes the hakeeper client.
    40  	Close() error
     41  	// AllocateID allocates a globally unique ID.
    42  	AllocateID(ctx context.Context) (uint64, error)
     43  	// AllocateIDByKey allocates a globally unique ID by key.
    44  	AllocateIDByKey(ctx context.Context, key string) (uint64, error)
     45  	// AllocateIDByKeyWithBatch allocates a globally unique ID by key using the given batch size.
    46  	AllocateIDByKeyWithBatch(ctx context.Context, key string, batch uint64) (uint64, error)
     47  	// GetClusterDetails queries the HAKeeper and returns the CN and TN nodes that
     48  	// are known to the HAKeeper.
    49  	GetClusterDetails(ctx context.Context) (pb.ClusterDetails, error)
     50  	// GetClusterState queries the cluster state.
    51  	GetClusterState(ctx context.Context) (pb.CheckerState, error)
    52  }
    53  
     54  // ClusterHAKeeperClient is the HAKeeper client used to get cluster details.
    55  type ClusterHAKeeperClient interface {
    56  	basicHAKeeperClient
    57  }
    58  
    59  // CNHAKeeperClient is the HAKeeper client used by a CN store.
    60  type CNHAKeeperClient interface {
    61  	basicHAKeeperClient
    62  	BRHAKeeperClient
    63  	// SendCNHeartbeat sends the specified heartbeat message to the HAKeeper.
    64  	SendCNHeartbeat(ctx context.Context, hb pb.CNStoreHeartbeat) (pb.CommandBatch, error)
    65  }
    66  
    67  // TNHAKeeperClient is the HAKeeper client used by a TN store.
    68  type TNHAKeeperClient interface {
    69  	basicHAKeeperClient
    70  	// SendTNHeartbeat sends the specified heartbeat message to the HAKeeper. The
    71  	// returned CommandBatch contains Schedule Commands to be executed by the local
    72  	// TN store.
    73  	SendTNHeartbeat(ctx context.Context, hb pb.TNStoreHeartbeat) (pb.CommandBatch, error)
    74  }
    75  
    76  // LogHAKeeperClient is the HAKeeper client used by a Log store.
    77  type LogHAKeeperClient interface {
    78  	basicHAKeeperClient
    79  	// SendLogHeartbeat sends the specified heartbeat message to the HAKeeper. The
    80  	// returned CommandBatch contains Schedule Commands to be executed by the local
    81  	// Log store.
    82  	SendLogHeartbeat(ctx context.Context, hb pb.LogStoreHeartbeat) (pb.CommandBatch, error)
    83  }
    84  
    85  // ProxyHAKeeperClient is the HAKeeper client used by proxy service.
    86  type ProxyHAKeeperClient interface {
    87  	basicHAKeeperClient
    88  	// GetCNState gets CN state from HAKeeper.
    89  	GetCNState(ctx context.Context) (pb.CNState, error)
    90  	// UpdateCNLabel updates the labels of CN.
    91  	UpdateCNLabel(ctx context.Context, label pb.CNStoreLabel) error
    92  	// UpdateCNWorkState updates the work state of CN.
    93  	UpdateCNWorkState(ctx context.Context, state pb.CNWorkState) error
    94  	// PatchCNStore updates the work state and labels of CN.
    95  	PatchCNStore(ctx context.Context, stateLabel pb.CNStateLabel) error
    96  	// DeleteCNStore deletes a CN store from HAKeeper.
    97  	DeleteCNStore(ctx context.Context, cnStore pb.DeleteCNStore) error
    98  	// SendProxyHeartbeat sends the heartbeat of proxy to HAKeeper.
    99  	SendProxyHeartbeat(ctx context.Context, hb pb.ProxyHeartbeat) (pb.CommandBatch, error)
   100  }
   101  
   102  // BRHAKeeperClient is the HAKeeper client for backup and restore.
   103  type BRHAKeeperClient interface {
   104  	GetBackupData(ctx context.Context) ([]byte, error)
   105  }
   106  
   107  // TODO: HAKeeper discovery to be implemented
   108  
   109  var _ CNHAKeeperClient = (*managedHAKeeperClient)(nil)
   110  var _ TNHAKeeperClient = (*managedHAKeeperClient)(nil)
   111  var _ LogHAKeeperClient = (*managedHAKeeperClient)(nil)
   112  var _ ProxyHAKeeperClient = (*managedHAKeeperClient)(nil)
   113  
   114  // NewCNHAKeeperClient creates a HAKeeper client to be used by a CN node.
   115  //
    116  // NB: the caller can specify options for morpc.Client via ctx.
   117  func NewCNHAKeeperClient(ctx context.Context,
   118  	cfg HAKeeperClientConfig) (CNHAKeeperClient, error) {
   119  	if err := cfg.Validate(); err != nil {
   120  		return nil, err
   121  	}
   122  	return newManagedHAKeeperClient(ctx, cfg)
   123  }
   124  
   125  // NewTNHAKeeperClient creates a HAKeeper client to be used by a TN node.
   126  //
    127  // NB: the caller can specify options for morpc.Client via ctx.
   128  func NewTNHAKeeperClient(ctx context.Context,
   129  	cfg HAKeeperClientConfig) (TNHAKeeperClient, error) {
   130  	if err := cfg.Validate(); err != nil {
   131  		return nil, err
   132  	}
   133  	return newManagedHAKeeperClient(ctx, cfg)
   134  }
   135  
   136  // NewLogHAKeeperClient creates a HAKeeper client to be used by a Log Service node.
   137  //
    138  // NB: the caller can specify options for morpc.Client via ctx.
   139  func NewLogHAKeeperClient(ctx context.Context,
   140  	cfg HAKeeperClientConfig) (LogHAKeeperClient, error) {
   141  	if err := cfg.Validate(); err != nil {
   142  		return nil, err
   143  	}
   144  	return newManagedHAKeeperClient(ctx, cfg)
   145  }
   146  
   147  // NewProxyHAKeeperClient creates a HAKeeper client to be used by a proxy service.
   148  //
    149  // NB: the caller can specify options for morpc.Client via ctx.
   150  func NewProxyHAKeeperClient(ctx context.Context,
   151  	cfg HAKeeperClientConfig) (ProxyHAKeeperClient, error) {
   152  	if err := cfg.Validate(); err != nil {
   153  		return nil, err
   154  	}
   155  	return newManagedHAKeeperClient(ctx, cfg)
   156  }
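
         // For illustration only: a CN store would typically create and use the
         // client roughly as follows (ctx, cfg and hb are assumed to be a valid
         // context, a populated HAKeeperClientConfig and a pb.CNStoreHeartbeat):
         //
         //	client, err := NewCNHAKeeperClient(ctx, cfg)
         //	if err != nil {
         //		return err
         //	}
         //	defer client.Close()
         //	cb, err := client.SendCNHeartbeat(ctx, hb)
         //	id, err := client.AllocateID(ctx)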
   157  
   158  func newManagedHAKeeperClient(ctx context.Context,
   159  	cfg HAKeeperClientConfig) (*managedHAKeeperClient, error) {
   160  	c, err := newHAKeeperClient(ctx, cfg)
   161  	if err != nil {
   162  		return nil, err
   163  	}
   164  
   165  	mc := &managedHAKeeperClient{
   166  		cfg:            cfg,
   167  		backendOptions: GetBackendOptions(ctx),
   168  		clientOptions:  GetClientOptions(ctx),
   169  	}
   170  	mc.mu.client = c
   171  	mc.mu.allocIDByKey = make(map[string]*allocID)
   172  	return mc, nil
   173  }
   174  
    175  // allocID records the next ID to hand out and the last ID of the cached batch.
   176  type allocID struct {
   177  	nextID uint64
   178  	lastID uint64
   179  }
   180  
   181  type managedHAKeeperClient struct {
   182  	cfg HAKeeperClientConfig
   183  
    184  	// prepareClient may re-create the underlying morpc.Client,
    185  	// so we keep the options needed to construct it.
   186  	backendOptions []morpc.BackendOption
   187  	clientOptions  []morpc.ClientOption
   188  
   189  	mu struct {
   190  		sync.RWMutex
    191  		// allocIDByKey is used to allocate IDs for different keys.
   192  		allocIDByKey map[string]*allocID
    193  		// sharedAllocID is used to allocate globally shared IDs.
   194  		sharedAllocID allocID
   195  
   196  		client *hakeeperClient
   197  	}
   198  }
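
         // Every exported method of managedHAKeeperClient follows the same pattern:
         // prepare (or re-create) the underlying hakeeperClient, issue the request,
         // drop the connection on any error, and retry only when the error is the
         // retryable "no HAKeeper" case.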
   199  
   200  func (c *managedHAKeeperClient) Close() error {
   201  	c.mu.Lock()
   202  	defer c.mu.Unlock()
   203  	if c.mu.client == nil {
   204  		return nil
   205  	}
   206  	return c.mu.client.close()
   207  }
   208  
   209  func (c *managedHAKeeperClient) GetClusterDetails(ctx context.Context) (pb.ClusterDetails, error) {
   210  	for {
   211  		if err := c.prepareClient(ctx); err != nil {
   212  			return pb.ClusterDetails{}, err
   213  		}
   214  		cd, err := c.getClient().getClusterDetails(ctx)
   215  		if err != nil {
   216  			c.resetClient()
   217  		}
   218  		if c.isRetryableError(err) {
   219  			continue
   220  		}
   221  		return cd, err
   222  	}
   223  }
   224  
   225  func (c *managedHAKeeperClient) GetClusterState(ctx context.Context) (pb.CheckerState, error) {
   226  	for {
   227  		if err := c.prepareClient(ctx); err != nil {
   228  			return pb.CheckerState{}, err
   229  		}
   230  		s, err := c.getClient().getClusterState(ctx)
   231  		if err != nil {
   232  			c.resetClient()
   233  		}
   234  		if c.isRetryableError(err) {
   235  			continue
   236  		}
   237  		return s, err
   238  	}
   239  }
   240  
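         // AllocateID implements the basicHAKeeperClient interface. IDs are served
         // from a locally cached range under the mutex; only when the cached range
         // is exhausted is the HAKeeper asked for another batch of
         // cfg.AllocateIDBatch IDs.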
   241  func (c *managedHAKeeperClient) AllocateID(ctx context.Context) (uint64, error) {
   242  	c.mu.Lock()
   243  	if c.mu.sharedAllocID.nextID != c.mu.sharedAllocID.lastID {
   244  		v := c.mu.sharedAllocID.nextID
   245  		c.mu.sharedAllocID.nextID++
   246  		c.mu.Unlock()
   247  		if v == 0 {
   248  			logutil.Error("id should not be 0",
   249  				zap.Uint64("nextID", c.mu.sharedAllocID.nextID),
   250  				zap.Uint64("lastID", c.mu.sharedAllocID.lastID))
   251  		}
   252  		return v, nil
   253  	}
   254  
   255  	for {
   256  		if err := c.prepareClientLocked(ctx); err != nil {
   257  			return 0, err
   258  		}
   259  		firstID, err := c.mu.client.sendCNAllocateID(ctx, "", c.cfg.AllocateIDBatch)
   260  		if err != nil {
   261  			c.resetClientLocked()
   262  		}
   263  		if c.isRetryableError(err) {
   264  			continue
   265  		}
   266  
   267  		c.mu.sharedAllocID.nextID = firstID + 1
   268  		c.mu.sharedAllocID.lastID = firstID + c.cfg.AllocateIDBatch - 1
   269  		c.mu.Unlock()
   270  		if firstID == 0 {
   271  			logutil.Error("id should not be 0",
   272  				zap.Error(err),
   273  				zap.Uint64("batch", c.cfg.AllocateIDBatch),
   274  				zap.Uint64("nextID", c.mu.sharedAllocID.nextID),
   275  				zap.Uint64("lastID", c.mu.sharedAllocID.lastID))
   276  		}
   277  		return firstID, err
   278  	}
   279  }
   280  
   281  // AllocateIDByKey implements the basicHAKeeperClient interface.
   282  func (c *managedHAKeeperClient) AllocateIDByKey(ctx context.Context, key string) (uint64, error) {
   283  	return c.AllocateIDByKeyWithBatch(ctx, key, c.cfg.AllocateIDBatch)
   284  }
   285  
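         // AllocateIDByKeyWithBatch implements the basicHAKeeperClient interface.
         // It behaves like AllocateIDByKey but uses the given batch size instead of
         // cfg.AllocateIDBatch when fetching a new range for the key.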
   286  func (c *managedHAKeeperClient) AllocateIDByKeyWithBatch(
   287  	ctx context.Context,
   288  	key string,
   289  	batch uint64) (uint64, error) {
    290  	// The empty key is reserved for the shared (global) ID allocator.
   291  	if len(key) == 0 {
   292  		return 0, moerr.NewInternalError(ctx, "key should not be empty")
   293  	}
   294  
   295  	c.mu.Lock()
   296  	defer c.mu.Unlock()
   297  	allocIDs, ok := c.mu.allocIDByKey[key]
   298  	if !ok {
   299  		allocIDs = &allocID{nextID: 0, lastID: 0}
   300  		c.mu.allocIDByKey[key] = allocIDs
   301  	}
   302  
   303  	if allocIDs.nextID != allocIDs.lastID {
   304  		v := allocIDs.nextID
   305  		allocIDs.nextID++
   306  		return v, nil
   307  	}
   308  
   309  	for {
   310  		if err := c.prepareClientLocked(ctx); err != nil {
   311  			return 0, err
   312  		}
   313  		firstID, err := c.mu.client.sendCNAllocateID(ctx, key, batch)
   314  		if err != nil {
   315  			c.resetClientLocked()
   316  		}
   317  		if c.isRetryableError(err) {
   318  			continue
   319  		}
   320  
   321  		allocIDs.nextID = firstID + 1
   322  		allocIDs.lastID = firstID + batch - 1
   323  		return firstID, err
   324  	}
   325  }
   326  
   327  func (c *managedHAKeeperClient) SendCNHeartbeat(ctx context.Context,
   328  	hb pb.CNStoreHeartbeat) (pb.CommandBatch, error) {
   329  	for {
   330  		if err := c.prepareClient(ctx); err != nil {
   331  			return pb.CommandBatch{}, err
   332  		}
   333  		result, err := c.getClient().sendCNHeartbeat(ctx, hb)
   334  		if err != nil {
   335  			c.resetClient()
   336  		}
   337  		if c.isRetryableError(err) {
   338  			continue
   339  		}
   340  		return result, err
   341  	}
   342  }
   343  
   344  func (c *managedHAKeeperClient) SendTNHeartbeat(ctx context.Context,
   345  	hb pb.TNStoreHeartbeat) (pb.CommandBatch, error) {
   346  	for {
   347  		if err := c.prepareClient(ctx); err != nil {
   348  			return pb.CommandBatch{}, err
   349  		}
   350  		cb, err := c.getClient().sendTNHeartbeat(ctx, hb)
   351  		if err != nil {
   352  			c.resetClient()
   353  		}
   354  		if c.isRetryableError(err) {
   355  			continue
   356  		}
   357  		return cb, err
   358  	}
   359  }
   360  
   361  func (c *managedHAKeeperClient) SendLogHeartbeat(ctx context.Context,
   362  	hb pb.LogStoreHeartbeat) (pb.CommandBatch, error) {
   363  	for {
   364  		if err := c.prepareClient(ctx); err != nil {
   365  			return pb.CommandBatch{}, err
   366  		}
   367  		cb, err := c.getClient().sendLogHeartbeat(ctx, hb)
   368  		if err != nil {
   369  			c.resetClient()
   370  		}
   371  		if c.isRetryableError(err) {
   372  			continue
   373  		}
   374  		return cb, err
   375  	}
   376  }
   377  
   378  // GetCNState implements the ProxyHAKeeperClient interface.
   379  func (c *managedHAKeeperClient) GetCNState(ctx context.Context) (pb.CNState, error) {
   380  	for {
   381  		if err := c.prepareClient(ctx); err != nil {
   382  			return pb.CNState{}, err
   383  		}
   384  		s, err := c.getClient().getCNState(ctx)
   385  		if err != nil {
   386  			c.resetClient()
   387  		}
   388  		if c.isRetryableError(err) {
   389  			continue
   390  		}
   391  		return s, err
   392  	}
   393  }
   394  
   395  // UpdateCNLabel implements the ProxyHAKeeperClient interface.
   396  func (c *managedHAKeeperClient) UpdateCNLabel(ctx context.Context, label pb.CNStoreLabel) error {
   397  	for {
   398  		if err := c.prepareClient(ctx); err != nil {
   399  			return err
   400  		}
   401  		err := c.getClient().updateCNLabel(ctx, label)
   402  		if err != nil {
   403  			c.resetClient()
   404  		}
   405  		if c.isRetryableError(err) {
   406  			continue
   407  		}
   408  		return err
   409  	}
   410  }
   411  
   412  // UpdateCNWorkState implements the ProxyHAKeeperClient interface.
   413  func (c *managedHAKeeperClient) UpdateCNWorkState(ctx context.Context, state pb.CNWorkState) error {
   414  	for {
   415  		if err := c.prepareClient(ctx); err != nil {
   416  			return err
   417  		}
   418  		err := c.getClient().updateCNWorkState(ctx, state)
   419  		if err != nil {
   420  			c.resetClient()
   421  		}
   422  		if c.isRetryableError(err) {
   423  			continue
   424  		}
   425  		return err
   426  	}
   427  }
   428  
   429  // PatchCNStore implements the ProxyHAKeeperClient interface.
   430  func (c *managedHAKeeperClient) PatchCNStore(ctx context.Context, stateLabel pb.CNStateLabel) error {
   431  	for {
   432  		if err := c.prepareClient(ctx); err != nil {
   433  			return err
   434  		}
   435  		err := c.getClient().patchCNStore(ctx, stateLabel)
   436  		if err != nil {
   437  			c.resetClient()
   438  		}
   439  		if c.isRetryableError(err) {
   440  			continue
   441  		}
   442  		return err
   443  	}
   444  }
   445  
   446  // DeleteCNStore implements the ProxyHAKeeperClient interface.
   447  func (c *managedHAKeeperClient) DeleteCNStore(ctx context.Context, cnStore pb.DeleteCNStore) error {
   448  	for {
   449  		if err := c.prepareClient(ctx); err != nil {
   450  			return err
   451  		}
   452  		err := c.getClient().deleteCNStore(ctx, cnStore)
   453  		if err != nil {
   454  			c.resetClient()
   455  		}
   456  		if c.isRetryableError(err) {
   457  			continue
   458  		}
   459  		return err
   460  	}
   461  }
   462  
   463  // SendProxyHeartbeat implements the ProxyHAKeeperClient interface.
   464  func (c *managedHAKeeperClient) SendProxyHeartbeat(ctx context.Context, hb pb.ProxyHeartbeat) (pb.CommandBatch, error) {
   465  	for {
   466  		if err := c.prepareClient(ctx); err != nil {
   467  			return pb.CommandBatch{}, err
   468  		}
   469  		cb, err := c.getClient().sendProxyHeartbeat(ctx, hb)
   470  		if err != nil {
   471  			c.resetClient()
   472  		}
   473  		if c.isRetryableError(err) {
   474  			continue
   475  		}
   476  		return cb, err
   477  	}
   478  }
   479  
   480  // GetBackupData implements the BRHAKeeperClient interface.
   481  func (c *managedHAKeeperClient) GetBackupData(ctx context.Context) ([]byte, error) {
   482  	for {
   483  		if err := c.prepareClient(ctx); err != nil {
   484  			return nil, err
   485  		}
   486  		s, err := c.getClient().getBackupData(ctx)
   487  		if err != nil {
   488  			c.resetClient()
   489  		}
   490  		if c.isRetryableError(err) {
   491  			continue
   492  		}
   493  		return s, err
   494  	}
   495  }
   496  
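         // isRetryableError reports whether the request should be retried with a
         // freshly created client. Only the "no HAKeeper" error is considered
         // retryable; all other errors are returned to the caller.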
   497  func (c *managedHAKeeperClient) isRetryableError(err error) bool {
   498  	return moerr.IsMoErrCode(err, moerr.ErrNoHAKeeper)
   499  }
   500  
   501  func (c *managedHAKeeperClient) resetClient() {
   502  	c.mu.Lock()
   503  	defer c.mu.Unlock()
   504  	c.resetClientLocked()
   505  }
   506  
   507  func (c *managedHAKeeperClient) prepareClient(ctx context.Context) error {
   508  	c.mu.Lock()
   509  	defer c.mu.Unlock()
   510  	return c.prepareClientLocked(ctx)
   511  }
   512  
   513  func (c *managedHAKeeperClient) resetClientLocked() {
   514  	if c.mu.client != nil {
   515  		cc := c.mu.client
   516  		c.mu.client = nil
   517  		if err := cc.close(); err != nil {
   518  			logutil.Error("failed to close client", zap.Error(err))
   519  		}
   520  	}
   521  }
   522  
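         // prepareClientLocked creates the underlying hakeeperClient if there is
         // none; c.mu must be held by the caller.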
   523  func (c *managedHAKeeperClient) prepareClientLocked(ctx context.Context) error {
   524  	if c.mu.client != nil {
   525  		return nil
   526  	}
   527  
    528  	// we must use the recorded options for the morpc.Client
   529  	ctx = SetBackendOptions(ctx, c.backendOptions...)
   530  	ctx = SetClientOptions(ctx, c.clientOptions...)
   531  
   532  	cc, err := newHAKeeperClient(ctx, c.cfg)
   533  	if err != nil {
   534  		return err
   535  	}
   536  	c.mu.client = cc
   537  	return nil
   538  }
   539  
   540  type hakeeperClient struct {
   541  	cfg      HAKeeperClientConfig
   542  	client   morpc.RPCClient
   543  	addr     string
   544  	pool     *sync.Pool
   545  	respPool *sync.Pool
   546  }
   547  
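         // newHAKeeperClient creates a client connected to the HAKeeper, either
         // through the configured discovery (reverse proxy) address or directly via
         // the configured service addresses.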
   548  func newHAKeeperClient(ctx context.Context,
   549  	cfg HAKeeperClientConfig) (*hakeeperClient, error) {
   550  	var err error
    551  	// If the discovery address is configured, it is used first.
   552  	if len(cfg.DiscoveryAddress) > 0 {
   553  		c, err := connectByReverseProxy(ctx, cfg.DiscoveryAddress, cfg)
   554  		if c != nil && err == nil {
   555  			return c, nil
   556  		}
   557  	} else if len(cfg.ServiceAddresses) > 0 {
   558  		c, err := connectToHAKeeper(ctx, cfg.ServiceAddresses, cfg)
   559  		if c != nil && err == nil {
   560  			return c, nil
   561  		}
   562  	}
   563  	if err != nil {
   564  		return nil, err
   565  	}
   566  	return nil, moerr.NewNoHAKeeper(ctx)
   567  }
   568  
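         // connectByReverseProxy resolves the HAKeeper shard through the discovery
         // address and then dials its replicas, trying the known leader replica
         // before the others.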
   569  func connectByReverseProxy(ctx context.Context,
   570  	discoveryAddress string, cfg HAKeeperClientConfig) (*hakeeperClient, error) {
   571  	si, ok, err := GetShardInfo(discoveryAddress, hakeeper.DefaultHAKeeperShardID)
   572  	if err != nil {
   573  		return nil, err
   574  	}
   575  	if !ok {
   576  		return nil, nil
   577  	}
   578  	addresses := make([]string, 0)
   579  	leaderAddress, ok := si.Replicas[si.ReplicaID]
   580  	if ok {
   581  		addresses = append(addresses, leaderAddress)
   582  	}
   583  	for replicaID, address := range si.Replicas {
   584  		if replicaID != si.ReplicaID {
   585  			addresses = append(addresses, address)
   586  		}
   587  	}
   588  	return connectToHAKeeper(ctx, addresses, cfg)
   589  }
   590  
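         // connectToHAKeeper tries the given targets in random order and returns a
         // client bound to the first target that answers a CHECK_HAKEEPER request
         // affirmatively; connections to other targets are closed again.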
   591  func connectToHAKeeper(ctx context.Context,
   592  	targets []string, cfg HAKeeperClientConfig) (*hakeeperClient, error) {
   593  	if len(targets) == 0 {
   594  		return nil, nil
   595  	}
   596  
   597  	pool := &sync.Pool{}
   598  	pool.New = func() interface{} {
   599  		return &RPCRequest{pool: pool}
   600  	}
   601  	respPool := &sync.Pool{}
   602  	respPool.New = func() interface{} {
   603  		return &RPCResponse{pool: respPool}
   604  	}
   605  	c := &hakeeperClient{
   606  		cfg:      cfg,
   607  		pool:     pool,
   608  		respPool: respPool,
   609  	}
   610  	var e error
   611  	addresses := append([]string{}, targets...)
   612  	rand.Shuffle(len(addresses), func(i, j int) {
   613  		addresses[i], addresses[j] = addresses[j], addresses[i]
   614  	})
   615  	for _, addr := range addresses {
   616  		cc, err := getRPCClient(
   617  			ctx,
   618  			addr,
   619  			c.respPool,
   620  			defaultMaxMessageSize,
   621  			cfg.EnableCompress,
   622  			defaultBackendReadTimeout,
   623  			"connectToHAKeeper",
   624  		)
   625  		if err != nil {
   626  			e = err
   627  			continue
   628  		}
   629  		c.addr = addr
   630  		c.client = cc
   631  		isHAKeeper, err := c.checkIsHAKeeper(ctx)
   632  		logutil.Info(fmt.Sprintf("isHAKeeper: %t, err: %v", isHAKeeper, err))
   633  		if err == nil && isHAKeeper {
   634  			return c, nil
   635  		} else if err != nil {
   636  			e = err
   637  		}
   638  		if err := cc.Close(); err != nil {
   639  			logutil.Error("failed to close the client", zap.Error(err))
   640  		}
   641  	}
   642  	if e == nil {
    643  		// no error was encountered, but none of the targets identified itself as the HAKeeper
   644  		return nil, moerr.NewNoHAKeeper(ctx)
   645  	}
   646  	return nil, e
   647  }
   648  
   649  func (c *hakeeperClient) close() error {
   650  	if c == nil {
   651  		panic("!!!")
   652  	}
   653  
   654  	if c.client != nil {
   655  		return c.client.Close()
   656  	}
   657  	return nil
   658  }
   659  
   660  func (c *hakeeperClient) getClusterDetails(ctx context.Context) (pb.ClusterDetails, error) {
   661  	req := pb.Request{
   662  		Method: pb.GET_CLUSTER_DETAILS,
   663  	}
   664  	resp, err := c.request(ctx, req)
   665  	if err != nil {
   666  		return pb.ClusterDetails{}, err
   667  	}
   668  	return *resp.ClusterDetails, nil
   669  }
   670  
   671  func (c *hakeeperClient) getClusterState(ctx context.Context) (pb.CheckerState, error) {
   672  	req := pb.Request{
   673  		Method: pb.GET_CLUSTER_STATE,
   674  	}
   675  	resp, err := c.request(ctx, req)
   676  	if err != nil {
   677  		return pb.CheckerState{}, err
   678  	}
   679  	return *resp.CheckerState, nil
   680  }
   681  
   682  func (c *hakeeperClient) sendCNHeartbeat(ctx context.Context, hb pb.CNStoreHeartbeat) (pb.CommandBatch, error) {
   683  	req := pb.Request{
   684  		Method:      pb.CN_HEARTBEAT,
   685  		CNHeartbeat: &hb,
   686  	}
   687  	return c.sendHeartbeat(ctx, req)
   688  }
   689  
   690  func (c *hakeeperClient) sendCNAllocateID(ctx context.Context, key string, batch uint64) (uint64, error) {
   691  	req := pb.Request{
   692  		Method:       pb.CN_ALLOCATE_ID,
   693  		CNAllocateID: &pb.CNAllocateID{Key: key, Batch: batch},
   694  	}
   695  	resp, err := c.request(ctx, req)
   696  	if err != nil {
   697  		return 0, err
   698  	}
   699  	return resp.AllocateID.FirstID, nil
   700  }
   701  
   702  func (c *hakeeperClient) sendTNHeartbeat(ctx context.Context,
   703  	hb pb.TNStoreHeartbeat) (pb.CommandBatch, error) {
   704  	req := pb.Request{
   705  		Method:      pb.TN_HEARTBEAT,
   706  		TNHeartbeat: &hb,
   707  	}
   708  	return c.sendHeartbeat(ctx, req)
   709  }
   710  
   711  func (c *hakeeperClient) sendLogHeartbeat(ctx context.Context,
   712  	hb pb.LogStoreHeartbeat) (pb.CommandBatch, error) {
   713  	req := pb.Request{
   714  		Method:       pb.LOG_HEARTBEAT,
   715  		LogHeartbeat: &hb,
   716  	}
   717  	cb, err := c.sendHeartbeat(ctx, req)
   718  	if err != nil {
   719  		return pb.CommandBatch{}, err
   720  	}
   721  	for _, cmd := range cb.Commands {
   722  		logutil.Info("hakeeper client received cmd", zap.String("cmd", cmd.LogString()))
   723  	}
   724  	return cb, nil
   725  }
   726  
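         // sendHeartbeat issues the prepared heartbeat request and returns the
         // CommandBatch carried in the response, or an empty batch when the
         // response contains none.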
   727  func (c *hakeeperClient) sendHeartbeat(ctx context.Context,
   728  	req pb.Request) (pb.CommandBatch, error) {
   729  	resp, err := c.request(ctx, req)
   730  	if err != nil {
   731  		return pb.CommandBatch{}, err
   732  	}
   733  	if resp.CommandBatch == nil {
   734  		return pb.CommandBatch{}, nil
   735  	}
   736  	return *resp.CommandBatch, nil
   737  }
   738  
   739  func (c *hakeeperClient) getCNState(ctx context.Context) (pb.CNState, error) {
   740  	s, err := c.getClusterState(ctx)
   741  	if err != nil {
   742  		return pb.CNState{}, err
   743  	}
   744  	return s.CNState, nil
   745  }
   746  
   747  func (c *hakeeperClient) updateCNLabel(ctx context.Context, label pb.CNStoreLabel) error {
   748  	req := pb.Request{
   749  		Method:       pb.UPDATE_CN_LABEL,
   750  		CNStoreLabel: &label,
   751  	}
   752  	_, err := c.request(ctx, req)
   753  	if err != nil {
   754  		return err
   755  	}
   756  	return nil
   757  }
   758  
   759  func (c *hakeeperClient) updateCNWorkState(ctx context.Context, state pb.CNWorkState) error {
   760  	req := pb.Request{
   761  		Method:      pb.UPDATE_CN_WORK_STATE,
   762  		CNWorkState: &state,
   763  	}
   764  	_, err := c.request(ctx, req)
   765  	if err != nil {
   766  		return err
   767  	}
   768  	return nil
   769  }
   770  
   771  func (c *hakeeperClient) patchCNStore(ctx context.Context, stateLabel pb.CNStateLabel) error {
   772  	req := pb.Request{
   773  		Method:       pb.PATCH_CN_STORE,
   774  		CNStateLabel: &stateLabel,
   775  	}
   776  	_, err := c.request(ctx, req)
   777  	if err != nil {
   778  		return err
   779  	}
   780  	return nil
   781  }
   782  
   783  func (c *hakeeperClient) deleteCNStore(ctx context.Context, cnStore pb.DeleteCNStore) error {
   784  	req := pb.Request{
   785  		Method:        pb.DELETE_CN_STORE,
   786  		DeleteCNStore: &cnStore,
   787  	}
   788  	_, err := c.request(ctx, req)
   789  	if err != nil {
   790  		return err
   791  	}
   792  	return nil
   793  }
   794  
   795  func (c *hakeeperClient) sendProxyHeartbeat(ctx context.Context, hb pb.ProxyHeartbeat) (pb.CommandBatch, error) {
   796  	req := pb.Request{
   797  		Method:         pb.PROXY_HEARTBEAT,
   798  		ProxyHeartbeat: &hb,
   799  	}
   800  	cb, err := c.sendHeartbeat(ctx, req)
   801  	if err != nil {
   802  		return pb.CommandBatch{}, err
   803  	}
   804  	return cb, nil
   805  }
   806  
   807  func (c *hakeeperClient) checkIsHAKeeper(ctx context.Context) (bool, error) {
   808  	req := pb.Request{
   809  		Method: pb.CHECK_HAKEEPER,
   810  	}
   811  	resp, err := c.request(ctx, req)
   812  	if err != nil {
   813  		return false, err
   814  	}
   815  	return resp.IsHAKeeper, nil
   816  }
   817  
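         // request sends a single request to the connected HAKeeper replica, waits
         // for the response, and converts any error carried in the response into a
         // moerr error.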
   818  func (c *hakeeperClient) request(ctx context.Context, req pb.Request) (pb.Response, error) {
   819  	if c == nil {
   820  		return pb.Response{}, moerr.NewNoHAKeeper(ctx)
   821  	}
   822  	ctx, span := trace.Debug(ctx, "hakeeperClient.request")
   823  	defer span.End()
   824  	r := c.pool.Get().(*RPCRequest)
   825  	r.Request = req
   826  	future, err := c.client.Send(ctx, c.addr, r)
   827  	if err != nil {
   828  		return pb.Response{}, err
   829  	}
   830  	defer future.Close()
   831  	msg, err := future.Get()
   832  	if err != nil {
   833  		return pb.Response{}, err
   834  	}
   835  	response, ok := msg.(*RPCResponse)
   836  	if !ok {
   837  		panic("unexpected response type")
   838  	}
   839  	resp := response.Response
   840  	defer response.Release()
   841  	err = toError(ctx, response.Response)
   842  	if err != nil {
   843  		return pb.Response{}, err
   844  	}
   845  	return resp, nil
   846  }
   847  
   848  func (c *managedHAKeeperClient) getClient() *hakeeperClient {
   849  	c.mu.RLock()
   850  	defer c.mu.RUnlock()
   851  	return c.mu.client
   852  }
   853  
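         // getBackupData queries the cluster state and returns the marshaled
         // pb.BackupData holding the HAKeeper's ID allocator state
         // (NextID and NextIDByKey).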
   854  func (c *hakeeperClient) getBackupData(ctx context.Context) ([]byte, error) {
   855  	req := pb.Request{
   856  		Method: pb.GET_CLUSTER_STATE,
   857  	}
   858  	resp, err := c.request(ctx, req)
   859  	if err != nil {
   860  		return nil, err
   861  	}
   862  	p := pb.BackupData{
   863  		NextID:      resp.CheckerState.NextId,
   864  		NextIDByKey: resp.CheckerState.NextIDByKey,
   865  	}
   866  	bs, err := p.Marshal()
   867  	if err != nil {
   868  		return nil, err
   869  	}
   870  	return bs, nil
   871  }