github.com/matrixorigin/matrixone@v0.7.0/pkg/logservice/hakeeper_client.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package logservice
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"math/rand"
    21  	"sync"
    22  
    23  	"go.uber.org/zap"
    24  
    25  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    26  	"github.com/matrixorigin/matrixone/pkg/common/morpc"
    27  	"github.com/matrixorigin/matrixone/pkg/hakeeper"
    28  	"github.com/matrixorigin/matrixone/pkg/logutil"
    29  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    30  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    31  )
    32  
// basicHAKeeperClient is the set of operations embedded by the CN, DN and Log
// HAKeeper client interfaces declared in this file.
type basicHAKeeperClient interface {
	// Close closes the hakeeper client.
	Close() error
	// AllocateID allocates a globally unique ID.
	AllocateID(ctx context.Context) (uint64, error)
	// GetClusterDetails queries the HAKeeper and returns the CN and DN nodes
	// that are known to the HAKeeper.
	GetClusterDetails(ctx context.Context) (pb.ClusterDetails, error)
	// GetClusterState queries the HAKeeper for the cluster state.
	GetClusterState(ctx context.Context) (pb.CheckerState, error)
}
    44  
// CNHAKeeperClient is the HAKeeper client used by a CN store.
type CNHAKeeperClient interface {
	basicHAKeeperClient
	// SendCNHeartbeat sends the specified heartbeat message to the HAKeeper
	// and returns the CommandBatch carried by the reply.
	SendCNHeartbeat(ctx context.Context, hb pb.CNStoreHeartbeat) (pb.CommandBatch, error)
}
    51  
// DNHAKeeperClient is the HAKeeper client used by a DN store.
type DNHAKeeperClient interface {
	basicHAKeeperClient
	// SendDNHeartbeat sends the specified heartbeat message to the HAKeeper.
	// The returned CommandBatch contains the Schedule Commands to be executed
	// by the local DN store.
	SendDNHeartbeat(ctx context.Context, hb pb.DNStoreHeartbeat) (pb.CommandBatch, error)
}
    60  
// LogHAKeeperClient is the HAKeeper client used by a Log store.
type LogHAKeeperClient interface {
	basicHAKeeperClient
	// SendLogHeartbeat sends the specified heartbeat message to the HAKeeper.
	// The returned CommandBatch contains the Schedule Commands to be executed
	// by the local Log store.
	SendLogHeartbeat(ctx context.Context, hb pb.LogStoreHeartbeat) (pb.CommandBatch, error)
}
    69  
    70  // TODO: HAKeeper discovery to be implemented
    71  
    72  var _ CNHAKeeperClient = (*managedHAKeeperClient)(nil)
    73  var _ DNHAKeeperClient = (*managedHAKeeperClient)(nil)
    74  var _ LogHAKeeperClient = (*managedHAKeeperClient)(nil)
    75  
    76  // NewCNHAKeeperClient creates a HAKeeper client to be used by a CN node.
    77  //
    78  // NB: caller could specify options for morpc.Client via ctx.
    79  func NewCNHAKeeperClient(ctx context.Context,
    80  	cfg HAKeeperClientConfig) (CNHAKeeperClient, error) {
    81  	if err := cfg.Validate(); err != nil {
    82  		return nil, err
    83  	}
    84  	return newManagedHAKeeperClient(ctx, cfg)
    85  }
    86  
    87  // NewDNHAKeeperClient creates a HAKeeper client to be used by a DN node.
    88  //
    89  // NB: caller could specify options for morpc.Client via ctx.
    90  func NewDNHAKeeperClient(ctx context.Context,
    91  	cfg HAKeeperClientConfig) (DNHAKeeperClient, error) {
    92  	if err := cfg.Validate(); err != nil {
    93  		return nil, err
    94  	}
    95  	return newManagedHAKeeperClient(ctx, cfg)
    96  }
    97  
    98  // NewLogHAKeeperClient creates a HAKeeper client to be used by a Log Service node.
    99  //
   100  // NB: caller could specify options for morpc.Client via ctx.
   101  func NewLogHAKeeperClient(ctx context.Context,
   102  	cfg HAKeeperClientConfig) (LogHAKeeperClient, error) {
   103  	if err := cfg.Validate(); err != nil {
   104  		return nil, err
   105  	}
   106  	return newManagedHAKeeperClient(ctx, cfg)
   107  }
   108  
   109  func newManagedHAKeeperClient(ctx context.Context,
   110  	cfg HAKeeperClientConfig) (*managedHAKeeperClient, error) {
   111  	c, err := newHAKeeperClient(ctx, cfg)
   112  	if err != nil {
   113  		return nil, err
   114  	}
   115  
   116  	mc := &managedHAKeeperClient{
   117  		cfg:            cfg,
   118  		backendOptions: GetBackendOptions(ctx),
   119  		clientOptions:  GetClientOptions(ctx),
   120  	}
   121  	mc.mu.client = c
   122  	return mc, nil
   123  }
   124  
// managedHAKeeperClient wraps a hakeeperClient and recreates it on demand: its
// methods drop the underlying client after a failed request and reconnect
// before the next one.
type managedHAKeeperClient struct {
	// cfg is the configuration used for every (re)connection.
	cfg HAKeeperClientConfig

	// Method `prepareClient` may replace the underlying morpc.Client.
	// So we need to keep the options for morpc.Client.
	backendOptions []morpc.BackendOption
	clientOptions  []morpc.ClientOption

	// mu guards the fields declared inside it.
	mu struct {
		sync.RWMutex
		// nextID and lastID track the locally cached ID range used by
		// AllocateID.
		nextID uint64
		lastID uint64
		// client is the current underlying client; nil after a reset.
		client *hakeeperClient
	}
}
   140  
   141  func (c *managedHAKeeperClient) Close() error {
   142  	c.mu.Lock()
   143  	defer c.mu.Unlock()
   144  	if c.mu.client == nil {
   145  		return nil
   146  	}
   147  	return c.mu.client.close()
   148  }
   149  
   150  func (c *managedHAKeeperClient) GetClusterDetails(ctx context.Context) (pb.ClusterDetails, error) {
   151  	for {
   152  		if err := c.prepareClient(ctx); err != nil {
   153  			return pb.ClusterDetails{}, err
   154  		}
   155  		cd, err := c.getClient().getClusterDetails(ctx)
   156  		if err != nil {
   157  			c.resetClient()
   158  		}
   159  		if c.isRetryableError(err) {
   160  			continue
   161  		}
   162  		return cd, err
   163  	}
   164  }
   165  
   166  func (c *managedHAKeeperClient) GetClusterState(ctx context.Context) (pb.CheckerState, error) {
   167  	for {
   168  		if err := c.prepareClient(ctx); err != nil {
   169  			return pb.CheckerState{}, err
   170  		}
   171  		s, err := c.getClient().getClusterState(ctx)
   172  		if err != nil {
   173  			c.resetClient()
   174  		}
   175  		if c.isRetryableError(err) {
   176  			continue
   177  		}
   178  		return s, err
   179  	}
   180  }
   181  
   182  func (c *managedHAKeeperClient) AllocateID(ctx context.Context) (uint64, error) {
   183  	c.mu.Lock()
   184  	if c.mu.nextID != c.mu.lastID {
   185  		v := c.mu.nextID
   186  		c.mu.nextID++
   187  		c.mu.Unlock()
   188  		return v, nil
   189  	}
   190  
   191  	for {
   192  		if err := c.prepareClientLocked(ctx); err != nil {
   193  			return 0, err
   194  		}
   195  		firstID, err := c.mu.client.sendCNAllocateID(ctx, c.cfg.AllocateIDBatch)
   196  		if err != nil {
   197  			c.resetClientLocked()
   198  		}
   199  		if c.isRetryableError(err) {
   200  			continue
   201  		}
   202  
   203  		c.mu.nextID = firstID + 1
   204  		c.mu.lastID = firstID + c.cfg.AllocateIDBatch - 1
   205  		c.mu.Unlock()
   206  		return firstID, err
   207  	}
   208  }
   209  
   210  func (c *managedHAKeeperClient) SendCNHeartbeat(ctx context.Context,
   211  	hb pb.CNStoreHeartbeat) (pb.CommandBatch, error) {
   212  	for {
   213  		if err := c.prepareClient(ctx); err != nil {
   214  			return pb.CommandBatch{}, err
   215  		}
   216  		result, err := c.getClient().sendCNHeartbeat(ctx, hb)
   217  		if err != nil {
   218  			c.resetClient()
   219  		}
   220  		if c.isRetryableError(err) {
   221  			continue
   222  		}
   223  		return result, err
   224  	}
   225  }
   226  
   227  func (c *managedHAKeeperClient) SendDNHeartbeat(ctx context.Context,
   228  	hb pb.DNStoreHeartbeat) (pb.CommandBatch, error) {
   229  	for {
   230  		if err := c.prepareClient(ctx); err != nil {
   231  			return pb.CommandBatch{}, err
   232  		}
   233  		cb, err := c.getClient().sendDNHeartbeat(ctx, hb)
   234  		if err != nil {
   235  			c.resetClient()
   236  		}
   237  		if c.isRetryableError(err) {
   238  			continue
   239  		}
   240  		return cb, err
   241  	}
   242  }
   243  
   244  func (c *managedHAKeeperClient) SendLogHeartbeat(ctx context.Context,
   245  	hb pb.LogStoreHeartbeat) (pb.CommandBatch, error) {
   246  	for {
   247  		if err := c.prepareClient(ctx); err != nil {
   248  			return pb.CommandBatch{}, err
   249  		}
   250  		cb, err := c.getClient().sendLogHeartbeat(ctx, hb)
   251  		if err != nil {
   252  			c.resetClient()
   253  		}
   254  		if c.isRetryableError(err) {
   255  			continue
   256  		}
   257  		return cb, err
   258  	}
   259  }
   260  
   261  func (c *managedHAKeeperClient) isRetryableError(err error) bool {
   262  	return moerr.IsMoErrCode(err, moerr.ErrNoHAKeeper)
   263  }
   264  
   265  func (c *managedHAKeeperClient) resetClient() {
   266  	c.mu.Lock()
   267  	defer c.mu.Unlock()
   268  	c.resetClientLocked()
   269  }
   270  
   271  func (c *managedHAKeeperClient) prepareClient(ctx context.Context) error {
   272  	c.mu.Lock()
   273  	defer c.mu.Unlock()
   274  	return c.prepareClientLocked(ctx)
   275  }
   276  
   277  func (c *managedHAKeeperClient) resetClientLocked() {
   278  	if c.mu.client != nil {
   279  		cc := c.mu.client
   280  		c.mu.client = nil
   281  		if err := cc.close(); err != nil {
   282  			logutil.Error("failed to close client", zap.Error(err))
   283  		}
   284  	}
   285  }
   286  
   287  func (c *managedHAKeeperClient) prepareClientLocked(ctx context.Context) error {
   288  	if c.mu.client != nil {
   289  		return nil
   290  	}
   291  
   292  	// we must use the recoreded options for morpc.Client
   293  	ctx = SetBackendOptions(ctx, c.backendOptions...)
   294  	ctx = SetClientOptions(ctx, c.clientOptions...)
   295  
   296  	cc, err := newHAKeeperClient(ctx, c.cfg)
   297  	if err != nil {
   298  		return err
   299  	}
   300  	c.mu.client = cc
   301  	return nil
   302  }
   303  
// hakeeperClient is a client bound to a single HAKeeper address; every request
// it issues is sent to addr through client.
type hakeeperClient struct {
	// cfg is the configuration the client was created with.
	cfg HAKeeperClientConfig
	// client is the morpc client used to send requests.
	client morpc.RPCClient
	// addr is the address requests are sent to.
	addr string
	// pool supplies *RPCRequest values for outgoing requests.
	pool *sync.Pool
	// respPool supplies *RPCResponse values; it is handed to getRPCClient
	// when the connection is created.
	respPool *sync.Pool
}
   311  
   312  func newHAKeeperClient(ctx context.Context,
   313  	cfg HAKeeperClientConfig) (*hakeeperClient, error) {
   314  	client, err := connectToHAKeeper(ctx, cfg.ServiceAddresses, cfg)
   315  	if client != nil && err == nil {
   316  		return client, nil
   317  	}
   318  	if len(cfg.DiscoveryAddress) > 0 {
   319  		return connectByReverseProxy(ctx, cfg.DiscoveryAddress, cfg)
   320  	}
   321  	if err != nil {
   322  		return nil, err
   323  	}
   324  	return nil, moerr.NewNoHAKeeper(ctx)
   325  }
   326  
   327  func connectByReverseProxy(ctx context.Context,
   328  	discoveryAddress string, cfg HAKeeperClientConfig) (*hakeeperClient, error) {
   329  	si, ok, err := GetShardInfo(discoveryAddress, hakeeper.DefaultHAKeeperShardID)
   330  	if err != nil {
   331  		return nil, err
   332  	}
   333  	if !ok {
   334  		return nil, nil
   335  	}
   336  	addresses := make([]string, 0)
   337  	leaderAddress, ok := si.Replicas[si.ReplicaID]
   338  	if ok {
   339  		addresses = append(addresses, leaderAddress)
   340  	}
   341  	for replicaID, address := range si.Replicas {
   342  		if replicaID != si.ReplicaID {
   343  			addresses = append(addresses, address)
   344  		}
   345  	}
   346  	return connectToHAKeeper(ctx, addresses, cfg)
   347  }
   348  
   349  func connectToHAKeeper(ctx context.Context,
   350  	targets []string, cfg HAKeeperClientConfig) (*hakeeperClient, error) {
   351  	if len(targets) == 0 {
   352  		return nil, nil
   353  	}
   354  
   355  	pool := &sync.Pool{}
   356  	pool.New = func() interface{} {
   357  		return &RPCRequest{pool: pool}
   358  	}
   359  	respPool := &sync.Pool{}
   360  	respPool.New = func() interface{} {
   361  		return &RPCResponse{pool: respPool}
   362  	}
   363  	c := &hakeeperClient{
   364  		cfg:      cfg,
   365  		pool:     pool,
   366  		respPool: respPool,
   367  	}
   368  	var e error
   369  	addresses := append([]string{}, targets...)
   370  	rand.Shuffle(len(addresses), func(i, j int) {
   371  		addresses[i], addresses[j] = addresses[j], addresses[i]
   372  	})
   373  	for _, addr := range addresses {
   374  		cc, err := getRPCClient(ctx, addr, c.respPool, defaultMaxMessageSize, cfg.EnableCompress, "connectToHAKeeper")
   375  		if err != nil {
   376  			e = err
   377  			continue
   378  		}
   379  		c.addr = addr
   380  		c.client = cc
   381  		isHAKeeper, err := c.checkIsHAKeeper(ctx)
   382  		logutil.Info(fmt.Sprintf("isHAKeeper: %t, err: %v", isHAKeeper, err))
   383  		if err == nil && isHAKeeper {
   384  			return c, nil
   385  		} else if err != nil {
   386  			e = err
   387  		}
   388  		if err := cc.Close(); err != nil {
   389  			logutil.Error("failed to close the client", zap.Error(err))
   390  		}
   391  	}
   392  	if e == nil {
   393  		// didn't encounter any error
   394  		return nil, moerr.NewNoHAKeeper(ctx)
   395  	}
   396  	return nil, e
   397  }
   398  
   399  func (c *hakeeperClient) close() error {
   400  	if c == nil {
   401  		panic("!!!")
   402  	}
   403  
   404  	if c.client != nil {
   405  		return c.client.Close()
   406  	}
   407  	return nil
   408  }
   409  
   410  func (c *hakeeperClient) getClusterDetails(ctx context.Context) (pb.ClusterDetails, error) {
   411  	req := pb.Request{
   412  		Method: pb.GET_CLUSTER_DETAILS,
   413  	}
   414  	resp, err := c.request(ctx, req)
   415  	if err != nil {
   416  		return pb.ClusterDetails{}, err
   417  	}
   418  	return *resp.ClusterDetails, nil
   419  }
   420  
   421  func (c *hakeeperClient) getClusterState(ctx context.Context) (pb.CheckerState, error) {
   422  	req := pb.Request{
   423  		Method: pb.GET_CLUSTER_STATE,
   424  	}
   425  	resp, err := c.request(ctx, req)
   426  	if err != nil {
   427  		return pb.CheckerState{}, err
   428  	}
   429  	return *resp.CheckerState, nil
   430  }
   431  
   432  func (c *hakeeperClient) sendCNHeartbeat(ctx context.Context, hb pb.CNStoreHeartbeat) (pb.CommandBatch, error) {
   433  	req := pb.Request{
   434  		Method:      pb.CN_HEARTBEAT,
   435  		CNHeartbeat: &hb,
   436  	}
   437  	return c.sendHeartbeat(ctx, req)
   438  }
   439  
   440  func (c *hakeeperClient) sendCNAllocateID(ctx context.Context, batch uint64) (uint64, error) {
   441  	req := pb.Request{
   442  		Method:       pb.CN_ALLOCATE_ID,
   443  		CNAllocateID: &pb.CNAllocateID{Batch: batch},
   444  	}
   445  	resp, err := c.request(ctx, req)
   446  	if err != nil {
   447  		return 0, err
   448  	}
   449  	return resp.AllocateID.FirstID, nil
   450  }
   451  
   452  func (c *hakeeperClient) sendDNHeartbeat(ctx context.Context,
   453  	hb pb.DNStoreHeartbeat) (pb.CommandBatch, error) {
   454  	req := pb.Request{
   455  		Method:      pb.DN_HEARTBEAT,
   456  		DNHeartbeat: &hb,
   457  	}
   458  	return c.sendHeartbeat(ctx, req)
   459  }
   460  
   461  func (c *hakeeperClient) sendLogHeartbeat(ctx context.Context,
   462  	hb pb.LogStoreHeartbeat) (pb.CommandBatch, error) {
   463  	req := pb.Request{
   464  		Method:       pb.LOG_HEARTBEAT,
   465  		LogHeartbeat: &hb,
   466  	}
   467  	cb, err := c.sendHeartbeat(ctx, req)
   468  	if err != nil {
   469  		return pb.CommandBatch{}, err
   470  	}
   471  	for _, cmd := range cb.Commands {
   472  		logutil.Info("hakeeper client received cmd", zap.String("cmd", cmd.LogString()))
   473  	}
   474  	return cb, nil
   475  }
   476  
   477  func (c *hakeeperClient) sendHeartbeat(ctx context.Context,
   478  	req pb.Request) (pb.CommandBatch, error) {
   479  	resp, err := c.request(ctx, req)
   480  	if err != nil {
   481  		return pb.CommandBatch{}, err
   482  	}
   483  	if resp.CommandBatch == nil {
   484  		return pb.CommandBatch{}, nil
   485  	}
   486  	return *resp.CommandBatch, nil
   487  }
   488  
   489  func (c *hakeeperClient) checkIsHAKeeper(ctx context.Context) (bool, error) {
   490  	req := pb.Request{
   491  		Method: pb.CHECK_HAKEEPER,
   492  	}
   493  	resp, err := c.request(ctx, req)
   494  	if err != nil {
   495  		return false, err
   496  	}
   497  	return resp.IsHAKeeper, nil
   498  }
   499  
   500  func (c *hakeeperClient) request(ctx context.Context, req pb.Request) (pb.Response, error) {
   501  	if c == nil {
   502  		return pb.Response{}, moerr.NewNoHAKeeper(ctx)
   503  	}
   504  	ctx, span := trace.Debug(ctx, "hakeeperClient.request")
   505  	defer span.End()
   506  	r := c.pool.Get().(*RPCRequest)
   507  	r.Request = req
   508  	future, err := c.client.Send(ctx, c.addr, r)
   509  	if err != nil {
   510  		return pb.Response{}, err
   511  	}
   512  	defer future.Close()
   513  	msg, err := future.Get()
   514  	if err != nil {
   515  		return pb.Response{}, err
   516  	}
   517  	response, ok := msg.(*RPCResponse)
   518  	if !ok {
   519  		panic("unexpected response type")
   520  	}
   521  	resp := response.Response
   522  	defer response.Release()
   523  	err = toError(ctx, response.Response)
   524  	if err != nil {
   525  		return pb.Response{}, err
   526  	}
   527  	return resp, nil
   528  }
   529  
   530  func (c *managedHAKeeperClient) getClient() *hakeeperClient {
   531  	c.mu.RLock()
   532  	defer c.mu.RUnlock()
   533  	return c.mu.client
   534  }