github.com/matrixorigin/matrixone@v0.7.0/pkg/logservice/client.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package logservice
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"math/rand"
    21  	"sync"
    22  	"time"
    23  
    24  	"go.uber.org/zap"
    25  
    26  	"github.com/cockroachdb/errors"
    27  	"github.com/lni/dragonboat/v4"
    28  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    29  	"github.com/matrixorigin/matrixone/pkg/common/morpc"
    30  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    31  	"github.com/matrixorigin/matrixone/pkg/logutil"
    32  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    33  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    34  )
    35  
const (
	// defaultWriteSocketSize is the buffer size in bytes (64 KiB) that is passed
	// to morpc.WithCodecPayloadCopyBufferSize when the RPC codec is built.
	defaultWriteSocketSize = 64 * 1024
)
    39  
    40  // IsTempError returns a boolean value indicating whether the specified error
    41  // is a temp error that worth to be retried, e.g. timeouts, temp network
    42  // issues. Non-temp error caused by program logics rather than some external
    43  // factors.
    44  func IsTempError(err error) bool {
    45  	return isTempError(err)
    46  }
    47  
// ClientFactory is a function that creates a Client or returns an error.
type ClientFactory func() (Client, error)
    49  
// Client is the Log Service Client interface exposed to the DN.
type Client interface {
	// Close closes the client.
	Close() error
	// Config returns the configuration that was specified when the client was
	// created.
	Config() ClientConfig
	// GetLogRecord returns a new LogRecord instance whose Data field is large
	// enough to hold payloadLength bytes of payload. The layout of the Data
	// field is 4 bytes of record type (pb.UserEntryUpdate) + 8 bytes of DN
	// replica ID + payloadLength bytes of actual payload.
	GetLogRecord(payloadLength int) pb.LogRecord
	// Append appends the specified LogRecord to the Log Service. On success, the
	// assigned Lsn is returned. Only the Data field of the specified LogRecord
	// is used; all other fields are ignored by Append(). Once Append() has
	// returned, the pb.LogRecord can be reused.
	Append(ctx context.Context, rec pb.LogRecord) (Lsn, error)
	// Read reads the Log Service from the specified Lsn position until the
	// returned LogRecord set reaches the specified maxSize in bytes. The returned
	// Lsn indicates the next Lsn to use to resume the read; when it equals the
	// specified Lsn, everything available has been read.
	// The returned pb.LogRecord records have their Lsn and Type fields set:
	// the Lsn field is the Lsn assigned to the record, while the Type field tells
	// whether the record is an internal record generated by the Log Service itself
	// or one appended by the user.
	Read(ctx context.Context, firstLsn Lsn, maxSize uint64) ([]pb.LogRecord, Lsn, error)
	// Truncate truncates the Log Service log at the specified Lsn, with the Lsn
	// itself included. This allows the Log Service to free up storage capacity
	// for future appends; all future reads must start after the specified Lsn
	// position.
	Truncate(ctx context.Context, lsn Lsn) error
	// GetTruncatedLsn returns the largest Lsn value that has been specified for
	// truncation.
	GetTruncatedLsn(ctx context.Context) (Lsn, error)
	// GetTSOTimestamp requests a total of count unique timestamps from the TSO
	// and returns the first assigned timestamp, that is, TSO timestamps
	// [returned value, returned value + count] will be owned by the caller.
	// NOTE(review): a closed interval covers count+1 values; presumably the
	// owned range is half-open, [returned value, returned value + count) —
	// confirm against the TSO implementation.
	GetTSOTimestamp(ctx context.Context, count uint64) (uint64, error)
}
    88  
// managedClient is the Client implementation returned by NewClient. It wraps
// a *client that is dropped by resetClient after a failed request and lazily
// recreated from cfg by prepareClient before the next request.
type managedClient struct {
	// cfg is the configuration used to create, and later recreate, client.
	cfg    ClientConfig
	// client is the current underlying connection; nil after resetClient.
	client *client
}

// Compile-time check that *managedClient implements Client.
var _ Client = (*managedClient)(nil)
    95  
    96  // NewClient creates a Log Service client. Each returned client can be used
    97  // to synchronously issue requests to the Log Service. To send multiple requests
    98  // to the Log Service in parallel, multiple clients should be created and used
    99  // to do so.
   100  func NewClient(ctx context.Context, cfg ClientConfig) (Client, error) {
   101  	if err := cfg.Validate(); err != nil {
   102  		return nil, err
   103  	}
   104  	client, err := newClient(ctx, cfg)
   105  	if err != nil {
   106  		return nil, err
   107  	}
   108  	return &managedClient{cfg: cfg, client: client}, nil
   109  }
   110  
   111  func (c *managedClient) Close() error {
   112  	if c.client != nil {
   113  		return c.client.close()
   114  	}
   115  	return nil
   116  }
   117  
   118  func (c *managedClient) Config() ClientConfig {
   119  	return c.cfg
   120  }
   121  
   122  func (c *managedClient) GetLogRecord(payloadLength int) pb.LogRecord {
   123  	data := make([]byte, headerSize+8+payloadLength)
   124  	binaryEnc.PutUint32(data, uint32(pb.UserEntryUpdate))
   125  	binaryEnc.PutUint64(data[headerSize:], c.cfg.DNReplicaID)
   126  	return pb.LogRecord{Data: data}
   127  }
   128  
   129  func (c *managedClient) Append(ctx context.Context, rec pb.LogRecord) (Lsn, error) {
   130  	for {
   131  		if err := c.prepareClient(ctx); err != nil {
   132  			return 0, err
   133  		}
   134  		v, err := c.client.append(ctx, rec)
   135  		if err != nil {
   136  			c.resetClient()
   137  		}
   138  		if c.isRetryableError(err) {
   139  			continue
   140  		}
   141  		return v, err
   142  	}
   143  }
   144  
   145  func (c *managedClient) Read(ctx context.Context,
   146  	firstLsn Lsn, maxSize uint64) ([]pb.LogRecord, Lsn, error) {
   147  	for {
   148  		if err := c.prepareClient(ctx); err != nil {
   149  			return nil, 0, err
   150  		}
   151  		recs, v, err := c.client.read(ctx, firstLsn, maxSize)
   152  		if err != nil {
   153  			c.resetClient()
   154  		}
   155  		if c.isRetryableError(err) {
   156  			continue
   157  		}
   158  		return recs, v, err
   159  	}
   160  }
   161  
   162  func (c *managedClient) Truncate(ctx context.Context, lsn Lsn) error {
   163  	for {
   164  		if err := c.prepareClient(ctx); err != nil {
   165  			return err
   166  		}
   167  		err := c.client.truncate(ctx, lsn)
   168  		if err != nil {
   169  			c.resetClient()
   170  		}
   171  		if c.isRetryableError(err) {
   172  			continue
   173  		}
   174  		return err
   175  	}
   176  }
   177  
   178  func (c *managedClient) GetTruncatedLsn(ctx context.Context) (Lsn, error) {
   179  	for {
   180  		if err := c.prepareClient(ctx); err != nil {
   181  			return 0, err
   182  		}
   183  		v, err := c.client.getTruncatedLsn(ctx)
   184  		if err != nil {
   185  			c.resetClient()
   186  		}
   187  		if c.isRetryableError(err) {
   188  			continue
   189  		}
   190  		return v, err
   191  	}
   192  }
   193  
   194  func (c *managedClient) GetTSOTimestamp(ctx context.Context, count uint64) (uint64, error) {
   195  	for {
   196  		if err := c.prepareClient(ctx); err != nil {
   197  			return 0, err
   198  		}
   199  		v, err := c.client.getTSOTimestamp(ctx, count)
   200  		if err != nil {
   201  			c.resetClient()
   202  		}
   203  		if c.isRetryableError(err) {
   204  			continue
   205  		}
   206  		return v, err
   207  	}
   208  }
   209  
   210  func (c *managedClient) isRetryableError(err error) bool {
   211  	/*
   212  		old code, obviously strange
   213  		if errors.Is(err, dragonboat.ErrTimeout) {
   214  			return false
   215  		}
   216  		return errors.Is(err, dragonboat.ErrShardNotFound)
   217  	*/
   218  
   219  	// Dragonboat error leaked here
   220  	if errors.Is(err, dragonboat.ErrShardNotFound) {
   221  		return true
   222  	}
   223  	return moerr.IsMoErrCode(err, moerr.ErrDragonboatShardNotFound)
   224  }
   225  
   226  func (c *managedClient) resetClient() {
   227  	if c.client != nil {
   228  		cc := c.client
   229  		c.client = nil
   230  		if err := cc.close(); err != nil {
   231  			logutil.Error("failed to close client", zap.Error(err))
   232  		}
   233  	}
   234  }
   235  
   236  func (c *managedClient) prepareClient(ctx context.Context) error {
   237  	if c.client != nil {
   238  		return nil
   239  	}
   240  	cc, err := newClient(ctx, c.cfg)
   241  	if err != nil {
   242  		return err
   243  	}
   244  	c.client = cc
   245  	return nil
   246  }
   247  
// client is a single connection to one Log Service node. It is created by
// connectToLogService and issues requests through the wrapped morpc.RPCClient.
type client struct {
	// cfg is the configuration the client was created with.
	cfg      ClientConfig
	// client is the RPC client used to send requests.
	client   morpc.RPCClient
	// addr is the address of the node requests are sent to.
	addr     string
	// pool provides reusable *RPCRequest instances.
	pool     *sync.Pool
	// respPool provides reusable *RPCResponse instances.
	respPool *sync.Pool
}
   255  
   256  func newClient(ctx context.Context, cfg ClientConfig) (*client, error) {
   257  	client, err := connectToLogService(ctx, cfg.ServiceAddresses, cfg)
   258  	if client != nil && err == nil {
   259  		return client, nil
   260  	}
   261  	if len(cfg.DiscoveryAddress) > 0 {
   262  		return connectToLogServiceByReverseProxy(ctx, cfg.DiscoveryAddress, cfg)
   263  	}
   264  	if err != nil {
   265  		return nil, err
   266  	}
   267  	return nil, moerr.NewLogServiceNotReady(ctx)
   268  }
   269  
   270  func connectToLogServiceByReverseProxy(ctx context.Context,
   271  	discoveryAddress string, cfg ClientConfig) (*client, error) {
   272  	si, ok, err := GetShardInfo(discoveryAddress, cfg.LogShardID)
   273  	if err != nil {
   274  		return nil, err
   275  	}
   276  	if !ok {
   277  		return nil, moerr.NewLogServiceNotReady(ctx)
   278  	}
   279  	addresses := make([]string, 0)
   280  	leaderAddress, ok := si.Replicas[si.ReplicaID]
   281  	if ok {
   282  		addresses = append(addresses, leaderAddress)
   283  	}
   284  	for replicaID, address := range si.Replicas {
   285  		if replicaID != si.ReplicaID {
   286  			addresses = append(addresses, address)
   287  		}
   288  	}
   289  	return connectToLogService(ctx, addresses, cfg)
   290  }
   291  
   292  func connectToLogService(ctx context.Context,
   293  	targets []string, cfg ClientConfig) (*client, error) {
   294  	if len(targets) == 0 {
   295  		return nil, nil
   296  	}
   297  
   298  	pool := &sync.Pool{}
   299  	pool.New = func() interface{} {
   300  		return &RPCRequest{pool: pool}
   301  	}
   302  	respPool := &sync.Pool{}
   303  	respPool.New = func() interface{} {
   304  		return &RPCResponse{pool: respPool}
   305  	}
   306  	c := &client{
   307  		cfg:      cfg,
   308  		pool:     pool,
   309  		respPool: respPool,
   310  	}
   311  	var e error
   312  	addresses := append([]string{}, targets...)
   313  	rand.Shuffle(len(cfg.ServiceAddresses), func(i, j int) {
   314  		addresses[i], addresses[j] = addresses[j], addresses[i]
   315  	})
   316  	for _, addr := range addresses {
   317  		cc, err := getRPCClient(ctx, addr, c.respPool, c.cfg.MaxMessageSize, cfg.EnableCompress, cfg.Tag)
   318  		if err != nil {
   319  			e = err
   320  			continue
   321  		}
   322  		c.addr = addr
   323  		c.client = cc
   324  		if cfg.ReadOnly {
   325  			if err := c.connectReadOnly(ctx); err == nil {
   326  				return c, nil
   327  			} else {
   328  				if err := c.close(); err != nil {
   329  					logutil.Error("failed to close the client", zap.Error(err))
   330  				}
   331  				e = err
   332  			}
   333  		} else {
   334  			// TODO: add a test to check whether it works when there is no truncated
   335  			// LSN known to the logservice.
   336  			if err := c.connectReadWrite(ctx); err == nil {
   337  				return c, nil
   338  			} else {
   339  				if err := c.close(); err != nil {
   340  					logutil.Error("failed to close the client", zap.Error(err))
   341  				}
   342  				e = err
   343  			}
   344  		}
   345  	}
   346  	return nil, e
   347  }
   348  
   349  func (c *client) close() error {
   350  	return c.client.Close()
   351  }
   352  
   353  func (c *client) append(ctx context.Context, rec pb.LogRecord) (Lsn, error) {
   354  	if c.readOnly() {
   355  		return 0, moerr.NewInvalidInput(ctx, "incompatible client")
   356  	}
   357  	// TODO: check piggybacked hint on whether we are connected to the leader node
   358  	return c.doAppend(ctx, rec)
   359  }
   360  
   361  func (c *client) read(ctx context.Context,
   362  	firstLsn Lsn, maxSize uint64) ([]pb.LogRecord, Lsn, error) {
   363  	return c.doRead(ctx, firstLsn, maxSize)
   364  }
   365  
   366  func (c *client) truncate(ctx context.Context, lsn Lsn) error {
   367  	if c.readOnly() {
   368  		return moerr.NewInvalidInput(ctx, "incompatible client")
   369  	}
   370  	return c.doTruncate(ctx, lsn)
   371  }
   372  
   373  func (c *client) getTruncatedLsn(ctx context.Context) (Lsn, error) {
   374  	return c.doGetTruncatedLsn(ctx)
   375  }
   376  
   377  func (c *client) getTSOTimestamp(ctx context.Context, count uint64) (uint64, error) {
   378  	return c.tsoRequest(ctx, count)
   379  }
   380  
   381  func (c *client) readOnly() bool {
   382  	return c.cfg.ReadOnly
   383  }
   384  
   385  func (c *client) connectReadWrite(ctx context.Context) error {
   386  	if c.readOnly() {
   387  		panic(moerr.NewInvalidInput(ctx, "incompatible client"))
   388  	}
   389  	return c.connect(ctx, pb.CONNECT)
   390  }
   391  
   392  func (c *client) connectReadOnly(ctx context.Context) error {
   393  	return c.connect(ctx, pb.CONNECT_RO)
   394  }
   395  
   396  func (c *client) request(ctx context.Context,
   397  	mt pb.MethodType, payload []byte, lsn Lsn,
   398  	maxSize uint64) (pb.Response, []pb.LogRecord, error) {
   399  	ctx, span := trace.Debug(ctx, "client.request")
   400  	defer span.End()
   401  	req := pb.Request{
   402  		Method: mt,
   403  		LogRequest: pb.LogRequest{
   404  			ShardID: c.cfg.LogShardID,
   405  			DNID:    c.cfg.DNReplicaID,
   406  			Lsn:     lsn,
   407  			MaxSize: maxSize,
   408  		},
   409  	}
   410  	r := c.pool.Get().(*RPCRequest)
   411  	defer r.Release()
   412  	r.Request = req
   413  	r.payload = payload
   414  	future, err := c.client.Send(ctx, c.addr, r)
   415  	if err != nil {
   416  		return pb.Response{}, nil, err
   417  	}
   418  	defer future.Close()
   419  	msg, err := future.Get()
   420  	if err != nil {
   421  		return pb.Response{}, nil, err
   422  	}
   423  	response, ok := msg.(*RPCResponse)
   424  	if !ok {
   425  		panic("unexpected response type")
   426  	}
   427  	resp := response.Response
   428  	defer response.Release()
   429  	var recs pb.LogRecordResponse
   430  	if len(response.payload) > 0 {
   431  		MustUnmarshal(&recs, response.payload)
   432  	}
   433  	err = toError(ctx, response.Response)
   434  	if err != nil {
   435  		return pb.Response{}, nil, err
   436  	}
   437  	return resp, recs.Records, nil
   438  }
   439  
   440  func (c *client) tsoRequest(ctx context.Context, count uint64) (uint64, error) {
   441  	ctx, span := trace.Debug(ctx, "client.tsoRequest")
   442  	defer span.End()
   443  	req := pb.Request{
   444  		Method: pb.TSO_UPDATE,
   445  		TsoRequest: &pb.TsoRequest{
   446  			Count: count,
   447  		},
   448  	}
   449  	r := c.pool.Get().(*RPCRequest)
   450  	r.Request = req
   451  	future, err := c.client.Send(ctx, c.addr, r)
   452  	if err != nil {
   453  		return 0, err
   454  	}
   455  	defer future.Close()
   456  	msg, err := future.Get()
   457  	if err != nil {
   458  		return 0, err
   459  	}
   460  	response, ok := msg.(*RPCResponse)
   461  	if !ok {
   462  		panic("unexpected response type")
   463  	}
   464  	resp := response.Response
   465  	defer response.Release()
   466  	err = toError(ctx, response.Response)
   467  	if err != nil {
   468  		return 0, err
   469  	}
   470  	return resp.TsoResponse.Value, nil
   471  }
   472  
   473  func (c *client) connect(ctx context.Context, mt pb.MethodType) error {
   474  	_, _, err := c.request(ctx, mt, nil, 0, 0)
   475  	return err
   476  }
   477  
   478  func (c *client) doAppend(ctx context.Context, rec pb.LogRecord) (Lsn, error) {
   479  	resp, _, err := c.request(ctx, pb.APPEND, rec.Data, 0, 0)
   480  	if err != nil {
   481  		return 0, err
   482  	}
   483  	return resp.LogResponse.Lsn, nil
   484  }
   485  
   486  func (c *client) doRead(ctx context.Context,
   487  	firstLsn Lsn, maxSize uint64) ([]pb.LogRecord, Lsn, error) {
   488  	resp, recs, err := c.request(ctx, pb.READ, nil, firstLsn, maxSize)
   489  	if err != nil {
   490  		return nil, 0, err
   491  	}
   492  	return recs, resp.LogResponse.LastLsn, nil
   493  }
   494  
   495  func (c *client) doTruncate(ctx context.Context, lsn Lsn) error {
   496  	_, _, err := c.request(ctx, pb.TRUNCATE, nil, lsn, 0)
   497  	return err
   498  }
   499  
   500  func (c *client) doGetTruncatedLsn(ctx context.Context) (Lsn, error) {
   501  	resp, _, err := c.request(ctx, pb.GET_TRUNCATE, nil, 0, 0)
   502  	if err != nil {
   503  		return 0, err
   504  	}
   505  	return resp.LogResponse.Lsn, nil
   506  }
   507  
   508  func getRPCClient(
   509  	ctx context.Context,
   510  	target string,
   511  	pool *sync.Pool,
   512  	maxMessageSize int,
   513  	enableCompress bool,
   514  	tag ...string) (morpc.RPCClient, error) {
   515  	mf := func() morpc.Message {
   516  		return pool.Get().(*RPCResponse)
   517  	}
   518  
   519  	// construct morpc.BackendOption
   520  	backendOpts := []morpc.BackendOption{
   521  		morpc.WithBackendConnectTimeout(time.Second),
   522  		morpc.WithBackendHasPayloadResponse(),
   523  		morpc.WithBackendLogger(logutil.GetGlobalLogger().Named("hakeeper-client-backend")),
   524  	}
   525  	backendOpts = append(backendOpts, GetBackendOptions(ctx)...)
   526  
   527  	// construct morpc.ClientOption
   528  	clientOpts := []morpc.ClientOption{
   529  		morpc.WithClientInitBackends([]string{target}, []int{1}),
   530  		morpc.WithClientMaxBackendPerHost(1),
   531  		morpc.WithClientTag(fmt.Sprintf("hakeeper-client(%s)", tag)),
   532  		morpc.WithClientLogger(logutil.GetGlobalLogger()),
   533  	}
   534  	clientOpts = append(clientOpts, GetClientOptions(ctx)...)
   535  
   536  	var codecOpts []morpc.CodecOption
   537  	codecOpts = append(codecOpts,
   538  		morpc.WithCodecPayloadCopyBufferSize(defaultWriteSocketSize),
   539  		morpc.WithCodecEnableChecksum(),
   540  		morpc.WithCodecMaxBodySize(maxMessageSize))
   541  	if enableCompress {
   542  		mp, err := mpool.NewMPool("log_rpc_client", 0, mpool.NoFixed)
   543  		if err != nil {
   544  			return nil, err
   545  		}
   546  		codecOpts = append(codecOpts, morpc.WithCodecEnableCompress(mp))
   547  	}
   548  
   549  	// we set connection timeout to a constant value so if ctx's deadline is much
   550  	// larger, then we can ensure that all specified potential nodes have a chance
   551  	// to be attempted
   552  	codec := morpc.NewMessageCodec(mf, codecOpts...)
   553  	bf := morpc.NewGoettyBasedBackendFactory(codec, backendOpts...)
   554  	return morpc.NewClient(bf, clientOpts...)
   555  }