github.com/matrixorigin/matrixone@v1.2.0/pkg/logservice/service.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  /*
    16  Package logservice implements MO's LogService component.
    17  */
    18  package logservice
    19  
    20  import (
    21  	"context"
    22  	"fmt"
    23  	"sync"
    24  	"sync/atomic"
    25  	"time"
    26  
    27  	"github.com/fagongzi/goetty/v2"
    28  	"github.com/lni/dragonboat/v4"
    29  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    30  	"github.com/matrixorigin/matrixone/pkg/common/morpc"
    31  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    32  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    33  	"github.com/matrixorigin/matrixone/pkg/common/stopper"
    34  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    35  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    36  	"github.com/matrixorigin/matrixone/pkg/taskservice"
    37  	"github.com/matrixorigin/matrixone/pkg/util"
    38  	v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2"
    39  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    40  	"go.uber.org/zap"
    41  )
    42  
const (
	// LogServiceRPCName is the name passed to morpc.NewRPCServer when
	// NewService creates the RPC server of a log service node.
	LogServiceRPCName = "logservice-server"
)

// Lsn is an alias of uint64; presumably short for "log sequence number".
type Lsn = uint64

// LogRecord is an alias of the protobuf-generated pb.LogRecord type.
type LogRecord = pb.LogRecord
    50  
    51  // TODO: move this to a better place
    52  func firstError(err1 error, err2 error) error {
    53  	if err1 != nil {
    54  		return err1
    55  	}
    56  	return err2
    57  }
    58  
// Service is the top layer component of a log service node. It manages the
// underlying log store which in turn manages all log shards including the
// HAKeeper shard. The Log Service component communicates with LogService
// clients owned by TN nodes and the HAKeeper service via network, it can
// be considered as the interface layer of the LogService.
type Service struct {
	// cfg is the configuration after NewService applied Fill and Validate.
	cfg         Config
	// runtime provides the loggers; NewService falls back to
	// runtime.DefaultRuntime() when no option has set it.
	runtime     runtime.Runtime
	// store is the log store created by newLogStore in NewService.
	store       *store
	// server is the morpc RPC server whose requests are dispatched to
	// handleRPCRequest.
	server      morpc.RPCServer
	// pool hands out *RPCRequest values to the codec's message factory.
	pool        *sync.Pool
	// respPool hands out *RPCResponse values; the server's session release
	// callback puts non-internal messages back into it.
	respPool    *sync.Pool
	// stopper runs the heartbeat worker task and is stopped first in Close.
	stopper     *stopper.Stopper
	// haClient is closed by Close when non-nil. It is not assigned in this
	// file section; presumably it is created by the heartbeat worker.
	haClient    LogHAKeeperClient
	// fileService and shutdownC are stored as passed to NewService; their
	// consumers are not visible in this file section.
	fileService fileservice.FileService
	shutdownC   chan struct{}

	options struct {
		// morpc client would filter remote backend via this
		backendFilter func(msg morpc.Message, backendAddr string) bool
	}

	// task groups the task service state. Close reads holder under the
	// embedded RWMutex's read lock.
	task struct {
		sync.RWMutex
		created        bool
		holder         taskservice.TaskServiceHolder
		storageFactory taskservice.TaskStorageFactory
	}

	// config is presumably populated by the WithConfigData option that
	// NewService appends — TODO confirm.
	config *util.ConfigData
}
    90  
    91  func NewService(
    92  	cfg Config,
    93  	fileService fileservice.FileService,
    94  	shutdownC chan struct{},
    95  	opts ...Option,
    96  ) (*Service, error) {
    97  	cfg.Fill()
    98  	if err := cfg.Validate(); err != nil {
    99  		return nil, err
   100  	}
   101  	configKVMap, _ := dumpLogConfig(cfg)
   102  	opts = append(opts, WithConfigData(configKVMap))
   103  
   104  	service := &Service{
   105  		cfg:         cfg,
   106  		stopper:     stopper.NewStopper("log-service"),
   107  		fileService: fileService,
   108  		shutdownC:   shutdownC,
   109  	}
   110  	for _, opt := range opts {
   111  		opt(service)
   112  	}
   113  	if service.runtime == nil {
   114  		service.runtime = runtime.DefaultRuntime()
   115  	}
   116  	store, err := newLogStore(cfg, service.getTaskService, service.runtime)
   117  	if err != nil {
   118  		service.runtime.Logger().Error("failed to create log store", zap.Error(err))
   119  		return nil, err
   120  	}
   121  	if err := store.loadMetadata(); err != nil {
   122  		return nil, err
   123  	}
   124  	if err := store.startReplicas(); err != nil {
   125  		return nil, err
   126  	}
   127  	pool := &sync.Pool{}
   128  	pool.New = func() interface{} {
   129  		return &RPCRequest{pool: pool}
   130  	}
   131  	respPool := &sync.Pool{}
   132  	respPool.New = func() interface{} {
   133  		return &RPCResponse{pool: respPool}
   134  	}
   135  	mf := func() morpc.Message {
   136  		return pool.Get().(*RPCRequest)
   137  	}
   138  
   139  	var codecOpts []morpc.CodecOption
   140  	codecOpts = append(codecOpts, morpc.WithCodecPayloadCopyBufferSize(16*1024),
   141  		morpc.WithCodecEnableChecksum(),
   142  		morpc.WithCodecMaxBodySize(int(cfg.RPC.MaxMessageSize)))
   143  	if cfg.RPC.EnableCompress {
   144  		mp, err := mpool.NewMPool("log_rpc_server", 0, mpool.NoFixed)
   145  		if err != nil {
   146  			return nil, err
   147  		}
   148  		codecOpts = append(codecOpts, morpc.WithCodecEnableCompress(mp))
   149  	}
   150  
   151  	// TODO: check and fix all these magic numbers
   152  	codec := morpc.NewMessageCodec(mf, codecOpts...)
   153  	server, err := morpc.NewRPCServer(LogServiceRPCName, cfg.LogServiceListenAddr(), codec,
   154  		morpc.WithServerGoettyOptions(goetty.WithSessionReleaseMsgFunc(func(i interface{}) {
   155  			msg := i.(morpc.RPCMessage)
   156  			if !msg.InternalMessage() {
   157  				respPool.Put(msg.Message)
   158  			}
   159  		})),
   160  		morpc.WithServerLogger(service.runtime.Logger().RawLogger()),
   161  	)
   162  	if err != nil {
   163  		return nil, err
   164  	}
   165  
   166  	service.store = store
   167  	service.server = server
   168  	service.pool = pool
   169  	service.respPool = respPool
   170  
   171  	server.RegisterRequestHandler(service.handleRPCRequest)
   172  	// TODO: before making the service available to the outside world, restore all
   173  	// replicas already known to the local store
   174  	if err := server.Start(); err != nil {
   175  		service.runtime.SubLogger(runtime.SystemInit).Error("failed to start the server", zap.Error(err))
   176  		if err := store.close(); err != nil {
   177  			service.runtime.SubLogger(runtime.SystemInit).Error("failed to close the store", zap.Error(err))
   178  		}
   179  		return nil, err
   180  	}
   181  	// start the heartbeat worker
   182  	if !cfg.DisableWorkers {
   183  		if err := service.stopper.RunNamedTask("log-heartbeat-worker", func(ctx context.Context) {
   184  			service.runtime.SubLogger(runtime.SystemInit).Info("logservice heartbeat worker started")
   185  
   186  			// transfer morpc options via context
   187  			ctx = SetBackendOptions(ctx, service.getBackendOptions()...)
   188  			ctx = SetClientOptions(ctx, service.getClientOptions()...)
   189  			service.heartbeatWorker(ctx)
   190  		}); err != nil {
   191  			return nil, err
   192  		}
   193  	}
   194  	service.initTaskHolder()
   195  	service.initSqlWriterFactory()
   196  	return service, nil
   197  }
   198  
   199  func (s *Service) Start() error {
   200  	return nil
   201  }
   202  
   203  func (s *Service) Close() (err error) {
   204  	s.stopper.Stop()
   205  	if s.haClient != nil {
   206  		err = firstError(err, s.haClient.Close())
   207  	}
   208  	err = firstError(err, s.server.Close())
   209  	if s.store != nil {
   210  		err = firstError(err, s.store.close())
   211  	}
   212  	s.task.RLock()
   213  	ts := s.task.holder
   214  	s.task.RUnlock()
   215  	if ts != nil {
   216  		err = firstError(err, ts.Close())
   217  	}
   218  	return err
   219  }
   220  
   221  func (s *Service) ID() string {
   222  	return s.store.id()
   223  }
   224  
   225  func (s *Service) handleRPCRequest(
   226  	ctx context.Context,
   227  	msg morpc.RPCMessage,
   228  	seq uint64,
   229  	cs morpc.ClientSession) error {
   230  	ctx, span := trace.Debug(ctx, "Service.handleRPCRequest")
   231  	defer span.End()
   232  
   233  	req := msg.Message
   234  	rr, ok := req.(*RPCRequest)
   235  	if !ok {
   236  		panic("unexpected message type")
   237  	}
   238  	defer rr.Release()
   239  	resp, records := s.handle(ctx, rr.Request, rr.GetPayloadField())
   240  	var recs []byte
   241  	if len(records.Records) > 0 {
   242  		recs = MustMarshal(&records)
   243  	}
   244  	resp.RequestID = rr.RequestID
   245  	response := s.respPool.Get().(*RPCResponse)
   246  	response.Response = resp
   247  	response.payload = recs
   248  	return cs.Write(ctx, response)
   249  }
   250  
   251  func (s *Service) handle(ctx context.Context, req pb.Request,
   252  	payload []byte) (pb.Response, pb.LogRecordResponse) {
   253  	ctx, span := trace.Debug(ctx, "Service.handle."+req.Method.String())
   254  	defer span.End()
   255  	switch req.Method {
   256  	case pb.TSO_UPDATE:
   257  		return s.handleTsoUpdate(ctx, req), pb.LogRecordResponse{}
   258  	case pb.APPEND:
   259  		return s.handleAppend(ctx, req, payload), pb.LogRecordResponse{}
   260  	case pb.READ:
   261  		return s.handleRead(ctx, req)
   262  	case pb.TRUNCATE:
   263  		return s.handleTruncate(ctx, req), pb.LogRecordResponse{}
   264  	case pb.GET_TRUNCATE:
   265  		return s.handleGetTruncatedIndex(ctx, req), pb.LogRecordResponse{}
   266  	case pb.CONNECT:
   267  		return s.handleConnect(ctx, req), pb.LogRecordResponse{}
   268  	case pb.CONNECT_RO:
   269  		return s.handleConnectRO(ctx, req), pb.LogRecordResponse{}
   270  	case pb.LOG_HEARTBEAT:
   271  		return s.handleLogHeartbeat(ctx, req), pb.LogRecordResponse{}
   272  	case pb.CN_HEARTBEAT:
   273  		return s.handleCNHeartbeat(ctx, req), pb.LogRecordResponse{}
   274  	case pb.CN_ALLOCATE_ID:
   275  		return s.handleCNAllocateID(ctx, req), pb.LogRecordResponse{}
   276  	case pb.TN_HEARTBEAT:
   277  		return s.handleTNHeartbeat(ctx, req), pb.LogRecordResponse{}
   278  	case pb.CHECK_HAKEEPER:
   279  		return s.handleCheckHAKeeper(ctx, req), pb.LogRecordResponse{}
   280  	case pb.GET_CLUSTER_DETAILS:
   281  		return s.handleGetClusterDetails(ctx, req), pb.LogRecordResponse{}
   282  	case pb.GET_CLUSTER_STATE:
   283  		return s.handleGetCheckerState(ctx, req), pb.LogRecordResponse{}
   284  	case pb.GET_SHARD_INFO:
   285  		return s.handleGetShardInfo(ctx, req), pb.LogRecordResponse{}
   286  	case pb.UPDATE_CN_LABEL:
   287  		return s.handleUpdateCNLabel(ctx, req), pb.LogRecordResponse{}
   288  	case pb.UPDATE_CN_WORK_STATE:
   289  		return s.handleUpdateCNWorkState(ctx, req), pb.LogRecordResponse{}
   290  	case pb.PATCH_CN_STORE:
   291  		return s.handlePatchCNStore(ctx, req), pb.LogRecordResponse{}
   292  	case pb.DELETE_CN_STORE:
   293  		return s.handleDeleteCNStore(ctx, req), pb.LogRecordResponse{}
   294  	case pb.PROXY_HEARTBEAT:
   295  		return s.handleProxyHeartbeat(ctx, req), pb.LogRecordResponse{}
   296  	default:
   297  		resp := getResponse(req)
   298  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(
   299  			moerr.NewNotSupported(ctx,
   300  				fmt.Sprintf("logservice method type %d", req.Method)))
   301  		return resp, pb.LogRecordResponse{}
   302  	}
   303  }
   304  
   305  func getResponse(req pb.Request) pb.Response {
   306  	return pb.Response{Method: req.Method}
   307  }
   308  
   309  func (s *Service) handleGetShardInfo(ctx context.Context, req pb.Request) pb.Response {
   310  	resp := getResponse(req)
   311  	if result, ok := s.getShardInfo(req.LogRequest.ShardID); !ok {
   312  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(dragonboat.ErrShardNotFound)
   313  	} else {
   314  		resp.ShardInfo = &result
   315  	}
   316  	return resp
   317  }
   318  
   319  func (s *Service) handleGetClusterDetails(ctx context.Context, req pb.Request) pb.Response {
   320  	resp := getResponse(req)
   321  	if v, err := s.store.getClusterDetails(ctx); err != nil {
   322  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   323  	} else {
   324  		resp.ClusterDetails = &v
   325  	}
   326  	return resp
   327  }
   328  
   329  func (s *Service) handleGetCheckerState(ctx context.Context, req pb.Request) pb.Response {
   330  	resp := getResponse(req)
   331  	if v, err := s.store.getCheckerState(); err != nil {
   332  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   333  	} else {
   334  		resp.CheckerState = v
   335  	}
   336  	return resp
   337  }
   338  
   339  func (s *Service) handleTsoUpdate(ctx context.Context, req pb.Request) pb.Response {
   340  	r := req.TsoRequest
   341  	resp := getResponse(req)
   342  	if v, err := s.store.tsoUpdate(ctx, r.Count); err != nil {
   343  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   344  	} else {
   345  		resp.TsoResponse = &pb.TsoResponse{Value: v}
   346  	}
   347  	return resp
   348  }
   349  
   350  func (s *Service) handleConnect(ctx context.Context, req pb.Request) pb.Response {
   351  	r := req.LogRequest
   352  	resp := getResponse(req)
   353  	if err := s.store.getOrExtendTNLease(ctx, r.ShardID, r.TNID); err != nil {
   354  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   355  	}
   356  	return resp
   357  }
   358  
   359  func (s *Service) handleConnectRO(ctx context.Context, req pb.Request) pb.Response {
   360  	r := req.LogRequest
   361  	resp := getResponse(req)
   362  	// we only check whether the specified shard is available
   363  	if _, err := s.store.getTruncatedLsn(ctx, r.ShardID); err != nil {
   364  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   365  	}
   366  	return resp
   367  }
   368  
   369  func (s *Service) handleAppend(ctx context.Context, req pb.Request, payload []byte) pb.Response {
   370  	r := req.LogRequest
   371  	resp := getResponse(req)
   372  	lsn, err := s.store.append(ctx, r.ShardID, payload)
   373  	if err != nil {
   374  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   375  	} else {
   376  		resp.LogResponse.Lsn = lsn
   377  	}
   378  	return resp
   379  }
   380  
   381  func (s *Service) handleRead(ctx context.Context, req pb.Request) (pb.Response, pb.LogRecordResponse) {
   382  	r := req.LogRequest
   383  	resp := getResponse(req)
   384  	records, lsn, err := s.store.queryLog(ctx, r.ShardID, r.Lsn, r.MaxSize)
   385  	if err != nil {
   386  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   387  	} else {
   388  		resp.LogResponse.LastLsn = lsn
   389  	}
   390  	return resp, pb.LogRecordResponse{Records: records}
   391  }
   392  
   393  func (s *Service) handleTruncate(ctx context.Context, req pb.Request) pb.Response {
   394  	r := req.LogRequest
   395  	resp := getResponse(req)
   396  	if err := s.store.truncateLog(ctx, r.ShardID, r.Lsn); err != nil {
   397  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   398  	}
   399  	return resp
   400  }
   401  
   402  func (s *Service) handleGetTruncatedIndex(ctx context.Context, req pb.Request) pb.Response {
   403  	r := req.LogRequest
   404  	resp := getResponse(req)
   405  	lsn, err := s.store.getTruncatedLsn(ctx, r.ShardID)
   406  	if err != nil {
   407  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   408  	} else {
   409  		resp.LogResponse.Lsn = lsn
   410  	}
   411  	return resp
   412  }
   413  
   414  // TODO: add tests to see what happens when request is sent to non hakeeper stores
   415  func (s *Service) handleLogHeartbeat(ctx context.Context, req pb.Request) pb.Response {
   416  	start := time.Now()
   417  	defer func() {
   418  		v2.LogHeartbeatRecvHistogram.Observe(time.Since(start).Seconds())
   419  	}()
   420  	hb := req.LogHeartbeat
   421  	resp := getResponse(req)
   422  	if cb, err := s.store.addLogStoreHeartbeat(ctx, *hb); err != nil {
   423  		v2.LogHeartbeatRecvFailureCounter.Inc()
   424  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   425  		return resp
   426  	} else {
   427  		resp.CommandBatch = &cb
   428  	}
   429  
   430  	return resp
   431  }
   432  
   433  func (s *Service) handleCNHeartbeat(ctx context.Context, req pb.Request) pb.Response {
   434  	start := time.Now()
   435  	defer func() {
   436  		v2.CNHeartbeatRecvHistogram.Observe(time.Since(start).Seconds())
   437  	}()
   438  	hb := req.CNHeartbeat
   439  	resp := getResponse(req)
   440  	if cb, err := s.store.addCNStoreHeartbeat(ctx, *hb); err != nil {
   441  		v2.CNHeartbeatRecvFailureCounter.Inc()
   442  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   443  		return resp
   444  	} else {
   445  		resp.CommandBatch = &cb
   446  	}
   447  
   448  	return resp
   449  }
   450  
   451  func (s *Service) handleCNAllocateID(ctx context.Context, req pb.Request) pb.Response {
   452  	resp := getResponse(req)
   453  	firstID, err := s.store.cnAllocateID(ctx, *req.CNAllocateID)
   454  	if err != nil {
   455  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   456  		return resp
   457  	}
   458  	resp.AllocateID = &pb.AllocateIDResponse{FirstID: firstID}
   459  	return resp
   460  }
   461  
   462  func (s *Service) handleTNHeartbeat(ctx context.Context, req pb.Request) pb.Response {
   463  	start := time.Now()
   464  	defer func() {
   465  		v2.TNHeartbeatRecvHistogram.Observe(time.Since(start).Seconds())
   466  	}()
   467  	hb := req.TNHeartbeat
   468  	resp := getResponse(req)
   469  	if cb, err := s.store.addTNStoreHeartbeat(ctx, *hb); err != nil {
   470  		v2.TNHeartbeatRecvFailureCounter.Inc()
   471  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   472  		return resp
   473  	} else {
   474  		resp.CommandBatch = &cb
   475  	}
   476  
   477  	return resp
   478  }
   479  
   480  func (s *Service) handleCheckHAKeeper(ctx context.Context, req pb.Request) pb.Response {
   481  	resp := getResponse(req)
   482  	if atomic.LoadUint64(&s.store.haKeeperReplicaID) != 0 {
   483  		resp.IsHAKeeper = true
   484  	}
   485  	return resp
   486  }
   487  
   488  func (s *Service) handleUpdateCNLabel(ctx context.Context, req pb.Request) pb.Response {
   489  	label := req.CNStoreLabel
   490  	resp := getResponse(req)
   491  	if err := s.store.updateCNLabel(ctx, *label); err != nil {
   492  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   493  		return resp
   494  	}
   495  	return resp
   496  }
   497  
   498  func (s *Service) handleUpdateCNWorkState(ctx context.Context, req pb.Request) pb.Response {
   499  	workState := req.CNWorkState
   500  	resp := getResponse(req)
   501  	if err := s.store.updateCNWorkState(ctx, *workState); err != nil {
   502  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   503  		return resp
   504  	}
   505  	return resp
   506  }
   507  
   508  func (s *Service) handlePatchCNStore(ctx context.Context, req pb.Request) pb.Response {
   509  	stateLabel := req.CNStateLabel
   510  	resp := getResponse(req)
   511  	if err := s.store.patchCNStore(ctx, *stateLabel); err != nil {
   512  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   513  		return resp
   514  	}
   515  	return resp
   516  }
   517  
   518  func (s *Service) handleDeleteCNStore(ctx context.Context, req pb.Request) pb.Response {
   519  	cnStore := req.DeleteCNStore
   520  	resp := getResponse(req)
   521  	if err := s.store.deleteCNStore(ctx, *cnStore); err != nil {
   522  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   523  		return resp
   524  	}
   525  	return resp
   526  }
   527  
   528  func (s *Service) handleProxyHeartbeat(ctx context.Context, req pb.Request) pb.Response {
   529  	resp := getResponse(req)
   530  	if cb, err := s.store.addProxyHeartbeat(ctx, *req.ProxyHeartbeat); err != nil {
   531  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   532  		return resp
   533  	} else {
   534  		resp.CommandBatch = &cb
   535  	}
   536  	return resp
   537  }
   538  
   539  func (s *Service) getBackendOptions() []morpc.BackendOption {
   540  	return []morpc.BackendOption{
   541  		morpc.WithBackendFilter(func(msg morpc.Message, backendAddr string) bool {
   542  			m, ok := msg.(*RPCRequest)
   543  			if !ok {
   544  				return true
   545  			}
   546  			return s.options.backendFilter == nil || s.options.backendFilter(m, backendAddr)
   547  		}),
   548  	}
   549  }
   550  
   551  // NB: leave an empty method for future extension.
   552  func (s *Service) getClientOptions() []morpc.ClientOption {
   553  	return []morpc.ClientOption{}
   554  }