github.com/matrixorigin/matrixone@v0.7.0/pkg/logservice/service.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
/*
Package logservice implements MO's LogService component.
*/
    18  package logservice
    19  
    20  import (
    21  	"context"
    22  	"sync"
    23  	"sync/atomic"
    24  
    25  	"go.uber.org/zap"
    26  
    27  	"github.com/fagongzi/goetty/v2"
    28  	"github.com/lni/dragonboat/v4"
    29  
    30  	"github.com/matrixorigin/matrixone/pkg/common/morpc"
    31  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    32  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    33  	"github.com/matrixorigin/matrixone/pkg/common/stopper"
    34  	"github.com/matrixorigin/matrixone/pkg/fileservice"
    35  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    36  	"github.com/matrixorigin/matrixone/pkg/taskservice"
    37  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    38  )
    39  
const (
	// LogServiceRPCName is the name passed to morpc.NewRPCServer when the
	// service's RPC server is created in NewService.
	LogServiceRPCName = "logservice-rpc"
)

// Lsn is an alias of uint64. The name presumably stands for "log sequence
// number" - confirm against the log store implementation.
type Lsn = uint64

// LogRecord is an alias of the protobuf-generated pb.LogRecord type.
type LogRecord = pb.LogRecord
    47  
    48  // TODO: move this to a better place
    49  func firstError(err1 error, err2 error) error {
    50  	if err1 != nil {
    51  		return err1
    52  	}
    53  	return err2
    54  }
    55  
// Service is the top layer component of a log service node. It manages the
// underlying log store which in turn manages all log shards including the
// HAKeeper shard. The Log Service component communicates with LogService
// clients owned by DN nodes and the HAKeeper service via network, it can
// be considered as the interface layer of the LogService.
type Service struct {
	// cfg is the configuration given to NewService after Fill and Validate.
	cfg Config
	// runtime supplies the loggers used by the service; NewService falls back
	// to runtime.DefaultRuntime() when no option sets it.
	runtime runtime.Runtime
	// store is the log store created by newLogStore in NewService.
	store *store
	// server is the morpc RPC server that dispatches to handleRPCRequest.
	server morpc.RPCServer
	// pool hands out *RPCRequest values to the message codec.
	pool *sync.Pool
	// respPool hands out *RPCResponse values used by handleRPCRequest.
	respPool *sync.Pool
	// stopper runs the "log-heartbeat-worker" task and is stopped by Close.
	stopper *stopper.Stopper
	// haClient is closed by Close when it is non-nil.
	haClient LogHAKeeperClient
	// fileService is the file service given to NewService.
	fileService fileservice.FileService

	options struct {
		// morpc client would filter remote backend via this
		backendFilter func(msg morpc.Message, backendAddr string) bool
	}

	// task groups the task service state; Close reads holder under the
	// embedded RWMutex.
	task struct {
		sync.RWMutex
		// NOTE(review): presumably records whether the task service has been
		// created - confirm against initTaskHolder / getTaskService.
		created bool
		// holder is closed by Close when it is non-nil.
		holder         taskservice.TaskServiceHolder
		storageFactory taskservice.TaskStorageFactory
	}
}
    84  
    85  func NewService(
    86  	cfg Config,
    87  	fileService fileservice.FileService,
    88  	opts ...Option,
    89  ) (*Service, error) {
    90  	cfg.Fill()
    91  	if err := cfg.Validate(); err != nil {
    92  		return nil, err
    93  	}
    94  
    95  	service := &Service{
    96  		cfg:         cfg,
    97  		stopper:     stopper.NewStopper("log-service"),
    98  		fileService: fileService,
    99  	}
   100  	for _, opt := range opts {
   101  		opt(service)
   102  	}
   103  	if service.runtime == nil {
   104  		service.runtime = runtime.DefaultRuntime()
   105  	}
   106  	store, err := newLogStore(cfg, service.getTaskService, service.runtime)
   107  	if err != nil {
   108  		service.runtime.Logger().Error("failed to create log store", zap.Error(err))
   109  		return nil, err
   110  	}
   111  	if err := store.loadMetadata(); err != nil {
   112  		return nil, err
   113  	}
   114  	if err := store.startReplicas(); err != nil {
   115  		return nil, err
   116  	}
   117  	pool := &sync.Pool{}
   118  	pool.New = func() interface{} {
   119  		return &RPCRequest{pool: pool}
   120  	}
   121  	respPool := &sync.Pool{}
   122  	respPool.New = func() interface{} {
   123  		return &RPCResponse{pool: respPool}
   124  	}
   125  	mf := func() morpc.Message {
   126  		return pool.Get().(*RPCRequest)
   127  	}
   128  
   129  	var codecOpts []morpc.CodecOption
   130  	codecOpts = append(codecOpts, morpc.WithCodecPayloadCopyBufferSize(16*1024),
   131  		morpc.WithCodecEnableChecksum(),
   132  		morpc.WithCodecMaxBodySize(int(cfg.RPC.MaxMessageSize)))
   133  	if cfg.RPC.EnableCompress {
   134  		mp, err := mpool.NewMPool("log_rpc_server", 0, mpool.NoFixed)
   135  		if err != nil {
   136  			return nil, err
   137  		}
   138  		codecOpts = append(codecOpts, morpc.WithCodecEnableCompress(mp))
   139  	}
   140  
   141  	// TODO: check and fix all these magic numbers
   142  	codec := morpc.NewMessageCodec(mf, codecOpts...)
   143  	server, err := morpc.NewRPCServer(LogServiceRPCName, cfg.ServiceListenAddress, codec,
   144  		morpc.WithServerGoettyOptions(goetty.WithSessionReleaseMsgFunc(func(i interface{}) {
   145  			msg := i.(morpc.RPCMessage)
   146  			if !msg.InternalMessage() {
   147  				respPool.Put(msg.Message)
   148  			}
   149  		})),
   150  		morpc.WithServerLogger(service.runtime.Logger().RawLogger()),
   151  	)
   152  	if err != nil {
   153  		return nil, err
   154  	}
   155  
   156  	service.store = store
   157  	service.server = server
   158  	service.pool = pool
   159  	service.respPool = respPool
   160  
   161  	server.RegisterRequestHandler(service.handleRPCRequest)
   162  	// TODO: before making the service available to the outside world, restore all
   163  	// replicas already known to the local store
   164  	if err := server.Start(); err != nil {
   165  		service.runtime.SubLogger(runtime.SystemInit).Error("failed to start the server", zap.Error(err))
   166  		if err := store.close(); err != nil {
   167  			service.runtime.SubLogger(runtime.SystemInit).Error("failed to close the store", zap.Error(err))
   168  		}
   169  		return nil, err
   170  	}
   171  	// start the heartbeat worker
   172  	if !cfg.DisableWorkers {
   173  		if err := service.stopper.RunNamedTask("log-heartbeat-worker", func(ctx context.Context) {
   174  			service.runtime.SubLogger(runtime.SystemInit).Info("logservice heartbeat worker started")
   175  
   176  			// transfer morpc options via context
   177  			ctx = SetBackendOptions(ctx, service.getBackendOptions()...)
   178  			ctx = SetClientOptions(ctx, service.getClientOptions()...)
   179  			service.heartbeatWorker(ctx)
   180  		}); err != nil {
   181  			return nil, err
   182  		}
   183  	}
   184  	service.initTaskHolder()
   185  	return service, nil
   186  }
   187  
// Start is a no-op that always returns nil; the RPC server and the heartbeat
// worker are already started by NewService.
func (s *Service) Start() error {
	return nil
}
   191  
   192  func (s *Service) Close() (err error) {
   193  	s.stopper.Stop()
   194  	if s.haClient != nil {
   195  		err = firstError(err, s.haClient.Close())
   196  	}
   197  	err = firstError(err, s.server.Close())
   198  	if s.store != nil {
   199  		err = firstError(err, s.store.close())
   200  	}
   201  	s.task.RLock()
   202  	ts := s.task.holder
   203  	s.task.RUnlock()
   204  	if ts != nil {
   205  		err = firstError(err, ts.Close())
   206  	}
   207  	return err
   208  }
   209  
// ID returns the identifier reported by the underlying log store.
func (s *Service) ID() string {
	return s.store.id()
}
   213  
   214  func (s *Service) handleRPCRequest(ctx context.Context, req morpc.Message,
   215  	seq uint64, cs morpc.ClientSession) error {
   216  	ctx, span := trace.Debug(ctx, "Service.handleRPCRequest")
   217  	defer span.End()
   218  	rr, ok := req.(*RPCRequest)
   219  	if !ok {
   220  		panic("unexpected message type")
   221  	}
   222  	defer rr.Release()
   223  	resp, records := s.handle(ctx, rr.Request, rr.GetPayloadField())
   224  	var recs []byte
   225  	if len(records.Records) > 0 {
   226  		recs = MustMarshal(&records)
   227  	}
   228  	resp.RequestID = rr.RequestID
   229  	response := s.respPool.Get().(*RPCResponse)
   230  	response.Response = resp
   231  	response.payload = recs
   232  	return cs.Write(ctx, response)
   233  }
   234  
   235  func (s *Service) handle(ctx context.Context, req pb.Request,
   236  	payload []byte) (pb.Response, pb.LogRecordResponse) {
   237  	ctx, span := trace.Debug(ctx, "Service.handle."+req.Method.String())
   238  	defer span.End()
   239  	switch req.Method {
   240  	case pb.TSO_UPDATE:
   241  		return s.handleTsoUpdate(ctx, req), pb.LogRecordResponse{}
   242  	case pb.APPEND:
   243  		return s.handleAppend(ctx, req, payload), pb.LogRecordResponse{}
   244  	case pb.READ:
   245  		return s.handleRead(ctx, req)
   246  	case pb.TRUNCATE:
   247  		return s.handleTruncate(ctx, req), pb.LogRecordResponse{}
   248  	case pb.GET_TRUNCATE:
   249  		return s.handleGetTruncatedIndex(ctx, req), pb.LogRecordResponse{}
   250  	case pb.CONNECT:
   251  		return s.handleConnect(ctx, req), pb.LogRecordResponse{}
   252  	case pb.CONNECT_RO:
   253  		return s.handleConnectRO(ctx, req), pb.LogRecordResponse{}
   254  	case pb.LOG_HEARTBEAT:
   255  		return s.handleLogHeartbeat(ctx, req), pb.LogRecordResponse{}
   256  	case pb.CN_HEARTBEAT:
   257  		return s.handleCNHeartbeat(ctx, req), pb.LogRecordResponse{}
   258  	case pb.CN_ALLOCATE_ID:
   259  		return s.handleCNAllocateID(ctx, req), pb.LogRecordResponse{}
   260  	case pb.DN_HEARTBEAT:
   261  		return s.handleDNHeartbeat(ctx, req), pb.LogRecordResponse{}
   262  	case pb.CHECK_HAKEEPER:
   263  		return s.handleCheckHAKeeper(ctx, req), pb.LogRecordResponse{}
   264  	case pb.GET_CLUSTER_DETAILS:
   265  		return s.handleGetClusterDetails(ctx, req), pb.LogRecordResponse{}
   266  	case pb.GET_CLUSTER_STATE:
   267  		return s.handleGetCheckerState(ctx, req), pb.LogRecordResponse{}
   268  	case pb.GET_SHARD_INFO:
   269  		return s.handleGetShardInfo(ctx, req), pb.LogRecordResponse{}
   270  	default:
   271  		panic("unknown log service method type")
   272  	}
   273  }
   274  
   275  func getResponse(req pb.Request) pb.Response {
   276  	return pb.Response{Method: req.Method}
   277  }
   278  
   279  func (s *Service) handleGetShardInfo(ctx context.Context, req pb.Request) pb.Response {
   280  	resp := getResponse(req)
   281  	if result, ok := s.getShardInfo(req.LogRequest.ShardID); !ok {
   282  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(dragonboat.ErrShardNotFound)
   283  	} else {
   284  		resp.ShardInfo = &result
   285  	}
   286  	return resp
   287  }
   288  
   289  func (s *Service) handleGetClusterDetails(ctx context.Context, req pb.Request) pb.Response {
   290  	resp := getResponse(req)
   291  	if v, err := s.store.getClusterDetails(ctx); err != nil {
   292  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   293  	} else {
   294  		resp.ClusterDetails = &v
   295  	}
   296  	return resp
   297  }
   298  
   299  func (s *Service) handleGetCheckerState(ctx context.Context, req pb.Request) pb.Response {
   300  	resp := getResponse(req)
   301  	if v, err := s.store.getCheckerState(); err != nil {
   302  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   303  	} else {
   304  		resp.CheckerState = v
   305  	}
   306  	return resp
   307  }
   308  
   309  func (s *Service) handleTsoUpdate(ctx context.Context, req pb.Request) pb.Response {
   310  	r := req.TsoRequest
   311  	resp := getResponse(req)
   312  	if v, err := s.store.tsoUpdate(ctx, r.Count); err != nil {
   313  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   314  	} else {
   315  		resp.TsoResponse = &pb.TsoResponse{Value: v}
   316  	}
   317  	return resp
   318  }
   319  
   320  func (s *Service) handleConnect(ctx context.Context, req pb.Request) pb.Response {
   321  	r := req.LogRequest
   322  	resp := getResponse(req)
   323  	if err := s.store.getOrExtendDNLease(ctx, r.ShardID, r.DNID); err != nil {
   324  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   325  	}
   326  	return resp
   327  }
   328  
   329  func (s *Service) handleConnectRO(ctx context.Context, req pb.Request) pb.Response {
   330  	r := req.LogRequest
   331  	resp := getResponse(req)
   332  	// we only check whether the specified shard is available
   333  	if _, err := s.store.getTruncatedLsn(ctx, r.ShardID); err != nil {
   334  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   335  	}
   336  	return resp
   337  }
   338  
   339  func (s *Service) handleAppend(ctx context.Context, req pb.Request, payload []byte) pb.Response {
   340  	r := req.LogRequest
   341  	resp := getResponse(req)
   342  	lsn, err := s.store.append(ctx, r.ShardID, payload)
   343  	if err != nil {
   344  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   345  	} else {
   346  		resp.LogResponse.Lsn = lsn
   347  	}
   348  	return resp
   349  }
   350  
   351  func (s *Service) handleRead(ctx context.Context, req pb.Request) (pb.Response, pb.LogRecordResponse) {
   352  	r := req.LogRequest
   353  	resp := getResponse(req)
   354  	records, lsn, err := s.store.queryLog(ctx, r.ShardID, r.Lsn, r.MaxSize)
   355  	if err != nil {
   356  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   357  	} else {
   358  		resp.LogResponse.LastLsn = lsn
   359  	}
   360  	return resp, pb.LogRecordResponse{Records: records}
   361  }
   362  
   363  func (s *Service) handleTruncate(ctx context.Context, req pb.Request) pb.Response {
   364  	r := req.LogRequest
   365  	resp := getResponse(req)
   366  	if err := s.store.truncateLog(ctx, r.ShardID, r.Lsn); err != nil {
   367  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   368  	}
   369  	return resp
   370  }
   371  
   372  func (s *Service) handleGetTruncatedIndex(ctx context.Context, req pb.Request) pb.Response {
   373  	r := req.LogRequest
   374  	resp := getResponse(req)
   375  	lsn, err := s.store.getTruncatedLsn(ctx, r.ShardID)
   376  	if err != nil {
   377  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   378  	} else {
   379  		resp.LogResponse.Lsn = lsn
   380  	}
   381  	return resp
   382  }
   383  
   384  // TODO: add tests to see what happens when request is sent to non hakeeper stores
   385  func (s *Service) handleLogHeartbeat(ctx context.Context, req pb.Request) pb.Response {
   386  	hb := req.LogHeartbeat
   387  	resp := getResponse(req)
   388  	if cb, err := s.store.addLogStoreHeartbeat(ctx, *hb); err != nil {
   389  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   390  		return resp
   391  	} else {
   392  		resp.CommandBatch = &cb
   393  	}
   394  
   395  	return resp
   396  }
   397  
   398  func (s *Service) handleCNHeartbeat(ctx context.Context, req pb.Request) pb.Response {
   399  	hb := req.CNHeartbeat
   400  	resp := getResponse(req)
   401  	if cb, err := s.store.addCNStoreHeartbeat(ctx, *hb); err != nil {
   402  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   403  		return resp
   404  	} else {
   405  		resp.CommandBatch = &cb
   406  	}
   407  
   408  	return resp
   409  }
   410  
   411  func (s *Service) handleCNAllocateID(ctx context.Context, req pb.Request) pb.Response {
   412  	resp := getResponse(req)
   413  	firstID, err := s.store.cnAllocateID(ctx, *req.CNAllocateID)
   414  	if err != nil {
   415  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   416  		return resp
   417  	}
   418  	resp.AllocateID = &pb.AllocateIDResponse{FirstID: firstID}
   419  	return resp
   420  }
   421  
   422  func (s *Service) handleDNHeartbeat(ctx context.Context, req pb.Request) pb.Response {
   423  	hb := req.DNHeartbeat
   424  	resp := getResponse(req)
   425  	if cb, err := s.store.addDNStoreHeartbeat(ctx, *hb); err != nil {
   426  		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
   427  		return resp
   428  	} else {
   429  		resp.CommandBatch = &cb
   430  	}
   431  
   432  	return resp
   433  }
   434  
   435  func (s *Service) handleCheckHAKeeper(ctx context.Context, req pb.Request) pb.Response {
   436  	resp := getResponse(req)
   437  	if atomic.LoadUint64(&s.store.haKeeperReplicaID) != 0 {
   438  		resp.IsHAKeeper = true
   439  	}
   440  	return resp
   441  }
   442  
   443  func (s *Service) getBackendOptions() []morpc.BackendOption {
   444  	return []morpc.BackendOption{
   445  		morpc.WithBackendFilter(func(msg morpc.Message, backendAddr string) bool {
   446  			return s.options.backendFilter == nil ||
   447  				s.options.backendFilter(msg.(*RPCRequest), backendAddr)
   448  		}),
   449  	}
   450  }
   451  
   452  // NB: leave an empty method for future extension.
   453  func (s *Service) getClientOptions() []morpc.ClientOption {
   454  	return []morpc.ClientOption{
   455  		morpc.WithClientTag("log-heartbeat"),
   456  	}
   457  }