github.com/matrixorigin/matrixone@v0.7.0/pkg/logservice/service.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 /* 16 Package logservice implement MO's LogService component. 17 */ 18 package logservice 19 20 import ( 21 "context" 22 "sync" 23 "sync/atomic" 24 25 "go.uber.org/zap" 26 27 "github.com/fagongzi/goetty/v2" 28 "github.com/lni/dragonboat/v4" 29 30 "github.com/matrixorigin/matrixone/pkg/common/morpc" 31 "github.com/matrixorigin/matrixone/pkg/common/mpool" 32 "github.com/matrixorigin/matrixone/pkg/common/runtime" 33 "github.com/matrixorigin/matrixone/pkg/common/stopper" 34 "github.com/matrixorigin/matrixone/pkg/fileservice" 35 pb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 36 "github.com/matrixorigin/matrixone/pkg/taskservice" 37 "github.com/matrixorigin/matrixone/pkg/util/trace" 38 ) 39 40 const ( 41 LogServiceRPCName = "logservice-rpc" 42 ) 43 44 type Lsn = uint64 45 46 type LogRecord = pb.LogRecord 47 48 // TODO: move this to a better place 49 func firstError(err1 error, err2 error) error { 50 if err1 != nil { 51 return err1 52 } 53 return err2 54 } 55 56 // Service is the top layer component of a log service node. It manages the 57 // underlying log store which in turn manages all log shards including the 58 // HAKeeper shard. The Log Service component communicates with LogService 59 // clients owned by DN nodes and the HAKeeper service via network, it can 60 // be considered as the interface layer of the LogService. 61 type Service struct { 62 cfg Config 63 runtime runtime.Runtime 64 store *store 65 server morpc.RPCServer 66 pool *sync.Pool 67 respPool *sync.Pool 68 stopper *stopper.Stopper 69 haClient LogHAKeeperClient 70 fileService fileservice.FileService 71 72 options struct { 73 // morpc client would filter remote backend via this 74 backendFilter func(msg morpc.Message, backendAddr string) bool 75 } 76 77 task struct { 78 sync.RWMutex 79 created bool 80 holder taskservice.TaskServiceHolder 81 storageFactory taskservice.TaskStorageFactory 82 } 83 } 84 85 func NewService( 86 cfg Config, 87 fileService fileservice.FileService, 88 opts ...Option, 89 ) (*Service, error) { 90 cfg.Fill() 91 if err := cfg.Validate(); err != nil { 92 return nil, err 93 } 94 95 service := &Service{ 96 cfg: cfg, 97 stopper: stopper.NewStopper("log-service"), 98 fileService: fileService, 99 } 100 for _, opt := range opts { 101 opt(service) 102 } 103 if service.runtime == nil { 104 service.runtime = runtime.DefaultRuntime() 105 } 106 store, err := newLogStore(cfg, service.getTaskService, service.runtime) 107 if err != nil { 108 service.runtime.Logger().Error("failed to create log store", zap.Error(err)) 109 return nil, err 110 } 111 if err := store.loadMetadata(); err != nil { 112 return nil, err 113 } 114 if err := store.startReplicas(); err != nil { 115 return nil, err 116 } 117 pool := &sync.Pool{} 118 pool.New = func() interface{} { 119 return &RPCRequest{pool: pool} 120 } 121 respPool := &sync.Pool{} 122 respPool.New = func() interface{} { 123 return &RPCResponse{pool: respPool} 124 } 125 mf := func() morpc.Message { 126 return pool.Get().(*RPCRequest) 127 } 128 129 var codecOpts []morpc.CodecOption 130 codecOpts = append(codecOpts, morpc.WithCodecPayloadCopyBufferSize(16*1024), 131 morpc.WithCodecEnableChecksum(), 132 morpc.WithCodecMaxBodySize(int(cfg.RPC.MaxMessageSize))) 133 if cfg.RPC.EnableCompress { 134 mp, err := mpool.NewMPool("log_rpc_server", 0, mpool.NoFixed) 135 if err != nil { 136 return nil, err 137 } 138 codecOpts = append(codecOpts, morpc.WithCodecEnableCompress(mp)) 139 } 140 141 // TODO: check and fix all these magic numbers 142 codec := morpc.NewMessageCodec(mf, codecOpts...) 143 server, err := morpc.NewRPCServer(LogServiceRPCName, cfg.ServiceListenAddress, codec, 144 morpc.WithServerGoettyOptions(goetty.WithSessionReleaseMsgFunc(func(i interface{}) { 145 msg := i.(morpc.RPCMessage) 146 if !msg.InternalMessage() { 147 respPool.Put(msg.Message) 148 } 149 })), 150 morpc.WithServerLogger(service.runtime.Logger().RawLogger()), 151 ) 152 if err != nil { 153 return nil, err 154 } 155 156 service.store = store 157 service.server = server 158 service.pool = pool 159 service.respPool = respPool 160 161 server.RegisterRequestHandler(service.handleRPCRequest) 162 // TODO: before making the service available to the outside world, restore all 163 // replicas already known to the local store 164 if err := server.Start(); err != nil { 165 service.runtime.SubLogger(runtime.SystemInit).Error("failed to start the server", zap.Error(err)) 166 if err := store.close(); err != nil { 167 service.runtime.SubLogger(runtime.SystemInit).Error("failed to close the store", zap.Error(err)) 168 } 169 return nil, err 170 } 171 // start the heartbeat worker 172 if !cfg.DisableWorkers { 173 if err := service.stopper.RunNamedTask("log-heartbeat-worker", func(ctx context.Context) { 174 service.runtime.SubLogger(runtime.SystemInit).Info("logservice heartbeat worker started") 175 176 // transfer morpc options via context 177 ctx = SetBackendOptions(ctx, service.getBackendOptions()...) 178 ctx = SetClientOptions(ctx, service.getClientOptions()...) 179 service.heartbeatWorker(ctx) 180 }); err != nil { 181 return nil, err 182 } 183 } 184 service.initTaskHolder() 185 return service, nil 186 } 187 188 func (s *Service) Start() error { 189 return nil 190 } 191 192 func (s *Service) Close() (err error) { 193 s.stopper.Stop() 194 if s.haClient != nil { 195 err = firstError(err, s.haClient.Close()) 196 } 197 err = firstError(err, s.server.Close()) 198 if s.store != nil { 199 err = firstError(err, s.store.close()) 200 } 201 s.task.RLock() 202 ts := s.task.holder 203 s.task.RUnlock() 204 if ts != nil { 205 err = firstError(err, ts.Close()) 206 } 207 return err 208 } 209 210 func (s *Service) ID() string { 211 return s.store.id() 212 } 213 214 func (s *Service) handleRPCRequest(ctx context.Context, req morpc.Message, 215 seq uint64, cs morpc.ClientSession) error { 216 ctx, span := trace.Debug(ctx, "Service.handleRPCRequest") 217 defer span.End() 218 rr, ok := req.(*RPCRequest) 219 if !ok { 220 panic("unexpected message type") 221 } 222 defer rr.Release() 223 resp, records := s.handle(ctx, rr.Request, rr.GetPayloadField()) 224 var recs []byte 225 if len(records.Records) > 0 { 226 recs = MustMarshal(&records) 227 } 228 resp.RequestID = rr.RequestID 229 response := s.respPool.Get().(*RPCResponse) 230 response.Response = resp 231 response.payload = recs 232 return cs.Write(ctx, response) 233 } 234 235 func (s *Service) handle(ctx context.Context, req pb.Request, 236 payload []byte) (pb.Response, pb.LogRecordResponse) { 237 ctx, span := trace.Debug(ctx, "Service.handle."+req.Method.String()) 238 defer span.End() 239 switch req.Method { 240 case pb.TSO_UPDATE: 241 return s.handleTsoUpdate(ctx, req), pb.LogRecordResponse{} 242 case pb.APPEND: 243 return s.handleAppend(ctx, req, payload), pb.LogRecordResponse{} 244 case pb.READ: 245 return s.handleRead(ctx, req) 246 case pb.TRUNCATE: 247 return s.handleTruncate(ctx, req), pb.LogRecordResponse{} 248 case pb.GET_TRUNCATE: 249 return s.handleGetTruncatedIndex(ctx, req), pb.LogRecordResponse{} 250 case pb.CONNECT: 251 return s.handleConnect(ctx, req), pb.LogRecordResponse{} 252 case pb.CONNECT_RO: 253 return s.handleConnectRO(ctx, req), pb.LogRecordResponse{} 254 case pb.LOG_HEARTBEAT: 255 return s.handleLogHeartbeat(ctx, req), pb.LogRecordResponse{} 256 case pb.CN_HEARTBEAT: 257 return s.handleCNHeartbeat(ctx, req), pb.LogRecordResponse{} 258 case pb.CN_ALLOCATE_ID: 259 return s.handleCNAllocateID(ctx, req), pb.LogRecordResponse{} 260 case pb.DN_HEARTBEAT: 261 return s.handleDNHeartbeat(ctx, req), pb.LogRecordResponse{} 262 case pb.CHECK_HAKEEPER: 263 return s.handleCheckHAKeeper(ctx, req), pb.LogRecordResponse{} 264 case pb.GET_CLUSTER_DETAILS: 265 return s.handleGetClusterDetails(ctx, req), pb.LogRecordResponse{} 266 case pb.GET_CLUSTER_STATE: 267 return s.handleGetCheckerState(ctx, req), pb.LogRecordResponse{} 268 case pb.GET_SHARD_INFO: 269 return s.handleGetShardInfo(ctx, req), pb.LogRecordResponse{} 270 default: 271 panic("unknown log service method type") 272 } 273 } 274 275 func getResponse(req pb.Request) pb.Response { 276 return pb.Response{Method: req.Method} 277 } 278 279 func (s *Service) handleGetShardInfo(ctx context.Context, req pb.Request) pb.Response { 280 resp := getResponse(req) 281 if result, ok := s.getShardInfo(req.LogRequest.ShardID); !ok { 282 resp.ErrorCode, resp.ErrorMessage = toErrorCode(dragonboat.ErrShardNotFound) 283 } else { 284 resp.ShardInfo = &result 285 } 286 return resp 287 } 288 289 func (s *Service) handleGetClusterDetails(ctx context.Context, req pb.Request) pb.Response { 290 resp := getResponse(req) 291 if v, err := s.store.getClusterDetails(ctx); err != nil { 292 resp.ErrorCode, resp.ErrorMessage = toErrorCode(err) 293 } else { 294 resp.ClusterDetails = &v 295 } 296 return resp 297 } 298 299 func (s *Service) handleGetCheckerState(ctx context.Context, req pb.Request) pb.Response { 300 resp := getResponse(req) 301 if v, err := s.store.getCheckerState(); err != nil { 302 resp.ErrorCode, resp.ErrorMessage = toErrorCode(err) 303 } else { 304 resp.CheckerState = v 305 } 306 return resp 307 } 308 309 func (s *Service) handleTsoUpdate(ctx context.Context, req pb.Request) pb.Response { 310 r := req.TsoRequest 311 resp := getResponse(req) 312 if v, err := s.store.tsoUpdate(ctx, r.Count); err != nil { 313 resp.ErrorCode, resp.ErrorMessage = toErrorCode(err) 314 } else { 315 resp.TsoResponse = &pb.TsoResponse{Value: v} 316 } 317 return resp 318 } 319 320 func (s *Service) handleConnect(ctx context.Context, req pb.Request) pb.Response { 321 r := req.LogRequest 322 resp := getResponse(req) 323 if err := s.store.getOrExtendDNLease(ctx, r.ShardID, r.DNID); err != nil { 324 resp.ErrorCode, resp.ErrorMessage = toErrorCode(err) 325 } 326 return resp 327 } 328 329 func (s *Service) handleConnectRO(ctx context.Context, req pb.Request) pb.Response { 330 r := req.LogRequest 331 resp := getResponse(req) 332 // we only check whether the specified shard is available 333 if _, err := s.store.getTruncatedLsn(ctx, r.ShardID); err != nil { 334 resp.ErrorCode, resp.ErrorMessage = toErrorCode(err) 335 } 336 return resp 337 } 338 339 func (s *Service) handleAppend(ctx context.Context, req pb.Request, payload []byte) pb.Response { 340 r := req.LogRequest 341 resp := getResponse(req) 342 lsn, err := s.store.append(ctx, r.ShardID, payload) 343 if err != nil { 344 resp.ErrorCode, resp.ErrorMessage = toErrorCode(err) 345 } else { 346 resp.LogResponse.Lsn = lsn 347 } 348 return resp 349 } 350 351 func (s *Service) handleRead(ctx context.Context, req pb.Request) (pb.Response, pb.LogRecordResponse) { 352 r := req.LogRequest 353 resp := getResponse(req) 354 records, lsn, err := s.store.queryLog(ctx, r.ShardID, r.Lsn, r.MaxSize) 355 if err != nil { 356 resp.ErrorCode, resp.ErrorMessage = toErrorCode(err) 357 } else { 358 resp.LogResponse.LastLsn = lsn 359 } 360 return resp, pb.LogRecordResponse{Records: records} 361 } 362 363 func (s *Service) handleTruncate(ctx context.Context, req pb.Request) pb.Response { 364 r := req.LogRequest 365 resp := getResponse(req) 366 if err := s.store.truncateLog(ctx, r.ShardID, r.Lsn); err != nil { 367 resp.ErrorCode, resp.ErrorMessage = toErrorCode(err) 368 } 369 return resp 370 } 371 372 func (s *Service) handleGetTruncatedIndex(ctx context.Context, req pb.Request) pb.Response { 373 r := req.LogRequest 374 resp := getResponse(req) 375 lsn, err := s.store.getTruncatedLsn(ctx, r.ShardID) 376 if err != nil { 377 resp.ErrorCode, resp.ErrorMessage = toErrorCode(err) 378 } else { 379 resp.LogResponse.Lsn = lsn 380 } 381 return resp 382 } 383 384 // TODO: add tests to see what happens when request is sent to non hakeeper stores 385 func (s *Service) handleLogHeartbeat(ctx context.Context, req pb.Request) pb.Response { 386 hb := req.LogHeartbeat 387 resp := getResponse(req) 388 if cb, err := s.store.addLogStoreHeartbeat(ctx, *hb); err != nil { 389 resp.ErrorCode, resp.ErrorMessage = toErrorCode(err) 390 return resp 391 } else { 392 resp.CommandBatch = &cb 393 } 394 395 return resp 396 } 397 398 func (s *Service) handleCNHeartbeat(ctx context.Context, req pb.Request) pb.Response { 399 hb := req.CNHeartbeat 400 resp := getResponse(req) 401 if cb, err := s.store.addCNStoreHeartbeat(ctx, *hb); err != nil { 402 resp.ErrorCode, resp.ErrorMessage = toErrorCode(err) 403 return resp 404 } else { 405 resp.CommandBatch = &cb 406 } 407 408 return resp 409 } 410 411 func (s *Service) handleCNAllocateID(ctx context.Context, req pb.Request) pb.Response { 412 resp := getResponse(req) 413 firstID, err := s.store.cnAllocateID(ctx, *req.CNAllocateID) 414 if err != nil { 415 resp.ErrorCode, resp.ErrorMessage = toErrorCode(err) 416 return resp 417 } 418 resp.AllocateID = &pb.AllocateIDResponse{FirstID: firstID} 419 return resp 420 } 421 422 func (s *Service) handleDNHeartbeat(ctx context.Context, req pb.Request) pb.Response { 423 hb := req.DNHeartbeat 424 resp := getResponse(req) 425 if cb, err := s.store.addDNStoreHeartbeat(ctx, *hb); err != nil { 426 resp.ErrorCode, resp.ErrorMessage = toErrorCode(err) 427 return resp 428 } else { 429 resp.CommandBatch = &cb 430 } 431 432 return resp 433 } 434 435 func (s *Service) handleCheckHAKeeper(ctx context.Context, req pb.Request) pb.Response { 436 resp := getResponse(req) 437 if atomic.LoadUint64(&s.store.haKeeperReplicaID) != 0 { 438 resp.IsHAKeeper = true 439 } 440 return resp 441 } 442 443 func (s *Service) getBackendOptions() []morpc.BackendOption { 444 return []morpc.BackendOption{ 445 morpc.WithBackendFilter(func(msg morpc.Message, backendAddr string) bool { 446 return s.options.backendFilter == nil || 447 s.options.backendFilter(msg.(*RPCRequest), backendAddr) 448 }), 449 } 450 } 451 452 // NB: leave an empty method for future extension. 453 func (s *Service) getClientOptions() []morpc.ClientOption { 454 return []morpc.ClientOption{ 455 morpc.WithClientTag("log-heartbeat"), 456 } 457 }