// Copyright 2021 - 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*
Package logservice implement MO's LogService component.
*/
package logservice

import (
	"context"
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"github.com/fagongzi/goetty/v2"
	"github.com/lni/dragonboat/v4"
	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/common/morpc"
	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/common/runtime"
	"github.com/matrixorigin/matrixone/pkg/common/stopper"
	"github.com/matrixorigin/matrixone/pkg/fileservice"
	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
	"github.com/matrixorigin/matrixone/pkg/taskservice"
	"github.com/matrixorigin/matrixone/pkg/util"
	v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2"
	"github.com/matrixorigin/matrixone/pkg/util/trace"
	"go.uber.org/zap"
)

const (
	// LogServiceRPCName is the name under which the log service RPC
	// server is registered with morpc.
	LogServiceRPCName = "logservice-server"
)

// Lsn is a log sequence number identifying a position within a log shard.
type Lsn = uint64

// LogRecord is an alias of the protobuf LogRecord message.
type LogRecord = pb.LogRecord

// firstError returns err1 when it is non-nil, otherwise err2. Note that
// when both are non-nil, err2 is dropped.
// TODO: move this to a better place
func firstError(err1 error, err2 error) error {
	if err1 != nil {
		return err1
	}
	return err2
}

// Service is the top layer component of a log service node. It manages the
// underlying log store which in turn manages all log shards including the
// HAKeeper shard. The Log Service component communicates with LogService
// clients owned by TN nodes and the HAKeeper service via network, it can
// be considered as the interface layer of the LogService.
type Service struct {
	cfg         Config
	runtime     runtime.Runtime
	store       *store
	server      morpc.RPCServer
	// pool recycles *RPCRequest values decoded by the server codec;
	// respPool recycles *RPCResponse values written back to clients.
	pool        *sync.Pool
	respPool    *sync.Pool
	stopper     *stopper.Stopper
	haClient    LogHAKeeperClient
	fileService fileservice.FileService
	// shutdownC is provided by the caller of NewService; presumably used to
	// request process shutdown from within the service — confirm with callers.
	shutdownC   chan struct{}

	options struct {
		// morpc client would filter remote backend via this
		backendFilter func(msg morpc.Message, backendAddr string) bool
	}

	// task guards lazily-created task service state; the RWMutex protects
	// all fields in this struct.
	task struct {
		sync.RWMutex
		created        bool
		holder         taskservice.TaskServiceHolder
		storageFactory taskservice.TaskStorageFactory
	}

	config *util.ConfigData
}

// NewService creates and starts a log service node: it validates the config,
// creates the log store and starts its replicas, starts the morpc RPC server,
// and (unless cfg.DisableWorkers is set) launches the heartbeat worker.
// On any failure a nil service and the error are returned.
func NewService(
	cfg Config,
	fileService fileservice.FileService,
	shutdownC chan struct{},
	opts ...Option,
) (*Service, error) {
	cfg.Fill()
	if err := cfg.Validate(); err != nil {
		return nil, err
	}
	configKVMap, _ := dumpLogConfig(cfg)
	opts = append(opts, WithConfigData(configKVMap))

	service := &Service{
		cfg:         cfg,
		stopper:     stopper.NewStopper("log-service"),
		fileService: fileService,
		shutdownC:   shutdownC,
	}
	for _, opt := range opts {
		opt(service)
	}
	if service.runtime == nil {
		service.runtime = runtime.DefaultRuntime()
	}
	store, err := newLogStore(cfg, service.getTaskService, service.runtime)
	if err != nil {
		service.runtime.Logger().Error("failed to create log store", zap.Error(err))
		return nil, err
	}
	if err := store.loadMetadata(); err != nil {
		return nil, err
	}
	if err := store.startReplicas(); err != nil {
		return nil, err
	}
	// request/response objects reference their owning pool so they can
	// return themselves on Release/Put.
	pool := &sync.Pool{}
	pool.New = func() interface{} {
		return &RPCRequest{pool: pool}
	}
	respPool := &sync.Pool{}
	respPool.New = func() interface{} {
		return &RPCResponse{pool: respPool}
	}
	mf := func() morpc.Message {
		return pool.Get().(*RPCRequest)
	}

	var codecOpts []morpc.CodecOption
	codecOpts = append(codecOpts, morpc.WithCodecPayloadCopyBufferSize(16*1024),
		morpc.WithCodecEnableChecksum(),
		morpc.WithCodecMaxBodySize(int(cfg.RPC.MaxMessageSize)))
	if cfg.RPC.EnableCompress {
		mp, err := mpool.NewMPool("log_rpc_server", 0, mpool.NoFixed)
		if err != nil {
			return nil, err
		}
		codecOpts = append(codecOpts, morpc.WithCodecEnableCompress(mp))
	}

	// TODO: check and fix all these magic numbers
	codec := morpc.NewMessageCodec(mf, codecOpts...)
	server, err := morpc.NewRPCServer(LogServiceRPCName, cfg.LogServiceListenAddr(), codec,
		morpc.WithServerGoettyOptions(goetty.WithSessionReleaseMsgFunc(func(i interface{}) {
			// once a response message has been flushed to the session,
			// return it to the response pool for reuse.
			msg := i.(morpc.RPCMessage)
			if !msg.InternalMessage() {
				respPool.Put(msg.Message)
			}
		})),
		morpc.WithServerLogger(service.runtime.Logger().RawLogger()),
	)
	if err != nil {
		return nil, err
	}

	service.store = store
	service.server = server
	service.pool = pool
	service.respPool = respPool

	server.RegisterRequestHandler(service.handleRPCRequest)
	// TODO: before making the service available to the outside world, restore all
	// replicas already known to the local store
	if err := server.Start(); err != nil {
		service.runtime.SubLogger(runtime.SystemInit).Error("failed to start the server", zap.Error(err))
		// roll back: the store was already started above, close it so its
		// replicas and resources are released.
		if err := store.close(); err != nil {
			service.runtime.SubLogger(runtime.SystemInit).Error("failed to close the store", zap.Error(err))
		}
		return nil, err
	}
	// start the heartbeat worker
	if !cfg.DisableWorkers {
		if err := service.stopper.RunNamedTask("log-heartbeat-worker", func(ctx context.Context) {
			service.runtime.SubLogger(runtime.SystemInit).Info("logservice heartbeat worker started")

			// transfer morpc options via context
			ctx = SetBackendOptions(ctx, service.getBackendOptions()...)
			ctx = SetClientOptions(ctx, service.getClientOptions()...)
			service.heartbeatWorker(ctx)
		}); err != nil {
			return nil, err
		}
	}
	service.initTaskHolder()
	service.initSqlWriterFactory()
	return service, nil
}

// Start is a no-op: the service is fully started by NewService. It exists
// to satisfy the service lifecycle interface used by callers.
func (s *Service) Start() error {
	return nil
}

// Close shuts the service down: it stops background workers, then closes
// the HAKeeper client, the RPC server, the log store and the task service
// holder, returning the first error encountered (later errors are dropped
// by firstError).
func (s *Service) Close() (err error) {
	s.stopper.Stop()
	if s.haClient != nil {
		err = firstError(err, s.haClient.Close())
	}
	err = firstError(err, s.server.Close())
	if s.store != nil {
		err = firstError(err, s.store.close())
	}
	s.task.RLock()
	ts := s.task.holder
	s.task.RUnlock()
	if ts != nil {
		err = firstError(err, ts.Close())
	}
	return err
}

// ID returns the ID of the underlying log store.
func (s *Service) ID() string {
	return s.store.id()
}

// handleRPCRequest is the morpc request handler. It decodes the request,
// dispatches it via handle, and writes the response (plus any marshaled
// log records as payload) back on the client session.
func (s *Service) handleRPCRequest(
	ctx context.Context,
	msg morpc.RPCMessage,
	seq uint64,
	cs morpc.ClientSession) error {
	ctx, span := trace.Debug(ctx, "Service.handleRPCRequest")
	defer span.End()

	req := msg.Message
	rr, ok := req.(*RPCRequest)
	if !ok {
		panic("unexpected message type")
	}
	// return the pooled request object once handling is done
	defer rr.Release()
	resp, records := s.handle(ctx, rr.Request, rr.GetPayloadField())
	var recs []byte
	if len(records.Records) > 0 {
		recs = MustMarshal(&records)
	}
	resp.RequestID = rr.RequestID
	response := s.respPool.Get().(*RPCResponse)
	response.Response = resp
	response.payload = recs
	return cs.Write(ctx, response)
}

// handle dispatches a request to the handler matching its method. Only READ
// produces a non-empty LogRecordResponse; unknown methods yield a
// NOT_SUPPORTED error response.
func (s *Service) handle(ctx context.Context, req pb.Request,
	payload []byte) (pb.Response, pb.LogRecordResponse) {
	ctx, span := trace.Debug(ctx, "Service.handle."+req.Method.String())
	defer span.End()
	switch req.Method {
	case pb.TSO_UPDATE:
		return s.handleTsoUpdate(ctx, req), pb.LogRecordResponse{}
	case pb.APPEND:
		return s.handleAppend(ctx, req, payload), pb.LogRecordResponse{}
	case pb.READ:
		return s.handleRead(ctx, req)
	case pb.TRUNCATE:
		return s.handleTruncate(ctx, req), pb.LogRecordResponse{}
	case pb.GET_TRUNCATE:
		return s.handleGetTruncatedIndex(ctx, req), pb.LogRecordResponse{}
	case pb.CONNECT:
		return s.handleConnect(ctx, req), pb.LogRecordResponse{}
	case pb.CONNECT_RO:
		return s.handleConnectRO(ctx, req), pb.LogRecordResponse{}
	case pb.LOG_HEARTBEAT:
		return s.handleLogHeartbeat(ctx, req), pb.LogRecordResponse{}
	case pb.CN_HEARTBEAT:
		return s.handleCNHeartbeat(ctx, req), pb.LogRecordResponse{}
	case pb.CN_ALLOCATE_ID:
		return s.handleCNAllocateID(ctx, req), pb.LogRecordResponse{}
	case pb.TN_HEARTBEAT:
		return s.handleTNHeartbeat(ctx, req), pb.LogRecordResponse{}
	case pb.CHECK_HAKEEPER:
		return s.handleCheckHAKeeper(ctx, req), pb.LogRecordResponse{}
	case pb.GET_CLUSTER_DETAILS:
		return s.handleGetClusterDetails(ctx, req), pb.LogRecordResponse{}
	case pb.GET_CLUSTER_STATE:
		return s.handleGetCheckerState(ctx, req), pb.LogRecordResponse{}
	case pb.GET_SHARD_INFO:
		return s.handleGetShardInfo(ctx, req), pb.LogRecordResponse{}
	case pb.UPDATE_CN_LABEL:
		return s.handleUpdateCNLabel(ctx, req), pb.LogRecordResponse{}
	case pb.UPDATE_CN_WORK_STATE:
		return s.handleUpdateCNWorkState(ctx, req), pb.LogRecordResponse{}
	case pb.PATCH_CN_STORE:
		return s.handlePatchCNStore(ctx, req), pb.LogRecordResponse{}
	case pb.DELETE_CN_STORE:
		return s.handleDeleteCNStore(ctx, req), pb.LogRecordResponse{}
	case pb.PROXY_HEARTBEAT:
		return s.handleProxyHeartbeat(ctx, req), pb.LogRecordResponse{}
	default:
		resp := getResponse(req)
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(
			moerr.NewNotSupported(ctx,
				fmt.Sprintf("logservice method type %d", req.Method)))
		return resp, pb.LogRecordResponse{}
	}
}

// getResponse builds an empty response echoing the request method.
func getResponse(req pb.Request) pb.Response {
	return pb.Response{Method: req.Method}
}

// handleGetShardInfo returns the locally known info of the requested shard,
// or a shard-not-found error.
func (s *Service) handleGetShardInfo(ctx context.Context, req pb.Request) pb.Response {
	resp := getResponse(req)
	if result, ok := s.getShardInfo(req.LogRequest.ShardID); !ok {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(dragonboat.ErrShardNotFound)
	} else {
		resp.ShardInfo = &result
	}
	return resp
}

// handleGetClusterDetails returns the cluster details kept by the HAKeeper.
func (s *Service) handleGetClusterDetails(ctx context.Context, req pb.Request) pb.Response {
	resp := getResponse(req)
	if v, err := s.store.getClusterDetails(ctx); err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
	} else {
		resp.ClusterDetails = &v
	}
	return resp
}

// handleGetCheckerState returns the HAKeeper checker state.
func (s *Service) handleGetCheckerState(ctx context.Context, req pb.Request) pb.Response {
	resp := getResponse(req)
	if v, err := s.store.getCheckerState(); err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
	} else {
		resp.CheckerState = v
	}
	return resp
}

// handleTsoUpdate allocates a batch of r.Count timestamps and returns the
// resulting value.
func (s *Service) handleTsoUpdate(ctx context.Context, req pb.Request) pb.Response {
	r := req.TsoRequest
	resp := getResponse(req)
	if v, err := s.store.tsoUpdate(ctx, r.Count); err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
	} else {
		resp.TsoResponse = &pb.TsoResponse{Value: v}
	}
	return resp
}

// handleConnect acquires or extends the TN lease on the requested shard for
// the connecting TN node.
func (s *Service) handleConnect(ctx context.Context, req pb.Request) pb.Response {
	r := req.LogRequest
	resp := getResponse(req)
	if err := s.store.getOrExtendTNLease(ctx, r.ShardID, r.TNID); err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
	}
	return resp
}

// handleConnectRO handles a read-only connect: no lease is taken.
func (s *Service) handleConnectRO(ctx context.Context, req pb.Request) pb.Response {
	r := req.LogRequest
	resp := getResponse(req)
	// we only check whether the specified shard is available
	if _, err := s.store.getTruncatedLsn(ctx, r.ShardID); err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
	}
	return resp
}

// handleAppend appends the request payload to the shard's log and returns
// the Lsn assigned to the appended entry.
func (s *Service) handleAppend(ctx context.Context, req pb.Request, payload []byte) pb.Response {
	r := req.LogRequest
	resp := getResponse(req)
	lsn, err := s.store.append(ctx, r.ShardID, payload)
	if err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
	} else {
		resp.LogResponse.Lsn = lsn
	}
	return resp
}

// handleRead queries log entries starting at r.Lsn, bounded by r.MaxSize,
// returning the records and the last Lsn covered by the query.
func (s *Service) handleRead(ctx context.Context, req pb.Request) (pb.Response, pb.LogRecordResponse) {
	r := req.LogRequest
	resp := getResponse(req)
	records, lsn, err := s.store.queryLog(ctx, r.ShardID, r.Lsn, r.MaxSize)
	if err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
	} else {
		resp.LogResponse.LastLsn = lsn
	}
	return resp, pb.LogRecordResponse{Records: records}
}

// handleTruncate truncates the shard's log up to r.Lsn.
func (s *Service) handleTruncate(ctx context.Context, req pb.Request) pb.Response {
	r := req.LogRequest
	resp := getResponse(req)
	if err := s.store.truncateLog(ctx, r.ShardID, r.Lsn); err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
	}
	return resp
}

// handleGetTruncatedIndex returns the shard's current truncated Lsn.
func (s *Service) handleGetTruncatedIndex(ctx context.Context, req pb.Request) pb.Response {
	r := req.LogRequest
	resp := getResponse(req)
	lsn, err := s.store.getTruncatedLsn(ctx, r.ShardID)
	if err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
	} else {
		resp.LogResponse.Lsn = lsn
	}
	return resp
}

// handleLogHeartbeat records a log store heartbeat and returns the command
// batch the HAKeeper has scheduled for that store.
// TODO: add tests to see what happens when request is sent to non hakeeper stores
func (s *Service) handleLogHeartbeat(ctx context.Context, req pb.Request) pb.Response {
	start := time.Now()
	defer func() {
		v2.LogHeartbeatRecvHistogram.Observe(time.Since(start).Seconds())
	}()
	hb := req.LogHeartbeat
	resp := getResponse(req)
	if cb, err := s.store.addLogStoreHeartbeat(ctx, *hb); err != nil {
		v2.LogHeartbeatRecvFailureCounter.Inc()
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
		return resp
	} else {
		resp.CommandBatch = &cb
	}

	return resp
}

// handleCNHeartbeat records a CN store heartbeat and returns the scheduled
// command batch.
func (s *Service) handleCNHeartbeat(ctx context.Context, req pb.Request) pb.Response {
	start := time.Now()
	defer func() {
		v2.CNHeartbeatRecvHistogram.Observe(time.Since(start).Seconds())
	}()
	hb := req.CNHeartbeat
	resp := getResponse(req)
	if cb, err := s.store.addCNStoreHeartbeat(ctx, *hb); err != nil {
		v2.CNHeartbeatRecvFailureCounter.Inc()
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
		return resp
	} else {
		resp.CommandBatch = &cb
	}

	return resp
}

// handleCNAllocateID allocates a block of IDs for a CN node and returns the
// first ID of the allocated range.
func (s *Service) handleCNAllocateID(ctx context.Context, req pb.Request) pb.Response {
	resp := getResponse(req)
	firstID, err := s.store.cnAllocateID(ctx, *req.CNAllocateID)
	if err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
		return resp
	}
	resp.AllocateID = &pb.AllocateIDResponse{FirstID: firstID}
	return resp
}

// handleTNHeartbeat records a TN store heartbeat and returns the scheduled
// command batch.
func (s *Service) handleTNHeartbeat(ctx context.Context, req pb.Request) pb.Response {
	start := time.Now()
	defer func() {
		v2.TNHeartbeatRecvHistogram.Observe(time.Since(start).Seconds())
	}()
	hb := req.TNHeartbeat
	resp := getResponse(req)
	if cb, err := s.store.addTNStoreHeartbeat(ctx, *hb); err != nil {
		v2.TNHeartbeatRecvFailureCounter.Inc()
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
		return resp
	} else {
		resp.CommandBatch = &cb
	}

	return resp
}

// handleCheckHAKeeper reports whether this store currently hosts a HAKeeper
// replica (non-zero replica ID).
func (s *Service) handleCheckHAKeeper(ctx context.Context, req pb.Request) pb.Response {
	resp := getResponse(req)
	if atomic.LoadUint64(&s.store.haKeeperReplicaID) != 0 {
		resp.IsHAKeeper = true
	}
	return resp
}

// handleUpdateCNLabel updates the labels of a CN store in the HAKeeper.
func (s *Service) handleUpdateCNLabel(ctx context.Context, req pb.Request) pb.Response {
	label := req.CNStoreLabel
	resp := getResponse(req)
	if err := s.store.updateCNLabel(ctx, *label); err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
		return resp
	}
	return resp
}

// handleUpdateCNWorkState updates the work state of a CN store.
func (s *Service) handleUpdateCNWorkState(ctx context.Context, req pb.Request) pb.Response {
	workState := req.CNWorkState
	resp := getResponse(req)
	if err := s.store.updateCNWorkState(ctx, *workState); err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
		return resp
	}
	return resp
}

// handlePatchCNStore patches both the state and labels of a CN store.
func (s *Service) handlePatchCNStore(ctx context.Context, req pb.Request) pb.Response {
	stateLabel := req.CNStateLabel
	resp := getResponse(req)
	if err := s.store.patchCNStore(ctx, *stateLabel); err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
		return resp
	}
	return resp
}

// handleDeleteCNStore removes a CN store from the HAKeeper state.
func (s *Service) handleDeleteCNStore(ctx context.Context, req pb.Request) pb.Response {
	cnStore := req.DeleteCNStore
	resp := getResponse(req)
	if err := s.store.deleteCNStore(ctx, *cnStore); err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
		return resp
	}
	return resp
}

// handleProxyHeartbeat records a proxy heartbeat and returns the scheduled
// command batch.
func (s *Service) handleProxyHeartbeat(ctx context.Context, req pb.Request) pb.Response {
	resp := getResponse(req)
	if cb, err := s.store.addProxyHeartbeat(ctx, *req.ProxyHeartbeat); err != nil {
		resp.ErrorCode, resp.ErrorMessage = toErrorCode(err)
		return resp
	} else {
		resp.CommandBatch = &cb
	}
	return resp
}

// getBackendOptions returns the morpc backend options used by clients created
// inside this service; messages are filtered through the configured
// backendFilter (when set) before being sent to a backend.
func (s *Service) getBackendOptions() []morpc.BackendOption {
	return []morpc.BackendOption{
		morpc.WithBackendFilter(func(msg morpc.Message, backendAddr string) bool {
			m, ok := msg.(*RPCRequest)
			if !ok {
				// non-RPCRequest messages are never filtered out
				return true
			}
			return s.options.backendFilter == nil || s.options.backendFilter(m, backendAddr)
		}),
	}
}

// NB: leave an empty method for future extension.
552 func (s *Service) getClientOptions() []morpc.ClientOption { 553 return []morpc.ClientOption{} 554 }