github.com/matrixorigin/matrixone@v1.2.0/pkg/logservice/store.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package logservice 16 17 import ( 18 "context" 19 "fmt" 20 "sync" 21 "sync/atomic" 22 "time" 23 24 "github.com/cockroachdb/errors" 25 "github.com/lni/dragonboat/v4" 26 cli "github.com/lni/dragonboat/v4/client" 27 "github.com/lni/dragonboat/v4/config" 28 "github.com/lni/dragonboat/v4/plugin/tan" 29 "github.com/lni/dragonboat/v4/plugin/tee" 30 "github.com/lni/dragonboat/v4/raftpb" 31 sm "github.com/lni/dragonboat/v4/statemachine" 32 "github.com/matrixorigin/matrixone/pkg/common/moerr" 33 "github.com/matrixorigin/matrixone/pkg/common/runtime" 34 "github.com/matrixorigin/matrixone/pkg/common/stopper" 35 "github.com/matrixorigin/matrixone/pkg/hakeeper" 36 "github.com/matrixorigin/matrixone/pkg/hakeeper/bootstrap" 37 "github.com/matrixorigin/matrixone/pkg/hakeeper/checkers" 38 "github.com/matrixorigin/matrixone/pkg/hakeeper/task" 39 "github.com/matrixorigin/matrixone/pkg/logutil" 40 pb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 41 "github.com/matrixorigin/matrixone/pkg/pb/metadata" 42 "github.com/matrixorigin/matrixone/pkg/taskservice" 43 "go.uber.org/zap" 44 ) 45 46 type storeMeta struct { 47 serviceAddress string 48 } 49 50 func (l *storeMeta) marshal() []byte { 51 return []byte(l.serviceAddress) 52 } 53 54 func (l *storeMeta) unmarshal(data []byte) { 55 l.serviceAddress = string(data) 56 } 57 58 func isUserUpdate(cmd []byte) bool { 59 return parseCmdTag(cmd) == pb.UserEntryUpdate 60 } 61 62 func isSetLeaseHolderUpdate(cmd []byte) bool { 63 return parseCmdTag(cmd) == pb.LeaseHolderIDUpdate 64 } 65 66 func getNodeHostConfig(cfg Config) config.NodeHostConfig { 67 meta := storeMeta{ 68 serviceAddress: cfg.LogServiceServiceAddr(), 69 } 70 if cfg.GossipProbeInterval.Duration == 0 { 71 panic("cfg.GossipProbeInterval.Duration is 0") 72 } 73 logdb := config.GetTinyMemLogDBConfig() 74 logdb.KVWriteBufferSize = cfg.LogDBBufferSize 75 logdbFactory := (config.LogDBFactory)(nil) 76 logdbFactory = tan.Factory 77 if cfg.UseTeeLogDB { 78 logutil.Warn("using tee based logdb backed by pebble and tan, for testing purposes only") 79 logdbFactory = tee.TanPebbleLogDBFactory 80 } 81 return config.NodeHostConfig{ 82 DeploymentID: cfg.DeploymentID, 83 NodeHostID: cfg.UUID, 84 NodeHostDir: cfg.DataDir, 85 RTTMillisecond: cfg.RTTMillisecond, 86 AddressByNodeHostID: true, 87 RaftAddress: cfg.RaftServiceAddr(), 88 ListenAddress: cfg.RaftListenAddr(), 89 Expert: config.ExpertConfig{ 90 FS: cfg.FS, 91 LogDBFactory: logdbFactory, 92 // FIXME: dragonboat need to be updated to make this field a first class 93 // citizen 94 TestGossipProbeInterval: cfg.GossipProbeInterval.Duration, 95 LogDB: logdb, 96 ExplicitHostname: cfg.ExplicitHostname, 97 }, 98 Gossip: config.GossipConfig{ 99 BindAddress: cfg.GossipListenAddr(), 100 AdvertiseAddress: cfg.GossipServiceAddr(), 101 Seed: cfg.GossipSeedAddresses, 102 Meta: meta.marshal(), 103 CanUseSelfAsSeed: cfg.GossipAllowSelfAsSeed, 104 }, 105 } 106 } 107 108 func getRaftConfig(shardID uint64, replicaID uint64) config.Config { 109 return config.Config{ 110 ShardID: shardID, 111 ReplicaID: replicaID, 112 CheckQuorum: true, 113 PreVote: true, 114 ElectionRTT: 10, 115 HeartbeatRTT: 1, 116 OrderedConfigChange: true, 117 } 118 } 119 120 // store manages log shards including the HAKeeper shard on each node. 121 type store struct { 122 cfg Config 123 nh *dragonboat.NodeHost 124 haKeeperReplicaID uint64 125 checker hakeeper.Checker 126 alloc hakeeper.IDAllocator 127 stopper *stopper.Stopper 128 tickerStopper *stopper.Stopper 129 runtime runtime.Runtime 130 131 bootstrapCheckCycles uint64 132 bootstrapMgr *bootstrap.Manager 133 134 taskScheduler hakeeper.TaskScheduler 135 136 mu struct { 137 sync.Mutex 138 metadata metadata.LogStore 139 } 140 shardSnapshotInfo shardSnapshotInfo 141 snapshotMgr *snapshotManager 142 } 143 144 func newLogStore(cfg Config, 145 taskServiceGetter func() taskservice.TaskService, 146 rt runtime.Runtime) (*store, error) { 147 nh, err := dragonboat.NewNodeHost(getNodeHostConfig(cfg)) 148 if err != nil { 149 return nil, err 150 } 151 hakeeperConfig := cfg.GetHAKeeperConfig() 152 rt.SubLogger(runtime.SystemInit).Info("HAKeeper Timeout Configs", 153 zap.Int64("LogStoreTimeout", int64(hakeeperConfig.LogStoreTimeout)), 154 zap.Int64("DNStoreTimeout", int64(hakeeperConfig.TNStoreTimeout)), 155 zap.Int64("CNStoreTimeout", int64(hakeeperConfig.CNStoreTimeout)), 156 ) 157 ls := &store{ 158 cfg: cfg, 159 nh: nh, 160 checker: checkers.NewCoordinator(hakeeperConfig), 161 taskScheduler: task.NewScheduler(taskServiceGetter, hakeeperConfig), 162 alloc: newIDAllocator(), 163 stopper: stopper.NewStopper("log-store"), 164 tickerStopper: stopper.NewStopper("hakeeper-ticker"), 165 runtime: rt, 166 167 shardSnapshotInfo: newShardSnapshotInfo(), 168 snapshotMgr: newSnapshotManager(&cfg), 169 } 170 ls.mu.metadata = metadata.LogStore{UUID: cfg.UUID} 171 if err := ls.stopper.RunNamedTask("truncation-worker", func(ctx context.Context) { 172 rt.SubLogger(runtime.SystemInit).Info("logservice truncation worker started") 173 ls.truncationWorker(ctx) 174 }); err != nil { 175 return nil, err 176 } 177 return ls, nil 178 } 179 180 func (l *store) close() error { 181 l.tickerStopper.Stop() 182 l.stopper.Stop() 183 if l.nh != nil { 184 l.nh.Close() 185 } 186 return nil 187 } 188 189 func (l *store) id() string { 190 return l.nh.ID() 191 } 192 193 func (l *store) startReplicas() error { 194 l.mu.Lock() 195 shards := make([]metadata.LogShard, 0) 196 shards = append(shards, l.mu.metadata.Shards...) 197 l.mu.Unlock() 198 199 for _, rec := range shards { 200 if rec.ShardID == hakeeper.DefaultHAKeeperShardID { 201 if err := l.startHAKeeperReplica(rec.ReplicaID, nil, false); err != nil { 202 return err 203 } 204 } else { 205 if err := l.startReplica(rec.ShardID, rec.ReplicaID, nil, false); err != nil { 206 return err 207 } 208 } 209 } 210 return nil 211 } 212 213 func (l *store) startHAKeeperReplica(replicaID uint64, 214 initialReplicas map[uint64]dragonboat.Target, join bool) error { 215 raftConfig := getRaftConfig(hakeeper.DefaultHAKeeperShardID, replicaID) 216 if err := l.nh.StartReplica(initialReplicas, 217 join, hakeeper.NewStateMachine, raftConfig); err != nil { 218 return err 219 } 220 l.addMetadata(hakeeper.DefaultHAKeeperShardID, replicaID) 221 atomic.StoreUint64(&l.haKeeperReplicaID, replicaID) 222 if !l.cfg.DisableWorkers { 223 if err := l.tickerStopper.RunNamedTask("hakeeper-ticker", func(ctx context.Context) { 224 l.runtime.SubLogger(runtime.SystemInit).Info("HAKeeper ticker started") 225 l.ticker(ctx) 226 }); err != nil { 227 return err 228 } 229 } 230 return nil 231 } 232 233 func (l *store) startReplica(shardID uint64, replicaID uint64, 234 initialReplicas map[uint64]dragonboat.Target, join bool) error { 235 if shardID == hakeeper.DefaultHAKeeperShardID { 236 return moerr.NewInvalidInputNoCtx("shardID %d does not match DefaultHAKeeperShardID %d", shardID, hakeeper.DefaultHAKeeperShardID) 237 } 238 cfg := getRaftConfig(shardID, replicaID) 239 if err := l.snapshotMgr.Init(shardID, replicaID); err != nil { 240 panic(err) 241 } 242 if err := l.nh.StartReplica(initialReplicas, join, newStateMachine, cfg); err != nil { 243 return err 244 } 245 l.addMetadata(shardID, replicaID) 246 return nil 247 } 248 249 func (l *store) stopReplica(shardID uint64, replicaID uint64) error { 250 if shardID == hakeeper.DefaultHAKeeperShardID { 251 defer func() { 252 atomic.StoreUint64(&l.haKeeperReplicaID, 0) 253 }() 254 } 255 return l.nh.StopReplica(shardID, replicaID) 256 } 257 258 func (l *store) requestLeaderTransfer(shardID uint64, targetReplicaID uint64) error { 259 return l.nh.RequestLeaderTransfer(shardID, targetReplicaID) 260 } 261 262 func (l *store) addReplica(shardID uint64, replicaID uint64, 263 target dragonboat.Target, cci uint64) error { 264 // Set timeout to a little bigger value to prevent Timeout Error and 265 // returns a dragonboat.ErrRejected at last, in which case, it will take 266 // longer time to finish this operation. 267 ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) 268 defer cancel() 269 count := 0 270 for { 271 count++ 272 if err := l.nh.SyncRequestAddReplica(ctx, shardID, replicaID, target, cci); err != nil { 273 if errors.Is(err, dragonboat.ErrShardNotReady) { 274 l.retryWait() 275 continue 276 } 277 if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 { 278 return dragonboat.ErrTimeout 279 } 280 return err 281 } 282 return nil 283 } 284 } 285 286 func (l *store) removeReplica(shardID uint64, replicaID uint64, cci uint64) error { 287 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 288 defer cancel() 289 count := 0 290 for { 291 count++ 292 if err := l.nh.SyncRequestDeleteReplica(ctx, shardID, replicaID, cci); err != nil { 293 if errors.Is(err, dragonboat.ErrShardNotReady) { 294 l.retryWait() 295 continue 296 } 297 // FIXME: internally handle dragonboat.ErrTimeoutTooSmall 298 if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 { 299 return dragonboat.ErrTimeout 300 } 301 return err 302 } 303 l.removeMetadata(shardID, replicaID) 304 return nil 305 } 306 } 307 308 func (l *store) retryWait() { 309 if l.nh.NodeHostConfig().RTTMillisecond == 1 { 310 time.Sleep(time.Millisecond) 311 } 312 time.Sleep(time.Duration(l.nh.NodeHostConfig().RTTMillisecond/2) * time.Millisecond) 313 } 314 315 func (l *store) propose(ctx context.Context, 316 session *cli.Session, cmd []byte) (sm.Result, error) { 317 count := 0 318 for { 319 count++ 320 result, err := l.nh.SyncPropose(ctx, session, cmd) 321 if err != nil { 322 if errors.Is(err, dragonboat.ErrShardNotReady) || 323 errors.Is(err, dragonboat.ErrSystemBusy) { 324 l.retryWait() 325 continue 326 } 327 if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 { 328 return sm.Result{}, dragonboat.ErrTimeout 329 } 330 return sm.Result{}, err 331 } 332 return result, nil 333 } 334 } 335 336 func (l *store) read(ctx context.Context, 337 shardID uint64, query interface{}) (interface{}, error) { 338 count := 0 339 for { 340 count++ 341 result, err := l.nh.SyncRead(ctx, shardID, query) 342 if err != nil { 343 if errors.Is(err, dragonboat.ErrShardNotReady) { 344 l.retryWait() 345 continue 346 } 347 if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 { 348 return nil, dragonboat.ErrTimeout 349 } 350 return nil, err 351 } 352 return result, nil 353 } 354 } 355 356 func (l *store) getOrExtendTNLease(ctx context.Context, 357 shardID uint64, tnID uint64) error { 358 session := l.nh.GetNoOPSession(shardID) 359 cmd := getSetLeaseHolderCmd(tnID) 360 _, err := l.propose(ctx, session, cmd) 361 return err 362 } 363 364 func (l *store) truncateLog(ctx context.Context, 365 shardID uint64, index Lsn) error { 366 session := l.nh.GetNoOPSession(shardID) 367 cmd := getSetTruncatedLsnCmd(index) 368 result, err := l.propose(ctx, session, cmd) 369 if err != nil { 370 l.runtime.Logger().Error("propose truncate log cmd failed", zap.Error(err)) 371 return err 372 } 373 if result.Value > 0 { 374 l.runtime.Logger().Error(fmt.Sprintf("shardID %d already truncated to index %d", shardID, result.Value)) 375 return moerr.NewInvalidTruncateLsn(ctx, shardID, result.Value) 376 } 377 return nil 378 } 379 380 func (l *store) append(ctx context.Context, 381 shardID uint64, cmd []byte) (Lsn, error) { 382 session := l.nh.GetNoOPSession(shardID) 383 result, err := l.propose(ctx, session, cmd) 384 if err != nil { 385 l.runtime.Logger().Error("propose failed", zap.Error(err)) 386 return 0, err 387 } 388 if len(result.Data) > 0 { 389 l.runtime.Logger().Error("not current lease holder", zap.Uint64("data", binaryEnc.Uint64(result.Data))) 390 return 0, moerr.NewNotLeaseHolder(ctx, binaryEnc.Uint64(result.Data)) 391 } 392 if result.Value == 0 { 393 panic(moerr.NewInvalidState(ctx, "unexpected Lsn value")) 394 } 395 return result.Value, nil 396 } 397 398 func (l *store) getTruncatedLsn(ctx context.Context, 399 shardID uint64) (uint64, error) { 400 v, err := l.read(ctx, shardID, truncatedLsnQuery{}) 401 if err != nil { 402 return 0, err 403 } 404 return v.(uint64), nil 405 } 406 407 func (l *store) tsoUpdate(ctx context.Context, count uint64) (uint64, error) { 408 cmd := getTsoUpdateCmd(count) 409 session := l.nh.GetNoOPSession(firstLogShardID) 410 result, err := l.propose(ctx, session, cmd) 411 if err != nil { 412 l.runtime.Logger().Error("failed to propose tso updat", zap.Error(err)) 413 return 0, err 414 } 415 return result.Value, nil 416 } 417 418 func handleNotHAKeeperError(ctx context.Context, err error) error { 419 if err == nil { 420 return err 421 } 422 if errors.Is(err, dragonboat.ErrShardNotFound) { 423 return moerr.NewNoHAKeeper(ctx) 424 } 425 return err 426 } 427 428 func (l *store) addLogStoreHeartbeat(ctx context.Context, 429 hb pb.LogStoreHeartbeat) (pb.CommandBatch, error) { 430 data := MustMarshal(&hb) 431 cmd := hakeeper.GetLogStoreHeartbeatCmd(data) 432 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 433 if result, err := l.propose(ctx, session, cmd); err != nil { 434 l.runtime.Logger().Error("propose failed", zap.Error(err)) 435 return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err) 436 } else { 437 var cb pb.CommandBatch 438 MustUnmarshal(&cb, result.Data) 439 return cb, nil 440 } 441 } 442 443 func (l *store) addCNStoreHeartbeat(ctx context.Context, 444 hb pb.CNStoreHeartbeat) (pb.CommandBatch, error) { 445 data := MustMarshal(&hb) 446 cmd := hakeeper.GetCNStoreHeartbeatCmd(data) 447 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 448 if result, err := l.propose(ctx, session, cmd); err != nil { 449 l.runtime.Logger().Error("propose failed", zap.Error(err)) 450 return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err) 451 } else { 452 var cb pb.CommandBatch 453 MustUnmarshal(&cb, result.Data) 454 return cb, nil 455 } 456 } 457 458 func (l *store) cnAllocateID(ctx context.Context, 459 req pb.CNAllocateID) (uint64, error) { 460 cmd := hakeeper.GetAllocateIDCmd(req) 461 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 462 result, err := l.propose(ctx, session, cmd) 463 if err != nil { 464 l.runtime.Logger().Error("propose get id failed", zap.Error(err)) 465 return 0, err 466 } 467 return result.Value, nil 468 } 469 470 func (l *store) addTNStoreHeartbeat(ctx context.Context, 471 hb pb.TNStoreHeartbeat) (pb.CommandBatch, error) { 472 data := MustMarshal(&hb) 473 cmd := hakeeper.GetTNStoreHeartbeatCmd(data) 474 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 475 if result, err := l.propose(ctx, session, cmd); err != nil { 476 l.runtime.Logger().Error("propose failed", zap.Error(err)) 477 return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err) 478 } else { 479 var cb pb.CommandBatch 480 MustUnmarshal(&cb, result.Data) 481 return cb, nil 482 } 483 } 484 485 func (l *store) getCommandBatch(ctx context.Context, 486 uuid string) (pb.CommandBatch, error) { 487 v, err := l.read(ctx, 488 hakeeper.DefaultHAKeeperShardID, &hakeeper.ScheduleCommandQuery{UUID: uuid}) 489 if err != nil { 490 return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err) 491 } 492 return *(v.(*pb.CommandBatch)), nil 493 } 494 495 func (l *store) getClusterDetails(ctx context.Context) (pb.ClusterDetails, error) { 496 v, err := l.read(ctx, 497 hakeeper.DefaultHAKeeperShardID, &hakeeper.ClusterDetailsQuery{Cfg: l.cfg.GetHAKeeperConfig()}) 498 if err != nil { 499 return pb.ClusterDetails{}, handleNotHAKeeperError(ctx, err) 500 } 501 return *(v.(*pb.ClusterDetails)), nil 502 } 503 504 func (l *store) addScheduleCommands(ctx context.Context, 505 term uint64, cmds []pb.ScheduleCommand) error { 506 cmd := hakeeper.GetUpdateCommandsCmd(term, cmds) 507 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 508 if _, err := l.propose(ctx, session, cmd); err != nil { 509 return handleNotHAKeeperError(ctx, err) 510 } 511 return nil 512 } 513 514 func (l *store) getLeaseHolderID(ctx context.Context, 515 shardID uint64, entries []raftpb.Entry) (uint64, error) { 516 if len(entries) == 0 { 517 panic("empty entries") 518 } 519 // first entry is an update lease cmd 520 e := entries[0] 521 if !isRaftInternalEntry(e) && isSetLeaseHolderUpdate(l.decodeCmd(ctx, e)) { 522 return parseLeaseHolderID(l.decodeCmd(ctx, e)), nil 523 } 524 v, err := l.read(ctx, shardID, leaseHistoryQuery{lsn: e.Index}) 525 if err != nil { 526 l.runtime.Logger().Error("failed to read", zap.Error(err)) 527 return 0, err 528 } 529 return v.(uint64), nil 530 } 531 532 func (l *store) updateCNLabel(ctx context.Context, label pb.CNStoreLabel) error { 533 state, err := l.getCheckerState() 534 if err != nil { 535 return err 536 } 537 if _, ok := state.CNState.Stores[label.UUID]; !ok { 538 return moerr.NewInternalError(ctx, "CN [%s] does not exist", label.UUID) 539 } 540 cmd := hakeeper.GetUpdateCNLabelCmd(label) 541 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 542 if result, err := l.propose(ctx, session, cmd); err != nil { 543 l.runtime.Logger().Error("failed to propose CN label", 544 zap.String("label", label.String()), 545 zap.Error(err)) 546 return handleNotHAKeeperError(ctx, err) 547 } else { 548 var cb pb.CommandBatch 549 MustUnmarshal(&cb, result.Data) 550 return nil 551 } 552 } 553 554 func (l *store) updateCNWorkState(ctx context.Context, workState pb.CNWorkState) error { 555 state, err := l.getCheckerState() 556 if err != nil { 557 return err 558 } 559 if _, ok := state.CNState.Stores[workState.UUID]; !ok { 560 return moerr.NewInternalError(ctx, "CN [%s] does not exist", workState.UUID) 561 } 562 cmd := hakeeper.GetUpdateCNWorkStateCmd(workState) 563 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 564 if result, err := l.propose(ctx, session, cmd); err != nil { 565 l.runtime.Logger().Error("failed to propose CN work state", 566 zap.String("state", state.String()), 567 zap.Error(err)) 568 return handleNotHAKeeperError(ctx, err) 569 } else { 570 var cb pb.CommandBatch 571 MustUnmarshal(&cb, result.Data) 572 return nil 573 } 574 } 575 576 func (l *store) patchCNStore(ctx context.Context, stateLabel pb.CNStateLabel) error { 577 state, err := l.getCheckerState() 578 if err != nil { 579 return err 580 } 581 if _, ok := state.CNState.Stores[stateLabel.UUID]; !ok { 582 return moerr.NewInternalError(ctx, "CN [%s] does not exist", stateLabel.UUID) 583 } 584 cmd := hakeeper.GetPatchCNStoreCmd(stateLabel) 585 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 586 if result, err := l.propose(ctx, session, cmd); err != nil { 587 l.runtime.Logger().Error("failed to propose CN patch store", 588 zap.String("state", state.String()), 589 zap.Error(err)) 590 return handleNotHAKeeperError(ctx, err) 591 } else { 592 var cb pb.CommandBatch 593 MustUnmarshal(&cb, result.Data) 594 return nil 595 } 596 } 597 598 func (l *store) deleteCNStore(ctx context.Context, cnStore pb.DeleteCNStore) error { 599 state, err := l.getCheckerState() 600 if err != nil { 601 return err 602 } 603 if _, ok := state.CNState.Stores[cnStore.StoreID]; !ok { 604 return nil 605 } 606 cmd := hakeeper.GetDeleteCNStoreCmd(cnStore) 607 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 608 if result, err := l.propose(ctx, session, cmd); err != nil { 609 l.runtime.Logger().Error("failed to propose delete CN store", 610 zap.String("state", state.String()), 611 zap.Error(err)) 612 return handleNotHAKeeperError(ctx, err) 613 } else { 614 var cb pb.CommandBatch 615 MustUnmarshal(&cb, result.Data) 616 return nil 617 } 618 } 619 620 func (l *store) addProxyHeartbeat(ctx context.Context, hb pb.ProxyHeartbeat) (pb.CommandBatch, error) { 621 data := MustMarshal(&hb) 622 cmd := hakeeper.GetProxyHeartbeatCmd(data) 623 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 624 if result, err := l.propose(ctx, session, cmd); err != nil { 625 l.runtime.Logger().Error("propose failed", zap.Error(err)) 626 return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err) 627 } else { 628 var cb pb.CommandBatch 629 MustUnmarshal(&cb, result.Data) 630 return cb, nil 631 } 632 } 633 634 func (l *store) decodeCmd(ctx context.Context, e raftpb.Entry) []byte { 635 if e.Type == raftpb.ApplicationEntry { 636 panic(moerr.NewInvalidState(ctx, "unexpected entry type")) 637 } 638 if e.Type == raftpb.EncodedEntry { 639 if e.Cmd[0] != 0 { 640 panic(moerr.NewInvalidState(ctx, "unexpected cmd header")) 641 } 642 return e.Cmd[1:] 643 } 644 panic(moerr.NewInvalidState(ctx, "invalid cmd")) 645 } 646 647 func isRaftInternalEntry(e raftpb.Entry) bool { 648 if len(e.Cmd) == 0 { 649 return true 650 } 651 return e.Type == raftpb.ConfigChangeEntry || e.Type == raftpb.MetadataEntry 652 } 653 654 func (l *store) markEntries(ctx context.Context, 655 shardID uint64, entries []raftpb.Entry) ([]pb.LogRecord, error) { 656 if len(entries) == 0 { 657 return []pb.LogRecord{}, nil 658 } 659 leaseHolderID, err := l.getLeaseHolderID(ctx, shardID, entries) 660 if err != nil { 661 return nil, err 662 } 663 result := make([]pb.LogRecord, 0) 664 for _, e := range entries { 665 if isRaftInternalEntry(e) { 666 // raft internal stuff 667 result = append(result, LogRecord{ 668 Type: pb.Internal, 669 Lsn: e.Index, 670 }) 671 continue 672 } 673 cmd := l.decodeCmd(ctx, e) 674 if isSetLeaseHolderUpdate(cmd) { 675 leaseHolderID = parseLeaseHolderID(cmd) 676 result = append(result, LogRecord{ 677 Type: pb.LeaseUpdate, 678 Lsn: e.Index, 679 }) 680 continue 681 } 682 if isUserUpdate(cmd) { 683 if parseLeaseHolderID(cmd) != leaseHolderID { 684 // lease not match, skip 685 result = append(result, LogRecord{ 686 Type: pb.LeaseRejected, 687 Lsn: e.Index, 688 }) 689 continue 690 } 691 result = append(result, LogRecord{ 692 Data: cmd, 693 Type: pb.UserRecord, 694 Lsn: e.Index, 695 }) 696 } 697 } 698 return result, nil 699 } 700 701 func getNextIndex(entries []raftpb.Entry, firstIndex Lsn, lastIndex Lsn) Lsn { 702 if len(entries) == 0 { 703 return firstIndex 704 } 705 lastResultIndex := entries[len(entries)-1].Index 706 if lastResultIndex+1 < lastIndex { 707 return lastResultIndex + 1 708 } 709 return firstIndex 710 } 711 712 // high priority test 713 // FIXME: add a test that queries the log with LeaseUpdate, LeaseRejected 714 // entries, no matter what is the firstLsn specified in queryLog(), returned 715 // results should make sense 716 func (l *store) queryLog(ctx context.Context, shardID uint64, 717 firstIndex Lsn, maxSize uint64) ([]LogRecord, Lsn, error) { 718 v, err := l.read(ctx, shardID, indexQuery{}) 719 if err != nil { 720 return nil, 0, err 721 } 722 lastIndex := v.(uint64) 723 // FIXME: check whether lastIndex >= firstIndex 724 rs, err := l.nh.QueryRaftLog(shardID, firstIndex, lastIndex+1, maxSize) 725 if err != nil { 726 l.runtime.Logger().Error("QueryRaftLog failed", zap.Error(err)) 727 return nil, 0, err 728 } 729 select { 730 case v := <-rs.ResultC(): 731 if v.Completed() { 732 entries, logRange := v.RaftLogs() 733 next := getNextIndex(entries, firstIndex, logRange.LastIndex) 734 results, err := l.markEntries(ctx, shardID, entries) 735 if err != nil { 736 l.runtime.Logger().Error("markEntries failed", zap.Error(err)) 737 return nil, 0, err 738 } 739 return results, next, nil 740 } else if v.RequestOutOfRange() { 741 // FIXME: add more details to the log, what is the available range 742 l.runtime.Logger().Error("OutOfRange query found") 743 return nil, 0, dragonboat.ErrInvalidRange 744 } 745 panic(moerr.NewInvalidState(ctx, "unexpected rs state")) 746 case <-ctx.Done(): 747 return nil, 0, ctx.Err() 748 } 749 } 750 751 func (l *store) tickerForTaskSchedule(ctx context.Context, duration time.Duration) { 752 ticker := time.NewTicker(duration) 753 defer ticker.Stop() 754 755 for { 756 select { 757 case <-ticker.C: 758 state, _ := l.getCheckerStateFromLeader() 759 if state != nil && state.State == pb.HAKeeperRunning { 760 l.taskSchedule(state) 761 } 762 763 case <-ctx.Done(): 764 return 765 } 766 767 // l.taskSchedule could be blocking a long time, this extra select 768 // can give a chance immediately to check the ctx status when it resumes. 769 select { 770 case <-ctx.Done(): 771 return 772 default: 773 // nothing to do 774 } 775 } 776 777 } 778 779 func (l *store) ticker(ctx context.Context) { 780 if l.cfg.HAKeeperTickInterval.Duration == 0 { 781 panic("invalid HAKeeperTickInterval") 782 } 783 l.runtime.Logger().Info("Hakeeper interval configs", 784 zap.Int64("HAKeeperTickInterval", int64(l.cfg.HAKeeperTickInterval.Duration)), 785 zap.Int64("HAKeeperCheckInterval", int64(l.cfg.HAKeeperCheckInterval.Duration))) 786 ticker := time.NewTicker(l.cfg.HAKeeperTickInterval.Duration) 787 defer ticker.Stop() 788 if l.cfg.HAKeeperCheckInterval.Duration == 0 { 789 panic("invalid HAKeeperCheckInterval") 790 } 791 defer func() { 792 l.runtime.Logger().Info("HAKeeper ticker stopped") 793 }() 794 haTicker := time.NewTicker(l.cfg.HAKeeperCheckInterval.Duration) 795 defer haTicker.Stop() 796 797 // moving task schedule from the ticker normal routine to a 798 // separate goroutine can avoid the hakeeper's health check and tick update 799 // operations being blocked by task schedule, or the tick will be skipped and 800 // can not correctly estimate the time passing. 801 go l.tickerForTaskSchedule(ctx, l.cfg.HAKeeperCheckInterval.Duration) 802 803 for { 804 select { 805 case <-ticker.C: 806 l.hakeeperTick() 807 case <-haTicker.C: 808 l.hakeeperCheck() 809 case <-ctx.Done(): 810 return 811 } 812 813 select { 814 case <-ctx.Done(): 815 return 816 default: 817 } 818 } 819 } 820 821 func (l *store) isLeaderHAKeeper() (bool, uint64, error) { 822 leaderID, term, ok, err := l.nh.GetLeaderID(hakeeper.DefaultHAKeeperShardID) 823 if err != nil { 824 return false, 0, err 825 } 826 replicaID := atomic.LoadUint64(&l.haKeeperReplicaID) 827 return ok && replicaID != 0 && leaderID == replicaID, term, nil 828 } 829 830 // TODO: add test for this 831 func (l *store) hakeeperTick() { 832 isLeader, _, err := l.isLeaderHAKeeper() 833 if err != nil { 834 l.runtime.Logger().Error("failed to get HAKeeper Leader ID", zap.Error(err)) 835 return 836 } 837 838 if isLeader { 839 cmd := hakeeper.GetTickCmd() 840 ctx, cancel := context.WithTimeout(context.Background(), hakeeperDefaultTimeout) 841 defer cancel() 842 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 843 if _, err := l.propose(ctx, session, cmd); err != nil { 844 l.runtime.Logger().Error("propose tick failed", zap.Error(err)) 845 return 846 } 847 } 848 } 849 850 func (l *store) getHeartbeatMessage() pb.LogStoreHeartbeat { 851 m := pb.LogStoreHeartbeat{ 852 UUID: l.id(), 853 RaftAddress: l.cfg.RaftServiceAddr(), 854 ServiceAddress: l.cfg.LogServiceServiceAddr(), 855 GossipAddress: l.cfg.GossipServiceAddr(), 856 Replicas: make([]pb.LogReplicaInfo, 0), 857 } 858 opts := dragonboat.NodeHostInfoOption{ 859 SkipLogInfo: true, 860 } 861 nhi := l.nh.GetNodeHostInfo(opts) 862 for _, ci := range nhi.ShardInfoList { 863 if ci.Pending { 864 l.runtime.Logger().Info(fmt.Sprintf("shard %d is pending, not included into the heartbeat", 865 ci.ShardID)) 866 continue 867 } 868 if ci.ConfigChangeIndex == 0 { 869 panic("ci.ConfigChangeIndex is 0") 870 } 871 replicaInfo := pb.LogReplicaInfo{ 872 LogShardInfo: pb.LogShardInfo{ 873 ShardID: ci.ShardID, 874 Replicas: ci.Nodes, 875 Epoch: ci.ConfigChangeIndex, 876 LeaderID: ci.LeaderID, 877 Term: ci.Term, 878 }, 879 ReplicaID: ci.ReplicaID, 880 } 881 // FIXME: why we need this? 882 if replicaInfo.Replicas == nil { 883 replicaInfo.Replicas = make(map[uint64]dragonboat.Target) 884 } 885 m.Replicas = append(m.Replicas, replicaInfo) 886 } 887 return m 888 } 889 890 // leaderID returns the leader ID of the specified shard. 891 func (l *store) leaderID(shardID uint64) (uint64, error) { 892 leaderID, _, ok, err := l.nh.GetLeaderID(shardID) 893 if err != nil { 894 return 0, err 895 } 896 if !ok { 897 return 0, nil 898 } 899 return leaderID, nil 900 }