github.com/matrixorigin/matrixone@v0.7.0/pkg/logservice/store.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package logservice 16 17 import ( 18 "context" 19 "fmt" 20 "sync" 21 "sync/atomic" 22 "time" 23 24 "github.com/cockroachdb/errors" 25 "github.com/lni/dragonboat/v4" 26 cli "github.com/lni/dragonboat/v4/client" 27 "github.com/lni/dragonboat/v4/config" 28 "github.com/lni/dragonboat/v4/plugin/tan" 29 "github.com/lni/dragonboat/v4/plugin/tee" 30 "github.com/lni/dragonboat/v4/raftpb" 31 sm "github.com/lni/dragonboat/v4/statemachine" 32 "github.com/matrixorigin/matrixone/pkg/common/moerr" 33 "github.com/matrixorigin/matrixone/pkg/common/runtime" 34 "github.com/matrixorigin/matrixone/pkg/common/stopper" 35 "github.com/matrixorigin/matrixone/pkg/hakeeper" 36 "github.com/matrixorigin/matrixone/pkg/hakeeper/bootstrap" 37 "github.com/matrixorigin/matrixone/pkg/hakeeper/checkers" 38 "github.com/matrixorigin/matrixone/pkg/hakeeper/task" 39 "github.com/matrixorigin/matrixone/pkg/logutil" 40 pb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 41 "github.com/matrixorigin/matrixone/pkg/pb/metadata" 42 "github.com/matrixorigin/matrixone/pkg/taskservice" 43 "go.uber.org/zap" 44 ) 45 46 type storeMeta struct { 47 serviceAddress string 48 } 49 50 func (l *storeMeta) marshal() []byte { 51 return []byte(l.serviceAddress) 52 } 53 54 func (l *storeMeta) unmarshal(data []byte) { 55 l.serviceAddress = string(data) 56 } 57 58 func isUserUpdate(cmd []byte) bool { 59 return parseCmdTag(cmd) == pb.UserEntryUpdate 60 } 61 62 func isSetLeaseHolderUpdate(cmd []byte) bool { 63 return parseCmdTag(cmd) == pb.LeaseHolderIDUpdate 64 } 65 66 func getNodeHostConfig(cfg Config) config.NodeHostConfig { 67 meta := storeMeta{ 68 serviceAddress: cfg.ServiceAddress, 69 } 70 if cfg.GossipProbeInterval.Duration == 0 { 71 panic("cfg.GossipProbeInterval.Duration is 0") 72 } 73 logdb := config.GetTinyMemLogDBConfig() 74 logdb.KVWriteBufferSize = cfg.LogDBBufferSize 75 logdbFactory := (config.LogDBFactory)(nil) 76 logdbFactory = tan.Factory 77 if cfg.UseTeeLogDB { 78 logutil.Warn("using tee based logdb backed by pebble and tan, for testing purposes only") 79 logdbFactory = tee.TanPebbleLogDBFactory 80 } 81 return config.NodeHostConfig{ 82 DeploymentID: cfg.DeploymentID, 83 NodeHostID: cfg.UUID, 84 NodeHostDir: cfg.DataDir, 85 RTTMillisecond: cfg.RTTMillisecond, 86 AddressByNodeHostID: true, 87 RaftAddress: cfg.RaftAddress, 88 ListenAddress: cfg.RaftListenAddress, 89 Expert: config.ExpertConfig{ 90 FS: cfg.FS, 91 LogDBFactory: logdbFactory, 92 // FIXME: dragonboat need to be updated to make this field a first class 93 // citizen 94 TestGossipProbeInterval: cfg.GossipProbeInterval.Duration, 95 LogDB: logdb, 96 }, 97 Gossip: config.GossipConfig{ 98 BindAddress: cfg.GossipListenAddress, 99 AdvertiseAddress: cfg.GossipAddress, 100 Seed: cfg.GossipSeedAddresses, 101 Meta: meta.marshal(), 102 CanUseSelfAsSeed: cfg.GossipAllowSelfAsSeed, 103 }, 104 } 105 } 106 107 func getRaftConfig(shardID uint64, replicaID uint64) config.Config { 108 return config.Config{ 109 ShardID: shardID, 110 ReplicaID: replicaID, 111 CheckQuorum: true, 112 PreVote: true, 113 ElectionRTT: 10, 114 HeartbeatRTT: 1, 115 OrderedConfigChange: true, 116 } 117 } 118 119 // store manages log shards including the HAKeeper shard on each node. 120 type store struct { 121 cfg Config 122 nh *dragonboat.NodeHost 123 haKeeperReplicaID uint64 124 checker hakeeper.Checker 125 alloc hakeeper.IDAllocator 126 stopper *stopper.Stopper 127 tickerStopper *stopper.Stopper 128 runtime runtime.Runtime 129 130 bootstrapCheckCycles uint64 131 bootstrapMgr *bootstrap.Manager 132 133 taskScheduler hakeeper.TaskScheduler 134 135 mu struct { 136 sync.Mutex 137 metadata metadata.LogStore 138 } 139 shardSnapshotInfo shardSnapshotInfo 140 snapshotMgr *snapshotManager 141 } 142 143 func newLogStore(cfg Config, 144 taskServiceGetter func() taskservice.TaskService, 145 rt runtime.Runtime) (*store, error) { 146 nh, err := dragonboat.NewNodeHost(getNodeHostConfig(cfg)) 147 if err != nil { 148 return nil, err 149 } 150 hakeeperConfig := cfg.GetHAKeeperConfig() 151 rt.SubLogger(runtime.SystemInit).Info("HAKeeper Timeout Configs", 152 zap.Int64("LogStoreTimeout", int64(hakeeperConfig.LogStoreTimeout)), 153 zap.Int64("DNStoreTimeout", int64(hakeeperConfig.DNStoreTimeout)), 154 zap.Int64("CNStoreTimeout", int64(hakeeperConfig.CNStoreTimeout)), 155 ) 156 ls := &store{ 157 cfg: cfg, 158 nh: nh, 159 checker: checkers.NewCoordinator(hakeeperConfig), 160 taskScheduler: task.NewScheduler(taskServiceGetter, hakeeperConfig), 161 alloc: newIDAllocator(), 162 stopper: stopper.NewStopper("log-store"), 163 tickerStopper: stopper.NewStopper("hakeeper-ticker"), 164 runtime: rt, 165 166 shardSnapshotInfo: newShardSnapshotInfo(), 167 snapshotMgr: newSnapshotManager(&cfg), 168 } 169 ls.mu.metadata = metadata.LogStore{UUID: cfg.UUID} 170 if err := ls.stopper.RunNamedTask("truncation-worker", func(ctx context.Context) { 171 rt.SubLogger(runtime.SystemInit).Info("logservice truncation worker started") 172 ls.truncationWorker(ctx) 173 }); err != nil { 174 return nil, err 175 } 176 return ls, nil 177 } 178 179 func (l *store) close() error { 180 l.tickerStopper.Stop() 181 l.stopper.Stop() 182 if l.nh != nil { 183 l.nh.Close() 184 } 185 return nil 186 } 187 188 func (l *store) id() string { 189 return l.nh.ID() 190 } 191 192 func (l *store) startReplicas() error { 193 l.mu.Lock() 194 shards := make([]metadata.LogShard, 0) 195 shards = append(shards, l.mu.metadata.Shards...) 196 l.mu.Unlock() 197 198 for _, rec := range shards { 199 if rec.ShardID == hakeeper.DefaultHAKeeperShardID { 200 if err := l.startHAKeeperReplica(rec.ReplicaID, nil, false); err != nil { 201 return err 202 } 203 } else { 204 if err := l.startReplica(rec.ShardID, rec.ReplicaID, nil, false); err != nil { 205 return err 206 } 207 } 208 } 209 return nil 210 } 211 212 func (l *store) startHAKeeperReplica(replicaID uint64, 213 initialReplicas map[uint64]dragonboat.Target, join bool) error { 214 raftConfig := getRaftConfig(hakeeper.DefaultHAKeeperShardID, replicaID) 215 if err := l.nh.StartReplica(initialReplicas, 216 join, hakeeper.NewStateMachine, raftConfig); err != nil { 217 return err 218 } 219 l.addMetadata(hakeeper.DefaultHAKeeperShardID, replicaID) 220 atomic.StoreUint64(&l.haKeeperReplicaID, replicaID) 221 if !l.cfg.DisableWorkers { 222 if err := l.tickerStopper.RunNamedTask("hakeeper-ticker", func(ctx context.Context) { 223 l.runtime.SubLogger(runtime.SystemInit).Info("HAKeeper ticker started") 224 l.ticker(ctx) 225 }); err != nil { 226 return err 227 } 228 } 229 return nil 230 } 231 232 func (l *store) startReplica(shardID uint64, replicaID uint64, 233 initialReplicas map[uint64]dragonboat.Target, join bool) error { 234 if shardID == hakeeper.DefaultHAKeeperShardID { 235 return moerr.NewInvalidInputNoCtx("shardID %d does not match DefaultHAKeeperShardID %d", shardID, hakeeper.DefaultHAKeeperShardID) 236 } 237 cfg := getRaftConfig(shardID, replicaID) 238 if err := l.snapshotMgr.Init(shardID, replicaID); err != nil { 239 panic(err) 240 } 241 if err := l.nh.StartReplica(initialReplicas, join, newStateMachine, cfg); err != nil { 242 return err 243 } 244 l.addMetadata(shardID, replicaID) 245 return nil 246 } 247 248 func (l *store) stopReplica(shardID uint64, replicaID uint64) error { 249 if shardID == hakeeper.DefaultHAKeeperShardID { 250 defer func() { 251 atomic.StoreUint64(&l.haKeeperReplicaID, 0) 252 }() 253 } 254 return l.nh.StopReplica(shardID, replicaID) 255 } 256 257 func (l *store) requestLeaderTransfer(shardID uint64, targetReplicaID uint64) error { 258 return l.nh.RequestLeaderTransfer(shardID, targetReplicaID) 259 } 260 261 func (l *store) addReplica(shardID uint64, replicaID uint64, 262 target dragonboat.Target, cci uint64) error { 263 // Set timeout to a little bigger value to prevent Timeout Error and 264 // returns a dragonboat.ErrRejected at last, in which case, it will take 265 // longer time to finish this operation. 266 ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) 267 defer cancel() 268 count := 0 269 for { 270 count++ 271 if err := l.nh.SyncRequestAddReplica(ctx, shardID, replicaID, target, cci); err != nil { 272 if errors.Is(err, dragonboat.ErrShardNotReady) { 273 l.retryWait() 274 continue 275 } 276 if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 { 277 return dragonboat.ErrTimeout 278 } 279 return err 280 } 281 return nil 282 } 283 } 284 285 func (l *store) removeReplica(shardID uint64, replicaID uint64, cci uint64) error { 286 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 287 defer cancel() 288 count := 0 289 for { 290 count++ 291 if err := l.nh.SyncRequestDeleteReplica(ctx, shardID, replicaID, cci); err != nil { 292 if errors.Is(err, dragonboat.ErrShardNotReady) { 293 l.retryWait() 294 continue 295 } 296 // FIXME: internally handle dragonboat.ErrTimeoutTooSmall 297 if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 { 298 return dragonboat.ErrTimeout 299 } 300 return err 301 } 302 l.removeMetadata(shardID, replicaID) 303 return nil 304 } 305 } 306 307 func (l *store) retryWait() { 308 if l.nh.NodeHostConfig().RTTMillisecond == 1 { 309 time.Sleep(time.Millisecond) 310 } 311 time.Sleep(time.Duration(l.nh.NodeHostConfig().RTTMillisecond/2) * time.Millisecond) 312 } 313 314 func (l *store) propose(ctx context.Context, 315 session *cli.Session, cmd []byte) (sm.Result, error) { 316 count := 0 317 for { 318 count++ 319 result, err := l.nh.SyncPropose(ctx, session, cmd) 320 if err != nil { 321 if errors.Is(err, dragonboat.ErrShardNotReady) || 322 errors.Is(err, dragonboat.ErrSystemBusy) { 323 l.retryWait() 324 continue 325 } 326 if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 { 327 return sm.Result{}, dragonboat.ErrTimeout 328 } 329 return sm.Result{}, err 330 } 331 return result, nil 332 } 333 } 334 335 func (l *store) read(ctx context.Context, 336 shardID uint64, query interface{}) (interface{}, error) { 337 count := 0 338 for { 339 count++ 340 result, err := l.nh.SyncRead(ctx, shardID, query) 341 if err != nil { 342 if errors.Is(err, dragonboat.ErrShardNotReady) { 343 l.retryWait() 344 continue 345 } 346 if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 { 347 return nil, dragonboat.ErrTimeout 348 } 349 return nil, err 350 } 351 return result, nil 352 } 353 } 354 355 func (l *store) getOrExtendDNLease(ctx context.Context, 356 shardID uint64, dnID uint64) error { 357 session := l.nh.GetNoOPSession(shardID) 358 cmd := getSetLeaseHolderCmd(dnID) 359 _, err := l.propose(ctx, session, cmd) 360 return err 361 } 362 363 func (l *store) truncateLog(ctx context.Context, 364 shardID uint64, index Lsn) error { 365 session := l.nh.GetNoOPSession(shardID) 366 cmd := getSetTruncatedLsnCmd(index) 367 result, err := l.propose(ctx, session, cmd) 368 if err != nil { 369 l.runtime.Logger().Error("propose truncate log cmd failed", zap.Error(err)) 370 return err 371 } 372 if result.Value > 0 { 373 l.runtime.Logger().Error(fmt.Sprintf("shardID %d already truncated to index %d", shardID, result.Value)) 374 return moerr.NewInvalidTruncateLsn(ctx, shardID, result.Value) 375 } 376 return nil 377 } 378 379 func (l *store) append(ctx context.Context, 380 shardID uint64, cmd []byte) (Lsn, error) { 381 session := l.nh.GetNoOPSession(shardID) 382 result, err := l.propose(ctx, session, cmd) 383 if err != nil { 384 l.runtime.Logger().Error("propose failed", zap.Error(err)) 385 return 0, err 386 } 387 if len(result.Data) > 0 { 388 l.runtime.Logger().Error("not current lease holder", zap.Uint64("data", binaryEnc.Uint64(result.Data))) 389 return 0, moerr.NewNotLeaseHolder(ctx, binaryEnc.Uint64(result.Data)) 390 } 391 if result.Value == 0 { 392 panic(moerr.NewInvalidState(ctx, "unexpected Lsn value")) 393 } 394 return result.Value, nil 395 } 396 397 func (l *store) getTruncatedLsn(ctx context.Context, 398 shardID uint64) (uint64, error) { 399 v, err := l.read(ctx, shardID, truncatedLsnQuery{}) 400 if err != nil { 401 return 0, err 402 } 403 return v.(uint64), nil 404 } 405 406 func (l *store) tsoUpdate(ctx context.Context, count uint64) (uint64, error) { 407 cmd := getTsoUpdateCmd(count) 408 session := l.nh.GetNoOPSession(firstLogShardID) 409 result, err := l.propose(ctx, session, cmd) 410 if err != nil { 411 l.runtime.Logger().Error("failed to propose tso updat", zap.Error(err)) 412 return 0, err 413 } 414 return result.Value, nil 415 } 416 417 func handleNotHAKeeperError(ctx context.Context, err error) error { 418 if err == nil { 419 return err 420 } 421 if errors.Is(err, dragonboat.ErrShardNotFound) { 422 return moerr.NewNoHAKeeper(ctx) 423 } 424 return err 425 } 426 427 func (l *store) addLogStoreHeartbeat(ctx context.Context, 428 hb pb.LogStoreHeartbeat) (pb.CommandBatch, error) { 429 data := MustMarshal(&hb) 430 cmd := hakeeper.GetLogStoreHeartbeatCmd(data) 431 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 432 if result, err := l.propose(ctx, session, cmd); err != nil { 433 l.runtime.Logger().Error("propose failed", zap.Error(err)) 434 return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err) 435 } else { 436 var cb pb.CommandBatch 437 MustUnmarshal(&cb, result.Data) 438 return cb, nil 439 } 440 } 441 442 func (l *store) addCNStoreHeartbeat(ctx context.Context, 443 hb pb.CNStoreHeartbeat) (pb.CommandBatch, error) { 444 data := MustMarshal(&hb) 445 cmd := hakeeper.GetCNStoreHeartbeatCmd(data) 446 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 447 if result, err := l.propose(ctx, session, cmd); err != nil { 448 l.runtime.Logger().Error("propose failed", zap.Error(err)) 449 return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err) 450 } else { 451 var cb pb.CommandBatch 452 MustUnmarshal(&cb, result.Data) 453 return cb, nil 454 } 455 } 456 457 func (l *store) cnAllocateID(ctx context.Context, 458 req pb.CNAllocateID) (uint64, error) { 459 cmd := hakeeper.GetGetIDCmd(req.Batch) 460 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 461 result, err := l.propose(ctx, session, cmd) 462 if err != nil { 463 l.runtime.Logger().Error("propose get id failed", zap.Error(err)) 464 return 0, err 465 } 466 return result.Value, nil 467 } 468 469 func (l *store) addDNStoreHeartbeat(ctx context.Context, 470 hb pb.DNStoreHeartbeat) (pb.CommandBatch, error) { 471 data := MustMarshal(&hb) 472 cmd := hakeeper.GetDNStoreHeartbeatCmd(data) 473 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 474 if result, err := l.propose(ctx, session, cmd); err != nil { 475 l.runtime.Logger().Error("propose failed", zap.Error(err)) 476 return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err) 477 } else { 478 var cb pb.CommandBatch 479 MustUnmarshal(&cb, result.Data) 480 return cb, nil 481 } 482 } 483 484 func (l *store) getCommandBatch(ctx context.Context, 485 uuid string) (pb.CommandBatch, error) { 486 v, err := l.read(ctx, 487 hakeeper.DefaultHAKeeperShardID, &hakeeper.ScheduleCommandQuery{UUID: uuid}) 488 if err != nil { 489 return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err) 490 } 491 return *(v.(*pb.CommandBatch)), nil 492 } 493 494 func (l *store) getClusterDetails(ctx context.Context) (pb.ClusterDetails, error) { 495 v, err := l.read(ctx, 496 hakeeper.DefaultHAKeeperShardID, &hakeeper.ClusterDetailsQuery{Cfg: l.cfg.GetHAKeeperConfig()}) 497 if err != nil { 498 return pb.ClusterDetails{}, handleNotHAKeeperError(ctx, err) 499 } 500 return *(v.(*pb.ClusterDetails)), nil 501 } 502 503 func (l *store) addScheduleCommands(ctx context.Context, 504 term uint64, cmds []pb.ScheduleCommand) error { 505 cmd := hakeeper.GetUpdateCommandsCmd(term, cmds) 506 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 507 if _, err := l.propose(ctx, session, cmd); err != nil { 508 return handleNotHAKeeperError(ctx, err) 509 } 510 return nil 511 } 512 513 func (l *store) getLeaseHolderID(ctx context.Context, 514 shardID uint64, entries []raftpb.Entry) (uint64, error) { 515 if len(entries) == 0 { 516 panic("empty entries") 517 } 518 // first entry is an update lease cmd 519 e := entries[0] 520 if !isRaftInternalEntry(e) && isSetLeaseHolderUpdate(l.decodeCmd(ctx, e)) { 521 return parseLeaseHolderID(l.decodeCmd(ctx, e)), nil 522 } 523 v, err := l.read(ctx, shardID, leaseHistoryQuery{lsn: e.Index}) 524 if err != nil { 525 l.runtime.Logger().Error("failed to read", zap.Error(err)) 526 return 0, err 527 } 528 return v.(uint64), nil 529 } 530 531 func (l *store) decodeCmd(ctx context.Context, e raftpb.Entry) []byte { 532 if e.Type == raftpb.ApplicationEntry { 533 panic(moerr.NewInvalidState(ctx, "unexpected entry type")) 534 } 535 if e.Type == raftpb.EncodedEntry { 536 if e.Cmd[0] != 0 { 537 panic(moerr.NewInvalidState(ctx, "unexpected cmd header")) 538 } 539 return e.Cmd[1:] 540 } 541 panic(moerr.NewInvalidState(ctx, "invalid cmd")) 542 } 543 544 func isRaftInternalEntry(e raftpb.Entry) bool { 545 if len(e.Cmd) == 0 { 546 return true 547 } 548 return e.Type == raftpb.ConfigChangeEntry || e.Type == raftpb.MetadataEntry 549 } 550 551 func (l *store) markEntries(ctx context.Context, 552 shardID uint64, entries []raftpb.Entry) ([]pb.LogRecord, error) { 553 if len(entries) == 0 { 554 return []pb.LogRecord{}, nil 555 } 556 leaseHolderID, err := l.getLeaseHolderID(ctx, shardID, entries) 557 if err != nil { 558 return nil, err 559 } 560 result := make([]pb.LogRecord, 0) 561 for _, e := range entries { 562 if isRaftInternalEntry(e) { 563 // raft internal stuff 564 result = append(result, LogRecord{ 565 Type: pb.Internal, 566 Lsn: e.Index, 567 }) 568 continue 569 } 570 cmd := l.decodeCmd(ctx, e) 571 if isSetLeaseHolderUpdate(cmd) { 572 leaseHolderID = parseLeaseHolderID(cmd) 573 result = append(result, LogRecord{ 574 Type: pb.LeaseUpdate, 575 Lsn: e.Index, 576 }) 577 continue 578 } 579 if isUserUpdate(cmd) { 580 if parseLeaseHolderID(cmd) != leaseHolderID { 581 // lease not match, skip 582 result = append(result, LogRecord{ 583 Type: pb.LeaseRejected, 584 Lsn: e.Index, 585 }) 586 continue 587 } 588 result = append(result, LogRecord{ 589 Data: cmd, 590 Type: pb.UserRecord, 591 Lsn: e.Index, 592 }) 593 } 594 } 595 return result, nil 596 } 597 598 func getNextIndex(entries []raftpb.Entry, firstIndex Lsn, lastIndex Lsn) Lsn { 599 if len(entries) == 0 { 600 return firstIndex 601 } 602 lastResultIndex := entries[len(entries)-1].Index 603 if lastResultIndex+1 < lastIndex { 604 return lastResultIndex + 1 605 } 606 return firstIndex 607 } 608 609 // high priority test 610 // FIXME: add a test that queries the log with LeaseUpdate, LeaseRejected 611 // entries, no matter what is the firstLsn specified in queryLog(), returned 612 // results should make sense 613 func (l *store) queryLog(ctx context.Context, shardID uint64, 614 firstIndex Lsn, maxSize uint64) ([]LogRecord, Lsn, error) { 615 v, err := l.read(ctx, shardID, indexQuery{}) 616 if err != nil { 617 return nil, 0, err 618 } 619 lastIndex := v.(uint64) 620 // FIXME: check whether lastIndex >= firstIndex 621 rs, err := l.nh.QueryRaftLog(shardID, firstIndex, lastIndex+1, maxSize) 622 if err != nil { 623 l.runtime.Logger().Error("QueryRaftLog failed", zap.Error(err)) 624 return nil, 0, err 625 } 626 select { 627 case v := <-rs.ResultC(): 628 if v.Completed() { 629 entries, logRange := v.RaftLogs() 630 next := getNextIndex(entries, firstIndex, logRange.LastIndex) 631 results, err := l.markEntries(ctx, shardID, entries) 632 if err != nil { 633 l.runtime.Logger().Error("markEntries failed", zap.Error(err)) 634 return nil, 0, err 635 } 636 return results, next, nil 637 } else if v.RequestOutOfRange() { 638 // FIXME: add more details to the log, what is the available range 639 l.runtime.Logger().Error("OutOfRange query found") 640 return nil, 0, dragonboat.ErrInvalidRange 641 } 642 panic(moerr.NewInvalidState(ctx, "unexpected rs state")) 643 case <-ctx.Done(): 644 return nil, 0, ctx.Err() 645 } 646 } 647 648 func (l *store) ticker(ctx context.Context) { 649 if l.cfg.HAKeeperTickInterval.Duration == 0 { 650 panic("invalid HAKeeperTickInterval") 651 } 652 l.runtime.Logger().Info("Hakeeper interval configs", 653 zap.Int64("HAKeeperTickInterval", int64(l.cfg.HAKeeperTickInterval.Duration)), 654 zap.Int64("HAKeeperCheckInterval", int64(l.cfg.HAKeeperCheckInterval.Duration))) 655 ticker := time.NewTicker(l.cfg.HAKeeperTickInterval.Duration) 656 defer ticker.Stop() 657 if l.cfg.HAKeeperCheckInterval.Duration == 0 { 658 panic("invalid HAKeeperCheckInterval") 659 } 660 defer func() { 661 l.runtime.Logger().Info("HAKeeper ticker stopped") 662 }() 663 haTicker := time.NewTicker(l.cfg.HAKeeperCheckInterval.Duration) 664 defer haTicker.Stop() 665 666 for { 667 select { 668 case <-ticker.C: 669 l.hakeeperTick() 670 case <-haTicker.C: 671 l.hakeeperCheck() 672 case <-ctx.Done(): 673 return 674 } 675 676 select { 677 case <-ctx.Done(): 678 return 679 default: 680 } 681 } 682 } 683 684 func (l *store) isLeaderHAKeeper() (bool, uint64, error) { 685 leaderID, term, ok, err := l.nh.GetLeaderID(hakeeper.DefaultHAKeeperShardID) 686 if err != nil { 687 return false, 0, err 688 } 689 replicaID := atomic.LoadUint64(&l.haKeeperReplicaID) 690 return ok && replicaID != 0 && leaderID == replicaID, term, nil 691 } 692 693 // TODO: add test for this 694 func (l *store) hakeeperTick() { 695 isLeader, _, err := l.isLeaderHAKeeper() 696 if err != nil { 697 l.runtime.Logger().Error("failed to get HAKeeper Leader ID", zap.Error(err)) 698 return 699 } 700 701 if isLeader { 702 cmd := hakeeper.GetTickCmd() 703 ctx, cancel := context.WithTimeout(context.Background(), hakeeperDefaultTimeout) 704 defer cancel() 705 session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID) 706 if _, err := l.propose(ctx, session, cmd); err != nil { 707 l.runtime.Logger().Error("propose tick failed", zap.Error(err)) 708 return 709 } 710 } 711 } 712 713 func (l *store) getHeartbeatMessage() pb.LogStoreHeartbeat { 714 m := pb.LogStoreHeartbeat{ 715 UUID: l.id(), 716 RaftAddress: l.cfg.RaftAddress, 717 ServiceAddress: l.cfg.ServiceAddress, 718 GossipAddress: l.cfg.GossipAddress, 719 Replicas: make([]pb.LogReplicaInfo, 0), 720 } 721 opts := dragonboat.NodeHostInfoOption{ 722 SkipLogInfo: true, 723 } 724 nhi := l.nh.GetNodeHostInfo(opts) 725 for _, ci := range nhi.ShardInfoList { 726 if ci.Pending { 727 l.runtime.Logger().Info(fmt.Sprintf("shard %d is pending, not included into the heartbeat", 728 ci.ShardID)) 729 continue 730 } 731 if ci.ConfigChangeIndex == 0 { 732 panic("ci.ConfigChangeIndex is 0") 733 } 734 replicaInfo := pb.LogReplicaInfo{ 735 LogShardInfo: pb.LogShardInfo{ 736 ShardID: ci.ShardID, 737 Replicas: ci.Nodes, 738 Epoch: ci.ConfigChangeIndex, 739 LeaderID: ci.LeaderID, 740 Term: ci.Term, 741 }, 742 ReplicaID: ci.ReplicaID, 743 } 744 // FIXME: why we need this? 745 if replicaInfo.Replicas == nil { 746 replicaInfo.Replicas = make(map[uint64]dragonboat.Target) 747 } 748 m.Replicas = append(m.Replicas, replicaInfo) 749 } 750 return m 751 }