github.com/matrixorigin/matrixone@v0.7.0/pkg/logservice/store.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package logservice
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"sync"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	"github.com/cockroachdb/errors"
    25  	"github.com/lni/dragonboat/v4"
    26  	cli "github.com/lni/dragonboat/v4/client"
    27  	"github.com/lni/dragonboat/v4/config"
    28  	"github.com/lni/dragonboat/v4/plugin/tan"
    29  	"github.com/lni/dragonboat/v4/plugin/tee"
    30  	"github.com/lni/dragonboat/v4/raftpb"
    31  	sm "github.com/lni/dragonboat/v4/statemachine"
    32  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    33  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    34  	"github.com/matrixorigin/matrixone/pkg/common/stopper"
    35  	"github.com/matrixorigin/matrixone/pkg/hakeeper"
    36  	"github.com/matrixorigin/matrixone/pkg/hakeeper/bootstrap"
    37  	"github.com/matrixorigin/matrixone/pkg/hakeeper/checkers"
    38  	"github.com/matrixorigin/matrixone/pkg/hakeeper/task"
    39  	"github.com/matrixorigin/matrixone/pkg/logutil"
    40  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    41  	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
    42  	"github.com/matrixorigin/matrixone/pkg/taskservice"
    43  	"go.uber.org/zap"
    44  )
    45  
    46  type storeMeta struct {
    47  	serviceAddress string
    48  }
    49  
    50  func (l *storeMeta) marshal() []byte {
    51  	return []byte(l.serviceAddress)
    52  }
    53  
    54  func (l *storeMeta) unmarshal(data []byte) {
    55  	l.serviceAddress = string(data)
    56  }
    57  
    58  func isUserUpdate(cmd []byte) bool {
    59  	return parseCmdTag(cmd) == pb.UserEntryUpdate
    60  }
    61  
    62  func isSetLeaseHolderUpdate(cmd []byte) bool {
    63  	return parseCmdTag(cmd) == pb.LeaseHolderIDUpdate
    64  }
    65  
    66  func getNodeHostConfig(cfg Config) config.NodeHostConfig {
    67  	meta := storeMeta{
    68  		serviceAddress: cfg.ServiceAddress,
    69  	}
    70  	if cfg.GossipProbeInterval.Duration == 0 {
    71  		panic("cfg.GossipProbeInterval.Duration is 0")
    72  	}
    73  	logdb := config.GetTinyMemLogDBConfig()
    74  	logdb.KVWriteBufferSize = cfg.LogDBBufferSize
    75  	logdbFactory := (config.LogDBFactory)(nil)
    76  	logdbFactory = tan.Factory
    77  	if cfg.UseTeeLogDB {
    78  		logutil.Warn("using tee based logdb backed by pebble and tan, for testing purposes only")
    79  		logdbFactory = tee.TanPebbleLogDBFactory
    80  	}
    81  	return config.NodeHostConfig{
    82  		DeploymentID:        cfg.DeploymentID,
    83  		NodeHostID:          cfg.UUID,
    84  		NodeHostDir:         cfg.DataDir,
    85  		RTTMillisecond:      cfg.RTTMillisecond,
    86  		AddressByNodeHostID: true,
    87  		RaftAddress:         cfg.RaftAddress,
    88  		ListenAddress:       cfg.RaftListenAddress,
    89  		Expert: config.ExpertConfig{
    90  			FS:           cfg.FS,
    91  			LogDBFactory: logdbFactory,
    92  			// FIXME: dragonboat need to be updated to make this field a first class
    93  			// citizen
    94  			TestGossipProbeInterval: cfg.GossipProbeInterval.Duration,
    95  			LogDB:                   logdb,
    96  		},
    97  		Gossip: config.GossipConfig{
    98  			BindAddress:      cfg.GossipListenAddress,
    99  			AdvertiseAddress: cfg.GossipAddress,
   100  			Seed:             cfg.GossipSeedAddresses,
   101  			Meta:             meta.marshal(),
   102  			CanUseSelfAsSeed: cfg.GossipAllowSelfAsSeed,
   103  		},
   104  	}
   105  }
   106  
   107  func getRaftConfig(shardID uint64, replicaID uint64) config.Config {
   108  	return config.Config{
   109  		ShardID:             shardID,
   110  		ReplicaID:           replicaID,
   111  		CheckQuorum:         true,
   112  		PreVote:             true,
   113  		ElectionRTT:         10,
   114  		HeartbeatRTT:        1,
   115  		OrderedConfigChange: true,
   116  	}
   117  }
   118  
// store manages log shards including the HAKeeper shard on each node.
type store struct {
	// cfg is the configuration the store was created with (see newLogStore).
	cfg Config
	// nh is the dragonboat NodeHost hosting all replicas on this store.
	nh *dragonboat.NodeHost
	// haKeeperReplicaID is accessed with sync/atomic. It is set by
	// startHAKeeperReplica and reset to 0 by stopReplica for the HAKeeper
	// shard; isLeaderHAKeeper treats 0 as "not the leader".
	haKeeperReplicaID uint64
	// checker and alloc are created in newLogStore from the HAKeeper config.
	// NOTE(review): they are not used in this part of the file; presumably
	// consumed by the HAKeeper check path — confirm.
	checker hakeeper.Checker
	alloc   hakeeper.IDAllocator
	// stopper runs the "truncation-worker" task.
	stopper *stopper.Stopper
	// tickerStopper runs the "hakeeper-ticker" task.
	tickerStopper *stopper.Stopper
	// runtime provides the loggers used throughout the store.
	runtime runtime.Runtime

	// NOTE(review): bootstrap state is not touched in this part of the file;
	// presumably maintained by the HAKeeper check path — confirm.
	bootstrapCheckCycles uint64
	bootstrapMgr         *bootstrap.Manager

	// taskScheduler is created in newLogStore from the task service getter.
	taskScheduler hakeeper.TaskScheduler

	// mu guards metadata, the record of replicas known to this store.
	mu struct {
		sync.Mutex
		metadata metadata.LogStore
	}
	// shardSnapshotInfo and snapshotMgr hold snapshot state; snapshotMgr is
	// initialised per replica in startReplica.
	shardSnapshotInfo shardSnapshotInfo
	snapshotMgr       *snapshotManager
}
   142  
   143  func newLogStore(cfg Config,
   144  	taskServiceGetter func() taskservice.TaskService,
   145  	rt runtime.Runtime) (*store, error) {
   146  	nh, err := dragonboat.NewNodeHost(getNodeHostConfig(cfg))
   147  	if err != nil {
   148  		return nil, err
   149  	}
   150  	hakeeperConfig := cfg.GetHAKeeperConfig()
   151  	rt.SubLogger(runtime.SystemInit).Info("HAKeeper Timeout Configs",
   152  		zap.Int64("LogStoreTimeout", int64(hakeeperConfig.LogStoreTimeout)),
   153  		zap.Int64("DNStoreTimeout", int64(hakeeperConfig.DNStoreTimeout)),
   154  		zap.Int64("CNStoreTimeout", int64(hakeeperConfig.CNStoreTimeout)),
   155  	)
   156  	ls := &store{
   157  		cfg:           cfg,
   158  		nh:            nh,
   159  		checker:       checkers.NewCoordinator(hakeeperConfig),
   160  		taskScheduler: task.NewScheduler(taskServiceGetter, hakeeperConfig),
   161  		alloc:         newIDAllocator(),
   162  		stopper:       stopper.NewStopper("log-store"),
   163  		tickerStopper: stopper.NewStopper("hakeeper-ticker"),
   164  		runtime:       rt,
   165  
   166  		shardSnapshotInfo: newShardSnapshotInfo(),
   167  		snapshotMgr:       newSnapshotManager(&cfg),
   168  	}
   169  	ls.mu.metadata = metadata.LogStore{UUID: cfg.UUID}
   170  	if err := ls.stopper.RunNamedTask("truncation-worker", func(ctx context.Context) {
   171  		rt.SubLogger(runtime.SystemInit).Info("logservice truncation worker started")
   172  		ls.truncationWorker(ctx)
   173  	}); err != nil {
   174  		return nil, err
   175  	}
   176  	return ls, nil
   177  }
   178  
   179  func (l *store) close() error {
   180  	l.tickerStopper.Stop()
   181  	l.stopper.Stop()
   182  	if l.nh != nil {
   183  		l.nh.Close()
   184  	}
   185  	return nil
   186  }
   187  
   188  func (l *store) id() string {
   189  	return l.nh.ID()
   190  }
   191  
   192  func (l *store) startReplicas() error {
   193  	l.mu.Lock()
   194  	shards := make([]metadata.LogShard, 0)
   195  	shards = append(shards, l.mu.metadata.Shards...)
   196  	l.mu.Unlock()
   197  
   198  	for _, rec := range shards {
   199  		if rec.ShardID == hakeeper.DefaultHAKeeperShardID {
   200  			if err := l.startHAKeeperReplica(rec.ReplicaID, nil, false); err != nil {
   201  				return err
   202  			}
   203  		} else {
   204  			if err := l.startReplica(rec.ShardID, rec.ReplicaID, nil, false); err != nil {
   205  				return err
   206  			}
   207  		}
   208  	}
   209  	return nil
   210  }
   211  
   212  func (l *store) startHAKeeperReplica(replicaID uint64,
   213  	initialReplicas map[uint64]dragonboat.Target, join bool) error {
   214  	raftConfig := getRaftConfig(hakeeper.DefaultHAKeeperShardID, replicaID)
   215  	if err := l.nh.StartReplica(initialReplicas,
   216  		join, hakeeper.NewStateMachine, raftConfig); err != nil {
   217  		return err
   218  	}
   219  	l.addMetadata(hakeeper.DefaultHAKeeperShardID, replicaID)
   220  	atomic.StoreUint64(&l.haKeeperReplicaID, replicaID)
   221  	if !l.cfg.DisableWorkers {
   222  		if err := l.tickerStopper.RunNamedTask("hakeeper-ticker", func(ctx context.Context) {
   223  			l.runtime.SubLogger(runtime.SystemInit).Info("HAKeeper ticker started")
   224  			l.ticker(ctx)
   225  		}); err != nil {
   226  			return err
   227  		}
   228  	}
   229  	return nil
   230  }
   231  
   232  func (l *store) startReplica(shardID uint64, replicaID uint64,
   233  	initialReplicas map[uint64]dragonboat.Target, join bool) error {
   234  	if shardID == hakeeper.DefaultHAKeeperShardID {
   235  		return moerr.NewInvalidInputNoCtx("shardID %d does not match DefaultHAKeeperShardID %d", shardID, hakeeper.DefaultHAKeeperShardID)
   236  	}
   237  	cfg := getRaftConfig(shardID, replicaID)
   238  	if err := l.snapshotMgr.Init(shardID, replicaID); err != nil {
   239  		panic(err)
   240  	}
   241  	if err := l.nh.StartReplica(initialReplicas, join, newStateMachine, cfg); err != nil {
   242  		return err
   243  	}
   244  	l.addMetadata(shardID, replicaID)
   245  	return nil
   246  }
   247  
   248  func (l *store) stopReplica(shardID uint64, replicaID uint64) error {
   249  	if shardID == hakeeper.DefaultHAKeeperShardID {
   250  		defer func() {
   251  			atomic.StoreUint64(&l.haKeeperReplicaID, 0)
   252  		}()
   253  	}
   254  	return l.nh.StopReplica(shardID, replicaID)
   255  }
   256  
   257  func (l *store) requestLeaderTransfer(shardID uint64, targetReplicaID uint64) error {
   258  	return l.nh.RequestLeaderTransfer(shardID, targetReplicaID)
   259  }
   260  
   261  func (l *store) addReplica(shardID uint64, replicaID uint64,
   262  	target dragonboat.Target, cci uint64) error {
   263  	// Set timeout to a little bigger value to prevent Timeout Error and
   264  	// returns a dragonboat.ErrRejected at last, in which case, it will take
   265  	// longer time to finish this operation.
   266  	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
   267  	defer cancel()
   268  	count := 0
   269  	for {
   270  		count++
   271  		if err := l.nh.SyncRequestAddReplica(ctx, shardID, replicaID, target, cci); err != nil {
   272  			if errors.Is(err, dragonboat.ErrShardNotReady) {
   273  				l.retryWait()
   274  				continue
   275  			}
   276  			if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 {
   277  				return dragonboat.ErrTimeout
   278  			}
   279  			return err
   280  		}
   281  		return nil
   282  	}
   283  }
   284  
   285  func (l *store) removeReplica(shardID uint64, replicaID uint64, cci uint64) error {
   286  	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
   287  	defer cancel()
   288  	count := 0
   289  	for {
   290  		count++
   291  		if err := l.nh.SyncRequestDeleteReplica(ctx, shardID, replicaID, cci); err != nil {
   292  			if errors.Is(err, dragonboat.ErrShardNotReady) {
   293  				l.retryWait()
   294  				continue
   295  			}
   296  			// FIXME: internally handle dragonboat.ErrTimeoutTooSmall
   297  			if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 {
   298  				return dragonboat.ErrTimeout
   299  			}
   300  			return err
   301  		}
   302  		l.removeMetadata(shardID, replicaID)
   303  		return nil
   304  	}
   305  }
   306  
   307  func (l *store) retryWait() {
   308  	if l.nh.NodeHostConfig().RTTMillisecond == 1 {
   309  		time.Sleep(time.Millisecond)
   310  	}
   311  	time.Sleep(time.Duration(l.nh.NodeHostConfig().RTTMillisecond/2) * time.Millisecond)
   312  }
   313  
   314  func (l *store) propose(ctx context.Context,
   315  	session *cli.Session, cmd []byte) (sm.Result, error) {
   316  	count := 0
   317  	for {
   318  		count++
   319  		result, err := l.nh.SyncPropose(ctx, session, cmd)
   320  		if err != nil {
   321  			if errors.Is(err, dragonboat.ErrShardNotReady) ||
   322  				errors.Is(err, dragonboat.ErrSystemBusy) {
   323  				l.retryWait()
   324  				continue
   325  			}
   326  			if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 {
   327  				return sm.Result{}, dragonboat.ErrTimeout
   328  			}
   329  			return sm.Result{}, err
   330  		}
   331  		return result, nil
   332  	}
   333  }
   334  
   335  func (l *store) read(ctx context.Context,
   336  	shardID uint64, query interface{}) (interface{}, error) {
   337  	count := 0
   338  	for {
   339  		count++
   340  		result, err := l.nh.SyncRead(ctx, shardID, query)
   341  		if err != nil {
   342  			if errors.Is(err, dragonboat.ErrShardNotReady) {
   343  				l.retryWait()
   344  				continue
   345  			}
   346  			if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 {
   347  				return nil, dragonboat.ErrTimeout
   348  			}
   349  			return nil, err
   350  		}
   351  		return result, nil
   352  	}
   353  }
   354  
   355  func (l *store) getOrExtendDNLease(ctx context.Context,
   356  	shardID uint64, dnID uint64) error {
   357  	session := l.nh.GetNoOPSession(shardID)
   358  	cmd := getSetLeaseHolderCmd(dnID)
   359  	_, err := l.propose(ctx, session, cmd)
   360  	return err
   361  }
   362  
   363  func (l *store) truncateLog(ctx context.Context,
   364  	shardID uint64, index Lsn) error {
   365  	session := l.nh.GetNoOPSession(shardID)
   366  	cmd := getSetTruncatedLsnCmd(index)
   367  	result, err := l.propose(ctx, session, cmd)
   368  	if err != nil {
   369  		l.runtime.Logger().Error("propose truncate log cmd failed", zap.Error(err))
   370  		return err
   371  	}
   372  	if result.Value > 0 {
   373  		l.runtime.Logger().Error(fmt.Sprintf("shardID %d already truncated to index %d", shardID, result.Value))
   374  		return moerr.NewInvalidTruncateLsn(ctx, shardID, result.Value)
   375  	}
   376  	return nil
   377  }
   378  
   379  func (l *store) append(ctx context.Context,
   380  	shardID uint64, cmd []byte) (Lsn, error) {
   381  	session := l.nh.GetNoOPSession(shardID)
   382  	result, err := l.propose(ctx, session, cmd)
   383  	if err != nil {
   384  		l.runtime.Logger().Error("propose failed", zap.Error(err))
   385  		return 0, err
   386  	}
   387  	if len(result.Data) > 0 {
   388  		l.runtime.Logger().Error("not current lease holder", zap.Uint64("data", binaryEnc.Uint64(result.Data)))
   389  		return 0, moerr.NewNotLeaseHolder(ctx, binaryEnc.Uint64(result.Data))
   390  	}
   391  	if result.Value == 0 {
   392  		panic(moerr.NewInvalidState(ctx, "unexpected Lsn value"))
   393  	}
   394  	return result.Value, nil
   395  }
   396  
   397  func (l *store) getTruncatedLsn(ctx context.Context,
   398  	shardID uint64) (uint64, error) {
   399  	v, err := l.read(ctx, shardID, truncatedLsnQuery{})
   400  	if err != nil {
   401  		return 0, err
   402  	}
   403  	return v.(uint64), nil
   404  }
   405  
   406  func (l *store) tsoUpdate(ctx context.Context, count uint64) (uint64, error) {
   407  	cmd := getTsoUpdateCmd(count)
   408  	session := l.nh.GetNoOPSession(firstLogShardID)
   409  	result, err := l.propose(ctx, session, cmd)
   410  	if err != nil {
   411  		l.runtime.Logger().Error("failed to propose tso updat", zap.Error(err))
   412  		return 0, err
   413  	}
   414  	return result.Value, nil
   415  }
   416  
   417  func handleNotHAKeeperError(ctx context.Context, err error) error {
   418  	if err == nil {
   419  		return err
   420  	}
   421  	if errors.Is(err, dragonboat.ErrShardNotFound) {
   422  		return moerr.NewNoHAKeeper(ctx)
   423  	}
   424  	return err
   425  }
   426  
   427  func (l *store) addLogStoreHeartbeat(ctx context.Context,
   428  	hb pb.LogStoreHeartbeat) (pb.CommandBatch, error) {
   429  	data := MustMarshal(&hb)
   430  	cmd := hakeeper.GetLogStoreHeartbeatCmd(data)
   431  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   432  	if result, err := l.propose(ctx, session, cmd); err != nil {
   433  		l.runtime.Logger().Error("propose failed", zap.Error(err))
   434  		return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err)
   435  	} else {
   436  		var cb pb.CommandBatch
   437  		MustUnmarshal(&cb, result.Data)
   438  		return cb, nil
   439  	}
   440  }
   441  
   442  func (l *store) addCNStoreHeartbeat(ctx context.Context,
   443  	hb pb.CNStoreHeartbeat) (pb.CommandBatch, error) {
   444  	data := MustMarshal(&hb)
   445  	cmd := hakeeper.GetCNStoreHeartbeatCmd(data)
   446  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   447  	if result, err := l.propose(ctx, session, cmd); err != nil {
   448  		l.runtime.Logger().Error("propose failed", zap.Error(err))
   449  		return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err)
   450  	} else {
   451  		var cb pb.CommandBatch
   452  		MustUnmarshal(&cb, result.Data)
   453  		return cb, nil
   454  	}
   455  }
   456  
   457  func (l *store) cnAllocateID(ctx context.Context,
   458  	req pb.CNAllocateID) (uint64, error) {
   459  	cmd := hakeeper.GetGetIDCmd(req.Batch)
   460  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   461  	result, err := l.propose(ctx, session, cmd)
   462  	if err != nil {
   463  		l.runtime.Logger().Error("propose get id failed", zap.Error(err))
   464  		return 0, err
   465  	}
   466  	return result.Value, nil
   467  }
   468  
   469  func (l *store) addDNStoreHeartbeat(ctx context.Context,
   470  	hb pb.DNStoreHeartbeat) (pb.CommandBatch, error) {
   471  	data := MustMarshal(&hb)
   472  	cmd := hakeeper.GetDNStoreHeartbeatCmd(data)
   473  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   474  	if result, err := l.propose(ctx, session, cmd); err != nil {
   475  		l.runtime.Logger().Error("propose failed", zap.Error(err))
   476  		return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err)
   477  	} else {
   478  		var cb pb.CommandBatch
   479  		MustUnmarshal(&cb, result.Data)
   480  		return cb, nil
   481  	}
   482  }
   483  
   484  func (l *store) getCommandBatch(ctx context.Context,
   485  	uuid string) (pb.CommandBatch, error) {
   486  	v, err := l.read(ctx,
   487  		hakeeper.DefaultHAKeeperShardID, &hakeeper.ScheduleCommandQuery{UUID: uuid})
   488  	if err != nil {
   489  		return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err)
   490  	}
   491  	return *(v.(*pb.CommandBatch)), nil
   492  }
   493  
   494  func (l *store) getClusterDetails(ctx context.Context) (pb.ClusterDetails, error) {
   495  	v, err := l.read(ctx,
   496  		hakeeper.DefaultHAKeeperShardID, &hakeeper.ClusterDetailsQuery{Cfg: l.cfg.GetHAKeeperConfig()})
   497  	if err != nil {
   498  		return pb.ClusterDetails{}, handleNotHAKeeperError(ctx, err)
   499  	}
   500  	return *(v.(*pb.ClusterDetails)), nil
   501  }
   502  
   503  func (l *store) addScheduleCommands(ctx context.Context,
   504  	term uint64, cmds []pb.ScheduleCommand) error {
   505  	cmd := hakeeper.GetUpdateCommandsCmd(term, cmds)
   506  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   507  	if _, err := l.propose(ctx, session, cmd); err != nil {
   508  		return handleNotHAKeeperError(ctx, err)
   509  	}
   510  	return nil
   511  }
   512  
   513  func (l *store) getLeaseHolderID(ctx context.Context,
   514  	shardID uint64, entries []raftpb.Entry) (uint64, error) {
   515  	if len(entries) == 0 {
   516  		panic("empty entries")
   517  	}
   518  	// first entry is an update lease cmd
   519  	e := entries[0]
   520  	if !isRaftInternalEntry(e) && isSetLeaseHolderUpdate(l.decodeCmd(ctx, e)) {
   521  		return parseLeaseHolderID(l.decodeCmd(ctx, e)), nil
   522  	}
   523  	v, err := l.read(ctx, shardID, leaseHistoryQuery{lsn: e.Index})
   524  	if err != nil {
   525  		l.runtime.Logger().Error("failed to read", zap.Error(err))
   526  		return 0, err
   527  	}
   528  	return v.(uint64), nil
   529  }
   530  
   531  func (l *store) decodeCmd(ctx context.Context, e raftpb.Entry) []byte {
   532  	if e.Type == raftpb.ApplicationEntry {
   533  		panic(moerr.NewInvalidState(ctx, "unexpected entry type"))
   534  	}
   535  	if e.Type == raftpb.EncodedEntry {
   536  		if e.Cmd[0] != 0 {
   537  			panic(moerr.NewInvalidState(ctx, "unexpected cmd header"))
   538  		}
   539  		return e.Cmd[1:]
   540  	}
   541  	panic(moerr.NewInvalidState(ctx, "invalid cmd"))
   542  }
   543  
   544  func isRaftInternalEntry(e raftpb.Entry) bool {
   545  	if len(e.Cmd) == 0 {
   546  		return true
   547  	}
   548  	return e.Type == raftpb.ConfigChangeEntry || e.Type == raftpb.MetadataEntry
   549  }
   550  
   551  func (l *store) markEntries(ctx context.Context,
   552  	shardID uint64, entries []raftpb.Entry) ([]pb.LogRecord, error) {
   553  	if len(entries) == 0 {
   554  		return []pb.LogRecord{}, nil
   555  	}
   556  	leaseHolderID, err := l.getLeaseHolderID(ctx, shardID, entries)
   557  	if err != nil {
   558  		return nil, err
   559  	}
   560  	result := make([]pb.LogRecord, 0)
   561  	for _, e := range entries {
   562  		if isRaftInternalEntry(e) {
   563  			// raft internal stuff
   564  			result = append(result, LogRecord{
   565  				Type: pb.Internal,
   566  				Lsn:  e.Index,
   567  			})
   568  			continue
   569  		}
   570  		cmd := l.decodeCmd(ctx, e)
   571  		if isSetLeaseHolderUpdate(cmd) {
   572  			leaseHolderID = parseLeaseHolderID(cmd)
   573  			result = append(result, LogRecord{
   574  				Type: pb.LeaseUpdate,
   575  				Lsn:  e.Index,
   576  			})
   577  			continue
   578  		}
   579  		if isUserUpdate(cmd) {
   580  			if parseLeaseHolderID(cmd) != leaseHolderID {
   581  				// lease not match, skip
   582  				result = append(result, LogRecord{
   583  					Type: pb.LeaseRejected,
   584  					Lsn:  e.Index,
   585  				})
   586  				continue
   587  			}
   588  			result = append(result, LogRecord{
   589  				Data: cmd,
   590  				Type: pb.UserRecord,
   591  				Lsn:  e.Index,
   592  			})
   593  		}
   594  	}
   595  	return result, nil
   596  }
   597  
   598  func getNextIndex(entries []raftpb.Entry, firstIndex Lsn, lastIndex Lsn) Lsn {
   599  	if len(entries) == 0 {
   600  		return firstIndex
   601  	}
   602  	lastResultIndex := entries[len(entries)-1].Index
   603  	if lastResultIndex+1 < lastIndex {
   604  		return lastResultIndex + 1
   605  	}
   606  	return firstIndex
   607  }
   608  
   609  // high priority test
   610  // FIXME: add a test that queries the log with LeaseUpdate, LeaseRejected
   611  // entries, no matter what is the firstLsn specified in queryLog(), returned
   612  // results should make sense
   613  func (l *store) queryLog(ctx context.Context, shardID uint64,
   614  	firstIndex Lsn, maxSize uint64) ([]LogRecord, Lsn, error) {
   615  	v, err := l.read(ctx, shardID, indexQuery{})
   616  	if err != nil {
   617  		return nil, 0, err
   618  	}
   619  	lastIndex := v.(uint64)
   620  	// FIXME: check whether lastIndex >= firstIndex
   621  	rs, err := l.nh.QueryRaftLog(shardID, firstIndex, lastIndex+1, maxSize)
   622  	if err != nil {
   623  		l.runtime.Logger().Error("QueryRaftLog failed", zap.Error(err))
   624  		return nil, 0, err
   625  	}
   626  	select {
   627  	case v := <-rs.ResultC():
   628  		if v.Completed() {
   629  			entries, logRange := v.RaftLogs()
   630  			next := getNextIndex(entries, firstIndex, logRange.LastIndex)
   631  			results, err := l.markEntries(ctx, shardID, entries)
   632  			if err != nil {
   633  				l.runtime.Logger().Error("markEntries failed", zap.Error(err))
   634  				return nil, 0, err
   635  			}
   636  			return results, next, nil
   637  		} else if v.RequestOutOfRange() {
   638  			// FIXME: add more details to the log, what is the available range
   639  			l.runtime.Logger().Error("OutOfRange query found")
   640  			return nil, 0, dragonboat.ErrInvalidRange
   641  		}
   642  		panic(moerr.NewInvalidState(ctx, "unexpected rs state"))
   643  	case <-ctx.Done():
   644  		return nil, 0, ctx.Err()
   645  	}
   646  }
   647  
   648  func (l *store) ticker(ctx context.Context) {
   649  	if l.cfg.HAKeeperTickInterval.Duration == 0 {
   650  		panic("invalid HAKeeperTickInterval")
   651  	}
   652  	l.runtime.Logger().Info("Hakeeper interval configs",
   653  		zap.Int64("HAKeeperTickInterval", int64(l.cfg.HAKeeperTickInterval.Duration)),
   654  		zap.Int64("HAKeeperCheckInterval", int64(l.cfg.HAKeeperCheckInterval.Duration)))
   655  	ticker := time.NewTicker(l.cfg.HAKeeperTickInterval.Duration)
   656  	defer ticker.Stop()
   657  	if l.cfg.HAKeeperCheckInterval.Duration == 0 {
   658  		panic("invalid HAKeeperCheckInterval")
   659  	}
   660  	defer func() {
   661  		l.runtime.Logger().Info("HAKeeper ticker stopped")
   662  	}()
   663  	haTicker := time.NewTicker(l.cfg.HAKeeperCheckInterval.Duration)
   664  	defer haTicker.Stop()
   665  
   666  	for {
   667  		select {
   668  		case <-ticker.C:
   669  			l.hakeeperTick()
   670  		case <-haTicker.C:
   671  			l.hakeeperCheck()
   672  		case <-ctx.Done():
   673  			return
   674  		}
   675  
   676  		select {
   677  		case <-ctx.Done():
   678  			return
   679  		default:
   680  		}
   681  	}
   682  }
   683  
   684  func (l *store) isLeaderHAKeeper() (bool, uint64, error) {
   685  	leaderID, term, ok, err := l.nh.GetLeaderID(hakeeper.DefaultHAKeeperShardID)
   686  	if err != nil {
   687  		return false, 0, err
   688  	}
   689  	replicaID := atomic.LoadUint64(&l.haKeeperReplicaID)
   690  	return ok && replicaID != 0 && leaderID == replicaID, term, nil
   691  }
   692  
   693  // TODO: add test for this
   694  func (l *store) hakeeperTick() {
   695  	isLeader, _, err := l.isLeaderHAKeeper()
   696  	if err != nil {
   697  		l.runtime.Logger().Error("failed to get HAKeeper Leader ID", zap.Error(err))
   698  		return
   699  	}
   700  
   701  	if isLeader {
   702  		cmd := hakeeper.GetTickCmd()
   703  		ctx, cancel := context.WithTimeout(context.Background(), hakeeperDefaultTimeout)
   704  		defer cancel()
   705  		session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   706  		if _, err := l.propose(ctx, session, cmd); err != nil {
   707  			l.runtime.Logger().Error("propose tick failed", zap.Error(err))
   708  			return
   709  		}
   710  	}
   711  }
   712  
   713  func (l *store) getHeartbeatMessage() pb.LogStoreHeartbeat {
   714  	m := pb.LogStoreHeartbeat{
   715  		UUID:           l.id(),
   716  		RaftAddress:    l.cfg.RaftAddress,
   717  		ServiceAddress: l.cfg.ServiceAddress,
   718  		GossipAddress:  l.cfg.GossipAddress,
   719  		Replicas:       make([]pb.LogReplicaInfo, 0),
   720  	}
   721  	opts := dragonboat.NodeHostInfoOption{
   722  		SkipLogInfo: true,
   723  	}
   724  	nhi := l.nh.GetNodeHostInfo(opts)
   725  	for _, ci := range nhi.ShardInfoList {
   726  		if ci.Pending {
   727  			l.runtime.Logger().Info(fmt.Sprintf("shard %d is pending, not included into the heartbeat",
   728  				ci.ShardID))
   729  			continue
   730  		}
   731  		if ci.ConfigChangeIndex == 0 {
   732  			panic("ci.ConfigChangeIndex is 0")
   733  		}
   734  		replicaInfo := pb.LogReplicaInfo{
   735  			LogShardInfo: pb.LogShardInfo{
   736  				ShardID:  ci.ShardID,
   737  				Replicas: ci.Nodes,
   738  				Epoch:    ci.ConfigChangeIndex,
   739  				LeaderID: ci.LeaderID,
   740  				Term:     ci.Term,
   741  			},
   742  			ReplicaID: ci.ReplicaID,
   743  		}
   744  		// FIXME: why we need this?
   745  		if replicaInfo.Replicas == nil {
   746  			replicaInfo.Replicas = make(map[uint64]dragonboat.Target)
   747  		}
   748  		m.Replicas = append(m.Replicas, replicaInfo)
   749  	}
   750  	return m
   751  }