github.com/matrixorigin/matrixone@v1.2.0/pkg/logservice/store.go

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package logservice
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"sync"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	"github.com/cockroachdb/errors"
    25  	"github.com/lni/dragonboat/v4"
    26  	cli "github.com/lni/dragonboat/v4/client"
    27  	"github.com/lni/dragonboat/v4/config"
    28  	"github.com/lni/dragonboat/v4/plugin/tan"
    29  	"github.com/lni/dragonboat/v4/plugin/tee"
    30  	"github.com/lni/dragonboat/v4/raftpb"
    31  	sm "github.com/lni/dragonboat/v4/statemachine"
    32  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    33  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    34  	"github.com/matrixorigin/matrixone/pkg/common/stopper"
    35  	"github.com/matrixorigin/matrixone/pkg/hakeeper"
    36  	"github.com/matrixorigin/matrixone/pkg/hakeeper/bootstrap"
    37  	"github.com/matrixorigin/matrixone/pkg/hakeeper/checkers"
    38  	"github.com/matrixorigin/matrixone/pkg/hakeeper/task"
    39  	"github.com/matrixorigin/matrixone/pkg/logutil"
    40  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    41  	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
    42  	"github.com/matrixorigin/matrixone/pkg/taskservice"
    43  	"go.uber.org/zap"
    44  )
    45  
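         // storeMeta is the metadata attached to this NodeHost's gossip messages;
         // it currently carries only the log service address of the store.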
    46  type storeMeta struct {
    47  	serviceAddress string
    48  }
    49  
    50  func (l *storeMeta) marshal() []byte {
    51  	return []byte(l.serviceAddress)
    52  }
    53  
    54  func (l *storeMeta) unmarshal(data []byte) {
    55  	l.serviceAddress = string(data)
    56  }
    57  
    58  func isUserUpdate(cmd []byte) bool {
    59  	return parseCmdTag(cmd) == pb.UserEntryUpdate
    60  }
    61  
    62  func isSetLeaseHolderUpdate(cmd []byte) bool {
    63  	return parseCmdTag(cmd) == pb.LeaseHolderIDUpdate
    64  }
    65  
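         // getNodeHostConfig translates the log service Config into a dragonboat
         // NodeHostConfig, wiring up the tan-based log db, the gossip settings and
         // the store metadata exchanged via gossip.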
    66  func getNodeHostConfig(cfg Config) config.NodeHostConfig {
    67  	meta := storeMeta{
    68  		serviceAddress: cfg.LogServiceServiceAddr(),
    69  	}
    70  	if cfg.GossipProbeInterval.Duration == 0 {
    71  		panic("cfg.GossipProbeInterval.Duration is 0")
    72  	}
    73  	logdb := config.GetTinyMemLogDBConfig()
    74  	logdb.KVWriteBufferSize = cfg.LogDBBufferSize
    75  	logdbFactory := (config.LogDBFactory)(nil)
    76  	logdbFactory = tan.Factory
    77  	if cfg.UseTeeLogDB {
     78  		logutil.Warn("using tee-based logdb backed by pebble and tan, for testing purposes only")
    79  		logdbFactory = tee.TanPebbleLogDBFactory
    80  	}
    81  	return config.NodeHostConfig{
    82  		DeploymentID:        cfg.DeploymentID,
    83  		NodeHostID:          cfg.UUID,
    84  		NodeHostDir:         cfg.DataDir,
    85  		RTTMillisecond:      cfg.RTTMillisecond,
    86  		AddressByNodeHostID: true,
    87  		RaftAddress:         cfg.RaftServiceAddr(),
    88  		ListenAddress:       cfg.RaftListenAddr(),
    89  		Expert: config.ExpertConfig{
    90  			FS:           cfg.FS,
    91  			LogDBFactory: logdbFactory,
     92  			// FIXME: dragonboat needs to be updated to make this field a
     93  			// first-class citizen
    94  			TestGossipProbeInterval: cfg.GossipProbeInterval.Duration,
    95  			LogDB:                   logdb,
    96  			ExplicitHostname:        cfg.ExplicitHostname,
    97  		},
    98  		Gossip: config.GossipConfig{
    99  			BindAddress:      cfg.GossipListenAddr(),
   100  			AdvertiseAddress: cfg.GossipServiceAddr(),
   101  			Seed:             cfg.GossipSeedAddresses,
   102  			Meta:             meta.marshal(),
   103  			CanUseSelfAsSeed: cfg.GossipAllowSelfAsSeed,
   104  		},
   105  	}
   106  }
   107  
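         // getRaftConfig returns the raft configuration used by every replica
         // started by this store.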
   108  func getRaftConfig(shardID uint64, replicaID uint64) config.Config {
   109  	return config.Config{
   110  		ShardID:             shardID,
   111  		ReplicaID:           replicaID,
   112  		CheckQuorum:         true,
   113  		PreVote:             true,
   114  		ElectionRTT:         10,
   115  		HeartbeatRTT:        1,
   116  		OrderedConfigChange: true,
   117  	}
   118  }
   119  
   120  // store manages log shards including the HAKeeper shard on each node.
   121  type store struct {
   122  	cfg               Config
   123  	nh                *dragonboat.NodeHost
   124  	haKeeperReplicaID uint64
   125  	checker           hakeeper.Checker
   126  	alloc             hakeeper.IDAllocator
   127  	stopper           *stopper.Stopper
   128  	tickerStopper     *stopper.Stopper
   129  	runtime           runtime.Runtime
   130  
   131  	bootstrapCheckCycles uint64
   132  	bootstrapMgr         *bootstrap.Manager
   133  
   134  	taskScheduler hakeeper.TaskScheduler
   135  
   136  	mu struct {
   137  		sync.Mutex
   138  		metadata metadata.LogStore
   139  	}
   140  	shardSnapshotInfo shardSnapshotInfo
   141  	snapshotMgr       *snapshotManager
   142  }
   143  
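         // newLogStore creates a log store backed by a dragonboat NodeHost and
         // starts its truncation worker.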
   144  func newLogStore(cfg Config,
   145  	taskServiceGetter func() taskservice.TaskService,
   146  	rt runtime.Runtime) (*store, error) {
   147  	nh, err := dragonboat.NewNodeHost(getNodeHostConfig(cfg))
   148  	if err != nil {
   149  		return nil, err
   150  	}
   151  	hakeeperConfig := cfg.GetHAKeeperConfig()
   152  	rt.SubLogger(runtime.SystemInit).Info("HAKeeper Timeout Configs",
   153  		zap.Int64("LogStoreTimeout", int64(hakeeperConfig.LogStoreTimeout)),
   154  		zap.Int64("DNStoreTimeout", int64(hakeeperConfig.TNStoreTimeout)),
   155  		zap.Int64("CNStoreTimeout", int64(hakeeperConfig.CNStoreTimeout)),
   156  	)
   157  	ls := &store{
   158  		cfg:           cfg,
   159  		nh:            nh,
   160  		checker:       checkers.NewCoordinator(hakeeperConfig),
   161  		taskScheduler: task.NewScheduler(taskServiceGetter, hakeeperConfig),
   162  		alloc:         newIDAllocator(),
   163  		stopper:       stopper.NewStopper("log-store"),
   164  		tickerStopper: stopper.NewStopper("hakeeper-ticker"),
   165  		runtime:       rt,
   166  
   167  		shardSnapshotInfo: newShardSnapshotInfo(),
   168  		snapshotMgr:       newSnapshotManager(&cfg),
   169  	}
   170  	ls.mu.metadata = metadata.LogStore{UUID: cfg.UUID}
   171  	if err := ls.stopper.RunNamedTask("truncation-worker", func(ctx context.Context) {
   172  		rt.SubLogger(runtime.SystemInit).Info("logservice truncation worker started")
   173  		ls.truncationWorker(ctx)
   174  	}); err != nil {
   175  		return nil, err
   176  	}
   177  	return ls, nil
   178  }
   179  
   180  func (l *store) close() error {
   181  	l.tickerStopper.Stop()
   182  	l.stopper.Stop()
   183  	if l.nh != nil {
   184  		l.nh.Close()
   185  	}
   186  	return nil
   187  }
   188  
   189  func (l *store) id() string {
   190  	return l.nh.ID()
   191  }
   192  
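         // startReplicas restarts all replicas recorded in the store's local
         // metadata, including the HAKeeper replica if one is recorded.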
   193  func (l *store) startReplicas() error {
   194  	l.mu.Lock()
   195  	shards := make([]metadata.LogShard, 0)
   196  	shards = append(shards, l.mu.metadata.Shards...)
   197  	l.mu.Unlock()
   198  
   199  	for _, rec := range shards {
   200  		if rec.ShardID == hakeeper.DefaultHAKeeperShardID {
   201  			if err := l.startHAKeeperReplica(rec.ReplicaID, nil, false); err != nil {
   202  				return err
   203  			}
   204  		} else {
   205  			if err := l.startReplica(rec.ShardID, rec.ReplicaID, nil, false); err != nil {
   206  				return err
   207  			}
   208  		}
   209  	}
   210  	return nil
   211  }
   212  
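         // startHAKeeperReplica starts the local HAKeeper replica and, unless
         // workers are disabled, launches the HAKeeper ticker.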
   213  func (l *store) startHAKeeperReplica(replicaID uint64,
   214  	initialReplicas map[uint64]dragonboat.Target, join bool) error {
   215  	raftConfig := getRaftConfig(hakeeper.DefaultHAKeeperShardID, replicaID)
   216  	if err := l.nh.StartReplica(initialReplicas,
   217  		join, hakeeper.NewStateMachine, raftConfig); err != nil {
   218  		return err
   219  	}
   220  	l.addMetadata(hakeeper.DefaultHAKeeperShardID, replicaID)
   221  	atomic.StoreUint64(&l.haKeeperReplicaID, replicaID)
   222  	if !l.cfg.DisableWorkers {
   223  		if err := l.tickerStopper.RunNamedTask("hakeeper-ticker", func(ctx context.Context) {
   224  			l.runtime.SubLogger(runtime.SystemInit).Info("HAKeeper ticker started")
   225  			l.ticker(ctx)
   226  		}); err != nil {
   227  			return err
   228  		}
   229  	}
   230  	return nil
   231  }
   232  
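         // startReplica starts a regular log shard replica; the HAKeeper shard must
         // be started via startHAKeeperReplica instead.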
   233  func (l *store) startReplica(shardID uint64, replicaID uint64,
   234  	initialReplicas map[uint64]dragonboat.Target, join bool) error {
   235  	if shardID == hakeeper.DefaultHAKeeperShardID {
   236  		return moerr.NewInvalidInputNoCtx("shardID %d does not match DefaultHAKeeperShardID %d", shardID, hakeeper.DefaultHAKeeperShardID)
   237  	}
   238  	cfg := getRaftConfig(shardID, replicaID)
   239  	if err := l.snapshotMgr.Init(shardID, replicaID); err != nil {
   240  		panic(err)
   241  	}
   242  	if err := l.nh.StartReplica(initialReplicas, join, newStateMachine, cfg); err != nil {
   243  		return err
   244  	}
   245  	l.addMetadata(shardID, replicaID)
   246  	return nil
   247  }
   248  
   249  func (l *store) stopReplica(shardID uint64, replicaID uint64) error {
   250  	if shardID == hakeeper.DefaultHAKeeperShardID {
   251  		defer func() {
   252  			atomic.StoreUint64(&l.haKeeperReplicaID, 0)
   253  		}()
   254  	}
   255  	return l.nh.StopReplica(shardID, replicaID)
   256  }
   257  
   258  func (l *store) requestLeaderTransfer(shardID uint64, targetReplicaID uint64) error {
   259  	return l.nh.RequestLeaderTransfer(shardID, targetReplicaID)
   260  }
   261  
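         // addReplica adds a replica to the specified shard via a membership
         // change, retrying while the shard is not ready.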
   262  func (l *store) addReplica(shardID uint64, replicaID uint64,
   263  	target dragonboat.Target, cci uint64) error {
    264  	// Use a slightly larger timeout so that the request does not time out and
    265  	// eventually come back as dragonboat.ErrRejected, in which case the
    266  	// operation would take even longer to finish.
   267  	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
   268  	defer cancel()
   269  	count := 0
   270  	for {
   271  		count++
   272  		if err := l.nh.SyncRequestAddReplica(ctx, shardID, replicaID, target, cci); err != nil {
   273  			if errors.Is(err, dragonboat.ErrShardNotReady) {
   274  				l.retryWait()
   275  				continue
   276  			}
   277  			if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 {
   278  				return dragonboat.ErrTimeout
   279  			}
   280  			return err
   281  		}
   282  		return nil
   283  	}
   284  }
   285  
   286  func (l *store) removeReplica(shardID uint64, replicaID uint64, cci uint64) error {
   287  	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
   288  	defer cancel()
   289  	count := 0
   290  	for {
   291  		count++
   292  		if err := l.nh.SyncRequestDeleteReplica(ctx, shardID, replicaID, cci); err != nil {
   293  			if errors.Is(err, dragonboat.ErrShardNotReady) {
   294  				l.retryWait()
   295  				continue
   296  			}
   297  			// FIXME: internally handle dragonboat.ErrTimeoutTooSmall
   298  			if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 {
   299  				return dragonboat.ErrTimeout
   300  			}
   301  			return err
   302  		}
   303  		l.removeMetadata(shardID, replicaID)
   304  		return nil
   305  	}
   306  }
   307  
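         // retryWait sleeps for roughly half an RTT (at least one millisecond)
         // before the caller retries a temporarily failed request.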
   308  func (l *store) retryWait() {
   309  	if l.nh.NodeHostConfig().RTTMillisecond == 1 {
   310  		time.Sleep(time.Millisecond)
   311  	}
   312  	time.Sleep(time.Duration(l.nh.NodeHostConfig().RTTMillisecond/2) * time.Millisecond)
   313  }
   314  
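         // propose synchronously proposes cmd to the shard identified by the client
         // session, retrying while the shard is not ready or the system is busy.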
   315  func (l *store) propose(ctx context.Context,
   316  	session *cli.Session, cmd []byte) (sm.Result, error) {
   317  	count := 0
   318  	for {
   319  		count++
   320  		result, err := l.nh.SyncPropose(ctx, session, cmd)
   321  		if err != nil {
   322  			if errors.Is(err, dragonboat.ErrShardNotReady) ||
   323  				errors.Is(err, dragonboat.ErrSystemBusy) {
   324  				l.retryWait()
   325  				continue
   326  			}
   327  			if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 {
   328  				return sm.Result{}, dragonboat.ErrTimeout
   329  			}
   330  			return sm.Result{}, err
   331  		}
   332  		return result, nil
   333  	}
   334  }
   335  
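         // read performs a linearizable read of the shard's state machine, retrying
         // while the shard is not ready.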
   336  func (l *store) read(ctx context.Context,
   337  	shardID uint64, query interface{}) (interface{}, error) {
   338  	count := 0
   339  	for {
   340  		count++
   341  		result, err := l.nh.SyncRead(ctx, shardID, query)
   342  		if err != nil {
   343  			if errors.Is(err, dragonboat.ErrShardNotReady) {
   344  				l.retryWait()
   345  				continue
   346  			}
   347  			if errors.Is(err, dragonboat.ErrTimeoutTooSmall) && count > 1 {
   348  				return nil, dragonboat.ErrTimeout
   349  			}
   350  			return nil, err
   351  		}
   352  		return result, nil
   353  	}
   354  }
   355  
   356  func (l *store) getOrExtendTNLease(ctx context.Context,
   357  	shardID uint64, tnID uint64) error {
   358  	session := l.nh.GetNoOPSession(shardID)
   359  	cmd := getSetLeaseHolderCmd(tnID)
   360  	_, err := l.propose(ctx, session, cmd)
   361  	return err
   362  }
   363  
   364  func (l *store) truncateLog(ctx context.Context,
   365  	shardID uint64, index Lsn) error {
   366  	session := l.nh.GetNoOPSession(shardID)
   367  	cmd := getSetTruncatedLsnCmd(index)
   368  	result, err := l.propose(ctx, session, cmd)
   369  	if err != nil {
   370  		l.runtime.Logger().Error("propose truncate log cmd failed", zap.Error(err))
   371  		return err
   372  	}
   373  	if result.Value > 0 {
   374  		l.runtime.Logger().Error(fmt.Sprintf("shardID %d already truncated to index %d", shardID, result.Value))
   375  		return moerr.NewInvalidTruncateLsn(ctx, shardID, result.Value)
   376  	}
   377  	return nil
   378  }
   379  
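         // append proposes a user entry to the shard and returns its Lsn; the
         // proposal is rejected when the caller is no longer the lease holder.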
   380  func (l *store) append(ctx context.Context,
   381  	shardID uint64, cmd []byte) (Lsn, error) {
   382  	session := l.nh.GetNoOPSession(shardID)
   383  	result, err := l.propose(ctx, session, cmd)
   384  	if err != nil {
   385  		l.runtime.Logger().Error("propose failed", zap.Error(err))
   386  		return 0, err
   387  	}
   388  	if len(result.Data) > 0 {
   389  		l.runtime.Logger().Error("not current lease holder", zap.Uint64("data", binaryEnc.Uint64(result.Data)))
   390  		return 0, moerr.NewNotLeaseHolder(ctx, binaryEnc.Uint64(result.Data))
   391  	}
   392  	if result.Value == 0 {
   393  		panic(moerr.NewInvalidState(ctx, "unexpected Lsn value"))
   394  	}
   395  	return result.Value, nil
   396  }
   397  
   398  func (l *store) getTruncatedLsn(ctx context.Context,
   399  	shardID uint64) (uint64, error) {
   400  	v, err := l.read(ctx, shardID, truncatedLsnQuery{})
   401  	if err != nil {
   402  		return 0, err
   403  	}
   404  	return v.(uint64), nil
   405  }
   406  
   407  func (l *store) tsoUpdate(ctx context.Context, count uint64) (uint64, error) {
   408  	cmd := getTsoUpdateCmd(count)
   409  	session := l.nh.GetNoOPSession(firstLogShardID)
   410  	result, err := l.propose(ctx, session, cmd)
   411  	if err != nil {
    412  		l.runtime.Logger().Error("failed to propose tso update", zap.Error(err))
   413  		return 0, err
   414  	}
   415  	return result.Value, nil
   416  }
   417  
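         // handleNotHAKeeperError converts dragonboat.ErrShardNotFound into a
         // NoHAKeeper error so callers know the HAKeeper shard is unavailable here.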
   418  func handleNotHAKeeperError(ctx context.Context, err error) error {
   419  	if err == nil {
   420  		return err
   421  	}
   422  	if errors.Is(err, dragonboat.ErrShardNotFound) {
   423  		return moerr.NewNoHAKeeper(ctx)
   424  	}
   425  	return err
   426  }
   427  
   428  func (l *store) addLogStoreHeartbeat(ctx context.Context,
   429  	hb pb.LogStoreHeartbeat) (pb.CommandBatch, error) {
   430  	data := MustMarshal(&hb)
   431  	cmd := hakeeper.GetLogStoreHeartbeatCmd(data)
   432  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   433  	if result, err := l.propose(ctx, session, cmd); err != nil {
   434  		l.runtime.Logger().Error("propose failed", zap.Error(err))
   435  		return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err)
   436  	} else {
   437  		var cb pb.CommandBatch
   438  		MustUnmarshal(&cb, result.Data)
   439  		return cb, nil
   440  	}
   441  }
   442  
   443  func (l *store) addCNStoreHeartbeat(ctx context.Context,
   444  	hb pb.CNStoreHeartbeat) (pb.CommandBatch, error) {
   445  	data := MustMarshal(&hb)
   446  	cmd := hakeeper.GetCNStoreHeartbeatCmd(data)
   447  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   448  	if result, err := l.propose(ctx, session, cmd); err != nil {
   449  		l.runtime.Logger().Error("propose failed", zap.Error(err))
   450  		return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err)
   451  	} else {
   452  		var cb pb.CommandBatch
   453  		MustUnmarshal(&cb, result.Data)
   454  		return cb, nil
   455  	}
   456  }
   457  
   458  func (l *store) cnAllocateID(ctx context.Context,
   459  	req pb.CNAllocateID) (uint64, error) {
   460  	cmd := hakeeper.GetAllocateIDCmd(req)
   461  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   462  	result, err := l.propose(ctx, session, cmd)
   463  	if err != nil {
   464  		l.runtime.Logger().Error("propose get id failed", zap.Error(err))
   465  		return 0, err
   466  	}
   467  	return result.Value, nil
   468  }
   469  
   470  func (l *store) addTNStoreHeartbeat(ctx context.Context,
   471  	hb pb.TNStoreHeartbeat) (pb.CommandBatch, error) {
   472  	data := MustMarshal(&hb)
   473  	cmd := hakeeper.GetTNStoreHeartbeatCmd(data)
   474  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   475  	if result, err := l.propose(ctx, session, cmd); err != nil {
   476  		l.runtime.Logger().Error("propose failed", zap.Error(err))
   477  		return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err)
   478  	} else {
   479  		var cb pb.CommandBatch
   480  		MustUnmarshal(&cb, result.Data)
   481  		return cb, nil
   482  	}
   483  }
   484  
   485  func (l *store) getCommandBatch(ctx context.Context,
   486  	uuid string) (pb.CommandBatch, error) {
   487  	v, err := l.read(ctx,
   488  		hakeeper.DefaultHAKeeperShardID, &hakeeper.ScheduleCommandQuery{UUID: uuid})
   489  	if err != nil {
   490  		return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err)
   491  	}
   492  	return *(v.(*pb.CommandBatch)), nil
   493  }
   494  
   495  func (l *store) getClusterDetails(ctx context.Context) (pb.ClusterDetails, error) {
   496  	v, err := l.read(ctx,
   497  		hakeeper.DefaultHAKeeperShardID, &hakeeper.ClusterDetailsQuery{Cfg: l.cfg.GetHAKeeperConfig()})
   498  	if err != nil {
   499  		return pb.ClusterDetails{}, handleNotHAKeeperError(ctx, err)
   500  	}
   501  	return *(v.(*pb.ClusterDetails)), nil
   502  }
   503  
   504  func (l *store) addScheduleCommands(ctx context.Context,
   505  	term uint64, cmds []pb.ScheduleCommand) error {
   506  	cmd := hakeeper.GetUpdateCommandsCmd(term, cmds)
   507  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   508  	if _, err := l.propose(ctx, session, cmd); err != nil {
   509  		return handleNotHAKeeperError(ctx, err)
   510  	}
   511  	return nil
   512  }
   513  
   514  func (l *store) getLeaseHolderID(ctx context.Context,
   515  	shardID uint64, entries []raftpb.Entry) (uint64, error) {
   516  	if len(entries) == 0 {
   517  		panic("empty entries")
   518  	}
    519  	// if the first entry is a lease update cmd, take the lease holder ID from it directly
   520  	e := entries[0]
   521  	if !isRaftInternalEntry(e) && isSetLeaseHolderUpdate(l.decodeCmd(ctx, e)) {
   522  		return parseLeaseHolderID(l.decodeCmd(ctx, e)), nil
   523  	}
   524  	v, err := l.read(ctx, shardID, leaseHistoryQuery{lsn: e.Index})
   525  	if err != nil {
   526  		l.runtime.Logger().Error("failed to read", zap.Error(err))
   527  		return 0, err
   528  	}
   529  	return v.(uint64), nil
   530  }
   531  
   532  func (l *store) updateCNLabel(ctx context.Context, label pb.CNStoreLabel) error {
   533  	state, err := l.getCheckerState()
   534  	if err != nil {
   535  		return err
   536  	}
   537  	if _, ok := state.CNState.Stores[label.UUID]; !ok {
   538  		return moerr.NewInternalError(ctx, "CN [%s] does not exist", label.UUID)
   539  	}
   540  	cmd := hakeeper.GetUpdateCNLabelCmd(label)
   541  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   542  	if result, err := l.propose(ctx, session, cmd); err != nil {
   543  		l.runtime.Logger().Error("failed to propose CN label",
   544  			zap.String("label", label.String()),
   545  			zap.Error(err))
   546  		return handleNotHAKeeperError(ctx, err)
   547  	} else {
   548  		var cb pb.CommandBatch
   549  		MustUnmarshal(&cb, result.Data)
   550  		return nil
   551  	}
   552  }
   553  
   554  func (l *store) updateCNWorkState(ctx context.Context, workState pb.CNWorkState) error {
   555  	state, err := l.getCheckerState()
   556  	if err != nil {
   557  		return err
   558  	}
   559  	if _, ok := state.CNState.Stores[workState.UUID]; !ok {
   560  		return moerr.NewInternalError(ctx, "CN [%s] does not exist", workState.UUID)
   561  	}
   562  	cmd := hakeeper.GetUpdateCNWorkStateCmd(workState)
   563  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   564  	if result, err := l.propose(ctx, session, cmd); err != nil {
   565  		l.runtime.Logger().Error("failed to propose CN work state",
   566  			zap.String("state", state.String()),
   567  			zap.Error(err))
   568  		return handleNotHAKeeperError(ctx, err)
   569  	} else {
   570  		var cb pb.CommandBatch
   571  		MustUnmarshal(&cb, result.Data)
   572  		return nil
   573  	}
   574  }
   575  
   576  func (l *store) patchCNStore(ctx context.Context, stateLabel pb.CNStateLabel) error {
   577  	state, err := l.getCheckerState()
   578  	if err != nil {
   579  		return err
   580  	}
   581  	if _, ok := state.CNState.Stores[stateLabel.UUID]; !ok {
   582  		return moerr.NewInternalError(ctx, "CN [%s] does not exist", stateLabel.UUID)
   583  	}
   584  	cmd := hakeeper.GetPatchCNStoreCmd(stateLabel)
   585  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   586  	if result, err := l.propose(ctx, session, cmd); err != nil {
   587  		l.runtime.Logger().Error("failed to propose CN patch store",
   588  			zap.String("state", state.String()),
   589  			zap.Error(err))
   590  		return handleNotHAKeeperError(ctx, err)
   591  	} else {
   592  		var cb pb.CommandBatch
   593  		MustUnmarshal(&cb, result.Data)
   594  		return nil
   595  	}
   596  }
   597  
   598  func (l *store) deleteCNStore(ctx context.Context, cnStore pb.DeleteCNStore) error {
   599  	state, err := l.getCheckerState()
   600  	if err != nil {
   601  		return err
   602  	}
   603  	if _, ok := state.CNState.Stores[cnStore.StoreID]; !ok {
   604  		return nil
   605  	}
   606  	cmd := hakeeper.GetDeleteCNStoreCmd(cnStore)
   607  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   608  	if result, err := l.propose(ctx, session, cmd); err != nil {
   609  		l.runtime.Logger().Error("failed to propose delete CN store",
   610  			zap.String("state", state.String()),
   611  			zap.Error(err))
   612  		return handleNotHAKeeperError(ctx, err)
   613  	} else {
   614  		var cb pb.CommandBatch
   615  		MustUnmarshal(&cb, result.Data)
   616  		return nil
   617  	}
   618  }
   619  
   620  func (l *store) addProxyHeartbeat(ctx context.Context, hb pb.ProxyHeartbeat) (pb.CommandBatch, error) {
   621  	data := MustMarshal(&hb)
   622  	cmd := hakeeper.GetProxyHeartbeatCmd(data)
   623  	session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   624  	if result, err := l.propose(ctx, session, cmd); err != nil {
   625  		l.runtime.Logger().Error("propose failed", zap.Error(err))
   626  		return pb.CommandBatch{}, handleNotHAKeeperError(ctx, err)
   627  	} else {
   628  		var cb pb.CommandBatch
   629  		MustUnmarshal(&cb, result.Data)
   630  		return cb, nil
   631  	}
   632  }
   633  
   634  func (l *store) decodeCmd(ctx context.Context, e raftpb.Entry) []byte {
   635  	if e.Type == raftpb.ApplicationEntry {
   636  		panic(moerr.NewInvalidState(ctx, "unexpected entry type"))
   637  	}
   638  	if e.Type == raftpb.EncodedEntry {
   639  		if e.Cmd[0] != 0 {
   640  			panic(moerr.NewInvalidState(ctx, "unexpected cmd header"))
   641  		}
   642  		return e.Cmd[1:]
   643  	}
   644  	panic(moerr.NewInvalidState(ctx, "invalid cmd"))
   645  }
   646  
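         // isRaftInternalEntry reports whether the entry was generated by raft
         // itself (config change, metadata or empty entry) rather than proposed by
         // a client.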
   647  func isRaftInternalEntry(e raftpb.Entry) bool {
   648  	if len(e.Cmd) == 0 {
   649  		return true
   650  	}
   651  	return e.Type == raftpb.ConfigChangeEntry || e.Type == raftpb.MetadataEntry
   652  }
   653  
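         // markEntries converts raw raft entries into log records, tagging raft
         // internal entries, lease updates and user entries proposed under a stale
         // lease.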
   654  func (l *store) markEntries(ctx context.Context,
   655  	shardID uint64, entries []raftpb.Entry) ([]pb.LogRecord, error) {
   656  	if len(entries) == 0 {
   657  		return []pb.LogRecord{}, nil
   658  	}
   659  	leaseHolderID, err := l.getLeaseHolderID(ctx, shardID, entries)
   660  	if err != nil {
   661  		return nil, err
   662  	}
   663  	result := make([]pb.LogRecord, 0)
   664  	for _, e := range entries {
   665  		if isRaftInternalEntry(e) {
    666  			// raft internal entry, record only its Lsn
   667  			result = append(result, LogRecord{
   668  				Type: pb.Internal,
   669  				Lsn:  e.Index,
   670  			})
   671  			continue
   672  		}
   673  		cmd := l.decodeCmd(ctx, e)
   674  		if isSetLeaseHolderUpdate(cmd) {
   675  			leaseHolderID = parseLeaseHolderID(cmd)
   676  			result = append(result, LogRecord{
   677  				Type: pb.LeaseUpdate,
   678  				Lsn:  e.Index,
   679  			})
   680  			continue
   681  		}
   682  		if isUserUpdate(cmd) {
   683  			if parseLeaseHolderID(cmd) != leaseHolderID {
    684  				// lease does not match, mark the entry as rejected
   685  				result = append(result, LogRecord{
   686  					Type: pb.LeaseRejected,
   687  					Lsn:  e.Index,
   688  				})
   689  				continue
   690  			}
   691  			result = append(result, LogRecord{
   692  				Data: cmd,
   693  				Type: pb.UserRecord,
   694  				Lsn:  e.Index,
   695  			})
   696  		}
   697  	}
   698  	return result, nil
   699  }
   700  
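         // getNextIndex returns the Lsn the next query should start from: one past
         // the last returned entry, or firstIndex when the returned entries already
         // reach the end of the queried range.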
   701  func getNextIndex(entries []raftpb.Entry, firstIndex Lsn, lastIndex Lsn) Lsn {
   702  	if len(entries) == 0 {
   703  		return firstIndex
   704  	}
   705  	lastResultIndex := entries[len(entries)-1].Index
   706  	if lastResultIndex+1 < lastIndex {
   707  		return lastResultIndex + 1
   708  	}
   709  	return firstIndex
   710  }
   711  
    712  // high priority test
    713  // FIXME: add a test that queries a log containing LeaseUpdate and
    714  // LeaseRejected entries and verifies that the returned results make sense
    715  // regardless of the firstLsn specified in queryLog()
   716  func (l *store) queryLog(ctx context.Context, shardID uint64,
   717  	firstIndex Lsn, maxSize uint64) ([]LogRecord, Lsn, error) {
   718  	v, err := l.read(ctx, shardID, indexQuery{})
   719  	if err != nil {
   720  		return nil, 0, err
   721  	}
   722  	lastIndex := v.(uint64)
   723  	// FIXME: check whether lastIndex >= firstIndex
   724  	rs, err := l.nh.QueryRaftLog(shardID, firstIndex, lastIndex+1, maxSize)
   725  	if err != nil {
   726  		l.runtime.Logger().Error("QueryRaftLog failed", zap.Error(err))
   727  		return nil, 0, err
   728  	}
   729  	select {
   730  	case v := <-rs.ResultC():
   731  		if v.Completed() {
   732  			entries, logRange := v.RaftLogs()
   733  			next := getNextIndex(entries, firstIndex, logRange.LastIndex)
   734  			results, err := l.markEntries(ctx, shardID, entries)
   735  			if err != nil {
   736  				l.runtime.Logger().Error("markEntries failed", zap.Error(err))
   737  				return nil, 0, err
   738  			}
   739  			return results, next, nil
   740  		} else if v.RequestOutOfRange() {
   741  			// FIXME: add more details to the log, what is the available range
   742  			l.runtime.Logger().Error("OutOfRange query found")
   743  			return nil, 0, dragonboat.ErrInvalidRange
   744  		}
   745  		panic(moerr.NewInvalidState(ctx, "unexpected rs state"))
   746  	case <-ctx.Done():
   747  		return nil, 0, ctx.Err()
   748  	}
   749  }
   750  
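         // tickerForTaskSchedule periodically triggers task scheduling for as long
         // as the HAKeeper state fetched from the leader reports HAKeeperRunning.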
   751  func (l *store) tickerForTaskSchedule(ctx context.Context, duration time.Duration) {
   752  	ticker := time.NewTicker(duration)
   753  	defer ticker.Stop()
   754  
   755  	for {
   756  		select {
   757  		case <-ticker.C:
   758  			state, _ := l.getCheckerStateFromLeader()
   759  			if state != nil && state.State == pb.HAKeeperRunning {
   760  				l.taskSchedule(state)
   761  			}
   762  
   763  		case <-ctx.Done():
   764  			return
   765  		}
   766  
    767  		// l.taskSchedule may block for a long time; this extra select checks
    768  		// the ctx status immediately once it resumes.
   769  		select {
   770  		case <-ctx.Done():
   771  			return
   772  		default:
   773  			// nothing to do
   774  		}
   775  	}
   776  
   777  }
   778  
   779  func (l *store) ticker(ctx context.Context) {
   780  	if l.cfg.HAKeeperTickInterval.Duration == 0 {
   781  		panic("invalid HAKeeperTickInterval")
   782  	}
   783  	l.runtime.Logger().Info("Hakeeper interval configs",
   784  		zap.Int64("HAKeeperTickInterval", int64(l.cfg.HAKeeperTickInterval.Duration)),
   785  		zap.Int64("HAKeeperCheckInterval", int64(l.cfg.HAKeeperCheckInterval.Duration)))
   786  	ticker := time.NewTicker(l.cfg.HAKeeperTickInterval.Duration)
   787  	defer ticker.Stop()
   788  	if l.cfg.HAKeeperCheckInterval.Duration == 0 {
   789  		panic("invalid HAKeeperCheckInterval")
   790  	}
   791  	defer func() {
   792  		l.runtime.Logger().Info("HAKeeper ticker stopped")
   793  	}()
   794  	haTicker := time.NewTicker(l.cfg.HAKeeperCheckInterval.Duration)
   795  	defer haTicker.Stop()
   796  
    797  	// Task scheduling runs in a separate goroutine rather than in the normal
    798  	// ticker loop so that the hakeeper's health check and tick update are not
    799  	// blocked by it; otherwise ticks would be skipped and the passage of time
    800  	// could not be estimated correctly.
   801  	go l.tickerForTaskSchedule(ctx, l.cfg.HAKeeperCheckInterval.Duration)
   802  
   803  	for {
   804  		select {
   805  		case <-ticker.C:
   806  			l.hakeeperTick()
   807  		case <-haTicker.C:
   808  			l.hakeeperCheck()
   809  		case <-ctx.Done():
   810  			return
   811  		}
   812  
   813  		select {
   814  		case <-ctx.Done():
   815  			return
   816  		default:
   817  		}
   818  	}
   819  }
   820  
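         // isLeaderHAKeeper reports whether the local HAKeeper replica is currently
         // the leader, along with the current term.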
   821  func (l *store) isLeaderHAKeeper() (bool, uint64, error) {
   822  	leaderID, term, ok, err := l.nh.GetLeaderID(hakeeper.DefaultHAKeeperShardID)
   823  	if err != nil {
   824  		return false, 0, err
   825  	}
   826  	replicaID := atomic.LoadUint64(&l.haKeeperReplicaID)
   827  	return ok && replicaID != 0 && leaderID == replicaID, term, nil
   828  }
   829  
   830  // TODO: add test for this
   831  func (l *store) hakeeperTick() {
   832  	isLeader, _, err := l.isLeaderHAKeeper()
   833  	if err != nil {
   834  		l.runtime.Logger().Error("failed to get HAKeeper Leader ID", zap.Error(err))
   835  		return
   836  	}
   837  
   838  	if isLeader {
   839  		cmd := hakeeper.GetTickCmd()
   840  		ctx, cancel := context.WithTimeout(context.Background(), hakeeperDefaultTimeout)
   841  		defer cancel()
   842  		session := l.nh.GetNoOPSession(hakeeper.DefaultHAKeeperShardID)
   843  		if _, err := l.propose(ctx, session, cmd); err != nil {
   844  			l.runtime.Logger().Error("propose tick failed", zap.Error(err))
   845  			return
   846  		}
   847  	}
   848  }
   849  
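         // getHeartbeatMessage collects the current shard information of this store
         // into a LogStoreHeartbeat; shards whose information is still pending are
         // skipped.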
   850  func (l *store) getHeartbeatMessage() pb.LogStoreHeartbeat {
   851  	m := pb.LogStoreHeartbeat{
   852  		UUID:           l.id(),
   853  		RaftAddress:    l.cfg.RaftServiceAddr(),
   854  		ServiceAddress: l.cfg.LogServiceServiceAddr(),
   855  		GossipAddress:  l.cfg.GossipServiceAddr(),
   856  		Replicas:       make([]pb.LogReplicaInfo, 0),
   857  	}
   858  	opts := dragonboat.NodeHostInfoOption{
   859  		SkipLogInfo: true,
   860  	}
   861  	nhi := l.nh.GetNodeHostInfo(opts)
   862  	for _, ci := range nhi.ShardInfoList {
   863  		if ci.Pending {
    864  			l.runtime.Logger().Info(fmt.Sprintf("shard %d is pending, not included in the heartbeat",
   865  				ci.ShardID))
   866  			continue
   867  		}
   868  		if ci.ConfigChangeIndex == 0 {
   869  			panic("ci.ConfigChangeIndex is 0")
   870  		}
   871  		replicaInfo := pb.LogReplicaInfo{
   872  			LogShardInfo: pb.LogShardInfo{
   873  				ShardID:  ci.ShardID,
   874  				Replicas: ci.Nodes,
   875  				Epoch:    ci.ConfigChangeIndex,
   876  				LeaderID: ci.LeaderID,
   877  				Term:     ci.Term,
   878  			},
   879  			ReplicaID: ci.ReplicaID,
   880  		}
    881  		// FIXME: why do we need this?
   882  		if replicaInfo.Replicas == nil {
   883  			replicaInfo.Replicas = make(map[uint64]dragonboat.Target)
   884  		}
   885  		m.Replicas = append(m.Replicas, replicaInfo)
   886  	}
   887  	return m
   888  }
   889  
   890  // leaderID returns the leader ID of the specified shard.
   891  func (l *store) leaderID(shardID uint64) (uint64, error) {
   892  	leaderID, _, ok, err := l.nh.GetLeaderID(shardID)
   893  	if err != nil {
   894  		return 0, err
   895  	}
   896  	if !ok {
   897  		return 0, nil
   898  	}
   899  	return leaderID, nil
   900  }