github.com/matrixorigin/matrixone@v1.2.0/pkg/logservice/store_test.go

// Copyright 2021 - 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package logservice

import (
	"context"
	"fmt"
	"math"
	"sync/atomic"
	"testing"
	"time"

	"github.com/google/uuid"
	"github.com/lni/dragonboat/v4"
	"github.com/lni/goutils/leaktest"
	"github.com/lni/vfs"
	"github.com/matrixorigin/matrixone/pkg/common/moerr"
	"github.com/matrixorigin/matrixone/pkg/common/runtime"
	"github.com/matrixorigin/matrixone/pkg/hakeeper"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
	"github.com/matrixorigin/matrixone/pkg/pb/task"
	"github.com/matrixorigin/matrixone/pkg/taskservice"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

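// TestMain configures a process-level logger and runtime shared by all tests
// in this package before running the suite.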
func TestMain(m *testing.M) {
	logutil.SetupMOLogger(&logutil.LogConfig{
		Level:  "debug",
		Format: "console",
	})

	runtime.SetupProcessLevelRuntime(runtime.NewRuntime(metadata.ServiceType_LOG, "test", logutil.GetGlobalLogger()))
	m.Run()
}

var (
	testIOTimeout = 5 * time.Second
)

func TestNodeHostConfig(t *testing.T) {
	cfg := DefaultConfig()
	cfg.DeploymentID = 1234
	cfg.DataDir = "lalala"
	nhConfig := getNodeHostConfig(cfg)
	assert.Equal(t, cfg.DeploymentID, nhConfig.DeploymentID)
	assert.Equal(t, cfg.DataDir, nhConfig.NodeHostDir)
	assert.True(t, nhConfig.AddressByNodeHostID)
}

func TestRaftConfig(t *testing.T) {
	cfg := getRaftConfig(1, 1)
	assert.True(t, cfg.CheckQuorum)
	assert.True(t, cfg.OrderedConfigChange)
}

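// getStoreTestConfig returns a store config backed by an in-memory strict
// filesystem, suitable for single-node store tests.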
func getStoreTestConfig() Config {
	cfg := DefaultConfig()
	cfg.UUID = uuid.New().String()
	cfg.RTTMillisecond = 10
	cfg.GossipPort = testGossipPort
	cfg.GossipSeedAddresses = []string{testGossipAddress, dummyGossipSeedAddress}
	cfg.DeploymentID = 1
	cfg.FS = vfs.NewStrictMem()
	cfg.UseTeeLogDB = true
	return cfg
}

func TestStoreCanBeCreatedAndClosed(t *testing.T) {
	defer leaktest.AfterTest(t)()
	cfg := getStoreTestConfig()
	defer vfs.ReportLeakedFD(cfg.FS, t)
	store, err := newLogStore(cfg, nil, runtime.DefaultRuntime())
	assert.NoError(t, err)
	runtime.DefaultRuntime().Logger().Info("1")
	defer func() {
		assert.NoError(t, store.close())
	}()
	runtime.DefaultRuntime().Logger().Info("2")
}

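// getTestStore creates a log store from cfg and, when startLogReplica is
// true, starts replica 2 of log shard 1 on it.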
func getTestStore(cfg Config, startLogReplica bool, taskService taskservice.TaskService) (*store, error) {
	store, err := newLogStore(cfg, func() taskservice.TaskService { return taskService }, runtime.DefaultRuntime())
	if err != nil {
		return nil, err
	}
	if startLogReplica {
		peers := make(map[uint64]dragonboat.Target)
		peers[2] = store.nh.ID()
		if err := store.startReplica(1, 2, peers, false); err != nil {
			store.close()
			return nil, err
		}
	}
	return store, nil
}

func TestHAKeeperCanBeStarted(t *testing.T) {
	defer leaktest.AfterTest(t)()
	cfg := getStoreTestConfig()
	defer vfs.ReportLeakedFD(cfg.FS, t)
	store, err := newLogStore(cfg, nil, runtime.DefaultRuntime())
	assert.NoError(t, err)
	peers := make(map[uint64]dragonboat.Target)
	peers[2] = store.nh.ID()
	assert.NoError(t, store.startHAKeeperReplica(2, peers, false))
	defer func() {
		assert.NoError(t, store.close())
	}()
	mustHaveReplica(t, store, hakeeper.DefaultHAKeeperShardID, 2)
}

func TestStateMachineCanBeStarted(t *testing.T) {
	defer leaktest.AfterTest(t)()
	cfg := getStoreTestConfig()
	defer vfs.ReportLeakedFD(cfg.FS, t)
	store, err := getTestStore(cfg, true, nil)
	assert.NoError(t, err)
	defer func() {
		assert.NoError(t, store.close())
	}()
	mustHaveReplica(t, store, 1, 2)
}

func TestReplicaCanBeStopped(t *testing.T) {
	defer leaktest.AfterTest(t)()
	cfg := getStoreTestConfig()
	defer vfs.ReportLeakedFD(cfg.FS, t)
	store, err := getTestStore(cfg, true, nil)
	assert.NoError(t, err)
	defer func() {
		assert.NoError(t, store.close())
	}()
	mustHaveReplica(t, store, 1, 2)
	require.NoError(t, store.stopReplica(1, 2))
	assert.False(t, hasReplica(store, 1, 2))
}

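// runStoreTest runs fn against a freshly created single-replica store and
// takes care of leak checking and cleanup.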
func runStoreTest(t *testing.T, fn func(*testing.T, *store)) {
	defer leaktest.AfterTest(t)()
	cfg := getStoreTestConfig()
	defer vfs.ReportLeakedFD(cfg.FS, t)
	store, err := getTestStore(cfg, true, nil)
	assert.NoError(t, err)
	defer func() {
		assert.NoError(t, store.close())
	}()
	fn(t, store)
}

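// getTestUserEntry builds a user entry: a headerSize-byte update tag,
// followed by the lease holder ID (100) and 8 bytes of arbitrary payload.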
func getTestUserEntry() []byte {
	cmd := make([]byte, headerSize+8+8)
	binaryEnc.PutUint32(cmd, uint32(pb.UserEntryUpdate))
	binaryEnc.PutUint64(cmd[headerSize:], 100)
	binaryEnc.PutUint64(cmd[headerSize+8:], 1234567890)
	return cmd
}

func TestGetOrExtendLease(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		ctx, cancel := context.WithTimeout(context.Background(), testIOTimeout)
		defer cancel()
		assert.NoError(t, store.getOrExtendTNLease(ctx, 1, 100))
	}
	runStoreTest(t, fn)
}

func TestAppendLog(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		ctx, cancel := context.WithTimeout(context.Background(), testIOTimeout)
		defer cancel()
		assert.NoError(t, store.getOrExtendTNLease(ctx, 1, 100))
		cmd := getTestUserEntry()
		lsn, err := store.append(ctx, 1, cmd)
		assert.NoError(t, err)
		assert.Equal(t, uint64(4), lsn)
	}
	runStoreTest(t, fn)
}

func TestAppendLogIsRejectedForMismatchedLeaseHolderID(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		ctx, cancel := context.WithTimeout(context.Background(), testIOTimeout)
		defer cancel()
		assert.NoError(t, store.getOrExtendTNLease(ctx, 1, 100))
		// the entry carries lease holder ID 101 while the lease is held by 100
		cmd := make([]byte, headerSize+8+8)
		binaryEnc.PutUint32(cmd, uint32(pb.UserEntryUpdate))
		binaryEnc.PutUint64(cmd[headerSize:], 101)
		binaryEnc.PutUint64(cmd[headerSize+8:], 1234567890)
		_, err := store.append(ctx, 1, cmd)
		assert.True(t, moerr.IsMoErrCode(err, moerr.ErrNotLeaseHolder))
	}
	runStoreTest(t, fn)
}

func TestStoreTsoUpdate(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		ctx, cancel := context.WithTimeout(context.Background(), testIOTimeout)
		defer cancel()
		v1, err := store.tsoUpdate(ctx, 100)
		require.NoError(t, err)
		assert.Equal(t, uint64(1), v1)
		v2, err := store.tsoUpdate(ctx, 1000)
		require.NoError(t, err)
		assert.Equal(t, uint64(101), v2)
	}
	runStoreTest(t, fn)
}

func TestTruncateLog(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		ctx, cancel := context.WithTimeout(context.Background(), testIOTimeout)
		defer cancel()
		assert.NoError(t, store.getOrExtendTNLease(ctx, 1, 100))
		cmd := getTestUserEntry()
		_, err := store.append(ctx, 1, cmd)
		assert.NoError(t, err)
		assert.NoError(t, store.truncateLog(ctx, 1, 4))
		// truncating to an LSN below the current truncation point is rejected
		err = store.truncateLog(ctx, 1, 3)
		assert.True(t, moerr.IsMoErrCode(err, moerr.ErrInvalidTruncateLsn))
	}
	runStoreTest(t, fn)
}

func TestGetTruncatedIndex(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		ctx, cancel := context.WithTimeout(context.Background(), testIOTimeout)
		defer cancel()
		index, err := store.getTruncatedLsn(ctx, 1)
		assert.Equal(t, uint64(0), index)
		assert.NoError(t, err)
		assert.NoError(t, store.getOrExtendTNLease(ctx, 1, 100))
		cmd := getTestUserEntry()
		_, err = store.append(ctx, 1, cmd)
		assert.NoError(t, err)
		assert.NoError(t, store.truncateLog(ctx, 1, 4))
		index, err = store.getTruncatedLsn(ctx, 1)
		assert.Equal(t, uint64(4), index)
		assert.NoError(t, err)
	}
	runStoreTest(t, fn)
}

func TestQueryLog(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		ctx, cancel := context.WithTimeout(context.Background(), testIOTimeout)
		defer cancel()
		assert.NoError(t, store.getOrExtendTNLease(ctx, 1, 100))
		cmd := getTestUserEntry()
		_, err := store.append(ctx, 1, cmd)
		assert.NoError(t, err)
		entries, lsn, err := store.queryLog(ctx, 1, 4, math.MaxUint64)
		assert.NoError(t, err)
		assert.Equal(t, 1, len(entries))
		assert.Equal(t, uint64(4), lsn)
		assert.Equal(t, entries[0].Data, cmd)
		// the lease holder ID update cmd is at entry index 3
		entries, lsn, err = store.queryLog(ctx, 1, 3, math.MaxUint64)
		assert.NoError(t, err)
		assert.Equal(t, 2, len(entries))
		assert.Equal(t, uint64(3), lsn)
		assert.Equal(t, cmd, entries[1].Data)
		assert.Equal(t, pb.LeaseUpdate, entries[0].Type)
		assert.Equal(t, pb.UserRecord, entries[1].Type)

		// size limited
		_, err = store.append(ctx, 1, cmd)
		assert.NoError(t, err)
		entries, lsn, err = store.queryLog(ctx, 1, 4, 1)
		assert.NoError(t, err)
		assert.Equal(t, 1, len(entries))
		assert.Equal(t, uint64(5), lsn)
		assert.Equal(t, entries[0].Data, cmd)
		// more logs are available
		entries, lsn, err = store.queryLog(ctx, 1, 5, 1)
		assert.NoError(t, err)
		assert.Equal(t, 1, len(entries))
		assert.Equal(t, uint64(5), lsn)
		assert.Equal(t, entries[0].Data, cmd)
	}
	runStoreTest(t, fn)
}

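// proceedHAKeeperToRunning drives the HAKeeper state machine from the
// created state through bootstrapping until it reaches HAKeeperRunning.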
func proceedHAKeeperToRunning(t *testing.T, store *store) {
	state, err := store.getCheckerState()
	assert.NoError(t, err)
	assert.Equal(t, pb.HAKeeperCreated, state.State)

	nextIDByKey := map[string]uint64{"a": 1, "b": 2}
	err = store.setInitialClusterInfo(1, 1, 1, hakeeper.K8SIDRangeEnd+10, nextIDByKey)
	assert.NoError(t, err)

	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	hb := store.getHeartbeatMessage()
	_, err = store.addLogStoreHeartbeat(ctx, hb)
	assert.NoError(t, err)

	state, err = store.getCheckerState()
	assert.NoError(t, err)
	assert.Equal(t, pb.HAKeeperBootstrapping, state.State)
	assert.Equal(t, hakeeper.K8SIDRangeEnd+10, state.NextId)
	assert.Equal(t, nextIDByKey, state.NextIDByKey)

	_, term, err := store.isLeaderHAKeeper()
	assert.NoError(t, err)

	store.bootstrap(term, state)
	state, err = store.getCheckerState()
	assert.NoError(t, err)
	assert.Equal(t, pb.HAKeeperBootstrapCommandsReceived, state.State)

	cmd, err := store.getCommandBatch(ctx, store.id())
	require.NoError(t, err)
	require.Equal(t, 1, len(cmd.Commands))
	assert.True(t, cmd.Commands[0].Bootstrapping)

	// handle startReplica so that the log heartbeat message contains shard
	// info, which store.checkBootstrap uses to determine whether all log
	// shards are ready
	service := &Service{store: store}
	service.handleStartReplica(cmd.Commands[0])

	for state.State != pb.HAKeeperRunning && store.bootstrapCheckCycles > 0 {
		func() {
			ctx, cancel = context.WithTimeout(context.Background(), time.Second)
			defer cancel()

			_, err = store.addLogStoreHeartbeat(ctx, store.getHeartbeatMessage())
			assert.NoError(t, err)

			store.checkBootstrap(state)
			state, err = store.getCheckerState()
			assert.NoError(t, err)

			time.Sleep(time.Millisecond * 100)
		}()
	}

	assert.Equal(t, pb.HAKeeperRunning, state.State)
}

// TestTickerForTaskSchedule verifies that tickerForTaskSchedule pushes the
// task-scheduling routine forward.
func TestTickerForTaskSchedule(t *testing.T) {
	fn := func(t *testing.T, store *store, taskService taskservice.TaskService) {
		tickerCtx, tickerCancel := context.WithCancel(context.Background())
		defer tickerCancel()

		// run task scheduling in the background
		go store.tickerForTaskSchedule(tickerCtx, time.Millisecond*10)

		// move the HAKeeper state to running before testing task scheduling
		proceedHAKeeperToRunning(t, store)
		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
		defer cancel()

		err := taskService.CreateAsyncTask(ctx, task.TaskMetadata{ID: "1234"})
		assert.NoError(t, err)

		cnUUID := uuid.New().String()
		cmd := pb.CNStoreHeartbeat{UUID: cnUUID}
		_, err = store.addCNStoreHeartbeat(ctx, cmd)
		assert.NoError(t, err)

		ticker := time.NewTicker(time.Millisecond * 100)
		defer ticker.Stop()
		timeout := time.NewTimer(time.Second * 10)
		defer timeout.Stop()

		for {
			select {
			case <-ticker.C:
				tasks, err := taskService.QueryAsyncTask(ctx, taskservice.WithTaskRunnerCond(taskservice.EQ, cnUUID))
				assert.NoError(t, err)
				if len(tasks) == 1 {
					return
				}
			case <-timeout.C:
				panic("task schedule timeout")
			}
		}
	}

	runHakeeperTaskServiceTest(t, fn)
}

func TestHAKeeperTick(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		peers := make(map[uint64]dragonboat.Target)
		peers[1] = store.id()
		assert.NoError(t, store.startHAKeeperReplica(1, peers, false))
		store.hakeeperTick()
	}
	runStoreTest(t, fn)
}

func TestAddScheduleCommands(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		peers := make(map[uint64]dragonboat.Target)
		peers[1] = store.id()
		assert.NoError(t, store.startHAKeeperReplica(1, peers, false))
		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
		defer cancel()
		// sc1 and sc3 target the same store, so both are returned for "uuid1"
		sc1 := pb.ScheduleCommand{
			UUID: "uuid1",
			ConfigChange: &pb.ConfigChange{
				Replica: pb.Replica{
					ShardID: 1,
				},
			},
		}
		sc2 := pb.ScheduleCommand{
			UUID: "uuid2",
			ConfigChange: &pb.ConfigChange{
				Replica: pb.Replica{
					ShardID: 2,
				},
			},
		}
		sc3 := pb.ScheduleCommand{
			UUID: "uuid1",
			ConfigChange: &pb.ConfigChange{
				Replica: pb.Replica{
					ShardID: 3,
				},
			},
		}
		require.NoError(t,
			store.addScheduleCommands(ctx, 1, []pb.ScheduleCommand{sc1, sc2, sc3}))
		cb, err := store.getCommandBatch(ctx, "uuid1")
		require.NoError(t, err)
		assert.Equal(t, []pb.ScheduleCommand{sc1, sc3}, cb.Commands)
		cb, err = store.getCommandBatch(ctx, "uuid2")
		require.NoError(t, err)
		assert.Equal(t, []pb.ScheduleCommand{sc2}, cb.Commands)
	}
	runStoreTest(t, fn)
}

func TestGetHeartbeatMessage(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		peers := make(map[uint64]dragonboat.Target)
		peers[1] = store.id()
		assert.NoError(t, store.startReplica(10, 1, peers, false))
		assert.NoError(t, store.startHAKeeperReplica(1, peers, false))

		// wait until the heartbeat message reports all three replicas
		for i := 0; i < 5000; i++ {
			m := store.getHeartbeatMessage()
			if len(m.Replicas) != 3 {
				time.Sleep(time.Millisecond)
			} else {
				return
			}
		}
		t.Fatalf("failed to get all replicas details from heartbeat message")
	}
	runStoreTest(t, fn)
}

func TestAddHeartbeat(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		peers := make(map[uint64]dragonboat.Target)
		peers[1] = store.id()
		assert.NoError(t, store.startHAKeeperReplica(1, peers, false))

		m := store.getHeartbeatMessage()
		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
		defer cancel()
		_, err := store.addLogStoreHeartbeat(ctx, m)
		assert.NoError(t, err)

		cnMsg := pb.CNStoreHeartbeat{
			UUID: store.id(),
		}
		_, err = store.addCNStoreHeartbeat(ctx, cnMsg)
		assert.NoError(t, err)

		tnMsg := pb.TNStoreHeartbeat{
			UUID:   store.id(),
			Shards: make([]pb.TNShardInfo, 0),
		}
		tnMsg.Shards = append(tnMsg.Shards, pb.TNShardInfo{ShardID: 2, ReplicaID: 3})
		_, err = store.addTNStoreHeartbeat(ctx, tnMsg)
		assert.NoError(t, err)

		proxyMsg := pb.ProxyHeartbeat{
			UUID: store.id(),
		}
		_, err = store.addProxyHeartbeat(ctx, proxyMsg)
		assert.NoError(t, err)
	}
	runStoreTest(t, fn)
}

func TestAddReplicaRejectedForInvalidCCI(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		// a config change index of 0 is invalid and must be rejected
		err := store.addReplica(1, 100, uuid.New().String(), 0)
		assert.Equal(t, dragonboat.ErrRejected, err)
	}
	runStoreTest(t, fn)
}

func TestAddReplica(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		// wait until shard 1 has an elected leader
		for {
			_, _, ok, err := store.nh.GetLeaderID(1)
			require.NoError(t, err)
			if ok {
				break
			}
			time.Sleep(time.Millisecond)
		}
		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
		defer cancel()
		m, err := store.nh.SyncGetShardMembership(ctx, 1)
		require.NoError(t, err)
		err = store.addReplica(1, 100, uuid.New().String(), m.ConfigChangeID)
		assert.NoError(t, err)
		hb := store.getHeartbeatMessage()
		assert.Equal(t, 2, len(hb.Replicas[0].Replicas))
	}
	runStoreTest(t, fn)
}

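// getTestStores creates two stores hosting replicas 1 and 2 of log shard 1
// and waits until replica 1 is elected leader.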
func getTestStores() (*store, *store, error) {
	cfg1 := DefaultConfig()
	cfg1.UUID = uuid.NewString()
	cfg1.FS = vfs.NewStrictMem()
	cfg1.DeploymentID = 1
	cfg1.RTTMillisecond = 5
	cfg1.DataDir = "data-1"
	cfg1.LogServicePort = 9001
	cfg1.RaftPort = 9002
	cfg1.GossipPort = 9011
	cfg1.GossipSeedAddresses = []string{"127.0.0.1:9011", "127.0.0.1:9012"}
	store1, err := newLogStore(cfg1, nil, runtime.DefaultRuntime())
	if err != nil {
		return nil, nil, err
	}
	cfg2 := DefaultConfig()
	cfg2.UUID = uuid.NewString()
	cfg2.FS = vfs.NewStrictMem()
	cfg2.DeploymentID = 1
	cfg2.RTTMillisecond = 5
	cfg2.DataDir = "data-2"
	cfg2.LogServicePort = 9006
	cfg2.RaftPort = 9007
	cfg2.GossipPort = 9012
	cfg2.GossipSeedAddresses = []string{"127.0.0.1:9011", "127.0.0.1:9012"}
	store2, err := newLogStore(cfg2, nil, runtime.DefaultRuntime())
	if err != nil {
		return nil, nil, err
	}

	peers1 := make(map[uint64]dragonboat.Target)
	peers1[1] = store1.nh.ID()
	peers1[2] = store2.nh.ID()
	if err := store1.startReplica(1, 1, peers1, false); err != nil {
		return nil, nil, err
	}
	peers2 := make(map[uint64]dragonboat.Target)
	peers2[1] = store1.nh.ID()
	peers2[2] = store2.nh.ID()
	if err := store2.startReplica(1, 2, peers2, false); err != nil {
		return nil, nil, err
	}

	// wait for replica 1 to become leader, transferring leadership if needed
	for i := 0; i <= 30000; i++ {
		leaderID, _, ok, err := store1.nh.GetLeaderID(1)
		if err != nil {
			return nil, nil, err
		}
		if ok && leaderID == 1 {
			break
		}
		if ok && leaderID != 1 {
			if err := store1.requestLeaderTransfer(1, 1); err != nil {
				runtime.DefaultRuntime().Logger().Error("failed to transfer leader")
			}
		}
		time.Sleep(time.Millisecond)
		if i == 30000 {
			panic("failed to have leader elected in 30 seconds")
		}
	}
	return store1, store2, nil
}

func TestRemoveReplica(t *testing.T) {
	store1, store2, err := getTestStores()
	require.NoError(t, err)
	defer func() {
		require.NoError(t, store1.close())
		require.NoError(t, store2.close())
	}()
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	for {
		m, err := store1.nh.SyncGetShardMembership(ctx, 1)
		if err == dragonboat.ErrShardNotReady {
			time.Sleep(time.Millisecond)
			continue
		}
		require.NoError(t, err)
		require.NoError(t, store1.removeReplica(1, 2, m.ConfigChangeID))
		return
	}
}

func TestStopReplicaCanResetHAKeeperReplicaID(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		peers := make(map[uint64]dragonboat.Target)
		peers[1] = store.id()
		assert.NoError(t, store.startHAKeeperReplica(1, peers, false))
		assert.Equal(t, uint64(1), atomic.LoadUint64(&store.haKeeperReplicaID))
		assert.NoError(t, store.stopReplica(hakeeper.DefaultHAKeeperShardID, 1))
		assert.Equal(t, uint64(0), atomic.LoadUint64(&store.haKeeperReplicaID))
	}
	runStoreTest(t, fn)
}

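// hasShard reports whether the store's heartbeat message mentions the given
// shard.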
func hasShard(s *store, shardID uint64) bool {
	hb := s.getHeartbeatMessage()
	for _, info := range hb.Replicas {
		if info.ShardID == shardID {
			return true
		}
	}
	return false
}

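// hasReplica reports whether the store's heartbeat message mentions the
// given replica of the given shard.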
func hasReplica(s *store, shardID uint64, replicaID uint64) bool {
	hb := s.getHeartbeatMessage()
	for _, info := range hb.Replicas {
		if info.ShardID == shardID {
			for r := range info.Replicas {
				if r == replicaID {
					return true
				}
			}
		}
	}
	return false
}

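// mustHaveReplica waits up to one second for the replica to show up in the
// store's heartbeat message and fails the test otherwise.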
func mustHaveReplica(t *testing.T,
	s *store, shardID uint64, replicaID uint64) {
	for i := 0; i < 100; i++ {
		if hasReplica(s, shardID, replicaID) {
			return
		}
		time.Sleep(10 * time.Millisecond)
	}
	t.Fatalf("failed to locate the replica")
}

func TestUpdateCNLabel(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		peers := make(map[uint64]dragonboat.Target)
		peers[1] = store.id()
		assert.NoError(t, store.startHAKeeperReplica(1, peers, false))

		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel()

		uuid := "uuid1"
		label := pb.CNStoreLabel{
			UUID: uuid,
			Labels: map[string]metadata.LabelList{
				"account": {Labels: []string{"a", "b"}},
				"role":    {Labels: []string{"1", "2"}},
			},
		}
		err := store.updateCNLabel(ctx, label)
		assert.EqualError(t, err, fmt.Sprintf("internal error: CN [%s] does not exist", uuid))

		// begin heartbeat to add CN store.
		hb := pb.CNStoreHeartbeat{
			UUID: uuid,
		}
		_, err = store.addCNStoreHeartbeat(ctx, hb)
		assert.NoError(t, err)

		label = pb.CNStoreLabel{
			UUID: uuid,
			Labels: map[string]metadata.LabelList{
				"account": {Labels: []string{"a", "b"}},
				"role":    {Labels: []string{"1", "2"}},
			},
		}
		err = store.updateCNLabel(ctx, label)
		assert.NoError(t, err)

		state, err := store.getCheckerState()
		assert.NoError(t, err)
		assert.NotEmpty(t, state)
		info, ok1 := state.CNState.Stores[uuid]
		assert.True(t, ok1)
		labels1, ok2 := info.Labels["account"]
		assert.True(t, ok2)
		assert.Equal(t, labels1.Labels, []string{"a", "b"})
		labels2, ok3 := info.Labels["role"]
		assert.True(t, ok3)
		assert.Equal(t, labels2.Labels, []string{"1", "2"})

		label = pb.CNStoreLabel{
			UUID: uuid,
			Labels: map[string]metadata.LabelList{
				"account": {Labels: []string{"a", "b"}},
			},
		}
		err = store.updateCNLabel(ctx, label)
		assert.NoError(t, err)

		state, err = store.getCheckerState()
		assert.NoError(t, err)
		assert.NotEmpty(t, state)
		info, ok1 = state.CNState.Stores[uuid]
		assert.True(t, ok1)
		labels1, ok2 = info.Labels["account"]
		assert.True(t, ok2)
		assert.Equal(t, labels1.Labels, []string{"a", "b"})
		_, ok3 = info.Labels["role"]
		assert.False(t, ok3)
	}
	runStoreTest(t, fn)
}

func TestUpdateCNWorkState(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		peers := make(map[uint64]dragonboat.Target)
		peers[1] = store.id()
		assert.NoError(t, store.startHAKeeperReplica(1, peers, false))

		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel()

		uuid := "uuid1"
		workState := pb.CNWorkState{
			UUID:  uuid,
			State: metadata.WorkState_Working,
		}
		err := store.updateCNWorkState(ctx, workState)
		assert.EqualError(t, err, fmt.Sprintf("internal error: CN [%s] does not exist", uuid))

		// begin heartbeat to add CN store.
		hb := pb.CNStoreHeartbeat{
			UUID: uuid,
		}
		_, err = store.addCNStoreHeartbeat(ctx, hb)
		assert.NoError(t, err)

		err = store.updateCNWorkState(ctx, workState)
		assert.NoError(t, err)

		state, err := store.getCheckerState()
		assert.NoError(t, err)
		assert.NotEmpty(t, state)
		info, ok1 := state.CNState.Stores[uuid]
		assert.True(t, ok1)
		assert.Equal(t, metadata.WorkState_Working, info.WorkState)

		workState = pb.CNWorkState{
			UUID:  uuid,
			State: metadata.WorkState_Draining,
		}
		err = store.updateCNWorkState(ctx, workState)
		assert.NoError(t, err)

		state, err = store.getCheckerState()
		assert.NoError(t, err)
		assert.NotEmpty(t, state)
		info, ok1 = state.CNState.Stores[uuid]
		assert.True(t, ok1)
		assert.Equal(t, metadata.WorkState_Draining, info.WorkState)

		workState = pb.CNWorkState{
			UUID:  uuid,
			State: metadata.WorkState_Working,
		}
		err = store.updateCNWorkState(ctx, workState)
		assert.NoError(t, err)

		state, err = store.getCheckerState()
		assert.NoError(t, err)
		assert.NotEmpty(t, state)
		info, ok1 = state.CNState.Stores[uuid]
		assert.True(t, ok1)
		assert.Equal(t, metadata.WorkState_Working, info.WorkState)
	}
	runStoreTest(t, fn)
}

func TestPatchCNStore(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		peers := make(map[uint64]dragonboat.Target)
		peers[1] = store.id()
		assert.NoError(t, store.startHAKeeperReplica(1, peers, false))

		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel()

		uuid := "uuid1"
		stateLabel := pb.CNStateLabel{
			UUID:  uuid,
			State: metadata.WorkState_Working,
			Labels: map[string]metadata.LabelList{
				"account": {Labels: []string{"a", "b"}},
				"role":    {Labels: []string{"1", "2"}},
			},
		}
		err := store.patchCNStore(ctx, stateLabel)
		assert.EqualError(t, err, fmt.Sprintf("internal error: CN [%s] does not exist", uuid))

		// begin heartbeat to add CN store.
		hb := pb.CNStoreHeartbeat{
			UUID: uuid,
		}
		_, err = store.addCNStoreHeartbeat(ctx, hb)
		assert.NoError(t, err)

		err = store.patchCNStore(ctx, stateLabel)
		assert.NoError(t, err)

		state, err := store.getCheckerState()
		assert.NoError(t, err)
		assert.NotEmpty(t, state)
		info, ok1 := state.CNState.Stores[uuid]
		assert.True(t, ok1)
		assert.Equal(t, metadata.WorkState_Working, info.WorkState)
		labels1, ok2 := info.Labels["account"]
		assert.True(t, ok2)
		assert.Equal(t, labels1.Labels, []string{"a", "b"})
		labels2, ok3 := info.Labels["role"]
		assert.True(t, ok3)
		assert.Equal(t, labels2.Labels, []string{"1", "2"})

		stateLabel = pb.CNStateLabel{
			UUID:  uuid,
			State: metadata.WorkState_Draining,
		}
		err = store.patchCNStore(ctx, stateLabel)
		assert.NoError(t, err)

		state, err = store.getCheckerState()
		assert.NoError(t, err)
		assert.NotEmpty(t, state)
		info, ok1 = state.CNState.Stores[uuid]
		assert.True(t, ok1)
		assert.Equal(t, metadata.WorkState_Draining, info.WorkState)
		labels1, ok2 = info.Labels["account"]
		assert.True(t, ok2)
		assert.Equal(t, labels1.Labels, []string{"a", "b"})
		labels2, ok3 = info.Labels["role"]
		assert.True(t, ok3)
		assert.Equal(t, labels2.Labels, []string{"1", "2"})

		stateLabel = pb.CNStateLabel{
			UUID: uuid,
			Labels: map[string]metadata.LabelList{
				"account": {Labels: []string{"a", "b"}},
			},
		}
		err = store.patchCNStore(ctx, stateLabel)
		assert.NoError(t, err)

		state, err = store.getCheckerState()
		assert.NoError(t, err)
		assert.NotEmpty(t, state)
		info, ok1 = state.CNState.Stores[uuid]
		assert.True(t, ok1)
		assert.Equal(t, metadata.WorkState_Working, info.WorkState)
		labels1, ok2 = info.Labels["account"]
		assert.True(t, ok2)
		assert.Equal(t, labels1.Labels, []string{"a", "b"})
		_, ok3 = info.Labels["role"]
		assert.False(t, ok3)
	}
	runStoreTest(t, fn)
}

func TestDeleteCNStore(t *testing.T) {
	fn := func(t *testing.T, store *store) {
		peers := make(map[uint64]dragonboat.Target)
		peers[1] = store.id()
		assert.NoError(t, store.startHAKeeperReplica(1, peers, false))

		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel()

		uuid := "uuid1"
		hb := pb.CNStoreHeartbeat{
			UUID: uuid,
		}
		_, err := store.addCNStoreHeartbeat(ctx, hb)
		assert.NoError(t, err)
		state, err := store.getCheckerState()
		assert.NoError(t, err)
		assert.NotEmpty(t, state)
		assert.Equal(t, 1, len(state.CNState.Stores))
		_, ok := state.CNState.Stores[uuid]
		assert.True(t, ok)

		cnStore := pb.DeleteCNStore{
			StoreID: uuid,
		}
		err = store.deleteCNStore(ctx, cnStore)
		assert.NoError(t, err)

		state, err = store.getCheckerState()
		assert.NoError(t, err)
		assert.NotEmpty(t, state)
		assert.Equal(t, 0, len(state.CNState.Stores))
	}
	runStoreTest(t, fn)
}