github.com/matrixorigin/matrixone@v1.2.0/pkg/hakeeper/checkers/dnservice/check_test.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package dnservice
    16  
    17  import (
    18  	"fmt"
    19  	"testing"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    22  	"github.com/matrixorigin/matrixone/pkg/hakeeper"
    23  	"github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/util"
    24  	"github.com/matrixorigin/matrixone/pkg/hakeeper/operator"
    25  	"github.com/matrixorigin/matrixone/pkg/logutil"
    26  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    27  	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
    28  	"github.com/stretchr/testify/require"
    29  )
    30  
    31  func TestMain(m *testing.M) {
    32  	logutil.SetupMOLogger(&logutil.LogConfig{
    33  		Level:  "debug",
    34  		Format: "console",
    35  	})
    36  
    37  	runtime.SetupProcessLevelRuntime(runtime.NewRuntime(metadata.ServiceType_LOG, "test", logutil.GetGlobalLogger()))
    38  	m.Run()
    39  }
    40  
    41  func TestExpiredReplicas(t *testing.T) {
    42  	replicaIDs := []uint64{11, 13, 12, 14, 15}
    43  	retFirst := expiredReplicas(mockTnShard(10, nil, replicaIDs))
    44  	retSecond := expiredReplicas(mockTnShard(10, nil, replicaIDs))
    45  
    46  	require.Equal(t, len(retFirst), len(retSecond))
    47  	for i := 0; i < len(retFirst); i++ {
    48  		require.Equal(t, retFirst[i].replicaID, retSecond[i].replicaID)
    49  	}
    50  }
    51  
    52  func TestExtraWorkingReplicas(t *testing.T) {
    53  	workingIDs := []uint64{11, 13, 12, 14, 15}
    54  	shard := mockTnShard(10, workingIDs, nil)
    55  
    56  	extraFirst := extraWorkingReplicas(shard)
    57  	require.Equal(t, 4, len(extraFirst))
    58  
    59  	extraSecond := extraWorkingReplicas(shard)
    60  	require.Equal(t, 4, len(extraSecond))
    61  
    62  	// whether the order is deterministic or not
    63  	for i := 0; i < len(extraFirst); i++ {
    64  		require.Equal(t,
    65  			extraFirst[i].replicaID,
    66  			extraSecond[i].replicaID,
    67  		)
    68  	}
    69  
    70  	// get max replica ID
    71  	maxReplicaID := uint64(0)
    72  	for _, id := range workingIDs {
    73  		if id > maxReplicaID {
    74  			maxReplicaID = id
    75  		}
    76  	}
    77  	// max replica ID not in
    78  	for i := 0; i < len(extraFirst); i++ {
    79  		require.NotEqual(t,
    80  			maxReplicaID,
    81  			extraFirst[i].replicaID,
    82  		)
    83  	}
    84  }
    85  
    86  func TestConsumeLeastSpareStore(t *testing.T) {
    87  	var working []*util.Store
    88  	_, err := consumeLeastSpareStore(working)
    89  	require.Error(t, err)
    90  
    91  	working = []*util.Store{
    92  		util.NewStore("store13", 1, TnStoreCapacity),
    93  		util.NewStore("store12", 1, TnStoreCapacity),
    94  		util.NewStore("store11", 2, TnStoreCapacity),
    95  	}
    96  
    97  	id, err := consumeLeastSpareStore(working)
    98  	require.NoError(t, err)
    99  	require.Equal(t, "store12", id)
   100  
   101  	id, err = consumeLeastSpareStore(working)
   102  	require.NoError(t, err)
   103  	require.Equal(t, "store13", id)
   104  
   105  	id, err = consumeLeastSpareStore(working)
   106  	require.NoError(t, err)
   107  	require.Equal(t, "store11", id)
   108  }
   109  
   110  func TestCheckShard(t *testing.T) {
   111  	// normal running cluster
   112  	{
   113  		nextReplicaID := uint64(100)
   114  		enough := true
   115  		idAlloc := newMockIDAllocator(nextReplicaID, enough)
   116  		mapper := mockShardMapper()
   117  
   118  		workingStores := []*util.Store{
   119  			util.NewStore("store1", 2, TnStoreCapacity),
   120  			util.NewStore("store2", 3, TnStoreCapacity),
   121  			util.NewStore("store3", 4, TnStoreCapacity),
   122  		}
   123  
   124  		shardID := uint64(10)
   125  		shard := newTnShard(10)
   126  
   127  		// register an expired replica => should add a new replica
   128  		shard.register(newReplica(11, shardID, "store11"), true)
   129  		steps := checkShard(shard, mapper, workingStores, idAlloc)
   130  		require.Equal(t, 1, len(steps))
   131  		add, ok := (steps[0]).(operator.AddTnReplica)
   132  		require.True(t, ok)
   133  		require.Equal(t, nextReplicaID, add.ReplicaID)
   134  		require.Equal(t, shardID, add.ShardID)
   135  		require.Equal(t, "store1", add.StoreID)
   136  
   137  		// register a working replica => no more step
   138  		shard.register(newReplica(12, shardID, "store12"), false)
   139  		steps = checkShard(shard, mapper, workingStores, idAlloc)
   140  		require.Equal(t, 0, len(steps))
   141  
   142  		// register another working replica => should remove extra replicas
   143  		shard.register(newReplica(13, shardID, "store13"), false)
   144  		steps = checkShard(shard, mapper, workingStores, idAlloc)
   145  		require.Equal(t, 1, len(steps))
   146  		remove, ok := (steps[0]).(operator.RemoveTnReplica)
   147  		require.True(t, ok)
   148  		require.Equal(t, uint64(12), remove.ReplicaID)
   149  		require.Equal(t, shardID, remove.ShardID)
   150  		require.Equal(t, "store12", remove.StoreID)
   151  	}
   152  
   153  	{
   154  		// ID exhausted temporarily
   155  		enough := false
   156  		idAlloc := newMockIDAllocator(0, enough)
   157  		mapper := mockShardMapper()
   158  
   159  		workingStores := []*util.Store{
   160  			util.NewStore("store1", 2, TnStoreCapacity),
   161  			util.NewStore("store2", 3, TnStoreCapacity),
   162  			util.NewStore("store3", 4, TnStoreCapacity),
   163  		}
   164  
   165  		anotherShard := uint64(100)
   166  		// register another expired replica, should add a new replica
   167  		shard := mockTnShard(anotherShard, nil, []uint64{101})
   168  		steps := checkShard(shard, mapper, workingStores, idAlloc)
   169  		require.Equal(t, 0, len(steps))
   170  	}
   171  }
   172  
   173  func mockTnShard(
   174  	shardID uint64, workingReplicas, expiredReplica []uint64,
   175  ) *tnShard {
   176  	shard := newTnShard(shardID)
   177  
   178  	// register working replicas
   179  	for i, replicaID := range workingReplicas {
   180  		replica := newReplica(
   181  			replicaID, shardID,
   182  			fmt.Sprintf("store%d", i),
   183  		)
   184  		shard.register(replica, false)
   185  	}
   186  
   187  	// register expired replicas
   188  	for i, replicaID := range expiredReplica {
   189  		replica := newReplica(
   190  			replicaID, shardID,
   191  			fmt.Sprintf("store%d", i+len(workingReplicas)),
   192  		)
   193  		shard.register(replica, true)
   194  	}
   195  
   196  	return shard
   197  }
   198  
   199  func TestCheck(t *testing.T) {
   200  	// clear all records, or other test would fail
   201  	defer func() {
   202  		waitingShards.clear()
   203  	}()
   204  
   205  	staleTick := uint64(10)
   206  	// construct current tick in order to make heartbeat tick expired
   207  	config := hakeeper.Config{}
   208  	config.Fill()
   209  	currTick := config.ExpiredTick(staleTick, config.TNStoreTimeout) + 1
   210  
   211  	enough := true
   212  	newReplicaID := uint64(100)
   213  	idAlloc := newMockIDAllocator(newReplicaID, enough)
   214  
   215  	// 1. no working tn stores
   216  	{
   217  		tnState := pb.TNState{
   218  			Stores: map[string]pb.TNStoreInfo{
   219  				"expired1": {
   220  					Tick: staleTick,
   221  					Shards: []pb.TNShardInfo{
   222  						mockTnShardInfo(10, 12),
   223  					},
   224  				},
   225  				"expired2": {
   226  					Tick: staleTick,
   227  					Shards: []pb.TNShardInfo{
   228  						mockTnShardInfo(11, 13),
   229  					},
   230  				},
   231  			},
   232  		}
   233  
   234  		clusterInfo := mockClusterInfo(10, 11)
   235  
   236  		steps := Check(idAlloc, config, clusterInfo, tnState, pb.TaskTableUser{}, currTick)
   237  		require.Equal(t, len(steps), 0)
   238  	}
   239  
   240  	// 2. running cluster
   241  	{
   242  		tnState := pb.TNState{
   243  			Stores: map[string]pb.TNStoreInfo{
   244  				"expired1": {
   245  					Tick: staleTick,
   246  					Shards: []pb.TNShardInfo{
   247  						mockTnShardInfo(10, 11),
   248  						mockTnShardInfo(14, 17),
   249  					},
   250  				},
   251  				"working1": {
   252  					Tick: currTick,
   253  					Shards: []pb.TNShardInfo{
   254  						mockTnShardInfo(12, 13),
   255  					},
   256  				},
   257  				"working2": {
   258  					Tick: currTick,
   259  					Shards: []pb.TNShardInfo{
   260  						mockTnShardInfo(14, 15),
   261  						mockTnShardInfo(12, 16),
   262  					},
   263  				},
   264  				"working3": {
   265  					Tick: currTick,
   266  					Shards: []pb.TNShardInfo{
   267  						mockTnShardInfo(12, 18),
   268  					},
   269  				},
   270  			},
   271  		}
   272  
   273  		// all shards were reported
   274  		clusterInfo := mockClusterInfo(10, 12, 14)
   275  
   276  		// At current tick, shard 10, 12, 14:
   277  		//  10 - add replica
   278  		//  12 - remove two extra replica (16, 13)
   279  		//  14 - no command
   280  		operators := Check(idAlloc, config, clusterInfo, tnState, pb.TaskTableUser{}, currTick)
   281  		require.Equal(t, 2, len(operators))
   282  
   283  		// shard 10 - single operator step
   284  		op := operators[0]
   285  		require.Equal(t, op.ShardID(), uint64(10))
   286  		steps := op.OpSteps()
   287  		require.Equal(t, len(steps), 1)
   288  		add, ok := steps[0].(operator.AddTnReplica)
   289  		require.True(t, ok)
   290  		require.Equal(t, add.StoreID, "working1")
   291  
   292  		// shard 12 - two operator steps
   293  		op = operators[1]
   294  		require.Equal(t, op.ShardID(), uint64(12))
   295  		steps = op.OpSteps()
   296  		require.Equal(t, len(steps), 2)
   297  		remove, ok := steps[0].(operator.RemoveTnReplica)
   298  		require.True(t, ok)
   299  		require.Equal(t, remove.StoreID, "working1")
   300  		require.Equal(t, remove.ReplicaID, uint64(13))
   301  		remove, ok = steps[1].(operator.RemoveTnReplica)
   302  		require.True(t, ok)
   303  		require.Equal(t, remove.StoreID, "working2")
   304  		require.Equal(t, remove.ReplicaID, uint64(16))
   305  	}
   306  
   307  	// 3. cluster running with initial shard
   308  	{
   309  
   310  		tnState := pb.TNState{
   311  			Stores: map[string]pb.TNStoreInfo{
   312  				"expired1": {
   313  					Tick: staleTick,
   314  					Shards: []pb.TNShardInfo{
   315  						mockTnShardInfo(14, 17),
   316  					},
   317  				},
   318  				"working1": {
   319  					Tick: currTick,
   320  					Shards: []pb.TNShardInfo{
   321  						mockTnShardInfo(12, 16),
   322  					},
   323  				},
   324  			},
   325  		}
   326  
   327  		// with un-reported shard ID
   328  		unreported := uint64(20)
   329  		cluster := mockClusterInfo(unreported, 12, 14)
   330  
   331  		// at the tick of `staleTick`, shard 14, 20:
   332  		//  14 - no command
   333  		//  20 - add replica after a while
   334  		bootstrapping = false
   335  		operators := Check(idAlloc, config, cluster, tnState, pb.TaskTableUser{}, staleTick)
   336  		require.Equal(t, 0, len(operators))
   337  
   338  		// at the tick of `currTick`, shard 14, 20:
   339  		//  14 - add replica
   340  		//  20 - add replica
   341  		operators = Check(idAlloc, config, cluster, tnState, pb.TaskTableUser{}, currTick)
   342  		require.Equal(t, 2, len(operators))
   343  
   344  		// shard 14 - single operator step
   345  		op := operators[0]
   346  		require.Equal(t, op.ShardID(), uint64(14))
   347  		steps := op.OpSteps()
   348  		require.Equal(t, len(steps), 1)
   349  		add, ok := steps[0].(operator.AddTnReplica)
   350  		require.True(t, ok)
   351  		require.Equal(t, add.StoreID, "working1")
   352  
   353  		// shard 20 - add replica after a while
   354  		op = operators[1]
   355  		require.Equal(t, op.ShardID(), uint64(20))
   356  		steps = op.OpSteps()
   357  		require.Equal(t, len(steps), 1)
   358  		add, ok = steps[0].(operator.AddTnReplica)
   359  		require.True(t, ok)
   360  		require.Equal(t, add.StoreID, "working1")
   361  	}
   362  }
   363  
   364  type mockIDAllocator struct {
   365  	next   uint64
   366  	enough bool
   367  }
   368  
   369  func newMockIDAllocator(next uint64, enough bool) *mockIDAllocator {
   370  	return &mockIDAllocator{
   371  		next:   next,
   372  		enough: enough,
   373  	}
   374  }
   375  
   376  func (idAlloc *mockIDAllocator) Next() (uint64, bool) {
   377  	if !idAlloc.enough {
   378  		return 0, false
   379  	}
   380  
   381  	id := idAlloc.next
   382  	idAlloc.next += 1
   383  	return id, true
   384  }
   385  
   386  func mockClusterInfo(ids ...uint64) pb.ClusterInfo {
   387  	var c pb.ClusterInfo
   388  
   389  	records := make([]metadata.TNShardRecord, 0, len(ids))
   390  	for _, id := range ids {
   391  		records = append(records, metadata.TNShardRecord{
   392  			ShardID:    id,
   393  			LogShardID: id,
   394  		})
   395  	}
   396  	c.TNShards = records
   397  
   398  	return c
   399  }
   400  
   401  type mapper struct{}
   402  
   403  func (m mapper) getLogShardID(shardID uint64) (uint64, error) {
   404  	return shardID, nil
   405  }
   406  
   407  func mockShardMapper() ShardMapper {
   408  	return mapper{}
   409  }