github.com/matrixorigin/matrixone@v1.2.0/pkg/hakeeper/checkers/syshealth/check_test.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package syshealth
    16  
    17  import (
    18  	"testing"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/hakeeper"
    21  	"github.com/matrixorigin/matrixone/pkg/hakeeper/operator"
    22  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    23  	"github.com/stretchr/testify/require"
    24  )
    25  
    26  func TestShutdownStores(t *testing.T) {
    27  	stores := map[string]struct{}{
    28  		"11": {},
    29  		"12": {},
    30  		"13": {},
    31  	}
    32  
    33  	// operator for log service
    34  	{
    35  		serviceType := pb.LogService
    36  		ops := shutdownStores(serviceType, stores)
    37  		require.Equal(t, len(stores), len(ops))
    38  
    39  		for i := 0; i < len(ops); i++ {
    40  			op := ops[i]
    41  			steps := op.OpSteps()
    42  			require.Equal(t, 1, len(steps))
    43  
    44  			_, ok := steps[0].(operator.StopLogStore)
    45  			require.True(t, ok)
    46  		}
    47  	}
    48  
    49  	// operator for tn service
    50  	{
    51  		serviceType := pb.TNService
    52  		ops := shutdownStores(serviceType, stores)
    53  		require.Equal(t, len(stores), len(ops))
    54  
    55  		for i := 0; i < len(ops); i++ {
    56  			op := ops[i]
    57  			steps := op.OpSteps()
    58  			require.Equal(t, 1, len(steps))
    59  
    60  			_, ok := steps[0].(operator.StopTnStore)
    61  			require.True(t, ok)
    62  		}
    63  	}
    64  }
    65  
    66  func TestParseLogStores(t *testing.T) {
    67  	expiredTick := uint64(10)
    68  	// construct current tick in order to make heartbeat tick expired
    69  	cfg := hakeeper.Config{}
    70  	cfg.Fill()
    71  	currTick := cfg.ExpiredTick(expiredTick, cfg.LogStoreTimeout) + 1
    72  
    73  	logState := pb.LogState{
    74  		Stores: map[string]pb.LogStoreInfo{
    75  			"expired1": mockLogStoreInfo(
    76  				expiredTick,
    77  				mockLogReplicaInfo(10, 100),
    78  				mockLogReplicaInfo(11, 101),
    79  			),
    80  			"working1": mockLogStoreInfo(
    81  				currTick,
    82  				mockLogReplicaInfo(10, 102),
    83  				mockLogReplicaInfo(11, 103),
    84  			),
    85  			"working2": mockLogStoreInfo(
    86  				currTick,
    87  				mockLogReplicaInfo(10, 104),
    88  				mockLogReplicaInfo(11, 105),
    89  			),
    90  		},
    91  	}
    92  
    93  	logStores := parseLogState(cfg, logState, currTick)
    94  	require.Equal(t, len(logState.Stores), logStores.length())
    95  	require.Equal(t, pb.LogService, logStores.serviceType)
    96  	require.Equal(t, 1, len(logStores.expired))
    97  	require.Equal(t, 1, len(logStores.shutdownExpiredStores()))
    98  	require.Equal(t, 2, len(logStores.working))
    99  	require.Equal(t, 2, len(logStores.shutdownWorkingStores()))
   100  }
   101  
   102  func TestParseTnStores(t *testing.T) {
   103  	expiredTick := uint64(10)
   104  	// construct current tick in order to make heartbeat tick expired
   105  	cfg := hakeeper.Config{}
   106  	cfg.Fill()
   107  	currTick := cfg.ExpiredTick(expiredTick, cfg.TNStoreTimeout) + 1
   108  
   109  	tnState := pb.TNState{
   110  		Stores: map[string]pb.TNStoreInfo{
   111  			"expired1": {
   112  				Tick: expiredTick,
   113  				Shards: []pb.TNShardInfo{
   114  					mockTnShardInfo(10, 100),
   115  				},
   116  			},
   117  			"working1": {
   118  				Tick: currTick,
   119  				Shards: []pb.TNShardInfo{
   120  					mockTnShardInfo(11, 101),
   121  				},
   122  			},
   123  			"working2": {
   124  				Tick: currTick,
   125  				Shards: []pb.TNShardInfo{
   126  					mockTnShardInfo(12, 102),
   127  					mockTnShardInfo(13, 103),
   128  				},
   129  			},
   130  		},
   131  	}
   132  
   133  	tnStores := parseTnState(cfg, tnState, currTick)
   134  	require.Equal(t, len(tnState.Stores), tnStores.length())
   135  	require.Equal(t, pb.TNService, tnStores.serviceType)
   136  	require.Equal(t, 1, len(tnStores.expired))
   137  	require.Equal(t, 1, len(tnStores.shutdownExpiredStores()))
   138  	require.Equal(t, 2, len(tnStores.working))
   139  	require.Equal(t, 2, len(tnStores.shutdownWorkingStores()))
   140  }
   141  
   142  func TestLogShard(t *testing.T) {
   143  	// odd shard size
   144  	{
   145  		logShard := newLogShard(10, defaultLogShardSize)
   146  		require.False(t, logShard.healthy())
   147  
   148  		logShard.registerExpiredReplica(100)
   149  		require.False(t, logShard.healthy())
   150  
   151  		logShard.registerWorkingReplica(101)
   152  		require.False(t, logShard.healthy())
   153  
   154  		logShard.registerWorkingReplica(102)
   155  		require.True(t, logShard.healthy())
   156  
   157  		logShard.registerExpiredReplica(103)
   158  		require.True(t, logShard.healthy())
   159  	}
   160  
   161  	// even shard size
   162  	{
   163  		logShard := newLogShard(10, 2)
   164  		require.False(t, logShard.healthy())
   165  
   166  		// register a working replica
   167  		logShard.registerWorkingReplica(104)
   168  		require.False(t, logShard.healthy())
   169  
   170  		// repeated register
   171  		logShard.registerWorkingReplica(104)
   172  		require.False(t, logShard.healthy())
   173  
   174  		// register another working replica
   175  		logShard.registerWorkingReplica(105)
   176  		require.True(t, logShard.healthy())
   177  
   178  		// register a expired replica
   179  		logShard.registerExpiredReplica(100)
   180  		require.True(t, logShard.healthy())
   181  	}
   182  }
   183  
   184  func TestLogShardMap(t *testing.T) {
   185  	expiredStores := map[string]struct{}{
   186  		"expired1": {},
   187  		"expired2": {},
   188  		"expired3": {},
   189  	}
   190  
   191  	workingStores := map[string]struct{}{
   192  		"working1": {},
   193  		"working2": {},
   194  	}
   195  
   196  	tick := uint64(10)
   197  
   198  	logState := pb.LogState{
   199  		Stores: map[string]pb.LogStoreInfo{
   200  			"expired1": mockLogStoreInfo(
   201  				tick,
   202  				mockLogReplicaInfo(10, 100),
   203  				mockLogReplicaInfo(11, 101),
   204  			),
   205  			"expired2": mockLogStoreInfo(
   206  				tick,
   207  				mockLogReplicaInfo(10, 102),
   208  				mockLogReplicaInfo(12, 103),
   209  			),
   210  			"expired3": mockLogStoreInfo(
   211  				tick,
   212  				mockLogReplicaInfo(13, 104),
   213  			),
   214  			"working1": mockLogStoreInfo(
   215  				tick,
   216  				mockLogReplicaInfo(10, 106),
   217  				mockLogReplicaInfo(11, 107),
   218  			),
   219  			"working2": mockLogStoreInfo(
   220  				tick,
   221  				mockLogReplicaInfo(10, 108),
   222  				mockLogReplicaInfo(14, 109),
   223  				mockLogReplicaInfo(11, 110),
   224  			),
   225  		},
   226  	}
   227  
   228  	shards := listExpiredShards(expiredStores, workingStores, logState, pb.ClusterInfo{})
   229  	require.Equal(t, 4, len(shards))
   230  
   231  	require.Equal(t, 2, len(shards[10].expiredReplicas))
   232  	require.Equal(t, 2, len(shards[10].workingReplicas))
   233  
   234  	require.Equal(t, 1, len(shards[11].expiredReplicas))
   235  	require.Equal(t, 2, len(shards[11].workingReplicas))
   236  
   237  	require.Equal(t, 1, len(shards[12].expiredReplicas))
   238  	require.Equal(t, 0, len(shards[12].workingReplicas))
   239  
   240  	require.Equal(t, 1, len(shards[13].expiredReplicas))
   241  	require.Equal(t, 0, len(shards[13].workingReplicas))
   242  }
   243  
   244  func TestCheck(t *testing.T) {
   245  	expiredTick := uint64(10)
   246  	// construct current tick in order to make heartbeat tick expired
   247  	cfg := hakeeper.Config{}
   248  	cfg.Fill()
   249  	currTick := cfg.ExpiredTick(expiredTick, cfg.LogStoreTimeout) + 1
   250  
   251  	// system healthy
   252  	{
   253  		logState := pb.LogState{
   254  			Stores: map[string]pb.LogStoreInfo{
   255  				"expired1": mockLogStoreInfo(
   256  					expiredTick,
   257  					mockLogReplicaInfo(10, 100),
   258  					mockLogReplicaInfo(11, 101),
   259  				),
   260  				"working1": mockLogStoreInfo(
   261  					currTick,
   262  					mockLogReplicaInfo(10, 102),
   263  					mockLogReplicaInfo(11, 103),
   264  				),
   265  				"working2": mockLogStoreInfo(
   266  					currTick,
   267  					mockLogReplicaInfo(10, 104),
   268  					mockLogReplicaInfo(11, 105),
   269  				),
   270  			},
   271  		}
   272  
   273  		tnState := pb.TNState{
   274  			Stores: map[string]pb.TNStoreInfo{
   275  				"expired11": {
   276  					Tick: expiredTick,
   277  					Shards: []pb.TNShardInfo{
   278  						mockTnShardInfo(10, 100),
   279  					},
   280  				},
   281  				"working11": {
   282  					Tick: currTick,
   283  					Shards: []pb.TNShardInfo{
   284  						mockTnShardInfo(11, 101),
   285  					},
   286  				},
   287  				"working12": {
   288  					Tick: currTick,
   289  					Shards: []pb.TNShardInfo{
   290  						mockTnShardInfo(12, 102),
   291  						mockTnShardInfo(13, 103),
   292  					},
   293  				},
   294  			},
   295  		}
   296  
   297  		ops, healthy := Check(cfg, pb.ClusterInfo{}, tnState, logState, currTick)
   298  		require.True(t, healthy)
   299  		require.Equal(t, 0, len(ops))
   300  	}
   301  
   302  	// system unhealthy
   303  	{
   304  		logState := pb.LogState{
   305  			Stores: map[string]pb.LogStoreInfo{
   306  				"expired1": mockLogStoreInfo(
   307  					expiredTick,
   308  					mockLogReplicaInfo(10, 100),
   309  					mockLogReplicaInfo(11, 101),
   310  				),
   311  				"expired2": mockLogStoreInfo(
   312  					expiredTick,
   313  					mockLogReplicaInfo(10, 102),
   314  					mockLogReplicaInfo(11, 103),
   315  				),
   316  				"working2": mockLogStoreInfo(
   317  					currTick,
   318  					mockLogReplicaInfo(10, 104),
   319  					mockLogReplicaInfo(11, 105),
   320  				),
   321  			},
   322  		}
   323  
   324  		tnState := pb.TNState{
   325  			Stores: map[string]pb.TNStoreInfo{
   326  				"expired11": {
   327  					Tick: expiredTick,
   328  					Shards: []pb.TNShardInfo{
   329  						mockTnShardInfo(10, 100),
   330  					},
   331  				},
   332  				"working11": {
   333  					Tick: currTick,
   334  					Shards: []pb.TNShardInfo{
   335  						mockTnShardInfo(11, 101),
   336  					},
   337  				},
   338  				"working12": {
   339  					Tick: currTick,
   340  					Shards: []pb.TNShardInfo{
   341  						mockTnShardInfo(12, 102),
   342  						mockTnShardInfo(13, 103),
   343  					},
   344  				},
   345  			},
   346  		}
   347  
   348  		ops, healthy := Check(cfg, pb.ClusterInfo{}, tnState, logState, currTick)
   349  		require.False(t, healthy)
   350  		require.Equal(t, 6, len(ops))
   351  	}
   352  }
   353  
   354  func mockLogReplicaInfo(shardID, replicaID uint64) pb.LogReplicaInfo {
   355  	info := pb.LogReplicaInfo{
   356  		ReplicaID: replicaID,
   357  	}
   358  	info.ShardID = shardID
   359  	return info
   360  }
   361  
   362  func mockLogStoreInfo(tick uint64, replicas ...pb.LogReplicaInfo) pb.LogStoreInfo {
   363  	return pb.LogStoreInfo{
   364  		Tick:     tick,
   365  		Replicas: replicas,
   366  	}
   367  }
   368  
   369  func mockTnShardInfo(shardID, replicaID uint64) pb.TNShardInfo {
   370  	return pb.TNShardInfo{
   371  		ShardID:   shardID,
   372  		ReplicaID: replicaID,
   373  	}
   374  }