github.com/matrixorigin/matrixone@v1.2.0/pkg/hakeeper/checkers/coordinator_test.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package checkers
    16  
    17  import (
    18  	"fmt"
    19  	"testing"
    20  	"time"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    23  	"github.com/matrixorigin/matrixone/pkg/hakeeper"
    24  	"github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/util"
    25  	"github.com/matrixorigin/matrixone/pkg/hakeeper/operator"
    26  	"github.com/matrixorigin/matrixone/pkg/logutil"
    27  	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
    28  	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
    29  	"github.com/stretchr/testify/assert"
    30  )
    31  
    32  func TestMain(m *testing.M) {
    33  	logutil.SetupMOLogger(&logutil.LogConfig{
    34  		Level:  "debug",
    35  		Format: "console",
    36  	})
    37  
    38  	runtime.SetupProcessLevelRuntime(runtime.NewRuntime(metadata.ServiceType_LOG, "test", logutil.GetGlobalLogger()))
    39  	m.Run()
    40  }
    41  
    42  var expiredTick = uint64(hakeeper.DefaultLogStoreTimeout / time.Second * hakeeper.DefaultTickPerSecond)
    43  
    44  func TestFixExpiredStore(t *testing.T) {
    45  	cases := []struct {
    46  		desc        string
    47  		idAlloc     *util.TestIDAllocator
    48  		cluster     pb.ClusterInfo
    49  		tn          pb.TNState
    50  		log         pb.LogState
    51  		currentTick uint64
    52  		expected    []pb.ScheduleCommand
    53  	}{
    54  		{
    55  			desc:    "normal case",
    56  			idAlloc: util.NewTestIDAllocator(3),
    57  			cluster: pb.ClusterInfo{
    58  				LogShards: []metadata.LogShardRecord{{
    59  					ShardID:          1,
    60  					NumberOfReplicas: 3,
    61  				}},
    62  			},
    63  			log: pb.LogState{
    64  				Shards: map[uint64]pb.LogShardInfo{1: {
    65  					ShardID:  1,
    66  					Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
    67  					Epoch:    1,
    68  					LeaderID: 1,
    69  				}},
    70  				Stores: map[string]pb.LogStoreInfo{
    71  					"a": {
    72  						Tick: 0,
    73  						Replicas: []pb.LogReplicaInfo{{
    74  							LogShardInfo: pb.LogShardInfo{
    75  								ShardID:  1,
    76  								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
    77  								Epoch:    1, LeaderID: 1},
    78  							ReplicaID: 1},
    79  						}},
    80  					"b": {
    81  						Tick: 0,
    82  						Replicas: []pb.LogReplicaInfo{{
    83  							LogShardInfo: pb.LogShardInfo{
    84  								ShardID:  1,
    85  								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
    86  								Epoch:    1,
    87  								LeaderID: 1},
    88  							ReplicaID: 2,
    89  						}},
    90  					},
    91  					"c": {
    92  						Tick: 0,
    93  						Replicas: []pb.LogReplicaInfo{{
    94  							LogShardInfo: pb.LogShardInfo{
    95  								ShardID:  1,
    96  								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
    97  								Epoch:    1,
    98  								LeaderID: 1},
    99  							ReplicaID: 3,
   100  						}},
   101  					},
   102  				},
   103  			},
   104  			currentTick: 0,
   105  			expected:    []pb.ScheduleCommand(nil),
   106  		},
   107  		{
   108  			desc:    "store a is expired",
   109  			idAlloc: util.NewTestIDAllocator(3),
   110  			cluster: pb.ClusterInfo{
   111  				LogShards: []metadata.LogShardRecord{{
   112  					ShardID:          1,
   113  					NumberOfReplicas: 3,
   114  				}},
   115  			},
   116  			log: pb.LogState{
   117  				Shards: map[uint64]pb.LogShardInfo{1: {
   118  					ShardID:  1,
   119  					Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
   120  					Epoch:    1,
   121  					LeaderID: 1,
   122  				}},
   123  				Stores: map[string]pb.LogStoreInfo{
   124  					"a": {
   125  						Tick: 0,
   126  						Replicas: []pb.LogReplicaInfo{{
   127  							LogShardInfo: pb.LogShardInfo{
   128  								ShardID:  1,
   129  								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
   130  								Epoch:    1, LeaderID: 1},
   131  							ReplicaID: 1},
   132  						}},
   133  					"b": {
   134  						Tick: expiredTick,
   135  						Replicas: []pb.LogReplicaInfo{{
   136  							LogShardInfo: pb.LogShardInfo{
   137  								ShardID:  1,
   138  								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
   139  								Epoch:    1,
   140  								LeaderID: 1},
   141  							ReplicaID: 2,
   142  						}},
   143  					},
   144  					"c": {
   145  						Tick: expiredTick,
   146  						Replicas: []pb.LogReplicaInfo{{
   147  							LogShardInfo: pb.LogShardInfo{
   148  								ShardID:  1,
   149  								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
   150  								Epoch:    1,
   151  								LeaderID: 1},
   152  							ReplicaID: 3,
   153  						}},
   154  					},
   155  				},
   156  			},
   157  			currentTick: expiredTick + 1,
   158  			expected: []pb.ScheduleCommand{{
   159  				UUID: "b",
   160  				ConfigChange: &pb.ConfigChange{
   161  					Replica: pb.Replica{
   162  						UUID:      "a",
   163  						ShardID:   1,
   164  						ReplicaID: 1,
   165  						Epoch:     1,
   166  					},
   167  					ChangeType: pb.RemoveReplica,
   168  				},
   169  				ServiceType: pb.LogService,
   170  			}},
   171  		},
   172  		{
   173  			desc:    "shard 1 has 2 replicas, which expected to be 3",
   174  			idAlloc: util.NewTestIDAllocator(3),
   175  			cluster: pb.ClusterInfo{
   176  				LogShards: []metadata.LogShardRecord{{
   177  					ShardID:          1,
   178  					NumberOfReplicas: 3,
   179  				}},
   180  			},
   181  			log: pb.LogState{
   182  				Shards: map[uint64]pb.LogShardInfo{1: {
   183  					ShardID:  1,
   184  					Replicas: map[uint64]string{2: "b", 3: "c"},
   185  					Epoch:    1,
   186  					LeaderID: 1,
   187  				}},
   188  				Stores: map[string]pb.LogStoreInfo{
   189  					"a": {Tick: 0, Replicas: []pb.LogReplicaInfo{}},
   190  					"b": {
   191  						Tick: 0,
   192  						Replicas: []pb.LogReplicaInfo{{
   193  							LogShardInfo: pb.LogShardInfo{
   194  								ShardID:  1,
   195  								Replicas: map[uint64]string{2: "b", 3: "c"},
   196  								Epoch:    1,
   197  								LeaderID: 1},
   198  							ReplicaID: 2,
   199  						}},
   200  					},
   201  					"c": {
   202  						Tick: 0,
   203  						Replicas: []pb.LogReplicaInfo{{
   204  							LogShardInfo: pb.LogShardInfo{
   205  								ShardID:  1,
   206  								Replicas: map[uint64]string{2: "b", 3: "c"},
   207  								Epoch:    1,
   208  								LeaderID: 1},
   209  							ReplicaID: 3,
   210  						}},
   211  					},
   212  				},
   213  			},
   214  			currentTick: 0,
   215  			expected: []pb.ScheduleCommand{{
   216  				UUID: "b",
   217  				ConfigChange: &pb.ConfigChange{
   218  					Replica: pb.Replica{
   219  						UUID:      "a",
   220  						ShardID:   1,
   221  						ReplicaID: 4,
   222  						Epoch:     1,
   223  					},
   224  					ChangeType: pb.AddReplica,
   225  				},
   226  				ServiceType: pb.LogService,
   227  			}},
   228  		},
   229  		{
   230  			desc:    "replica on store a is not started",
   231  			idAlloc: util.NewTestIDAllocator(3),
   232  			cluster: pb.ClusterInfo{
   233  				LogShards: []metadata.LogShardRecord{{
   234  					ShardID:          1,
   235  					NumberOfReplicas: 3,
   236  				}},
   237  			},
   238  			log: pb.LogState{
   239  				Shards: map[uint64]pb.LogShardInfo{1: {
   240  					ShardID:  1,
   241  					Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
   242  					Epoch:    1,
   243  					LeaderID: 1,
   244  				}},
   245  				Stores: map[string]pb.LogStoreInfo{
   246  					"a": {Tick: 0, Replicas: []pb.LogReplicaInfo{}},
   247  					"b": {
   248  						Tick: 0,
   249  						Replicas: []pb.LogReplicaInfo{{
   250  							LogShardInfo: pb.LogShardInfo{
   251  								ShardID:  1,
   252  								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
   253  								Epoch:    1,
   254  								LeaderID: 1},
   255  							ReplicaID: 2,
   256  						}},
   257  					},
   258  					"c": {
   259  						Tick: 0,
   260  						Replicas: []pb.LogReplicaInfo{{
   261  							LogShardInfo: pb.LogShardInfo{
   262  								ShardID:  1,
   263  								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
   264  								Epoch:    1,
   265  								LeaderID: 1},
   266  							ReplicaID: 3,
   267  						}},
   268  					},
   269  				},
   270  			},
   271  			currentTick: 0,
   272  			expected: []pb.ScheduleCommand{
   273  				{
   274  					UUID: "a",
   275  					ConfigChange: &pb.ConfigChange{
   276  						Replica: pb.Replica{
   277  							UUID:      "a",
   278  							ShardID:   1,
   279  							ReplicaID: 1,
   280  						},
   281  						ChangeType: pb.StartReplica,
   282  					},
   283  					ServiceType: pb.LogService,
   284  				},
   285  			},
   286  		},
   287  	}
   288  
   289  	for i, c := range cases {
   290  		fmt.Printf("case %v: %s\n", i, c.desc)
   291  		coordinator := NewCoordinator(hakeeper.Config{})
   292  		output := coordinator.Check(c.idAlloc, pb.CheckerState{
   293  			Tick:        c.currentTick,
   294  			ClusterInfo: c.cluster,
   295  			TNState:     c.tn,
   296  			LogState:    c.log,
   297  		})
   298  		assert.Equal(t, c.expected, output)
   299  	}
   300  }
   301  
   302  func TestFixZombie(t *testing.T) {
   303  	cases := []struct {
   304  		desc     string
   305  		idAlloc  *util.TestIDAllocator
   306  		cluster  pb.ClusterInfo
   307  		tn       pb.TNState
   308  		log      pb.LogState
   309  		tick     uint64
   310  		expected []pb.ScheduleCommand
   311  	}{
   312  		{
   313  			desc:    "replica on store c is a zombie",
   314  			idAlloc: util.NewTestIDAllocator(3),
   315  			cluster: pb.ClusterInfo{
   316  				LogShards: []metadata.LogShardRecord{{
   317  					ShardID:          1,
   318  					NumberOfReplicas: 3,
   319  				}},
   320  			},
   321  			log: pb.LogState{
   322  				Shards: map[uint64]pb.LogShardInfo{1: {
   323  					ShardID:  1,
   324  					Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
   325  					Epoch:    2,
   326  					LeaderID: 1,
   327  				}},
   328  				Stores: map[string]pb.LogStoreInfo{
   329  					"a": {
   330  						Replicas: []pb.LogReplicaInfo{{
   331  							LogShardInfo: pb.LogShardInfo{
   332  								ShardID:  1,
   333  								Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
   334  								Epoch:    2,
   335  								LeaderID: 1},
   336  							ReplicaID: 1},
   337  						}},
   338  					"b": {
   339  						Replicas: []pb.LogReplicaInfo{{
   340  							LogShardInfo: pb.LogShardInfo{
   341  								ShardID:  1,
   342  								Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
   343  								Epoch:    2,
   344  								LeaderID: 1},
   345  							ReplicaID: 2,
   346  						}},
   347  					},
   348  					"c": {
   349  						Replicas: []pb.LogReplicaInfo{{
   350  							LogShardInfo: pb.LogShardInfo{
   351  								ShardID:  1,
   352  								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
   353  								Epoch:    1,
   354  								LeaderID: 1},
   355  							ReplicaID: 3,
   356  						}},
   357  					},
   358  					"d": {
   359  						Replicas: []pb.LogReplicaInfo{{
   360  							LogShardInfo: pb.LogShardInfo{
   361  								ShardID:  1,
   362  								Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
   363  								Epoch:    2,
   364  								LeaderID: 1},
   365  							ReplicaID: 3,
   366  						}},
   367  					},
   368  				},
   369  			},
   370  			expected: []pb.ScheduleCommand{
   371  				{
   372  					UUID: "c",
   373  					ConfigChange: &pb.ConfigChange{
   374  						Replica: pb.Replica{
   375  							UUID:      "c",
   376  							ShardID:   1,
   377  							ReplicaID: 3,
   378  						},
   379  						ChangeType: pb.KillZombie,
   380  					},
   381  					ServiceType: pb.LogService,
   382  				},
   383  			},
   384  		},
   385  		{
   386  			desc:    "store c is expired, thus replicas on it are not zombies.",
   387  			idAlloc: util.NewTestIDAllocator(3),
   388  			cluster: pb.ClusterInfo{
   389  				LogShards: []metadata.LogShardRecord{{
   390  					ShardID:          1,
   391  					NumberOfReplicas: 3,
   392  				}},
   393  			},
   394  			log: pb.LogState{
   395  				Shards: map[uint64]pb.LogShardInfo{1: {
   396  					ShardID:  1,
   397  					Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
   398  					Epoch:    2,
   399  					LeaderID: 1,
   400  				}},
   401  				Stores: map[string]pb.LogStoreInfo{
   402  					"a": {
   403  						Tick: expiredTick + 1,
   404  						Replicas: []pb.LogReplicaInfo{{
   405  							LogShardInfo: pb.LogShardInfo{
   406  								ShardID:  1,
   407  								Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
   408  								Epoch:    2,
   409  								LeaderID: 1},
   410  							ReplicaID: 1},
   411  						}},
   412  					"b": {
   413  						Tick: expiredTick + 1,
   414  						Replicas: []pb.LogReplicaInfo{{
   415  							LogShardInfo: pb.LogShardInfo{
   416  								ShardID:  1,
   417  								Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
   418  								Epoch:    2,
   419  								LeaderID: 1},
   420  							ReplicaID: 2,
   421  						}},
   422  					},
   423  					"c": {
   424  						Tick: 0,
   425  						Replicas: []pb.LogReplicaInfo{{
   426  							LogShardInfo: pb.LogShardInfo{
   427  								ShardID:  1,
   428  								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
   429  								Epoch:    1,
   430  								LeaderID: 1},
   431  							ReplicaID: 3,
   432  						}},
   433  					},
   434  					"d": {
   435  						Tick: expiredTick + 1,
   436  						Replicas: []pb.LogReplicaInfo{{
   437  							LogShardInfo: pb.LogShardInfo{
   438  								ShardID:  1,
   439  								Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
   440  								Epoch:    2,
   441  								LeaderID: 1},
   442  							ReplicaID: 3,
   443  						}},
   444  					},
   445  				},
   446  			},
   447  			tick:     expiredTick + 1,
   448  			expected: nil,
   449  		},
   450  	}
   451  
   452  	for i, c := range cases {
   453  		fmt.Printf("case %v: %s\n", i, c.desc)
   454  		coordinator := NewCoordinator(hakeeper.Config{})
   455  		output := coordinator.Check(c.idAlloc, pb.CheckerState{
   456  			Tick:        c.tick,
   457  			ClusterInfo: c.cluster,
   458  			TNState:     c.tn,
   459  			LogState:    c.log,
   460  		})
   461  		assert.Equal(t, c.expected, output)
   462  	}
   463  }
   464  
   465  func TestOpExpiredAndThenCompleted(t *testing.T) {
   466  	cluster := pb.ClusterInfo{LogShards: []metadata.LogShardRecord{{ShardID: 1, NumberOfReplicas: 3}}}
   467  	idAlloc := util.NewTestIDAllocator(2)
   468  	coordinator := NewCoordinator(hakeeper.Config{})
   469  	fn := func(time uint64) uint64 { return time * hakeeper.DefaultTickPerSecond }
   470  	currentTick := fn(uint64(hakeeper.DefaultLogStoreTimeout / time.Second))
   471  
   472  	replicas := map[uint64]string{1: "a", 2: "b"}
   473  	logShardInfo := pb.LogShardInfo{ShardID: 1, Replicas: replicas, Epoch: 2, LeaderID: 1}
   474  	logState := pb.LogState{
   475  		Shards: map[uint64]pb.LogShardInfo{1: {ShardID: 1, Replicas: replicas, Epoch: 1, LeaderID: 1}},
   476  		Stores: map[string]pb.LogStoreInfo{
   477  			"a": {Tick: 0, Replicas: []pb.LogReplicaInfo{{LogShardInfo: logShardInfo, ReplicaID: 1}}},
   478  			"b": {Tick: 0, Replicas: []pb.LogReplicaInfo{{LogShardInfo: logShardInfo, ReplicaID: 2}}},
   479  			"c": {Tick: 1},
   480  		},
   481  	}
   482  
   483  	assert.NotNil(t, coordinator.Check(idAlloc, pb.CheckerState{
   484  		Tick:        currentTick,
   485  		ClusterInfo: cluster,
   486  		LogState:    logState,
   487  	}))
   488  	assert.Nil(t, coordinator.Check(idAlloc, pb.CheckerState{
   489  		Tick:        currentTick,
   490  		ClusterInfo: cluster,
   491  		LogState:    logState,
   492  	}))
   493  
   494  	ops := coordinator.OperatorController.GetOperators(1)
   495  	assert.Equal(t, 1, len(ops))
   496  	ops[0].SetStatus(operator.EXPIRED)
   497  
   498  	assert.NotNil(t, coordinator.Check(idAlloc, pb.CheckerState{
   499  		Tick:        currentTick,
   500  		ClusterInfo: cluster,
   501  		LogState:    logState,
   502  	}))
   503  	ops = coordinator.OperatorController.GetOperators(1)
   504  	assert.Equal(t, 1, len(ops))
   505  
   506  	replicas = map[uint64]string{1: "a", 2: "b", 4: "c"}
   507  	logShardInfo = pb.LogShardInfo{ShardID: 1, Replicas: replicas, Epoch: 2, LeaderID: 1}
   508  	logState = pb.LogState{
   509  		Shards: map[uint64]pb.LogShardInfo{1: {ShardID: 1, Replicas: replicas, Epoch: 1, LeaderID: 1}},
   510  		Stores: map[string]pb.LogStoreInfo{
   511  			"a": {Tick: 0, Replicas: []pb.LogReplicaInfo{{LogShardInfo: logShardInfo, ReplicaID: 1}}},
   512  			"b": {Tick: 0, Replicas: []pb.LogReplicaInfo{{LogShardInfo: logShardInfo, ReplicaID: 2}}},
   513  			"c": {Tick: 0, Replicas: []pb.LogReplicaInfo{{LogShardInfo: logShardInfo, ReplicaID: 4}}},
   514  		},
   515  	}
   516  
   517  	assert.Nil(t, coordinator.Check(idAlloc, pb.CheckerState{
   518  		Tick:        currentTick,
   519  		ClusterInfo: cluster,
   520  		LogState:    logState,
   521  	}))
   522  }