github.com/matrixorigin/matrixone@v1.2.0/pkg/hakeeper/checkers/coordinator_test.go

// Copyright 2021 - 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package checkers

import (
	"fmt"
	"testing"
	"time"

	"github.com/matrixorigin/matrixone/pkg/common/runtime"
	"github.com/matrixorigin/matrixone/pkg/hakeeper"
	"github.com/matrixorigin/matrixone/pkg/hakeeper/checkers/util"
	"github.com/matrixorigin/matrixone/pkg/hakeeper/operator"
	"github.com/matrixorigin/matrixone/pkg/logutil"
	pb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
	"github.com/stretchr/testify/assert"
)

// TestMain sets up a console logger and the process-level runtime required by the checkers.
func TestMain(m *testing.M) {
	logutil.SetupMOLogger(&logutil.LogConfig{
		Level:  "debug",
		Format: "console",
	})

	runtime.SetupProcessLevelRuntime(runtime.NewRuntime(metadata.ServiceType_LOG, "test", logutil.GetGlobalLogger()))
	m.Run()
}

// expiredTick is hakeeper.DefaultLogStoreTimeout expressed in HAKeeper ticks;
// in these tests, a store whose reported tick lags the current tick by more
// than expiredTick is treated as expired.
var expiredTick = uint64(hakeeper.DefaultLogStoreTimeout / time.Second * hakeeper.DefaultTickPerSecond)

func TestFixExpiredStore(t *testing.T) {
	cases := []struct {
		desc        string
		idAlloc     *util.TestIDAllocator
		cluster     pb.ClusterInfo
		tn          pb.TNState
		log         pb.LogState
		currentTick uint64
		expected    []pb.ScheduleCommand
	}{
		{
			desc:    "normal case",
			idAlloc: util.NewTestIDAllocator(3),
			cluster: pb.ClusterInfo{
				LogShards: []metadata.LogShardRecord{{
					ShardID:          1,
					NumberOfReplicas: 3,
				}},
			},
			log: pb.LogState{
				Shards: map[uint64]pb.LogShardInfo{1: {
					ShardID:  1,
					Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
					Epoch:    1,
					LeaderID: 1,
				}},
				Stores: map[string]pb.LogStoreInfo{
					"a": {
						Tick: 0,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
								Epoch:    1, LeaderID: 1},
							ReplicaID: 1},
						}},
					"b": {
						Tick: 0,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
								Epoch:    1,
								LeaderID: 1},
							ReplicaID: 2,
						}},
					},
					"c": {
						Tick: 0,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
								Epoch:    1,
								LeaderID: 1},
							ReplicaID: 3,
						}},
					},
				},
			},
			currentTick: 0,
			expected:    []pb.ScheduleCommand(nil),
		},
		{
			desc:    "store a is expired",
			idAlloc: util.NewTestIDAllocator(3),
			cluster: pb.ClusterInfo{
				LogShards: []metadata.LogShardRecord{{
					ShardID:          1,
					NumberOfReplicas: 3,
				}},
			},
			log: pb.LogState{
				Shards: map[uint64]pb.LogShardInfo{1: {
					ShardID:  1,
					Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
					Epoch:    1,
					LeaderID: 1,
				}},
				Stores: map[string]pb.LogStoreInfo{
					"a": {
						Tick: 0,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
								Epoch:    1, LeaderID: 1},
							ReplicaID: 1},
						}},
					"b": {
						Tick: expiredTick,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
								Epoch:    1,
								LeaderID: 1},
							ReplicaID: 2,
						}},
					},
					"c": {
						Tick: expiredTick,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
								Epoch:    1,
								LeaderID: 1},
							ReplicaID: 3,
						}},
					},
				},
			},
			currentTick: expiredTick + 1,
			expected: []pb.ScheduleCommand{{
				UUID: "b",
				ConfigChange: &pb.ConfigChange{
					Replica: pb.Replica{
						UUID:      "a",
						ShardID:   1,
						ReplicaID: 1,
						Epoch:     1,
					},
					ChangeType: pb.RemoveReplica,
				},
				ServiceType: pb.LogService,
			}},
		},
		{
			desc:    "shard 1 has 2 replicas, but 3 are expected",
			idAlloc: util.NewTestIDAllocator(3),
			cluster: pb.ClusterInfo{
				LogShards: []metadata.LogShardRecord{{
					ShardID:          1,
					NumberOfReplicas: 3,
				}},
			},
			log: pb.LogState{
				Shards: map[uint64]pb.LogShardInfo{1: {
					ShardID:  1,
					Replicas: map[uint64]string{2: "b", 3: "c"},
					Epoch:    1,
					LeaderID: 1,
				}},
				Stores: map[string]pb.LogStoreInfo{
					"a": {Tick: 0, Replicas: []pb.LogReplicaInfo{}},
					"b": {
						Tick: 0,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{2: "b", 3: "c"},
								Epoch:    1,
								LeaderID: 1},
							ReplicaID: 2,
						}},
					},
					"c": {
						Tick: 0,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{2: "b", 3: "c"},
								Epoch:    1,
								LeaderID: 1},
							ReplicaID: 3,
						}},
					},
				},
			},
			currentTick: 0,
			expected: []pb.ScheduleCommand{{
				UUID: "b",
				ConfigChange: &pb.ConfigChange{
					Replica: pb.Replica{
						UUID:      "a",
						ShardID:   1,
						ReplicaID: 4,
						Epoch:     1,
					},
					ChangeType: pb.AddReplica,
				},
				ServiceType: pb.LogService,
			}},
		},
		{
			desc:    "replica on store a is not started",
			idAlloc: util.NewTestIDAllocator(3),
			cluster: pb.ClusterInfo{
				LogShards: []metadata.LogShardRecord{{
					ShardID:          1,
					NumberOfReplicas: 3,
				}},
			},
			log: pb.LogState{
				Shards: map[uint64]pb.LogShardInfo{1: {
					ShardID:  1,
					Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
					Epoch:    1,
					LeaderID: 1,
				}},
				Stores: map[string]pb.LogStoreInfo{
					"a": {Tick: 0, Replicas: []pb.LogReplicaInfo{}},
					"b": {
						Tick: 0,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
								Epoch:    1,
								LeaderID: 1},
							ReplicaID: 2,
						}},
					},
					"c": {
						Tick: 0,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
								Epoch:    1,
								LeaderID: 1},
							ReplicaID: 3,
						}},
					},
				},
			},
			currentTick: 0,
			expected: []pb.ScheduleCommand{
				{
					UUID: "a",
					ConfigChange: &pb.ConfigChange{
						Replica: pb.Replica{
							UUID:      "a",
							ShardID:   1,
							ReplicaID: 1,
						},
						ChangeType: pb.StartReplica,
					},
					ServiceType: pb.LogService,
				},
			},
		},
	}

	for i, c := range cases {
		fmt.Printf("case %v: %s\n", i, c.desc)
		coordinator := NewCoordinator(hakeeper.Config{})
		output := coordinator.Check(c.idAlloc, pb.CheckerState{
			Tick:        c.currentTick,
			ClusterInfo: c.cluster,
			TNState:     c.tn,
			LogState:    c.log,
		})
		assert.Equal(t, c.expected, output)
	}
}

func TestFixZombie(t *testing.T) {
	cases := []struct {
		desc     string
		idAlloc  *util.TestIDAllocator
		cluster  pb.ClusterInfo
		tn       pb.TNState
		log      pb.LogState
		tick     uint64
		expected []pb.ScheduleCommand
	}{
		{
			desc:    "replica on store c is a zombie",
			idAlloc: util.NewTestIDAllocator(3),
			cluster: pb.ClusterInfo{
				LogShards: []metadata.LogShardRecord{{
					ShardID:          1,
					NumberOfReplicas: 3,
				}},
			},
			log: pb.LogState{
				Shards: map[uint64]pb.LogShardInfo{1: {
					ShardID:  1,
					Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
					Epoch:    2,
					LeaderID: 1,
				}},
				Stores: map[string]pb.LogStoreInfo{
					"a": {
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
								Epoch:    2,
								LeaderID: 1},
							ReplicaID: 1},
						}},
					"b": {
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
								Epoch:    2,
								LeaderID: 1},
							ReplicaID: 2,
						}},
					},
					"c": {
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
								Epoch:    1,
								LeaderID: 1},
							ReplicaID: 3,
						}},
					},
					"d": {
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
								Epoch:    2,
								LeaderID: 1},
							ReplicaID: 3,
						}},
					},
				},
			},
			expected: []pb.ScheduleCommand{
				{
					UUID: "c",
					ConfigChange: &pb.ConfigChange{
						Replica: pb.Replica{
							UUID:      "c",
							ShardID:   1,
							ReplicaID: 3,
						},
						ChangeType: pb.KillZombie,
					},
					ServiceType: pb.LogService,
				},
			},
		},
		{
			desc:    "store c is expired, thus replicas on it are not zombies.",
			idAlloc: util.NewTestIDAllocator(3),
			cluster: pb.ClusterInfo{
				LogShards: []metadata.LogShardRecord{{
					ShardID:          1,
					NumberOfReplicas: 3,
				}},
			},
			log: pb.LogState{
				Shards: map[uint64]pb.LogShardInfo{1: {
					ShardID:  1,
					Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
					Epoch:    2,
					LeaderID: 1,
				}},
				Stores: map[string]pb.LogStoreInfo{
					"a": {
						Tick: expiredTick + 1,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
								Epoch:    2,
								LeaderID: 1},
							ReplicaID: 1},
						}},
					"b": {
						Tick: expiredTick + 1,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
								Epoch:    2,
								LeaderID: 1},
							ReplicaID: 2,
						}},
					},
					"c": {
						Tick: 0,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 3: "c"},
								Epoch:    1,
								LeaderID: 1},
							ReplicaID: 3,
						}},
					},
					"d": {
						Tick: expiredTick + 1,
						Replicas: []pb.LogReplicaInfo{{
							LogShardInfo: pb.LogShardInfo{
								ShardID:  1,
								Replicas: map[uint64]string{1: "a", 2: "b", 4: "d"},
								Epoch:    2,
								LeaderID: 1},
							ReplicaID: 3,
						}},
					},
				},
			},
			tick:     expiredTick + 1,
			expected: nil,
		},
	}

	for i, c := range cases {
		fmt.Printf("case %v: %s\n", i, c.desc)
		coordinator := NewCoordinator(hakeeper.Config{})
		output := coordinator.Check(c.idAlloc, pb.CheckerState{
			Tick:        c.tick,
			ClusterInfo: c.cluster,
			TNState:     c.tn,
			LogState:    c.log,
		})
		assert.Equal(t, c.expected, output)
	}
}

// TestOpExpiredAndThenCompleted verifies that the coordinator does not issue
// duplicate commands while an operator is pending, re-issues the command once
// the operator expires, and stops once the desired replica is in place.
func TestOpExpiredAndThenCompleted(t *testing.T) {
	cluster := pb.ClusterInfo{LogShards: []metadata.LogShardRecord{{ShardID: 1, NumberOfReplicas: 3}}}
	idAlloc := util.NewTestIDAllocator(2)
	coordinator := NewCoordinator(hakeeper.Config{})
	// fn converts seconds to HAKeeper ticks.
	fn := func(time uint64) uint64 { return time * hakeeper.DefaultTickPerSecond }
	currentTick := fn(uint64(hakeeper.DefaultLogStoreTimeout / time.Second))

	replicas := map[uint64]string{1: "a", 2: "b"}
	logShardInfo := pb.LogShardInfo{ShardID: 1, Replicas: replicas, Epoch: 2, LeaderID: 1}
	logState := pb.LogState{
		Shards: map[uint64]pb.LogShardInfo{1: {ShardID: 1, Replicas: replicas, Epoch: 1, LeaderID: 1}},
		Stores: map[string]pb.LogStoreInfo{
			"a": {Tick: 0, Replicas: []pb.LogReplicaInfo{{LogShardInfo: logShardInfo, ReplicaID: 1}}},
			"b": {Tick: 0, Replicas: []pb.LogReplicaInfo{{LogShardInfo: logShardInfo, ReplicaID: 2}}},
			"c": {Tick: 1},
		},
	}

	// Shard 1 has only two of the expected three replicas, so the first check
	// produces a schedule command.
	assert.NotNil(t, coordinator.Check(idAlloc, pb.CheckerState{
		Tick:        currentTick,
		ClusterInfo: cluster,
		LogState:    logState,
	}))
	// The operator generated above is still pending, so an immediate re-check
	// produces nothing new.
	assert.Nil(t, coordinator.Check(idAlloc, pb.CheckerState{
		Tick:        currentTick,
		ClusterInfo: cluster,
		LogState:    logState,
	}))

	// Mark the pending operator expired; the next check should issue a
	// replacement command.
	ops := coordinator.OperatorController.GetOperators(1)
	assert.Equal(t, 1, len(ops))
	ops[0].SetStatus(operator.EXPIRED)

	assert.NotNil(t, coordinator.Check(idAlloc, pb.CheckerState{
		Tick:        currentTick,
		ClusterInfo: cluster,
		LogState:    logState,
	}))
	ops = coordinator.OperatorController.GetOperators(1)
	assert.Equal(t, 1, len(ops))

	// Report the shard with the new replica running on store c; the operator
	// completes and no further commands are expected.
	replicas = map[uint64]string{1: "a", 2: "b", 4: "c"}
	logShardInfo = pb.LogShardInfo{ShardID: 1, Replicas: replicas, Epoch: 2, LeaderID: 1}
	logState = pb.LogState{
		Shards: map[uint64]pb.LogShardInfo{1: {ShardID: 1, Replicas: replicas, Epoch: 1, LeaderID: 1}},
		Stores: map[string]pb.LogStoreInfo{
			"a": {Tick: 0, Replicas: []pb.LogReplicaInfo{{LogShardInfo: logShardInfo, ReplicaID: 1}}},
			"b": {Tick: 0, Replicas: []pb.LogReplicaInfo{{LogShardInfo: logShardInfo, ReplicaID: 2}}},
			"c": {Tick: 0, Replicas: []pb.LogReplicaInfo{{LogShardInfo: logShardInfo, ReplicaID: 4}}},
		},
	}

	assert.Nil(t, coordinator.Check(idAlloc, pb.CheckerState{
		Tick:        currentTick,
		ClusterInfo: cluster,
		LogState:    logState,
	}))
}