github.com/matrixorigin/matrixone@v1.2.0/pkg/logservice/service_test.go (about) 1 // Copyright 2021 - 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package logservice 16 17 import ( 18 "context" 19 "fmt" 20 "runtime/debug" 21 "sync" 22 "testing" 23 "time" 24 25 "github.com/google/uuid" 26 "github.com/lni/dragonboat/v4" 27 "github.com/lni/goutils/leaktest" 28 "github.com/lni/vfs" 29 "github.com/matrixorigin/matrixone/pkg/common/moerr" 30 "github.com/matrixorigin/matrixone/pkg/common/morpc" 31 hapkg "github.com/matrixorigin/matrixone/pkg/hakeeper" 32 pb "github.com/matrixorigin/matrixone/pkg/pb/logservice" 33 "github.com/matrixorigin/matrixone/pkg/pb/metadata" 34 "github.com/stretchr/testify/assert" 35 "github.com/stretchr/testify/require" 36 ) 37 38 const ( 39 testServicePort = 9000 40 testGossipPort = 9010 41 testServiceAddress = "127.0.0.1:9000" 42 testGossipAddress = "127.0.0.1:9010" 43 dummyGossipSeedAddress = "127.0.0.1:9100" 44 testServerMaxMsgSize = 1000 45 ) 46 47 func getServiceTestConfig() Config { 48 c := DefaultConfig() 49 c.UUID = uuid.New().String() 50 c.RTTMillisecond = 10 51 c.GossipPort = testGossipPort 52 c.GossipSeedAddresses = []string{testGossipAddress, dummyGossipSeedAddress} 53 c.DeploymentID = 1 54 c.FS = vfs.NewStrictMem() 55 c.LogServicePort = testServicePort 56 c.DisableWorkers = true 57 c.UseTeeLogDB = true 58 c.RPC.MaxMessageSize = testServerMaxMsgSize 59 return c 60 } 61 62 func runServiceTest(t *testing.T, 63 hakeeper bool, startReplica bool, fn func(*testing.T, *Service)) { 64 defer leaktest.AfterTest(t)() 65 cfg := getServiceTestConfig() 66 defer vfs.ReportLeakedFD(cfg.FS, t) 67 service, err := NewService(cfg, 68 newFS(), 69 nil, 70 WithBackendFilter(func(msg morpc.Message, backendAddr string) bool { 71 return true 72 }), 73 ) 74 require.NoError(t, err) 75 defer func() { 76 assert.NoError(t, service.Close()) 77 }() 78 79 if startReplica { 80 shardID := hapkg.DefaultHAKeeperShardID 81 peers := make(map[uint64]dragonboat.Target) 82 peers[1] = service.ID() 83 if hakeeper { 84 require.NoError(t, service.store.startHAKeeperReplica(1, peers, false)) 85 } else { 86 shardID = 1 87 require.NoError(t, service.store.startReplica(1, 1, peers, false)) 88 } 89 90 // wait for leader to be elected 91 done := false 92 for i := 0; i < 1000; i++ { 93 _, _, ok, err := service.store.nh.GetLeaderID(shardID) 94 require.NoError(t, err) 95 if ok { 96 done = true 97 break 98 } 99 time.Sleep(10 * time.Millisecond) 100 } 101 require.True(t, done) 102 } 103 104 fn(t, service) 105 } 106 107 func TestNewService(t *testing.T) { 108 defer leaktest.AfterTest(t)() 109 cfg := getServiceTestConfig() 110 defer vfs.ReportLeakedFD(cfg.FS, t) 111 service, err := NewService(cfg, 112 newFS(), 113 nil, 114 WithBackendFilter(func(msg morpc.Message, backendAddr string) bool { 115 return true 116 }), 117 ) 118 require.NoError(t, err) 119 assert.NoError(t, service.Close()) 120 } 121 122 func TestNotSupportCmd(t *testing.T) { 123 fn := func(t *testing.T, s *Service) { 124 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 125 defer cancel() 126 127 req := pb.Request{ 128 Method: 999, 129 } 130 resp, _ := s.handle(ctx, req, nil) 131 assert.Equal(t, uint32(moerr.ErrNotSupported), resp.ErrorCode) 132 } 133 runServiceTest(t, false, true, fn) 134 } 135 136 func TestServiceConnect(t *testing.T) { 137 fn := func(t *testing.T, s *Service) { 138 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 139 defer cancel() 140 141 req := pb.Request{ 142 Method: pb.CONNECT, 143 LogRequest: pb.LogRequest{ 144 ShardID: 1, 145 TNID: 100, 146 }, 147 } 148 resp := s.handleConnect(ctx, req) 149 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 150 } 151 runServiceTest(t, false, true, fn) 152 } 153 154 func TestServiceConnectTimeout(t *testing.T) { 155 fn := func(t *testing.T, s *Service) { 156 ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond) 157 defer cancel() 158 159 req := pb.Request{ 160 Method: pb.CONNECT, 161 LogRequest: pb.LogRequest{ 162 ShardID: 1, 163 TNID: 100, 164 }, 165 } 166 resp := s.handleConnect(ctx, req) 167 assert.Equal(t, uint32(moerr.ErrDragonboatTimeout), resp.ErrorCode) 168 } 169 runServiceTest(t, false, true, fn) 170 } 171 172 func TestServiceConnectRO(t *testing.T) { 173 fn := func(t *testing.T, s *Service) { 174 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 175 defer cancel() 176 177 req := pb.Request{ 178 Method: pb.CONNECT_RO, 179 LogRequest: pb.LogRequest{ 180 ShardID: 1, 181 TNID: 100, 182 }, 183 } 184 resp := s.handleConnect(ctx, req) 185 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 186 } 187 runServiceTest(t, false, true, fn) 188 } 189 190 func getTestAppendCmd(id uint64, data []byte) []byte { 191 cmd := make([]byte, len(data)+headerSize+8) 192 binaryEnc.PutUint32(cmd, uint32(pb.UserEntryUpdate)) 193 binaryEnc.PutUint64(cmd[headerSize:], id) 194 copy(cmd[headerSize+8:], data) 195 return cmd 196 } 197 198 func TestServiceHandleLogHeartbeat(t *testing.T) { 199 fn := func(t *testing.T, s *Service) { 200 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 201 defer cancel() 202 203 req := pb.Request{ 204 Method: pb.LOG_HEARTBEAT, 205 LogHeartbeat: &pb.LogStoreHeartbeat{ 206 UUID: "uuid1", 207 }, 208 } 209 sc1 := pb.ScheduleCommand{ 210 UUID: "uuid1", 211 ConfigChange: &pb.ConfigChange{ 212 Replica: pb.Replica{ 213 ShardID: 1, 214 }, 215 }, 216 } 217 sc2 := pb.ScheduleCommand{ 218 UUID: "uuid2", 219 ConfigChange: &pb.ConfigChange{ 220 Replica: pb.Replica{ 221 ShardID: 2, 222 }, 223 }, 224 } 225 sc3 := pb.ScheduleCommand{ 226 UUID: "uuid1", 227 ConfigChange: &pb.ConfigChange{ 228 Replica: pb.Replica{ 229 ShardID: 3, 230 }, 231 }, 232 } 233 require.NoError(t, 234 s.store.addScheduleCommands(ctx, 1, []pb.ScheduleCommand{sc1, sc2, sc3})) 235 resp := s.handleLogHeartbeat(ctx, req) 236 require.Equal(t, []pb.ScheduleCommand{sc1, sc3}, resp.CommandBatch.Commands) 237 } 238 runServiceTest(t, true, true, fn) 239 } 240 241 func TestServiceHandleCNHeartbeat(t *testing.T) { 242 fn := func(t *testing.T, s *Service) { 243 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 244 defer cancel() 245 246 req := pb.Request{ 247 Method: pb.CN_HEARTBEAT, 248 CNHeartbeat: &pb.CNStoreHeartbeat{ 249 UUID: "uuid1", 250 }, 251 } 252 resp := s.handleCNHeartbeat(ctx, req) 253 assert.Equal(t, &pb.CommandBatch{}, resp.CommandBatch) 254 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 255 } 256 runServiceTest(t, true, true, fn) 257 } 258 259 func TestServiceHandleTNHeartbeat(t *testing.T) { 260 fn := func(t *testing.T, s *Service) { 261 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 262 defer cancel() 263 264 req := pb.Request{ 265 Method: pb.TN_HEARTBEAT, 266 TNHeartbeat: &pb.TNStoreHeartbeat{ 267 UUID: "uuid1", 268 }, 269 } 270 sc1 := pb.ScheduleCommand{ 271 UUID: "uuid1", 272 ConfigChange: &pb.ConfigChange{ 273 Replica: pb.Replica{ 274 ShardID: 1, 275 }, 276 }, 277 } 278 sc2 := pb.ScheduleCommand{ 279 UUID: "uuid2", 280 ConfigChange: &pb.ConfigChange{ 281 Replica: pb.Replica{ 282 ShardID: 2, 283 }, 284 }, 285 } 286 sc3 := pb.ScheduleCommand{ 287 UUID: "uuid1", 288 ConfigChange: &pb.ConfigChange{ 289 Replica: pb.Replica{ 290 ShardID: 3, 291 }, 292 }, 293 } 294 require.NoError(t, 295 s.store.addScheduleCommands(ctx, 1, []pb.ScheduleCommand{sc1, sc2, sc3})) 296 resp := s.handleTNHeartbeat(ctx, req) 297 require.Equal(t, []pb.ScheduleCommand{sc1, sc3}, resp.CommandBatch.Commands) 298 } 299 runServiceTest(t, true, true, fn) 300 } 301 302 func TestServiceHandleAppend(t *testing.T) { 303 fn := func(t *testing.T, s *Service) { 304 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 305 defer cancel() 306 307 req := pb.Request{ 308 Method: pb.CONNECT_RO, 309 LogRequest: pb.LogRequest{ 310 ShardID: 1, 311 TNID: 100, 312 }, 313 } 314 resp := s.handleConnect(ctx, req) 315 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 316 317 data := make([]byte, 8) 318 cmd := getTestAppendCmd(req.LogRequest.TNID, data) 319 req = pb.Request{ 320 Method: pb.APPEND, 321 LogRequest: pb.LogRequest{ 322 ShardID: 1, 323 }, 324 } 325 resp = s.handleAppend(ctx, req, cmd) 326 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 327 assert.Equal(t, uint64(4), resp.LogResponse.Lsn) 328 } 329 runServiceTest(t, false, true, fn) 330 } 331 332 func TestServiceHandleAppendWhenNotBeingTheLeaseHolder(t *testing.T) { 333 fn := func(t *testing.T, s *Service) { 334 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 335 defer cancel() 336 337 req := pb.Request{ 338 Method: pb.CONNECT_RO, 339 LogRequest: pb.LogRequest{ 340 ShardID: 1, 341 TNID: 100, 342 }, 343 } 344 resp := s.handleConnect(ctx, req) 345 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 346 347 data := make([]byte, 8) 348 cmd := getTestAppendCmd(req.LogRequest.TNID+1, data) 349 req = pb.Request{ 350 Method: pb.APPEND, 351 LogRequest: pb.LogRequest{ 352 ShardID: 1, 353 }, 354 } 355 resp = s.handleAppend(ctx, req, cmd) 356 assert.Equal(t, uint32(moerr.ErrNotLeaseHolder), resp.ErrorCode) 357 assert.Equal(t, uint64(0), resp.LogResponse.Lsn) 358 } 359 runServiceTest(t, false, true, fn) 360 } 361 362 func TestServiceHandleRead(t *testing.T) { 363 fn := func(t *testing.T, s *Service) { 364 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 365 defer cancel() 366 367 req := pb.Request{ 368 Method: pb.CONNECT_RO, 369 LogRequest: pb.LogRequest{ 370 ShardID: 1, 371 TNID: 100, 372 }, 373 } 374 resp := s.handleConnect(ctx, req) 375 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 376 377 data := make([]byte, 8) 378 cmd := getTestAppendCmd(req.LogRequest.TNID, data) 379 req = pb.Request{ 380 Method: pb.APPEND, 381 LogRequest: pb.LogRequest{ 382 ShardID: 1, 383 }, 384 } 385 resp = s.handleAppend(ctx, req, cmd) 386 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 387 assert.Equal(t, uint64(4), resp.LogResponse.Lsn) 388 389 req = pb.Request{ 390 Method: pb.READ, 391 LogRequest: pb.LogRequest{ 392 ShardID: 1, 393 Lsn: 1, 394 MaxSize: 1024 * 32, 395 }, 396 } 397 resp, records := s.handleRead(ctx, req) 398 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 399 assert.Equal(t, uint64(1), resp.LogResponse.LastLsn) 400 require.Equal(t, 4, len(records.Records)) 401 assert.Equal(t, pb.Internal, records.Records[0].Type) 402 assert.Equal(t, pb.Internal, records.Records[1].Type) 403 assert.Equal(t, pb.LeaseUpdate, records.Records[2].Type) 404 assert.Equal(t, pb.UserRecord, records.Records[3].Type) 405 assert.Equal(t, cmd, records.Records[3].Data) 406 } 407 runServiceTest(t, false, true, fn) 408 } 409 410 func TestServiceTruncate(t *testing.T) { 411 fn := func(t *testing.T, s *Service) { 412 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 413 defer cancel() 414 415 req := pb.Request{ 416 Method: pb.CONNECT_RO, 417 LogRequest: pb.LogRequest{ 418 ShardID: 1, 419 TNID: 100, 420 }, 421 } 422 resp := s.handleConnect(ctx, req) 423 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 424 425 data := make([]byte, 8) 426 cmd := getTestAppendCmd(req.LogRequest.TNID, data) 427 req = pb.Request{ 428 Method: pb.APPEND, 429 LogRequest: pb.LogRequest{ 430 ShardID: 1, 431 }, 432 } 433 resp = s.handleAppend(ctx, req, cmd) 434 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 435 assert.Equal(t, uint64(4), resp.LogResponse.Lsn) 436 437 req = pb.Request{ 438 Method: pb.TRUNCATE, 439 LogRequest: pb.LogRequest{ 440 ShardID: 1, 441 Lsn: 4, 442 }, 443 } 444 resp = s.handleTruncate(ctx, req) 445 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 446 assert.Equal(t, uint64(0), resp.LogResponse.Lsn) 447 448 req = pb.Request{ 449 Method: pb.GET_TRUNCATE, 450 LogRequest: pb.LogRequest{ 451 ShardID: 1, 452 }, 453 } 454 resp = s.handleGetTruncatedIndex(ctx, req) 455 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 456 assert.Equal(t, uint64(4), resp.LogResponse.Lsn) 457 458 req = pb.Request{ 459 Method: pb.TRUNCATE, 460 LogRequest: pb.LogRequest{ 461 ShardID: 1, 462 Lsn: 3, 463 }, 464 } 465 resp = s.handleTruncate(ctx, req) 466 assert.Equal(t, uint32(moerr.ErrInvalidTruncateLsn), resp.ErrorCode) 467 } 468 runServiceTest(t, false, true, fn) 469 } 470 471 func TestServiceTsoUpdate(t *testing.T) { 472 fn := func(t *testing.T, s *Service) { 473 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 474 defer cancel() 475 476 req := pb.Request{ 477 Method: pb.TSO_UPDATE, 478 TsoRequest: &pb.TsoRequest{ 479 Count: 100, 480 }, 481 } 482 resp := s.handleTsoUpdate(ctx, req) 483 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 484 assert.Equal(t, uint64(1), resp.TsoResponse.Value) 485 486 req.TsoRequest.Count = 1000 487 resp = s.handleTsoUpdate(ctx, req) 488 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 489 assert.Equal(t, uint64(101), resp.TsoResponse.Value) 490 491 resp = s.handleTsoUpdate(ctx, req) 492 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 493 assert.Equal(t, uint64(1101), resp.TsoResponse.Value) 494 } 495 runServiceTest(t, false, true, fn) 496 } 497 498 func TestServiceCheckHAKeeper(t *testing.T) { 499 fn := func(t *testing.T, s *Service) { 500 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 501 defer cancel() 502 503 req := pb.Request{ 504 Method: pb.CHECK_HAKEEPER, 505 } 506 resp := s.handleCheckHAKeeper(ctx, req) 507 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 508 assert.False(t, resp.IsHAKeeper) 509 } 510 runServiceTest(t, false, false, fn) 511 512 fn = func(t *testing.T, s *Service) { 513 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 514 defer cancel() 515 516 init := make(map[uint64]dragonboat.Target) 517 init[1] = s.ID() 518 require.NoError(t, s.store.startHAKeeperReplica(1, init, false)) 519 req := pb.Request{ 520 Method: pb.CHECK_HAKEEPER, 521 } 522 resp := s.handleCheckHAKeeper(ctx, req) 523 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 524 assert.True(t, resp.IsHAKeeper) 525 } 526 runServiceTest(t, false, false, fn) 527 } 528 529 func TestShardInfoCanBeQueried(t *testing.T) { 530 defer leaktest.AfterTest(t)() 531 cfg1 := DefaultConfig() 532 cfg1.UUID = uuid.New().String() 533 cfg1.FS = vfs.NewStrictMem() 534 cfg1.DeploymentID = 1 535 cfg1.RTTMillisecond = 5 536 cfg1.DataDir = "data-1" 537 cfg1.LogServicePort = 9002 538 cfg1.RaftPort = 9000 539 cfg1.GossipPort = 9001 540 cfg1.GossipSeedAddresses = []string{"127.0.0.1:9011"} 541 cfg1.DisableWorkers = true 542 cfg2 := DefaultConfig() 543 cfg2.UUID = uuid.New().String() 544 cfg2.FS = vfs.NewStrictMem() 545 cfg2.DeploymentID = 1 546 cfg2.RTTMillisecond = 5 547 cfg2.DataDir = "data-2" 548 cfg2.LogServicePort = 9012 549 cfg2.RaftPort = 9010 550 cfg2.GossipPort = 9011 551 cfg2.GossipSeedAddresses = []string{"127.0.0.1:9001"} 552 cfg2.DisableWorkers = true 553 service1, err := NewService(cfg1, 554 newFS(), 555 nil, 556 WithBackendFilter(func(msg morpc.Message, backendAddr string) bool { 557 return true 558 }), 559 ) 560 require.NoError(t, err) 561 defer func() { 562 assert.NoError(t, service1.Close()) 563 }() 564 peers1 := make(map[uint64]dragonboat.Target) 565 peers1[1] = service1.ID() 566 assert.NoError(t, service1.store.startReplica(1, 1, peers1, false)) 567 service2, err := NewService(cfg2, 568 newFS(), 569 nil, 570 WithBackendFilter(func(msg morpc.Message, backendAddr string) bool { 571 return true 572 }), 573 ) 574 require.NoError(t, err) 575 defer func() { 576 assert.NoError(t, service2.Close()) 577 }() 578 peers2 := make(map[uint64]dragonboat.Target) 579 peers2[1] = service2.ID() 580 assert.NoError(t, service2.store.startReplica(2, 1, peers2, false)) 581 582 nhID1 := service1.ID() 583 nhID2 := service2.ID() 584 585 done := false 586 587 // FIXME: 588 // as per #3478, this test is flaky, increased loop count to 6000 to 589 // see whether gossip can finish syncing in 6 seconds time. also added some 590 // logging to get collect more details 591 for i := 0; i < 6000; i++ { 592 si1, ok := service1.getShardInfo(1) 593 if !ok || si1.LeaderID != 1 { 594 testLogger.Error("shard 1 info missing on service 1") 595 time.Sleep(time.Millisecond) 596 continue 597 } 598 assert.Equal(t, 1, len(si1.Replicas)) 599 require.Equal(t, uint64(1), si1.ShardID) 600 ri, ok := si1.Replicas[1] 601 assert.True(t, ok) 602 assert.Equal(t, nhID1, ri.UUID) 603 assert.Equal(t, cfg1.LogServiceServiceAddr(), ri.ServiceAddress) 604 605 si2, ok := service1.getShardInfo(2) 606 if !ok || si2.LeaderID != 1 { 607 testLogger.Error("shard 2 info missing on service 1") 608 time.Sleep(time.Millisecond) 609 continue 610 } 611 assert.Equal(t, 1, len(si2.Replicas)) 612 require.Equal(t, uint64(2), si2.ShardID) 613 ri, ok = si2.Replicas[1] 614 assert.True(t, ok) 615 assert.Equal(t, nhID2, ri.UUID) 616 assert.Equal(t, cfg2.LogServiceServiceAddr(), ri.ServiceAddress) 617 618 si1, ok = service2.getShardInfo(1) 619 if !ok || si1.LeaderID != 1 { 620 testLogger.Error("shard 1 info missing on service 2") 621 time.Sleep(time.Millisecond) 622 continue 623 } 624 assert.Equal(t, 1, len(si1.Replicas)) 625 require.Equal(t, uint64(1), si1.ShardID) 626 ri, ok = si1.Replicas[1] 627 assert.True(t, ok) 628 assert.Equal(t, nhID1, ri.UUID) 629 assert.Equal(t, cfg1.LogServiceServiceAddr(), ri.ServiceAddress) 630 631 si2, ok = service2.getShardInfo(2) 632 if !ok || si2.LeaderID != 1 { 633 testLogger.Error("shard 2 info missing on service 2") 634 time.Sleep(time.Millisecond) 635 continue 636 } 637 assert.Equal(t, 1, len(si2.Replicas)) 638 require.Equal(t, uint64(2), si2.ShardID) 639 ri, ok = si2.Replicas[1] 640 assert.True(t, ok) 641 assert.Equal(t, nhID2, ri.UUID) 642 assert.Equal(t, cfg2.LogServiceServiceAddr(), ri.ServiceAddress) 643 644 done = true 645 break 646 } 647 assert.True(t, done) 648 } 649 650 func TestGossipInSimulatedCluster(t *testing.T) { 651 defer leaktest.AfterTest(t)() 652 debug.SetMemoryLimit(1 << 30) 653 // start all services 654 nodeCount := 24 655 shardCount := nodeCount / 3 656 configs := make([]Config, 0) 657 services := make([]*Service, 0) 658 for i := 0; i < nodeCount; i++ { 659 cfg := DefaultConfig() 660 cfg.FS = vfs.NewStrictMem() 661 cfg.UUID = uuid.New().String() 662 cfg.DeploymentID = 1 663 cfg.RTTMillisecond = 200 664 cfg.DataDir = fmt.Sprintf("data-%d", i) 665 cfg.LogServicePort = 26000 + 10*i 666 cfg.RaftPort = 26000 + 10*i + 1 667 cfg.GossipPort = 26000 + 10*i + 2 668 cfg.GossipSeedAddresses = []string{ 669 "127.0.0.1:26002", 670 "127.0.0.1:26012", 671 "127.0.0.1:26022", 672 "127.0.0.1:26032", 673 "127.0.0.1:26042", 674 "127.0.0.1:26052", 675 "127.0.0.1:26062", 676 "127.0.0.1:26072", 677 "127.0.0.1:26082", 678 "127.0.0.1:26092", 679 } 680 cfg.DisableWorkers = true 681 cfg.LogDBBufferSize = 1024 * 16 682 cfg.GossipProbeInterval.Duration = 350 * time.Millisecond 683 configs = append(configs, cfg) 684 service, err := NewService(cfg, 685 newFS(), 686 nil, 687 WithBackendFilter(func(msg morpc.Message, backendAddr string) bool { 688 return true 689 }), 690 ) 691 require.NoError(t, err) 692 services = append(services, service) 693 } 694 defer func() { 695 testLogger.Info("going to close all services") 696 var wg sync.WaitGroup 697 for _, s := range services { 698 if s != nil { 699 selected := s 700 wg.Add(1) 701 go func() { 702 require.NoError(t, selected.Close()) 703 wg.Done() 704 testLogger.Info("closed a service") 705 }() 706 } 707 } 708 wg.Wait() 709 time.Sleep(time.Second * 2) 710 }() 711 // start all replicas 712 // shardID: [1, 16] 713 id := uint64(100) 714 for i := uint64(0); i < uint64(shardCount); i++ { 715 shardID := i + 1 716 r1 := id 717 r2 := id + 1 718 r3 := id + 2 719 id += 3 720 replicas := make(map[uint64]dragonboat.Target) 721 replicas[r1] = services[i*3].ID() 722 replicas[r2] = services[i*3+1].ID() 723 replicas[r3] = services[i*3+2].ID() 724 require.NoError(t, services[i*3+0].store.startReplica(shardID, r1, replicas, false)) 725 require.NoError(t, services[i*3+1].store.startReplica(shardID, r2, replicas, false)) 726 require.NoError(t, services[i*3+2].store.startReplica(shardID, r3, replicas, false)) 727 } 728 wait := func() { 729 time.Sleep(50 * time.Millisecond) 730 } 731 // check & wait all leaders to be elected and known to all services 732 cci := uint64(0) 733 iterations := 1000 734 for retry := 0; retry < iterations; retry++ { 735 notReady := 0 736 for i := 0; i < nodeCount; i++ { 737 shardID := uint64(i/3 + 1) 738 service := services[i] 739 info, ok := service.getShardInfo(shardID) 740 if !ok || info.LeaderID == 0 { 741 notReady++ 742 wait() 743 continue 744 } 745 if shardID == 1 && info.Epoch != 0 { 746 cci = info.Epoch 747 } 748 } 749 if notReady <= 1 { 750 break 751 } 752 require.True(t, retry < iterations-1) 753 } 754 require.True(t, cci != 0) 755 // all good now, add a replica to shard 1 756 id += 1 757 758 for i := 0; i < iterations; i++ { 759 err := services[0].store.addReplica(1, id, services[3].ID(), cci) 760 if err == nil { 761 break 762 } else if err == dragonboat.ErrTimeout || err == dragonboat.ErrSystemBusy || 763 err == dragonboat.ErrInvalidDeadline || err == dragonboat.ErrTimeoutTooSmall { 764 info, ok := services[0].getShardInfo(1) 765 if ok && info.LeaderID != 0 && len(info.Replicas) == 4 { 766 break 767 } 768 wait() 769 continue 770 } else if err == dragonboat.ErrRejected { 771 break 772 } 773 t.Fatalf("failed to add replica, %v", err) 774 } 775 776 // check the above change can be observed by all services 777 for retry := 0; retry < iterations; retry++ { 778 notReady := 0 779 for i := 0; i < nodeCount; i++ { 780 service := services[i] 781 info, ok := service.getShardInfo(1) 782 if !ok || info.LeaderID == 0 || len(info.Replicas) != 4 { 783 notReady++ 784 wait() 785 continue 786 } 787 } 788 if notReady <= 1 { 789 break 790 } 791 require.True(t, retry < iterations-1) 792 } 793 // restart a service, watch how long will it take to get all required 794 // shard info 795 require.NoError(t, services[12].Close()) 796 services[12] = nil 797 time.Sleep(2 * time.Second) 798 service, err := NewService(configs[12], 799 newFS(), 800 nil, 801 WithBackendFilter(func(msg morpc.Message, backendAddr string) bool { 802 return true 803 }), 804 ) 805 require.NoError(t, err) 806 defer func() { 807 require.NoError(t, service.Close()) 808 }() 809 for retry := 0; retry < iterations; retry++ { 810 notReady := 0 811 for i := uint64(0); i < uint64(shardCount); i++ { 812 shardID := i + 1 813 info, ok := service.getShardInfo(shardID) 814 if !ok || info.LeaderID == 0 { 815 notReady++ 816 wait() 817 continue 818 } 819 } 820 if notReady <= 1 { 821 break 822 } 823 require.True(t, retry < iterations-1) 824 } 825 } 826 827 func TestServiceHandleCNUpdateLabel(t *testing.T) { 828 fn := func(t *testing.T, s *Service) { 829 uuid := "uuid1" 830 ctx0, cancel0 := context.WithTimeout(context.Background(), time.Second) 831 defer cancel0() 832 req := pb.Request{ 833 Method: pb.UPDATE_CN_LABEL, 834 CNStoreLabel: &pb.CNStoreLabel{ 835 UUID: uuid, 836 Labels: map[string]metadata.LabelList{ 837 "account": {Labels: []string{"a", "b"}}, 838 "role": {Labels: []string{"1", "2"}}, 839 }, 840 }, 841 } 842 resp := s.handleUpdateCNLabel(ctx0, req) 843 assert.Equal(t, uint32(20101), resp.ErrorCode) 844 assert.Equal(t, fmt.Sprintf("internal error: CN [%s] does not exist", uuid), resp.ErrorMessage) 845 846 ctx1, cancel1 := context.WithTimeout(context.Background(), time.Second) 847 defer cancel1() 848 req = pb.Request{ 849 Method: pb.CN_HEARTBEAT, 850 CNHeartbeat: &pb.CNStoreHeartbeat{ 851 UUID: uuid, 852 }, 853 } 854 resp = s.handleCNHeartbeat(ctx1, req) 855 assert.Equal(t, &pb.CommandBatch{}, resp.CommandBatch) 856 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 857 858 ctx2, cancel2 := context.WithTimeout(context.Background(), time.Second) 859 defer cancel2() 860 req = pb.Request{ 861 Method: pb.UPDATE_CN_LABEL, 862 CNStoreLabel: &pb.CNStoreLabel{ 863 UUID: uuid, 864 Labels: map[string]metadata.LabelList{ 865 "account": {Labels: []string{"a", "b"}}, 866 "role": {Labels: []string{"1", "2"}}, 867 }, 868 }, 869 } 870 resp = s.handleUpdateCNLabel(ctx2, req) 871 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 872 873 ctx3, cancel3 := context.WithTimeout(context.Background(), time.Second) 874 defer cancel3() 875 req = pb.Request{ 876 Method: pb.GET_CLUSTER_STATE, 877 } 878 resp = s.handleGetCheckerState(ctx3, req) 879 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 880 assert.NotEmpty(t, resp.CheckerState) 881 info, ok1 := resp.CheckerState.CNState.Stores[uuid] 882 assert.True(t, ok1) 883 labels1, ok2 := info.Labels["account"] 884 assert.True(t, ok2) 885 assert.Equal(t, labels1.Labels, []string{"a", "b"}) 886 labels2, ok3 := info.Labels["role"] 887 assert.True(t, ok3) 888 assert.Equal(t, labels2.Labels, []string{"1", "2"}) 889 890 ctx4, cancel4 := context.WithTimeout(context.Background(), time.Second) 891 defer cancel4() 892 req = pb.Request{ 893 Method: pb.UPDATE_CN_LABEL, 894 CNStoreLabel: &pb.CNStoreLabel{ 895 UUID: uuid, 896 Labels: map[string]metadata.LabelList{ 897 "role": {Labels: []string{"1", "2"}}, 898 }, 899 }, 900 } 901 resp = s.handleUpdateCNLabel(ctx4, req) 902 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 903 904 ctx5, cancel5 := context.WithTimeout(context.Background(), time.Second) 905 defer cancel5() 906 req = pb.Request{ 907 Method: pb.GET_CLUSTER_STATE, 908 } 909 resp = s.handleGetCheckerState(ctx5, req) 910 assert.NotEmpty(t, resp.CheckerState) 911 info, ok4 := resp.CheckerState.CNState.Stores[uuid] 912 assert.True(t, ok4) 913 _, ok5 := info.Labels["account"] 914 assert.False(t, ok5) 915 labels3, ok6 := info.Labels["role"] 916 assert.True(t, ok6) 917 assert.Equal(t, labels3.Labels, []string{"1", "2"}) 918 } 919 runServiceTest(t, true, true, fn) 920 } 921 922 func TestServiceHandleCNUpdateWorkState(t *testing.T) { 923 fn := func(t *testing.T, s *Service) { 924 uuid := "uuid1" 925 ctx0, cancel0 := context.WithTimeout(context.Background(), time.Second) 926 defer cancel0() 927 req := pb.Request{ 928 Method: pb.UPDATE_CN_WORK_STATE, 929 CNWorkState: &pb.CNWorkState{ 930 UUID: uuid, 931 State: metadata.WorkState_Working, 932 }, 933 } 934 resp := s.handleUpdateCNWorkState(ctx0, req) 935 assert.Equal(t, uint32(20101), resp.ErrorCode) 936 assert.Equal(t, fmt.Sprintf("internal error: CN [%s] does not exist", uuid), resp.ErrorMessage) 937 938 ctx1, cancel1 := context.WithTimeout(context.Background(), time.Second) 939 defer cancel1() 940 req = pb.Request{ 941 Method: pb.CN_HEARTBEAT, 942 CNHeartbeat: &pb.CNStoreHeartbeat{ 943 UUID: uuid, 944 }, 945 } 946 resp = s.handleCNHeartbeat(ctx1, req) 947 assert.Equal(t, &pb.CommandBatch{}, resp.CommandBatch) 948 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 949 950 ctx2, cancel2 := context.WithTimeout(context.Background(), time.Second) 951 defer cancel2() 952 req = pb.Request{ 953 Method: pb.UPDATE_CN_WORK_STATE, 954 CNWorkState: &pb.CNWorkState{ 955 UUID: uuid, 956 State: metadata.WorkState_Working, 957 }, 958 } 959 resp = s.handleUpdateCNWorkState(ctx2, req) 960 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 961 962 ctx3, cancel3 := context.WithTimeout(context.Background(), time.Second) 963 defer cancel3() 964 req = pb.Request{ 965 Method: pb.GET_CLUSTER_STATE, 966 } 967 resp = s.handleGetCheckerState(ctx3, req) 968 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 969 assert.NotEmpty(t, resp.CheckerState) 970 info, ok1 := resp.CheckerState.CNState.Stores[uuid] 971 assert.True(t, ok1) 972 assert.Equal(t, metadata.WorkState_Working, info.WorkState) 973 974 ctx4, cancel4 := context.WithTimeout(context.Background(), time.Second) 975 defer cancel4() 976 req = pb.Request{ 977 Method: pb.UPDATE_CN_WORK_STATE, 978 CNWorkState: &pb.CNWorkState{ 979 UUID: uuid, 980 State: metadata.WorkState_Unknown, 981 }, 982 } 983 resp = s.handleUpdateCNWorkState(ctx4, req) 984 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 985 986 ctx5, cancel5 := context.WithTimeout(context.Background(), time.Second) 987 defer cancel5() 988 req = pb.Request{ 989 Method: pb.GET_CLUSTER_STATE, 990 } 991 resp = s.handleGetCheckerState(ctx5, req) 992 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 993 assert.NotEmpty(t, resp.CheckerState) 994 info, ok1 = resp.CheckerState.CNState.Stores[uuid] 995 assert.True(t, ok1) 996 assert.Equal(t, metadata.WorkState_Working, info.WorkState) 997 } 998 runServiceTest(t, true, true, fn) 999 } 1000 1001 func TestServiceHandleCNPatchStore(t *testing.T) { 1002 fn := func(t *testing.T, s *Service) { 1003 uuid := "uuid1" 1004 ctx0, cancel0 := context.WithTimeout(context.Background(), time.Second) 1005 defer cancel0() 1006 req := pb.Request{ 1007 Method: pb.PATCH_CN_STORE, 1008 CNStateLabel: &pb.CNStateLabel{ 1009 UUID: uuid, 1010 State: metadata.WorkState_Working, 1011 Labels: map[string]metadata.LabelList{ 1012 "account": {Labels: []string{"a", "b"}}, 1013 "role": {Labels: []string{"1", "2"}}, 1014 }, 1015 }, 1016 } 1017 resp := s.handlePatchCNStore(ctx0, req) 1018 assert.Equal(t, uint32(20101), resp.ErrorCode) 1019 assert.Equal(t, fmt.Sprintf("internal error: CN [%s] does not exist", uuid), resp.ErrorMessage) 1020 1021 ctx1, cancel1 := context.WithTimeout(context.Background(), time.Second) 1022 defer cancel1() 1023 req = pb.Request{ 1024 Method: pb.CN_HEARTBEAT, 1025 CNHeartbeat: &pb.CNStoreHeartbeat{ 1026 UUID: uuid, 1027 }, 1028 } 1029 resp = s.handleCNHeartbeat(ctx1, req) 1030 assert.Equal(t, &pb.CommandBatch{}, resp.CommandBatch) 1031 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 1032 1033 ctx2, cancel2 := context.WithTimeout(context.Background(), time.Second) 1034 defer cancel2() 1035 req = pb.Request{ 1036 Method: pb.PATCH_CN_STORE, 1037 CNStateLabel: &pb.CNStateLabel{ 1038 UUID: uuid, 1039 State: metadata.WorkState_Working, 1040 Labels: map[string]metadata.LabelList{ 1041 "account": {Labels: []string{"a", "b"}}, 1042 "role": {Labels: []string{"1", "2"}}, 1043 }, 1044 }, 1045 } 1046 resp = s.handlePatchCNStore(ctx2, req) 1047 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 1048 1049 ctx3, cancel3 := context.WithTimeout(context.Background(), time.Second) 1050 defer cancel3() 1051 req = pb.Request{ 1052 Method: pb.GET_CLUSTER_STATE, 1053 } 1054 resp = s.handleGetCheckerState(ctx3, req) 1055 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 1056 assert.NotEmpty(t, resp.CheckerState) 1057 info, ok1 := resp.CheckerState.CNState.Stores[uuid] 1058 assert.True(t, ok1) 1059 assert.Equal(t, metadata.WorkState_Working, info.WorkState) 1060 labels1, ok2 := info.Labels["account"] 1061 assert.True(t, ok2) 1062 assert.Equal(t, labels1.Labels, []string{"a", "b"}) 1063 labels2, ok3 := info.Labels["role"] 1064 assert.True(t, ok3) 1065 assert.Equal(t, labels2.Labels, []string{"1", "2"}) 1066 1067 ctx4, cancel4 := context.WithTimeout(context.Background(), time.Second) 1068 defer cancel4() 1069 req = pb.Request{ 1070 Method: pb.PATCH_CN_STORE, 1071 CNStateLabel: &pb.CNStateLabel{ 1072 UUID: uuid, 1073 State: metadata.WorkState_Draining, 1074 }, 1075 } 1076 resp = s.handlePatchCNStore(ctx4, req) 1077 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 1078 1079 ctx5, cancel5 := context.WithTimeout(context.Background(), time.Second) 1080 defer cancel5() 1081 req = pb.Request{ 1082 Method: pb.GET_CLUSTER_STATE, 1083 } 1084 resp = s.handleGetCheckerState(ctx5, req) 1085 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 1086 assert.NotEmpty(t, resp.CheckerState) 1087 info, ok1 = resp.CheckerState.CNState.Stores[uuid] 1088 assert.True(t, ok1) 1089 assert.Equal(t, metadata.WorkState_Draining, info.WorkState) 1090 labels1, ok2 = info.Labels["account"] 1091 assert.True(t, ok2) 1092 assert.Equal(t, labels1.Labels, []string{"a", "b"}) 1093 labels2, ok3 = info.Labels["role"] 1094 assert.True(t, ok3) 1095 assert.Equal(t, labels2.Labels, []string{"1", "2"}) 1096 } 1097 runServiceTest(t, true, true, fn) 1098 } 1099 1100 func TestServiceHandleCNDeleteStore(t *testing.T) { 1101 fn := func(t *testing.T, s *Service) { 1102 uuid := "uuid1" 1103 ctx0, cancel0 := context.WithTimeout(context.Background(), time.Second) 1104 defer cancel0() 1105 req := pb.Request{ 1106 Method: pb.CN_HEARTBEAT, 1107 CNHeartbeat: &pb.CNStoreHeartbeat{ 1108 UUID: uuid, 1109 }, 1110 } 1111 resp := s.handleCNHeartbeat(ctx0, req) 1112 assert.Equal(t, &pb.CommandBatch{}, resp.CommandBatch) 1113 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 1114 1115 ctx1, cancel1 := context.WithTimeout(context.Background(), time.Second) 1116 defer cancel1() 1117 req = pb.Request{ 1118 Method: pb.GET_CLUSTER_STATE, 1119 } 1120 resp = s.handleGetCheckerState(ctx1, req) 1121 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 1122 assert.NotEmpty(t, resp.CheckerState) 1123 _, ok := resp.CheckerState.CNState.Stores[uuid] 1124 assert.True(t, ok) 1125 1126 ctx2, cancel2 := context.WithTimeout(context.Background(), time.Second) 1127 defer cancel2() 1128 req = pb.Request{ 1129 Method: pb.DELETE_CN_STORE, 1130 DeleteCNStore: &pb.DeleteCNStore{ 1131 StoreID: uuid, 1132 }, 1133 } 1134 resp = s.handleDeleteCNStore(ctx2, req) 1135 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 1136 1137 ctx3, cancel3 := context.WithTimeout(context.Background(), time.Second) 1138 defer cancel3() 1139 req = pb.Request{ 1140 Method: pb.GET_CLUSTER_STATE, 1141 } 1142 resp = s.handleGetCheckerState(ctx3, req) 1143 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 1144 assert.NotEmpty(t, resp.CheckerState) 1145 _, ok = resp.CheckerState.CNState.Stores[uuid] 1146 assert.False(t, ok) 1147 } 1148 runServiceTest(t, true, true, fn) 1149 } 1150 1151 func TestServiceHandleProxyHeartbeat(t *testing.T) { 1152 fn := func(t *testing.T, s *Service) { 1153 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 1154 defer cancel() 1155 req := pb.Request{ 1156 Method: pb.PROXY_HEARTBEAT, 1157 ProxyHeartbeat: &pb.ProxyHeartbeat{ 1158 UUID: "uuid1", 1159 }, 1160 } 1161 resp := s.handleProxyHeartbeat(ctx, req) 1162 assert.Equal(t, &pb.CommandBatch{}, resp.CommandBatch) 1163 assert.Equal(t, uint32(moerr.Ok), resp.ErrorCode) 1164 } 1165 runServiceTest(t, true, true, fn) 1166 }