github.com/matrixorigin/matrixone@v1.2.0/pkg/tests/service/service_test.go

// Copyright 2021 - 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package service

import (
	"context"
	"testing"

	"github.com/lni/goutils/leaktest"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/matrixorigin/matrixone/pkg/logservice"
	logpb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
)

const (
	// supportMultiTN gates the multi-TN test cases below; they are skipped
	// until clusters with more than one TN service are supported.
	supportMultiTN = false
)

func TestClusterStart(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}
	ctx := context.Background()

	// initialize cluster
	c, err := NewCluster(ctx, t, DefaultOptions())
	require.NoError(t, err)
	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())
}

func TestAllocateID(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}
	ctx := context.Background()

	// initialize cluster
	c, err := NewCluster(ctx, t, DefaultOptions())
	require.NoError(t, err)

	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()
	c.WaitHAKeeperState(ctx, logpb.HAKeeperRunning)

	cfg := logservice.HAKeeperClientConfig{
		ServiceAddresses: []string{c.(*testCluster).network.addresses.logAddresses[0].listenAddr},
		AllocateIDBatch:  10,
	}
	hc, err := logservice.NewCNHAKeeperClient(ctx, cfg)
	require.NoError(t, err)
	defer func() {
		assert.NoError(t, hc.Close())
	}()

	last := uint64(0)
	for i := 0; i < int(cfg.AllocateIDBatch)-1; i++ {
		v, err := hc.AllocateID(ctx)
		require.NoError(t, err)
		assert.True(t, v > 0)
		if last != 0 {
			// consecutive allocations must increase by exactly one
			assert.Equal(t, last+1, v, i)
		}
		last = v
	}
}

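// TestAllocateIDByKey exercises keyed ID allocation. Judging from the
// assertions below, each key owns an independent, monotonically increasing
// sequence that starts at 1, so allocations under one key never affect
// another. A minimal usage sketch (mirroring the calls in this test; error
// handling elided):
//
//	hc, _ := logservice.NewCNHAKeeperClient(ctx, cfg)
//	a, _ := hc.AllocateIDByKey(ctx, "k1") // 1
//	b, _ := hc.AllocateIDByKey(ctx, "k1") // 2
//	k, _ := hc.AllocateIDByKey(ctx, "k2") // 1: "k2" has its own counter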
func TestAllocateIDByKey(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}
	ctx := context.Background()

	// initialize cluster
	c, err := NewCluster(ctx, t, DefaultOptions())
	require.NoError(t, err)

	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()
	c.WaitHAKeeperState(ctx, logpb.HAKeeperRunning)

	cfg := logservice.HAKeeperClientConfig{
		ServiceAddresses: []string{c.(*testCluster).network.addresses.logAddresses[0].listenAddr},
		AllocateIDBatch:  10,
	}
	hc, err := logservice.NewCNHAKeeperClient(ctx, cfg)
	require.NoError(t, err)
	defer func() {
		assert.NoError(t, hc.Close())
	}()

	last := uint64(0)
	for i := 0; i < int(cfg.AllocateIDBatch)-1; i++ {
		v, err := hc.AllocateIDByKey(ctx, "k1")
		require.NoError(t, err)
		assert.True(t, v > 0)
		if last != 0 {
			// consecutive allocations under the same key increase by one
			assert.Equal(t, last+1, v, i)
		}
		last = v
	}
	// a fresh key starts its own sequence from 1
	v2, err := hc.AllocateIDByKey(ctx, "k2")
	require.NoError(t, err)
	assert.Equal(t, uint64(1), v2)
	v3, err := hc.AllocateIDByKey(ctx, "k3")
	require.NoError(t, err)
	assert.Equal(t, uint64(1), v3)
}

func TestClusterAwareness(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}
	ctx := context.Background()

	if !supportMultiTN {
		t.Skip("skipping, multiple TN services not supported")
		return
	}

	tnSvcNum := 2
	logSvcNum := 3
	opt := DefaultOptions().
		WithTNServiceNum(tnSvcNum).
		WithLogServiceNum(logSvcNum)

	// initialize cluster
	c, err := NewCluster(ctx, t, opt)
	require.NoError(t, err)

	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())

	// -------------------------------------------
	// the following tests `ClusterAwareness`
	// -------------------------------------------
	dsuuids := c.ListTNServices()
	require.Equal(t, tnSvcNum, len(dsuuids))

	lsuuids := c.ListLogServices()
	require.Equal(t, logSvcNum, len(lsuuids))

	hksvcs := c.ListHAKeeperServices()
	require.NotZero(t, len(hksvcs))

	tn, err := c.GetTNService(dsuuids[0])
	require.NoError(t, err)
	require.Equal(t, ServiceStarted, tn.Status())

	log, err := c.GetLogService(lsuuids[0])
	require.NoError(t, err)
	require.Equal(t, ServiceStarted, log.Status())

	ctx1, cancel1 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel1()
	leader := c.WaitHAKeeperLeader(ctx1)
	require.NotNil(t, leader)

	// we must wait for hakeeper's running state, or hakeeper wouldn't receive heartbeats.
	ctx2, cancel2 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel2()
	c.WaitHAKeeperState(ctx2, logpb.HAKeeperRunning)

	ctx3, cancel3 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel3()
	state, err := c.GetClusterState(ctx3)
	require.NoError(t, err)
	require.Equal(t, tnSvcNum, len(state.TNState.Stores))
	require.Equal(t, logSvcNum, len(state.LogState.Stores))
}

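// TestClusterOperation starts and closes individual TN and log services in
// the three ways the test cluster API offers: addressed by service uuid,
// addressed by index, and through the service instance itself. Whichever
// path is used, the service should report the same Status transitions.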
func TestClusterOperation(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}
	ctx := context.Background()

	if !supportMultiTN {
		t.Skip("skipping, multiple TN services not supported")
		return
	}

	tnSvcNum := 3
	logSvcNum := 3
	opt := DefaultOptions().
		WithTNServiceNum(tnSvcNum).
		WithLogServiceNum(logSvcNum)

	// initialize cluster
	c, err := NewCluster(ctx, t, opt)
	require.NoError(t, err)

	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())

	// -------------------------------------------
	// the following tests `ClusterOperation`
	// -------------------------------------------

	// 1. start/close tn services in different ways
	dsuuids := c.ListTNServices()
	require.Equal(t, tnSvcNum, len(dsuuids))
	// 1.a start/close tn service by uuid
	{
		index := 0
		dsuuid := dsuuids[index]

		// get the instance of tn service
		ds, err := c.GetTNService(dsuuid)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ds.Status())

		// start it
		err = c.StartTNService(dsuuid)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ds.Status())

		// close it
		err = c.CloseTNService(dsuuid)
		require.NoError(t, err)
		require.Equal(t, ServiceClosed, ds.Status())
	}

	// 1.b start/close tn service by index
	{
		index := 1

		// get the instance of tn service
		ds, err := c.GetTNServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ds.Status())

		// start it
		err = c.StartTNServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ds.Status())

		// close it
		err = c.CloseTNServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceClosed, ds.Status())
	}

	// 1.c start/close tn service by instance
	{
		index := 2

		// get the instance of tn service
		ds, err := c.GetTNServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ds.Status())

		// start it
		err = ds.Start()
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ds.Status())

		// close it
		err = ds.Close()
		require.NoError(t, err)
		require.Equal(t, ServiceClosed, ds.Status())
	}

	// 2. start/close log services in different ways
	lsuuids := c.ListLogServices()
	require.Equal(t, logSvcNum, len(lsuuids))
	// 2.a start/close log service by uuid
	{
		index := 0
		lsuuid := lsuuids[index]

		// get the instance of log service
		ls, err := c.GetLogService(lsuuid)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ls.Status())

		// start it
		err = c.StartLogService(lsuuid)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ls.Status())

		// close it
		err = c.CloseLogService(lsuuid)
		require.NoError(t, err)
		require.Equal(t, ServiceClosed, ls.Status())
	}

	// 2.b start/close log service by index
	{
		index := 1

		// get the instance of log service
		ls, err := c.GetLogServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ls.Status())

		// start it
		err = c.StartLogServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ls.Status())

		// close it
		err = c.CloseLogServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceClosed, ls.Status())
	}

	// 2.c start/close log service by instance
	{
		index := 2

		// get the instance of log service
		ls, err := c.GetLogServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ls.Status())

		// start it
		err = ls.Start()
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ls.Status())

		// close it
		err = ls.Close()
		require.NoError(t, err)
		require.Equal(t, ServiceClosed, ls.Status())
	}
}

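// TestClusterState reads cluster-wide state through the `ClusterState`
// interface once HAKeeper has elected a leader and reached the running
// state: store counts, TN/log shard lists, per-store info fetched both by
// uuid and by index, and the per-store expiry flags.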
func TestClusterState(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}
	ctx := context.Background()

	if !supportMultiTN {
		t.Skip("skipping, multiple TN services not supported")
		return
	}

	tnSvcNum := 2
	logSvcNum := 3
	opt := DefaultOptions().
		WithTNServiceNum(tnSvcNum).
		WithLogServiceNum(logSvcNum)

	// initialize cluster
	c, err := NewCluster(ctx, t, opt)
	require.NoError(t, err)

	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())

	// ----------------------------------------
	// the following tests `ClusterState`.
	// ----------------------------------------
	ctx1, cancel1 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel1()
	leader := c.WaitHAKeeperLeader(ctx1)
	require.NotNil(t, leader)

	dsuuids := c.ListTNServices()
	require.Equal(t, tnSvcNum, len(dsuuids))

	lsuuids := c.ListLogServices()
	require.Equal(t, logSvcNum, len(lsuuids))

	// we must wait for hakeeper's running state, or hakeeper wouldn't receive heartbeats.
	ctx2, cancel2 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel2()
	c.WaitHAKeeperState(ctx2, logpb.HAKeeperRunning)

	hkstate := c.GetHAKeeperState()
	require.Equal(t, logpb.HAKeeperRunning, hkstate)

	// cluster should be healthy
	require.True(t, c.IsClusterHealthy())

	ctx3, cancel3 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel3()
	state, err := c.GetClusterState(ctx3)
	require.NoError(t, err)
	require.Equal(t, tnSvcNum, len(state.TNState.Stores))
	require.Equal(t, logSvcNum, len(state.LogState.Stores))

	// FIXME: validate the result list of tn shards
	ctx4, cancel4 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel4()
	_, err = c.ListTNShards(ctx4)
	require.NoError(t, err)

	// FIXME: validate the result list of log shards
	ctx5, cancel5 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel5()
	_, err = c.ListLogShards(ctx5)
	require.NoError(t, err)

	// test for:
	// - GetTNStoreInfo
	// - GetTNStoreInfoIndexed
	// - TNStoreExpired
	// - TNStoreExpiredIndexed
	{
		tnIndex := 0
		dsuuid := dsuuids[tnIndex]

		ctx6, cancel6 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel6()
		tnStoreInfo1, err := c.GetTNStoreInfo(ctx6, dsuuid)
		require.NoError(t, err)

		ctx7, cancel7 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel7()
		tnStoreInfo2, err := c.GetTNStoreInfoIndexed(ctx7, tnIndex)
		require.NoError(t, err)
		require.Equal(t, tnStoreInfo1.Shards, tnStoreInfo2.Shards)

		expired1, err := c.TNStoreExpired(dsuuid)
		require.NoError(t, err)
		require.False(t, expired1)

		expired2, err := c.TNStoreExpiredIndexed(tnIndex)
		require.NoError(t, err)
		require.False(t, expired2)
	}

	// test for:
	// - GetLogStoreInfo
	// - GetLogStoreInfoIndexed
	// - LogStoreExpired
	// - LogStoreExpiredIndexed
	{
		logIndex := 1
		lsuuid := lsuuids[logIndex]

		ctx8, cancel8 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel8()
		logStoreInfo1, err := c.GetLogStoreInfo(ctx8, lsuuid)
		require.NoError(t, err)

		ctx9, cancel9 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel9()
		logStoreInfo2, err := c.GetLogStoreInfoIndexed(ctx9, logIndex)
		require.NoError(t, err)
		// TODO: sort both replica lists and compare them in detail.
		require.Equal(t, len(logStoreInfo1.Replicas), len(logStoreInfo2.Replicas))

		expired1, err := c.LogStoreExpired(lsuuid)
		require.NoError(t, err)
		require.False(t, expired1)

		expired2, err := c.LogStoreExpiredIndexed(logIndex)
		require.NoError(t, err)
		require.False(t, expired2)
	}
}

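// TestClusterWaitState drives the `ClusterWaitState` helpers. Each Wait*
// call blocks until the watched condition is reported to HAKeeper or its
// context expires, which is why every call below carries its own
// defaultTestTimeout context.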
func TestClusterWaitState(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}
	ctx := context.Background()

	if !supportMultiTN {
		t.Skip("skipping, multiple TN services not supported")
		return
	}

	tnSvcNum := 2
	logSvcNum := 3
	opt := DefaultOptions().
		WithTNServiceNum(tnSvcNum).
		WithLogServiceNum(logSvcNum)

	// initialize cluster
	c, err := NewCluster(ctx, t, opt)
	require.NoError(t, err)

	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())

	// we must wait for hakeeper's running state, or hakeeper wouldn't receive heartbeats.
	ctx1, cancel1 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel1()
	c.WaitHAKeeperState(ctx1, logpb.HAKeeperRunning)

	// --------------------------------------------
	// the following tests `ClusterWaitState`.
	// --------------------------------------------

	// test WaitTNShardsReported
	{
		ctx2, cancel2 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel2()
		c.WaitTNShardsReported(ctx2)
	}

	// test WaitLogShardsReported
	{
		ctx3, cancel3 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel3()
		c.WaitLogShardsReported(ctx3)
	}

	// test WaitTNReplicaReported
	{
		ctx4, cancel4 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel4()
		tnShards, err := c.ListTNShards(ctx4)
		require.NoError(t, err)
		require.NotZero(t, len(tnShards))

		tnShardID := tnShards[0].ShardID
		ctx5, cancel5 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel5()
		c.WaitTNReplicaReported(ctx5, tnShardID)
	}

	// test WaitLogReplicaReported
	{
		ctx6, cancel6 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel6()
		logShards, err := c.ListLogShards(ctx6)
		require.NoError(t, err)
		require.NotZero(t, len(logShards))

		logShardID := logShards[0].ShardID
		ctx7, cancel7 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel7()
		c.WaitLogReplicaReported(ctx7, logShardID)
	}
}

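// TestNetworkPartition isolates single stores with the partition helpers.
// NewNetworkPartition groups services by index (TN indices first, then log
// indices; the third argument is unused in this test), and
// RemainingNetworkPartition collects every service not already named, so
// the two sides cover the whole cluster. StartNetworkPartition cuts
// traffic between the groups until CloseNetworkPartition restores it.
// The pattern used twice below:
//
//	p1 := c.NewNetworkPartition([]uint32{1}, nil, nil) // TN service 1 alone
//	p2 := c.RemainingNetworkPartition(p1)              // everything else
//	c.StartNetworkPartition(p1, p2)                    // heartbeats from p1 time out
//	c.CloseNetworkPartition()                          // store reports resume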
func TestNetworkPartition(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}
	ctx := context.Background()

	if !supportMultiTN {
		t.Skip("skipping, multiple TN services not supported")
		return
	}

	tnSvcNum := 2
	logSvcNum := 4
	opt := DefaultOptions().
		WithTNServiceNum(tnSvcNum).
		WithLogServiceNum(logSvcNum)

	// initialize cluster
	c, err := NewCluster(ctx, t, opt)
	require.NoError(t, err)

	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())

	// we must wait for hakeeper's running state, or hakeeper wouldn't receive heartbeats.
	ctx1, cancel1 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel1()
	c.WaitHAKeeperState(ctx1, logpb.HAKeeperRunning)

	// --------------------------------------------
	// the following tests network partitions
	// --------------------------------------------

	// tn service index: 0, 1
	// log service index: 0, 1, 2, 3
	// separate tn service 1 from the other services
	partition1 := c.NewNetworkPartition([]uint32{1}, nil, nil)
	require.Equal(t, []uint32{1}, partition1.ListTNServiceIndex())
	require.Nil(t, partition1.ListLogServiceIndex())

	partition2 := c.RemainingNetworkPartition(partition1)
	require.Equal(t, []uint32{0}, partition2.ListTNServiceIndex())
	require.Equal(t, []uint32{0, 1, 2, 3}, partition2.ListLogServiceIndex())

	// enable network partition
	c.StartNetworkPartition(partition1, partition2)
	ctx2, cancel2 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel2()
	c.WaitTNStoreTimeoutIndexed(ctx2, 1)

	// disable network partition
	c.CloseNetworkPartition()
	ctx3, cancel3 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel3()
	c.WaitTNStoreReportedIndexed(ctx3, 1)

	// tn service index: 0, 1
	// log service index: 0, 1, 2, 3
	// separate log service 3 from the other services
	partition3 := c.NewNetworkPartition(nil, []uint32{3}, nil)
	require.Nil(t, partition3.ListTNServiceIndex())
	require.Equal(t, []uint32{3}, partition3.ListLogServiceIndex())

	partition4 := c.RemainingNetworkPartition(partition3)
	require.Equal(t, []uint32{0, 1}, partition4.ListTNServiceIndex())
	require.Equal(t, []uint32{0, 1, 2}, partition4.ListLogServiceIndex())

	// enable network partition
	c.StartNetworkPartition(partition3, partition4)
	ctx4, cancel4 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel4()
	c.WaitLogStoreTimeoutIndexed(ctx4, 3)

	// disable network partition
	c.CloseNetworkPartition()
	ctx5, cancel5 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel5()
	c.WaitLogStoreReportedIndexed(ctx5, 3)
}