github.com/matrixorigin/matrixone@v0.7.0/pkg/tests/service/service_test.go

// Copyright 2021 - 2022 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package service

import (
	"context"
	"testing"

	"github.com/lni/goutils/leaktest"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/matrixorigin/matrixone/pkg/logservice"
	logpb "github.com/matrixorigin/matrixone/pkg/pb/logservice"
)

const (
	supportMultiDN = false
)

func TestClusterStart(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}

	// initialize cluster
	c, err := NewCluster(t, DefaultOptions())
	require.NoError(t, err)
	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())
}

func TestAllocateID(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}

	// initialize cluster
	c, err := NewCluster(t, DefaultOptions())
	require.NoError(t, err)

	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())

	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()
	c.WaitHAKeeperState(ctx, logpb.HAKeeperRunning)

	cfg := logservice.HAKeeperClientConfig{
		ServiceAddresses: []string{c.(*testCluster).network.addresses.logAddresses[0].listenAddr},
		AllocateIDBatch:  10,
	}
	hc, err := logservice.NewCNHAKeeperClient(ctx, cfg)
	require.NoError(t, err)
	defer func() {
		assert.NoError(t, hc.Close())
	}()

	// allocated IDs are expected to be positive and strictly consecutive
	last := uint64(0)
	for i := 0; i < int(cfg.AllocateIDBatch)-1; i++ {
		v, err := hc.AllocateID(ctx)
		require.NoError(t, err)
		assert.True(t, v > 0)
		if last != 0 {
			assert.Equal(t, last+1, v, i)
		}
		last = v
	}
}
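
// waitHAKeeperRunning is a hypothetical helper, not part of the original
// file: a minimal sketch that consolidates the timeout-context/wait pattern
// the tests in this file repeat before they talk to the HAKeeper. It relies
// only on identifiers already present in this package (Cluster,
// defaultTestTimeout, logpb.HAKeeperRunning).
func waitHAKeeperRunning(c Cluster) {
	ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel()
	c.WaitHAKeeperState(ctx, logpb.HAKeeperRunning)
}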

func TestClusterAwareness(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}

	if !supportMultiDN {
		t.Skip("skipping, multi-DN not supported")
		return
	}

	dnSvcNum := 2
	logSvcNum := 3
	opt := DefaultOptions().
		WithDNServiceNum(dnSvcNum).
		WithLogServiceNum(logSvcNum)

	// initialize cluster
	c, err := NewCluster(t, opt)
	require.NoError(t, err)

	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())

	// -------------------------------------------
	// the following tests `ClusterAwareness`
	// -------------------------------------------
	dsuuids := c.ListDNServices()
	require.Equal(t, dnSvcNum, len(dsuuids))

	lsuuids := c.ListLogServices()
	require.Equal(t, logSvcNum, len(lsuuids))

	hksvcs := c.ListHAKeeperServices()
	require.NotZero(t, len(hksvcs))

	dn, err := c.GetDNService(dsuuids[0])
	require.NoError(t, err)
	require.Equal(t, ServiceStarted, dn.Status())

	log, err := c.GetLogService(lsuuids[0])
	require.NoError(t, err)
	require.Equal(t, ServiceStarted, log.Status())

	ctx1, cancel1 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel1()
	leader := c.WaitHAKeeperLeader(ctx1)
	require.NotNil(t, leader)

	// we must wait for hakeeper's running state, otherwise hakeeper won't
	// receive heartbeats.
	ctx2, cancel2 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel2()
	c.WaitHAKeeperState(ctx2, logpb.HAKeeperRunning)

	ctx3, cancel3 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel3()
	state, err := c.GetClusterState(ctx3)
	require.NoError(t, err)
	require.Equal(t, dnSvcNum, len(state.DNState.Stores))
	require.Equal(t, logSvcNum, len(state.LogState.Stores))
}

func TestClusterOperation(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}

	if !supportMultiDN {
		t.Skip("skipping, multi-DN not supported")
		return
	}

	dnSvcNum := 3
	logSvcNum := 3
	opt := DefaultOptions().
		WithDNServiceNum(dnSvcNum).
		WithLogServiceNum(logSvcNum)

	// initialize cluster
	c, err := NewCluster(t, opt)
	require.NoError(t, err)

	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())

	// -------------------------------------------
	// the following tests `ClusterOperation`
	// -------------------------------------------

	// 1. start/close dn services in different ways
	dsuuids := c.ListDNServices()
	require.Equal(t, dnSvcNum, len(dsuuids))
	// 1.a start/close dn service by uuid
	{
		index := 0
		dsuuid := dsuuids[index]

		// get the instance of dn service
		ds, err := c.GetDNService(dsuuid)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ds.Status())

		// start it (starting an already started service is a no-op)
		err = c.StartDNService(dsuuid)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ds.Status())

		// close it
		err = c.CloseDNService(dsuuid)
		require.NoError(t, err)
		require.Equal(t, ServiceClosed, ds.Status())
	}

	// 1.b start/close dn service by index
	{
		index := 1

		// get the instance of dn service
		ds, err := c.GetDNServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ds.Status())

		// start it
		err = c.StartDNServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ds.Status())

		// close it
		err = c.CloseDNServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceClosed, ds.Status())
	}

	// 1.c start/close dn service by instance
	{
		index := 2

		// get the instance of dn service
		ds, err := c.GetDNServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ds.Status())

		// start it
		err = ds.Start()
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ds.Status())

		// close it
		err = ds.Close()
		require.NoError(t, err)
		require.Equal(t, ServiceClosed, ds.Status())
	}

	// 2. start/close log services in different ways
	lsuuids := c.ListLogServices()
	require.Equal(t, logSvcNum, len(lsuuids))
	// 2.a start/close log service by uuid
	{
		index := 0
		lsuuid := lsuuids[index]

		// get the instance of log service
		ls, err := c.GetLogService(lsuuid)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ls.Status())

		// start it
		err = c.StartLogService(lsuuid)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ls.Status())

		// close it
		err = c.CloseLogService(lsuuid)
		require.NoError(t, err)
		require.Equal(t, ServiceClosed, ls.Status())
	}

	// 2.b start/close log service by index
	{
		index := 1

		// get the instance of log service
		ls, err := c.GetLogServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ls.Status())

		// start it
		err = c.StartLogServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ls.Status())

		// close it
		err = c.CloseLogServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceClosed, ls.Status())
	}

	// 2.c start/close log service by instance
	{
		index := 2

		// get the instance of log service
		ls, err := c.GetLogServiceIndexed(index)
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ls.Status())

		// start it
		err = ls.Start()
		require.NoError(t, err)
		require.Equal(t, ServiceStarted, ls.Status())

		// close it
		err = ls.Close()
		require.NoError(t, err)
		require.Equal(t, ServiceClosed, ls.Status())
	}
}
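
// startCloser is a hypothetical local interface, not part of the original
// file: it names just the methods that TestClusterOperation above invokes on
// both DN and log service instances, so the repeated start/assert/close
// sequence could be expressed once per service kind.
type startCloser interface {
	Start() error
	Close() error
}

// requireRestartable sketches that shared assertion: starting an
// already-started service succeeds, and closing it succeeds as well.
func requireRestartable(t *testing.T, s startCloser) {
	require.NoError(t, s.Start())
	require.NoError(t, s.Close())
}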

func TestClusterState(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}

	if !supportMultiDN {
		t.Skip("skipping, multi-DN not supported")
		return
	}

	dnSvcNum := 2
	logSvcNum := 3
	opt := DefaultOptions().
		WithDNServiceNum(dnSvcNum).
		WithLogServiceNum(logSvcNum)

	// initialize cluster
	c, err := NewCluster(t, opt)
	require.NoError(t, err)

	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())

	// ----------------------------------------
	// the following tests `ClusterState`.
	// ----------------------------------------
	ctx1, cancel1 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel1()
	leader := c.WaitHAKeeperLeader(ctx1)
	require.NotNil(t, leader)

	dsuuids := c.ListDNServices()
	require.Equal(t, dnSvcNum, len(dsuuids))

	lsuuids := c.ListLogServices()
	require.Equal(t, logSvcNum, len(lsuuids))

	// we must wait for hakeeper's running state, otherwise hakeeper won't
	// receive heartbeats.
	ctx2, cancel2 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel2()
	c.WaitHAKeeperState(ctx2, logpb.HAKeeperRunning)

	hkstate := c.GetHAKeeperState()
	require.Equal(t, logpb.HAKeeperRunning, hkstate)

	// cluster should be healthy
	require.True(t, c.IsClusterHealthy())

	ctx3, cancel3 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel3()
	state, err := c.GetClusterState(ctx3)
	require.NoError(t, err)
	require.Equal(t, dnSvcNum, len(state.DNState.Stores))
	require.Equal(t, logSvcNum, len(state.LogState.Stores))

	// FIXME: validate the result list of dn shards
	ctx4, cancel4 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel4()
	_, err = c.ListDNShards(ctx4)
	require.NoError(t, err)

	// FIXME: validate the result list of log shards
	ctx5, cancel5 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel5()
	_, err = c.ListLogShards(ctx5)
	require.NoError(t, err)

	// test for:
	//   - GetDNStoreInfo
	//   - GetDNStoreInfoIndexed
	//   - DNStoreExpired
	//   - DNStoreExpiredIndexed
	{
		dnIndex := 0
		dsuuid := dsuuids[dnIndex]

		ctx6, cancel6 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel6()
		dnStoreInfo1, err := c.GetDNStoreInfo(ctx6, dsuuid)
		require.NoError(t, err)

		ctx7, cancel7 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel7()
		dnStoreInfo2, err := c.GetDNStoreInfoIndexed(ctx7, dnIndex)
		require.NoError(t, err)
		require.Equal(t, dnStoreInfo1.Shards, dnStoreInfo2.Shards)

		expired1, err := c.DNStoreExpired(dsuuid)
		require.NoError(t, err)
		require.False(t, expired1)

		expired2, err := c.DNStoreExpiredIndexed(dnIndex)
		require.NoError(t, err)
		require.False(t, expired2)
	}

	// test for:
	//   - GetLogStoreInfo
	//   - GetLogStoreInfoIndexed
	//   - LogStoreExpired
	//   - LogStoreExpiredIndexed
	{
		logIndex := 1
		lsuuid := lsuuids[logIndex]

		ctx8, cancel8 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel8()
		logStoreInfo1, err := c.GetLogStoreInfo(ctx8, lsuuid)
		require.NoError(t, err)

		ctx9, cancel9 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel9()
		logStoreInfo2, err := c.GetLogStoreInfoIndexed(ctx9, logIndex)
		require.NoError(t, err)
		// TODO: sort and compare the replica lists in detail.
		require.Equal(t, len(logStoreInfo1.Replicas), len(logStoreInfo2.Replicas))
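
		// A possible approach for the TODO above (a hypothetical sketch, not
		// part of the original file): sort both replica lists by a stable key
		// and compare them in full. The element type of Replicas and its
		// ReplicaID field are assumptions here; adjust the sort key to the
		// actual type, and import the standard library "sort" package.
		//
		//	sort.Slice(logStoreInfo1.Replicas, func(i, j int) bool {
		//		return logStoreInfo1.Replicas[i].ReplicaID < logStoreInfo1.Replicas[j].ReplicaID
		//	})
		//	sort.Slice(logStoreInfo2.Replicas, func(i, j int) bool {
		//		return logStoreInfo2.Replicas[i].ReplicaID < logStoreInfo2.Replicas[j].ReplicaID
		//	})
		//	require.Equal(t, logStoreInfo1.Replicas, logStoreInfo2.Replicas)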

		expired1, err := c.LogStoreExpired(lsuuid)
		require.NoError(t, err)
		require.False(t, expired1)

		expired2, err := c.LogStoreExpiredIndexed(logIndex)
		require.NoError(t, err)
		require.False(t, expired2)
	}
}

func TestClusterWaitState(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}

	if !supportMultiDN {
		t.Skip("skipping, multi-DN not supported")
		return
	}

	dnSvcNum := 2
	logSvcNum := 3
	opt := DefaultOptions().
		WithDNServiceNum(dnSvcNum).
		WithLogServiceNum(logSvcNum)

	// initialize cluster
	c, err := NewCluster(t, opt)
	require.NoError(t, err)

	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())

	// we must wait for hakeeper's running state, otherwise hakeeper won't
	// receive heartbeats.
	ctx1, cancel1 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel1()
	c.WaitHAKeeperState(ctx1, logpb.HAKeeperRunning)

	// --------------------------------------------
	// the following tests `ClusterWaitState`.
	// --------------------------------------------

	// test WaitDNShardsReported
	{
		ctx2, cancel2 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel2()
		c.WaitDNShardsReported(ctx2)
	}

	// test WaitLogShardsReported
	{
		ctx3, cancel3 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel3()
		c.WaitLogShardsReported(ctx3)
	}

	// test WaitDNReplicaReported
	{
		ctx4, cancel4 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel4()
		dnShards, err := c.ListDNShards(ctx4)
		require.NoError(t, err)
		require.NotZero(t, len(dnShards))

		dnShardID := dnShards[0].ShardID
		ctx5, cancel5 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel5()
		c.WaitDNReplicaReported(ctx5, dnShardID)
	}

	// test WaitLogReplicaReported
	{
		ctx6, cancel6 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel6()
		logShards, err := c.ListLogShards(ctx6)
		require.NoError(t, err)
		require.NotZero(t, len(logShards))

		logShardID := logShards[0].ShardID
		ctx7, cancel7 := context.WithTimeout(context.Background(), defaultTestTimeout)
		defer cancel7()
		c.WaitLogReplicaReported(ctx7, logShardID)
	}
}
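
// In the partition API exercised below, NewNetworkPartition takes the DN
// service indices first and the log service indices second (the third
// argument stays nil in this test), and RemainingNetworkPartition builds the
// complementary partition covering every service not already named. A
// partition only takes effect between StartNetworkPartition and
// CloseNetworkPartition.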

func TestNetworkPartition(t *testing.T) {
	defer leaktest.AfterTest(t)()
	if testing.Short() {
		t.Skip("skipping in short mode.")
		return
	}

	if !supportMultiDN {
		t.Skip("skipping, multi-DN not supported")
		return
	}

	dnSvcNum := 2
	logSvcNum := 4
	opt := DefaultOptions().
		WithDNServiceNum(dnSvcNum).
		WithLogServiceNum(logSvcNum)

	// initialize cluster
	c, err := NewCluster(t, opt)
	require.NoError(t, err)

	// close the cluster
	defer func(c Cluster) {
		require.NoError(t, c.Close())
	}(c)
	// start the cluster
	require.NoError(t, c.Start())

	// we must wait for hakeeper's running state, otherwise hakeeper won't
	// receive heartbeats.
	ctx1, cancel1 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel1()
	c.WaitHAKeeperState(ctx1, logpb.HAKeeperRunning)

	// --------------------------------------------
	// the following tests network partition
	// --------------------------------------------

	// dn service indices: 0, 1
	// log service indices: 0, 1, 2, 3
	// separate dn service 1 from the other services
	partition1 := c.NewNetworkPartition([]uint32{1}, nil, nil)
	require.Equal(t, []uint32{1}, partition1.ListDNServiceIndex())
	require.Nil(t, partition1.ListLogServiceIndex())

	partition2 := c.RemainingNetworkPartition(partition1)
	require.Equal(t, []uint32{0}, partition2.ListDNServiceIndex())
	require.Equal(t, []uint32{0, 1, 2, 3}, partition2.ListLogServiceIndex())

	// enable the network partition
	c.StartNetworkPartition(partition1, partition2)
	ctx2, cancel2 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel2()
	c.WaitDNStoreTimeoutIndexed(ctx2, 1)

	// disable the network partition
	c.CloseNetworkPartition()
	ctx3, cancel3 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel3()
	c.WaitDNStoreReportedIndexed(ctx3, 1)

	// dn service indices: 0, 1
	// log service indices: 0, 1, 2, 3
	// separate log service 3 from the other services
	partition3 := c.NewNetworkPartition(nil, []uint32{3}, nil)
	require.Nil(t, partition3.ListDNServiceIndex())
	require.Equal(t, []uint32{3}, partition3.ListLogServiceIndex())

	partition4 := c.RemainingNetworkPartition(partition3)
	require.Equal(t, []uint32{0, 1}, partition4.ListDNServiceIndex())
	require.Equal(t, []uint32{0, 1, 2}, partition4.ListLogServiceIndex())

	// enable the network partition
	c.StartNetworkPartition(partition3, partition4)
	ctx4, cancel4 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel4()
	c.WaitLogStoreTimeoutIndexed(ctx4, 3)

	// disable the network partition
	c.CloseNetworkPartition()
	ctx5, cancel5 := context.WithTimeout(context.Background(), defaultTestTimeout)
	defer cancel5()
	c.WaitLogStoreReportedIndexed(ctx5, 3)
}