github.com/m3db/m3@v1.5.0/src/cluster/services/services_test.go (about) 1 // Copyright (c) 2018 Uber Technologies, Inc. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to deal 5 // in the Software without restriction, including without limitation the rights 6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 // copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 // THE SOFTWARE. 
package services

import (
	"errors"
	"fmt"
	"sync"
	"testing"
	"time"

	"github.com/m3db/m3/src/cluster/generated/proto/metadatapb"
	"github.com/m3db/m3/src/cluster/generated/proto/placementpb"
	"github.com/m3db/m3/src/cluster/kv"
	"github.com/m3db/m3/src/cluster/kv/mem"
	"github.com/m3db/m3/src/cluster/placement"
	"github.com/m3db/m3/src/cluster/placement/storage"
	"github.com/m3db/m3/src/cluster/shard"
	"github.com/m3db/m3/src/x/instrument"
	xos "github.com/m3db/m3/src/x/os"
	xwatch "github.com/m3db/m3/src/x/watch"

	"github.com/golang/mock/gomock"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestConvertBetweenProtoAndService verifies that a placement proto plus a
// ServiceID converts into a Service whose replication, sharding, and
// per-instance fields (ID, endpoint, shards, service ID) match the proto.
func TestConvertBetweenProtoAndService(t *testing.T) {
	protoShards := getProtoShards([]uint32{0, 1, 2})
	sid := NewServiceID().
		SetName("test_service").
		SetEnvironment("test_env").
		SetZone("test_zone")
	p := &placementpb.Placement{
		Instances: map[string]*placementpb.Instance{
			"i1": {
				Id:             "i1",
				IsolationGroup: "r1",
				Zone:           "z1",
				Endpoint:       "e1",
				Weight:         1,
				Shards:         protoShards,
			},
			"i2": {
				Id:             "i2",
				IsolationGroup: "r2",
				Zone:           "z1",
				Endpoint:       "e2",
				Weight:         1,
				Shards:         protoShards,
			},
		},
		ReplicaFactor: 2,
		NumShards:     3,
		IsSharded:     true,
	}

	s, err := NewServiceFromProto(p, sid)
	assert.NoError(t, err)
	assert.Equal(t, 2, s.Replication().Replicas())
	assert.Equal(t, 3, s.Sharding().NumShards())
	assert.True(t, s.Sharding().IsSharded())

	i1, err := s.Instance("i1")
	assert.NoError(t, err)
	assert.Equal(t, "i1", i1.InstanceID())
	assert.Equal(t, "e1", i1.Endpoint())
	assert.Equal(t, 3, i1.Shards().NumShards())
	assert.Equal(t, sid, i1.ServiceID())
	assert.True(t, i1.Shards().Contains(0))
	assert.True(t, i1.Shards().Contains(1))
	assert.True(t, i1.Shards().Contains(2))

	i2, err := s.Instance("i2")
	assert.NoError(t, err)
	assert.Equal(t, "i2", i2.InstanceID())
	assert.Equal(t, "e2", i2.Endpoint())
	assert.Equal(t, 3, i2.Shards().NumShards())
	assert.Equal(t, sid, i2.ServiceID())
	assert.True(t, i2.Shards().Contains(0))
	assert.True(t, i2.Shards().Contains(1))
	assert.True(t, i2.Shards().Contains(2))
}

// getProtoShards builds a slice of placement shard protos, one per given ID,
// all in the AVAILABLE state.
func getProtoShards(ids []uint32) []*placementpb.Shard {
	r := make([]*placementpb.Shard, len(ids))
	for i, id := range ids {
		r[i] = &placementpb.Shard{
			Id:    id,
			State: placementpb.ShardState_AVAILABLE,
		}
	}
	return r
}

// TestMetadata exercises Metadata/SetMetadata/DeleteMetadata round-trips,
// including the error paths for a missing service name and a metadata key
// that has not been set (or has been deleted).
func TestMetadata(t *testing.T) {
	opts, _ := testSetup()

	sd, err := NewServices(opts)
	require.NoError(t, err)

	// No name on the service ID -> errNoServiceName.
	sid := NewServiceID()
	_, err = sd.Metadata(sid)
	require.Error(t, err)
	require.Equal(t, errNoServiceName, err)

	// Named service but no metadata stored yet -> kv.ErrNotFound.
	sid = sid.SetName("m3db")
	_, err = sd.Metadata(sid)
	require.Error(t, err)
	require.Equal(t, kv.ErrNotFound, err)

	m := NewMetadata().
		SetPort(1).
		SetLivenessInterval(30 * time.Second).
		SetHeartbeatInterval(10 * time.Second)
	err = sd.SetMetadata(sid, m)
	require.NoError(t, err)

	mGet, err := sd.Metadata(sid)
	require.NoError(t, err)
	require.Equal(t, m, mGet)

	err = sd.DeleteMetadata(sid)
	require.NoError(t, err)

	// After deletion the metadata is gone again.
	mGet, err = sd.Metadata(sid)
	require.Error(t, err)
	require.Nil(t, mGet)
}

// TestAdvertiseErrors walks Advertise through each validation failure in
// order (missing placement instance, service ID, instance ID, service name,
// metadata), then verifies a duplicate advertisement is rejected.
func TestAdvertiseErrors(t *testing.T) {
	opts, _ := testSetup()

	sd, err := NewServices(opts)
	require.NoError(t, err)

	ad := NewAdvertisement()
	err = sd.Advertise(ad)
	require.Error(t, err)
	require.Equal(t, errAdPlacementMissing, err)

	ad = NewAdvertisement().
		SetPlacementInstance(placement.NewInstance())
	err = sd.Advertise(ad)
	require.Error(t, err)
	require.Equal(t, errNoServiceID, err)

	sid := NewServiceID()
	ad = ad.SetServiceID(sid)
	err = sd.Advertise(ad)
	require.Error(t, err)
	require.Equal(t, errNoInstanceID, err)

	i1 := placement.NewInstance().SetID("i1")

	ad = ad.SetPlacementInstance(i1)
	err = sd.Advertise(ad)
	require.Error(t, err)
	require.Equal(t, errNoServiceName, err)

	sid = sid.SetName("m3db")
	ad = ad.SetServiceID(sid)
	err = sd.Advertise(ad)
	require.Error(t, err)
	require.Equal(t, kv.ErrNotFound, err)

	err = sd.SetMetadata(
		sid,
		NewMetadata().
			SetLivenessInterval(2*time.Second).
			SetHeartbeatInterval(time.Second),
	)
	require.NoError(t, err)

	err = sd.Advertise(ad)
	require.NoError(t, err)

	// the service and instance is already being advertised
	err = sd.Advertise(ad)
	require.Error(t, err)
}

// TestAdvertise_NoDelay verifies that the first heartbeat happens immediately
// on Advertise rather than waiting one heartbeat interval: the intervals here
// are deliberately huge (hours/minutes), so the only way the heartbeat store
// can observe the instance quickly is via the initial heartbeat.
func TestAdvertise_NoDelay(t *testing.T) {
	opts, hbGen := testSetup()

	sd, err := NewServices(opts)
	require.NoError(t, err)

	// Same validation-error staircase as TestAdvertiseErrors.
	ad := NewAdvertisement()
	err = sd.Advertise(ad)
	require.Error(t, err)
	require.Equal(t, errAdPlacementMissing, err)

	ad = NewAdvertisement().
		SetPlacementInstance(placement.NewInstance())
	err = sd.Advertise(ad)
	require.Error(t, err)
	require.Equal(t, errNoServiceID, err)

	sid := NewServiceID()
	ad = ad.SetServiceID(sid)
	err = sd.Advertise(ad)
	require.Error(t, err)
	require.Equal(t, errNoInstanceID, err)

	i1 := placement.NewInstance().SetID("i1")

	ad = ad.SetPlacementInstance(i1)
	err = sd.Advertise(ad)
	require.Error(t, err)
	require.Equal(t, errNoServiceName, err)

	sid = sid.SetName("m3db")
	ad = ad.SetServiceID(sid)
	err = sd.Advertise(ad)
	require.Error(t, err)
	require.Equal(t, kv.ErrNotFound, err)

	err = sd.SetMetadata(
		sid,
		NewMetadata().
			SetLivenessInterval(time.Hour).
			SetHeartbeatInterval(30*time.Minute),
	)
	require.NoError(t, err)

	err = sd.Advertise(ad)
	require.NoError(t, err)

	hbGen.Lock()
	hb := hbGen.hbs[serviceKey(sid)]
	hbGen.Unlock()

	// hb store should show advertisement (almost) immediately
	var insts []string
	for {
		insts, err = hb.Get()
		if len(insts) == 1 || err != nil {
			break
		}
	}
	assert.NoError(t, err)
	assert.Equal(t, []string{"i1"}, insts)
}

// TestUnadvertiseErrors verifies Unadvertise rejects a nil service ID, an
// empty instance ID, and an instance that was never advertised.
func TestUnadvertiseErrors(t *testing.T) {
	opts, _ := testSetup()

	sd, err := NewServices(opts)
	require.NoError(t, err)

	err = sd.Unadvertise(nil, "")
	require.Error(t, err)
	require.Equal(t, errNoServiceID, err)

	sid := NewServiceID()
	err = sd.Unadvertise(sid, "")
	require.Error(t, err)
	require.Equal(t, errNoInstanceID, err)

	// could not find heartbeat from this service instance
	err = sd.Unadvertise(sid, "i1")
	require.Error(t, err)
}

// TestUnadvertise verifies that an instance with an existing heartbeat can be
// unadvertised exactly once; a second Unadvertise for the same instance fails.
func TestUnadvertise(t *testing.T) {
	opts, m := testSetup()

	sd, err := NewServices(opts)
	require.NoError(t, err)

	sid := NewServiceID().SetName("m3db").SetZone("zone1")

	// Nothing advertised yet.
	err = sd.Unadvertise(sid, "i1")
	require.Error(t, err)

	s, ok := m.getMockStore(sid)
	require.True(t, ok)

	i1 := placement.NewInstance().SetID("i1")

	// Seed a heartbeat directly in the mock store so Unadvertise can find it.
	err = s.Heartbeat(i1, time.Hour)
	require.NoError(t, err)

	err = sd.Unadvertise(sid, "i1")
	require.NoError(t, err)

	err = sd.Unadvertise(sid, "i1")
	require.Error(t, err)
}

// TestAdvertiseUnadvertise verifies the full advertise -> heartbeat ->
// unadvertise -> re-advertise cycle: heartbeats stop after Unadvertise and
// resume after a subsequent Advertise.
func TestAdvertiseUnadvertise(t *testing.T) {
	opts, m := testSetup()

	sd, err := NewServices(opts)
	require.NoError(t, err)

	sid := NewServiceID().SetName("m3db").SetZone("zone1")
	hbInterval := 10 * time.Millisecond
	err = sd.SetMetadata(
		sid,
		NewMetadata().
			SetLivenessInterval(2*time.Second).
			SetHeartbeatInterval(hbInterval),
	)
	require.NoError(t, err)

	ad := NewAdvertisement().
		SetServiceID(sid).
		SetPlacementInstance(placement.NewInstance().SetID("i1"))

	require.NoError(t, sd.Advertise(ad))
	s, ok := m.getMockStore(sid)
	require.True(t, ok)

	// wait for one heartbeat
	for {
		ids, _ := s.Get()
		if len(ids) == 1 {
			break
		}
	}

	require.NoError(t, sd.Unadvertise(sid, "i1"))
	ids, err := s.Get()
	require.NoError(t, err)
	require.Equal(t, 0, len(ids), fmt.Sprintf("ids: %v", ids))

	// give enough time for another heartbeat
	time.Sleep(hbInterval)
	ids, err = s.Get()
	require.NoError(t, err)
	require.Equal(t, 0, len(ids), fmt.Sprintf("ids: %v", ids))

	// resume heartbeat
	require.NoError(t, sd.Advertise(ad))
	for {
		ids, err = s.Get()
		require.NoError(t, err)
		if len(ids) == 1 {
			break
		}
	}
}

// TestIsHealthy verifies isHealthy: no health function or a nil-returning one
// counts as healthy; an error-returning one counts as unhealthy.
func TestIsHealthy(t *testing.T) {
	require.True(t, isHealthy(NewAdvertisement()))

	require.True(t, isHealthy(NewAdvertisement().SetHealth(func() error { return nil })))

	require.False(t, isHealthy(NewAdvertisement().SetHealth(func() error { return errors.New("err") })))
}

// TestQueryIncludeUnhealthy verifies Query with IncludeUnhealthy=true returns
// every placement instance regardless of heartbeat state, plus the validation
// errors for a nameless service ID and a missing placement.
func TestQueryIncludeUnhealthy(t *testing.T) {
	opts, _ := testSetup()

	sd, err := NewServices(opts)
	require.NoError(t, err)

	sid := NewServiceID()
	qopts := NewQueryOptions().SetIncludeUnhealthy(true)
	_, err = sd.Query(sid, qopts)
	require.Error(t, err)
	require.Equal(t, errNoServiceName, err)

	sid = sid.SetName("m3db")
	_, err = sd.Query(sid, qopts)
	require.Error(t, err)
	require.Equal(t, kv.ErrNotFound, err)

	p := placement.NewPlacement().SetInstances([]placement.Instance{
		placement.NewInstance().
			SetID("i1").
			SetEndpoint("e1").
			SetShards(shard.NewShards([]shard.Shard{shard.NewShard(1).SetState(shard.Initializing)})),
		placement.NewInstance().
			SetID("i2").
			SetEndpoint("e2").
			SetShards(shard.NewShards([]shard.Shard{shard.NewShard(1).SetState(shard.Initializing)})),
	}).SetShards([]uint32{1}).SetReplicaFactor(2).SetIsSharded(true)

	ps, err := newTestPlacementStorage(sid, opts, placement.NewOptions())
	require.NoError(t, err)

	_, err = ps.SetIfNotExist(p)
	require.NoError(t, err)

	s, err := sd.Query(sid, qopts)
	require.NoError(t, err)
	require.Equal(t, 2, len(s.Instances()))
	require.Equal(t, sid, s.Instances()[0].ServiceID())
	require.Equal(t, 1, s.Sharding().NumShards())
	require.Equal(t, 2, s.Replication().Replicas())
}

// TestQueryNotIncludeUnhealthy verifies the default Query filters instances
// by heartbeat: with no heartbeats it returns no instances, and after a
// heartbeat for "i1" only that instance is returned.
func TestQueryNotIncludeUnhealthy(t *testing.T) {
	opts, _ := testSetup()

	sd, err := NewServices(opts)
	require.NoError(t, err)

	sid := NewServiceID()
	qopts := NewQueryOptions()
	_, err = sd.Query(sid, qopts)
	require.Error(t, err)
	require.Equal(t, errNoServiceName, err)

	sid = sid.SetName("m3db").SetZone("zone1")
	_, err = sd.Query(sid, qopts)
	require.Error(t, err)
	require.Equal(t, kv.ErrNotFound, err)

	p := placement.NewPlacement().SetInstances([]placement.Instance{
		placement.NewInstance().
			SetID("i1").
			SetEndpoint("e1").
			SetShards(shard.NewShards([]shard.Shard{shard.NewShard(1).SetState(shard.Initializing)})),
		placement.NewInstance().
			SetID("i2").
			SetEndpoint("e2").
			SetShards(shard.NewShards([]shard.Shard{shard.NewShard(1).SetState(shard.Initializing)})),
	}).SetShards([]uint32{1}).SetReplicaFactor(2).SetIsSharded(true)

	ps, err := newTestPlacementStorage(sid, opts, placement.NewOptions())
	require.NoError(t, err)

	_, err = ps.SetIfNotExist(p)
	require.NoError(t, err)

	// No heartbeats yet, so every instance is filtered out.
	s, err := sd.Query(sid, qopts)
	require.NoError(t, err)
	require.Equal(t, 0, len(s.Instances()))
	require.Equal(t, 1, s.Sharding().NumShards())
	require.Equal(t, 2, s.Replication().Replicas())

	hb, err := opts.HeartbeatGen()(sid)
	require.NoError(t, err)

	i1 := placement.NewInstance().SetID("i1")

	err = hb.Heartbeat(i1, time.Second)
	require.NoError(t, err)

	// Only the heartbeating instance survives the filter.
	s, err = sd.Query(sid, qopts)
	require.NoError(t, err)
	require.Equal(t, 1, len(s.Instances()))
	si := s.Instances()[0]
	require.Equal(t, sid, si.ServiceID())
	require.Equal(t, "i1", si.InstanceID())
	require.Equal(t, 1, s.Sharding().NumShards())
	require.Equal(t, 2, s.Replication().Replicas())
}

// TestWatchIncludeUnhealthy verifies a Watch with IncludeUnhealthy=true:
// it receives each valid placement update, ignores a value of the wrong proto
// type written to the placement key, is not notified on placement delete, and
// resumes updating when the next valid placement is set.
func TestWatchIncludeUnhealthy(t *testing.T) {
	opts, _ := testSetup()

	sd, err := NewServices(opts)
	require.NoError(t, err)

	qopts := NewQueryOptions().SetIncludeUnhealthy(true)
	sid := NewServiceID()
	_, err = sd.Watch(sid, qopts)
	require.Error(t, err)
	require.Equal(t, errNoServiceName, err)

	// Watch fails (init timeout) while no placement exists.
	sid = sid.SetName("m3db").SetZone("zone1")
	_, err = sd.Watch(sid, qopts)
	require.Error(t, err)

	sd, err = NewServices(opts.SetInitTimeout(defaultInitTimeout))
	require.NoError(t, err)

	p := placement.NewPlacement().
		SetInstances([]placement.Instance{
			placement.NewInstance().
				SetID("i1").
				SetEndpoint("e1").
				SetShards(shard.NewShards([]shard.Shard{shard.NewShard(1).SetState(shard.Initializing)})),
			placement.NewInstance().
				SetID("i2").
				SetEndpoint("e2").
				SetShards(shard.NewShards([]shard.Shard{shard.NewShard(1).SetState(shard.Initializing)})),
		}).
		SetShards([]uint32{1}).
		SetReplicaFactor(2).
		SetIsSharded(true)

	ps, err := sd.PlacementService(sid, placement.NewOptions())
	require.NoError(t, err)
	_, err = ps.Set(p)
	require.NoError(t, err)

	w, err := sd.Watch(sid, qopts)
	require.NoError(t, err)
	<-w.C()
	s := w.Get().(Service)
	require.Equal(t, 2, len(s.Instances()))
	require.Equal(t, 1, s.Sharding().NumShards())
	require.Equal(t, 2, s.Replication().Replicas())

	// A second placement (2 shards, RF 1) triggers a watch update.
	p = placement.NewPlacement().
		SetInstances([]placement.Instance{
			placement.NewInstance().
				SetID("i1").
				SetEndpoint("e1").
				SetShards(shard.NewShards([]shard.Shard{shard.NewShard(1).SetState(shard.Initializing)})),
			placement.NewInstance().
				SetID("i2").
				SetEndpoint("e2").
				SetShards(shard.NewShards([]shard.Shard{shard.NewShard(2).SetState(shard.Initializing)})),
		}).
		SetShards([]uint32{1, 2}).
		SetReplicaFactor(1).
		SetIsSharded(true)

	_, err = ps.Set(p)
	require.NoError(t, err)

	<-w.C()
	s = w.Get().(Service)
	require.Equal(t, 2, len(s.Instances()))
	require.Equal(t, 2, s.Sharding().NumShards())
	require.Equal(t, 1, s.Replication().Replicas())

	c := sd.(*client)

	c.RLock()
	kvm, ok := c.kvManagers["zone1"]
	c.RUnlock()
	require.True(t, ok)

	// set a bad value for placement
	v, err := kvm.kv.Set(keyFnWithNamespace(placementPrefix)(sid), &metadatapb.Metadata{Port: 1})
	require.NoError(t, err)
	require.Equal(t, 3, v)

	// make sure the newly set bad value has been propagated to watches
	testWatch, err := kvm.kv.Watch(keyFnWithNamespace(placementPrefix)(sid))
	require.NoError(t, err)
	for range testWatch.C() {
		if testWatch.Get().Version() == 3 {
			break
		}
	}
	testWatch.Close()

	// make sure the bad value has been ignored
	s = w.Get().(Service)
	require.Equal(t, 0, len(w.C()))
	require.Equal(t, 2, len(s.Instances()))
	require.Equal(t, 2, s.Sharding().NumShards())
	require.Equal(t, 1, s.Replication().Replicas())

	// delete the placement
	err = ps.Delete()
	require.NoError(t, err)

	select {
	case <-w.C():
		require.Fail(t, "should not receive notification on delete")
	case <-time.After(500 * time.Millisecond):
	}

	p = placement.NewPlacement().
		SetInstances([]placement.Instance{
			placement.NewInstance().
				SetID("i1").
				SetEndpoint("e1").
				SetShards(shard.NewShards([]shard.Shard{shard.NewShard(0).SetState(shard.Initializing)})),
		}).
		SetShards([]uint32{0}).
		SetReplicaFactor(1).
		SetIsSharded(true)

	_, err = ps.Set(p)
	require.NoError(t, err)

	// when the next valid placement came through, the watch will be updated
	<-w.C()
	s = w.Get().(Service)
	require.Equal(t, 1, len(s.Instances()))
	require.Equal(t, 1, s.Sharding().NumShards())
	require.Equal(t, 1, s.Replication().Replicas())
	require.Equal(t, true, s.Sharding().IsSharded())

	w.Close()
}

// TestWatchNotIncludeUnhealthy verifies a default Watch merges placement
// updates with heartbeat updates from the mock store: instance membership
// tracks the heartbeat list, bad placement values are ignored, and heartbeat
// updates keep merging against the last known valid placement even after the
// placement is deleted.
func TestWatchNotIncludeUnhealthy(t *testing.T) {
	opts, m := testSetup()

	sd, err := NewServices(opts)
	require.NoError(t, err)

	qopts := NewQueryOptions()
	sid := NewServiceID().SetName("m3db").SetZone("zone1")

	// Watch fails (init timeout) while no placement exists.
	_, err = sd.Watch(sid, qopts)
	require.Error(t, err)

	sd, err = NewServices(opts.SetInitTimeout(defaultInitTimeout))
	require.NoError(t, err)

	p := placement.NewPlacement().
		SetInstances([]placement.Instance{
			placement.NewInstance().
				SetID("i1").
				SetEndpoint("e1").
				SetShards(shard.NewShards([]shard.Shard{shard.NewShard(1).SetState(shard.Initializing)})),
			placement.NewInstance().
				SetID("i2").
				SetEndpoint("e2").
				SetShards(shard.NewShards([]shard.Shard{shard.NewShard(1).SetState(shard.Initializing)})),
		}).
		SetShards([]uint32{1}).
		SetReplicaFactor(2).
		SetIsSharded(true)

	ps, err := sd.PlacementService(sid, placement.NewOptions())
	require.NoError(t, err)
	_, err = ps.Set(p)
	require.NoError(t, err)

	w, err := sd.Watch(sid, qopts)
	require.NoError(t, err)
	<-w.C()
	s := w.Get().(Service)
	// the heartbeat has nil value, so no filtering
	require.Equal(t, 2, len(s.Instances()))
	require.Equal(t, 1, s.Sharding().NumShards())
	require.Equal(t, 2, s.Replication().Replicas())

	mockHB, ok := m.getMockStore(sid)
	require.True(t, ok)

	hbWatchable, ok := mockHB.getWatchable(serviceKey(sid))
	require.True(t, ok)

	// heartbeat
	hbWatchable.Update([]string{"i1"})
	<-w.C()
	s = w.Get().(Service)
	require.Equal(t, 1, len(s.Instances()))
	require.Equal(t, "i1", s.Instances()[0].InstanceID())
	require.Equal(t, sid, s.Instances()[0].ServiceID())
	require.Equal(t, 1, s.Sharding().NumShards())
	require.Equal(t, 2, s.Replication().Replicas())

	hbWatchable.Update([]string{"i1", "i2"})
	<-w.C()
	s = w.Get().(Service)
	require.Equal(t, 2, len(s.Instances()))
	require.Equal(t, 1, s.Sharding().NumShards())
	require.Equal(t, 2, s.Replication().Replicas())

	hbWatchable.Update([]string{})

	<-w.C()
	s = w.Get().(Service)
	require.Equal(t, 0, len(s.Instances()))
	require.Equal(t, 1, s.Sharding().NumShards())
	require.Equal(t, 2, s.Replication().Replicas())

	hbWatchable.Update([]string{"i2"})

	<-w.C()
	s = w.Get().(Service)
	require.Equal(t, 1, len(s.Instances()))
	require.Equal(t, 1, s.Sharding().NumShards())
	require.True(t, s.Sharding().IsSharded())
	require.Equal(t, 2, s.Replication().Replicas())

	c := sd.(*client)

	c.RLock()
	kvm, ok := c.kvManagers["zone1"]
	c.RUnlock()
	require.True(t, ok)

	// set a bad value for placement
	v, err := kvm.kv.Set(keyFnWithNamespace(placementPrefix)(sid), &metadatapb.Metadata{Port: 1})
	require.NoError(t, err)
	require.Equal(t, 2, v)

	// make sure the newly set bad value has been propagated to watches
	testWatch, err := kvm.kv.Watch(keyFnWithNamespace(placementPrefix)(sid))
	require.NoError(t, err)
	for range testWatch.C() {
		if testWatch.Get().Version() == 2 {
			break
		}
	}
	testWatch.Close()

	// make sure the bad value has been ignored
	require.Equal(t, 0, len(w.C()))
	s = w.Get().(Service)
	require.Equal(t, 1, len(s.Instances()))
	require.Equal(t, 1, s.Sharding().NumShards())
	require.True(t, s.Sharding().IsSharded())
	require.Equal(t, 2, s.Replication().Replicas())

	// now receive a update from heartbeat Store
	// will try to merge it with existing valid placement
	hbWatchable.Update([]string{"i1", "i2"})

	<-w.C()
	s = w.Get().(Service)
	require.Equal(t, 2, len(s.Instances()))
	require.Equal(t, 1, s.Sharding().NumShards())
	require.True(t, s.Sharding().IsSharded())
	require.Equal(t, 2, s.Replication().Replicas())

	// delete the placement
	err = ps.Delete()
	require.NoError(t, err)

	select {
	case <-w.C():
		require.Fail(t, "should not receive notification on delete")
	case <-time.After(500 * time.Millisecond):
	}

	// the heartbeat update will be merged with the last known valid placement
	hbWatchable.Update([]string{"i1", "i2"})

	<-w.C()
	s = w.Get().(Service)
	require.Equal(t, 2, len(s.Instances()))
	require.Equal(t, 1, s.Sharding().NumShards())
	require.True(t, s.Sharding().IsSharded())
	require.Equal(t, 2, s.Replication().Replicas())

	p = placement.NewPlacement().
		SetInstances([]placement.Instance{
			placement.NewInstance().
				SetID("i1").
				SetEndpoint("e1").
				SetShards(shard.NewShards([]shard.Shard{shard.NewShard(0).SetState(shard.Initializing)})),
		}).
		SetShards([]uint32{0}).
		SetReplicaFactor(1).
		SetIsSharded(true)

	_, err = ps.Set(p)
	require.NoError(t, err)

	// when the next valid placement came through, the watch will be updated
	<-w.C()
	s = w.Get().(Service)
	require.Equal(t, 1, len(s.Instances()))
	require.Equal(t, 1, s.Sharding().NumShards())
	require.Equal(t, 1, s.Replication().Replicas())
	require.Equal(t, true, s.Sharding().IsSharded())
}

// TestMultipleWatches verifies that two Watch calls on the same service share
// a single underlying watchable (with two subscriber watches) and observe the
// same value.
func TestMultipleWatches(t *testing.T) {
	opts, _ := testSetup()

	qopts := NewQueryOptions()
	sid := NewServiceID().SetName("m3db").SetZone("zone1")

	p := placement.NewPlacement().
		SetInstances([]placement.Instance{
			placement.NewInstance().
				SetID("i1").
				SetEndpoint("e1").
				SetShards(shard.NewShards([]shard.Shard{shard.NewShard(1).SetState(shard.Initializing)})),
			placement.NewInstance().
				SetID("i2").
				SetEndpoint("e2").
				SetShards(shard.NewShards([]shard.Shard{shard.NewShard(1).SetState(shard.Initializing)})),
		}).
		SetShards([]uint32{1}).
		SetReplicaFactor(2).
		SetIsSharded(true)

	ps, err := newTestPlacementStorage(sid, opts, placement.NewOptions())
	require.NoError(t, err)

	_, err = ps.SetIfNotExist(p)
	require.NoError(t, err)

	sd, err := NewServices(opts)
	require.NoError(t, err)

	_, err = sd.Query(sid, qopts)
	require.NoError(t, err)

	w1, err := sd.Watch(sid, qopts)
	require.NoError(t, err)

	w2, err := sd.Watch(sid, qopts)
	require.NoError(t, err)

	kvm, ok := sd.(*client).kvManagers["zone1"]
	require.True(t, ok)

	// Both watches share the one cached watchable for this service.
	require.Equal(t, 1, len(kvm.serviceWatchables))
	for _, w := range kvm.serviceWatchables {
		require.Equal(t, 2, w.watches())
	}
	<-w1.C()
	<-w2.C()

	require.Equal(t, w1.Get(), w2.Get())

	w1.Close()
	w2.Close()
}

// TestWatch_GetAfterTimeout verifies that a watch created after an earlier
// init timeout still works, and keeps receiving updates across a placement
// set/delete/set sequence.
func TestWatch_GetAfterTimeout(t *testing.T) {
	sid := NewServiceID().SetName("m3db").SetZone("zone1")
	opts, _ := testSetup()

	sd, err := NewServices(opts)
	require.NoError(t, err)

	qopts := NewQueryOptions().SetIncludeUnhealthy(true)
	_, err = sd.Watch(sid, qopts)
	require.Error(t, err)

	sd, err = NewServices(opts.SetInitTimeout(defaultInitTimeout))
	require.NoError(t, err)

	p := placement.NewPlacement().
		SetInstances([]placement.Instance{
			placement.NewInstance().
				SetID("i1").
				SetEndpoint("e1").
				SetShards(shard.NewShards([]shard.Shard{shard.NewShard(1).SetState(shard.Initializing)})),
			placement.NewInstance().
				SetID("i2").
				SetEndpoint("e2").
				SetShards(shard.NewShards([]shard.Shard{shard.NewShard(1).SetState(shard.Initializing)})),
		}).
		SetShards([]uint32{1}).
		SetReplicaFactor(2).
		SetIsSharded(true)

	ps, err := sd.PlacementService(sid, placement.NewOptions())
	require.NoError(t, err)
	_, err = ps.Set(p)
	require.NoError(t, err)

	sd, err = NewServices(opts)
	require.NoError(t, err)

	w, err := sd.Watch(sid, qopts)
	require.NoError(t, err)
	<-w.C()
	s := w.Get().(Service)
	require.Equal(t, 2, len(s.Instances()))
	require.Equal(t, 1, s.Sharding().NumShards())
	require.Equal(t, 2, s.Replication().Replicas())

	// up the version to 2 and delete it and set a new placement
	// to verify the watch can still receive update on the new placement
	_, err = ps.Set(p)
	require.NoError(t, err)

	err = ps.Delete()
	require.NoError(t, err)

	p = placement.NewPlacement().
		SetInstances([]placement.Instance{
			placement.NewInstance().
				SetID("i1").
				SetEndpoint("e1").
				SetShards(shard.NewShards([]shard.Shard{shard.NewShard(0).SetState(shard.Initializing)})),
		}).
		SetShards([]uint32{0}).
		SetReplicaFactor(1).
		SetIsSharded(true)

	_, err = ps.Set(p)
	require.NoError(t, err)

	// Drain updates until the RF-1 placement is observed.
	for range w.C() {
		s = w.Get().(Service)
		if s.Replication().Replicas() == 1 {
			break
		}
	}
}

// TestWatchInterrupted verifies an already-closed interrupt channel aborts
// Watch when no init timeout is configured.
func TestWatchInterrupted(t *testing.T) {
	opts, _ := testSetup()
	sd, err := NewServices(opts.SetInitTimeout(0))
	require.NoError(t, err)

	testWatchInterrupted(t, sd)
}

// TestWatchInterruptedWithTimeout verifies the interrupt channel also aborts
// Watch ahead of a long init timeout.
func TestWatchInterruptedWithTimeout(t *testing.T) {
	opts, _ := testSetup()
	sd, err := NewServices(opts.SetInitTimeout(1 * time.Minute))
	require.NoError(t, err)

	testWatchInterrupted(t, sd)
}

// testWatchInterrupted is the shared body for the interrupted-watch tests:
// Watch must fail with xos.ErrInterrupted when the query options carry a
// closed interrupt channel.
func testWatchInterrupted(t *testing.T, s Services) {
	sid := NewServiceID().SetName("m3db").SetZone("zone1")

	interruptedCh := make(chan struct{})
	close(interruptedCh)

	qopts := NewQueryOptions().
		SetIncludeUnhealthy(true).
		SetInterruptedCh(interruptedCh)
	_, err := s.Watch(sid, qopts)
	require.Error(t, err)
	require.True(t, errors.Is(err, xos.ErrInterrupted))
}

// TestHeartbeatService verifies HeartbeatService requires a service name and
// otherwise returns a non-nil store.
func TestHeartbeatService(t *testing.T) {
	opts, _ := testSetup()

	sd, err := NewServices(opts)
	require.NoError(t, err)

	sid := NewServiceID()

	_, err = sd.HeartbeatService(sid)
	assert.Equal(t, errNoServiceName, err)

	sid = sid.SetName("m3db").SetZone("z1")

	hb, err := sd.HeartbeatService(sid)
	assert.NoError(t, err)
	assert.NotNil(t, hb)
}

// TestCacheCollisions_Heartbeat verifies the heartbeat-store cache keys on
// the full (name, environment, zone) tuple so same-named services in
// different env/zone combinations do not collide.
func TestCacheCollisions_Heartbeat(t *testing.T) {
	opts, _ := testSetup()

	sid := func() ServiceID {
		return NewServiceID().SetName("svc")
	}

	sd, err := NewServices(opts)
	require.NoError(t, err)
	c := sd.(*client)

	for _, id := range []ServiceID{
		sid().SetEnvironment("e1").SetZone("z1"),
		sid().SetEnvironment("e1").SetZone("z2"),
		sid().SetEnvironment("e2").SetZone("z1"),
		sid().SetEnvironment("e2").SetZone("z2"),
	} {
		_, err := sd.HeartbeatService(id)
		assert.NoError(t, err)
	}

	assert.Equal(t, 4, len(c.hbStores), "cached hb stores should have 4 unique entries")
}

// TestCacheCollisions_Watchables verifies service watchables are cached per
// (name, environment) within each zone's kv manager, i.e. 2 entries per zone
// for the 4 env/zone combinations above.
func TestCacheCollisions_Watchables(t *testing.T) {
	opts, _ := testSetup()

	sd, err := NewServices(opts.SetInitTimeout(defaultInitTimeout))
	require.NoError(t, err)

	sid := func() ServiceID {
		return NewServiceID().SetName("svc")
	}

	qopts := NewQueryOptions().SetIncludeUnhealthy(true)

	for _, id := range []ServiceID{
		sid().SetEnvironment("e1").SetZone("z1"),
		sid().SetEnvironment("e1").SetZone("z2"),
		sid().SetEnvironment("e2").SetZone("z1"),
		sid().SetEnvironment("e2").SetZone("z2"),
	} {
		_, err := sd.HeartbeatService(id)
		assert.NoError(t, err)

		ps, err := sd.PlacementService(id, placement.NewOptions())
		require.NoError(t, err)

		p := placement.NewPlacement().SetInstances([]placement.Instance{
			placement.NewInstance().SetID("i1").SetEndpoint("i:p"),
		})
		_, err = ps.Set(p)
		assert.NoError(t, err)

		_, err = sd.Watch(id, qopts)
		assert.NoError(t, err)
	}

	for _, z := range []string{"z1", "z2"} {
		kvm, err := sd.(*client).getKVManager(z)
		require.NoError(t, err)
		assert.Equal(t, 2, len(kvm.serviceWatchables), "each zone should have 2 unique watchable entries")
	}
}

// TestLeaderService verifies leader services are cached per unique
// (service ID, election options) pair: 2 services x 3 option sets = 6 entries.
func TestLeaderService(t *testing.T) {
	mc := gomock.NewController(t)
	defer mc.Finish()

	ld := newTestLeaderGen(mc)

	opts, _ := testSetup()

	opts = opts.SetLeaderGen(ld)
	cl, err := NewServices(opts)
	require.NoError(t, err)

	sid1 := NewServiceID().SetName("s1")
	sid2 := NewServiceID().SetName("s2")
	eo1 := NewElectionOptions()
	eo2 := NewElectionOptions().SetLeaderTimeout(30 * time.Second)
	eo3 := NewElectionOptions().SetResignTimeout(30 * time.Second)

	for _, sid := range []ServiceID{sid1, sid2} {
		for _, eo := range []ElectionOptions{eo1, eo2, eo3} {
			_, err := cl.LeaderService(sid, eo)
			assert.NoError(t, err)
		}
	}

	assert.Equal(t, 6, len(cl.(*client).ldSvcs),
		"should cache 6 unique client entries")
}

// TestServiceIDEqual verifies ServiceID accessors and that Equal requires all
// of name, environment, and zone to match (and rejects nil).
func TestServiceIDEqual(t *testing.T) {
	sid := NewServiceID().SetName("name").SetEnvironment("env").SetZone("zone")
	assert.Equal(t, "name", sid.Name())
	assert.Equal(t, "env", sid.Environment())
	assert.Equal(t, "zone", sid.Zone())

	assert.True(t, sid.Equal(NewServiceID().SetName("name").SetEnvironment("env").SetZone("zone")))
	assert.False(t, sid.Equal(NewServiceID().SetName("name").SetEnvironment("env")))
	assert.False(t, sid.Equal(NewServiceID().SetName("name").SetZone("zone")))
	assert.False(t, sid.Equal(NewServiceID().SetEnvironment("env").SetZone("zone")))
	assert.False(t, sid.Equal(nil))
}

// newTestLeaderGen returns a LeaderGen that always hands back the same mock
// LeaderService, regardless of service ID or election options.
func newTestLeaderGen(mc *gomock.Controller) LeaderGen {
	svc := NewMockLeaderService(mc)
	return func(sid ServiceID, eo ElectionOptions) (LeaderService, error) {
		return svc, nil
	}
}

// testSetup builds Options backed by in-memory kv stores (one per zone) and
// mock heartbeat stores, with a short 100ms init timeout; it also returns the
// mockHBGen so tests can reach into the heartbeat stores directly.
func testSetup() (Options, *mockHBGen) {
	var (
		lock   sync.Mutex
		stores = make(map[string]kv.Store)
	)
	// kvGen memoizes one mem.Store per zone under the lock.
	kvGen := func(zone string) (kv.Store, error) {
		lock.Lock()
		store, ok := stores[zone]
		if ok {
			lock.Unlock()
			return store, nil
		}
		store = mem.NewStore()
		stores[zone] = store
		lock.Unlock()
		return store, nil
	}

	m := &mockHBGen{
		hbs: map[string]*mockHBStore{},
	}
	hbGen := func(sid ServiceID) (HeartbeatService, error) {
		return m.genMockStore(sid)
	}

	return NewOptions().
		SetKVGen(kvGen).
		SetHeartbeatGen(hbGen).
		SetLeaderGen(emptyLdGen).
		SetInitTimeout(100 * time.Millisecond).
		SetInstrumentsOptions(instrument.NewOptions()), m
}

// mockHBGen generates and caches mockHBStores keyed by serviceKey(sid).
type mockHBGen struct {
	sync.Mutex

	hbs map[string]*mockHBStore
}

// genMockStore returns the cached store for sid, creating it on first use.
func (m *mockHBGen) genMockStore(sid ServiceID) (*mockHBStore, error) {
	k := serviceKey(sid)

	m.Lock()
	defer m.Unlock()

	s, ok := m.hbs[k]
	if ok {
		return s, nil
	}
	s = &mockHBStore{
		hbs:        map[string]map[string]time.Time{},
		watchables: map[string]xwatch.Watchable{},
		sid:        sid,
	}

	m.hbs[k] = s
	return s, nil
}

// getMockStore looks up the cached store for sid without creating one.
func (m *mockHBGen) getMockStore(sid ServiceID) (*mockHBStore, bool) {
	m.Lock()
	defer m.Unlock()

	s, ok := m.hbs[serviceKey(sid)]
	return s, ok
}

// mockHBStore is an in-memory HeartbeatService: it records heartbeat times
// per instance ID and exposes a watchable per service key that tests drive
// manually via getWatchable(...).Update.
type mockHBStore struct {
	sync.Mutex

	sid        ServiceID
	hbs        map[string]map[string]time.Time
	watchables map[string]xwatch.Watchable
}

// Heartbeat records a heartbeat for the instance; the ttl is ignored by this
// mock (entries never expire).
func (hb *mockHBStore) Heartbeat(instance placement.Instance, ttl time.Duration) error {
	hb.Lock()
	defer hb.Unlock()
	hbMap, ok := hb.hbs[serviceKey(hb.sid)]
	if !ok {
		hbMap = map[string]time.Time{}
		hb.hbs[serviceKey(hb.sid)] = hbMap
	}
	hbMap[instance.ID()] = time.Now()
	return nil
}

// Get returns the IDs of all instances that have heartbeated.
func (hb *mockHBStore) Get() ([]string, error) {
	hb.Lock()
	defer hb.Unlock()

	var r []string
	hbMap, ok := hb.hbs[serviceKey(hb.sid)]
	if !ok {
		return r, nil
	}

	r = make([]string, 0, len(hbMap))
	for k := range hbMap {
		r = append(r, k)
	}
	return r, nil
}

// GetInstances returns placeholder Instances (ID only) for all instances that
// have heartbeated.
func (hb *mockHBStore) GetInstances() ([]placement.Instance, error) {
	hb.Lock()
	defer hb.Unlock()

	var r []placement.Instance
	hbMap, ok := hb.hbs[serviceKey(hb.sid)]
	if !ok {
		return r, nil
	}

	r = make([]placement.Instance, 0, len(hbMap))
	for k := range hbMap {
		r = append(r, placement.NewInstance().SetID(k))
	}
	return r, nil
}

// Watch returns a watch on the (lazily created) watchable for this service;
// tests push values into it via getWatchable(...).Update.
func (hb *mockHBStore) Watch() (xwatch.Watch, error) {
	hb.Lock()
	defer hb.Unlock()

	watchable, ok := hb.watchables[serviceKey(hb.sid)]
	if ok {
		_, w, err := watchable.Watch()
		return w, err
	}

	watchable = xwatch.NewWatchable()
	hb.watchables[serviceKey(hb.sid)] = watchable

	_, w, err := watchable.Watch()
	return w, err
}

// getWatchable exposes the underlying watchable for a service key so tests
// can drive heartbeat updates directly.
func (hb *mockHBStore) getWatchable(s string) (xwatch.Watchable, bool) {
	hb.Lock()
	defer hb.Unlock()

	w, ok := hb.watchables[s]
	return w, ok
}

// Delete removes the heartbeat entry for the given instance ID, erroring if
// no heartbeat exists for the service or the instance.
func (hb *mockHBStore) Delete(id string) error {
	hb.Lock()
	defer hb.Unlock()

	hbMap, ok := hb.hbs[serviceKey(hb.sid)]
	if !ok {
		return errors.New("no hb found")
	}

	_, ok = hbMap[id]
	if !ok {
		return errors.New("no hb found")
	}

	delete(hbMap, id)
	return nil
}

// newTestPlacementStorage builds a placement.Storage over the kv store for
// the service's zone, keyed with the configured placement namespace, letting
// tests write placements without going through PlacementService.
func newTestPlacementStorage(sid ServiceID, opts Options, pOpts placement.Options) (placement.Storage, error) {
	if opts.KVGen() == nil {
		return nil, errNoKVGen
	}
	store, err := opts.KVGen()(sid.Zone())
	if err != nil {
		return nil, err
	}
	return storage.NewPlacementStorage(
		store,
		keyFnWithNamespace(placementNamespace(opts.NamespaceOptions().PlacementNamespace()))(sid),
		pOpts,
	), nil
}