github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/msg/integration/setup.go

// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package integration

import (
	"fmt"
	"strconv"
	"sync"
	"testing"
	"time"

	"github.com/m3db/m3/src/cluster/client"
	"github.com/m3db/m3/src/cluster/kv"
	"github.com/m3db/m3/src/cluster/kv/mem"
	"github.com/m3db/m3/src/cluster/placement"
	"github.com/m3db/m3/src/cluster/placement/service"
	"github.com/m3db/m3/src/cluster/placement/storage"
	"github.com/m3db/m3/src/cluster/services"
	"github.com/m3db/m3/src/msg/consumer"
	"github.com/m3db/m3/src/msg/producer"
	"github.com/m3db/m3/src/msg/producer/config"
	"github.com/m3db/m3/src/msg/topic"
	"github.com/m3db/m3/src/x/instrument"
	xio "github.com/m3db/m3/src/x/io"
	xsync "github.com/m3db/m3/src/x/sync"

	"github.com/golang/mock/gomock"
	"github.com/stretchr/testify/require"
	"go.uber.org/atomic"
	"go.uber.org/zap"
	"gopkg.in/yaml.v2"
)

const (
	numConcurrentMessages = 10
	numberOfShards        = 10
	msgPerShard           = 200
	closeTimeout          = 30 * time.Second
	topicName             = "topicName"
)

// consumerServiceConfig describes one consumer service to register on the
// test topic: its consumption type, instance and replica counts, whether its
// placement is sharded, and whether it joins after production has started.
type consumerServiceConfig struct {
	ct        topic.ConsumptionType
	instances int
	replicas  int
	isSharded bool
	lateJoin  bool
}

// op is an operation scheduled to fire once production reaches progressPct
// percent of the per-producer writes.
type op struct {
	progressPct int
	fn          func()
}

type setup struct {
	ts               topic.Service
	sd               *services.MockServices
	producers        []producer.Producer
	consumerServices []*testConsumerService
	totalConsumed    *atomic.Int64
	extraOps         []op
}
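// newTestSetup mocks the cluster config service against in-memory stores,
// registers one consumer service per config on a shared test topic, and
// initializes numProducers producers against it.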
func newTestSetup(
	t *testing.T,
	ctrl *gomock.Controller,
	numProducers int,
	configs []consumerServiceConfig,
) *setup {
	zap.L().Sugar().Debugf("setting up a test with %d producers", numProducers)

	configService := client.NewMockClient(ctrl)
	configService.EXPECT().Store(gomock.Any()).Return(mem.NewStore(), nil).AnyTimes()

	sd := services.NewMockServices(ctrl)
	configService.EXPECT().Services(gomock.Any()).Return(sd, nil).AnyTimes()

	var (
		testConsumerServices  []*testConsumerService
		topicConsumerServices []topic.ConsumerService
		totalConsumed         = atomic.NewInt64(0)
	)
	for i, cfg := range configs {
		zap.L().Sugar().Debugf("setting up a consumer service in %s mode with %d replicas", cfg.ct.String(), cfg.replicas)
		cs := newTestConsumerService(t, i, cfg, sd, numProducers, totalConsumed)
		topicConsumerServices = append(topicConsumerServices, cs.consumerService)
		testConsumerServices = append(testConsumerServices, cs)
	}

	ts, err := topic.NewService(topic.NewServiceOptions().SetConfigService(configService))
	require.NoError(t, err)

	testTopic := topic.NewTopic().
		SetName(topicName).
		SetNumberOfShards(uint32(numberOfShards)).
		SetConsumerServices(topicConsumerServices)
	_, err = ts.CheckAndSet(testTopic, kv.UninitializedVersion)
	require.NoError(t, err)

	var producers []producer.Producer
	for i := 0; i < numProducers; i++ {
		p := testProducer(t, configService)
		require.NoError(t, p.Init())
		producers = append(producers, p)
	}

	return &setup{
		ts:               ts,
		sd:               sd,
		producers:        producers,
		consumerServices: testConsumerServices,
		totalConsumed:    totalConsumed,
	}
}

// newTestConsumerService builds one consumer service: its consumers, an
// initial placement containing them, and the expectation that every producer
// will look up its placement service.
func newTestConsumerService(
	t *testing.T,
	i int,
	cfg consumerServiceConfig,
	sd *services.MockServices,
	numProducers int,
	totalConsumed *atomic.Int64,
) *testConsumerService {
	sid := serviceID(i)
	consumerService := topic.NewConsumerService().SetServiceID(sid).SetConsumptionType(cfg.ct)

	ps := testPlacementService(mem.NewStore(), sid, cfg.isSharded)
	sd.EXPECT().PlacementService(sid, gomock.Any()).Return(ps, nil).Times(numProducers)

	cs := testConsumerService{
		consumed:         make(map[string]struct{}),
		sid:              sid,
		placementService: ps,
		consumerService:  consumerService,
		config:           cfg,
	}
	var (
		instances []placement.Instance
		p         placement.Placement
		err       error
	)
	for j := 0; j < cfg.instances; j++ {
		c := newTestConsumer(t, &cs)
		c.consumeAndAck(totalConsumed)
		cs.testConsumers = append(cs.testConsumers, c)
		instances = append(instances, c.instance)
	}
	if cfg.isSharded {
		p, err = ps.BuildInitialPlacement(instances, numberOfShards, cfg.replicas)
	} else {
		p, err = ps.BuildInitialPlacement(instances, 0, cfg.replicas)
	}
	require.NoError(t, err)
	require.Equal(t, len(instances), p.NumInstances())
	return &cs
}

func (s *setup) TotalMessages() int {
	return msgPerShard * numberOfShards * len(s.producers)
}

func (s *setup) ExpectedNumMessages() int {
	return msgPerShard * numberOfShards
}

// Run produces every mock message through every producer, firing any
// scheduled operations at their progress points, then closes producers and
// consumers and asserts that at least the expected number of messages was
// consumed.
func (s *setup) Run(
	t *testing.T,
	ctrl *gomock.Controller,
) {
	numWritesPerProducer := s.ExpectedNumMessages()
	mockData := make([]producer.Message, 0, numWritesPerProducer)
	for i := 0; i < numberOfShards; i++ {
		for j := 0; j < msgPerShard; j++ {
			b := fmt.Sprintf("foo%d-%d", i, j)
			mm := producer.NewMockMessage(ctrl)
			mm.EXPECT().Size().Return(len(b)).AnyTimes()
			mm.EXPECT().Bytes().Return([]byte(b)).AnyTimes()
			mm.EXPECT().Shard().Return(uint32(i)).AnyTimes()
			mm.EXPECT().Finalize(producer.Consumed).Times(len(s.producers))
			mockData = append(mockData, mm)
		}
	}

	ops := make(map[int]func(), len(s.extraOps))
	for _, op := range s.extraOps {
		num := op.progressPct * numWritesPerProducer / 100
		ops[num] = op.fn
	}
	zap.L().Sugar().Debug("producing messages")
	for i := 0; i < numWritesPerProducer; i++ {
		if fn, ok := ops[i]; ok {
			fn()
		}
		m := mockData[i]
		for _, p := range s.producers {
			require.NoError(t, p.Produce(m))
		}
	}
	zap.L().Sugar().Debug("produced all the messages")
	s.CloseProducers(closeTimeout)
	s.CloseConsumers()

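	// A shared service consumes each message once regardless of replica
	// count, while a replicated service consumes it once per replica; late
	// joiners are excluded since they may have missed earlier messages.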
	expectedConsumeReplica := 0
	for _, cs := range s.consumerServices {
		if cs.config.lateJoin {
			continue
		}
		if cs.config.ct == topic.Shared {
			expectedConsumeReplica++
			continue
		}
		expectedConsumeReplica += cs.config.replicas
	}
	expectedConsumed := expectedConsumeReplica * numWritesPerProducer * len(s.producers)
	require.True(t, int(s.totalConsumed.Load()) >= expectedConsumed, fmt.Sprintf("expected at least %d, consumed %d", expectedConsumed, s.totalConsumed.Load()))
	zap.L().Sugar().Debug("done")
}

// VerifyConsumers asserts that each consumer service consumed every unique
// message payload.
func (s *setup) VerifyConsumers(t *testing.T) {
	numWritesPerProducer := s.ExpectedNumMessages()
	for _, cs := range s.consumerServices {
		require.Equal(t, numWritesPerProducer, cs.numConsumed())
	}
}

// CloseProducers closes all producers, waiting for outstanding messages to
// be consumed, and panics if they fail to close within dur.
func (s *setup) CloseProducers(dur time.Duration) {
	doneCh := make(chan struct{})

	go func() {
		for _, p := range s.producers {
			zap.L().Sugar().Debug("closing producer")
			p.Close(producer.WaitForConsumption)
			zap.L().Sugar().Debug("closed producer")
		}
		close(doneCh)
	}()

	select {
	case <-time.After(dur):
		panic(fmt.Sprintf("producers failed to close within %v", dur))
	case <-doneCh:
		zap.L().Sugar().Debugf("producers closed within %v", dur)
		return
	}
}

func (s *setup) CloseConsumers() {
	for _, cs := range s.consumerServices {
		cs.Close()
	}
}

// ScheduleOperations registers fn to run once production reaches pct percent
// of the per-producer writes; pct values outside [0, 100] are ignored.
func (s *setup) ScheduleOperations(pct int, fn func()) {
	if pct < 0 || pct > 100 {
		return
	}
	s.extraOps = append(s.extraOps, op{progressPct: pct, fn: fn})
}

// KillConnection closes a single consumer connection on the last instance of
// the idx-th consumer service, simulating a dropped connection.
func (s *setup) KillConnection(t *testing.T, idx int) {
	require.True(t, idx < len(s.consumerServices))
	cs := s.consumerServices[idx]

	testConsumers := cs.testConsumers
	require.NotEmpty(t, testConsumers)
	c := testConsumers[len(testConsumers)-1]
	c.closeOneConsumer()

	zap.L().Sugar().Debugf("killed a consumer on instance: %s", c.instance.ID())
	p, err := cs.placementService.Placement()
	require.NoError(t, err)
	zap.L().Sugar().Debugf("placement: %s", p.String())
}

// KillInstance closes the last instance of the idx-th consumer service
// without updating the placement, simulating an instance failure.
func (s *setup) KillInstance(t *testing.T, idx int) {
	require.True(t, idx < len(s.consumerServices))
	cs := s.consumerServices[idx]

	testConsumers := cs.testConsumers
	require.NotEmpty(t, testConsumers)
	c := testConsumers[len(testConsumers)-1]
	c.Close()

	zap.L().Sugar().Debugf("killed instance: %s", c.instance.ID())
	p, err := cs.placementService.Placement()
	require.NoError(t, err)
	zap.L().Sugar().Debugf("placement: %s", p.String())
}

func (s *setup) AddInstance(t *testing.T, idx int) {
	require.True(t, idx < len(s.consumerServices))
	cs := s.consumerServices[idx]

	newConsumer := newTestConsumer(t, cs)
	newConsumer.consumeAndAck(s.totalConsumed)

	p, err := cs.placementService.Placement()
	require.NoError(t, err)
	zap.L().Sugar().Debugf("old placement: %s", p.String())

	p, _, err = cs.placementService.AddInstances([]placement.Instance{newConsumer.instance})
	require.NoError(t, err)
	zap.L().Sugar().Debugf("new placement: %s", p.String())
	cs.testConsumers = append(cs.testConsumers, newConsumer)
}
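// RemoveInstance removes the most recently added instance from the placement
// and closes its consumer once the removal has been applied.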
func (s *setup) RemoveInstance(t *testing.T, idx int) {
	require.True(t, idx < len(s.consumerServices))
	cs := s.consumerServices[idx]

	testConsumers := cs.testConsumers
	require.NotEmpty(t, testConsumers)
	l := len(testConsumers)
	oldConsumer := testConsumers[l-1]
	defer oldConsumer.Close()

	p, err := cs.placementService.Placement()
	require.NoError(t, err)
	zap.L().Sugar().Debugf("old placement: %s", p.String())

	p, err = cs.placementService.RemoveInstances([]string{oldConsumer.instance.ID()})
	require.NoError(t, err)
	zap.L().Sugar().Debugf("new placement: %s", p.String())
	cs.testConsumers = testConsumers[:l-1]
}

// ReplaceInstance swaps the most recently added instance for a fresh one in
// a single placement operation.
func (s *setup) ReplaceInstance(t *testing.T, idx int) {
	require.True(t, idx < len(s.consumerServices))
	cs := s.consumerServices[idx]

	newConsumer := newTestConsumer(t, cs)
	newConsumer.consumeAndAck(s.totalConsumed)

	testConsumers := cs.testConsumers
	require.NotEmpty(t, testConsumers)
	l := len(testConsumers)
	oldConsumer := testConsumers[l-1]
	defer oldConsumer.Close()

	p, err := cs.placementService.Placement()
	require.NoError(t, err)
	zap.L().Sugar().Debugf("old placement: %s", p.String())

	p, _, err = cs.placementService.ReplaceInstances(
		[]string{oldConsumer.instance.ID()},
		[]placement.Instance{newConsumer.instance},
	)
	require.NoError(t, err)
	zap.L().Sugar().Debugf("new placement: %s", p.String())
	cs.testConsumers[l-1] = newConsumer
}

func (s *setup) RemoveConsumerService(t *testing.T, idx int) {
	require.True(t, idx < len(s.consumerServices))
	tpc, err := s.ts.Get(topicName)
	require.NoError(t, err)
	css := tpc.ConsumerServices()
	tpc = tpc.SetConsumerServices(append(css[:idx], css[idx+1:]...))
	_, err = s.ts.CheckAndSet(tpc, tpc.Version())
	require.NoError(t, err)
	tcss := s.consumerServices
	tcss[idx].Close()
	s.consumerServices = append(tcss[:idx], tcss[idx+1:]...)
}

func (s *setup) AddConsumerService(t *testing.T, cfg consumerServiceConfig) {
	cs := newTestConsumerService(t, len(s.consumerServices), cfg, s.sd, len(s.producers), s.totalConsumed)
	s.consumerServices = append(s.consumerServices, cs)
	tpc, err := s.ts.Get(topicName)
	require.NoError(t, err)
	tpc = tpc.SetConsumerServices(append(tpc.ConsumerServices(), cs.consumerService))
	_, err = s.ts.CheckAndSet(tpc, tpc.Version())
	require.NoError(t, err)
}

// testConsumerService tracks everything owned by one consumer service in the
// test: its consumers, placement service, topic registration, and the set of
// unique message payloads consumed so far.
type testConsumerService struct {
	sync.Mutex

	consumed         map[string]struct{}
	sid              services.ServiceID
	placementService placement.Service
	consumerService  topic.ConsumerService
	testConsumers    []*testConsumer
	config           consumerServiceConfig
}

func (cs *testConsumerService) markConsumed(b []byte) {
	cs.Lock()
	defer cs.Unlock()

	cs.consumed[string(b)] = struct{}{}
}

func (cs *testConsumerService) numConsumed() int {
	cs.Lock()
	defer cs.Unlock()

	return len(cs.consumed)
}

func (cs *testConsumerService) Close() {
	for _, c := range cs.testConsumers {
		c.Close()
	}
}

// testConsumer is a single consumer instance: a listener accepting producer
// connections plus the placement instance registered for it.
type testConsumer struct {
	sync.RWMutex

	cs        *testConsumerService
	listener  consumer.Listener
	consumers []consumer.Consumer
	instance  placement.Instance
	consumed  int
	closed    bool
	doneCh    chan struct{}
}

func (c *testConsumer) Close() {
	c.Lock()
	defer c.Unlock()

	if c.closed {
		return
	}
	c.closed = true
	c.listener.Close()
	close(c.doneCh)
}

func (c *testConsumer) numConsumed() int {
	c.Lock()
	defer c.Unlock()

	return c.consumed
}
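// newTestConsumer starts a listener on an ephemeral local port and builds a
// placement instance whose ID, endpoint, and isolation group all reuse the
// listener address, so each consumer lands in its own isolation group.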
func newTestConsumer(t *testing.T, cs *testConsumerService) *testConsumer {
	consumerListener, err := consumer.NewListener("127.0.0.1:0", testConsumerOptions(t))
	require.NoError(t, err)

	addr := consumerListener.Addr().String()
	c := &testConsumer{
		cs:       cs,
		listener: consumerListener,
		instance: placement.NewInstance().
			SetID(addr).
			SetEndpoint(addr).
			SetIsolationGroup(addr).
			SetWeight(1),
		consumed: 0,
		closed:   false,
		doneCh:   make(chan struct{}),
	}
	return c
}

// closeOneConsumer closes the most recently accepted connection, waiting for
// one to be established first if necessary.
func (c *testConsumer) closeOneConsumer() {
	for {
		c.Lock()
		l := len(c.consumers)
		if l == 0 {
			c.Unlock()
			time.Sleep(200 * time.Millisecond)
			continue
		}
		c.consumers[l-1].Close()
		c.consumers = c.consumers[:l-1]
		c.Unlock()
		break
	}
}

// consumeAndAck accepts connections in the background and, for each message,
// acks it and records it against both the per-consumer count and the shared
// total, using a worker pool to process messages concurrently.
func (c *testConsumer) consumeAndAck(totalConsumed *atomic.Int64) {
	wp := xsync.NewWorkerPool(numConcurrentMessages)
	wp.Init()

	go func() {
		for {
			cn, err := c.listener.Accept()
			if err != nil {
				return
			}
			c.Lock()
			c.consumers = append(c.consumers, cn)
			c.Unlock()
			go func() {
				for {
					select {
					case <-c.doneCh:
						cn.Close()
						return
					default:
						msg, err := cn.Message()
						if err != nil {
							cn.Close()
							return
						}
						if msg.SentAtNanos() <= 0 {
							panic("sentAtNanos not set")
						}
						wp.Go(
							func() {
								c.Lock()
								if c.closed {
									c.Unlock()
									return
								}
								c.consumed++
								c.Unlock()
								totalConsumed.Inc()
								c.cs.markConsumed(msg.Bytes())
								msg.Ack()
							},
						)
					}
				}
			}()
		}
	}()
}

func testPlacementService(store kv.Store, sid services.ServiceID, isSharded bool) placement.Service {
	opts := placement.NewOptions().SetShardStateMode(placement.StableShardStateOnly).SetIsSharded(isSharded)

	return service.NewPlacementService(
		storage.NewPlacementStorage(store, sid.String(), opts),
		service.WithPlacementOptions(opts))
}

func testProducer(
	t *testing.T,
	cs client.Client,
) producer.Producer {
	str := `
buffer:
  closeCheckInterval: 200ms
  cleanupRetry:
    initialBackoff: 100ms
    maxBackoff: 200ms
writer:
  topicName: topicName
  topicWatchInitTimeout: 100ms
  placementWatchInitTimeout: 100ms
  # FIXME: Consumers sharing the same pool trigger false-positives in race detector
  messagePool: ~
  messageRetry:
    initialBackoff: 20ms
    maxBackoff: 50ms
  messageQueueNewWritesScanInterval: 10ms
  messageQueueFullScanInterval: 50ms
  closeCheckInterval: 200ms
  ackErrorRetry:
    initialBackoff: 20ms
    maxBackoff: 50ms
  connection:
    dialTimeout: 500ms
    keepAlivePeriod: 2s
    retry:
      initialBackoff: 20ms
      maxBackoff: 50ms
    flushInterval: 50ms
    writeBufferSize: 4096
    resetDelay: 50ms
`

	var cfg config.ProducerConfiguration
	require.NoError(t, yaml.Unmarshal([]byte(str), &cfg))

	p, err := cfg.NewProducer(cs, instrument.NewOptions(), xio.NewOptions())
	require.NoError(t, err)
	return p
}

func testConsumerOptions(t *testing.T) consumer.Options {
	str := `
ackFlushInterval: 100ms
ackBufferSize: 4
connectionWriteBufferSize: 32
`
	var cfg consumer.Configuration
	require.NoError(t, yaml.Unmarshal([]byte(str), &cfg))

	return cfg.NewOptions(instrument.NewOptions())
}

func serviceID(id int) services.ServiceID {
	return services.NewServiceID().SetName("serviceName" + strconv.Itoa(id))
}
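// For reference, a minimal sketch of how this harness is typically driven
// from a test in this package (the test name and exact config values are
// illustrative, not part of this file):
//
//	func TestSharedConsumerSketch(t *testing.T) {
//		ctrl := gomock.NewController(t)
//		defer ctrl.Finish()
//
//		s := newTestSetup(t, ctrl, 1 /* producers */, []consumerServiceConfig{
//			{ct: topic.Shared, instances: 2, replicas: 1, isSharded: true},
//		})
//		// Optionally exercise topology changes mid-run, e.g. kill a
//		// connection once 20% of the messages have been produced:
//		// s.ScheduleOperations(20, func() { s.KillConnection(t, 0) })
//		s.Run(t, ctrl)
//		s.VerifyConsumers(t)
//	}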