github.com/m3db/m3@v1.5.0/src/msg/integration/setup.go

// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package integration

import (
	"fmt"
	"strconv"
	"sync"
	"testing"
	"time"

	"github.com/m3db/m3/src/cluster/client"
	"github.com/m3db/m3/src/cluster/kv"
	"github.com/m3db/m3/src/cluster/kv/mem"
	"github.com/m3db/m3/src/cluster/placement"
	"github.com/m3db/m3/src/cluster/placement/service"
	"github.com/m3db/m3/src/cluster/placement/storage"
	"github.com/m3db/m3/src/cluster/services"
	"github.com/m3db/m3/src/msg/consumer"
	"github.com/m3db/m3/src/msg/producer"
	"github.com/m3db/m3/src/msg/producer/config"
	"github.com/m3db/m3/src/msg/topic"
	"github.com/m3db/m3/src/x/instrument"
	xio "github.com/m3db/m3/src/x/io"
	xsync "github.com/m3db/m3/src/x/sync"

	"github.com/golang/mock/gomock"
	"github.com/stretchr/testify/require"
	"go.uber.org/atomic"
	"go.uber.org/zap"
	"gopkg.in/yaml.v2"
)

const (
	numConcurrentMessages = 10
	numberOfShards        = 10
	msgPerShard           = 200
	closeTimeout          = 30 * time.Second
	topicName             = "topicName"
)

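// consumerServiceConfig describes one consumer service under test: its
// consumption type, the number of instances and replicas, whether its
// placement is sharded, and whether it joins late (lateJoin services are
// excluded from the consumption expectations in Run).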
type consumerServiceConfig struct {
	ct        topic.ConsumptionType
	instances int
	replicas  int
	isSharded bool
	lateJoin  bool
}

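// op is an extra operation scheduled to run once production reaches a given
// percentage of the per-producer writes.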
type op struct {
	progressPct int
	fn          func()
}

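// setup wires together a topic, a set of producers, and the consumer services
// they write to, and tracks the total number of consumed messages.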
type setup struct {
	ts               topic.Service
	sd               *services.MockServices
	producers        []producer.Producer
	consumerServices []*testConsumerService
	totalConsumed    *atomic.Int64
	extraOps         []op
}

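// newTestSetup builds the topic, the consumer services, and the producers for
// an integration test, backing the config service and placements with
// in-memory KV stores.
//
// A minimal usage sketch (hypothetical values, not taken from an existing
// test):
//
//	ctrl := gomock.NewController(t)
//	defer ctrl.Finish()
//	s := newTestSetup(t, ctrl, 1, []consumerServiceConfig{
//		{ct: topic.Shared, instances: 3, replicas: 2, isSharded: true},
//	})
//	s.Run(t, ctrl)
//	s.VerifyConsumers(t)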
func newTestSetup(
	t *testing.T,
	ctrl *gomock.Controller,
	numProducers int,
	configs []consumerServiceConfig,
) *setup {
	zap.L().Sugar().Debugf("setting up a test with %d producers", numProducers)

	configService := client.NewMockClient(ctrl)
	configService.EXPECT().Store(gomock.Any()).Return(mem.NewStore(), nil).AnyTimes()

	sd := services.NewMockServices(ctrl)
	configService.EXPECT().Services(gomock.Any()).Return(sd, nil).AnyTimes()

	var (
		testConsumerServices  []*testConsumerService
		topicConsumerServices []topic.ConsumerService
		totalConsumed         = atomic.NewInt64(0)
	)
	for i, config := range configs {
		zap.L().Sugar().Debugf("setting up a consumer service in %s mode with %d replicas", config.ct.String(), config.replicas)
		cs := newTestConsumerService(t, i, config, sd, numProducers, totalConsumed)
		topicConsumerServices = append(topicConsumerServices, cs.consumerService)
		testConsumerServices = append(testConsumerServices, cs)
	}

	ts, err := topic.NewService(topic.NewServiceOptions().SetConfigService(configService))
	require.NoError(t, err)

	testTopic := topic.NewTopic().
		SetName(topicName).
		SetNumberOfShards(uint32(numberOfShards)).
		SetConsumerServices(topicConsumerServices)
	_, err = ts.CheckAndSet(testTopic, kv.UninitializedVersion)
	require.NoError(t, err)

	var producers []producer.Producer
	for i := 0; i < numProducers; i++ {
		p := testProducer(t, configService)
		require.NoError(t, p.Init())
		producers = append(producers, p)
	}

	return &setup{
		ts:               ts,
		sd:               sd,
		producers:        producers,
		consumerServices: testConsumerServices,
		totalConsumed:    totalConsumed,
	}
}

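// newTestConsumerService starts the configured number of consumers for one
// consumer service, builds their initial placement, and wires the placement
// service into the mock service discovery.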
func newTestConsumerService(
	t *testing.T,
	i int,
	config consumerServiceConfig,
	sd *services.MockServices,
	numProducers int,
	totalConsumed *atomic.Int64,
) *testConsumerService {
	sid := serviceID(i)
	consumerService := topic.NewConsumerService().SetServiceID(sid).SetConsumptionType(config.ct)

	ps := testPlacementService(mem.NewStore(), sid, config.isSharded)
	sd.EXPECT().PlacementService(sid, gomock.Any()).Return(ps, nil).Times(numProducers)

	cs := testConsumerService{
		consumed:         make(map[string]struct{}),
		sid:              sid,
		placementService: ps,
		consumerService:  consumerService,
		config:           config,
	}
	var (
		instances []placement.Instance
		p         placement.Placement
		err       error
	)
	for i := 0; i < config.instances; i++ {
		c := newTestConsumer(t, &cs)
		c.consumeAndAck(totalConsumed)
		cs.testConsumers = append(cs.testConsumers, c)
		instances = append(instances, c.instance)
	}
	if config.isSharded {
		p, err = ps.BuildInitialPlacement(instances, numberOfShards, config.replicas)
	} else {
		p, err = ps.BuildInitialPlacement(instances, 0, config.replicas)
	}
	require.NoError(t, err)
	require.Equal(t, len(instances), p.NumInstances())
	return &cs
}

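// TotalMessages is the number of messages produced across all producers.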
func (s *setup) TotalMessages() int {
	return msgPerShard * numberOfShards * len(s.producers)
}

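// ExpectedNumMessages is the number of messages each producer writes
// (msgPerShard messages for each shard).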
func (s *setup) ExpectedNumMessages() int {
	return msgPerShard * numberOfShards
}

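// Run produces the mock messages on every producer, invoking any scheduled
// operations along the way, then closes producers and consumers and checks
// that at least the expected number of messages was consumed.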
func (s *setup) Run(
	t *testing.T,
	ctrl *gomock.Controller,
) {
	numWritesPerProducer := s.ExpectedNumMessages()
	mockData := make([]producer.Message, 0, numWritesPerProducer)
	for i := 0; i < numberOfShards; i++ {
		for j := 0; j < msgPerShard; j++ {
			b := fmt.Sprintf("foo%d-%d", i, j)
			mm := producer.NewMockMessage(ctrl)
			mm.EXPECT().Size().Return(len(b)).AnyTimes()
			mm.EXPECT().Bytes().Return([]byte(b)).AnyTimes()
			mm.EXPECT().Shard().Return(uint32(i)).AnyTimes()
			mm.EXPECT().Finalize(producer.Consumed).Times(len(s.producers))
			mockData = append(mockData, mm)
		}
	}

	ops := make(map[int]func(), len(s.extraOps))
	for _, op := range s.extraOps {
		num := op.progressPct * numWritesPerProducer / 100
		ops[num] = op.fn
	}
	zap.L().Sugar().Debug("producing messages")
	for i := 0; i < numWritesPerProducer; i++ {
		if fn, ok := ops[i]; ok {
			fn()
		}
		m := mockData[i]
		for _, p := range s.producers {
			require.NoError(t, p.Produce(m))
		}
	}
	zap.L().Sugar().Debug("produced all the messages")
	s.CloseProducers(closeTimeout)
	s.CloseConsumers()

	expectedConsumeReplica := 0
	for _, cs := range s.consumerServices {
		if cs.config.lateJoin {
			continue
		}
		if cs.config.ct == topic.Shared {
			expectedConsumeReplica++
			continue
		}
		expectedConsumeReplica += cs.config.replicas
	}
	expectedConsumed := expectedConsumeReplica * numWritesPerProducer * len(s.producers)
	require.True(t, int(s.totalConsumed.Load()) >= expectedConsumed, fmt.Sprintf("expect %d, consumed %d", expectedConsumed, s.totalConsumed.Load()))
	zap.L().Sugar().Debug("done")
}

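// VerifyConsumers checks that every consumer service saw every unique payload.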
func (s *setup) VerifyConsumers(t *testing.T) {
	numWritesPerProducer := s.ExpectedNumMessages()
	for _, cs := range s.consumerServices {
		require.Equal(t, numWritesPerProducer, cs.numConsumed())
	}
}

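// CloseProducers closes all producers, waiting for consumption, and panics if
// closing takes longer than the given duration.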
func (s *setup) CloseProducers(dur time.Duration) {
	doneCh := make(chan struct{})

	go func() {
		for _, p := range s.producers {
			zap.L().Sugar().Debug("closing producer")
			p.Close(producer.WaitForConsumption)
			zap.L().Sugar().Debug("closed producer")
		}
		close(doneCh)
	}()

	select {
	case <-time.After(dur):
		panic(fmt.Sprintf("took more than %v to close producers, gave up at %v", dur, time.Now()))
	case <-doneCh:
		zap.L().Sugar().Debugf("closed all producers within %v", dur)
		return
	}
}

func (s *setup) CloseConsumers() {
	for _, cs := range s.consumerServices {
		cs.Close()
	}
}

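// ScheduleOperations registers fn to run once production reaches pct percent
// of the per-producer writes; out-of-range percentages are ignored.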
func (s *setup) ScheduleOperations(pct int, fn func()) {
	if pct < 0 || pct > 100 {
		return
	}
	s.extraOps = append(s.extraOps, op{progressPct: pct, fn: fn})
}

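// KillConnection closes one consumer connection on the last instance of the
// consumer service at idx and logs the current placement.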
func (s *setup) KillConnection(t *testing.T, idx int) {
	require.True(t, idx < len(s.consumerServices))
	cs := s.consumerServices[idx]

	testConsumers := cs.testConsumers
	require.NotEmpty(t, testConsumers)
	c := testConsumers[len(testConsumers)-1]
	c.closeOneConsumer()

	zap.L().Sugar().Debugf("killed a consumer on instance: %s", c.instance.ID())
	p, err := cs.placementService.Placement()
	require.NoError(t, err)
	zap.L().Sugar().Debugf("placement: %s", p.String())
}

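// KillInstance shuts down the last consumer instance of the consumer service
// at idx without removing it from the placement.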
func (s *setup) KillInstance(t *testing.T, idx int) {
	require.True(t, idx < len(s.consumerServices))
	cs := s.consumerServices[idx]

	testConsumers := cs.testConsumers
	require.NotEmpty(t, testConsumers)
	c := testConsumers[len(testConsumers)-1]
	c.Close()

	zap.L().Sugar().Debugf("killed instance: %s", c.instance.ID())
	p, err := cs.placementService.Placement()
	require.NoError(t, err)
	zap.L().Sugar().Debugf("placement: %s", p.String())
}

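// AddInstance starts a new consumer and adds its instance to the placement of
// the consumer service at idx.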
func (s *setup) AddInstance(t *testing.T, idx int) {
	require.True(t, idx < len(s.consumerServices))
	cs := s.consumerServices[idx]

	newConsumer := newTestConsumer(t, cs)
	newConsumer.consumeAndAck(s.totalConsumed)

	p, err := cs.placementService.Placement()
	require.NoError(t, err)
	zap.L().Sugar().Debugf("old placement: %s", p.String())

	p, _, err = cs.placementService.AddInstances([]placement.Instance{newConsumer.instance})
	require.NoError(t, err)
	zap.L().Sugar().Debugf("new placement: %s", p.String())
	cs.testConsumers = append(cs.testConsumers, newConsumer)
}

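// RemoveInstance removes the last instance from the placement of the consumer
// service at idx and closes its consumer.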
func (s *setup) RemoveInstance(t *testing.T, idx int) {
	require.True(t, idx < len(s.consumerServices))
	cs := s.consumerServices[idx]

	testConsumers := cs.testConsumers
	require.NotEmpty(t, testConsumers)
	l := len(testConsumers)
	oldConsumer := testConsumers[l-1]
	defer oldConsumer.Close()

	p, err := cs.placementService.Placement()
	require.NoError(t, err)
	zap.L().Sugar().Debugf("old placement: %s", p.String())

	p, err = cs.placementService.RemoveInstances([]string{oldConsumer.instance.ID()})
	require.NoError(t, err)
	zap.L().Sugar().Debugf("new placement: %s", p.String())
	cs.testConsumers = testConsumers[:l-1]
}

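// ReplaceInstance replaces the last instance of the consumer service at idx
// with a freshly started consumer.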
func (s *setup) ReplaceInstance(t *testing.T, idx int) {
	require.True(t, idx < len(s.consumerServices))
	cs := s.consumerServices[idx]

	newConsumer := newTestConsumer(t, cs)
	newConsumer.consumeAndAck(s.totalConsumed)

	testConsumers := cs.testConsumers
	require.NotEmpty(t, testConsumers)
	l := len(testConsumers)
	oldConsumer := testConsumers[l-1]
	defer oldConsumer.Close()

	p, err := cs.placementService.Placement()
	require.NoError(t, err)
	zap.L().Sugar().Debugf("old placement: %s", p.String())

	p, _, err = cs.placementService.ReplaceInstances(
		[]string{oldConsumer.instance.ID()},
		[]placement.Instance{newConsumer.instance},
	)
	require.NoError(t, err)
	zap.L().Sugar().Debugf("new placement: %s", p.String())
	cs.testConsumers[l-1] = newConsumer
}

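// RemoveConsumerService removes the consumer service at idx from the topic,
// updates the topic in KV, and closes the service's consumers.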
func (s *setup) RemoveConsumerService(t *testing.T, idx int) {
	require.True(t, idx < len(s.consumerServices))
	topic, err := s.ts.Get(topicName)
	require.NoError(t, err)
	css := topic.ConsumerServices()
	topic = topic.SetConsumerServices(append(css[:idx], css[idx+1:]...))
	_, err = s.ts.CheckAndSet(topic, topic.Version())
	require.NoError(t, err)
	tcss := s.consumerServices
	tcss[idx].Close()
	s.consumerServices = append(tcss[:idx], tcss[idx+1:]...)
}

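// AddConsumerService creates a new consumer service from config and appends
// it to the topic.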
func (s *setup) AddConsumerService(t *testing.T, config consumerServiceConfig) {
	cs := newTestConsumerService(t, len(s.consumerServices), config, s.sd, len(s.producers), s.totalConsumed)
	s.consumerServices = append(s.consumerServices, cs)
	topic, err := s.ts.Get(topicName)
	require.NoError(t, err)
	topic = topic.SetConsumerServices(append(topic.ConsumerServices(), cs.consumerService))
	_, err = s.ts.CheckAndSet(topic, topic.Version())
	require.NoError(t, err)
}

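// testConsumerService tracks the consumers, the placement, and the set of
// unique payloads consumed for a single consumer service.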
type testConsumerService struct {
	sync.Mutex

	consumed         map[string]struct{}
	sid              services.ServiceID
	placementService placement.Service
	consumerService  topic.ConsumerService
	testConsumers    []*testConsumer
	config           consumerServiceConfig
}

func (cs *testConsumerService) markConsumed(b []byte) {
	cs.Lock()
	defer cs.Unlock()

	cs.consumed[string(b)] = struct{}{}
}

func (cs *testConsumerService) numConsumed() int {
	cs.Lock()
	defer cs.Unlock()

	return len(cs.consumed)
}

func (cs *testConsumerService) Close() {
	for _, c := range cs.testConsumers {
		c.Close()
	}
}

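// testConsumer is a single consumer instance: a listener plus the connections
// it has accepted from producers.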
type testConsumer struct {
	sync.RWMutex

	cs        *testConsumerService
	listener  consumer.Listener
	consumers []consumer.Consumer
	instance  placement.Instance
	consumed  int
	closed    bool
	doneCh    chan struct{}
}

func (c *testConsumer) Close() {
	c.Lock()
	defer c.Unlock()

	if c.closed {
		return
	}
	c.closed = true
	c.listener.Close()
	close(c.doneCh)
}

func (c *testConsumer) numConsumed() int {
	c.Lock()
	defer c.Unlock()

	return c.consumed
}

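// newTestConsumer starts a consumer listener on an ephemeral localhost port
// and builds a placement instance whose ID and endpoint are the listener
// address.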
func newTestConsumer(t *testing.T, cs *testConsumerService) *testConsumer {
	consumerListener, err := consumer.NewListener("127.0.0.1:0", testConsumerOptions(t))
	require.NoError(t, err)

	addr := consumerListener.Addr().String()
	c := &testConsumer{
		cs:       cs,
		listener: consumerListener,
		instance: placement.NewInstance().
			SetID(addr).
			SetEndpoint(addr).
			SetIsolationGroup(addr).
			SetWeight(1),
		consumed: 0,
		closed:   false,
		doneCh:   make(chan struct{}),
	}
	return c
}

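// closeOneConsumer closes the most recently accepted connection, waiting for
// one to exist if necessary.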
func (c *testConsumer) closeOneConsumer() {
	for {
		c.Lock()
		l := len(c.consumers)
		if l == 0 {
			c.Unlock()
			time.Sleep(200 * time.Millisecond)
			continue
		}
		c.consumers[l-1].Close()
		c.consumers = c.consumers[:l-1]
		c.Unlock()
		break
	}
}

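// consumeAndAck accepts connections and, for each received message, records
// the payload as consumed and acks it on a worker pool bounded at
// numConcurrentMessages.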
func (c *testConsumer) consumeAndAck(totalConsumed *atomic.Int64) {
	wp := xsync.NewWorkerPool(numConcurrentMessages)
	wp.Init()

	go func() {
		for {
			consumer, err := c.listener.Accept()
			if err != nil {
				return
			}
			c.Lock()
			c.consumers = append(c.consumers, consumer)
			c.Unlock()
			go func() {
				for {
					select {
					case <-c.doneCh:
						consumer.Close()
						return
					default:
						msg, err := consumer.Message()
						if err != nil {
							consumer.Close()
							return
						}
						if msg.SentAtNanos() <= 0 {
							panic("sentAtNanos not set")
						}
						wp.Go(
							func() {
								c.Lock()
								if c.closed {
									c.Unlock()
									return
								}
								c.consumed++
								c.Unlock()
								totalConsumed.Inc()
								c.cs.markConsumed(msg.Bytes())
								msg.Ack()
							},
						)
					}
				}
			}()
		}
	}()
}

func testPlacementService(store kv.Store, sid services.ServiceID, isSharded bool) placement.Service {
	opts := placement.NewOptions().SetShardStateMode(placement.StableShardStateOnly).SetIsSharded(isSharded)

	return service.NewPlacementService(
		storage.NewPlacementStorage(store, sid.String(), opts),
		service.WithPlacementOptions(opts))
}

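// testProducer builds a producer from a small YAML configuration tuned for
// fast test feedback (short retries, scan intervals, and timeouts).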
func testProducer(
	t *testing.T,
	cs client.Client,
) producer.Producer {
	str := `
buffer:
  closeCheckInterval: 200ms
  cleanupRetry:
    initialBackoff: 100ms
    maxBackoff: 200ms
writer:
  topicName: topicName
  topicWatchInitTimeout: 100ms
  placementWatchInitTimeout: 100ms
  # FIXME: Consumers sharing the same pool trigger false-positives in race detector
  messagePool: ~
  messageRetry:
    initialBackoff: 20ms
    maxBackoff: 50ms
  messageQueueNewWritesScanInterval: 10ms
  messageQueueFullScanInterval: 50ms
  closeCheckInterval: 200ms
  ackErrorRetry:
    initialBackoff: 20ms
    maxBackoff: 50ms
  connection:
    dialTimeout: 500ms
    keepAlivePeriod: 2s
    retry:
      initialBackoff: 20ms
      maxBackoff: 50ms
    flushInterval: 50ms
    writeBufferSize: 4096
    resetDelay: 50ms
`

	var cfg config.ProducerConfiguration
	require.NoError(t, yaml.Unmarshal([]byte(str), &cfg))

	p, err := cfg.NewProducer(cs, instrument.NewOptions(), xio.NewOptions())
	require.NoError(t, err)
	return p
}

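// testConsumerOptions builds consumer options with small buffers so acks
// flush quickly in tests.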
func testConsumerOptions(t *testing.T) consumer.Options {
	str := `
ackFlushInterval: 100ms
ackBufferSize: 4
connectionWriteBufferSize: 32
`
	var cfg consumer.Configuration
	require.NoError(t, yaml.Unmarshal([]byte(str), &cfg))

	return cfg.NewOptions(instrument.NewOptions())
}

func serviceID(id int) services.ServiceID {
	return services.NewServiceID().SetName("serviceName" + strconv.Itoa(id))
}