github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/integration/index_single_node_high_concurrency_test.go (about)

     1  // +build integration
     2  //
     3  // Copyright (c) 2016 Uber Technologies, Inc.
     4  //
     5  // Permission is hereby granted, free of charge, to any person obtaining a copy
     6  // of this software and associated documentation files (the "Software"), to deal
     7  // in the Software without restriction, including without limitation the rights
     8  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     9  // copies of the Software, and to permit persons to whom the Software is
    10  // furnished to do so, subject to the following conditions:
    11  //
    12  // The above copyright notice and this permission notice shall be included in
    13  // all copies or substantial portions of the Software.
    14  //
    15  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    16  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    17  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    18  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    19  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    20  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    21  // THE SOFTWARE.
    22  
    23  package integration
    24  
    25  import (
    26  	"fmt"
    27  	"math/rand"
    28  	"strconv"
    29  	"sync"
    30  	"testing"
    31  	"time"
    32  
    33  	"github.com/m3db/m3/src/dbnode/namespace"
    34  	"github.com/m3db/m3/src/dbnode/storage"
    35  	"github.com/m3db/m3/src/dbnode/storage/index"
    36  	"github.com/m3db/m3/src/m3ninx/idx"
    37  	xclock "github.com/m3db/m3/src/x/clock"
    38  	"github.com/m3db/m3/src/x/context"
    39  	"github.com/m3db/m3/src/x/ident"
    40  	xsync "github.com/m3db/m3/src/x/sync"
    41  	xtime "github.com/m3db/m3/src/x/time"
    42  
    43  	"github.com/stretchr/testify/assert"
    44  	"github.com/stretchr/testify/require"
    45  	"go.uber.org/atomic"
    46  	"go.uber.org/zap"
    47  )
    48  
    49  func TestIndexSingleNodeHighConcurrencyManyTagsLowCardinality(t *testing.T) {
    50  	if testing.Short() {
    51  		t.SkipNow() // Just skip if we're doing a short run
    52  	}
    53  
    54  	testIndexSingleNodeHighConcurrency(t, testIndexHighConcurrencyOptions{
    55  		concurrencyEnqueueWorker: 8,
    56  		concurrencyWrites:        5000,
    57  		enqueuePerWorker:         100,
    58  		numTags:                  10,
    59  	})
    60  }
    61  
    62  func TestIndexSingleNodeHighConcurrencyFewTagsHighCardinalityNoSkipWrites(t *testing.T) {
    63  	if testing.Short() {
    64  		t.SkipNow() // Just skip if we're doing a short run
    65  	}
    66  
    67  	testIndexSingleNodeHighConcurrency(t, testIndexHighConcurrencyOptions{
    68  		concurrencyEnqueueWorker: 8,
    69  		concurrencyWrites:        5000,
    70  		enqueuePerWorker:         10000,
    71  		numTags:                  2,
    72  	})
    73  }
    74  
    75  func TestIndexSingleNodeHighConcurrencyFewTagsHighCardinalitySkipWrites(t *testing.T) {
    76  	if testing.Short() {
    77  		t.SkipNow() // Just skip if we're doing a short run
    78  	}
    79  
    80  	testIndexSingleNodeHighConcurrency(t, testIndexHighConcurrencyOptions{
    81  		concurrencyEnqueueWorker: 8,
    82  		concurrencyWrites:        5000,
    83  		enqueuePerWorker:         10000,
    84  		numTags:                  2,
    85  		skipWrites:               true,
    86  	})
    87  }
    88  
    89  func TestIndexSingleNodeHighConcurrencyFewTagsHighCardinalityQueryDuringWrites(t *testing.T) {
    90  	if testing.Short() {
    91  		t.SkipNow() // Just skip if we're doing a short run
    92  	}
    93  
    94  	testIndexSingleNodeHighConcurrency(t, testIndexHighConcurrencyOptions{
    95  		concurrencyEnqueueWorker:         8,
    96  		concurrencyWrites:                5000,
    97  		enqueuePerWorker:                 100000,
    98  		numTags:                          2,
    99  		concurrencyQueryDuringWrites:     16,
   100  		concurrencyQueryDuringWritesType: indexQuery,
   101  		skipVerify:                       true,
   102  	})
   103  }
   104  
   105  func TestIndexSingleNodeHighConcurrencyFewTagsHighCardinalityAggregateQueryDuringWrites(t *testing.T) {
   106  	if testing.Short() {
   107  		t.SkipNow() // Just skip if we're doing a short run
   108  	}
   109  
   110  	testIndexSingleNodeHighConcurrency(t, testIndexHighConcurrencyOptions{
   111  		concurrencyEnqueueWorker:         8,
   112  		concurrencyWrites:                5000,
   113  		enqueuePerWorker:                 100000,
   114  		numTags:                          2,
   115  		concurrencyQueryDuringWrites:     1,
   116  		concurrencyQueryDuringWritesType: indexAggregateQuery,
   117  		skipVerify:                       true,
   118  	})
   119  }
   120  
   121  type queryType uint
   122  
   123  const (
   124  	indexQuery queryType = iota
   125  	indexAggregateQuery
   126  )
   127  
// testIndexHighConcurrencyOptions configures a single run of
// testIndexSingleNodeHighConcurrency.
type testIndexHighConcurrencyOptions struct {
	concurrencyEnqueueWorker int // number of goroutines enqueueing writes
	concurrencyWrites        int // size of the worker pool issuing writes
	enqueuePerWorker         int // writes submitted by each enqueue worker
	numTags                  int // tags generated per series

	// skipWrites will mix in skipped to make sure
	// it doesn't interrupt the regular real-time ingestion pipeline.
	skipWrites bool

	// concurrencyQueryDuringWrites will issue queries while we
	// are performing writes.
	concurrencyQueryDuringWrites int

	// concurrencyQueryDuringWritesType determines the type of queries
	// to issue performing writes.
	concurrencyQueryDuringWritesType queryType

	// skipVerify will skip verifying the actual series were indexed
	// which is useful if just sanity checking can write/read concurrently
	// without issue/errors and the stats look good.
	skipVerify bool
}
   151  
   152  func testIndexSingleNodeHighConcurrency(
   153  	t *testing.T,
   154  	opts testIndexHighConcurrencyOptions,
   155  ) {
   156  	// Test setup
   157  	md, err := namespace.NewMetadata(testNamespaces[0],
   158  		namespace.NewOptions().
   159  			SetRetentionOptions(DefaultIntegrationTestRetentionOpts).
   160  			SetCleanupEnabled(false).
   161  			SetSnapshotEnabled(false).
   162  			SetFlushEnabled(false).
   163  			SetColdWritesEnabled(true).
   164  			SetIndexOptions(namespace.NewIndexOptions().SetEnabled(true)))
   165  	require.NoError(t, err)
   166  
   167  	testOpts := NewTestOptions(t).
   168  		SetNamespaces([]namespace.Metadata{md}).
   169  		SetWriteNewSeriesAsync(true).
   170  		// Use default time functions (server time not frozen).
   171  		SetNowFn(time.Now)
   172  	testSetup, err := NewTestSetup(t, testOpts, nil,
   173  		func(s storage.Options) storage.Options {
   174  			if opts.skipWrites {
   175  				return s.SetDoNotIndexWithFieldsMap(map[string]string{"skip": "true"})
   176  			}
   177  			return s
   178  		})
   179  	require.NoError(t, err)
   180  	defer testSetup.Close()
   181  
   182  	// Start the server
   183  	log := testSetup.StorageOpts().InstrumentOptions().Logger()
   184  	require.NoError(t, testSetup.StartServer())
   185  
   186  	// Stop the server
   187  	defer func() {
   188  		require.NoError(t, testSetup.StopServer())
   189  		log.Debug("server is now down")
   190  	}()
   191  
   192  	client := testSetup.M3DBClient()
   193  	session, err := client.DefaultSession()
   194  	require.NoError(t, err)
   195  
   196  	var (
   197  		wg              sync.WaitGroup
   198  		numTotalErrors  = atomic.NewUint32(0)
   199  		numTotalSuccess = atomic.NewUint32(0)
   200  	)
   201  	nowFn := testSetup.DB().Options().ClockOptions().NowFn()
   202  	start := time.Now()
   203  	log.Info("starting data write",
   204  		zap.Time("serverTime", nowFn()))
   205  
   206  	workerPool := xsync.NewWorkerPool(opts.concurrencyWrites)
   207  	workerPool.Init()
   208  
   209  	for i := 0; i < opts.concurrencyEnqueueWorker; i++ {
   210  		i := i
   211  		wg.Add(1)
   212  		go func() {
   213  			defer wg.Done()
   214  
   215  			for j := 0; j < opts.enqueuePerWorker; j++ {
   216  				j := j
   217  				wg.Add(1)
   218  				workerPool.Go(func() {
   219  					defer wg.Done()
   220  
   221  					var genOpts []genIDTagsOption
   222  					if opts.skipWrites && j%2 == 0 {
   223  						genOpts = append(genOpts, genIDTagsOption(func(t ident.Tags) ident.Tags {
   224  							t.Append(ident.Tag{
   225  								Name:  ident.StringID("skip"),
   226  								Value: ident.StringID("true"),
   227  							})
   228  							return t
   229  						}))
   230  					}
   231  
   232  					id, tags := genIDTags(i, j, opts.numTags, genOpts...)
   233  					timestamp := xtime.Now()
   234  					err := session.WriteTagged(md.ID(), id, tags,
   235  						timestamp, float64(j), xtime.Second, nil)
   236  					if err != nil {
   237  						if n := numTotalErrors.Inc(); n < 10 {
   238  							// Log the first 10 errors for visibility but not flood.
   239  							log.Error("sampled write error", zap.Error(err))
   240  						}
   241  					} else {
   242  						numTotalSuccess.Inc()
   243  					}
   244  				})
   245  			}
   246  		}()
   247  	}
   248  
   249  	// If concurrent query load enabled while writing also hit with queries.
   250  	queryConcDuringWritesCloseCh := make(chan struct{}, 1)
   251  	numTotalQueryMatches := atomic.NewUint32(0)
   252  	numTotalQueryErrors := atomic.NewUint32(0)
   253  	checkNumTotalQueryMatches := false
   254  	if opts.concurrencyQueryDuringWrites == 0 {
   255  		log.Info("no concurrent queries during writes configured")
   256  	} else {
   257  		log.Info("starting concurrent queries during writes",
   258  			zap.Int("concurrency", opts.concurrencyQueryDuringWrites))
   259  		checkNumTotalQueryMatches = true
   260  		for i := 0; i < opts.concurrencyQueryDuringWrites; i++ {
   261  			i := i
   262  			go func() {
   263  				src := rand.NewSource(int64(i))
   264  				rng := rand.New(src)
   265  				for {
   266  					select {
   267  					case <-queryConcDuringWritesCloseCh:
   268  						return
   269  					default:
   270  					}
   271  
   272  					switch opts.concurrencyQueryDuringWritesType {
   273  					case indexQuery:
   274  						randI := rng.Intn(opts.concurrencyEnqueueWorker)
   275  						randJ := rng.Intn(opts.enqueuePerWorker)
   276  						id, tags := genIDTags(randI, randJ, opts.numTags)
   277  						ok, err := isIndexedChecked(t, session, md.ID(), id, tags)
   278  						if err != nil {
   279  							if n := numTotalQueryErrors.Inc(); n < 10 {
   280  								// Log the first 10 errors for visibility but not flood.
   281  								log.Error("sampled query error", zap.Error(err))
   282  							}
   283  						}
   284  						if ok {
   285  							numTotalQueryMatches.Inc()
   286  						}
   287  					case indexAggregateQuery:
   288  						randI := rng.Intn(opts.concurrencyEnqueueWorker)
   289  						match := idx.NewTermQuery([]byte("common_i"), []byte(strconv.Itoa(randI)))
   290  						q := index.Query{Query: match}
   291  
   292  						now := xtime.Now()
   293  						qOpts := index.AggregationOptions{
   294  							QueryOptions: index.QueryOptions{
   295  								StartInclusive: now.Add(-md.Options().RetentionOptions().RetentionPeriod()),
   296  								EndExclusive:   now,
   297  								DocsLimit:      1000,
   298  							},
   299  						}
   300  
   301  						ctx := context.NewBackground()
   302  						r, err := testSetup.DB().AggregateQuery(ctx, md.ID(), q, qOpts)
   303  						if err != nil {
   304  							panic(err)
   305  						}
   306  
   307  						tagValues := 0
   308  						for _, entry := range r.Results.Map().Iter() {
   309  							values := entry.Value()
   310  							tagValues += values.Size()
   311  						}
   312  
   313  						// Done with resources, return to pool.
   314  						ctx.Close()
   315  
   316  						numTotalQueryMatches.Add(uint32(tagValues))
   317  					default:
   318  						panic("unknown query type")
   319  					}
   320  				}
   321  			}()
   322  		}
   323  	}
   324  
   325  	// Wait for writes to at least be enqueued.
   326  	wg.Wait()
   327  
   328  	// Check no write errors.
   329  	require.Equal(t, int(0), int(numTotalErrors.Load()))
   330  
   331  	if checkNumTotalQueryMatches {
   332  		// Check matches.
   333  		require.True(t, numTotalQueryMatches.Load() > 0, "no query matches")
   334  	}
   335  
   336  	log.Info("test data written",
   337  		zap.Duration("took", time.Since(start)),
   338  		zap.Int("written", int(numTotalSuccess.Load())),
   339  		zap.Time("serverTime", nowFn()),
   340  		zap.Uint32("queryMatches", numTotalQueryMatches.Load()))
   341  
   342  	log.Info("data indexing verify start")
   343  
   344  	// Wait for at least all things to be enqueued for indexing.
   345  	expectStatPrefix := "dbindex.index-attempt+namespace=testNs1,"
   346  	expectStatProcess := expectStatPrefix + "stage=process"
   347  	numIndexTotal := opts.enqueuePerWorker
   348  	multiplyByConcurrency := multiplyBy(opts.concurrencyEnqueueWorker)
   349  	expectNumIndex := multiplyByConcurrency(numIndexTotal)
   350  	indexProcess := xclock.WaitUntil(func() bool {
   351  		counters := testSetup.Scope().Snapshot().Counters()
   352  		counter, ok := counters[expectStatProcess]
   353  		if !ok {
   354  			return false
   355  		}
   356  		return int(counter.Value()) == expectNumIndex
   357  	}, time.Minute*5)
   358  
   359  	counters := testSetup.Scope().Snapshot().Counters()
   360  	counter, ok := counters[expectStatProcess]
   361  
   362  	var value int
   363  	if ok {
   364  		value = int(counter.Value())
   365  	}
   366  	assert.True(t, indexProcess,
   367  		fmt.Sprintf("timeout waiting for index to process: expected to index %d but only processed %d",
   368  			expectNumIndex, value))
   369  
   370  	// Allow concurrent query during writes to finish.
   371  	close(queryConcDuringWritesCloseCh)
   372  
   373  	// Check no query errors.
   374  	require.Equal(t, int(0), int(numTotalErrors.Load()))
   375  
   376  	if !opts.skipVerify {
   377  		log.Info("data indexing each series visible start")
   378  		// Now check all of them are individually indexed.
   379  		var (
   380  			fetchWg        sync.WaitGroup
   381  			notIndexedErrs []error
   382  			notIndexedLock sync.Mutex
   383  		)
   384  		for i := 0; i < opts.concurrencyEnqueueWorker; i++ {
   385  			fetchWg.Add(1)
   386  			i := i
   387  			go func() {
   388  				defer fetchWg.Done()
   389  
   390  				for j := 0; j < opts.enqueuePerWorker; j++ {
   391  					if opts.skipWrites && j%2 == 0 {
   392  						continue // not meant to be indexed.
   393  					}
   394  
   395  					j := j
   396  					fetchWg.Add(1)
   397  					workerPool.Go(func() {
   398  						defer fetchWg.Done()
   399  
   400  						id, tags := genIDTags(i, j, opts.numTags)
   401  						indexed := xclock.WaitUntil(func() bool {
   402  							found := isIndexed(t, session, md.ID(), id, tags)
   403  							return found
   404  						}, 30*time.Second)
   405  						if !indexed {
   406  							err := fmt.Errorf("not indexed series: i=%d, j=%d", i, j)
   407  							notIndexedLock.Lock()
   408  							notIndexedErrs = append(notIndexedErrs, err)
   409  							notIndexedLock.Unlock()
   410  						}
   411  					})
   412  				}
   413  			}()
   414  		}
   415  		fetchWg.Wait()
   416  
   417  		require.Equal(t, 0, len(notIndexedErrs),
   418  			fmt.Sprintf("not indexed errors: %v", notIndexedErrs[:min(5, len(notIndexedErrs))]))
   419  	}
   420  
   421  	log.Info("data indexing verify done", zap.Duration("took", time.Since(start)))
   422  
   423  	// Make sure attempted total indexing = skipped + written.
   424  	counters = testSetup.Scope().Snapshot().Counters()
   425  	totalSkippedWritten := 0
   426  	for _, expectID := range []string{
   427  		expectStatPrefix + "stage=skip",
   428  		expectStatPrefix + "stage=write",
   429  	} {
   430  		actual, ok := counters[expectID]
   431  		assert.True(t, ok,
   432  			fmt.Sprintf("counter not found to test value: id=%s", expectID))
   433  		if ok {
   434  			totalSkippedWritten += int(actual.Value())
   435  		}
   436  	}
   437  
   438  	log.Info("check written + skipped",
   439  		zap.Int("expectedValue", multiplyByConcurrency(numIndexTotal)),
   440  		zap.Int("actualValue", totalSkippedWritten))
   441  	assert.Equal(t, multiplyByConcurrency(numIndexTotal), totalSkippedWritten,
   442  		"total written + skipped mismatch")
   443  }
   444  
   445  func multiplyBy(n int) func(int) int {
   446  	return func(x int) int {
   447  		return n * x
   448  	}
   449  }
   450  
   451  func min(x, y int) int {
   452  	if x < y {
   453  		return x
   454  	}
   455  	return y
   456  }