github.com/m3db/m3@v1.5.0/src/dbnode/integration/index_single_node_high_concurrency_test.go (about) 1 // +build integration 2 // 3 // Copyright (c) 2016 Uber Technologies, Inc. 4 // 5 // Permission is hereby granted, free of charge, to any person obtaining a copy 6 // of this software and associated documentation files (the "Software"), to deal 7 // in the Software without restriction, including without limitation the rights 8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 // copies of the Software, and to permit persons to whom the Software is 10 // furnished to do so, subject to the following conditions: 11 // 12 // The above copyright notice and this permission notice shall be included in 13 // all copies or substantial portions of the Software. 14 // 15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 // THE SOFTWARE. 22 23 package integration 24 25 import ( 26 "fmt" 27 "math/rand" 28 "strconv" 29 "sync" 30 "testing" 31 "time" 32 33 "github.com/m3db/m3/src/dbnode/namespace" 34 "github.com/m3db/m3/src/dbnode/storage" 35 "github.com/m3db/m3/src/dbnode/storage/index" 36 "github.com/m3db/m3/src/m3ninx/idx" 37 xclock "github.com/m3db/m3/src/x/clock" 38 "github.com/m3db/m3/src/x/context" 39 "github.com/m3db/m3/src/x/ident" 40 xsync "github.com/m3db/m3/src/x/sync" 41 xtime "github.com/m3db/m3/src/x/time" 42 43 "github.com/stretchr/testify/assert" 44 "github.com/stretchr/testify/require" 45 "go.uber.org/atomic" 46 "go.uber.org/zap" 47 ) 48 49 func TestIndexSingleNodeHighConcurrencyManyTagsLowCardinality(t *testing.T) { 50 if testing.Short() { 51 t.SkipNow() // Just skip if we're doing a short run 52 } 53 54 testIndexSingleNodeHighConcurrency(t, testIndexHighConcurrencyOptions{ 55 concurrencyEnqueueWorker: 8, 56 concurrencyWrites: 5000, 57 enqueuePerWorker: 100, 58 numTags: 10, 59 }) 60 } 61 62 func TestIndexSingleNodeHighConcurrencyFewTagsHighCardinalityNoSkipWrites(t *testing.T) { 63 if testing.Short() { 64 t.SkipNow() // Just skip if we're doing a short run 65 } 66 67 testIndexSingleNodeHighConcurrency(t, testIndexHighConcurrencyOptions{ 68 concurrencyEnqueueWorker: 8, 69 concurrencyWrites: 5000, 70 enqueuePerWorker: 10000, 71 numTags: 2, 72 }) 73 } 74 75 func TestIndexSingleNodeHighConcurrencyFewTagsHighCardinalitySkipWrites(t *testing.T) { 76 if testing.Short() { 77 t.SkipNow() // Just skip if we're doing a short run 78 } 79 80 testIndexSingleNodeHighConcurrency(t, testIndexHighConcurrencyOptions{ 81 concurrencyEnqueueWorker: 8, 82 concurrencyWrites: 5000, 83 enqueuePerWorker: 10000, 84 numTags: 2, 85 skipWrites: true, 86 }) 87 } 88 89 func TestIndexSingleNodeHighConcurrencyFewTagsHighCardinalityQueryDuringWrites(t *testing.T) { 90 if testing.Short() { 91 t.SkipNow() // Just skip if we're doing a short run 92 } 93 94 testIndexSingleNodeHighConcurrency(t, testIndexHighConcurrencyOptions{ 95 concurrencyEnqueueWorker: 8, 96 concurrencyWrites: 5000, 97 enqueuePerWorker: 100000, 98 numTags: 2, 99 concurrencyQueryDuringWrites: 16, 100 concurrencyQueryDuringWritesType: indexQuery, 101 skipVerify: true, 102 }) 103 } 104 105 func TestIndexSingleNodeHighConcurrencyFewTagsHighCardinalityAggregateQueryDuringWrites(t *testing.T) { 106 if testing.Short() { 107 t.SkipNow() // Just skip if we're doing a short run 108 } 109 110 testIndexSingleNodeHighConcurrency(t, testIndexHighConcurrencyOptions{ 111 concurrencyEnqueueWorker: 8, 112 concurrencyWrites: 5000, 113 enqueuePerWorker: 100000, 114 numTags: 2, 115 concurrencyQueryDuringWrites: 1, 116 concurrencyQueryDuringWritesType: indexAggregateQuery, 117 skipVerify: true, 118 }) 119 } 120 121 type queryType uint 122 123 const ( 124 indexQuery queryType = iota 125 indexAggregateQuery 126 ) 127 128 type testIndexHighConcurrencyOptions struct { 129 concurrencyEnqueueWorker int 130 concurrencyWrites int 131 enqueuePerWorker int 132 numTags int 133 134 // skipWrites will mix in skipped to make sure 135 // it doesn't interrupt the regular real-time ingestion pipeline. 136 skipWrites bool 137 138 // concurrencyQueryDuringWrites will issue queries while we 139 // are performing writes. 140 concurrencyQueryDuringWrites int 141 142 // concurrencyQueryDuringWritesType determines the type of queries 143 // to issue performing writes. 144 concurrencyQueryDuringWritesType queryType 145 146 // skipVerify will skip verifying the actual series were indexed 147 // which is useful if just sanity checking can write/read concurrently 148 // without issue/errors and the stats look good. 149 skipVerify bool 150 } 151 152 func testIndexSingleNodeHighConcurrency( 153 t *testing.T, 154 opts testIndexHighConcurrencyOptions, 155 ) { 156 // Test setup 157 md, err := namespace.NewMetadata(testNamespaces[0], 158 namespace.NewOptions(). 159 SetRetentionOptions(DefaultIntegrationTestRetentionOpts). 160 SetCleanupEnabled(false). 161 SetSnapshotEnabled(false). 162 SetFlushEnabled(false). 163 SetColdWritesEnabled(true). 164 SetIndexOptions(namespace.NewIndexOptions().SetEnabled(true))) 165 require.NoError(t, err) 166 167 testOpts := NewTestOptions(t). 168 SetNamespaces([]namespace.Metadata{md}). 169 SetWriteNewSeriesAsync(true). 170 // Use default time functions (server time not frozen). 171 SetNowFn(time.Now) 172 testSetup, err := NewTestSetup(t, testOpts, nil, 173 func(s storage.Options) storage.Options { 174 if opts.skipWrites { 175 return s.SetDoNotIndexWithFieldsMap(map[string]string{"skip": "true"}) 176 } 177 return s 178 }) 179 require.NoError(t, err) 180 defer testSetup.Close() 181 182 // Start the server 183 log := testSetup.StorageOpts().InstrumentOptions().Logger() 184 require.NoError(t, testSetup.StartServer()) 185 186 // Stop the server 187 defer func() { 188 require.NoError(t, testSetup.StopServer()) 189 log.Debug("server is now down") 190 }() 191 192 client := testSetup.M3DBClient() 193 session, err := client.DefaultSession() 194 require.NoError(t, err) 195 196 var ( 197 wg sync.WaitGroup 198 numTotalErrors = atomic.NewUint32(0) 199 numTotalSuccess = atomic.NewUint32(0) 200 ) 201 nowFn := testSetup.DB().Options().ClockOptions().NowFn() 202 start := time.Now() 203 log.Info("starting data write", 204 zap.Time("serverTime", nowFn())) 205 206 workerPool := xsync.NewWorkerPool(opts.concurrencyWrites) 207 workerPool.Init() 208 209 for i := 0; i < opts.concurrencyEnqueueWorker; i++ { 210 i := i 211 wg.Add(1) 212 go func() { 213 defer wg.Done() 214 215 for j := 0; j < opts.enqueuePerWorker; j++ { 216 j := j 217 wg.Add(1) 218 workerPool.Go(func() { 219 defer wg.Done() 220 221 var genOpts []genIDTagsOption 222 if opts.skipWrites && j%2 == 0 { 223 genOpts = append(genOpts, genIDTagsOption(func(t ident.Tags) ident.Tags { 224 t.Append(ident.Tag{ 225 Name: ident.StringID("skip"), 226 Value: ident.StringID("true"), 227 }) 228 return t 229 })) 230 } 231 232 id, tags := genIDTags(i, j, opts.numTags, genOpts...) 233 timestamp := xtime.Now() 234 err := session.WriteTagged(md.ID(), id, tags, 235 timestamp, float64(j), xtime.Second, nil) 236 if err != nil { 237 if n := numTotalErrors.Inc(); n < 10 { 238 // Log the first 10 errors for visibility but not flood. 239 log.Error("sampled write error", zap.Error(err)) 240 } 241 } else { 242 numTotalSuccess.Inc() 243 } 244 }) 245 } 246 }() 247 } 248 249 // If concurrent query load enabled while writing also hit with queries. 250 queryConcDuringWritesCloseCh := make(chan struct{}, 1) 251 numTotalQueryMatches := atomic.NewUint32(0) 252 numTotalQueryErrors := atomic.NewUint32(0) 253 checkNumTotalQueryMatches := false 254 if opts.concurrencyQueryDuringWrites == 0 { 255 log.Info("no concurrent queries during writes configured") 256 } else { 257 log.Info("starting concurrent queries during writes", 258 zap.Int("concurrency", opts.concurrencyQueryDuringWrites)) 259 checkNumTotalQueryMatches = true 260 for i := 0; i < opts.concurrencyQueryDuringWrites; i++ { 261 i := i 262 go func() { 263 src := rand.NewSource(int64(i)) 264 rng := rand.New(src) 265 for { 266 select { 267 case <-queryConcDuringWritesCloseCh: 268 return 269 default: 270 } 271 272 switch opts.concurrencyQueryDuringWritesType { 273 case indexQuery: 274 randI := rng.Intn(opts.concurrencyEnqueueWorker) 275 randJ := rng.Intn(opts.enqueuePerWorker) 276 id, tags := genIDTags(randI, randJ, opts.numTags) 277 ok, err := isIndexedChecked(t, session, md.ID(), id, tags) 278 if err != nil { 279 if n := numTotalQueryErrors.Inc(); n < 10 { 280 // Log the first 10 errors for visibility but not flood. 281 log.Error("sampled query error", zap.Error(err)) 282 } 283 } 284 if ok { 285 numTotalQueryMatches.Inc() 286 } 287 case indexAggregateQuery: 288 randI := rng.Intn(opts.concurrencyEnqueueWorker) 289 match := idx.NewTermQuery([]byte("common_i"), []byte(strconv.Itoa(randI))) 290 q := index.Query{Query: match} 291 292 now := xtime.Now() 293 qOpts := index.AggregationOptions{ 294 QueryOptions: index.QueryOptions{ 295 StartInclusive: now.Add(-md.Options().RetentionOptions().RetentionPeriod()), 296 EndExclusive: now, 297 DocsLimit: 1000, 298 }, 299 } 300 301 ctx := context.NewBackground() 302 r, err := testSetup.DB().AggregateQuery(ctx, md.ID(), q, qOpts) 303 if err != nil { 304 panic(err) 305 } 306 307 tagValues := 0 308 for _, entry := range r.Results.Map().Iter() { 309 values := entry.Value() 310 tagValues += values.Size() 311 } 312 313 // Done with resources, return to pool. 314 ctx.Close() 315 316 numTotalQueryMatches.Add(uint32(tagValues)) 317 default: 318 panic("unknown query type") 319 } 320 } 321 }() 322 } 323 } 324 325 // Wait for writes to at least be enqueued. 326 wg.Wait() 327 328 // Check no write errors. 329 require.Equal(t, int(0), int(numTotalErrors.Load())) 330 331 if checkNumTotalQueryMatches { 332 // Check matches. 333 require.True(t, numTotalQueryMatches.Load() > 0, "no query matches") 334 } 335 336 log.Info("test data written", 337 zap.Duration("took", time.Since(start)), 338 zap.Int("written", int(numTotalSuccess.Load())), 339 zap.Time("serverTime", nowFn()), 340 zap.Uint32("queryMatches", numTotalQueryMatches.Load())) 341 342 log.Info("data indexing verify start") 343 344 // Wait for at least all things to be enqueued for indexing. 345 expectStatPrefix := "dbindex.index-attempt+namespace=testNs1," 346 expectStatProcess := expectStatPrefix + "stage=process" 347 numIndexTotal := opts.enqueuePerWorker 348 multiplyByConcurrency := multiplyBy(opts.concurrencyEnqueueWorker) 349 expectNumIndex := multiplyByConcurrency(numIndexTotal) 350 indexProcess := xclock.WaitUntil(func() bool { 351 counters := testSetup.Scope().Snapshot().Counters() 352 counter, ok := counters[expectStatProcess] 353 if !ok { 354 return false 355 } 356 return int(counter.Value()) == expectNumIndex 357 }, time.Minute*5) 358 359 counters := testSetup.Scope().Snapshot().Counters() 360 counter, ok := counters[expectStatProcess] 361 362 var value int 363 if ok { 364 value = int(counter.Value()) 365 } 366 assert.True(t, indexProcess, 367 fmt.Sprintf("timeout waiting for index to process: expected to index %d but only processed %d", 368 expectNumIndex, value)) 369 370 // Allow concurrent query during writes to finish. 371 close(queryConcDuringWritesCloseCh) 372 373 // Check no query errors. 374 require.Equal(t, int(0), int(numTotalErrors.Load())) 375 376 if !opts.skipVerify { 377 log.Info("data indexing each series visible start") 378 // Now check all of them are individually indexed. 379 var ( 380 fetchWg sync.WaitGroup 381 notIndexedErrs []error 382 notIndexedLock sync.Mutex 383 ) 384 for i := 0; i < opts.concurrencyEnqueueWorker; i++ { 385 fetchWg.Add(1) 386 i := i 387 go func() { 388 defer fetchWg.Done() 389 390 for j := 0; j < opts.enqueuePerWorker; j++ { 391 if opts.skipWrites && j%2 == 0 { 392 continue // not meant to be indexed. 393 } 394 395 j := j 396 fetchWg.Add(1) 397 workerPool.Go(func() { 398 defer fetchWg.Done() 399 400 id, tags := genIDTags(i, j, opts.numTags) 401 indexed := xclock.WaitUntil(func() bool { 402 found := isIndexed(t, session, md.ID(), id, tags) 403 return found 404 }, 30*time.Second) 405 if !indexed { 406 err := fmt.Errorf("not indexed series: i=%d, j=%d", i, j) 407 notIndexedLock.Lock() 408 notIndexedErrs = append(notIndexedErrs, err) 409 notIndexedLock.Unlock() 410 } 411 }) 412 } 413 }() 414 } 415 fetchWg.Wait() 416 417 require.Equal(t, 0, len(notIndexedErrs), 418 fmt.Sprintf("not indexed errors: %v", notIndexedErrs[:min(5, len(notIndexedErrs))])) 419 } 420 421 log.Info("data indexing verify done", zap.Duration("took", time.Since(start))) 422 423 // Make sure attempted total indexing = skipped + written. 424 counters = testSetup.Scope().Snapshot().Counters() 425 totalSkippedWritten := 0 426 for _, expectID := range []string{ 427 expectStatPrefix + "stage=skip", 428 expectStatPrefix + "stage=write", 429 } { 430 actual, ok := counters[expectID] 431 assert.True(t, ok, 432 fmt.Sprintf("counter not found to test value: id=%s", expectID)) 433 if ok { 434 totalSkippedWritten += int(actual.Value()) 435 } 436 } 437 438 log.Info("check written + skipped", 439 zap.Int("expectedValue", multiplyByConcurrency(numIndexTotal)), 440 zap.Int("actualValue", totalSkippedWritten)) 441 assert.Equal(t, multiplyByConcurrency(numIndexTotal), totalSkippedWritten, 442 "total written + skipped mismatch") 443 } 444 445 func multiplyBy(n int) func(int) int { 446 return func(x int) int { 447 return n * x 448 } 449 } 450 451 func min(x, y int) int { 452 if x < y { 453 return x 454 } 455 return y 456 }