//go:build integration
// +build integration

// Copyright (c) 2021 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
23 24 package integration 25 26 import ( 27 "context" 28 "encoding/json" 29 "fmt" 30 "math/rand" 31 "net/http" 32 "net/url" 33 "reflect" 34 "runtime" 35 "sort" 36 "strings" 37 "sync" 38 "testing" 39 "time" 40 41 // nolint: gci 42 "github.com/stretchr/testify/assert" 43 "github.com/stretchr/testify/require" 44 "go.uber.org/atomic" 45 "go.uber.org/zap" 46 47 "github.com/m3db/m3/src/dbnode/integration/generate" 48 "github.com/m3db/m3/src/dbnode/namespace" 49 "github.com/m3db/m3/src/dbnode/retention" 50 graphitehandler "github.com/m3db/m3/src/query/api/v1/handler/graphite" 51 "github.com/m3db/m3/src/query/graphite/graphite" 52 "github.com/m3db/m3/src/x/headers" 53 "github.com/m3db/m3/src/x/ident" 54 xhttp "github.com/m3db/m3/src/x/net/http" 55 xsync "github.com/m3db/m3/src/x/sync" 56 xtest "github.com/m3db/m3/src/x/test" 57 ) 58 59 type testGraphiteFindDatasetSize uint 60 61 const ( 62 smallDatasetSize testGraphiteFindDatasetSize = iota 63 mediumDatasetSize 64 largeDatasetSize 65 ) 66 67 type testGraphiteFindOptions struct { 68 checkConcurrency int 69 datasetSize testGraphiteFindDatasetSize 70 checkLimit bool 71 } 72 73 func TestGraphiteFindSequential(t *testing.T) { 74 // NB(rob): We need to investigate why using high concurrency (and hence 75 // need to use small dataset size since otherwise verification takes 76 // forever) encounters errors running on CI. 77 testGraphiteFind(t, testGraphiteFindOptions{ 78 checkConcurrency: 1, 79 datasetSize: mediumDatasetSize, 80 }) 81 } 82 83 func TestGraphiteFindParallel(t *testing.T) { 84 // Skip until investigation of why check concurrency encounters errors on CI. 
85 t.SkipNow() 86 testGraphiteFind(t, testGraphiteFindOptions{ 87 checkConcurrency: runtime.NumCPU(), 88 datasetSize: largeDatasetSize, 89 }) 90 } 91 92 func TestGraphiteFindLimits(t *testing.T) { 93 testGraphiteFind(t, testGraphiteFindOptions{ 94 checkConcurrency: 1, 95 datasetSize: smallDatasetSize, 96 checkLimit: true, 97 }) 98 } 99 100 func testGraphiteFind(tt *testing.T, testOpts testGraphiteFindOptions) { 101 if testing.Short() { 102 tt.SkipNow() // Just skip if we're doing a short run 103 } 104 105 // Make sure that parallel assertions fail test immediately 106 // by using a TestingT that panics when FailNow is called. 107 t := xtest.FailNowPanicsTestingT(tt) 108 109 queryConfigYAML := ` 110 listenAddress: 127.0.0.1:7201 111 112 logging: 113 level: info 114 115 metrics: 116 scope: 117 prefix: "coordinator" 118 prometheus: 119 handlerPath: /metrics 120 listenAddress: "127.0.0.1:0" 121 sanitization: prometheus 122 samplingRate: 1.0 123 124 local: 125 namespaces: 126 - namespace: default 127 type: unaggregated 128 retention: 12h 129 - namespace: testns 130 type: aggregated 131 retention: 12h 132 resolution: 1m 133 ` 134 135 if testOpts.checkLimit { 136 queryConfigYAML += ` 137 carbon: 138 limitsFind: 139 perQuery: 140 maxFetchedSeries: 10 141 instanceMultiple: 2 142 maxFetchedRange: 2h 143 requireExhaustive: false 144 ` 145 } 146 147 var ( 148 blockSize = 2 * time.Hour 149 retentionPeriod = 6 * blockSize 150 rOpts = retention.NewOptions(). 151 SetRetentionPeriod(retentionPeriod). 152 SetBlockSize(blockSize) 153 idxOpts = namespace.NewIndexOptions(). 154 SetEnabled(true). 155 SetBlockSize(2 * blockSize) 156 nOpts = namespace.NewOptions(). 157 SetRetentionOptions(rOpts). 158 SetIndexOptions(idxOpts) 159 ) 160 ns, err := namespace.NewMetadata(ident.StringID("testns"), nOpts) 161 require.NoError(t, err) 162 163 opts := NewTestOptions(tt). 164 SetNamespaces([]namespace.Metadata{ns}) 165 166 // Test setup. 
167 setup, err := NewTestSetup(tt, opts, nil) 168 require.NoError(t, err) 169 defer setup.Close() 170 171 log := setup.StorageOpts().InstrumentOptions().Logger(). 172 With(zap.String("ns", ns.ID().String())) 173 174 require.NoError(t, setup.InitializeBootstrappers(InitializeBootstrappersOptions{ 175 WithFileSystem: true, 176 })) 177 178 // Write test data. 179 now := setup.NowFn()() 180 181 // Create graphite node tree for tests. 182 var ( 183 // nolint: gosec 184 randConstSeedSrc = rand.NewSource(123456789) 185 // nolint: gosec 186 randGen = rand.New(randConstSeedSrc) 187 rootNode = &graphiteNode{} 188 buildNodes func(node *graphiteNode, level int) 189 generateSeries []generate.Series 190 levels int 191 entriesPerLevelMin int 192 entriesPerLevelMax int 193 ) 194 switch testOpts.datasetSize { 195 case smallDatasetSize: 196 levels = 2 197 entriesPerLevelMin = 12 198 entriesPerLevelMax = 15 199 case mediumDatasetSize: 200 levels = 4 201 entriesPerLevelMin = 5 202 entriesPerLevelMax = 7 203 case largeDatasetSize: 204 // Ideally we'd always use a large dataset size, however you do need 205 // high concurrency to validate this entire dataset and CI can't seem 206 // to handle high concurrency without encountering errors. 207 levels = 5 208 entriesPerLevelMin = 6 209 entriesPerLevelMax = 9 210 default: 211 require.FailNow(t, fmt.Sprintf("invalid test dataset size set: %d", testOpts.datasetSize)) 212 } 213 214 buildNodes = func(node *graphiteNode, level int) { 215 entries := entriesPerLevelMin + 216 randGen.Intn(entriesPerLevelMax-entriesPerLevelMin) 217 for entry := 0; entry < entries; entry++ { 218 name := fmt.Sprintf("lvl%02d_entry%02d", level, entry) 219 220 // Create a directory node and spawn more underneath. 221 if nextLevel := level + 1; nextLevel <= levels { 222 childDir := node.child(name+"_dir", graphiteNodeChildOptions{ 223 isLeaf: false, 224 }) 225 buildNodes(childDir, nextLevel) 226 } 227 228 // Create a leaf node. 
229 childLeaf := node.child(name+"_leaf", graphiteNodeChildOptions{ 230 isLeaf: true, 231 }) 232 233 // Create series to generate data for the leaf node. 234 tags := make([]ident.Tag, 0, len(childLeaf.pathParts)) 235 for i, pathPartValue := range childLeaf.pathParts { 236 tags = append(tags, ident.Tag{ 237 Name: graphite.TagNameID(i), 238 Value: ident.StringID(pathPartValue), 239 }) 240 } 241 series := generate.Series{ 242 ID: ident.StringID(strings.Join(childLeaf.pathParts, ".")), 243 Tags: ident.NewTags(tags...), 244 } 245 generateSeries = append(generateSeries, series) 246 } 247 } 248 249 // Build tree. 250 log.Info("building graphite data set series") 251 buildNodes(rootNode, 0) 252 253 // Generate and write test data. 254 log.Info("generating graphite data set datapoints", 255 zap.Int("seriesSize", len(generateSeries))) 256 generateBlocks := make([]generate.BlockConfig, 0, len(generateSeries)) 257 for _, series := range generateSeries { 258 generateBlocks = append(generateBlocks, []generate.BlockConfig{ 259 { 260 IDs: []string{series.ID.String()}, 261 Tags: series.Tags, 262 NumPoints: 1, 263 Start: now.Add(-1 * blockSize), 264 }, 265 { 266 IDs: []string{series.ID.String()}, 267 Tags: series.Tags, 268 NumPoints: 1, 269 Start: now, 270 }, 271 }...) 272 } 273 seriesMaps := generate.BlocksByStart(generateBlocks) 274 log.Info("writing graphite data set to disk", 275 zap.Int("seriesMapSize", len(seriesMaps))) 276 require.NoError(t, writeTestDataToDisk(ns, setup, seriesMaps, 0)) 277 278 // Start the server with filesystem bootstrapper. 279 log.Info("starting server") 280 require.NoError(t, setup.StartServer()) 281 log.Info("server is now up") 282 283 // Stop the server. 
284 defer func() { 285 require.NoError(t, setup.StopServer()) 286 log.Info("server is now down") 287 }() 288 289 // Start the query server 290 log.Info("starting query server") 291 require.NoError(t, setup.StartQuery(queryConfigYAML)) 292 log.Info("started query server", zap.String("addr", setup.QueryAddress())) 293 294 // Stop the query server. 295 defer func() { 296 require.NoError(t, setup.StopQuery()) 297 log.Info("query server is now down") 298 }() 299 300 // Check each level of the tree can answer expected queries. 301 type checkResult struct { 302 leavesVerified int 303 } 304 type checkFailure struct { 305 expected graphiteFindResults 306 actual graphiteFindResults 307 failMsg string 308 } 309 var ( 310 verifyFindQueries func(node *graphiteNode, level int) (checkResult, *checkFailure, error) 311 parallelVerifyFindQueries func(node *graphiteNode, level int) 312 checkedSeriesAbort = atomic.NewBool(false) 313 numSeriesChecking = uint64(len(generateSeries)) 314 checkedSeriesLogEvery = numSeriesChecking / 10 315 checkedSeries = atomic.NewUint64(0) 316 checkedSeriesLog = atomic.NewUint64(0) 317 // Use custom http client for higher number of max idle conns. 318 httpClient = xhttp.NewHTTPClient(xhttp.DefaultHTTPClientOptions()) 319 wg sync.WaitGroup 320 workerPool = xsync.NewWorkerPool(testOpts.checkConcurrency) 321 ) 322 workerPool.Init() 323 parallelVerifyFindQueries = func(node *graphiteNode, level int) { 324 // Verify this node at level. 325 wg.Add(1) 326 workerPool.Go(func() { 327 defer wg.Done() 328 329 if checkedSeriesAbort.Load() { 330 // Do not execute if aborted. 331 return 332 } 333 334 result, failure, err := verifyFindQueries(node, level) 335 if failure == nil && err == nil { 336 // Account for series checked (for progress report). 337 checkedSeries.Add(uint64(result.leavesVerified)) 338 return 339 } 340 341 // Bail parallel execution (failed require/assert won't stop execution). 
342 if checkedSeriesAbort.CAS(false, true) { 343 switch { 344 case failure != nil: 345 // Assert an error result and log once. 346 assert.Equal(t, failure.expected, failure.actual, failure.failMsg) 347 log.Error("aborting checks due to mismatch") 348 case err != nil: 349 assert.NoError(t, err) 350 log.Error("aborting checks due to error") 351 default: 352 require.FailNow(t, "unknown error condition") 353 log.Error("aborting checks due to unknown condition") 354 } 355 } 356 }) 357 358 // Verify children of children. 359 for _, child := range node.children { 360 parallelVerifyFindQueries(child, level+1) 361 } 362 } 363 verifyFindQueries = func(node *graphiteNode, level int) (checkResult, *checkFailure, error) { 364 var r checkResult 365 366 // Write progress report if progress made. 367 checked := checkedSeries.Load() 368 nextLog := checked - (checked % checkedSeriesLogEvery) 369 if lastLog := checkedSeriesLog.Swap(nextLog); lastLog < nextLog { 370 log.Info("checked series progressing", zap.Int("checked", int(checked))) 371 } 372 373 // Verify at depth. 374 numPathParts := len(node.pathParts) 375 queryPathParts := make([]string, 0, 1+numPathParts) 376 if numPathParts > 0 { 377 queryPathParts = append(queryPathParts, node.pathParts...) 378 } 379 queryPathParts = append(queryPathParts, "*") 380 query := strings.Join(queryPathParts, ".") 381 382 params := make(url.Values) 383 params.Set("query", query) 384 385 url := fmt.Sprintf("http://%s%s?%s", setup.QueryAddress(), 386 graphitehandler.FindURL, params.Encode()) 387 388 req, err := http.NewRequestWithContext(context.Background(), 389 http.MethodGet, url, nil) 390 require.NoError(t, err) 391 392 // Ensure that when the limit test runs we don't apply limit 393 // for this specific request (due to this being verification check). 
394 req.Header.Set(headers.LimitMaxSeriesHeader, "1000") 395 396 res, err := httpClient.Do(req) 397 if err != nil { 398 return r, nil, err 399 } 400 if res.StatusCode != http.StatusOK { 401 return r, nil, fmt.Errorf("bad response code: expected=%d, actual=%d", 402 http.StatusOK, res.StatusCode) 403 } 404 405 defer res.Body.Close() 406 407 // Compare results. 408 var actual graphiteFindResults 409 if err := json.NewDecoder(res.Body).Decode(&actual); err != nil { 410 return r, nil, err 411 } 412 413 expected := make(graphiteFindResults, 0, len(node.children)) 414 for _, child := range node.children { 415 leaf := 0 416 if child.isLeaf { 417 leaf = 1 418 r.leavesVerified++ 419 } 420 expected = append(expected, graphiteFindResult{ 421 Text: child.name, 422 Leaf: leaf, 423 }) 424 } 425 426 sortGraphiteFindResults(actual) 427 sortGraphiteFindResults(expected) 428 429 if !reflect.DeepEqual(expected, actual) { 430 failMsg := fmt.Sprintf("invalid results: level=%d, parts=%d, query=%s", 431 level, len(node.pathParts), query) 432 failMsg += fmt.Sprintf("\n\ndiff:\n%s\n\n", 433 xtest.Diff(xtest.MustPrettyJSONObject(t, expected), 434 xtest.MustPrettyJSONObject(t, actual))) 435 return r, &checkFailure{ 436 expected: expected, 437 actual: actual, 438 failMsg: failMsg, 439 }, nil 440 } 441 442 return r, nil, nil 443 } 444 445 // Check all top level entries and recurse. 446 log.Info("checking series", 447 zap.Int("checkConcurrency", testOpts.checkConcurrency), 448 zap.Uint64("numSeriesChecking", numSeriesChecking)) 449 parallelVerifyFindQueries(rootNode, 0) 450 451 if testOpts.checkLimit { 452 testGraphiteFindLimit(t, setup, log) 453 } 454 455 // Wait for execution. 456 wg.Wait() 457 458 // Allow for debugging by issuing queries, etc. 
459 if DebugTest() { 460 log.Info("debug test set, pausing for investigate") 461 <-make(chan struct{}) 462 } 463 } 464 465 func testGraphiteFindLimit( 466 t require.TestingT, 467 setup TestSetup, 468 log *zap.Logger, 469 ) { 470 params := make(url.Values) 471 params.Set("query", "lvl00_entry00_dir.*") 472 473 url := fmt.Sprintf("http://%s%s?%s", setup.QueryAddress(), 474 graphitehandler.FindURL, params.Encode()) 475 476 req, err := http.NewRequestWithContext(context.Background(), 477 http.MethodGet, url, nil) 478 require.NoError(t, err) 479 480 res, err := http.DefaultClient.Do(req) 481 require.NoError(t, err) 482 require.Equal(t, http.StatusOK, res.StatusCode) 483 484 log.Info("find with limit applied response headers", zap.Any("headers", res.Header)) 485 486 defer res.Body.Close() 487 488 var results graphiteFindResults 489 require.NoError(t, json.NewDecoder(res.Body).Decode(&results)) 490 491 assert.Equal(t, headers.LimitHeaderSeriesLimitApplied, res.Header.Get(headers.LimitHeader)) 492 } 493 494 type graphiteFindResults []graphiteFindResult 495 496 type graphiteFindResult struct { 497 Text string `json:"text"` 498 Leaf int `json:"leaf"` 499 } 500 501 func sortGraphiteFindResults(r graphiteFindResults) { 502 sort.Slice(r, func(i, j int) bool { 503 if r[i].Leaf != r[j].Leaf { 504 return r[i].Leaf < r[j].Leaf 505 } 506 return r[i].Text < r[j].Text 507 }) 508 } 509 510 type graphiteNode struct { 511 name string 512 pathParts []string 513 isLeaf bool 514 children []*graphiteNode 515 } 516 517 type graphiteNodeChildOptions struct { 518 isLeaf bool 519 } 520 521 func (n *graphiteNode) child( 522 name string, 523 opts graphiteNodeChildOptions, 524 ) *graphiteNode { 525 pathParts := append(make([]string, 0, 1+len(n.pathParts)), n.pathParts...) 526 pathParts = append(pathParts, name) 527 528 child := &graphiteNode{ 529 name: name, 530 pathParts: pathParts, 531 isLeaf: opts.isLeaf, 532 } 533 534 n.children = append(n.children, child) 535 536 return child 537 }