github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/integration/graphite_find_test.go (about)

     1  //go:build integration
     2  // +build integration
     3  
     4  // Copyright (c) 2021 Uber Technologies, Inc.
     5  //
     6  // Permission is hereby granted, free of charge, to any person obtaining a copy
     7  // of this software and associated documentation files (the "Software"), to deal
     8  // in the Software without restriction, including without limitation the rights
     9  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    10  // copies of the Software, and to permit persons to whom the Software is
    11  // furnished to do so, subject to the following conditions:
    12  //
    13  // The above copyright notice and this permission notice shall be included in
    14  // all copies or substantial portions of the Software.
    15  //
    16  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    17  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    18  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    19  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    20  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    21  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    22  // THE SOFTWARE.
    23  
    24  package integration
    25  
    26  import (
    27  	"context"
    28  	"encoding/json"
    29  	"fmt"
    30  	"math/rand"
    31  	"net/http"
    32  	"net/url"
    33  	"reflect"
    34  	"runtime"
    35  	"sort"
    36  	"strings"
    37  	"sync"
    38  	"testing"
    39  	"time"
    40  
    41  	// nolint: gci
    42  	"github.com/stretchr/testify/assert"
    43  	"github.com/stretchr/testify/require"
    44  	"go.uber.org/atomic"
    45  	"go.uber.org/zap"
    46  
    47  	"github.com/m3db/m3/src/dbnode/integration/generate"
    48  	"github.com/m3db/m3/src/dbnode/namespace"
    49  	"github.com/m3db/m3/src/dbnode/retention"
    50  	graphitehandler "github.com/m3db/m3/src/query/api/v1/handler/graphite"
    51  	"github.com/m3db/m3/src/query/graphite/graphite"
    52  	"github.com/m3db/m3/src/x/headers"
    53  	"github.com/m3db/m3/src/x/ident"
    54  	xhttp "github.com/m3db/m3/src/x/net/http"
    55  	xsync "github.com/m3db/m3/src/x/sync"
    56  	xtest "github.com/m3db/m3/src/x/test"
    57  )
    58  
// testGraphiteFindDatasetSize selects how large a synthetic graphite
// metric tree the find tests generate and verify.
type testGraphiteFindDatasetSize uint

const (
	// smallDatasetSize builds a shallow tree; used by the limits test.
	smallDatasetSize testGraphiteFindDatasetSize = iota
	// mediumDatasetSize is used by the sequential (concurrency=1) test.
	mediumDatasetSize
	// largeDatasetSize is only practical with high check concurrency
	// (the parallel test, currently skipped on CI).
	largeDatasetSize
)
    66  
// testGraphiteFindOptions controls a testGraphiteFind run:
// checkConcurrency is the number of worker-pool goroutines used to verify
// find queries, datasetSize selects how large a metric tree is generated,
// and checkLimit additionally configures carbon find limits in the query
// server config and verifies the series-limit response header.
type testGraphiteFindOptions struct {
	checkConcurrency int
	datasetSize      testGraphiteFindDatasetSize
	checkLimit       bool
}
    72  
    73  func TestGraphiteFindSequential(t *testing.T) {
    74  	// NB(rob): We need to investigate why using high concurrency (and hence
    75  	// need to use small dataset size since otherwise verification takes
    76  	// forever) encounters errors running on CI.
    77  	testGraphiteFind(t, testGraphiteFindOptions{
    78  		checkConcurrency: 1,
    79  		datasetSize:      mediumDatasetSize,
    80  	})
    81  }
    82  
    83  func TestGraphiteFindParallel(t *testing.T) {
    84  	// Skip until investigation of why check concurrency encounters errors on CI.
    85  	t.SkipNow()
    86  	testGraphiteFind(t, testGraphiteFindOptions{
    87  		checkConcurrency: runtime.NumCPU(),
    88  		datasetSize:      largeDatasetSize,
    89  	})
    90  }
    91  
    92  func TestGraphiteFindLimits(t *testing.T) {
    93  	testGraphiteFind(t, testGraphiteFindOptions{
    94  		checkConcurrency: 1,
    95  		datasetSize:      smallDatasetSize,
    96  		checkLimit:       true,
    97  	})
    98  }
    99  
// testGraphiteFind generates a deterministic synthetic graphite metric tree
// sized by testOpts.datasetSize, writes one datapoint per leaf series into
// two adjacent blocks on disk, boots a dbnode and a query server against it,
// and then verifies that a graphite find query at every level of the tree
// returns exactly that node's direct children. When testOpts.checkLimit is
// set the query server is additionally configured with carbon find limits
// and the series-limit response header is verified.
func testGraphiteFind(tt *testing.T, testOpts testGraphiteFindOptions) {
	if testing.Short() {
		tt.SkipNow() // Just skip if we're doing a short run
	}

	// Make sure that parallel assertions fail test immediately
	// by using a TestingT that panics when FailNow is called.
	t := xtest.FailNowPanicsTestingT(tt)

	queryConfigYAML := `
listenAddress: 127.0.0.1:7201

logging:
  level: info

metrics:
  scope:
    prefix: "coordinator"
  prometheus:
    handlerPath: /metrics
    listenAddress: "127.0.0.1:0"
  sanitization: prometheus
  samplingRate: 1.0

local:
  namespaces:
    - namespace: default
      type: unaggregated
      retention: 12h
    - namespace: testns
      type: aggregated
      retention: 12h
      resolution: 1m
`

	// Optionally enable carbon find limits so testGraphiteFindLimit below
	// can assert the series-limit header is applied.
	if testOpts.checkLimit {
		queryConfigYAML += `
carbon:
  limitsFind:
    perQuery:
      maxFetchedSeries: 10
      instanceMultiple: 2
      maxFetchedRange: 2h
      requireExhaustive: false
`
	}

	// Retention and index options for the "testns" namespace test data is
	// written to.
	var (
		blockSize       = 2 * time.Hour
		retentionPeriod = 6 * blockSize
		rOpts           = retention.NewOptions().
				SetRetentionPeriod(retentionPeriod).
				SetBlockSize(blockSize)
		idxOpts = namespace.NewIndexOptions().
			SetEnabled(true).
			SetBlockSize(2 * blockSize)
		nOpts = namespace.NewOptions().
			SetRetentionOptions(rOpts).
			SetIndexOptions(idxOpts)
	)
	ns, err := namespace.NewMetadata(ident.StringID("testns"), nOpts)
	require.NoError(t, err)

	opts := NewTestOptions(tt).
		SetNamespaces([]namespace.Metadata{ns})

	// Test setup.
	setup, err := NewTestSetup(tt, opts, nil)
	require.NoError(t, err)
	defer setup.Close()

	log := setup.StorageOpts().InstrumentOptions().Logger().
		With(zap.String("ns", ns.ID().String()))

	require.NoError(t, setup.InitializeBootstrappers(InitializeBootstrappersOptions{
		WithFileSystem: true,
	}))

	// Write test data.
	now := setup.NowFn()()

	// Create graphite node tree for tests.
	var (
		// Fixed seed so the generated tree (and hence verification) is
		// reproducible across runs.
		// nolint: gosec
		randConstSeedSrc = rand.NewSource(123456789)
		// nolint: gosec
		randGen            = rand.New(randConstSeedSrc)
		rootNode           = &graphiteNode{}
		buildNodes         func(node *graphiteNode, level int)
		generateSeries     []generate.Series
		levels             int
		entriesPerLevelMin int
		entriesPerLevelMax int
	)
	switch testOpts.datasetSize {
	case smallDatasetSize:
		levels = 2
		entriesPerLevelMin = 12
		entriesPerLevelMax = 15
	case mediumDatasetSize:
		levels = 4
		entriesPerLevelMin = 5
		entriesPerLevelMax = 7
	case largeDatasetSize:
		// Ideally we'd always use a large dataset size, however you do need
		// high concurrency to validate this entire dataset and CI can't seem
		// to handle high concurrency without encountering errors.
		levels = 5
		entriesPerLevelMin = 6
		entriesPerLevelMax = 9
	default:
		require.FailNow(t, fmt.Sprintf("invalid test dataset size set: %d", testOpts.datasetSize))
	}

	// buildNodes recursively creates, per entry at each level, a directory
	// node (recursed into until the maximum level) and a leaf node whose
	// dot-joined path becomes a series ID with one graphite tag per path
	// part.
	buildNodes = func(node *graphiteNode, level int) {
		entries := entriesPerLevelMin +
			randGen.Intn(entriesPerLevelMax-entriesPerLevelMin)
		for entry := 0; entry < entries; entry++ {
			name := fmt.Sprintf("lvl%02d_entry%02d", level, entry)

			// Create a directory node and spawn more underneath.
			if nextLevel := level + 1; nextLevel <= levels {
				childDir := node.child(name+"_dir", graphiteNodeChildOptions{
					isLeaf: false,
				})
				buildNodes(childDir, nextLevel)
			}

			// Create a leaf node.
			childLeaf := node.child(name+"_leaf", graphiteNodeChildOptions{
				isLeaf: true,
			})

			// Create series to generate data for the leaf node.
			tags := make([]ident.Tag, 0, len(childLeaf.pathParts))
			for i, pathPartValue := range childLeaf.pathParts {
				tags = append(tags, ident.Tag{
					Name:  graphite.TagNameID(i),
					Value: ident.StringID(pathPartValue),
				})
			}
			series := generate.Series{
				ID:   ident.StringID(strings.Join(childLeaf.pathParts, ".")),
				Tags: ident.NewTags(tags...),
			}
			generateSeries = append(generateSeries, series)
		}
	}

	// Build tree.
	log.Info("building graphite data set series")
	buildNodes(rootNode, 0)

	// Generate and write test data.
	log.Info("generating graphite data set datapoints",
		zap.Int("seriesSize", len(generateSeries)))
	// One datapoint in the previous block and one in the current block for
	// each generated leaf series.
	generateBlocks := make([]generate.BlockConfig, 0, len(generateSeries))
	for _, series := range generateSeries {
		generateBlocks = append(generateBlocks, []generate.BlockConfig{
			{
				IDs:       []string{series.ID.String()},
				Tags:      series.Tags,
				NumPoints: 1,
				Start:     now.Add(-1 * blockSize),
			},
			{
				IDs:       []string{series.ID.String()},
				Tags:      series.Tags,
				NumPoints: 1,
				Start:     now,
			},
		}...)
	}
	seriesMaps := generate.BlocksByStart(generateBlocks)
	log.Info("writing graphite data set to disk",
		zap.Int("seriesMapSize", len(seriesMaps)))
	require.NoError(t, writeTestDataToDisk(ns, setup, seriesMaps, 0))

	// Start the server with filesystem bootstrapper.
	log.Info("starting server")
	require.NoError(t, setup.StartServer())
	log.Info("server is now up")

	// Stop the server.
	defer func() {
		require.NoError(t, setup.StopServer())
		log.Info("server is now down")
	}()

	// Start the query server
	log.Info("starting query server")
	require.NoError(t, setup.StartQuery(queryConfigYAML))
	log.Info("started query server", zap.String("addr", setup.QueryAddress()))

	// Stop the query server.
	defer func() {
		require.NoError(t, setup.StopQuery())
		log.Info("query server is now down")
	}()

	// Check each level of the tree can answer expected queries.
	type checkResult struct {
		leavesVerified int
	}
	type checkFailure struct {
		expected graphiteFindResults
		actual   graphiteFindResults
		failMsg  string
	}
	var (
		verifyFindQueries         func(node *graphiteNode, level int) (checkResult, *checkFailure, error)
		parallelVerifyFindQueries func(node *graphiteNode, level int)
		checkedSeriesAbort        = atomic.NewBool(false)
		numSeriesChecking         = uint64(len(generateSeries))
		// Log progress roughly every 10% of series verified.
		checkedSeriesLogEvery = numSeriesChecking / 10
		checkedSeries         = atomic.NewUint64(0)
		checkedSeriesLog      = atomic.NewUint64(0)
		// Use custom http client for higher number of max idle conns.
		httpClient = xhttp.NewHTTPClient(xhttp.DefaultHTTPClientOptions())
		wg         sync.WaitGroup
		workerPool = xsync.NewWorkerPool(testOpts.checkConcurrency)
	)
	workerPool.Init()
	// parallelVerifyFindQueries walks the tree, scheduling one find-query
	// verification per node onto the worker pool; the first failure flips
	// checkedSeriesAbort so checks not yet executed become no-ops.
	parallelVerifyFindQueries = func(node *graphiteNode, level int) {
		// Verify this node at level.
		wg.Add(1)
		workerPool.Go(func() {
			defer wg.Done()

			if checkedSeriesAbort.Load() {
				// Do not execute if aborted.
				return
			}

			result, failure, err := verifyFindQueries(node, level)
			if failure == nil && err == nil {
				// Account for series checked (for progress report).
				checkedSeries.Add(uint64(result.leavesVerified))
				return
			}

			// Bail parallel execution (failed require/assert won't stop execution).
			if checkedSeriesAbort.CAS(false, true) {
				switch {
				case failure != nil:
					// Assert an error result and log once.
					assert.Equal(t, failure.expected, failure.actual, failure.failMsg)
					log.Error("aborting checks due to mismatch")
				case err != nil:
					assert.NoError(t, err)
					log.Error("aborting checks due to error")
				default:
					require.FailNow(t, "unknown error condition")
					log.Error("aborting checks due to unknown condition")
				}
			}
		})

		// Verify children of children.
		for _, child := range node.children {
			parallelVerifyFindQueries(child, level+1)
		}
	}
	// verifyFindQueries issues a find query for "<node path>.*" against the
	// query server and compares the response against the node's direct
	// children.
	verifyFindQueries = func(node *graphiteNode, level int) (checkResult, *checkFailure, error) {
		var r checkResult

		// Write progress report if progress made.
		checked := checkedSeries.Load()
		nextLog := checked - (checked % checkedSeriesLogEvery)
		if lastLog := checkedSeriesLog.Swap(nextLog); lastLog < nextLog {
			log.Info("checked series progressing", zap.Int("checked", int(checked)))
		}

		// Verify at depth.
		numPathParts := len(node.pathParts)
		queryPathParts := make([]string, 0, 1+numPathParts)
		if numPathParts > 0 {
			queryPathParts = append(queryPathParts, node.pathParts...)
		}
		queryPathParts = append(queryPathParts, "*")
		query := strings.Join(queryPathParts, ".")

		params := make(url.Values)
		params.Set("query", query)

		url := fmt.Sprintf("http://%s%s?%s", setup.QueryAddress(),
			graphitehandler.FindURL, params.Encode())

		req, err := http.NewRequestWithContext(context.Background(),
			http.MethodGet, url, nil)
		require.NoError(t, err)

		// Ensure that when the limit test runs we don't apply limit
		// for this specific request (due to this being verification check).
		req.Header.Set(headers.LimitMaxSeriesHeader, "1000")

		res, err := httpClient.Do(req)
		if err != nil {
			return r, nil, err
		}
		if res.StatusCode != http.StatusOK {
			return r, nil, fmt.Errorf("bad response code: expected=%d, actual=%d",
				http.StatusOK, res.StatusCode)
		}

		defer res.Body.Close()

		// Compare results.
		var actual graphiteFindResults
		if err := json.NewDecoder(res.Body).Decode(&actual); err != nil {
			return r, nil, err
		}

		// Expect exactly one entry per direct child, leaf=1 for leaves.
		expected := make(graphiteFindResults, 0, len(node.children))
		for _, child := range node.children {
			leaf := 0
			if child.isLeaf {
				leaf = 1
				r.leavesVerified++
			}
			expected = append(expected, graphiteFindResult{
				Text: child.name,
				Leaf: leaf,
			})
		}

		// Sort both sides so the comparison is order-insensitive.
		sortGraphiteFindResults(actual)
		sortGraphiteFindResults(expected)

		if !reflect.DeepEqual(expected, actual) {
			failMsg := fmt.Sprintf("invalid results: level=%d, parts=%d, query=%s",
				level, len(node.pathParts), query)
			failMsg += fmt.Sprintf("\n\ndiff:\n%s\n\n",
				xtest.Diff(xtest.MustPrettyJSONObject(t, expected),
					xtest.MustPrettyJSONObject(t, actual)))
			return r, &checkFailure{
				expected: expected,
				actual:   actual,
				failMsg:  failMsg,
			}, nil
		}

		return r, nil, nil
	}

	// Check all top level entries and recurse.
	log.Info("checking series",
		zap.Int("checkConcurrency", testOpts.checkConcurrency),
		zap.Uint64("numSeriesChecking", numSeriesChecking))
	parallelVerifyFindQueries(rootNode, 0)

	// NOTE(review): this runs before wg.Wait, so the limit check is issued
	// while verification queries scheduled above may still be in flight —
	// presumably intentional; confirm if limit accounting should be isolated.
	if testOpts.checkLimit {
		testGraphiteFindLimit(t, setup, log)
	}

	// Wait for execution.
	wg.Wait()

	// Allow for debugging by issuing queries, etc.
	if DebugTest() {
		log.Info("debug test set, pausing for investigate")
		<-make(chan struct{})
	}
}
   464  
   465  func testGraphiteFindLimit(
   466  	t require.TestingT,
   467  	setup TestSetup,
   468  	log *zap.Logger,
   469  ) {
   470  	params := make(url.Values)
   471  	params.Set("query", "lvl00_entry00_dir.*")
   472  
   473  	url := fmt.Sprintf("http://%s%s?%s", setup.QueryAddress(),
   474  		graphitehandler.FindURL, params.Encode())
   475  
   476  	req, err := http.NewRequestWithContext(context.Background(),
   477  		http.MethodGet, url, nil)
   478  	require.NoError(t, err)
   479  
   480  	res, err := http.DefaultClient.Do(req)
   481  	require.NoError(t, err)
   482  	require.Equal(t, http.StatusOK, res.StatusCode)
   483  
   484  	log.Info("find with limit applied response headers", zap.Any("headers", res.Header))
   485  
   486  	defer res.Body.Close()
   487  
   488  	var results graphiteFindResults
   489  	require.NoError(t, json.NewDecoder(res.Body).Decode(&results))
   490  
   491  	assert.Equal(t, headers.LimitHeaderSeriesLimitApplied, res.Header.Get(headers.LimitHeader))
   492  }
   493  
   494  type graphiteFindResults []graphiteFindResult
   495  
   496  type graphiteFindResult struct {
   497  	Text string `json:"text"`
   498  	Leaf int    `json:"leaf"`
   499  }
   500  
   501  func sortGraphiteFindResults(r graphiteFindResults) {
   502  	sort.Slice(r, func(i, j int) bool {
   503  		if r[i].Leaf != r[j].Leaf {
   504  			return r[i].Leaf < r[j].Leaf
   505  		}
   506  		return r[i].Text < r[j].Text
   507  	})
   508  }
   509  
   510  type graphiteNode struct {
   511  	name      string
   512  	pathParts []string
   513  	isLeaf    bool
   514  	children  []*graphiteNode
   515  }
   516  
   517  type graphiteNodeChildOptions struct {
   518  	isLeaf bool
   519  }
   520  
   521  func (n *graphiteNode) child(
   522  	name string,
   523  	opts graphiteNodeChildOptions,
   524  ) *graphiteNode {
   525  	pathParts := append(make([]string, 0, 1+len(n.pathParts)), n.pathParts...)
   526  	pathParts = append(pathParts, name)
   527  
   528  	child := &graphiteNode{
   529  		name:      name,
   530  		pathParts: pathParts,
   531  		isLeaf:    opts.isLeaf,
   532  	}
   533  
   534  	n.children = append(n.children, child)
   535  
   536  	return child
   537  }