github.com/m3db/m3@v1.5.0/src/dbnode/storage/index/block_prop_test.go (about)

     1  // +build big
     2  //
     3  // Copyright (c) 2019 Uber Technologies, Inc.
     4  //
     5  // Permission is hereby granted, free of charge, to any person obtaining a copy
     6  // of this software and associated documentation files (the "Software"), to deal
     7  // in the Software without restriction, including without limitation the rights
     8  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     9  // copies of the Software, and to permit persons to whom the Software is
    10  // furnished to do so, subject to the following conditions:
    11  //
    12  // The above copyright notice and this permission notice shall be included in
    13  // all copies or substantial portions of the Software.
    14  //
    15  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    16  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    17  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    18  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    19  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    20  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    21  // THE SOFTWARE.
    22  
    23  package index
    24  
    25  import (
    26  	"errors"
    27  	"fmt"
    28  	"math/rand"
    29  	"os"
    30  	"sort"
    31  	"testing"
    32  	"time"
    33  
    34  	"github.com/m3db/m3/src/dbnode/namespace"
    35  	"github.com/m3db/m3/src/dbnode/storage/bootstrap/result"
    36  	"github.com/m3db/m3/src/dbnode/storage/limits"
    37  	"github.com/m3db/m3/src/m3ninx/doc"
    38  	"github.com/m3db/m3/src/m3ninx/idx"
    39  	"github.com/m3db/m3/src/m3ninx/index/segment"
    40  	"github.com/m3db/m3/src/m3ninx/index/segment/fst"
    41  	"github.com/m3db/m3/src/m3ninx/index/segment/mem"
    42  	idxpersist "github.com/m3db/m3/src/m3ninx/persist"
    43  	"github.com/m3db/m3/src/m3ninx/search"
    44  	"github.com/m3db/m3/src/m3ninx/search/proptest"
    45  	"github.com/m3db/m3/src/x/context"
    46  	"github.com/m3db/m3/src/x/ident"
    47  	"github.com/m3db/m3/src/x/instrument"
    48  	"github.com/m3db/m3/src/x/tallytest"
    49  	xtime "github.com/m3db/m3/src/x/time"
    50  
    51  	"github.com/leanovate/gopter"
    52  	"github.com/leanovate/gopter/gen"
    53  	"github.com/leanovate/gopter/prop"
    54  	"github.com/stretchr/testify/require"
    55  	"github.com/uber-go/tally"
    56  )
    57  
// testBlockSize is the index block duration used by the property tests below.
var testBlockSize = time.Hour
    59  
    60  // TestPostingsListCacheDoesNotAffectBlockQueryResults verifies that the postings list
    61  // cache does not affect the results of querying a block by creating two blocks, one with
    62  // the postings list cache enabled and one without. It then generates a bunch of queries
    63  // and executes them against both blocks, ensuring that both blocks return the exact same
    64  // results. It was added as a regression test when we encountered a bug that caused the
    65  // postings list cache to cause the block to return incorrect results.
    66  //
    67  // It also generates term and regexp queries where the field and pattern are the same to
    68  // ensure that the postings list cache correctly handles caching the results of these
    69  // different types of queries (despite having the same field and "pattern") separately.
    70  func TestPostingsListCacheDoesNotAffectBlockQueryResults(t *testing.T) {
    71  	parameters := gopter.DefaultTestParameters()
    72  	seed := time.Now().UnixNano()
    73  	parameters.MinSuccessfulTests = 500
    74  	parameters.MaxSize = 20
    75  	parameters.Rng = rand.New(rand.NewSource(seed))
    76  	properties := gopter.NewProperties(parameters)
    77  
    78  	testMD := newTestNSMetadata(t)
    79  	blockSize := time.Hour
    80  
    81  	now := xtime.Now()
    82  	blockStart := now.Truncate(blockSize)
    83  
    84  	uncachedBlock := newPropTestBlock(
    85  		t, blockStart, testMD, testOpts.SetPostingsListCache(nil))
    86  
    87  	plCache, err := NewPostingsListCache(1000, PostingsListCacheOptions{
    88  		InstrumentOptions: instrument.NewOptions(),
    89  	})
    90  	require.NoError(t, err)
    91  	defer plCache.Start()()
    92  
    93  	cachedOptions := testOpts.
    94  		SetPostingsListCache(plCache).
    95  		SetReadThroughSegmentOptions(ReadThroughSegmentOptions{
    96  			CacheRegexp: true,
    97  			CacheTerms:  true,
    98  		})
    99  	cachedBlock := newPropTestBlock(t, blockStart, testMD, cachedOptions)
   100  	properties.Property("Index block with and without postings list cache always return the same results", prop.ForAll(
   101  		func(q search.Query, identicalTermAndRegexp []search.Query) (bool, error) {
   102  			queries := []search.Query{
   103  				q,
   104  				identicalTermAndRegexp[0],
   105  				identicalTermAndRegexp[1],
   106  			}
   107  
   108  			for _, q := range queries {
   109  				indexQuery := Query{
   110  					idx.NewQueryFromSearchQuery(q),
   111  				}
   112  
   113  				queryOpts := QueryOptions{
   114  					StartInclusive: blockStart,
   115  					EndExclusive:   blockStart.Add(blockSize),
   116  				}
   117  
   118  				uncachedResults := NewQueryResults(nil, QueryResultsOptions{}, testOpts)
   119  				ctx := context.NewBackground()
   120  				queryIter, err := uncachedBlock.QueryIter(ctx, indexQuery)
   121  				if err != nil {
   122  					return false, err
   123  				}
   124  				require.NoError(t, err)
   125  				for !queryIter.Done() {
   126  					err = uncachedBlock.QueryWithIter(ctx,
   127  						queryOpts, queryIter, uncachedResults, time.Now().Add(time.Millisecond*10), emptyLogFields)
   128  					if err != nil {
   129  						return false, fmt.Errorf("error querying uncached block: %w", err)
   130  					}
   131  				}
   132  
   133  				cachedResults := NewQueryResults(nil, QueryResultsOptions{}, testOpts)
   134  				ctx = context.NewBackground()
   135  				queryIter, err = cachedBlock.QueryIter(ctx, indexQuery)
   136  				if err != nil {
   137  					return false, err
   138  				}
   139  
   140  				for !queryIter.Done() {
   141  					err = cachedBlock.QueryWithIter(ctx, queryOpts, queryIter, cachedResults,
   142  						time.Now().Add(time.Millisecond*10), emptyLogFields)
   143  					if err != nil {
   144  						return false, fmt.Errorf("error querying cached block: %w", err)
   145  					}
   146  				}
   147  
   148  				uncachedMap := uncachedResults.Map()
   149  				cachedMap := cachedResults.Map()
   150  				if uncachedMap.Len() != cachedMap.Len() {
   151  					return false, fmt.Errorf(
   152  						"uncached map size was: %d, but cached map sized was: %d",
   153  						uncachedMap.Len(), cachedMap.Len())
   154  				}
   155  
   156  				for _, entry := range uncachedMap.Iter() {
   157  					key := entry.Key()
   158  					_, ok := cachedMap.Get(key)
   159  					if !ok {
   160  						return false, fmt.Errorf("cached map did not contain: %v", key)
   161  					}
   162  				}
   163  			}
   164  
   165  			return true, nil
   166  		},
   167  		proptest.GenQuery(lotsTestDocuments),
   168  		proptest.GenIdenticalTermAndRegexpQuery(lotsTestDocuments),
   169  	))
   170  
   171  	reporter := gopter.NewFormatedReporter(true, 160, os.Stdout)
   172  	if !properties.Run(reporter) {
   173  		t.Errorf("failed with initial seed: %d", seed)
   174  	}
   175  }
   176  
   177  func newPropTestBlock(t *testing.T, blockStart xtime.UnixNano,
   178  	nsMeta namespace.Metadata, opts Options) Block {
   179  	blk, err := NewBlock(blockStart, nsMeta, BlockOptions{},
   180  		namespace.NewRuntimeOptionsManager(nsMeta.ID().String()), opts)
   181  	require.NoError(t, err)
   182  
   183  	var (
   184  		memSeg = testSegment(t, lotsTestDocuments...).(segment.MutableSegment)
   185  		fstSeg = fst.ToTestSegment(t, memSeg, testFstOptions)
   186  		// Need at least one shard to look fulfilled.
   187  		fulfilled              = result.NewShardTimeRangesFromRange(blockStart, blockStart.Add(testBlockSize), uint32(1))
   188  		indexBlockByVolumeType = result.NewIndexBlockByVolumeType(blockStart)
   189  	)
   190  	indexBlockByVolumeType.SetBlock(idxpersist.DefaultIndexVolumeType, result.NewIndexBlock([]result.Segment{result.NewSegment(fstSeg, false)}, fulfilled))
   191  
   192  	// Use the AddResults API because thats the only scenario in which we'll wrap a segment
   193  	// in a ReadThroughSegment to use the postings list cache.
   194  	err = blk.AddResults(indexBlockByVolumeType)
   195  	require.NoError(t, err)
   196  	return blk
   197  }
   198  
// testFields pairs a generated field name with the slice of values generated
// for that field.
type testFields struct {
	name   string
	values []string
}
   203  
   204  func genField() gopter.Gen {
   205  	return gopter.CombineGens(
   206  		gen.AlphaString(),
   207  		gen.SliceOf(gen.AlphaString()),
   208  	).Map(func(input []interface{}) testFields {
   209  		var (
   210  			name   = input[0].(string)
   211  			values = input[1].([]string)
   212  		)
   213  
   214  		return testFields{
   215  			name:   name,
   216  			values: values,
   217  		}
   218  	})
   219  }
   220  
// propTestSegment describes a generated test segment: the document metadata to
// insert, the expected docs-matched counts for fetch (exCount) and aggregate
// (exCountAgg) limits, and the deduplicated field->values map used to verify
// aggregation results.
type propTestSegment struct {
	metadata   doc.Metadata
	exCount    int64
	exCountAgg int64
	segmentMap segmentMap
}
   227  
type (
	// testValuesSet is a set of distinct field values.
	testValuesSet map[string]struct{} //nolint:gofumpt
	// segmentMap maps a field name to its set of distinct values.
	segmentMap map[string]testValuesSet //nolint:gofumpt
)
   232  
   233  func genTestSegment() gopter.Gen {
   234  	return gen.SliceOf(genField()).Map(func(input []testFields) propTestSegment {
   235  		segMap := make(segmentMap, len(input))
   236  		for _, field := range input { //nolint:gocritic
   237  			for _, value := range field.values {
   238  				exVals, found := segMap[field.name]
   239  				if !found {
   240  					exVals = make(testValuesSet)
   241  				}
   242  				exVals[value] = struct{}{}
   243  				segMap[field.name] = exVals
   244  			}
   245  		}
   246  
   247  		aggLength := len(segMap)
   248  		fields := make([]testFields, 0, len(input))
   249  		for name, valSet := range segMap {
   250  			aggLength += len(valSet)
   251  			vals := make([]string, 0, len(valSet))
   252  			for val := range valSet {
   253  				vals = append(vals, val)
   254  			}
   255  
   256  			sort.Strings(vals)
   257  			fields = append(fields, testFields{name: name, values: vals})
   258  		}
   259  
   260  		sort.Slice(fields, func(i, j int) bool {
   261  			return fields[i].name < fields[j].name
   262  		})
   263  
   264  		docFields := []doc.Field{}
   265  		for _, field := range fields { //nolint:gocritic
   266  			for _, val := range field.values {
   267  				docFields = append(docFields, doc.Field{
   268  					Name:  []byte(field.name),
   269  					Value: []byte(val),
   270  				})
   271  			}
   272  		}
   273  
   274  		return propTestSegment{
   275  			metadata:   doc.Metadata{Fields: docFields},
   276  			exCount:    int64(len(segMap)),
   277  			exCountAgg: int64(aggLength),
   278  			segmentMap: segMap,
   279  		}
   280  	})
   281  }
   282  
   283  func verifyResults(
   284  	t *testing.T,
   285  	results AggregateResults,
   286  	exMap segmentMap,
   287  ) {
   288  	resultMap := make(segmentMap, results.Map().Len())
   289  	for _, field := range results.Map().Iter() { //nolint:gocritic
   290  		name := field.Key().String()
   291  		_, found := resultMap[name]
   292  		require.False(t, found, "duplicate values in results map")
   293  
   294  		values := make(testValuesSet, field.value.Map().Len())
   295  		for _, value := range field.value.Map().Iter() {
   296  			val := value.Key().String()
   297  			_, found := values[val]
   298  			require.False(t, found, "duplicate values in results map")
   299  
   300  			values[val] = struct{}{}
   301  		}
   302  
   303  		resultMap[name] = values
   304  	}
   305  
   306  	require.Equal(t, resultMap, exMap)
   307  }
   308  
   309  func TestAggregateDocLimits(t *testing.T) {
   310  	var (
   311  		parameters = gopter.DefaultTestParameters()
   312  		seed       = time.Now().UnixNano()
   313  		reporter   = gopter.NewFormatedReporter(true, 160, os.Stdout)
   314  	)
   315  
   316  	parameters.MinSuccessfulTests = 1000
   317  	parameters.MinSize = 5
   318  	parameters.MaxSize = 10
   319  	parameters.Rng = rand.New(rand.NewSource(seed)) //nolint:gosec
   320  	properties := gopter.NewProperties(parameters)
   321  
   322  	properties.Property("segments dedupe and have correct docs counts", prop.ForAll(
   323  		func(testSegment propTestSegment) (bool, error) {
   324  			seg, err := mem.NewSegment(mem.NewOptions())
   325  			if err != nil {
   326  				return false, err
   327  			}
   328  
   329  			_, err = seg.Insert(testSegment.metadata)
   330  			if err != nil {
   331  				return false, err
   332  			}
   333  
   334  			err = seg.Seal()
   335  			if err != nil {
   336  				return false, err
   337  			}
   338  
   339  			scope := tally.NewTestScope("", nil)
   340  			iOpts := instrument.NewOptions().SetMetricsScope(scope)
   341  			limitOpts := limits.NewOptions().
   342  				SetInstrumentOptions(iOpts).
   343  				SetDocsLimitOpts(limits.LookbackLimitOptions{Lookback: time.Minute}).
   344  				SetBytesReadLimitOpts(limits.LookbackLimitOptions{Lookback: time.Minute}).
   345  				SetAggregateDocsLimitOpts(limits.LookbackLimitOptions{Lookback: time.Minute})
   346  			queryLimits, err := limits.NewQueryLimits(limitOpts)
   347  			require.NoError(t, err)
   348  			testOpts = testOpts.SetInstrumentOptions(iOpts).SetQueryLimits(queryLimits)
   349  
   350  			testMD := newTestNSMetadata(t)
   351  			start := xtime.Now().Truncate(time.Hour)
   352  			blk, err := NewBlock(start, testMD, BlockOptions{},
   353  				namespace.NewRuntimeOptionsManager("foo"), testOpts)
   354  			if err != nil {
   355  				return false, err
   356  			}
   357  
   358  			b, ok := blk.(*block)
   359  			if !ok {
   360  				return false, errors.New("bad block type")
   361  			}
   362  
   363  			b.mutableSegments.foregroundSegments = []*readableSeg{
   364  				newReadableSeg(seg, testOpts),
   365  			}
   366  
   367  			results := NewAggregateResults(ident.StringID("ns"), AggregateResultsOptions{
   368  				Type: AggregateTagNamesAndValues,
   369  			}, testOpts)
   370  
   371  			ctx := context.NewBackground()
   372  			defer ctx.BlockingClose()
   373  
   374  			aggIter, err := b.AggregateIter(ctx, results.AggregateResultsOptions())
   375  			if err != nil {
   376  				return false, err
   377  			}
   378  			for !aggIter.Done() {
   379  				err = b.AggregateWithIter(
   380  					ctx,
   381  					aggIter,
   382  					QueryOptions{},
   383  					results,
   384  					time.Now().Add(time.Millisecond*10),
   385  					emptyLogFields)
   386  
   387  				if err != nil {
   388  					return false, err
   389  				}
   390  			}
   391  			verifyResults(t, results, testSegment.segmentMap)
   392  			snap := scope.Snapshot()
   393  			tallytest.AssertCounterValue(t, testSegment.exCount, snap,
   394  				"query-limit.total-docs-matched", map[string]string{"type": "fetch"})
   395  			tallytest.AssertCounterValue(t, testSegment.exCountAgg, snap,
   396  				"query-limit.total-docs-matched", map[string]string{"type": "aggregate"})
   397  			return true, nil
   398  		},
   399  		genTestSegment(),
   400  	))
   401  
   402  	if !properties.Run(reporter) {
   403  		t.Errorf("failed with initial seed: %d", seed)
   404  	}
   405  }