github.com/m3db/m3@v1.5.0/src/dbnode/storage/index/block_prop_test.go (about) 1 // +build big 2 // 3 // Copyright (c) 2019 Uber Technologies, Inc. 4 // 5 // Permission is hereby granted, free of charge, to any person obtaining a copy 6 // of this software and associated documentation files (the "Software"), to deal 7 // in the Software without restriction, including without limitation the rights 8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 // copies of the Software, and to permit persons to whom the Software is 10 // furnished to do so, subject to the following conditions: 11 // 12 // The above copyright notice and this permission notice shall be included in 13 // all copies or substantial portions of the Software. 14 // 15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 // THE SOFTWARE. 22 23 package index 24 25 import ( 26 "errors" 27 "fmt" 28 "math/rand" 29 "os" 30 "sort" 31 "testing" 32 "time" 33 34 "github.com/m3db/m3/src/dbnode/namespace" 35 "github.com/m3db/m3/src/dbnode/storage/bootstrap/result" 36 "github.com/m3db/m3/src/dbnode/storage/limits" 37 "github.com/m3db/m3/src/m3ninx/doc" 38 "github.com/m3db/m3/src/m3ninx/idx" 39 "github.com/m3db/m3/src/m3ninx/index/segment" 40 "github.com/m3db/m3/src/m3ninx/index/segment/fst" 41 "github.com/m3db/m3/src/m3ninx/index/segment/mem" 42 idxpersist "github.com/m3db/m3/src/m3ninx/persist" 43 "github.com/m3db/m3/src/m3ninx/search" 44 "github.com/m3db/m3/src/m3ninx/search/proptest" 45 "github.com/m3db/m3/src/x/context" 46 "github.com/m3db/m3/src/x/ident" 47 "github.com/m3db/m3/src/x/instrument" 48 "github.com/m3db/m3/src/x/tallytest" 49 xtime "github.com/m3db/m3/src/x/time" 50 51 "github.com/leanovate/gopter" 52 "github.com/leanovate/gopter/gen" 53 "github.com/leanovate/gopter/prop" 54 "github.com/stretchr/testify/require" 55 "github.com/uber-go/tally" 56 ) 57 58 var testBlockSize = time.Hour 59 60 // TestPostingsListCacheDoesNotAffectBlockQueryResults verifies that the postings list 61 // cache does not affect the results of querying a block by creating two blocks, one with 62 // the postings list cache enabled and one without. It then generates a bunch of queries 63 // and executes them against both blocks, ensuring that both blocks return the exact same 64 // results. It was added as a regression test when we encountered a bug that caused the 65 // postings list cache to cause the block to return incorrect results. 66 // 67 // It also generates term and regexp queries where the field and pattern are the same to 68 // ensure that the postings list cache correctly handles caching the results of these 69 // different types of queries (despite having the same field and "pattern") separately. 70 func TestPostingsListCacheDoesNotAffectBlockQueryResults(t *testing.T) { 71 parameters := gopter.DefaultTestParameters() 72 seed := time.Now().UnixNano() 73 parameters.MinSuccessfulTests = 500 74 parameters.MaxSize = 20 75 parameters.Rng = rand.New(rand.NewSource(seed)) 76 properties := gopter.NewProperties(parameters) 77 78 testMD := newTestNSMetadata(t) 79 blockSize := time.Hour 80 81 now := xtime.Now() 82 blockStart := now.Truncate(blockSize) 83 84 uncachedBlock := newPropTestBlock( 85 t, blockStart, testMD, testOpts.SetPostingsListCache(nil)) 86 87 plCache, err := NewPostingsListCache(1000, PostingsListCacheOptions{ 88 InstrumentOptions: instrument.NewOptions(), 89 }) 90 require.NoError(t, err) 91 defer plCache.Start()() 92 93 cachedOptions := testOpts. 94 SetPostingsListCache(plCache). 95 SetReadThroughSegmentOptions(ReadThroughSegmentOptions{ 96 CacheRegexp: true, 97 CacheTerms: true, 98 }) 99 cachedBlock := newPropTestBlock(t, blockStart, testMD, cachedOptions) 100 properties.Property("Index block with and without postings list cache always return the same results", prop.ForAll( 101 func(q search.Query, identicalTermAndRegexp []search.Query) (bool, error) { 102 queries := []search.Query{ 103 q, 104 identicalTermAndRegexp[0], 105 identicalTermAndRegexp[1], 106 } 107 108 for _, q := range queries { 109 indexQuery := Query{ 110 idx.NewQueryFromSearchQuery(q), 111 } 112 113 queryOpts := QueryOptions{ 114 StartInclusive: blockStart, 115 EndExclusive: blockStart.Add(blockSize), 116 } 117 118 uncachedResults := NewQueryResults(nil, QueryResultsOptions{}, testOpts) 119 ctx := context.NewBackground() 120 queryIter, err := uncachedBlock.QueryIter(ctx, indexQuery) 121 if err != nil { 122 return false, err 123 } 124 require.NoError(t, err) 125 for !queryIter.Done() { 126 err = uncachedBlock.QueryWithIter(ctx, 127 queryOpts, queryIter, uncachedResults, time.Now().Add(time.Millisecond*10), emptyLogFields) 128 if err != nil { 129 return false, fmt.Errorf("error querying uncached block: %w", err) 130 } 131 } 132 133 cachedResults := NewQueryResults(nil, QueryResultsOptions{}, testOpts) 134 ctx = context.NewBackground() 135 queryIter, err = cachedBlock.QueryIter(ctx, indexQuery) 136 if err != nil { 137 return false, err 138 } 139 140 for !queryIter.Done() { 141 err = cachedBlock.QueryWithIter(ctx, queryOpts, queryIter, cachedResults, 142 time.Now().Add(time.Millisecond*10), emptyLogFields) 143 if err != nil { 144 return false, fmt.Errorf("error querying cached block: %w", err) 145 } 146 } 147 148 uncachedMap := uncachedResults.Map() 149 cachedMap := cachedResults.Map() 150 if uncachedMap.Len() != cachedMap.Len() { 151 return false, fmt.Errorf( 152 "uncached map size was: %d, but cached map sized was: %d", 153 uncachedMap.Len(), cachedMap.Len()) 154 } 155 156 for _, entry := range uncachedMap.Iter() { 157 key := entry.Key() 158 _, ok := cachedMap.Get(key) 159 if !ok { 160 return false, fmt.Errorf("cached map did not contain: %v", key) 161 } 162 } 163 } 164 165 return true, nil 166 }, 167 proptest.GenQuery(lotsTestDocuments), 168 proptest.GenIdenticalTermAndRegexpQuery(lotsTestDocuments), 169 )) 170 171 reporter := gopter.NewFormatedReporter(true, 160, os.Stdout) 172 if !properties.Run(reporter) { 173 t.Errorf("failed with initial seed: %d", seed) 174 } 175 } 176 177 func newPropTestBlock(t *testing.T, blockStart xtime.UnixNano, 178 nsMeta namespace.Metadata, opts Options) Block { 179 blk, err := NewBlock(blockStart, nsMeta, BlockOptions{}, 180 namespace.NewRuntimeOptionsManager(nsMeta.ID().String()), opts) 181 require.NoError(t, err) 182 183 var ( 184 memSeg = testSegment(t, lotsTestDocuments...).(segment.MutableSegment) 185 fstSeg = fst.ToTestSegment(t, memSeg, testFstOptions) 186 // Need at least one shard to look fulfilled. 187 fulfilled = result.NewShardTimeRangesFromRange(blockStart, blockStart.Add(testBlockSize), uint32(1)) 188 indexBlockByVolumeType = result.NewIndexBlockByVolumeType(blockStart) 189 ) 190 indexBlockByVolumeType.SetBlock(idxpersist.DefaultIndexVolumeType, result.NewIndexBlock([]result.Segment{result.NewSegment(fstSeg, false)}, fulfilled)) 191 192 // Use the AddResults API because thats the only scenario in which we'll wrap a segment 193 // in a ReadThroughSegment to use the postings list cache. 194 err = blk.AddResults(indexBlockByVolumeType) 195 require.NoError(t, err) 196 return blk 197 } 198 199 type testFields struct { 200 name string 201 values []string 202 } 203 204 func genField() gopter.Gen { 205 return gopter.CombineGens( 206 gen.AlphaString(), 207 gen.SliceOf(gen.AlphaString()), 208 ).Map(func(input []interface{}) testFields { 209 var ( 210 name = input[0].(string) 211 values = input[1].([]string) 212 ) 213 214 return testFields{ 215 name: name, 216 values: values, 217 } 218 }) 219 } 220 221 type propTestSegment struct { 222 metadata doc.Metadata 223 exCount int64 224 exCountAgg int64 225 segmentMap segmentMap 226 } 227 228 type ( 229 testValuesSet map[string]struct{} //nolint:gofumpt 230 segmentMap map[string]testValuesSet //nolint:gofumpt 231 ) 232 233 func genTestSegment() gopter.Gen { 234 return gen.SliceOf(genField()).Map(func(input []testFields) propTestSegment { 235 segMap := make(segmentMap, len(input)) 236 for _, field := range input { //nolint:gocritic 237 for _, value := range field.values { 238 exVals, found := segMap[field.name] 239 if !found { 240 exVals = make(testValuesSet) 241 } 242 exVals[value] = struct{}{} 243 segMap[field.name] = exVals 244 } 245 } 246 247 aggLength := len(segMap) 248 fields := make([]testFields, 0, len(input)) 249 for name, valSet := range segMap { 250 aggLength += len(valSet) 251 vals := make([]string, 0, len(valSet)) 252 for val := range valSet { 253 vals = append(vals, val) 254 } 255 256 sort.Strings(vals) 257 fields = append(fields, testFields{name: name, values: vals}) 258 } 259 260 sort.Slice(fields, func(i, j int) bool { 261 return fields[i].name < fields[j].name 262 }) 263 264 docFields := []doc.Field{} 265 for _, field := range fields { //nolint:gocritic 266 for _, val := range field.values { 267 docFields = append(docFields, doc.Field{ 268 Name: []byte(field.name), 269 Value: []byte(val), 270 }) 271 } 272 } 273 274 return propTestSegment{ 275 metadata: doc.Metadata{Fields: docFields}, 276 exCount: int64(len(segMap)), 277 exCountAgg: int64(aggLength), 278 segmentMap: segMap, 279 } 280 }) 281 } 282 283 func verifyResults( 284 t *testing.T, 285 results AggregateResults, 286 exMap segmentMap, 287 ) { 288 resultMap := make(segmentMap, results.Map().Len()) 289 for _, field := range results.Map().Iter() { //nolint:gocritic 290 name := field.Key().String() 291 _, found := resultMap[name] 292 require.False(t, found, "duplicate values in results map") 293 294 values := make(testValuesSet, field.value.Map().Len()) 295 for _, value := range field.value.Map().Iter() { 296 val := value.Key().String() 297 _, found := values[val] 298 require.False(t, found, "duplicate values in results map") 299 300 values[val] = struct{}{} 301 } 302 303 resultMap[name] = values 304 } 305 306 require.Equal(t, resultMap, exMap) 307 } 308 309 func TestAggregateDocLimits(t *testing.T) { 310 var ( 311 parameters = gopter.DefaultTestParameters() 312 seed = time.Now().UnixNano() 313 reporter = gopter.NewFormatedReporter(true, 160, os.Stdout) 314 ) 315 316 parameters.MinSuccessfulTests = 1000 317 parameters.MinSize = 5 318 parameters.MaxSize = 10 319 parameters.Rng = rand.New(rand.NewSource(seed)) //nolint:gosec 320 properties := gopter.NewProperties(parameters) 321 322 properties.Property("segments dedupe and have correct docs counts", prop.ForAll( 323 func(testSegment propTestSegment) (bool, error) { 324 seg, err := mem.NewSegment(mem.NewOptions()) 325 if err != nil { 326 return false, err 327 } 328 329 _, err = seg.Insert(testSegment.metadata) 330 if err != nil { 331 return false, err 332 } 333 334 err = seg.Seal() 335 if err != nil { 336 return false, err 337 } 338 339 scope := tally.NewTestScope("", nil) 340 iOpts := instrument.NewOptions().SetMetricsScope(scope) 341 limitOpts := limits.NewOptions(). 342 SetInstrumentOptions(iOpts). 343 SetDocsLimitOpts(limits.LookbackLimitOptions{Lookback: time.Minute}). 344 SetBytesReadLimitOpts(limits.LookbackLimitOptions{Lookback: time.Minute}). 345 SetAggregateDocsLimitOpts(limits.LookbackLimitOptions{Lookback: time.Minute}) 346 queryLimits, err := limits.NewQueryLimits(limitOpts) 347 require.NoError(t, err) 348 testOpts = testOpts.SetInstrumentOptions(iOpts).SetQueryLimits(queryLimits) 349 350 testMD := newTestNSMetadata(t) 351 start := xtime.Now().Truncate(time.Hour) 352 blk, err := NewBlock(start, testMD, BlockOptions{}, 353 namespace.NewRuntimeOptionsManager("foo"), testOpts) 354 if err != nil { 355 return false, err 356 } 357 358 b, ok := blk.(*block) 359 if !ok { 360 return false, errors.New("bad block type") 361 } 362 363 b.mutableSegments.foregroundSegments = []*readableSeg{ 364 newReadableSeg(seg, testOpts), 365 } 366 367 results := NewAggregateResults(ident.StringID("ns"), AggregateResultsOptions{ 368 Type: AggregateTagNamesAndValues, 369 }, testOpts) 370 371 ctx := context.NewBackground() 372 defer ctx.BlockingClose() 373 374 aggIter, err := b.AggregateIter(ctx, results.AggregateResultsOptions()) 375 if err != nil { 376 return false, err 377 } 378 for !aggIter.Done() { 379 err = b.AggregateWithIter( 380 ctx, 381 aggIter, 382 QueryOptions{}, 383 results, 384 time.Now().Add(time.Millisecond*10), 385 emptyLogFields) 386 387 if err != nil { 388 return false, err 389 } 390 } 391 verifyResults(t, results, testSegment.segmentMap) 392 snap := scope.Snapshot() 393 tallytest.AssertCounterValue(t, testSegment.exCount, snap, 394 "query-limit.total-docs-matched", map[string]string{"type": "fetch"}) 395 tallytest.AssertCounterValue(t, testSegment.exCountAgg, snap, 396 "query-limit.total-docs-matched", map[string]string{"type": "aggregate"}) 397 return true, nil 398 }, 399 genTestSegment(), 400 )) 401 402 if !properties.Run(reporter) { 403 t.Errorf("failed with initial seed: %d", seed) 404 } 405 }