// Source: github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/fields_terms_iterator_test.go
//
// Copyright (c) 2019 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
20 21 package index 22 23 import ( 24 "bytes" 25 "fmt" 26 "sort" 27 "strings" 28 "testing" 29 30 "github.com/golang/mock/gomock" 31 "github.com/stretchr/testify/require" 32 33 "github.com/m3db/m3/src/m3ninx/doc" 34 "github.com/m3db/m3/src/m3ninx/idx" 35 m3ninxindex "github.com/m3db/m3/src/m3ninx/index" 36 "github.com/m3db/m3/src/m3ninx/index/segment" 37 "github.com/m3db/m3/src/m3ninx/index/segment/fst" 38 "github.com/m3db/m3/src/m3ninx/index/segment/mem" 39 "github.com/m3db/m3/src/m3ninx/postings" 40 "github.com/m3db/m3/src/m3ninx/postings/roaring" 41 "github.com/m3db/m3/src/m3ninx/util" 42 "github.com/m3db/m3/src/x/context" 43 xtest "github.com/m3db/m3/src/x/test" 44 ) 45 46 var ( 47 testFstOptions = fst.NewOptions() 48 lotsTestDocuments = util.MustReadDocs("../../../m3ninx/util/testdata/node_exporter.json", 2000) 49 ) 50 51 func TestFieldsTermsIteratorSimple(t *testing.T) { 52 ctx := context.NewBackground() 53 s := newFieldsTermsIterSetup( 54 pair{"a", "b"}, pair{"a", "c"}, 55 pair{"d", "e"}, pair{"d", "f"}, 56 pair{"g", "h"}, 57 pair{"i", "j"}, 58 pair{"k", "l"}, 59 ) 60 reader, err := s.asSegment(t).Reader() 61 require.NoError(t, err) 62 63 iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{iterateTerms: true}) 64 require.NoError(t, err) 65 s.requireEquals(t, iter) 66 } 67 68 func TestFieldsTermsIteratorSimpleSkip(t *testing.T) { 69 ctx := context.NewBackground() 70 input := []pair{ 71 {"a", "b"}, 72 {"a", "c"}, 73 {"d", "e"}, 74 {"d", "f"}, 75 {"g", "h"}, 76 {"i", "j"}, 77 {"k", "l"}, 78 } 79 s := newFieldsTermsIterSetup(input...) 
80 reader, err := s.asSegment(t).Reader() 81 require.NoError(t, err) 82 83 iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{ 84 iterateTerms: true, 85 allowFn: func(f []byte) bool { 86 return !bytes.Equal([]byte("a"), f) && !bytes.Equal([]byte("k"), f) 87 }, 88 }) 89 require.NoError(t, err) 90 slice, err := toSlice(iter) 91 require.NoError(t, err) 92 requireSlicesEqual(t, []pair{ 93 {"d", "e"}, 94 {"d", "f"}, 95 {"g", "h"}, 96 {"i", "j"}, 97 }, slice) 98 } 99 100 func TestFieldsTermsIteratorTermsOnly(t *testing.T) { 101 ctx := context.NewBackground() 102 103 s := newFieldsTermsIterSetup( 104 pair{"a", "b"}, 105 pair{"a", "c"}, 106 pair{"d", "e"}, 107 pair{"d", "f"}, 108 pair{"g", "h"}, 109 pair{"i", "j"}, 110 pair{"k", "l"}, 111 ) 112 reader, err := s.asSegment(t).Reader() 113 require.NoError(t, err) 114 115 iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{}) 116 require.NoError(t, err) 117 slice, err := toSlice(iter) 118 require.NoError(t, err) 119 requireSlicesEqual(t, []pair{ 120 {"a", ""}, 121 {"d", ""}, 122 {"g", ""}, 123 {"i", ""}, 124 {"k", ""}, 125 }, slice) 126 } 127 128 func TestFieldsTermsIteratorEmptyTerm(t *testing.T) { 129 ctx := context.NewBackground() 130 131 ctrl := gomock.NewController(xtest.Reporter{T: t}) 132 defer ctrl.Finish() 133 134 reader := newMockSegmentReader(ctrl, map[string]terms{ 135 "a": {}, 136 }) 137 iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{iterateTerms: false}) 138 require.NoError(t, err) 139 slice, err := toSlice(iter) 140 require.NoError(t, err) 141 requireSlicesEqual(t, []pair{{"a", ""}}, slice) 142 } 143 144 func TestFieldsTermsIteratorRestrictByQueryFields(t *testing.T) { 145 ctx := context.NewBackground() 146 147 ctrl := gomock.NewController(xtest.Reporter{T: t}) 148 defer ctrl.Finish() 149 150 pl0 := roaring.NewPostingsList() 151 require.NoError(t, pl0.Insert(postings.ID(42))) 152 153 pl1 := roaring.NewPostingsList() 154 
require.NoError(t, pl1.Insert(postings.ID(1))) 155 156 pl2 := roaring.NewPostingsList() 157 require.NoError(t, pl2.Insert(postings.ID(2))) 158 159 reader := newMockSegmentReader(ctrl, map[string]terms{ 160 "foo": {values: []term{{value: "foo_0"}}, postings: pl0}, 161 "bar": {values: []term{{value: "bar_0"}}, postings: pl1}, 162 "baz": {values: []term{{value: "baz_0"}}, postings: pl2}, 163 }) 164 165 // Simulate term query for "bar": 166 reader.EXPECT().MatchField([]byte("bar")).Return(pl1, nil) 167 168 iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{ 169 iterateTerms: false, 170 restrictByQuery: &Query{ 171 Query: idx.NewFieldQuery([]byte("bar")), 172 }, 173 }) 174 require.NoError(t, err) 175 slice, err := toSlice(iter) 176 require.NoError(t, err) 177 requireSlicesEqual(t, []pair{{"bar", ""}}, slice) 178 } 179 180 func TestFieldsTermsIteratorEmptyTermInclude(t *testing.T) { 181 ctx := context.NewBackground() 182 183 ctrl := gomock.NewController(xtest.Reporter{T: t}) 184 defer ctrl.Finish() 185 186 reader := newMockSegmentReader(ctrl, map[string]terms{ 187 "a": {}, 188 }) 189 iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{iterateTerms: true}) 190 require.NoError(t, err) 191 slice, err := toSlice(iter) 192 require.NoError(t, err) 193 requireSlicesEqual(t, []pair{}, slice) 194 } 195 196 func TestFieldsTermsIteratorIterateTermsAndRestrictByQuery(t *testing.T) { 197 ctx := context.NewBackground() 198 199 testDocs := []doc.Metadata{ 200 { 201 Fields: []doc.Field{ 202 { 203 Name: []byte("fruit"), 204 Value: []byte("banana"), 205 }, 206 { 207 Name: []byte("color"), 208 Value: []byte("yellow"), 209 }, 210 }, 211 }, 212 { 213 Fields: []doc.Field{ 214 { 215 Name: []byte("fruit"), 216 Value: []byte("apple"), 217 }, 218 { 219 Name: []byte("color"), 220 Value: []byte("red"), 221 }, 222 }, 223 }, 224 { 225 Fields: []doc.Field{ 226 { 227 Name: []byte("fruit"), 228 Value: []byte("pineapple"), 229 }, 230 { 231 Name: 
[]byte("color"), 232 Value: []byte("yellow"), 233 }, 234 }, 235 }, 236 } 237 238 seg, err := mem.NewSegment(mem.NewOptions()) 239 require.NoError(t, err) 240 241 require.NoError(t, seg.InsertBatch(m3ninxindex.Batch{ 242 Docs: testDocs, 243 AllowPartialUpdates: true, 244 })) 245 246 require.NoError(t, seg.Seal()) 247 248 fruitRegexp, err := idx.NewRegexpQuery([]byte("fruit"), []byte("^.*apple$")) 249 require.NoError(t, err) 250 251 colorRegexp, err := idx.NewRegexpQuery([]byte("color"), []byte("^(red|yellow)$")) 252 require.NoError(t, err) 253 254 reader, err := seg.Reader() 255 require.NoError(t, err) 256 257 iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{ 258 iterateTerms: true, 259 restrictByQuery: &Query{ 260 Query: idx.NewConjunctionQuery(fruitRegexp, colorRegexp), 261 }, 262 }) 263 require.NoError(t, err) 264 slice, err := toSlice(iter) 265 require.NoError(t, err) 266 requireSlicesEqual(t, []pair{ 267 {"color", "red"}, 268 {"color", "yellow"}, 269 {"fruit", "apple"}, 270 {"fruit", "pineapple"}, 271 }, slice) 272 } 273 274 type terms struct { 275 values []term 276 postings postings.List 277 } 278 279 type term struct { 280 value string 281 postings postings.List 282 } 283 284 func newMockSegmentReader(ctrl *gomock.Controller, termValues map[string]terms) *segment.MockReader { 285 fields := make([]iterpoint, 0, len(termValues)) 286 for field := range termValues { 287 fields = append(fields, iterpoint{ 288 value: field, 289 postings: termValues[field].postings, 290 }) 291 } 292 sort.Slice(fields, func(i, j int) bool { 293 return strings.Compare(fields[i].value, fields[j].value) < 0 294 }) 295 296 r := segment.NewMockReader(ctrl) 297 fieldsPostingsListIterator := &stubFieldsPostingsListIterator{points: fields} 298 299 r.EXPECT().FieldsPostingsList().Return(fieldsPostingsListIterator, nil).AnyTimes() 300 301 for _, f := range fields { 302 termValues := termValues[f.value].values 303 sort.Slice(termValues, func(i, j int) bool { 304 
return termValues[i].value < termValues[j].value 305 }) 306 terms := make([]iterpoint, 0, len(termValues)) 307 for _, t := range termValues { 308 terms = append(terms, iterpoint{ 309 value: t.value, 310 postings: t.postings, 311 }) 312 } 313 termIterator := &stubTermIterator{points: terms} 314 r.EXPECT().Terms([]byte(f.value)).Return(termIterator, nil).AnyTimes() 315 } 316 317 return r 318 } 319 320 type stubFieldsPostingsListIterator struct { 321 current iterpoint 322 points []iterpoint 323 } 324 325 func (s *stubFieldsPostingsListIterator) Next() bool { 326 if len(s.points) == 0 { 327 return false 328 } 329 s.current = s.points[0] 330 s.points = s.points[1:] 331 return true 332 } 333 334 func (s *stubFieldsPostingsListIterator) Current() ([]byte, postings.List) { 335 return []byte(s.current.value), s.current.postings 336 } 337 338 func (s *stubFieldsPostingsListIterator) Err() error { 339 return s.current.err 340 } 341 342 func (s *stubFieldsPostingsListIterator) Close() error { 343 if s.current.err != nil { 344 return s.current.err 345 } 346 for s.Next() { 347 if err := s.Err(); err != nil { 348 return err 349 } 350 } 351 return nil 352 } 353 354 type stubTermIterator struct { 355 current iterpoint 356 points []iterpoint 357 } 358 359 func (s *stubTermIterator) Empty() bool { 360 return len(s.points) == 0 361 } 362 363 func (s *stubTermIterator) Next() bool { 364 if len(s.points) == 0 { 365 return false 366 } 367 s.current = s.points[0] 368 s.points = s.points[1:] 369 return true 370 } 371 372 func (s *stubTermIterator) Current() ([]byte, postings.List) { 373 return []byte(s.current.value), s.current.postings 374 } 375 376 func (s *stubTermIterator) Err() error { 377 return s.current.err 378 } 379 380 func (s *stubTermIterator) Close() error { 381 if s.current.err != nil { 382 return s.current.err 383 } 384 for s.Next() { 385 if err := s.Err(); err != nil { 386 return err 387 } 388 } 389 return nil 390 } 391 392 type stubFieldIterator struct { 393 current 
iterpoint 394 points []iterpoint 395 } 396 397 func (s *stubFieldIterator) Next() bool { 398 if len(s.points) == 0 { 399 return false 400 } 401 s.current = s.points[0] 402 s.points = s.points[1:] 403 return true 404 } 405 406 func (s *stubFieldIterator) Current() []byte { 407 return []byte(s.current.value) 408 } 409 410 func (s *stubFieldIterator) Err() error { 411 return s.current.err 412 } 413 414 func (s *stubFieldIterator) Close() error { 415 if s.current.err != nil { 416 return s.current.err 417 } 418 for s.Next() { 419 if err := s.Err(); err != nil { 420 return err 421 } 422 } 423 return nil 424 } 425 426 type iterpoint struct { 427 err error 428 value string 429 postings postings.List 430 } 431 432 type pair struct { 433 Name, Value string 434 } 435 436 func newFieldsTermsIterSetup(fields ...pair) fieldsTermsIterSetup { 437 sort.Slice(fields, func(i, j int) bool { 438 c := strings.Compare(fields[i].Name, fields[j].Name) 439 if c == 0 { 440 return strings.Compare(fields[i].Value, fields[j].Value) < 0 441 } 442 return c < 0 443 }) 444 return fieldsTermsIterSetup{fields} 445 } 446 447 type fieldsTermsIterSetup struct { 448 fields []pair 449 } 450 451 func (s *fieldsTermsIterSetup) asSegment(t *testing.T) segment.Segment { 452 docs := make([]doc.Metadata, 0, len(s.fields)) 453 for _, f := range s.fields { 454 docs = append(docs, doc.Metadata{ 455 ID: []byte(fmt.Sprintf("id_%v_%v", f.Name, f.Value)), 456 Fields: []doc.Field{ 457 { 458 Name: []byte(f.Name), 459 Value: []byte(f.Value), 460 }, 461 }, 462 }) 463 } 464 memSeg := testSegment(t, docs...).(segment.MutableSegment) 465 return fst.ToTestSegment(t, memSeg, testFstOptions) 466 } 467 468 func (s *fieldsTermsIterSetup) requireEquals(t *testing.T, iter fieldsAndTermsIterator) { 469 pending := s.fields 470 for len(pending) > 0 { 471 require.True(t, iter.Next()) 472 name, value := iter.Current() 473 if bytes.Equal(name, doc.IDReservedFieldName) { 474 continue 475 } 476 top := pending[0] 477 pending = pending[1:] 
478 require.Equal(t, top.Name, string(name)) 479 require.Equal(t, top.Value, string(value)) 480 } 481 require.False(t, iter.Next()) 482 require.NoError(t, iter.Err()) 483 require.NoError(t, iter.Close()) 484 } 485 486 func toSlice(iter fieldsAndTermsIterator) ([]pair, error) { 487 var pairs []pair 488 for iter.Next() { 489 n, v := iter.Current() 490 if bytes.Equal(n, doc.IDReservedFieldName) { 491 continue 492 } 493 pairs = append(pairs, pair{ 494 Name: string(n), 495 Value: string(v), 496 }) 497 } 498 return pairs, iter.Err() 499 } 500 501 func requireSlicesEqual(t *testing.T, a, b []pair) { 502 require.Equal(t, len(a), len(b)) 503 for i := 0; i < len(a); i++ { 504 require.Equal(t, a[i], b[i]) 505 } 506 }