github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/storage/index/fields_terms_iterator_test.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package index
    22  
    23  import (
    24  	"bytes"
    25  	"fmt"
    26  	"sort"
    27  	"strings"
    28  	"testing"
    29  
    30  	"github.com/golang/mock/gomock"
    31  	"github.com/stretchr/testify/require"
    32  
    33  	"github.com/m3db/m3/src/m3ninx/doc"
    34  	"github.com/m3db/m3/src/m3ninx/idx"
    35  	m3ninxindex "github.com/m3db/m3/src/m3ninx/index"
    36  	"github.com/m3db/m3/src/m3ninx/index/segment"
    37  	"github.com/m3db/m3/src/m3ninx/index/segment/fst"
    38  	"github.com/m3db/m3/src/m3ninx/index/segment/mem"
    39  	"github.com/m3db/m3/src/m3ninx/postings"
    40  	"github.com/m3db/m3/src/m3ninx/postings/roaring"
    41  	"github.com/m3db/m3/src/m3ninx/util"
    42  	"github.com/m3db/m3/src/x/context"
    43  	xtest "github.com/m3db/m3/src/x/test"
    44  )
    45  
    46  var (
    47  	testFstOptions    = fst.NewOptions()
    48  	lotsTestDocuments = util.MustReadDocs("../../../m3ninx/util/testdata/node_exporter.json", 2000)
    49  )
    50  
    51  func TestFieldsTermsIteratorSimple(t *testing.T) {
    52  	ctx := context.NewBackground()
    53  	s := newFieldsTermsIterSetup(
    54  		pair{"a", "b"}, pair{"a", "c"},
    55  		pair{"d", "e"}, pair{"d", "f"},
    56  		pair{"g", "h"},
    57  		pair{"i", "j"},
    58  		pair{"k", "l"},
    59  	)
    60  	reader, err := s.asSegment(t).Reader()
    61  	require.NoError(t, err)
    62  
    63  	iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{iterateTerms: true})
    64  	require.NoError(t, err)
    65  	s.requireEquals(t, iter)
    66  }
    67  
    68  func TestFieldsTermsIteratorSimpleSkip(t *testing.T) {
    69  	ctx := context.NewBackground()
    70  	input := []pair{
    71  		{"a", "b"},
    72  		{"a", "c"},
    73  		{"d", "e"},
    74  		{"d", "f"},
    75  		{"g", "h"},
    76  		{"i", "j"},
    77  		{"k", "l"},
    78  	}
    79  	s := newFieldsTermsIterSetup(input...)
    80  	reader, err := s.asSegment(t).Reader()
    81  	require.NoError(t, err)
    82  
    83  	iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{
    84  		iterateTerms: true,
    85  		allowFn: func(f []byte) bool {
    86  			return !bytes.Equal([]byte("a"), f) && !bytes.Equal([]byte("k"), f)
    87  		},
    88  	})
    89  	require.NoError(t, err)
    90  	slice, err := toSlice(iter)
    91  	require.NoError(t, err)
    92  	requireSlicesEqual(t, []pair{
    93  		{"d", "e"},
    94  		{"d", "f"},
    95  		{"g", "h"},
    96  		{"i", "j"},
    97  	}, slice)
    98  }
    99  
   100  func TestFieldsTermsIteratorTermsOnly(t *testing.T) {
   101  	ctx := context.NewBackground()
   102  
   103  	s := newFieldsTermsIterSetup(
   104  		pair{"a", "b"},
   105  		pair{"a", "c"},
   106  		pair{"d", "e"},
   107  		pair{"d", "f"},
   108  		pair{"g", "h"},
   109  		pair{"i", "j"},
   110  		pair{"k", "l"},
   111  	)
   112  	reader, err := s.asSegment(t).Reader()
   113  	require.NoError(t, err)
   114  
   115  	iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{})
   116  	require.NoError(t, err)
   117  	slice, err := toSlice(iter)
   118  	require.NoError(t, err)
   119  	requireSlicesEqual(t, []pair{
   120  		{"a", ""},
   121  		{"d", ""},
   122  		{"g", ""},
   123  		{"i", ""},
   124  		{"k", ""},
   125  	}, slice)
   126  }
   127  
   128  func TestFieldsTermsIteratorEmptyTerm(t *testing.T) {
   129  	ctx := context.NewBackground()
   130  
   131  	ctrl := gomock.NewController(xtest.Reporter{T: t})
   132  	defer ctrl.Finish()
   133  
   134  	reader := newMockSegmentReader(ctrl, map[string]terms{
   135  		"a": {},
   136  	})
   137  	iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{iterateTerms: false})
   138  	require.NoError(t, err)
   139  	slice, err := toSlice(iter)
   140  	require.NoError(t, err)
   141  	requireSlicesEqual(t, []pair{{"a", ""}}, slice)
   142  }
   143  
   144  func TestFieldsTermsIteratorRestrictByQueryFields(t *testing.T) {
   145  	ctx := context.NewBackground()
   146  
   147  	ctrl := gomock.NewController(xtest.Reporter{T: t})
   148  	defer ctrl.Finish()
   149  
   150  	pl0 := roaring.NewPostingsList()
   151  	require.NoError(t, pl0.Insert(postings.ID(42)))
   152  
   153  	pl1 := roaring.NewPostingsList()
   154  	require.NoError(t, pl1.Insert(postings.ID(1)))
   155  
   156  	pl2 := roaring.NewPostingsList()
   157  	require.NoError(t, pl2.Insert(postings.ID(2)))
   158  
   159  	reader := newMockSegmentReader(ctrl, map[string]terms{
   160  		"foo": {values: []term{{value: "foo_0"}}, postings: pl0},
   161  		"bar": {values: []term{{value: "bar_0"}}, postings: pl1},
   162  		"baz": {values: []term{{value: "baz_0"}}, postings: pl2},
   163  	})
   164  
   165  	// Simulate term query for "bar":
   166  	reader.EXPECT().MatchField([]byte("bar")).Return(pl1, nil)
   167  
   168  	iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{
   169  		iterateTerms: false,
   170  		restrictByQuery: &Query{
   171  			Query: idx.NewFieldQuery([]byte("bar")),
   172  		},
   173  	})
   174  	require.NoError(t, err)
   175  	slice, err := toSlice(iter)
   176  	require.NoError(t, err)
   177  	requireSlicesEqual(t, []pair{{"bar", ""}}, slice)
   178  }
   179  
   180  func TestFieldsTermsIteratorEmptyTermInclude(t *testing.T) {
   181  	ctx := context.NewBackground()
   182  
   183  	ctrl := gomock.NewController(xtest.Reporter{T: t})
   184  	defer ctrl.Finish()
   185  
   186  	reader := newMockSegmentReader(ctrl, map[string]terms{
   187  		"a": {},
   188  	})
   189  	iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{iterateTerms: true})
   190  	require.NoError(t, err)
   191  	slice, err := toSlice(iter)
   192  	require.NoError(t, err)
   193  	requireSlicesEqual(t, []pair{}, slice)
   194  }
   195  
   196  func TestFieldsTermsIteratorIterateTermsAndRestrictByQuery(t *testing.T) {
   197  	ctx := context.NewBackground()
   198  
   199  	testDocs := []doc.Metadata{
   200  		{
   201  			Fields: []doc.Field{
   202  				{
   203  					Name:  []byte("fruit"),
   204  					Value: []byte("banana"),
   205  				},
   206  				{
   207  					Name:  []byte("color"),
   208  					Value: []byte("yellow"),
   209  				},
   210  			},
   211  		},
   212  		{
   213  			Fields: []doc.Field{
   214  				{
   215  					Name:  []byte("fruit"),
   216  					Value: []byte("apple"),
   217  				},
   218  				{
   219  					Name:  []byte("color"),
   220  					Value: []byte("red"),
   221  				},
   222  			},
   223  		},
   224  		{
   225  			Fields: []doc.Field{
   226  				{
   227  					Name:  []byte("fruit"),
   228  					Value: []byte("pineapple"),
   229  				},
   230  				{
   231  					Name:  []byte("color"),
   232  					Value: []byte("yellow"),
   233  				},
   234  			},
   235  		},
   236  	}
   237  
   238  	seg, err := mem.NewSegment(mem.NewOptions())
   239  	require.NoError(t, err)
   240  
   241  	require.NoError(t, seg.InsertBatch(m3ninxindex.Batch{
   242  		Docs:                testDocs,
   243  		AllowPartialUpdates: true,
   244  	}))
   245  
   246  	require.NoError(t, seg.Seal())
   247  
   248  	fruitRegexp, err := idx.NewRegexpQuery([]byte("fruit"), []byte("^.*apple$"))
   249  	require.NoError(t, err)
   250  
   251  	colorRegexp, err := idx.NewRegexpQuery([]byte("color"), []byte("^(red|yellow)$"))
   252  	require.NoError(t, err)
   253  
   254  	reader, err := seg.Reader()
   255  	require.NoError(t, err)
   256  
   257  	iter, err := newFieldsAndTermsIterator(ctx, reader, fieldsAndTermsIteratorOpts{
   258  		iterateTerms: true,
   259  		restrictByQuery: &Query{
   260  			Query: idx.NewConjunctionQuery(fruitRegexp, colorRegexp),
   261  		},
   262  	})
   263  	require.NoError(t, err)
   264  	slice, err := toSlice(iter)
   265  	require.NoError(t, err)
   266  	requireSlicesEqual(t, []pair{
   267  		{"color", "red"},
   268  		{"color", "yellow"},
   269  		{"fruit", "apple"},
   270  		{"fruit", "pineapple"},
   271  	}, slice)
   272  }
   273  
   274  type terms struct {
   275  	values   []term
   276  	postings postings.List
   277  }
   278  
   279  type term struct {
   280  	value    string
   281  	postings postings.List
   282  }
   283  
   284  func newMockSegmentReader(ctrl *gomock.Controller, termValues map[string]terms) *segment.MockReader {
   285  	fields := make([]iterpoint, 0, len(termValues))
   286  	for field := range termValues {
   287  		fields = append(fields, iterpoint{
   288  			value:    field,
   289  			postings: termValues[field].postings,
   290  		})
   291  	}
   292  	sort.Slice(fields, func(i, j int) bool {
   293  		return strings.Compare(fields[i].value, fields[j].value) < 0
   294  	})
   295  
   296  	r := segment.NewMockReader(ctrl)
   297  	fieldsPostingsListIterator := &stubFieldsPostingsListIterator{points: fields}
   298  
   299  	r.EXPECT().FieldsPostingsList().Return(fieldsPostingsListIterator, nil).AnyTimes()
   300  
   301  	for _, f := range fields {
   302  		termValues := termValues[f.value].values
   303  		sort.Slice(termValues, func(i, j int) bool {
   304  			return termValues[i].value < termValues[j].value
   305  		})
   306  		terms := make([]iterpoint, 0, len(termValues))
   307  		for _, t := range termValues {
   308  			terms = append(terms, iterpoint{
   309  				value:    t.value,
   310  				postings: t.postings,
   311  			})
   312  		}
   313  		termIterator := &stubTermIterator{points: terms}
   314  		r.EXPECT().Terms([]byte(f.value)).Return(termIterator, nil).AnyTimes()
   315  	}
   316  
   317  	return r
   318  }
   319  
   320  type stubFieldsPostingsListIterator struct {
   321  	current iterpoint
   322  	points  []iterpoint
   323  }
   324  
   325  func (s *stubFieldsPostingsListIterator) Next() bool {
   326  	if len(s.points) == 0 {
   327  		return false
   328  	}
   329  	s.current = s.points[0]
   330  	s.points = s.points[1:]
   331  	return true
   332  }
   333  
   334  func (s *stubFieldsPostingsListIterator) Current() ([]byte, postings.List) {
   335  	return []byte(s.current.value), s.current.postings
   336  }
   337  
   338  func (s *stubFieldsPostingsListIterator) Err() error {
   339  	return s.current.err
   340  }
   341  
   342  func (s *stubFieldsPostingsListIterator) Close() error {
   343  	if s.current.err != nil {
   344  		return s.current.err
   345  	}
   346  	for s.Next() {
   347  		if err := s.Err(); err != nil {
   348  			return err
   349  		}
   350  	}
   351  	return nil
   352  }
   353  
   354  type stubTermIterator struct {
   355  	current iterpoint
   356  	points  []iterpoint
   357  }
   358  
   359  func (s *stubTermIterator) Empty() bool {
   360  	return len(s.points) == 0
   361  }
   362  
   363  func (s *stubTermIterator) Next() bool {
   364  	if len(s.points) == 0 {
   365  		return false
   366  	}
   367  	s.current = s.points[0]
   368  	s.points = s.points[1:]
   369  	return true
   370  }
   371  
   372  func (s *stubTermIterator) Current() ([]byte, postings.List) {
   373  	return []byte(s.current.value), s.current.postings
   374  }
   375  
   376  func (s *stubTermIterator) Err() error {
   377  	return s.current.err
   378  }
   379  
   380  func (s *stubTermIterator) Close() error {
   381  	if s.current.err != nil {
   382  		return s.current.err
   383  	}
   384  	for s.Next() {
   385  		if err := s.Err(); err != nil {
   386  			return err
   387  		}
   388  	}
   389  	return nil
   390  }
   391  
   392  type stubFieldIterator struct {
   393  	current iterpoint
   394  	points  []iterpoint
   395  }
   396  
   397  func (s *stubFieldIterator) Next() bool {
   398  	if len(s.points) == 0 {
   399  		return false
   400  	}
   401  	s.current = s.points[0]
   402  	s.points = s.points[1:]
   403  	return true
   404  }
   405  
   406  func (s *stubFieldIterator) Current() []byte {
   407  	return []byte(s.current.value)
   408  }
   409  
   410  func (s *stubFieldIterator) Err() error {
   411  	return s.current.err
   412  }
   413  
   414  func (s *stubFieldIterator) Close() error {
   415  	if s.current.err != nil {
   416  		return s.current.err
   417  	}
   418  	for s.Next() {
   419  		if err := s.Err(); err != nil {
   420  			return err
   421  		}
   422  	}
   423  	return nil
   424  }
   425  
// iterpoint is one element served by the stub iterators in this file.
type iterpoint struct {
	// err is what a stub's Err method reports while this point is current.
	err      error
	// value is returned, converted to bytes, by a stub's Current method.
	value    string
	// postings is returned alongside value by the stubs whose Current also
	// yields a postings list.
	postings postings.List
}
   431  
   432  type pair struct {
   433  	Name, Value string
   434  }
   435  
   436  func newFieldsTermsIterSetup(fields ...pair) fieldsTermsIterSetup {
   437  	sort.Slice(fields, func(i, j int) bool {
   438  		c := strings.Compare(fields[i].Name, fields[j].Name)
   439  		if c == 0 {
   440  			return strings.Compare(fields[i].Value, fields[j].Value) < 0
   441  		}
   442  		return c < 0
   443  	})
   444  	return fieldsTermsIterSetup{fields}
   445  }
   446  
   447  type fieldsTermsIterSetup struct {
   448  	fields []pair
   449  }
   450  
   451  func (s *fieldsTermsIterSetup) asSegment(t *testing.T) segment.Segment {
   452  	docs := make([]doc.Metadata, 0, len(s.fields))
   453  	for _, f := range s.fields {
   454  		docs = append(docs, doc.Metadata{
   455  			ID: []byte(fmt.Sprintf("id_%v_%v", f.Name, f.Value)),
   456  			Fields: []doc.Field{
   457  				{
   458  					Name:  []byte(f.Name),
   459  					Value: []byte(f.Value),
   460  				},
   461  			},
   462  		})
   463  	}
   464  	memSeg := testSegment(t, docs...).(segment.MutableSegment)
   465  	return fst.ToTestSegment(t, memSeg, testFstOptions)
   466  }
   467  
   468  func (s *fieldsTermsIterSetup) requireEquals(t *testing.T, iter fieldsAndTermsIterator) {
   469  	pending := s.fields
   470  	for len(pending) > 0 {
   471  		require.True(t, iter.Next())
   472  		name, value := iter.Current()
   473  		if bytes.Equal(name, doc.IDReservedFieldName) {
   474  			continue
   475  		}
   476  		top := pending[0]
   477  		pending = pending[1:]
   478  		require.Equal(t, top.Name, string(name))
   479  		require.Equal(t, top.Value, string(value))
   480  	}
   481  	require.False(t, iter.Next())
   482  	require.NoError(t, iter.Err())
   483  	require.NoError(t, iter.Close())
   484  }
   485  
   486  func toSlice(iter fieldsAndTermsIterator) ([]pair, error) {
   487  	var pairs []pair
   488  	for iter.Next() {
   489  		n, v := iter.Current()
   490  		if bytes.Equal(n, doc.IDReservedFieldName) {
   491  			continue
   492  		}
   493  		pairs = append(pairs, pair{
   494  			Name:  string(n),
   495  			Value: string(v),
   496  		})
   497  	}
   498  	return pairs, iter.Err()
   499  }
   500  
   501  func requireSlicesEqual(t *testing.T, a, b []pair) {
   502  	require.Equal(t, len(a), len(b))
   503  	for i := 0; i < len(a); i++ {
   504  		require.Equal(t, a[i], b[i])
   505  	}
   506  }