github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/m3ninx/index/segment/mem/terms_dict_test.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package mem
    22  
    23  import (
    24  	"fmt"
    25  	"reflect"
    26  	re "regexp"
    27  	"testing"
    28  
    29  	"github.com/m3db/m3/src/m3ninx/doc"
    30  	"github.com/m3db/m3/src/m3ninx/postings"
    31  
    32  	"github.com/leanovate/gopter"
    33  	"github.com/leanovate/gopter/gen"
    34  	"github.com/leanovate/gopter/prop"
    35  	"github.com/stretchr/testify/suite"
    36  )
    37  
    38  var (
    39  	testRandomSeed         int64 = 42
    40  	testMinSuccessfulTests       = 1000
    41  
    42  	sampleRegexps = []interface{}{
    43  		`a`,
    44  		`a.`,
    45  		`a.b`,
    46  		`ab`,
    47  		`a.b.c`,
    48  		`abc`,
    49  		`a|^`,
    50  		`a|b`,
    51  		`(a)`,
    52  		`(a)|b`,
    53  		`a*`,
    54  		`a+`,
    55  		`a?`,
    56  		`a{2}`,
    57  		`a{2,3}`,
    58  		`a{2,}`,
    59  		`a*?`,
    60  		`a+?`,
    61  		`a??`,
    62  		`a{2}?`,
    63  		`a{2,3}?`,
    64  		`a{2,}?`,
    65  	}
    66  )
    67  
// newTermsDictFn constructs the termsDict instance that the suite's tests run
// against; SetupTest invokes it to populate the suite's termsDict field.
type newTermsDictFn func() *termsDict
    69  
// termsDictionaryTestSuite is a testify suite holding the property-based
// tests for termsDict.
type termsDictionaryTestSuite struct {
	suite.Suite

	// fn builds the dictionary under test; SetupTest calls it.
	fn        newTermsDictFn
	// termsDict is the dictionary assigned by the most recent SetupTest call.
	termsDict *termsDict
}
    76  
    77  func (t *termsDictionaryTestSuite) SetupTest() {
    78  	t.termsDict = t.fn()
    79  }
    80  
    81  func (t *termsDictionaryTestSuite) TestInsert() {
    82  	props := getProperties()
    83  	props.Property(
    84  		"The dictionary should support inserting fields",
    85  		prop.ForAll(
    86  			func(f doc.Field, id postings.ID) (bool, error) {
    87  				t.termsDict.Insert(f, id)
    88  				return true, nil
    89  			},
    90  			genField(),
    91  			genDocID(),
    92  		))
    93  
    94  	props.TestingRun(t.T())
    95  }
    96  
    97  func (t *termsDictionaryTestSuite) TestIterateFields() {
    98  	props := getProperties()
    99  	props.Property(
   100  		"The dictionary should support iterating over known fields",
   101  		prop.ForAll(
   102  			func(genFields []doc.Field, id postings.ID) (bool, error) {
   103  				expectedFields := make(map[string]struct{}, len(genFields))
   104  				for _, f := range genFields {
   105  					t.termsDict.Insert(f, id)
   106  					expectedFields[string(f.Name)] = struct{}{}
   107  				}
   108  				fieldsIter := t.termsDict.Fields()
   109  				fields := toSlice(t.T(), fieldsIter)
   110  				for _, field := range fields {
   111  					delete(expectedFields, string(field))
   112  				}
   113  				return len(expectedFields) == 0, nil
   114  			},
   115  			gen.SliceOf(genField()),
   116  			genDocID(),
   117  		))
   118  	props.TestingRun(t.T())
   119  }
   120  
   121  func (t *termsDictionaryTestSuite) TestIterateTerms() {
   122  	props := getProperties()
   123  	props.Property(
   124  		"The dictionary should support iterating over known terms",
   125  		prop.ForAll(
   126  			func(genFields []doc.Field, id postings.ID) bool {
   127  				// build map from fieldName -> fieldValue of all generated inputs, and insert into terms dict
   128  				expectedFields := make(map[string]map[string]struct{}, len(genFields))
   129  				for _, f := range genFields {
   130  					t.termsDict.Insert(f, id)
   131  					fName, fValue := string(f.Name), string(f.Value)
   132  					vals, ok := expectedFields[fName]
   133  					if !ok {
   134  						vals = make(map[string]struct{})
   135  						expectedFields[fName] = vals
   136  					}
   137  					vals[fValue] = struct{}{}
   138  				}
   139  				// for each expected combination of fieldName -> []fieldValues, ensure all are present
   140  				for name, expectedValues := range expectedFields {
   141  					valuesIter := t.termsDict.Terms([]byte(name))
   142  					values := toTermPostings(t.T(), valuesIter)
   143  					for val := range values {
   144  						delete(expectedValues, val)
   145  					}
   146  					if len(expectedValues) != 0 {
   147  						return false
   148  					}
   149  				}
   150  				return true
   151  			},
   152  			gen.SliceOf(genField()),
   153  			genDocID(),
   154  		))
   155  	props.TestingRun(t.T())
   156  }
   157  
   158  func (t *termsDictionaryTestSuite) TestContainsTerm() {
   159  	props := getProperties()
   160  	props.Property(
   161  		"The dictionary should support term lookups",
   162  		prop.ForAll(
   163  			func(f doc.Field, id postings.ID) (bool, error) {
   164  				t.termsDict.Insert(f, id)
   165  
   166  				if ok := t.termsDict.ContainsTerm(f.Name, []byte(f.Value)); !ok {
   167  					return false, fmt.Errorf("id of new document '%v' is not in postings list of matching documents", id)
   168  				}
   169  
   170  				return true, nil
   171  			},
   172  			genField(),
   173  			genDocID(),
   174  		))
   175  
   176  	props.TestingRun(t.T())
   177  }
   178  
   179  func (t *termsDictionaryTestSuite) TestContainsField() {
   180  	props := getProperties()
   181  	props.Property(
   182  		"The dictionary should support field lookups",
   183  		prop.ForAll(
   184  			func(f doc.Field, id postings.ID) (bool, error) {
   185  				t.termsDict.Insert(f, id)
   186  
   187  				if ok := t.termsDict.ContainsField(f.Name); !ok {
   188  					return false, fmt.Errorf("id of new document '%v' is not in postings list of matching documents", id)
   189  				}
   190  
   191  				return true, nil
   192  			},
   193  			genField(),
   194  			genDocID(),
   195  		))
   196  
   197  	props.TestingRun(t.T())
   198  }
   199  
   200  func (t *termsDictionaryTestSuite) TestMatchTerm() {
   201  	props := getProperties()
   202  	props.Property(
   203  		"The dictionary should support exact match queries",
   204  		prop.ForAll(
   205  			func(f doc.Field, id postings.ID) (bool, error) {
   206  				t.termsDict.Insert(f, id)
   207  
   208  				pl := t.termsDict.MatchTerm(f.Name, []byte(f.Value))
   209  				if pl == nil {
   210  					return false, fmt.Errorf("postings list of documents matching query should not be nil")
   211  				}
   212  				if !pl.Contains(id) {
   213  					return false, fmt.Errorf("id of new document '%v' is not in postings list of matching documents", id)
   214  				}
   215  
   216  				return true, nil
   217  			},
   218  			genField(),
   219  			genDocID(),
   220  		))
   221  
   222  	props.TestingRun(t.T())
   223  }
   224  
   225  func (t *termsDictionaryTestSuite) TestMatchTermNoResults() {
   226  	props := getProperties()
   227  	props.Property(
   228  		"Exact match queries which return no results are valid",
   229  		prop.ForAll(
   230  			func(f doc.Field) (bool, error) {
   231  				pl := t.termsDict.MatchTerm(f.Name, []byte(f.Value))
   232  				if pl == nil {
   233  					return false, fmt.Errorf("postings list returned should not be nil")
   234  				}
   235  				if pl.Len() != 0 {
   236  					return false, fmt.Errorf("postings list contains unexpected IDs")
   237  				}
   238  
   239  				return true, nil
   240  			},
   241  			genField(),
   242  		))
   243  
   244  	props.TestingRun(t.T())
   245  }
   246  
   247  func (t *termsDictionaryTestSuite) TestMatchRegex() {
   248  	props := getProperties()
   249  	props.Property(
   250  		"The dictionary should support regular expression queries",
   251  		prop.ForAll(
   252  			func(input fieldAndRegexp, id postings.ID) (bool, error) {
   253  				var (
   254  					f        = input.field
   255  					compiled = input.compiled
   256  				)
   257  
   258  				t.termsDict.Insert(f, id)
   259  
   260  				pl := t.termsDict.MatchRegexp(f.Name, compiled)
   261  				if pl == nil {
   262  					return false, fmt.Errorf("postings list of documents matching query should not be nil")
   263  				}
   264  				if !pl.Contains(id) {
   265  					return false, fmt.Errorf("id of new document '%v' is not in list of matching documents", id)
   266  				}
   267  
   268  				return true, nil
   269  			},
   270  			genFieldAndRegex(),
   271  			genDocID(),
   272  		))
   273  
   274  	props.TestingRun(t.T())
   275  }
   276  
   277  func (t *termsDictionaryTestSuite) TestMatchRegexNoResults() {
   278  	props := getProperties()
   279  	props.Property(
   280  		"Regular expression queries which no results are valid",
   281  		prop.ForAll(
   282  			func(input fieldAndRegexp, id postings.ID) (bool, error) {
   283  				var (
   284  					f        = input.field
   285  					compiled = input.compiled
   286  				)
   287  				pl := t.termsDict.MatchRegexp(f.Name, compiled)
   288  				if pl == nil {
   289  					return false, fmt.Errorf("postings list returned should not be nil")
   290  				}
   291  				if pl.Len() != 0 {
   292  					return false, fmt.Errorf("postings list contains unexpected IDs")
   293  				}
   294  
   295  				return true, nil
   296  			},
   297  			genFieldAndRegex(),
   298  			genDocID(),
   299  		))
   300  
   301  	props.TestingRun(t.T())
   302  }
   303  
   304  func TestTermsDictionary(t *testing.T) {
   305  	opts := NewOptions()
   306  	suite.Run(t, &termsDictionaryTestSuite{
   307  		fn: func() *termsDict {
   308  			return newTermsDict(opts).(*termsDict)
   309  		},
   310  	})
   311  }
   312  
   313  func getProperties() *gopter.Properties {
   314  	params := gopter.DefaultTestParameters()
   315  	params.MaxSize = 10
   316  	params.Rng.Seed(testRandomSeed)
   317  	params.MinSuccessfulTests = testMinSuccessfulTests
   318  	return gopter.NewProperties(params)
   319  }
   320  
   321  func genField() gopter.Gen {
   322  	return gopter.CombineGens(
   323  		gen.AnyString(),
   324  		gen.AnyString(),
   325  	).Map(func(values []interface{}) doc.Field {
   326  		var (
   327  			name  = values[0].(string)
   328  			value = values[1].(string)
   329  		)
   330  		f := doc.Field{
   331  			Name:  []byte(name),
   332  			Value: []byte(value),
   333  		}
   334  		return f
   335  	})
   336  }
   337  
   338  func genDocID() gopter.Gen {
   339  	return gen.UInt32().
   340  		Map(func(value uint32) postings.ID {
   341  			return postings.ID(value)
   342  		})
   343  }
   344  
// fieldAndRegexp pairs a generated field with the regular expression its
// value was generated from, as produced by fieldFromRegexp.
type fieldAndRegexp struct {
	// field is the generated field; its value comes from gen.RegexMatch(regexp).
	field    doc.Field
	// regexp is the source text of the regular expression.
	regexp   string
	// compiled is the result of compiling regexp.
	compiled *re.Regexp
}
   350  
   351  func genFieldAndRegex() gopter.Gen {
   352  	return gen.OneConstOf(sampleRegexps...).
   353  		FlatMap(func(value interface{}) gopter.Gen {
   354  			regex := value.(string)
   355  			return fieldFromRegexp(regex)
   356  		}, reflect.TypeOf(fieldAndRegexp{}))
   357  }
   358  
   359  func fieldFromRegexp(regexp string) gopter.Gen {
   360  	return gopter.CombineGens(
   361  		gen.AnyString(),
   362  		gen.RegexMatch(regexp),
   363  	).Map(func(values []interface{}) fieldAndRegexp {
   364  		var (
   365  			name  = values[0].(string)
   366  			value = values[1].(string)
   367  		)
   368  		f := doc.Field{
   369  			Name:  []byte(name),
   370  			Value: []byte(value),
   371  		}
   372  		return fieldAndRegexp{
   373  			field:    f,
   374  			regexp:   regexp,
   375  			compiled: re.MustCompile(regexp),
   376  		}
   377  	})
   378  }