// Origin: github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/m3ninx/index/segment/mem/terms_dict_test.go

// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
20 21 package mem 22 23 import ( 24 "fmt" 25 "reflect" 26 re "regexp" 27 "testing" 28 29 "github.com/m3db/m3/src/m3ninx/doc" 30 "github.com/m3db/m3/src/m3ninx/postings" 31 32 "github.com/leanovate/gopter" 33 "github.com/leanovate/gopter/gen" 34 "github.com/leanovate/gopter/prop" 35 "github.com/stretchr/testify/suite" 36 ) 37 38 var ( 39 testRandomSeed int64 = 42 40 testMinSuccessfulTests = 1000 41 42 sampleRegexps = []interface{}{ 43 `a`, 44 `a.`, 45 `a.b`, 46 `ab`, 47 `a.b.c`, 48 `abc`, 49 `a|^`, 50 `a|b`, 51 `(a)`, 52 `(a)|b`, 53 `a*`, 54 `a+`, 55 `a?`, 56 `a{2}`, 57 `a{2,3}`, 58 `a{2,}`, 59 `a*?`, 60 `a+?`, 61 `a??`, 62 `a{2}?`, 63 `a{2,3}?`, 64 `a{2,}?`, 65 } 66 ) 67 68 type newTermsDictFn func() *termsDict 69 70 type termsDictionaryTestSuite struct { 71 suite.Suite 72 73 fn newTermsDictFn 74 termsDict *termsDict 75 } 76 77 func (t *termsDictionaryTestSuite) SetupTest() { 78 t.termsDict = t.fn() 79 } 80 81 func (t *termsDictionaryTestSuite) TestInsert() { 82 props := getProperties() 83 props.Property( 84 "The dictionary should support inserting fields", 85 prop.ForAll( 86 func(f doc.Field, id postings.ID) (bool, error) { 87 t.termsDict.Insert(f, id) 88 return true, nil 89 }, 90 genField(), 91 genDocID(), 92 )) 93 94 props.TestingRun(t.T()) 95 } 96 97 func (t *termsDictionaryTestSuite) TestIterateFields() { 98 props := getProperties() 99 props.Property( 100 "The dictionary should support iterating over known fields", 101 prop.ForAll( 102 func(genFields []doc.Field, id postings.ID) (bool, error) { 103 expectedFields := make(map[string]struct{}, len(genFields)) 104 for _, f := range genFields { 105 t.termsDict.Insert(f, id) 106 expectedFields[string(f.Name)] = struct{}{} 107 } 108 fieldsIter := t.termsDict.Fields() 109 fields := toSlice(t.T(), fieldsIter) 110 for _, field := range fields { 111 delete(expectedFields, string(field)) 112 } 113 return len(expectedFields) == 0, nil 114 }, 115 gen.SliceOf(genField()), 116 genDocID(), 117 )) 118 props.TestingRun(t.T()) 
119 } 120 121 func (t *termsDictionaryTestSuite) TestIterateTerms() { 122 props := getProperties() 123 props.Property( 124 "The dictionary should support iterating over known terms", 125 prop.ForAll( 126 func(genFields []doc.Field, id postings.ID) bool { 127 // build map from fieldName -> fieldValue of all generated inputs, and insert into terms dict 128 expectedFields := make(map[string]map[string]struct{}, len(genFields)) 129 for _, f := range genFields { 130 t.termsDict.Insert(f, id) 131 fName, fValue := string(f.Name), string(f.Value) 132 vals, ok := expectedFields[fName] 133 if !ok { 134 vals = make(map[string]struct{}) 135 expectedFields[fName] = vals 136 } 137 vals[fValue] = struct{}{} 138 } 139 // for each expected combination of fieldName -> []fieldValues, ensure all are present 140 for name, expectedValues := range expectedFields { 141 valuesIter := t.termsDict.Terms([]byte(name)) 142 values := toTermPostings(t.T(), valuesIter) 143 for val := range values { 144 delete(expectedValues, val) 145 } 146 if len(expectedValues) != 0 { 147 return false 148 } 149 } 150 return true 151 }, 152 gen.SliceOf(genField()), 153 genDocID(), 154 )) 155 props.TestingRun(t.T()) 156 } 157 158 func (t *termsDictionaryTestSuite) TestContainsTerm() { 159 props := getProperties() 160 props.Property( 161 "The dictionary should support term lookups", 162 prop.ForAll( 163 func(f doc.Field, id postings.ID) (bool, error) { 164 t.termsDict.Insert(f, id) 165 166 if ok := t.termsDict.ContainsTerm(f.Name, []byte(f.Value)); !ok { 167 return false, fmt.Errorf("id of new document '%v' is not in postings list of matching documents", id) 168 } 169 170 return true, nil 171 }, 172 genField(), 173 genDocID(), 174 )) 175 176 props.TestingRun(t.T()) 177 } 178 179 func (t *termsDictionaryTestSuite) TestContainsField() { 180 props := getProperties() 181 props.Property( 182 "The dictionary should support field lookups", 183 prop.ForAll( 184 func(f doc.Field, id postings.ID) (bool, error) { 185 
t.termsDict.Insert(f, id) 186 187 if ok := t.termsDict.ContainsField(f.Name); !ok { 188 return false, fmt.Errorf("id of new document '%v' is not in postings list of matching documents", id) 189 } 190 191 return true, nil 192 }, 193 genField(), 194 genDocID(), 195 )) 196 197 props.TestingRun(t.T()) 198 } 199 200 func (t *termsDictionaryTestSuite) TestMatchTerm() { 201 props := getProperties() 202 props.Property( 203 "The dictionary should support exact match queries", 204 prop.ForAll( 205 func(f doc.Field, id postings.ID) (bool, error) { 206 t.termsDict.Insert(f, id) 207 208 pl := t.termsDict.MatchTerm(f.Name, []byte(f.Value)) 209 if pl == nil { 210 return false, fmt.Errorf("postings list of documents matching query should not be nil") 211 } 212 if !pl.Contains(id) { 213 return false, fmt.Errorf("id of new document '%v' is not in postings list of matching documents", id) 214 } 215 216 return true, nil 217 }, 218 genField(), 219 genDocID(), 220 )) 221 222 props.TestingRun(t.T()) 223 } 224 225 func (t *termsDictionaryTestSuite) TestMatchTermNoResults() { 226 props := getProperties() 227 props.Property( 228 "Exact match queries which return no results are valid", 229 prop.ForAll( 230 func(f doc.Field) (bool, error) { 231 pl := t.termsDict.MatchTerm(f.Name, []byte(f.Value)) 232 if pl == nil { 233 return false, fmt.Errorf("postings list returned should not be nil") 234 } 235 if pl.Len() != 0 { 236 return false, fmt.Errorf("postings list contains unexpected IDs") 237 } 238 239 return true, nil 240 }, 241 genField(), 242 )) 243 244 props.TestingRun(t.T()) 245 } 246 247 func (t *termsDictionaryTestSuite) TestMatchRegex() { 248 props := getProperties() 249 props.Property( 250 "The dictionary should support regular expression queries", 251 prop.ForAll( 252 func(input fieldAndRegexp, id postings.ID) (bool, error) { 253 var ( 254 f = input.field 255 compiled = input.compiled 256 ) 257 258 t.termsDict.Insert(f, id) 259 260 pl := t.termsDict.MatchRegexp(f.Name, compiled) 261 if 
pl == nil { 262 return false, fmt.Errorf("postings list of documents matching query should not be nil") 263 } 264 if !pl.Contains(id) { 265 return false, fmt.Errorf("id of new document '%v' is not in list of matching documents", id) 266 } 267 268 return true, nil 269 }, 270 genFieldAndRegex(), 271 genDocID(), 272 )) 273 274 props.TestingRun(t.T()) 275 } 276 277 func (t *termsDictionaryTestSuite) TestMatchRegexNoResults() { 278 props := getProperties() 279 props.Property( 280 "Regular expression queries which no results are valid", 281 prop.ForAll( 282 func(input fieldAndRegexp, id postings.ID) (bool, error) { 283 var ( 284 f = input.field 285 compiled = input.compiled 286 ) 287 pl := t.termsDict.MatchRegexp(f.Name, compiled) 288 if pl == nil { 289 return false, fmt.Errorf("postings list returned should not be nil") 290 } 291 if pl.Len() != 0 { 292 return false, fmt.Errorf("postings list contains unexpected IDs") 293 } 294 295 return true, nil 296 }, 297 genFieldAndRegex(), 298 genDocID(), 299 )) 300 301 props.TestingRun(t.T()) 302 } 303 304 func TestTermsDictionary(t *testing.T) { 305 opts := NewOptions() 306 suite.Run(t, &termsDictionaryTestSuite{ 307 fn: func() *termsDict { 308 return newTermsDict(opts).(*termsDict) 309 }, 310 }) 311 } 312 313 func getProperties() *gopter.Properties { 314 params := gopter.DefaultTestParameters() 315 params.MaxSize = 10 316 params.Rng.Seed(testRandomSeed) 317 params.MinSuccessfulTests = testMinSuccessfulTests 318 return gopter.NewProperties(params) 319 } 320 321 func genField() gopter.Gen { 322 return gopter.CombineGens( 323 gen.AnyString(), 324 gen.AnyString(), 325 ).Map(func(values []interface{}) doc.Field { 326 var ( 327 name = values[0].(string) 328 value = values[1].(string) 329 ) 330 f := doc.Field{ 331 Name: []byte(name), 332 Value: []byte(value), 333 } 334 return f 335 }) 336 } 337 338 func genDocID() gopter.Gen { 339 return gen.UInt32(). 
340 Map(func(value uint32) postings.ID { 341 return postings.ID(value) 342 }) 343 } 344 345 type fieldAndRegexp struct { 346 field doc.Field 347 regexp string 348 compiled *re.Regexp 349 } 350 351 func genFieldAndRegex() gopter.Gen { 352 return gen.OneConstOf(sampleRegexps...). 353 FlatMap(func(value interface{}) gopter.Gen { 354 regex := value.(string) 355 return fieldFromRegexp(regex) 356 }, reflect.TypeOf(fieldAndRegexp{})) 357 } 358 359 func fieldFromRegexp(regexp string) gopter.Gen { 360 return gopter.CombineGens( 361 gen.AnyString(), 362 gen.RegexMatch(regexp), 363 ).Map(func(values []interface{}) fieldAndRegexp { 364 var ( 365 name = values[0].(string) 366 value = values[1].(string) 367 ) 368 f := doc.Field{ 369 Name: []byte(name), 370 Value: []byte(value), 371 } 372 return fieldAndRegexp{ 373 field: f, 374 regexp: regexp, 375 compiled: re.MustCompile(regexp), 376 } 377 }) 378 }