// Source: github.com/cs3org/reva/v2@v2.27.7/pkg/storage/utils/indexer/indexer.go

// Copyright 2018-2022 CERN
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// In applying this license, CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

// Package indexer provides symlink-based indexer for on-disk document-directories.
package indexer

import (
	"context"
	"errors"
	"fmt"
	"path"
	"strings"

	"github.com/CiscoM31/godata"
	"github.com/iancoleman/strcase"

	"github.com/cs3org/reva/v2/pkg/errtypes"
	"github.com/cs3org/reva/v2/pkg/storage/utils/indexer/index"
	"github.com/cs3org/reva/v2/pkg/storage/utils/indexer/option"
	"github.com/cs3org/reva/v2/pkg/storage/utils/metadata"
	"github.com/cs3org/reva/v2/pkg/storage/utils/sync"
)

// Indexer is a facade to configure and query over multiple indices.
type Indexer interface {
	// AddIndex registers an index of the given indexType for the type of t,
	// indexing the value selected by indexBy against the field named pkName.
	AddIndex(t interface{}, indexBy option.IndexBy, pkName, entityDirName, indexType string, bound *option.Bound, caseInsensitive bool) error
	// Add writes t to the indices registered for its type and reports the
	// values those indices produced.
	Add(t interface{}) ([]IdxAddResult, error)
	// FindBy looks up entries of t's type by the given fields; several fields
	// are combined like an or condition.
	FindBy(t interface{}, fields ...Field) ([]string, error)
	// Delete removes t from the indices registered for its type.
	Delete(t interface{}) error
}

// Field combines the name and value of an indexed field.
48 type Field struct { 49 Name string 50 Value string 51 } 52 53 // NewField is a utility function to create a new Field. 54 func NewField(name, value string) Field { 55 return Field{Name: name, Value: value} 56 } 57 58 // StorageIndexer is the indexer implementation using metadata storage 59 type StorageIndexer struct { 60 storage metadata.Storage 61 indices typeMap 62 mu sync.NamedRWMutex 63 } 64 65 // IdxAddResult represents the result of an Add call on an index 66 type IdxAddResult struct { 67 Field, Value string 68 } 69 70 // CreateIndexer creates a new Indexer. 71 func CreateIndexer(storage metadata.Storage) Indexer { 72 return &StorageIndexer{ 73 storage: storage, 74 indices: typeMap{}, 75 mu: sync.NewNamedRWMutex(), 76 } 77 } 78 79 // Reset takes care of deleting all indices from storage and from the internal map of indices 80 func (i *StorageIndexer) Reset() error { 81 for j := range i.indices { 82 for _, indices := range i.indices[j].IndicesByField { 83 for _, idx := range indices { 84 err := idx.Delete() 85 if err != nil { 86 return err 87 } 88 } 89 } 90 delete(i.indices, j) 91 } 92 93 return nil 94 } 95 96 // AddIndex adds a new index to the indexer receiver. 
97 func (i *StorageIndexer) AddIndex(t interface{}, indexBy option.IndexBy, pkName, entityDirName, indexType string, bound *option.Bound, caseInsensitive bool) error { 98 var idx index.Index 99 100 var f func(metadata.Storage, ...option.Option) index.Index 101 switch indexType { 102 case "unique": 103 f = index.NewUniqueIndexWithOptions 104 case "non_unique": 105 f = index.NewNonUniqueIndexWithOptions 106 case "autoincrement": 107 f = index.NewAutoincrementIndex 108 default: 109 return fmt.Errorf("invalid index type: %s", indexType) 110 } 111 idx = f( 112 i.storage, 113 option.CaseInsensitive(caseInsensitive), 114 option.WithBounds(bound), 115 option.WithIndexBy(indexBy), 116 option.WithTypeName(getTypeFQN(t)), 117 ) 118 119 i.indices.addIndex(getTypeFQN(t), pkName, idx) 120 return idx.Init() 121 } 122 123 // Add a new entry to the indexer 124 func (i *StorageIndexer) Add(t interface{}) ([]IdxAddResult, error) { 125 typeName := getTypeFQN(t) 126 127 i.mu.Lock(typeName) 128 defer i.mu.Unlock(typeName) 129 130 var results []IdxAddResult 131 if fields, ok := i.indices[typeName]; ok { 132 for _, indices := range fields.IndicesByField { 133 for _, idx := range indices { 134 pkVal, err := valueOf(t, option.IndexByField(fields.PKFieldName)) 135 if err != nil { 136 return []IdxAddResult{}, err 137 } 138 idxByVal, err := valueOf(t, idx.IndexBy()) 139 if err != nil { 140 return []IdxAddResult{}, err 141 } 142 value, err := idx.Add(pkVal, idxByVal) 143 if err != nil { 144 return []IdxAddResult{}, err 145 } 146 if value == "" { 147 continue 148 } 149 results = append(results, IdxAddResult{Field: idx.IndexBy().String(), Value: value}) 150 } 151 } 152 } 153 154 return results, nil 155 } 156 157 // FindBy finds a value on an index by fields. 158 // If multiple fields are given then they are handled like an or condition. 
159 func (i *StorageIndexer) FindBy(t interface{}, queryFields ...Field) ([]string, error) { 160 typeName := getTypeFQN(t) 161 162 i.mu.RLock(typeName) 163 defer i.mu.RUnlock(typeName) 164 165 resultPaths := make(map[string]struct{}) 166 if fields, ok := i.indices[typeName]; ok { 167 for fieldName, queryFields := range groupFieldsByName(queryFields) { 168 idxes := fields.IndicesByField[strcase.ToCamel(fieldName)] 169 values := make([]string, 0, len(queryFields)) 170 for _, f := range queryFields { 171 values = append(values, f.Value) 172 } 173 for _, idx := range idxes { 174 res, err := idx.LookupCtx(context.Background(), values...) 175 if err != nil { 176 if _, ok := err.(errtypes.IsNotFound); ok { 177 continue 178 } 179 180 if err != nil { 181 return nil, err 182 } 183 } 184 for _, r := range res { 185 resultPaths[path.Base(r)] = struct{}{} 186 } 187 } 188 } 189 } 190 191 result := make([]string, 0, len(resultPaths)) 192 for p := range resultPaths { 193 result = append(result, path.Base(p)) 194 } 195 196 return result, nil 197 } 198 199 // groupFieldsByName groups the given filters and returns a map using the filter type as the key. 200 func groupFieldsByName(queryFields []Field) map[string][]Field { 201 grouped := make(map[string][]Field) 202 for _, f := range queryFields { 203 grouped[f.Name] = append(grouped[f.Name], f) 204 } 205 return grouped 206 } 207 208 // Delete deletes all indexed fields of a given type t on the Indexer. 
209 func (i *StorageIndexer) Delete(t interface{}) error { 210 typeName := getTypeFQN(t) 211 212 i.mu.Lock(typeName) 213 defer i.mu.Unlock(typeName) 214 215 if fields, ok := i.indices[typeName]; ok { 216 for _, indices := range fields.IndicesByField { 217 for _, idx := range indices { 218 pkVal, err := valueOf(t, option.IndexByField(fields.PKFieldName)) 219 if err != nil { 220 return err 221 } 222 idxByVal, err := valueOf(t, idx.IndexBy()) 223 if err != nil { 224 return err 225 } 226 if err := idx.Remove(pkVal, idxByVal); err != nil { 227 return err 228 } 229 } 230 } 231 } 232 233 return nil 234 } 235 236 // FindByPartial allows for glob search across all indexes. 237 func (i *StorageIndexer) FindByPartial(t interface{}, field string, pattern string) ([]string, error) { 238 typeName := getTypeFQN(t) 239 240 i.mu.RLock(typeName) 241 defer i.mu.RUnlock(typeName) 242 243 resultPaths := make([]string, 0) 244 if fields, ok := i.indices[typeName]; ok { 245 for _, idx := range fields.IndicesByField[strcase.ToCamel(field)] { 246 res, err := idx.Search(pattern) 247 if err != nil { 248 if _, ok := err.(errtypes.IsNotFound); ok { 249 continue 250 } 251 252 if err != nil { 253 return nil, err 254 } 255 } 256 257 resultPaths = append(resultPaths, res...) 258 259 } 260 } 261 262 result := make([]string, 0, len(resultPaths)) 263 for _, v := range resultPaths { 264 result = append(result, path.Base(v)) 265 } 266 267 return result, nil 268 269 } 270 271 // Update updates all indexes on a value <from> to a value <to>. 
272 func (i *StorageIndexer) Update(from, to interface{}) error { 273 typeNameFrom := getTypeFQN(from) 274 275 i.mu.Lock(typeNameFrom) 276 defer i.mu.Unlock(typeNameFrom) 277 278 if typeNameTo := getTypeFQN(to); typeNameFrom != typeNameTo { 279 return fmt.Errorf("update types do not match: from %v to %v", typeNameFrom, typeNameTo) 280 } 281 282 if fields, ok := i.indices[typeNameFrom]; ok { 283 for fName, indices := range fields.IndicesByField { 284 oldV, err := valueOf(from, option.IndexByField(fName)) 285 if err != nil { 286 return err 287 } 288 newV, err := valueOf(to, option.IndexByField(fName)) 289 if err != nil { 290 return err 291 } 292 pkVal, err := valueOf(from, option.IndexByField(fields.PKFieldName)) 293 if err != nil { 294 return err 295 } 296 for _, idx := range indices { 297 if oldV == newV { 298 continue 299 } 300 if oldV == "" { 301 if _, err := idx.Add(pkVal, newV); err != nil { 302 return err 303 } 304 continue 305 } 306 if newV == "" { 307 if err := idx.Remove(pkVal, oldV); err != nil { 308 return err 309 } 310 continue 311 } 312 if err := idx.Update(pkVal, oldV, newV); err != nil { 313 return err 314 } 315 } 316 } 317 } 318 319 return nil 320 } 321 322 // Query parses an OData query into something our indexer.Index understands and resolves it. 323 func (i *StorageIndexer) Query(ctx context.Context, t interface{}, q string) ([]string, error) { 324 query, err := godata.ParseFilterString(ctx, q) 325 if err != nil { 326 return nil, err 327 } 328 329 tree := newQueryTree() 330 if err := buildTreeFromOdataQuery(query.Tree, &tree); err != nil { 331 return nil, err 332 } 333 334 results := make([]string, 0) 335 if err := i.resolveTree(t, &tree, &results); err != nil { 336 return nil, err 337 } 338 339 return results, nil 340 } 341 342 // t is used to infer the indexed field names. When building an index search query, field names have to respect Golang 343 // conventions and be in PascalCase. 
For a better overview on this contemplate reading the reflection package under the 344 // indexer directory. Traversal of the tree happens in a pre-order fashion. 345 // TODO implement logic for `and` operators. 346 func (i *StorageIndexer) resolveTree(t interface{}, tree *queryTree, partials *[]string) error { 347 if partials == nil { 348 return errors.New("return value cannot be nil: partials") 349 } 350 351 if tree.left != nil { 352 _ = i.resolveTree(t, tree.left, partials) 353 } 354 355 if tree.right != nil { 356 _ = i.resolveTree(t, tree.right, partials) 357 } 358 359 // by the time we're here we reached a leaf node. 360 if tree.token != nil { 361 switch tree.token.filterType { 362 case "FindBy": 363 operand, err := sanitizeInput(tree.token.operands) 364 if err != nil { 365 return err 366 } 367 368 field := Field{Name: operand.field, Value: operand.value} 369 r, err := i.FindBy(t, field) 370 if err != nil { 371 return err 372 } 373 374 *partials = append(*partials, r...) 375 case "FindByPartial": 376 operand, err := sanitizeInput(tree.token.operands) 377 if err != nil { 378 return err 379 } 380 381 r, err := i.FindByPartial(t, operand.field, fmt.Sprintf("%v*", operand.value)) 382 if err != nil { 383 return err 384 } 385 386 *partials = append(*partials, r...) 387 default: 388 return fmt.Errorf("unsupported filter: %v", tree.token.filterType) 389 } 390 } 391 392 *partials = dedup(*partials) 393 return nil 394 } 395 396 type indexerTuple struct { 397 field, value string 398 } 399 400 // sanitizeInput returns a tuple of fieldName + value to be applied on indexer.Index filters. 401 func sanitizeInput(operands []string) (*indexerTuple, error) { 402 if len(operands) != 2 { 403 return nil, fmt.Errorf("invalid number of operands for filter function: got %v expected 2", len(operands)) 404 } 405 406 // field names are Go public types and by design they are in PascalCase, therefore we need to adhere to this rules. 
407 // for further information on this have a look at the reflection package. 408 f := strcase.ToCamel(operands[0]) 409 410 // remove single quotes from value. 411 v := strings.ReplaceAll(operands[1], "'", "") 412 return &indexerTuple{ 413 field: f, 414 value: v, 415 }, nil 416 } 417 418 // buildTreeFromOdataQuery builds an indexer.queryTree out of a GOData ParseNode. The purpose of this intermediate tree 419 // is to transform godata operators and functions into supported operations on our index. At the time of this writing 420 // we only support `FindBy` and `FindByPartial` queries as these are the only implemented filters on indexer.Index(es). 421 func buildTreeFromOdataQuery(root *godata.ParseNode, tree *queryTree) error { 422 if root.Token.Type == godata.ExpressionTokenFunc { // i.e "startswith", "contains" 423 switch root.Token.Value { 424 case "startswith": 425 token := token{ 426 operator: root.Token.Value, 427 filterType: "FindByPartial", 428 // TODO sanitize the number of operands it the expected one. 
429 operands: []string{ 430 root.Children[0].Token.Value, // field name, i.e: Name 431 root.Children[1].Token.Value, // field value, i.e: Jac 432 }, 433 } 434 435 tree.insert(&token) 436 default: 437 return errors.New("operation not supported") 438 } 439 } 440 441 if root.Token.Type == godata.ExpressionTokenLogical { 442 switch root.Token.Value { 443 case "or": 444 tree.insert(&token{operator: root.Token.Value}) 445 for _, child := range root.Children { 446 if err := buildTreeFromOdataQuery(child, tree.left); err != nil { 447 return err 448 } 449 } 450 case "eq": 451 tree.insert(&token{ 452 operator: root.Token.Value, 453 filterType: "FindBy", 454 operands: []string{ 455 root.Children[0].Token.Value, 456 root.Children[1].Token.Value, 457 }, 458 }) 459 for _, child := range root.Children { 460 if err := buildTreeFromOdataQuery(child, tree.left); err != nil { 461 return err 462 } 463 } 464 default: 465 return errors.New("operator not supported") 466 } 467 } 468 return nil 469 }