github.com/cs3org/reva/v2@v2.27.7/pkg/storage/utils/indexer/indexer.go (about)

     1  // Copyright 2018-2022 CERN
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // In applying this license, CERN does not waive the privileges and immunities
    16  // granted to it by virtue of its status as an Intergovernmental Organization
    17  // or submit itself to any jurisdiction.
    18  
    19  // Package indexer provides symlink-based indexer for on-disk document-directories.
    20  package indexer
    21  
    22  import (
    23  	"context"
    24  	"errors"
    25  	"fmt"
    26  	"path"
    27  	"strings"
    28  
    29  	"github.com/CiscoM31/godata"
    30  	"github.com/iancoleman/strcase"
    31  
    32  	"github.com/cs3org/reva/v2/pkg/errtypes"
    33  	"github.com/cs3org/reva/v2/pkg/storage/utils/indexer/index"
    34  	"github.com/cs3org/reva/v2/pkg/storage/utils/indexer/option"
    35  	"github.com/cs3org/reva/v2/pkg/storage/utils/metadata"
    36  	"github.com/cs3org/reva/v2/pkg/storage/utils/sync"
    37  )
    38  
// Indexer is a facade to configure and query over multiple indices.
type Indexer interface {
	// AddIndex registers a new index for the type of t. In StorageIndexer the
	// accepted indexType values are "unique", "non_unique" and "autoincrement";
	// any other value yields an error. pkName names the field whose value is
	// stored as the primary key of indexed entities.
	// NOTE(review): StorageIndexer.AddIndex does not read entityDirName.
	AddIndex(t interface{}, indexBy option.IndexBy, pkName, entityDirName, indexType string, bound *option.Bound, caseInsensitive bool) error
	// Add writes t to the indices registered for its type and reports the
	// non-empty values returned by those indices.
	Add(t interface{}) ([]IdxAddResult, error)
	// FindBy looks up the given field values on the indices registered for the
	// type of t and returns the matching entries.
	FindBy(t interface{}, fields ...Field) ([]string, error)
	// Delete removes t from the indices registered for its type.
	Delete(t interface{}) error
}
    46  
    47  // Field combines the name and value of an indexed field.
    48  type Field struct {
    49  	Name  string
    50  	Value string
    51  }
    52  
    53  // NewField is a utility function to create a new Field.
    54  func NewField(name, value string) Field {
    55  	return Field{Name: name, Value: value}
    56  }
    57  
// StorageIndexer is the indexer implementation using metadata storage.
//
// storage is handed to every index constructed by AddIndex. indices holds the
// registered indices and is looked up by the type name that getTypeFQN returns.
// mu is a named lock that Add, FindBy, FindByPartial, Delete and Update take
// under that same type name.
type StorageIndexer struct {
	storage metadata.Storage
	indices typeMap
	mu      sync.NamedRWMutex
}
    64  
    65  // IdxAddResult represents the result of an Add call on an index
    66  type IdxAddResult struct {
    67  	Field, Value string
    68  }
    69  
    70  // CreateIndexer creates a new Indexer.
    71  func CreateIndexer(storage metadata.Storage) Indexer {
    72  	return &StorageIndexer{
    73  		storage: storage,
    74  		indices: typeMap{},
    75  		mu:      sync.NewNamedRWMutex(),
    76  	}
    77  }
    78  
    79  // Reset takes care of deleting all indices from storage and from the internal map of indices
    80  func (i *StorageIndexer) Reset() error {
    81  	for j := range i.indices {
    82  		for _, indices := range i.indices[j].IndicesByField {
    83  			for _, idx := range indices {
    84  				err := idx.Delete()
    85  				if err != nil {
    86  					return err
    87  				}
    88  			}
    89  		}
    90  		delete(i.indices, j)
    91  	}
    92  
    93  	return nil
    94  }
    95  
    96  // AddIndex adds a new index to the indexer receiver.
    97  func (i *StorageIndexer) AddIndex(t interface{}, indexBy option.IndexBy, pkName, entityDirName, indexType string, bound *option.Bound, caseInsensitive bool) error {
    98  	var idx index.Index
    99  
   100  	var f func(metadata.Storage, ...option.Option) index.Index
   101  	switch indexType {
   102  	case "unique":
   103  		f = index.NewUniqueIndexWithOptions
   104  	case "non_unique":
   105  		f = index.NewNonUniqueIndexWithOptions
   106  	case "autoincrement":
   107  		f = index.NewAutoincrementIndex
   108  	default:
   109  		return fmt.Errorf("invalid index type: %s", indexType)
   110  	}
   111  	idx = f(
   112  		i.storage,
   113  		option.CaseInsensitive(caseInsensitive),
   114  		option.WithBounds(bound),
   115  		option.WithIndexBy(indexBy),
   116  		option.WithTypeName(getTypeFQN(t)),
   117  	)
   118  
   119  	i.indices.addIndex(getTypeFQN(t), pkName, idx)
   120  	return idx.Init()
   121  }
   122  
   123  // Add a new entry to the indexer
   124  func (i *StorageIndexer) Add(t interface{}) ([]IdxAddResult, error) {
   125  	typeName := getTypeFQN(t)
   126  
   127  	i.mu.Lock(typeName)
   128  	defer i.mu.Unlock(typeName)
   129  
   130  	var results []IdxAddResult
   131  	if fields, ok := i.indices[typeName]; ok {
   132  		for _, indices := range fields.IndicesByField {
   133  			for _, idx := range indices {
   134  				pkVal, err := valueOf(t, option.IndexByField(fields.PKFieldName))
   135  				if err != nil {
   136  					return []IdxAddResult{}, err
   137  				}
   138  				idxByVal, err := valueOf(t, idx.IndexBy())
   139  				if err != nil {
   140  					return []IdxAddResult{}, err
   141  				}
   142  				value, err := idx.Add(pkVal, idxByVal)
   143  				if err != nil {
   144  					return []IdxAddResult{}, err
   145  				}
   146  				if value == "" {
   147  					continue
   148  				}
   149  				results = append(results, IdxAddResult{Field: idx.IndexBy().String(), Value: value})
   150  			}
   151  		}
   152  	}
   153  
   154  	return results, nil
   155  }
   156  
   157  // FindBy finds a value on an index by fields.
   158  // If multiple fields are given then they are handled like an or condition.
   159  func (i *StorageIndexer) FindBy(t interface{}, queryFields ...Field) ([]string, error) {
   160  	typeName := getTypeFQN(t)
   161  
   162  	i.mu.RLock(typeName)
   163  	defer i.mu.RUnlock(typeName)
   164  
   165  	resultPaths := make(map[string]struct{})
   166  	if fields, ok := i.indices[typeName]; ok {
   167  		for fieldName, queryFields := range groupFieldsByName(queryFields) {
   168  			idxes := fields.IndicesByField[strcase.ToCamel(fieldName)]
   169  			values := make([]string, 0, len(queryFields))
   170  			for _, f := range queryFields {
   171  				values = append(values, f.Value)
   172  			}
   173  			for _, idx := range idxes {
   174  				res, err := idx.LookupCtx(context.Background(), values...)
   175  				if err != nil {
   176  					if _, ok := err.(errtypes.IsNotFound); ok {
   177  						continue
   178  					}
   179  
   180  					if err != nil {
   181  						return nil, err
   182  					}
   183  				}
   184  				for _, r := range res {
   185  					resultPaths[path.Base(r)] = struct{}{}
   186  				}
   187  			}
   188  		}
   189  	}
   190  
   191  	result := make([]string, 0, len(resultPaths))
   192  	for p := range resultPaths {
   193  		result = append(result, path.Base(p))
   194  	}
   195  
   196  	return result, nil
   197  }
   198  
   199  // groupFieldsByName groups the given filters and returns a map using the filter type as the key.
   200  func groupFieldsByName(queryFields []Field) map[string][]Field {
   201  	grouped := make(map[string][]Field)
   202  	for _, f := range queryFields {
   203  		grouped[f.Name] = append(grouped[f.Name], f)
   204  	}
   205  	return grouped
   206  }
   207  
   208  // Delete deletes all indexed fields of a given type t on the Indexer.
   209  func (i *StorageIndexer) Delete(t interface{}) error {
   210  	typeName := getTypeFQN(t)
   211  
   212  	i.mu.Lock(typeName)
   213  	defer i.mu.Unlock(typeName)
   214  
   215  	if fields, ok := i.indices[typeName]; ok {
   216  		for _, indices := range fields.IndicesByField {
   217  			for _, idx := range indices {
   218  				pkVal, err := valueOf(t, option.IndexByField(fields.PKFieldName))
   219  				if err != nil {
   220  					return err
   221  				}
   222  				idxByVal, err := valueOf(t, idx.IndexBy())
   223  				if err != nil {
   224  					return err
   225  				}
   226  				if err := idx.Remove(pkVal, idxByVal); err != nil {
   227  					return err
   228  				}
   229  			}
   230  		}
   231  	}
   232  
   233  	return nil
   234  }
   235  
   236  // FindByPartial allows for glob search across all indexes.
   237  func (i *StorageIndexer) FindByPartial(t interface{}, field string, pattern string) ([]string, error) {
   238  	typeName := getTypeFQN(t)
   239  
   240  	i.mu.RLock(typeName)
   241  	defer i.mu.RUnlock(typeName)
   242  
   243  	resultPaths := make([]string, 0)
   244  	if fields, ok := i.indices[typeName]; ok {
   245  		for _, idx := range fields.IndicesByField[strcase.ToCamel(field)] {
   246  			res, err := idx.Search(pattern)
   247  			if err != nil {
   248  				if _, ok := err.(errtypes.IsNotFound); ok {
   249  					continue
   250  				}
   251  
   252  				if err != nil {
   253  					return nil, err
   254  				}
   255  			}
   256  
   257  			resultPaths = append(resultPaths, res...)
   258  
   259  		}
   260  	}
   261  
   262  	result := make([]string, 0, len(resultPaths))
   263  	for _, v := range resultPaths {
   264  		result = append(result, path.Base(v))
   265  	}
   266  
   267  	return result, nil
   268  
   269  }
   270  
   271  // Update updates all indexes on a value <from> to a value <to>.
   272  func (i *StorageIndexer) Update(from, to interface{}) error {
   273  	typeNameFrom := getTypeFQN(from)
   274  
   275  	i.mu.Lock(typeNameFrom)
   276  	defer i.mu.Unlock(typeNameFrom)
   277  
   278  	if typeNameTo := getTypeFQN(to); typeNameFrom != typeNameTo {
   279  		return fmt.Errorf("update types do not match: from %v to %v", typeNameFrom, typeNameTo)
   280  	}
   281  
   282  	if fields, ok := i.indices[typeNameFrom]; ok {
   283  		for fName, indices := range fields.IndicesByField {
   284  			oldV, err := valueOf(from, option.IndexByField(fName))
   285  			if err != nil {
   286  				return err
   287  			}
   288  			newV, err := valueOf(to, option.IndexByField(fName))
   289  			if err != nil {
   290  				return err
   291  			}
   292  			pkVal, err := valueOf(from, option.IndexByField(fields.PKFieldName))
   293  			if err != nil {
   294  				return err
   295  			}
   296  			for _, idx := range indices {
   297  				if oldV == newV {
   298  					continue
   299  				}
   300  				if oldV == "" {
   301  					if _, err := idx.Add(pkVal, newV); err != nil {
   302  						return err
   303  					}
   304  					continue
   305  				}
   306  				if newV == "" {
   307  					if err := idx.Remove(pkVal, oldV); err != nil {
   308  						return err
   309  					}
   310  					continue
   311  				}
   312  				if err := idx.Update(pkVal, oldV, newV); err != nil {
   313  					return err
   314  				}
   315  			}
   316  		}
   317  	}
   318  
   319  	return nil
   320  }
   321  
   322  // Query parses an OData query into something our indexer.Index understands and resolves it.
   323  func (i *StorageIndexer) Query(ctx context.Context, t interface{}, q string) ([]string, error) {
   324  	query, err := godata.ParseFilterString(ctx, q)
   325  	if err != nil {
   326  		return nil, err
   327  	}
   328  
   329  	tree := newQueryTree()
   330  	if err := buildTreeFromOdataQuery(query.Tree, &tree); err != nil {
   331  		return nil, err
   332  	}
   333  
   334  	results := make([]string, 0)
   335  	if err := i.resolveTree(t, &tree, &results); err != nil {
   336  		return nil, err
   337  	}
   338  
   339  	return results, nil
   340  }
   341  
   342  // t is used to infer the indexed field names. When building an index search query, field names have to respect Golang
   343  // conventions and be in PascalCase. For a better overview on this contemplate reading the reflection package under the
   344  // indexer directory. Traversal of the tree happens in a pre-order fashion.
   345  // TODO implement logic for `and` operators.
   346  func (i *StorageIndexer) resolveTree(t interface{}, tree *queryTree, partials *[]string) error {
   347  	if partials == nil {
   348  		return errors.New("return value cannot be nil: partials")
   349  	}
   350  
   351  	if tree.left != nil {
   352  		_ = i.resolveTree(t, tree.left, partials)
   353  	}
   354  
   355  	if tree.right != nil {
   356  		_ = i.resolveTree(t, tree.right, partials)
   357  	}
   358  
   359  	// by the time we're here we reached a leaf node.
   360  	if tree.token != nil {
   361  		switch tree.token.filterType {
   362  		case "FindBy":
   363  			operand, err := sanitizeInput(tree.token.operands)
   364  			if err != nil {
   365  				return err
   366  			}
   367  
   368  			field := Field{Name: operand.field, Value: operand.value}
   369  			r, err := i.FindBy(t, field)
   370  			if err != nil {
   371  				return err
   372  			}
   373  
   374  			*partials = append(*partials, r...)
   375  		case "FindByPartial":
   376  			operand, err := sanitizeInput(tree.token.operands)
   377  			if err != nil {
   378  				return err
   379  			}
   380  
   381  			r, err := i.FindByPartial(t, operand.field, fmt.Sprintf("%v*", operand.value))
   382  			if err != nil {
   383  				return err
   384  			}
   385  
   386  			*partials = append(*partials, r...)
   387  		default:
   388  			return fmt.Errorf("unsupported filter: %v", tree.token.filterType)
   389  		}
   390  	}
   391  
   392  	*partials = dedup(*partials)
   393  	return nil
   394  }
   395  
   396  type indexerTuple struct {
   397  	field, value string
   398  }
   399  
   400  // sanitizeInput returns a tuple of fieldName + value to be applied on indexer.Index filters.
   401  func sanitizeInput(operands []string) (*indexerTuple, error) {
   402  	if len(operands) != 2 {
   403  		return nil, fmt.Errorf("invalid number of operands for filter function: got %v expected 2", len(operands))
   404  	}
   405  
   406  	// field names are Go public types and by design they are in PascalCase, therefore we need to adhere to this rules.
   407  	// for further information on this have a look at the reflection package.
   408  	f := strcase.ToCamel(operands[0])
   409  
   410  	// remove single quotes from value.
   411  	v := strings.ReplaceAll(operands[1], "'", "")
   412  	return &indexerTuple{
   413  		field: f,
   414  		value: v,
   415  	}, nil
   416  }
   417  
   418  // buildTreeFromOdataQuery builds an indexer.queryTree out of a GOData ParseNode. The purpose of this intermediate tree
   419  // is to transform godata operators and functions into supported operations on our index. At the time of this writing
   420  // we only support `FindBy` and `FindByPartial` queries as these are the only implemented filters on indexer.Index(es).
   421  func buildTreeFromOdataQuery(root *godata.ParseNode, tree *queryTree) error {
   422  	if root.Token.Type == godata.ExpressionTokenFunc { // i.e "startswith", "contains"
   423  		switch root.Token.Value {
   424  		case "startswith":
   425  			token := token{
   426  				operator:   root.Token.Value,
   427  				filterType: "FindByPartial",
   428  				// TODO sanitize the number of operands it the expected one.
   429  				operands: []string{
   430  					root.Children[0].Token.Value, // field name, i.e: Name
   431  					root.Children[1].Token.Value, // field value, i.e: Jac
   432  				},
   433  			}
   434  
   435  			tree.insert(&token)
   436  		default:
   437  			return errors.New("operation not supported")
   438  		}
   439  	}
   440  
   441  	if root.Token.Type == godata.ExpressionTokenLogical {
   442  		switch root.Token.Value {
   443  		case "or":
   444  			tree.insert(&token{operator: root.Token.Value})
   445  			for _, child := range root.Children {
   446  				if err := buildTreeFromOdataQuery(child, tree.left); err != nil {
   447  					return err
   448  				}
   449  			}
   450  		case "eq":
   451  			tree.insert(&token{
   452  				operator:   root.Token.Value,
   453  				filterType: "FindBy",
   454  				operands: []string{
   455  					root.Children[0].Token.Value,
   456  					root.Children[1].Token.Value,
   457  				},
   458  			})
   459  			for _, child := range root.Children {
   460  				if err := buildTreeFromOdataQuery(child, tree.left); err != nil {
   461  					return err
   462  				}
   463  			}
   464  		default:
   465  			return errors.New("operator not supported")
   466  		}
   467  	}
   468  	return nil
   469  }