github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/row_reader_roaring_set.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package inverted
    13  
    14  import (
    15  	"bytes"
    16  	"context"
    17  	"fmt"
    18  
    19  	"github.com/weaviate/sroar"
    20  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
    21  	"github.com/weaviate/weaviate/adapters/repos/db/roaringset"
    22  	"github.com/weaviate/weaviate/entities/filters"
    23  )
    24  
    25  // RowReaderRoaringSet reads one or many row(s) depending on the specified
    26  // operator
    27  type RowReaderRoaringSet struct {
    28  	value         []byte
    29  	operator      filters.Operator
    30  	newCursor     func() lsmkv.CursorRoaringSet
    31  	getter        func(key []byte) (*sroar.Bitmap, error)
    32  	bitmapFactory *roaringset.BitmapFactory
    33  }
    34  
    35  // If keyOnly is set, the RowReaderRoaringSet will request key-only cursors
    36  // wherever cursors are used, the specified value arguments in the
    37  // ReadFn will always be empty
    38  func NewRowReaderRoaringSet(bucket *lsmkv.Bucket, value []byte, operator filters.Operator,
    39  	keyOnly bool, bitmapFactory *roaringset.BitmapFactory,
    40  ) *RowReaderRoaringSet {
    41  	getter := bucket.RoaringSetGet
    42  	newCursor := bucket.CursorRoaringSet
    43  	if keyOnly {
    44  		newCursor = bucket.CursorRoaringSetKeyOnly
    45  	}
    46  
    47  	return &RowReaderRoaringSet{
    48  		value:         value,
    49  		operator:      operator,
    50  		newCursor:     newCursor,
    51  		getter:        getter,
    52  		bitmapFactory: bitmapFactory,
    53  	}
    54  }
    55  
    56  // ReadFn will be called 1..n times per match. This means it will also
    57  // be called on a non-match, in this case v == empty bitmap.
    58  // It is up to the caller to decide if that is an error case or not.
    59  //
    60  // Note that because what we are parsing is an inverted index row, it can
    61  // sometimes become confusing what a key and value actually resembles. The
    62  // variables k and v are the literal row key and value. So this means, the
    63  // data-value as in "less than 17" where 17 would be the "value" is in the key
    64  // variable "k". The value will contain bitmap with docIDs having value "k"
    65  //
    66  // The boolean return argument is a way to stop iteration (e.g. when a limit is
    67  // reached) without producing an error. In normal operation always return true,
    68  // if false is returned once, the loop is broken.
    69  type ReadFn func(k []byte, v *sroar.Bitmap) (bool, error)
    70  
    71  // Read a row using the specified ReadFn. If RowReader was created with
    72  // keysOnly==true, the values argument in the readFn will always be nil on all
    73  // requests involving cursors
    74  func (rr *RowReaderRoaringSet) Read(ctx context.Context, readFn ReadFn) error {
    75  	switch rr.operator {
    76  	case filters.OperatorEqual, filters.OperatorIsNull:
    77  		return rr.equal(ctx, readFn)
    78  	case filters.OperatorNotEqual:
    79  		return rr.notEqual(ctx, readFn)
    80  	case filters.OperatorGreaterThan:
    81  		return rr.greaterThan(ctx, readFn, false)
    82  	case filters.OperatorGreaterThanEqual:
    83  		return rr.greaterThan(ctx, readFn, true)
    84  	case filters.OperatorLessThan:
    85  		return rr.lessThan(ctx, readFn, false)
    86  	case filters.OperatorLessThanEqual:
    87  		return rr.lessThan(ctx, readFn, true)
    88  	case filters.OperatorLike:
    89  		return rr.like(ctx, readFn)
    90  	default:
    91  		return fmt.Errorf("operator %v not supported", rr.operator)
    92  	}
    93  }
    94  
    95  // equal is a special case, as we don't need to iterate, but just read a single
    96  // row
    97  func (rr *RowReaderRoaringSet) equal(ctx context.Context,
    98  	readFn ReadFn,
    99  ) error {
   100  	v, err := rr.equalHelper(ctx)
   101  	if err != nil {
   102  		return err
   103  	}
   104  
   105  	_, err = readFn(rr.value, v)
   106  	return err
   107  }
   108  
   109  func (rr *RowReaderRoaringSet) notEqual(ctx context.Context,
   110  	readFn ReadFn,
   111  ) error {
   112  	v, err := rr.equalHelper(ctx)
   113  	if err != nil {
   114  		return err
   115  	}
   116  
   117  	inverted := rr.bitmapFactory.GetBitmap()
   118  	inverted.AndNot(v)
   119  	_, err = readFn(rr.value, inverted)
   120  	return err
   121  }
   122  
   123  // greaterThan reads from the specified value to the end. The first row is only
   124  // included if allowEqual==true, otherwise it starts with the next one
   125  func (rr *RowReaderRoaringSet) greaterThan(ctx context.Context,
   126  	readFn ReadFn, allowEqual bool,
   127  ) error {
   128  	c := rr.newCursor()
   129  	defer c.Close()
   130  
   131  	for k, v := c.Seek(rr.value); k != nil; k, v = c.Next() {
   132  		if err := ctx.Err(); err != nil {
   133  			return err
   134  		}
   135  
   136  		if bytes.Equal(k, rr.value) && !allowEqual {
   137  			continue
   138  		}
   139  
   140  		if continueReading, err := readFn(k, v); err != nil {
   141  			return err
   142  		} else if !continueReading {
   143  			break
   144  		}
   145  	}
   146  
   147  	return nil
   148  }
   149  
   150  // lessThan reads from the very begging to the specified  value. The last
   151  // matching row is only included if allowEqual==true, otherwise it ends one
   152  // prior to that.
   153  func (rr *RowReaderRoaringSet) lessThan(ctx context.Context,
   154  	readFn ReadFn, allowEqual bool,
   155  ) error {
   156  	c := rr.newCursor()
   157  	defer c.Close()
   158  
   159  	for k, v := c.First(); k != nil && bytes.Compare(k, rr.value) < 1; k, v = c.Next() {
   160  		if err := ctx.Err(); err != nil {
   161  			return err
   162  		}
   163  
   164  		if bytes.Equal(k, rr.value) && !allowEqual {
   165  			continue
   166  		}
   167  
   168  		if continueReading, err := readFn(k, v); err != nil {
   169  			return err
   170  		} else if !continueReading {
   171  			break
   172  		}
   173  	}
   174  
   175  	return nil
   176  }
   177  
   178  func (rr *RowReaderRoaringSet) like(ctx context.Context,
   179  	readFn ReadFn,
   180  ) error {
   181  	like, err := parseLikeRegexp(rr.value)
   182  	if err != nil {
   183  		return fmt.Errorf("parse like value: %w", err)
   184  	}
   185  
   186  	c := rr.newCursor()
   187  	defer c.Close()
   188  
   189  	var (
   190  		initialK   []byte
   191  		initialV   *sroar.Bitmap
   192  		likeMinLen int
   193  	)
   194  
   195  	if like.optimizable {
   196  		initialK, initialV = c.Seek(like.min)
   197  		likeMinLen = len(like.min)
   198  	} else {
   199  		initialK, initialV = c.First()
   200  	}
   201  
   202  	for k, v := initialK, initialV; k != nil; k, v = c.Next() {
   203  		if err := ctx.Err(); err != nil {
   204  			return err
   205  		}
   206  
   207  		if like.optimizable {
   208  			// if the query is optimizable, i.e. it doesn't start with a wildcard, we
   209  			// can abort once we've moved past the point where the fixed characters
   210  			// no longer match
   211  			if len(k) < likeMinLen {
   212  				break
   213  			}
   214  			if bytes.Compare(like.min, k[:likeMinLen]) == -1 {
   215  				break
   216  			}
   217  		}
   218  
   219  		if !like.regexp.Match(k) {
   220  			continue
   221  		}
   222  
   223  		if continueReading, err := readFn(k, v); err != nil {
   224  			return err
   225  		} else if !continueReading {
   226  			break
   227  		}
   228  	}
   229  
   230  	return nil
   231  }
   232  
   233  // equalHelper exists, because the Equal and NotEqual operators share this functionality
   234  func (rr *RowReaderRoaringSet) equalHelper(ctx context.Context) (*sroar.Bitmap, error) {
   235  	if err := ctx.Err(); err != nil {
   236  		return nil, err
   237  	}
   238  
   239  	return rr.getter(rr.value)
   240  }