github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/row_reader.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package inverted
    13  
    14  import (
    15  	"bytes"
    16  	"context"
    17  	"encoding/binary"
    18  	"fmt"
    19  
    20  	"github.com/weaviate/sroar"
    21  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
    22  	"github.com/weaviate/weaviate/adapters/repos/db/roaringset"
    23  	"github.com/weaviate/weaviate/entities/filters"
    24  )
    25  
    26  // RowReader reads one or many row(s) depending on the specified operator
    27  type RowReader struct {
    28  	value         []byte
    29  	bucket        *lsmkv.Bucket
    30  	operator      filters.Operator
    31  	keyOnly       bool
    32  	bitmapFactory *roaringset.BitmapFactory
    33  }
    34  
    35  // If keyOnly is set, the RowReader will request key-only cursors wherever
    36  // cursors are used, the specified value arguments in the ReadFn will always be
    37  // nil
    38  func NewRowReader(bucket *lsmkv.Bucket, value []byte, operator filters.Operator,
    39  	keyOnly bool, bitmapFactory *roaringset.BitmapFactory,
    40  ) *RowReader {
    41  	return &RowReader{
    42  		bucket:        bucket,
    43  		value:         value,
    44  		operator:      operator,
    45  		keyOnly:       keyOnly,
    46  		bitmapFactory: bitmapFactory,
    47  	}
    48  }
    49  
    50  // Read a row using the specified ReadFn. If RowReader was created with
    51  // keysOnly==true, the values argument in the readFn will always be nil on all
    52  // requests involving cursors
    53  func (rr *RowReader) Read(ctx context.Context, readFn ReadFn) error {
    54  	switch rr.operator {
    55  	case filters.OperatorEqual:
    56  		return rr.equal(ctx, readFn)
    57  	case filters.OperatorNotEqual:
    58  		return rr.notEqual(ctx, readFn)
    59  	case filters.OperatorGreaterThan:
    60  		return rr.greaterThan(ctx, readFn, false)
    61  	case filters.OperatorGreaterThanEqual:
    62  		return rr.greaterThan(ctx, readFn, true)
    63  	case filters.OperatorLessThan:
    64  		return rr.lessThan(ctx, readFn, false)
    65  	case filters.OperatorLessThanEqual:
    66  		return rr.lessThan(ctx, readFn, true)
    67  	case filters.OperatorLike:
    68  		return rr.like(ctx, readFn)
    69  	case filters.OperatorIsNull: // we need to fetch a row with a given value (there is only nil and !nil) and can reuse equal to get the correct row
    70  		return rr.equal(ctx, readFn)
    71  	default:
    72  		return fmt.Errorf("operator %v not supported", rr.operator)
    73  	}
    74  }
    75  
    76  // equal is a special case, as we don't need to iterate, but just read a single
    77  // row
    78  func (rr *RowReader) equal(ctx context.Context, readFn ReadFn) error {
    79  	v, err := rr.equalHelper(ctx)
    80  	if err != nil {
    81  		return err
    82  	}
    83  
    84  	_, err = readFn(rr.value, rr.transformToBitmap(v))
    85  	return err
    86  }
    87  
    88  func (rr *RowReader) notEqual(ctx context.Context, readFn ReadFn) error {
    89  	v, err := rr.equalHelper(ctx)
    90  	if err != nil {
    91  		return err
    92  	}
    93  
    94  	// Invert the Equal results for an efficient NotEqual
    95  	inverted := rr.bitmapFactory.GetBitmap()
    96  	inverted.AndNot(rr.transformToBitmap(v))
    97  	_, err = readFn(rr.value, inverted)
    98  	return err
    99  }
   100  
   101  // greaterThan reads from the specified value to the end. The first row is only
   102  // included if allowEqual==true, otherwise it starts with the next one
   103  func (rr *RowReader) greaterThan(ctx context.Context, readFn ReadFn,
   104  	allowEqual bool,
   105  ) error {
   106  	c := rr.newCursor()
   107  	defer c.Close()
   108  
   109  	for k, v := c.Seek(rr.value); k != nil; k, v = c.Next() {
   110  		if err := ctx.Err(); err != nil {
   111  			return err
   112  		}
   113  
   114  		if bytes.Equal(k, rr.value) && !allowEqual {
   115  			continue
   116  		}
   117  
   118  		continueReading, err := readFn(k, rr.transformToBitmap(v))
   119  		if err != nil {
   120  			return err
   121  		}
   122  
   123  		if !continueReading {
   124  			break
   125  		}
   126  	}
   127  
   128  	return nil
   129  }
   130  
   131  // lessThan reads from the very begging to the specified  value. The last
   132  // matching row is only included if allowEqual==true, otherwise it ends one
   133  // prior to that.
   134  func (rr *RowReader) lessThan(ctx context.Context, readFn ReadFn,
   135  	allowEqual bool,
   136  ) error {
   137  	c := rr.newCursor()
   138  	defer c.Close()
   139  
   140  	for k, v := c.First(); k != nil && bytes.Compare(k, rr.value) != 1; k, v = c.Next() {
   141  		if err := ctx.Err(); err != nil {
   142  			return err
   143  		}
   144  
   145  		if bytes.Equal(k, rr.value) && !allowEqual {
   146  			continue
   147  		}
   148  
   149  		continueReading, err := readFn(k, rr.transformToBitmap(v))
   150  		if err != nil {
   151  			return err
   152  		}
   153  
   154  		if !continueReading {
   155  			break
   156  		}
   157  	}
   158  
   159  	return nil
   160  }
   161  
   162  func (rr *RowReader) like(ctx context.Context, readFn ReadFn) error {
   163  	like, err := parseLikeRegexp(rr.value)
   164  	if err != nil {
   165  		return fmt.Errorf("parse like value: %w", err)
   166  	}
   167  
   168  	c := rr.newCursor()
   169  	defer c.Close()
   170  
   171  	var (
   172  		initialK []byte
   173  		initialV [][]byte
   174  	)
   175  
   176  	if like.optimizable {
   177  		initialK, initialV = c.Seek(like.min)
   178  	} else {
   179  		initialK, initialV = c.First()
   180  	}
   181  
   182  	for k, v := initialK, initialV; k != nil; k, v = c.Next() {
   183  		if err := ctx.Err(); err != nil {
   184  			return err
   185  		}
   186  
   187  		if like.optimizable {
   188  			// if the query is optimizable, i.e. it doesn't start with a wildcard, we
   189  			// can abort once we've moved past the point where the fixed characters
   190  			// no longer match
   191  			if len(k) < len(like.min) {
   192  				break
   193  			}
   194  
   195  			if bytes.Compare(like.min, k[:len(like.min)]) == -1 {
   196  				break
   197  			}
   198  		}
   199  
   200  		if !like.regexp.Match(k) {
   201  			continue
   202  		}
   203  
   204  		continueReading, err := readFn(k, rr.transformToBitmap(v))
   205  		if err != nil {
   206  			return err
   207  		}
   208  
   209  		if !continueReading {
   210  			break
   211  		}
   212  	}
   213  
   214  	return nil
   215  }
   216  
   217  // newCursor will either return a regular cursor - or a key-only cursor if
   218  // keyOnly==true
   219  func (rr *RowReader) newCursor() *lsmkv.CursorSet {
   220  	if rr.keyOnly {
   221  		return rr.bucket.SetCursorKeyOnly()
   222  	}
   223  
   224  	return rr.bucket.SetCursor()
   225  }
   226  
   227  func (rr *RowReader) transformToBitmap(ids [][]byte) *sroar.Bitmap {
   228  	out := sroar.NewBitmap()
   229  	for _, asBytes := range ids {
   230  		out.Set(binary.LittleEndian.Uint64(asBytes))
   231  	}
   232  	return out
   233  }
   234  
   235  // equalHelper exists, because the Equal and NotEqual operators share this functionality
   236  func (rr *RowReader) equalHelper(ctx context.Context) ([][]byte, error) {
   237  	if err := ctx.Err(); err != nil {
   238  		return nil, err
   239  	}
   240  
   241  	v, err := rr.bucket.SetList(rr.value)
   242  	if err != nil {
   243  		return nil, err
   244  	}
   245  	return v, nil
   246  }