github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/searcher_doc_bitmap.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package inverted
    13  
    14  import (
    15  	"context"
    16  	"fmt"
    17  
    18  	"github.com/weaviate/sroar"
    19  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
    20  	"github.com/weaviate/weaviate/entities/filters"
    21  )
    22  
    23  func (s *Searcher) docBitmap(ctx context.Context, b *lsmkv.Bucket, limit int,
    24  	pv *propValuePair,
    25  ) (docBitmap, error) {
    26  	// geo props cannot be served by the inverted index and they require an
    27  	// external index. So, instead of trying to serve this chunk of the filter
    28  	// request internally, we can pass it to an external geo index
    29  	if pv.operator == filters.OperatorWithinGeoRange {
    30  		return s.docBitmapGeo(ctx, pv)
    31  	}
    32  	// all other operators perform operations on the inverted index which we
    33  	// can serve directly
    34  
    35  	if pv.hasFilterableIndex {
    36  		// bucket with strategy roaring set serves bitmaps directly
    37  		if b.Strategy() == lsmkv.StrategyRoaringSet {
    38  			return s.docBitmapInvertedRoaringSet(ctx, b, limit, pv)
    39  		}
    40  
    41  		// bucket with strategy set serves docIds used to build bitmap
    42  		return s.docBitmapInvertedSet(ctx, b, limit, pv)
    43  	}
    44  
    45  	if pv.hasSearchableIndex {
    46  		// bucket with strategy map serves docIds used to build bitmap
    47  		// and frequencies, which are ignored for filtering
    48  		return s.docBitmapInvertedMap(ctx, b, limit, pv)
    49  	}
    50  
    51  	return docBitmap{}, fmt.Errorf("property '%s' is neither filterable nor searchable", pv.prop)
    52  }
    53  
    54  func (s *Searcher) docBitmapInvertedRoaringSet(ctx context.Context, b *lsmkv.Bucket,
    55  	limit int, pv *propValuePair,
    56  ) (docBitmap, error) {
    57  	out := newUninitializedDocBitmap()
    58  	isEmpty := true
    59  	var readFn ReadFn = func(k []byte, docIDs *sroar.Bitmap) (bool, error) {
    60  		if isEmpty {
    61  			out.docIDs = docIDs
    62  			isEmpty = false
    63  		} else {
    64  			out.docIDs.Or(docIDs)
    65  		}
    66  
    67  		// NotEqual requires the full set of potentially existing doc ids
    68  		if pv.operator == filters.OperatorNotEqual {
    69  			return true, nil
    70  		}
    71  
    72  		if limit > 0 && out.docIDs.GetCardinality() >= limit {
    73  			return false, nil
    74  		}
    75  		return true, nil
    76  	}
    77  
    78  	rr := NewRowReaderRoaringSet(b, pv.value, pv.operator, false, s.bitmapFactory)
    79  	if err := rr.Read(ctx, readFn); err != nil {
    80  		return out, fmt.Errorf("read row: %w", err)
    81  	}
    82  
    83  	if isEmpty {
    84  		return newDocBitmap(), nil
    85  	}
    86  	return out, nil
    87  }
    88  
    89  func (s *Searcher) docBitmapInvertedSet(ctx context.Context, b *lsmkv.Bucket,
    90  	limit int, pv *propValuePair,
    91  ) (docBitmap, error) {
    92  	out := newUninitializedDocBitmap()
    93  	isEmpty := true
    94  	var readFn ReadFn = func(k []byte, ids *sroar.Bitmap) (bool, error) {
    95  		if isEmpty {
    96  			out.docIDs = ids
    97  			isEmpty = false
    98  		} else {
    99  			out.docIDs.Or(ids)
   100  		}
   101  
   102  		// NotEqual requires the full set of potentially existing doc ids
   103  		if pv.operator == filters.OperatorNotEqual {
   104  			return true, nil
   105  		}
   106  
   107  		if limit > 0 && out.docIDs.GetCardinality() >= limit {
   108  			return false, nil
   109  		}
   110  		return true, nil
   111  	}
   112  
   113  	rr := NewRowReader(b, pv.value, pv.operator, false, s.bitmapFactory)
   114  	if err := rr.Read(ctx, readFn); err != nil {
   115  		return out, fmt.Errorf("read row: %w", err)
   116  	}
   117  
   118  	if isEmpty {
   119  		return newDocBitmap(), nil
   120  	}
   121  	return out, nil
   122  }
   123  
   124  func (s *Searcher) docBitmapInvertedMap(ctx context.Context, b *lsmkv.Bucket,
   125  	limit int, pv *propValuePair,
   126  ) (docBitmap, error) {
   127  	out := newUninitializedDocBitmap()
   128  	isEmpty := true
   129  	var readFn ReadFn = func(k []byte, ids *sroar.Bitmap) (bool, error) {
   130  		if isEmpty {
   131  			out.docIDs = ids
   132  			isEmpty = false
   133  		} else {
   134  			out.docIDs.Or(ids)
   135  		}
   136  
   137  		// NotEqual requires the full set of potentially existing doc ids
   138  		if pv.operator == filters.OperatorNotEqual {
   139  			return true, nil
   140  		}
   141  
   142  		if limit > 0 && out.docIDs.GetCardinality() >= limit {
   143  			return false, nil
   144  		}
   145  		return true, nil
   146  	}
   147  
   148  	rr := NewRowReaderFrequency(b, pv.value, pv.operator, false, s.shardVersion, s.bitmapFactory)
   149  	if err := rr.Read(ctx, readFn); err != nil {
   150  		return out, fmt.Errorf("read row: %w", err)
   151  	}
   152  
   153  	if isEmpty {
   154  		return newDocBitmap(), nil
   155  	}
   156  	return out, nil
   157  }
   158  
   159  func (s *Searcher) docBitmapGeo(ctx context.Context, pv *propValuePair) (docBitmap, error) {
   160  	out := newDocBitmap()
   161  	propIndex, ok := s.propIndices.ByProp(pv.prop)
   162  
   163  	if !ok {
   164  		return out, nil
   165  	}
   166  
   167  	res, err := propIndex.GeoIndex.WithinRange(ctx, *pv.valueGeoRange)
   168  	if err != nil {
   169  		return out, fmt.Errorf("geo index range search on prop %q: %w", pv.prop, err)
   170  	}
   171  
   172  	out.docIDs.SetMany(res)
   173  	return out, nil
   174  }