github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/roaringset/cursor.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package roaringset
    13  
    14  import (
    15  	"bytes"
    16  	"errors"
    17  	"fmt"
    18  
    19  	"github.com/weaviate/sroar"
    20  	"github.com/weaviate/weaviate/entities/lsmkv"
    21  )
    22  
    23  type CombinedCursor struct {
    24  	cursors []InnerCursor
    25  	states  []innerCursorState
    26  	keyOnly bool
    27  }
    28  
    29  type InnerCursor interface {
    30  	First() ([]byte, BitmapLayer, error)
    31  	Next() ([]byte, BitmapLayer, error)
    32  	Seek(key []byte) ([]byte, BitmapLayer, error)
    33  }
    34  
    35  type innerCursorState struct {
    36  	key   []byte
    37  	layer BitmapLayer
    38  	err   error
    39  }
    40  
    41  // When keyOnly flag is set, only keys are returned by First/Next/Seek access methods,
    42  // 2nd value returned is expected to be nil
    43  // When keyOnly is not set, 2nd value is always bitmap. Returned bitmap can be empty (e.g. for Next call after last element was already returned)
    44  func NewCombinedCursor(innerCursors []InnerCursor, keyOnly bool) *CombinedCursor {
    45  	return &CombinedCursor{cursors: innerCursors, keyOnly: keyOnly}
    46  }
    47  
    48  func (c *CombinedCursor) First() ([]byte, *sroar.Bitmap) {
    49  	states := c.runAll(func(ic InnerCursor) ([]byte, BitmapLayer, error) {
    50  		return ic.First()
    51  	})
    52  	return c.getResultFromStates(states)
    53  }
    54  
    55  func (c *CombinedCursor) Next() ([]byte, *sroar.Bitmap) {
    56  	// fallback to First if no previous calls of First or Seek
    57  	if c.states == nil {
    58  		return c.First()
    59  	}
    60  	return c.getResultFromStates(c.states)
    61  }
    62  
    63  func (c *CombinedCursor) Seek(key []byte) ([]byte, *sroar.Bitmap) {
    64  	states := c.runAll(func(ic InnerCursor) ([]byte, BitmapLayer, error) {
    65  		return ic.Seek(key)
    66  	})
    67  	return c.getResultFromStates(states)
    68  }
    69  
    70  type cursorRun func(ic InnerCursor) ([]byte, BitmapLayer, error)
    71  
    72  func (c *CombinedCursor) runAll(cursorRun cursorRun) []innerCursorState {
    73  	states := make([]innerCursorState, len(c.cursors))
    74  	for id, ic := range c.cursors {
    75  		states[id] = c.createState(cursorRun(ic))
    76  	}
    77  	return states
    78  }
    79  
    80  func (c *CombinedCursor) createState(key []byte, layer BitmapLayer, err error) innerCursorState {
    81  	if errors.Is(err, lsmkv.NotFound) {
    82  		return innerCursorState{err: err}
    83  	}
    84  	if err != nil {
    85  		panic(fmt.Errorf("unexpected error: %w", err)) // TODO necessary?
    86  	}
    87  	state := innerCursorState{key: key}
    88  	state.layer = layer
    89  
    90  	return state
    91  }
    92  
    93  func (c *CombinedCursor) getResultFromStates(states []innerCursorState) ([]byte, *sroar.Bitmap) {
    94  	// NotFound is returned only by Seek call.
    95  	// If all cursors returned NotFound, combined Seek has no result, therefore inner cursors' states
    96  	// should not be updated to allow combined cursor to proceed with following Next calls
    97  
    98  	key, ids, allNotFound := c.getCursorIdsWithLowestKey(states)
    99  	if !allNotFound {
   100  		c.states = states
   101  	}
   102  	layers := BitmapLayers{}
   103  	for _, id := range ids {
   104  		layers = append(layers, c.states[id].layer)
   105  		// forward cursors used in final result
   106  		c.states[id] = c.createState(c.cursors[id].Next())
   107  	}
   108  
   109  	if key == nil && c.keyOnly {
   110  		return nil, nil
   111  	}
   112  
   113  	bm := layers.Flatten()
   114  	if key == nil {
   115  		return nil, bm
   116  	}
   117  
   118  	if bm.IsEmpty() {
   119  		// all values deleted, skip key
   120  		return c.Next()
   121  	}
   122  
   123  	// TODO remove keyOnly option, not used anyway
   124  	if !c.keyOnly {
   125  		return key, bm
   126  	}
   127  	return key, nil
   128  }
   129  
   130  func (c *CombinedCursor) getCursorIdsWithLowestKey(states []innerCursorState) ([]byte, []int, bool) {
   131  	var lowestKey []byte
   132  	ids := []int{}
   133  	allNotFound := true
   134  
   135  	for id, state := range states {
   136  		if errors.Is(state.err, lsmkv.NotFound) {
   137  			continue
   138  		}
   139  		allNotFound = false
   140  		if state.key == nil {
   141  			continue
   142  		}
   143  		if lowestKey == nil {
   144  			lowestKey = state.key
   145  			ids = []int{id}
   146  		} else if cmp := bytes.Compare(lowestKey, state.key); cmp > 0 {
   147  			lowestKey = state.key
   148  			ids = []int{id}
   149  		} else if cmp == 0 {
   150  			ids = append(ids, id)
   151  		}
   152  	}
   153  
   154  	return lowestKey, ids, allNotFound
   155  }