github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/cursor_bucket_set.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"bytes"
    16  	"errors"
    17  	"fmt"
    18  
    19  	"github.com/weaviate/weaviate/entities/lsmkv"
    20  )
    21  
// CursorSet is a cursor over the keys of a bucket. It combines several inner
// cursors into a single stream: for every step it serves the lowest key
// currently seen by any inner cursor and merges the values of all inner
// cursors that are positioned on that same key.
//
// A CursorSet is created through Bucket.SetCursor or Bucket.SetCursorKeyOnly
// and must be released with Close.
type CursorSet struct {
	innerCursors []innerCursorCollection // sources to merge; SetCursor appends the active memtable cursor last
	state        []cursorStateCollection // current entry of each inner cursor, same index as innerCursors
	unlock       func()                  // releases the locks taken by SetCursor; invoked by Close
	keyOnly      bool                    // when true, nil is returned in place of the values
}
    28  
// innerCursorCollection is what a CursorSet requires from each of the cursors
// it merges. All three methods return a key, the raw values stored under that
// key and an error. CursorSet interprets an lsmkv.NotFound error as "this
// cursor has no entry to offer"; see seekAll, firstAll and advanceInner for
// how other errors are treated.
//
// NOTE(review): the exact positioning contract of seek (presumably the first
// key greater than or equal to the argument) is defined by the
// implementations, which are not part of this file - confirm there.
type innerCursorCollection interface {
	first() ([]byte, []value, error)
	next() ([]byte, []value, error)
	seek([]byte) ([]byte, []value, error)
}
    34  
// cursorStateCollection is the last result obtained from a single inner
// cursor: the key it is positioned on, the raw values for that key and the
// error that came with them. An err matching lsmkv.NotFound marks the inner
// cursor as exhausted; in that case key is nil.
type cursorStateCollection struct {
	key   []byte
	value []value
	err   error
}
    40  
    41  // SetCursor holds a RLock for the flushing state. It needs to be closed using the
    42  // .Close() methods or otherwise the lock will never be released
    43  func (b *Bucket) SetCursor() *CursorSet {
    44  	b.flushLock.RLock()
    45  
    46  	if b.strategy != StrategySetCollection {
    47  		panic("SetCursor() called on strategy other than 'set'")
    48  	}
    49  
    50  	innerCursors, unlockSegmentGroup := b.disk.newCollectionCursors()
    51  
    52  	// we have a flush-RLock, so we have the guarantee that the flushing state
    53  	// will not change for the lifetime of the cursor, thus there can only be two
    54  	// states: either a flushing memtable currently exists - or it doesn't
    55  	if b.flushing != nil {
    56  		innerCursors = append(innerCursors, b.flushing.newCollectionCursor())
    57  	}
    58  
    59  	innerCursors = append(innerCursors, b.active.newCollectionCursor())
    60  
    61  	return &CursorSet{
    62  		unlock: func() {
    63  			unlockSegmentGroup()
    64  			b.flushLock.RUnlock()
    65  		},
    66  		// cursor are in order from oldest to newest, with the memtable cursor
    67  		// being at the very top
    68  		innerCursors: innerCursors,
    69  	}
    70  }
    71  
    72  // SetCursorKeyOnly returns nil for all values. It has no control over the
    73  // underlying "inner" cursors which may still retrieve a value which is then
    74  // discarded. It does however, omit any handling of values, such as decoding,
    75  // making this considerably more efficient if only keys are required.
    76  //
    77  // The same locking rules as for SetCursor apply.
    78  func (b *Bucket) SetCursorKeyOnly() *CursorSet {
    79  	c := b.SetCursor()
    80  	c.keyOnly = true
    81  	return c
    82  }
    83  
    84  func (c *CursorSet) Seek(key []byte) ([]byte, [][]byte) {
    85  	c.seekAll(key)
    86  	return c.serveCurrentStateAndAdvance()
    87  }
    88  
    89  func (c *CursorSet) Next() ([]byte, [][]byte) {
    90  	return c.serveCurrentStateAndAdvance()
    91  }
    92  
    93  func (c *CursorSet) First() ([]byte, [][]byte) {
    94  	c.firstAll()
    95  	return c.serveCurrentStateAndAdvance()
    96  }
    97  
    98  func (c *CursorSet) Close() {
    99  	c.unlock()
   100  }
   101  
   102  func (c *CursorSet) seekAll(target []byte) {
   103  	state := make([]cursorStateCollection, len(c.innerCursors))
   104  	for i, cur := range c.innerCursors {
   105  		key, value, err := cur.seek(target)
   106  		if errors.Is(err, lsmkv.NotFound) {
   107  			state[i].err = err
   108  			continue
   109  		}
   110  
   111  		if err != nil {
   112  			panic(fmt.Errorf("unexpected error in seek: %w", err))
   113  		}
   114  
   115  		state[i].key = key
   116  		if !c.keyOnly {
   117  			state[i].value = value
   118  		}
   119  	}
   120  
   121  	c.state = state
   122  }
   123  
   124  func (c *CursorSet) firstAll() {
   125  	state := make([]cursorStateCollection, len(c.innerCursors))
   126  	for i, cur := range c.innerCursors {
   127  		key, value, err := cur.first()
   128  		if errors.Is(err, lsmkv.NotFound) {
   129  			state[i].err = err
   130  			continue
   131  		}
   132  
   133  		if err != nil {
   134  			panic(fmt.Errorf("unexpected error in seek: %w", err))
   135  		}
   136  
   137  		state[i].key = key
   138  		if !c.keyOnly {
   139  			state[i].value = value
   140  		}
   141  	}
   142  
   143  	c.state = state
   144  }
   145  
   146  func (c *CursorSet) serveCurrentStateAndAdvance() ([]byte, [][]byte) {
   147  	id, err := c.cursorWithLowestKey()
   148  	if err != nil {
   149  		if errors.Is(err, lsmkv.NotFound) {
   150  			return nil, nil
   151  		}
   152  	}
   153  
   154  	// check if this is a duplicate key before checking for the remaining errors,
   155  	// as cases such as 'entities.Deleted' can be better handled inside
   156  	// mergeDuplicatesInCurrentStateAndAdvance where we can be sure to act on
   157  	// segments in the correct order
   158  	if ids, ok := c.haveDuplicatesInState(id); ok {
   159  		return c.mergeDuplicatesInCurrentStateAndAdvance(ids)
   160  	} else {
   161  		return c.mergeDuplicatesInCurrentStateAndAdvance([]int{id})
   162  	}
   163  }
   164  
   165  func (c *CursorSet) cursorWithLowestKey() (int, error) {
   166  	err := lsmkv.NotFound
   167  	pos := -1
   168  	var lowest []byte
   169  
   170  	for i, res := range c.state {
   171  		if errors.Is(res.err, lsmkv.NotFound) {
   172  			continue
   173  		}
   174  
   175  		if lowest == nil || bytes.Compare(res.key, lowest) <= 0 {
   176  			pos = i
   177  			err = res.err
   178  			lowest = res.key
   179  		}
   180  	}
   181  
   182  	if err != nil {
   183  		return pos, err
   184  	}
   185  
   186  	return pos, nil
   187  }
   188  
   189  func (c *CursorSet) haveDuplicatesInState(idWithLowestKey int) ([]int, bool) {
   190  	key := c.state[idWithLowestKey].key
   191  
   192  	var idsFound []int
   193  
   194  	for i, cur := range c.state {
   195  		if i == idWithLowestKey {
   196  			idsFound = append(idsFound, i)
   197  			continue
   198  		}
   199  
   200  		if bytes.Equal(key, cur.key) {
   201  			idsFound = append(idsFound, i)
   202  		}
   203  	}
   204  
   205  	return idsFound, len(idsFound) > 1
   206  }
   207  
   208  // if there are no duplicates present it will still work as returning the
   209  // latest result is the same as returning the only result
   210  func (c *CursorSet) mergeDuplicatesInCurrentStateAndAdvance(ids []int) ([]byte, [][]byte) {
   211  	// take the key from any of the results, we have the guarantee that they're
   212  	// all the same
   213  	key := c.state[ids[0]].key
   214  
   215  	var raw []value
   216  	for _, id := range ids {
   217  		raw = append(raw, c.state[id].value...)
   218  		c.advanceInner(id)
   219  	}
   220  
   221  	values := newSetDecoder().Do(raw)
   222  	if len(values) == 0 {
   223  		// all values deleted, skip key
   224  		return c.Next()
   225  	}
   226  
   227  	// TODO remove keyOnly option, not used anyway
   228  	if !c.keyOnly {
   229  		return key, values
   230  	} else {
   231  		return key, nil
   232  	}
   233  }
   234  
   235  func (c *CursorSet) advanceInner(id int) {
   236  	k, v, err := c.innerCursors[id].next()
   237  	if errors.Is(err, lsmkv.NotFound) {
   238  		c.state[id].err = err
   239  		c.state[id].key = nil
   240  		if !c.keyOnly {
   241  			c.state[id].value = nil
   242  		}
   243  		return
   244  	}
   245  
   246  	if errors.Is(err, lsmkv.Deleted) {
   247  		c.state[id].err = err
   248  		c.state[id].key = k
   249  		c.state[id].value = nil
   250  		return
   251  	}
   252  
   253  	if err != nil {
   254  		panic(fmt.Errorf("unexpected error in advance: %w", err))
   255  	}
   256  
   257  	c.state[id].key = k
   258  	if !c.keyOnly {
   259  		c.state[id].value = v
   260  	}
   261  	c.state[id].err = nil
   262  }