github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/cursor_bucket_replace.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"bytes"
    16  
    17  	"github.com/pkg/errors"
    18  	"github.com/weaviate/weaviate/entities/lsmkv"
    19  )
    20  
    21  type CursorReplace struct {
    22  	innerCursors []innerCursorReplace
    23  	state        []cursorStateReplace
    24  	unlock       func()
    25  	serveCache   cursorStateReplace
    26  
    27  	reusableIDList []int
    28  }
    29  
    30  type innerCursorReplace interface {
    31  	first() ([]byte, []byte, error)
    32  	next() ([]byte, []byte, error)
    33  	seek([]byte) ([]byte, []byte, error)
    34  }
    35  
    36  type cursorStateReplace struct {
    37  	key   []byte
    38  	value []byte
    39  	err   error
    40  }
    41  
    42  // Cursor holds a RLock for the flushing state. It needs to be closed using the
    43  // .Close() methods or otherwise the lock will never be released
    44  func (b *Bucket) Cursor() *CursorReplace {
    45  	b.flushLock.RLock()
    46  
    47  	if b.strategy != StrategyReplace {
    48  		panic("Cursor() called on strategy other than 'replace'")
    49  	}
    50  
    51  	innerCursors, unlockSegmentGroup := b.disk.newCursors()
    52  
    53  	// we have a flush-RLock, so we have the guarantee that the flushing state
    54  	// will not change for the lifetime of the cursor, thus there can only be two
    55  	// states: either a flushing memtable currently exists - or it doesn't
    56  	if b.flushing != nil {
    57  		innerCursors = append(innerCursors, b.flushing.newCursor())
    58  	}
    59  
    60  	innerCursors = append(innerCursors, b.active.newCursor())
    61  
    62  	return &CursorReplace{
    63  		// cursor are in order from oldest to newest, with the memtable cursor
    64  		// being at the very top
    65  		innerCursors: innerCursors,
    66  		unlock: func() {
    67  			unlockSegmentGroup()
    68  			b.flushLock.RUnlock()
    69  		},
    70  	}
    71  }
    72  
    73  func (c *CursorReplace) Close() {
    74  	c.unlock()
    75  }
    76  
    77  func (c *CursorReplace) seekAll(target []byte) {
    78  	state := make([]cursorStateReplace, len(c.innerCursors))
    79  	for i, cur := range c.innerCursors {
    80  		key, value, err := cur.seek(target)
    81  		if errors.Is(err, lsmkv.NotFound) {
    82  			state[i].err = err
    83  			continue
    84  		}
    85  
    86  		if errors.Is(err, lsmkv.Deleted) {
    87  			state[i].err = err
    88  			state[i].key = key
    89  			continue
    90  		}
    91  
    92  		if err != nil {
    93  			panic(errors.Wrap(err, "unexpected error in seek (cursor type 'replace')"))
    94  		}
    95  
    96  		state[i].key = key
    97  		state[i].value = value
    98  	}
    99  
   100  	c.state = state
   101  }
   102  
   103  func (c *CursorReplace) serveCurrentStateAndAdvance() ([]byte, []byte) {
   104  	id, err := c.cursorWithLowestKey()
   105  	if err != nil {
   106  		if errors.Is(err, lsmkv.NotFound) {
   107  			return nil, nil
   108  		}
   109  	}
   110  
   111  	// check if this is a duplicate key before checking for the remaining errors,
   112  	// as cases such as 'entities.Deleted' can be better handled inside
   113  	// mergeDuplicatesInCurrentStateAndAdvance where we can be sure to act on
   114  	// segments in the correct order
   115  	if ids, ok := c.haveDuplicatesInState(id); ok {
   116  		return c.mergeDuplicatesInCurrentStateAndAdvance(ids)
   117  	} else {
   118  		return c.mergeDuplicatesInCurrentStateAndAdvance([]int{id})
   119  	}
   120  }
   121  
   122  func (c *CursorReplace) haveDuplicatesInState(idWithLowestKey int) ([]int, bool) {
   123  	key := c.state[idWithLowestKey].key
   124  
   125  	c.reusableIDList = c.reusableIDList[:0]
   126  
   127  	for i, cur := range c.state {
   128  		if i == idWithLowestKey {
   129  			c.reusableIDList = append(c.reusableIDList, i)
   130  			continue
   131  		}
   132  
   133  		if bytes.Equal(key, cur.key) {
   134  			c.reusableIDList = append(c.reusableIDList, i)
   135  		}
   136  	}
   137  
   138  	return c.reusableIDList, len(c.reusableIDList) > 1
   139  }
   140  
   141  // if there are no duplicates present it will still work as returning the
   142  // latest result is the same as returning the only result
   143  func (c *CursorReplace) mergeDuplicatesInCurrentStateAndAdvance(ids []int) ([]byte, []byte) {
   144  	c.copyStateIntoServeCache(ids[len(ids)-1])
   145  
   146  	// with a replace strategy only the highest will be returned, but still all
   147  	// need to be advanced - or we would just encounter them again in the next
   148  	// round
   149  	for _, id := range ids {
   150  		c.advanceInner(id)
   151  	}
   152  
   153  	if errors.Is(c.serveCache.err, lsmkv.Deleted) {
   154  		// element was deleted, proceed with next round
   155  		return c.Next()
   156  	}
   157  
   158  	return c.serveCache.key, c.serveCache.value
   159  }
   160  
   161  func (c *CursorReplace) copyStateIntoServeCache(pos int) {
   162  	resMut := c.state[pos]
   163  	if len(resMut.key) > cap(c.serveCache.key) {
   164  		c.serveCache.key = make([]byte, len(resMut.key))
   165  	} else {
   166  		c.serveCache.key = c.serveCache.key[:len(resMut.key)]
   167  	}
   168  
   169  	if len(resMut.value) > cap(c.serveCache.value) {
   170  		c.serveCache.value = make([]byte, len(resMut.value))
   171  	} else {
   172  		c.serveCache.value = c.serveCache.value[:len(resMut.value)]
   173  	}
   174  
   175  	copy(c.serveCache.key, resMut.key)
   176  	copy(c.serveCache.value, resMut.value)
   177  	c.serveCache.err = resMut.err
   178  }
   179  
   180  func (c *CursorReplace) Seek(key []byte) ([]byte, []byte) {
   181  	c.seekAll(key)
   182  	return c.serveCurrentStateAndAdvance()
   183  }
   184  
   185  func (c *CursorReplace) cursorWithLowestKey() (int, error) {
   186  	err := lsmkv.NotFound
   187  	pos := -1
   188  	var lowest []byte
   189  
   190  	for i, res := range c.state {
   191  		if errors.Is(res.err, lsmkv.NotFound) {
   192  			continue
   193  		}
   194  
   195  		if lowest == nil || bytes.Compare(res.key, lowest) <= 0 {
   196  			pos = i
   197  			err = res.err
   198  			lowest = res.key
   199  		}
   200  	}
   201  
   202  	if err != nil {
   203  		return pos, err
   204  	}
   205  
   206  	return pos, nil
   207  }
   208  
   209  func (c *CursorReplace) advanceInner(id int) {
   210  	k, v, err := c.innerCursors[id].next()
   211  	if errors.Is(err, lsmkv.NotFound) {
   212  		c.state[id].err = err
   213  		c.state[id].key = nil
   214  		c.state[id].value = nil
   215  		return
   216  	}
   217  
   218  	if errors.Is(err, lsmkv.Deleted) {
   219  		c.state[id].err = err
   220  		c.state[id].key = k
   221  		c.state[id].value = nil
   222  		return
   223  	}
   224  
   225  	if err != nil {
   226  		panic(errors.Wrap(err, "unexpected error in advance"))
   227  	}
   228  
   229  	c.state[id].key = k
   230  	c.state[id].value = v
   231  	c.state[id].err = nil
   232  }
   233  
   234  func (c *CursorReplace) Next() ([]byte, []byte) {
   235  	return c.serveCurrentStateAndAdvance()
   236  }
   237  
   238  func (c *CursorReplace) firstAll() {
   239  	state := make([]cursorStateReplace, len(c.innerCursors))
   240  	for i, cur := range c.innerCursors {
   241  		key, value, err := cur.first()
   242  		if errors.Is(err, lsmkv.NotFound) {
   243  			state[i].err = err
   244  			continue
   245  		}
   246  		if errors.Is(err, lsmkv.Deleted) {
   247  			state[i].err = err
   248  			state[i].key = key
   249  			continue
   250  		}
   251  
   252  		if err != nil {
   253  			panic(errors.Wrap(err, "unexpected error in first (cursor type 'replace')"))
   254  		}
   255  
   256  		state[i].key = key
   257  		state[i].value = value
   258  	}
   259  
   260  	c.state = state
   261  }
   262  
   263  func (c *CursorReplace) First() ([]byte, []byte) {
   264  	c.firstAll()
   265  	return c.serveCurrentStateAndAdvance()
   266  }