github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/cursor_bucket_map.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"bytes"
    16  	"errors"
    17  	"fmt"
    18  	"sort"
    19  
    20  	"github.com/weaviate/weaviate/entities/lsmkv"
    21  )
    22  
// CursorMap iterates over the keys of a map bucket by combining several inner
// cursors into a single ordered stream of keys with merged map pairs.
type CursorMap struct {
	// innerCursors holds the cursors that are combined. MapCursor fills it with
	// the disk cursors first, then the flushing memtable cursor (if one exists)
	// and finally the active memtable cursor, i.e. ordered from oldest to
	// newest.
	innerCursors []innerCursorMap
	// state holds the current, not yet served, position of every inner cursor.
	// It is index-aligned with innerCursors and is (re)built by First and Seek.
	state []cursorStateMap
	// unlock is invoked by Close to release whatever was acquired when the
	// cursor was created.
	unlock func()
	// listCfg carries the options passed at construction time; its
	// legacyRequireManualSorting flag makes the cursor sort each per-cursor
	// result by pair key before merging.
	listCfg MapListOptionConfig
	// keyOnly suppresses values: they are neither kept in state nor returned.
	keyOnly bool
}
    30  
// cursorStateMap is the most recent result read from a single inner cursor.
type cursorStateMap struct {
	// key is the key the inner cursor currently points at; nil once the cursor
	// reported lsmkv.NotFound.
	key []byte
	// value holds the map pairs read for key (left unset in key-only mode).
	value []MapPair
	// err is the error of the last read. lsmkv.NotFound marks a cursor that has
	// no (further) entry and is skipped when looking for the lowest key;
	// lsmkv.Deleted is recorded by advanceInner together with the key.
	err error
}
    36  
// innerCursorMap is the contract a single source cursor has to fulfil to be
// usable inside a CursorMap. Every method returns the key, the map pairs for
// that key and an error; CursorMap treats lsmkv.NotFound as "no entry" and
// panics on errors it does not explicitly handle.
type innerCursorMap interface {
	// first is called by CursorMap.First to (re)start iteration.
	first() ([]byte, []MapPair, error)
	// next is called to advance after the current entry has been consumed.
	next() ([]byte, []MapPair, error)
	// seek is called by CursorMap.Seek with the target key.
	// NOTE(review): presumably positions at the first key >= target — confirm
	// against the implementations.
	seek([]byte) ([]byte, []MapPair, error)
}
    42  
    43  func (b *Bucket) MapCursor(cfgs ...MapListOption) *CursorMap {
    44  	b.flushLock.RLock()
    45  
    46  	c := MapListOptionConfig{}
    47  	for _, cfg := range cfgs {
    48  		cfg(&c)
    49  	}
    50  
    51  	innerCursors, unlockSegmentGroup := b.disk.newMapCursors()
    52  
    53  	// we have a flush-RLock, so we have the guarantee that the flushing state
    54  	// will not change for the lifetime of the cursor, thus there can only be two
    55  	// states: either a flushing memtable currently exists - or it doesn't
    56  	if b.flushing != nil {
    57  		innerCursors = append(innerCursors, b.flushing.newMapCursor())
    58  	}
    59  
    60  	innerCursors = append(innerCursors, b.active.newMapCursor())
    61  
    62  	return &CursorMap{
    63  		unlock: func() {
    64  			unlockSegmentGroup()
    65  			b.flushLock.RUnlock()
    66  		},
    67  		// cursor are in order from oldest to newest, with the memtable cursor
    68  		// being at the very top
    69  		innerCursors: innerCursors,
    70  		listCfg:      c,
    71  	}
    72  }
    73  
    74  func (b *Bucket) MapCursorKeyOnly(cfgs ...MapListOption) *CursorMap {
    75  	c := b.MapCursor(cfgs...)
    76  	c.keyOnly = true
    77  	return c
    78  }
    79  
    80  func (c *CursorMap) Seek(key []byte) ([]byte, []MapPair) {
    81  	c.seekAll(key)
    82  	return c.serveCurrentStateAndAdvance()
    83  }
    84  
    85  func (c *CursorMap) Next() ([]byte, []MapPair) {
    86  	// before := time.Now()
    87  	// defer func() {
    88  	// 	fmt.Printf("-- total next took %s\n", time.Since(before))
    89  	// }()
    90  	return c.serveCurrentStateAndAdvance()
    91  }
    92  
    93  func (c *CursorMap) First() ([]byte, []MapPair) {
    94  	c.firstAll()
    95  	return c.serveCurrentStateAndAdvance()
    96  }
    97  
// Close invokes the unlock callback the cursor was created with. For cursors
// obtained through Bucket.MapCursor this runs the segment group's release
// callback and read-unlocks the bucket's flush lock, so it must be called
// exactly once per cursor.
func (c *CursorMap) Close() {
	c.unlock()
}
   101  
   102  func (c *CursorMap) seekAll(target []byte) {
   103  	state := make([]cursorStateMap, len(c.innerCursors))
   104  	for i, cur := range c.innerCursors {
   105  		key, value, err := cur.seek(target)
   106  		if errors.Is(err, lsmkv.NotFound) {
   107  			state[i].err = err
   108  			continue
   109  		}
   110  
   111  		if err != nil {
   112  			panic(fmt.Errorf("unexpected error in seek: %w", err))
   113  		}
   114  
   115  		state[i].key = key
   116  		if !c.keyOnly {
   117  			state[i].value = value
   118  		}
   119  	}
   120  
   121  	c.state = state
   122  }
   123  
   124  func (c *CursorMap) firstAll() {
   125  	state := make([]cursorStateMap, len(c.innerCursors))
   126  	for i, cur := range c.innerCursors {
   127  		key, value, err := cur.first()
   128  		if errors.Is(err, lsmkv.NotFound) {
   129  			state[i].err = err
   130  			continue
   131  		}
   132  
   133  		if err != nil {
   134  			panic(fmt.Errorf("unexpected error in seek: %w", err))
   135  		}
   136  
   137  		state[i].key = key
   138  		if !c.keyOnly {
   139  			state[i].value = value
   140  		}
   141  	}
   142  
   143  	c.state = state
   144  }
   145  
   146  func (c *CursorMap) serveCurrentStateAndAdvance() ([]byte, []MapPair) {
   147  	id, err := c.cursorWithLowestKey()
   148  	if err != nil {
   149  		if errors.Is(err, lsmkv.NotFound) {
   150  			return nil, nil
   151  		}
   152  	}
   153  
   154  	// check if this is a duplicate key before checking for the remaining errors,
   155  	// as cases such as 'entities.Deleted' can be better handled inside
   156  	// mergeDuplicatesInCurrentStateAndAdvance where we can be sure to act on
   157  	// segments in the correct order
   158  	if ids, ok := c.haveDuplicatesInState(id); ok {
   159  		return c.mergeDuplicatesInCurrentStateAndAdvance(ids)
   160  	} else {
   161  		return c.mergeDuplicatesInCurrentStateAndAdvance([]int{id})
   162  	}
   163  }
   164  
   165  func (c *CursorMap) cursorWithLowestKey() (int, error) {
   166  	err := lsmkv.NotFound
   167  	pos := -1
   168  	var lowest []byte
   169  
   170  	for i, res := range c.state {
   171  		if errors.Is(res.err, lsmkv.NotFound) {
   172  			continue
   173  		}
   174  
   175  		if lowest == nil || bytes.Compare(res.key, lowest) <= 0 {
   176  			pos = i
   177  			err = res.err
   178  			lowest = res.key
   179  		}
   180  	}
   181  
   182  	if err != nil {
   183  		return pos, err
   184  	}
   185  
   186  	return pos, nil
   187  }
   188  
   189  func (c *CursorMap) haveDuplicatesInState(idWithLowestKey int) ([]int, bool) {
   190  	key := c.state[idWithLowestKey].key
   191  
   192  	var idsFound []int
   193  
   194  	for i, cur := range c.state {
   195  		if i == idWithLowestKey {
   196  			idsFound = append(idsFound, i)
   197  			continue
   198  		}
   199  
   200  		if bytes.Equal(key, cur.key) {
   201  			idsFound = append(idsFound, i)
   202  		}
   203  	}
   204  
   205  	return idsFound, len(idsFound) > 1
   206  }
   207  
   208  // if there are no duplicates present it will still work as returning the
   209  // latest result is the same as returning the only result
   210  func (c *CursorMap) mergeDuplicatesInCurrentStateAndAdvance(ids []int) ([]byte, []MapPair) {
   211  	// take the key from any of the results, we have the guarantee that they're
   212  	// all the same
   213  	key := c.state[ids[0]].key
   214  
   215  	// appending := time.Duration(0)
   216  	// advancing := time.Duration(0)
   217  
   218  	var perSegmentResults [][]MapPair
   219  
   220  	for _, id := range ids {
   221  		candidates := c.state[id].value
   222  		perSegmentResults = append(perSegmentResults, candidates)
   223  
   224  		// before = time.Now()
   225  		c.advanceInner(id)
   226  		// advancing += time.Since(before)
   227  	}
   228  	// fmt.Printf("--- extract values [appending] took %s\n", appending)
   229  	// fmt.Printf("--- extract values [advancing] took %s\n", advancing)
   230  
   231  	if c.listCfg.legacyRequireManualSorting {
   232  		for i := range perSegmentResults {
   233  			sort.Slice(perSegmentResults[i], func(a, b int) bool {
   234  				return bytes.Compare(perSegmentResults[i][a].Key,
   235  					perSegmentResults[i][b].Key) == -1
   236  			})
   237  		}
   238  	}
   239  
   240  	merged, err := newSortedMapMerger().do(perSegmentResults)
   241  	if err != nil {
   242  		panic(fmt.Errorf("unexpected error decoding map values: %w", err))
   243  	}
   244  	if len(merged) == 0 {
   245  		// all values deleted, skip key
   246  		return c.Next()
   247  	}
   248  
   249  	// TODO remove keyOnly option, not used anyway
   250  	if !c.keyOnly {
   251  		return key, merged
   252  	} else {
   253  		return key, nil
   254  	}
   255  }
   256  
   257  func (c *CursorMap) advanceInner(id int) {
   258  	k, v, err := c.innerCursors[id].next()
   259  	if errors.Is(err, lsmkv.NotFound) {
   260  		c.state[id].err = err
   261  		c.state[id].key = nil
   262  		c.state[id].value = nil
   263  		return
   264  	}
   265  
   266  	if errors.Is(err, lsmkv.Deleted) {
   267  		c.state[id].err = err
   268  		c.state[id].key = k
   269  		c.state[id].value = nil
   270  		return
   271  	}
   272  
   273  	if err != nil {
   274  		panic(fmt.Errorf("unexpected error in advance: %w", err))
   275  	}
   276  
   277  	c.state[id].key = k
   278  	if !c.keyOnly {
   279  		c.state[id].value = v
   280  	}
   281  	c.state[id].err = nil
   282  }