github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/segmentindex/disk_tree.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package segmentindex
    13  
    14  import (
    15  	"bytes"
    16  	"errors"
    17  	"fmt"
    18  	"io"
    19  
    20  	"github.com/weaviate/weaviate/entities/lsmkv"
    21  	"github.com/weaviate/weaviate/usecases/byteops"
    22  )
    23  
    24  // DiskTree is a read-only wrapper around a marshalled index search tree, which
    25  // can be used for reading, but cannot change the underlying structure. It is
    26  // thus perfectly suited as an index for an (immutable) LSM disk segment, but
    27  // pretty much useless for anything else
    28  type DiskTree struct {
    29  	data []byte
    30  }
    31  
    32  type dtNode struct {
    33  	key        []byte
    34  	startPos   uint64
    35  	endPos     uint64
    36  	leftChild  int64
    37  	rightChild int64
    38  }
    39  
    40  func NewDiskTree(data []byte) *DiskTree {
    41  	return &DiskTree{
    42  		data: data,
    43  	}
    44  }
    45  
    46  func (t *DiskTree) Get(key []byte) (Node, error) {
    47  	if len(t.data) == 0 {
    48  		return Node{}, lsmkv.NotFound
    49  	}
    50  	var out Node
    51  	rw := byteops.NewReadWriter(t.data)
    52  
    53  	// jump to the buffer until the node with _key_ is found or return a NotFound error.
    54  	// This function avoids allocations by reusing the same buffer for all keys and avoids memory reads by only
    55  	// extracting the necessary pieces of information while skipping the rest
    56  	NodeKeyBuffer := make([]byte, len(key))
    57  	for {
    58  		// detect if there is no node with the wanted key.
    59  		if rw.Position+4 > uint64(len(t.data)) || rw.Position+4 < 4 {
    60  			return out, lsmkv.NotFound
    61  		}
    62  
    63  		keyLen := rw.ReadUint32()
    64  		if int(keyLen) > len(NodeKeyBuffer) {
    65  			NodeKeyBuffer = make([]byte, int(keyLen))
    66  		} else if int(keyLen) < len(NodeKeyBuffer) {
    67  			NodeKeyBuffer = NodeKeyBuffer[:keyLen]
    68  		}
    69  		_, err := rw.CopyBytesFromBuffer(uint64(keyLen), NodeKeyBuffer)
    70  		if err != nil {
    71  			return out, fmt.Errorf("copy node key: %w", err)
    72  		}
    73  
    74  		keyEqual := bytes.Compare(key, NodeKeyBuffer)
    75  		if keyEqual == 0 {
    76  			out.Key = NodeKeyBuffer
    77  			out.Start = rw.ReadUint64()
    78  			out.End = rw.ReadUint64()
    79  			return out, nil
    80  		} else if keyEqual < 0 {
    81  			rw.MoveBufferPositionForward(2 * 8) // jump over start+end position
    82  			rw.Position = rw.ReadUint64()       // left child
    83  		} else {
    84  			rw.MoveBufferPositionForward(3 * 8) // jump over start+end position and left child
    85  			rw.Position = rw.ReadUint64()       // right child
    86  		}
    87  	}
    88  }
    89  
    90  func (t *DiskTree) readNodeAt(offset int64) (dtNode, error) {
    91  	retNode, _, err := t.readNode(t.data[offset:])
    92  	return retNode, err
    93  }
    94  
    95  func (t *DiskTree) readNode(in []byte) (dtNode, int, error) {
    96  	var out dtNode
    97  	// in buffer needs at least 36 bytes of data:
    98  	// 4bytes for key length, 32bytes for position and children
    99  	if len(in) < 36 {
   100  		return out, 0, io.EOF
   101  	}
   102  
   103  	rw := byteops.NewReadWriter(in)
   104  
   105  	keyLen := uint64(rw.ReadUint32())
   106  	copiedBytes, err := rw.CopyBytesFromBuffer(keyLen, nil)
   107  	if err != nil {
   108  		return out, int(rw.Position), fmt.Errorf("copy node key: %w", err)
   109  	}
   110  	out.key = copiedBytes
   111  
   112  	out.startPos = rw.ReadUint64()
   113  	out.endPos = rw.ReadUint64()
   114  	out.leftChild = int64(rw.ReadUint64())
   115  	out.rightChild = int64(rw.ReadUint64())
   116  	return out, int(rw.Position), nil
   117  }
   118  
   119  func (t *DiskTree) Seek(key []byte) (Node, error) {
   120  	if len(t.data) == 0 {
   121  		return Node{}, lsmkv.NotFound
   122  	}
   123  
   124  	return t.seekAt(0, key)
   125  }
   126  
   127  func (t *DiskTree) seekAt(offset int64, key []byte) (Node, error) {
   128  	node, err := t.readNodeAt(offset)
   129  	if err != nil {
   130  		return Node{}, err
   131  	}
   132  
   133  	self := Node{
   134  		Key:   node.key,
   135  		Start: node.startPos,
   136  		End:   node.endPos,
   137  	}
   138  
   139  	if bytes.Equal(key, node.key) {
   140  		return self, nil
   141  	}
   142  
   143  	if bytes.Compare(key, node.key) < 0 {
   144  		if node.leftChild < 0 {
   145  			return self, nil
   146  		}
   147  
   148  		left, err := t.seekAt(node.leftChild, key)
   149  		if err == nil {
   150  			return left, nil
   151  		}
   152  
   153  		if errors.Is(err, lsmkv.NotFound) {
   154  			return self, nil
   155  		}
   156  
   157  		return Node{}, err
   158  	} else {
   159  		if node.rightChild < 0 {
   160  			return Node{}, lsmkv.NotFound
   161  		}
   162  
   163  		return t.seekAt(node.rightChild, key)
   164  	}
   165  }
   166  
   167  // AllKeys is a relatively expensive operation as it basically does a full disk
   168  // read of the index. It is meant for one of operations, such as initializing a
   169  // segment where we need access to all keys, e.g. to build a bloom filter. This
   170  // should not run at query time.
   171  //
   172  // The binary tree is traversed in Level-Order so keys have no meaningful
   173  // order. Do not use this method if an In-Order traversal is required, but only
   174  // for use cases who don't require a specific order, such as building a
   175  // bloom filter.
   176  func (t *DiskTree) AllKeys() ([][]byte, error) {
   177  	var out [][]byte
   178  	bufferPos := 0
   179  	for {
   180  		node, readLength, err := t.readNode(t.data[bufferPos:])
   181  		bufferPos += readLength
   182  		if errors.Is(err, io.EOF) {
   183  			break
   184  		}
   185  		if err != nil {
   186  			return nil, err
   187  		}
   188  
   189  		out = append(out, node.key)
   190  	}
   191  
   192  	return out, nil
   193  }
   194  
   195  func (t *DiskTree) Size() int {
   196  	return len(t.data)
   197  }