github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/segmentindex/disk_tree.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package segmentindex 13 14 import ( 15 "bytes" 16 "errors" 17 "fmt" 18 "io" 19 20 "github.com/weaviate/weaviate/entities/lsmkv" 21 "github.com/weaviate/weaviate/usecases/byteops" 22 ) 23 24 // DiskTree is a read-only wrapper around a marshalled index search tree, which 25 // can be used for reading, but cannot change the underlying structure. It is 26 // thus perfectly suited as an index for an (immutable) LSM disk segment, but 27 // pretty much useless for anything else 28 type DiskTree struct { 29 data []byte 30 } 31 32 type dtNode struct { 33 key []byte 34 startPos uint64 35 endPos uint64 36 leftChild int64 37 rightChild int64 38 } 39 40 func NewDiskTree(data []byte) *DiskTree { 41 return &DiskTree{ 42 data: data, 43 } 44 } 45 46 func (t *DiskTree) Get(key []byte) (Node, error) { 47 if len(t.data) == 0 { 48 return Node{}, lsmkv.NotFound 49 } 50 var out Node 51 rw := byteops.NewReadWriter(t.data) 52 53 // jump to the buffer until the node with _key_ is found or return a NotFound error. 54 // This function avoids allocations by reusing the same buffer for all keys and avoids memory reads by only 55 // extracting the necessary pieces of information while skipping the rest 56 NodeKeyBuffer := make([]byte, len(key)) 57 for { 58 // detect if there is no node with the wanted key. 59 if rw.Position+4 > uint64(len(t.data)) || rw.Position+4 < 4 { 60 return out, lsmkv.NotFound 61 } 62 63 keyLen := rw.ReadUint32() 64 if int(keyLen) > len(NodeKeyBuffer) { 65 NodeKeyBuffer = make([]byte, int(keyLen)) 66 } else if int(keyLen) < len(NodeKeyBuffer) { 67 NodeKeyBuffer = NodeKeyBuffer[:keyLen] 68 } 69 _, err := rw.CopyBytesFromBuffer(uint64(keyLen), NodeKeyBuffer) 70 if err != nil { 71 return out, fmt.Errorf("copy node key: %w", err) 72 } 73 74 keyEqual := bytes.Compare(key, NodeKeyBuffer) 75 if keyEqual == 0 { 76 out.Key = NodeKeyBuffer 77 out.Start = rw.ReadUint64() 78 out.End = rw.ReadUint64() 79 return out, nil 80 } else if keyEqual < 0 { 81 rw.MoveBufferPositionForward(2 * 8) // jump over start+end position 82 rw.Position = rw.ReadUint64() // left child 83 } else { 84 rw.MoveBufferPositionForward(3 * 8) // jump over start+end position and left child 85 rw.Position = rw.ReadUint64() // right child 86 } 87 } 88 } 89 90 func (t *DiskTree) readNodeAt(offset int64) (dtNode, error) { 91 retNode, _, err := t.readNode(t.data[offset:]) 92 return retNode, err 93 } 94 95 func (t *DiskTree) readNode(in []byte) (dtNode, int, error) { 96 var out dtNode 97 // in buffer needs at least 36 bytes of data: 98 // 4bytes for key length, 32bytes for position and children 99 if len(in) < 36 { 100 return out, 0, io.EOF 101 } 102 103 rw := byteops.NewReadWriter(in) 104 105 keyLen := uint64(rw.ReadUint32()) 106 copiedBytes, err := rw.CopyBytesFromBuffer(keyLen, nil) 107 if err != nil { 108 return out, int(rw.Position), fmt.Errorf("copy node key: %w", err) 109 } 110 out.key = copiedBytes 111 112 out.startPos = rw.ReadUint64() 113 out.endPos = rw.ReadUint64() 114 out.leftChild = int64(rw.ReadUint64()) 115 out.rightChild = int64(rw.ReadUint64()) 116 return out, int(rw.Position), nil 117 } 118 119 func (t *DiskTree) Seek(key []byte) (Node, error) { 120 if len(t.data) == 0 { 121 return Node{}, lsmkv.NotFound 122 } 123 124 return t.seekAt(0, key) 125 } 126 127 func (t *DiskTree) seekAt(offset int64, key []byte) (Node, error) { 128 node, err := t.readNodeAt(offset) 129 if err != nil { 130 return Node{}, err 131 } 132 133 self := Node{ 134 Key: node.key, 135 Start: node.startPos, 136 End: node.endPos, 137 } 138 139 if bytes.Equal(key, node.key) { 140 return self, nil 141 } 142 143 if bytes.Compare(key, node.key) < 0 { 144 if node.leftChild < 0 { 145 return self, nil 146 } 147 148 left, err := t.seekAt(node.leftChild, key) 149 if err == nil { 150 return left, nil 151 } 152 153 if errors.Is(err, lsmkv.NotFound) { 154 return self, nil 155 } 156 157 return Node{}, err 158 } else { 159 if node.rightChild < 0 { 160 return Node{}, lsmkv.NotFound 161 } 162 163 return t.seekAt(node.rightChild, key) 164 } 165 } 166 167 // AllKeys is a relatively expensive operation as it basically does a full disk 168 // read of the index. It is meant for one of operations, such as initializing a 169 // segment where we need access to all keys, e.g. to build a bloom filter. This 170 // should not run at query time. 171 // 172 // The binary tree is traversed in Level-Order so keys have no meaningful 173 // order. Do not use this method if an In-Order traversal is required, but only 174 // for use cases who don't require a specific order, such as building a 175 // bloom filter. 176 func (t *DiskTree) AllKeys() ([][]byte, error) { 177 var out [][]byte 178 bufferPos := 0 179 for { 180 node, readLength, err := t.readNode(t.data[bufferPos:]) 181 bufferPos += readLength 182 if errors.Is(err, io.EOF) { 183 break 184 } 185 if err != nil { 186 return nil, err 187 } 188 189 out = append(out, node.key) 190 } 191 192 return out, nil 193 } 194 195 func (t *DiskTree) Size() int { 196 return len(t.data) 197 }