github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/node.go (about) 1 // Copyright 2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tree 16 17 import ( 18 "context" 19 "encoding/hex" 20 "fmt" 21 "io" 22 23 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 24 25 "github.com/dolthub/dolt/go/gen/fb/serial" 26 "github.com/dolthub/dolt/go/store/hash" 27 "github.com/dolthub/dolt/go/store/prolly/message" 28 "github.com/dolthub/dolt/go/store/types" 29 "github.com/dolthub/dolt/go/store/val" 30 ) 31 32 type Item []byte 33 34 type subtreeCounts []uint64 35 36 // Node is a generic implementation of a prolly tree node. 37 // Elements in a Node are generic Items. Interpreting Item 38 // contents is deferred to higher layers (see prolly.Map). 39 type Node struct { 40 // keys and values cache offset metadata 41 // to accelerate Item lookups into msg. 42 keys, values message.ItemAccess 43 44 // count is the Item pair count. 45 count uint16 46 47 // level is 0-indexed tree height. 48 level uint16 49 50 // subtrees contains the key cardinality 51 // of each child tree of a non-leaf Node. 52 // this field is lazily decoded from msg 53 // because it requires a malloc. 54 subtrees *subtreeCounts 55 56 // msg is the underlying buffer for the Node 57 // encoded as a Flatbuffers message. 58 msg serial.Message 59 } 60 61 type AddressCb func(ctx context.Context, addr hash.Hash) error 62 63 func WalkAddresses(ctx context.Context, nd Node, ns NodeStore, cb AddressCb) error { 64 return walkAddresses(ctx, nd, func(ctx context.Context, addr hash.Hash) error { 65 if err := cb(ctx, addr); err != nil { 66 return err 67 } 68 69 if nd.IsLeaf() { 70 return nil 71 } 72 73 child, err := ns.Read(ctx, addr) 74 if err != nil { 75 return err 76 } 77 78 return WalkAddresses(ctx, child, ns, cb) 79 }) 80 } 81 82 type NodeCb func(ctx context.Context, nd Node) error 83 84 // WalkNodes runs a callback function on every node found in the DFS of |nd| 85 // that is of the same message type as |nd|. 86 func WalkNodes(ctx context.Context, nd Node, ns NodeStore, cb NodeCb) error { 87 if err := cb(ctx, nd); err != nil { 88 return err 89 } 90 if nd.IsLeaf() { 91 return nil 92 } 93 94 return walkAddresses(ctx, nd, func(ctx context.Context, addr hash.Hash) error { 95 child, err := ns.Read(ctx, addr) 96 if err != nil { 97 return err 98 } 99 return WalkNodes(ctx, child, ns, cb) 100 }) 101 } 102 103 // walkOpaqueNodes runs a callback function on every node found in the DFS of |nd| 104 // including nested trees. 105 func walkOpaqueNodes(ctx context.Context, nd Node, ns NodeStore, cb NodeCb) error { 106 if err := cb(ctx, nd); err != nil { 107 return err 108 } 109 110 return walkAddresses(ctx, nd, func(ctx context.Context, addr hash.Hash) error { 111 child, err := ns.Read(ctx, addr) 112 if err != nil { 113 return err 114 } 115 return WalkNodes(ctx, child, ns, cb) 116 }) 117 } 118 119 func NodeFromBytes(msg []byte) (Node, error) { 120 keys, values, level, count, err := message.UnpackFields(msg) 121 return Node{ 122 keys: keys, 123 values: values, 124 count: count, 125 level: level, 126 msg: msg, 127 }, err 128 } 129 130 func (nd Node) HashOf() hash.Hash { 131 return hash.Of(nd.bytes()) 132 } 133 134 func (nd Node) Count() int { 135 return int(nd.count) 136 } 137 138 func (nd Node) TreeCount() (int, error) { 139 return message.GetTreeCount(nd.msg) 140 } 141 142 func (nd Node) Size() int { 143 return len(nd.bytes()) 144 } 145 146 // Level returns the tree Level for this node 147 func (nd Node) Level() int { 148 return int(nd.level) 149 } 150 151 // IsLeaf returns whether this node is a leaf 152 func (nd Node) IsLeaf() bool { 153 return nd.level == 0 154 } 155 156 // GetKey returns the |ith| key of this node 157 func (nd Node) GetKey(i int) Item { 158 return nd.keys.GetItem(i, nd.msg) 159 } 160 161 // GetValue returns the |ith| value of this node. 162 func (nd Node) GetValue(i int) Item { 163 return nd.values.GetItem(i, nd.msg) 164 } 165 166 func (nd Node) loadSubtrees() (Node, error) { 167 var err error 168 if nd.subtrees == nil { 169 // deserializing subtree counts requires a malloc, 170 // we don't load them unless explicitly requested 171 sc, err := message.GetSubtrees(nd.msg) 172 if err != nil { 173 return Node{}, err 174 } 175 nd.subtrees = (*subtreeCounts)(&sc) 176 } 177 return nd, err 178 } 179 180 func (nd Node) getSubtreeCount(i int) (uint64, error) { 181 if nd.IsLeaf() { 182 return 1, nil 183 } 184 // this will panic unless subtrees were loaded. 185 return (*nd.subtrees)[i], nil 186 } 187 188 // getAddress returns the |ith| address of this node. 189 // This method assumes values are 20-byte address hashes. 190 func (nd Node) getAddress(i int) hash.Hash { 191 return hash.New(nd.GetValue(i)) 192 } 193 194 func (nd Node) empty() bool { 195 return nd.bytes() == nil || nd.count == 0 196 } 197 198 func (nd Node) bytes() []byte { 199 return nd.msg 200 } 201 202 func walkAddresses(ctx context.Context, nd Node, cb AddressCb) (err error) { 203 return message.WalkAddresses(ctx, nd.msg, cb) 204 } 205 206 func getLastKey(nd Node) Item { 207 return nd.GetKey(int(nd.count) - 1) 208 } 209 210 // OutputProllyNode writes the node given to the writer given in a human-readable format, with values converted 211 // to the type specified by the provided schema. All nodes have keys displayed in this manner. Interior nodes have 212 // their child hash references spelled out, leaf nodes have value tuples delineated like the keys 213 func OutputProllyNode(ctx context.Context, w io.Writer, node Node, ns NodeStore, schema schema.Schema) error { 214 kd := schema.GetKeyDescriptor() 215 vd := schema.GetValueDescriptor() 216 for i := 0; i < int(node.count); i++ { 217 k := node.GetKey(i) 218 kt := val.Tuple(k) 219 220 w.Write([]byte("\n { key: ")) 221 for j := 0; j < kt.Count(); j++ { 222 if j > 0 { 223 w.Write([]byte(", ")) 224 } 225 226 isAddr := val.IsAddrEncoding(kd.Types[j].Enc) 227 if isAddr { 228 w.Write([]byte("#")) 229 } 230 w.Write([]byte(hex.EncodeToString(kd.GetField(j, kt)))) 231 if isAddr { 232 w.Write([]byte(" (")) 233 key, err := GetField(ctx, kd, j, kt, ns) 234 if err != nil { 235 return err 236 } 237 w.Write([]byte(fmt.Sprint(key))) 238 w.Write([]byte(")")) 239 } 240 241 } 242 243 if node.IsLeaf() { 244 v := node.GetValue(i) 245 vt := val.Tuple(v) 246 247 w.Write([]byte(" value: ")) 248 for j := 0; j < vt.Count(); j++ { 249 if j > 0 { 250 w.Write([]byte(", ")) 251 } 252 isAddr := val.IsAddrEncoding(vd.Types[j].Enc) 253 if isAddr { 254 w.Write([]byte("#")) 255 } 256 w.Write([]byte(hex.EncodeToString(vd.GetField(j, vt)))) 257 if isAddr { 258 w.Write([]byte(" (")) 259 value, err := GetField(ctx, vd, j, vt, ns) 260 if err != nil { 261 return err 262 } 263 w.Write([]byte(fmt.Sprint(value))) 264 w.Write([]byte(")")) 265 } 266 } 267 268 w.Write([]byte(" }")) 269 } else { 270 ref := node.getAddress(i) 271 272 w.Write([]byte(" ref: #")) 273 w.Write([]byte(ref.String())) 274 w.Write([]byte(" }")) 275 } 276 } 277 278 w.Write([]byte("\n")) 279 return nil 280 } 281 282 // OutputProllyNodeBytes writes the node given to the writer given in a semi-human-readable format, where values are still 283 // displayed in hex-encoded byte strings, but are delineated into their fields. All nodes have keys displayed in this 284 // manner. Interior nodes have their child hash references spelled out, leaf nodes have value tuples delineated like 285 // the keys 286 func OutputProllyNodeBytes(w io.Writer, node Node) error { 287 for i := 0; i < int(node.count); i++ { 288 k := node.GetKey(i) 289 kt := val.Tuple(k) 290 291 w.Write([]byte("\n { key: ")) 292 for j := 0; j < kt.Count(); j++ { 293 if j > 0 { 294 w.Write([]byte(", ")) 295 } 296 297 w.Write([]byte(hex.EncodeToString(kt.GetField(j)))) 298 } 299 300 if node.IsLeaf() { 301 v := node.GetValue(i) 302 vt := val.Tuple(v) 303 304 w.Write([]byte(" value: ")) 305 for j := 0; j < vt.Count(); j++ { 306 if j > 0 { 307 w.Write([]byte(", ")) 308 } 309 w.Write([]byte(hex.EncodeToString(vt.GetField(j)))) 310 } 311 312 w.Write([]byte(" }")) 313 } else { 314 ref := node.getAddress(i) 315 316 w.Write([]byte(" ref: #")) 317 w.Write([]byte(ref.String())) 318 w.Write([]byte(" }")) 319 } 320 } 321 322 w.Write([]byte("\n")) 323 return nil 324 } 325 326 func OutputAddressMapNode(w io.Writer, node Node) error { 327 for i := 0; i < int(node.count); i++ { 328 k := node.GetKey(i) 329 w.Write([]byte("\n { key: ")) 330 w.Write(k) 331 332 ref := node.getAddress(i) 333 334 w.Write([]byte(" ref: #")) 335 w.Write([]byte(ref.String())) 336 w.Write([]byte(" }")) 337 } 338 w.Write([]byte("\n")) 339 return nil 340 } 341 342 func ValueFromNode(root Node) types.Value { 343 return types.SerialMessage(root.bytes()) 344 }