github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/node.go (about)

     1  // Copyright 2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tree
    16  
    17  import (
    18  	"context"
    19  	"encoding/hex"
    20  	"fmt"
    21  	"io"
    22  
    23  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    24  
    25  	"github.com/dolthub/dolt/go/gen/fb/serial"
    26  	"github.com/dolthub/dolt/go/store/hash"
    27  	"github.com/dolthub/dolt/go/store/prolly/message"
    28  	"github.com/dolthub/dolt/go/store/types"
    29  	"github.com/dolthub/dolt/go/store/val"
    30  )
    31  
// Item is a raw byte slice holding a single key or value of a Node.
// Its contents are not interpreted in this file.
type Item []byte
    33  
// subtreeCounts is the decoded form of a non-leaf Node's per-child
// counts (see Node.subtrees); it is indexed by child position.
type subtreeCounts []uint64
    35  
// Node is a generic implementation of a prolly tree node.
// Elements in a Node are generic Items. Interpreting Item
// contents is deferred to higher layers (see prolly.Map).
type Node struct {
	// keys and values cache offset metadata
	// to accelerate Item lookups into msg.
	keys, values message.ItemAccess

	// count is the Item pair count.
	count uint16

	// level is 0-indexed tree height;
	// a Node at level 0 is a leaf (see IsLeaf).
	level uint16

	// subtrees contains the key cardinality
	// of each child tree of a non-leaf Node.
	// this field is lazily decoded from msg
	// (see loadSubtrees) because it requires a malloc;
	// it is nil until then.
	subtrees *subtreeCounts

	// msg is the underlying buffer for the Node
	// encoded as a Flatbuffers message.
	msg serial.Message
}
    60  
// AddressCb is the callback type accepted by the address-walking
// functions in this file; it receives one address per invocation.
// A non-nil error is handed back to the walker that invoked it.
type AddressCb func(ctx context.Context, addr hash.Hash) error
    62  
    63  func WalkAddresses(ctx context.Context, nd Node, ns NodeStore, cb AddressCb) error {
    64  	return walkAddresses(ctx, nd, func(ctx context.Context, addr hash.Hash) error {
    65  		if err := cb(ctx, addr); err != nil {
    66  			return err
    67  		}
    68  
    69  		if nd.IsLeaf() {
    70  			return nil
    71  		}
    72  
    73  		child, err := ns.Read(ctx, addr)
    74  		if err != nil {
    75  			return err
    76  		}
    77  
    78  		return WalkAddresses(ctx, child, ns, cb)
    79  	})
    80  }
    81  
// NodeCb is the callback type accepted by the node-walking functions
// in this file; it receives one Node per invocation. A non-nil error
// is returned by the walker that invoked it.
type NodeCb func(ctx context.Context, nd Node) error
    83  
    84  // WalkNodes runs a callback function on every node found in the DFS of |nd|
    85  // that is of the same message type as |nd|.
    86  func WalkNodes(ctx context.Context, nd Node, ns NodeStore, cb NodeCb) error {
    87  	if err := cb(ctx, nd); err != nil {
    88  		return err
    89  	}
    90  	if nd.IsLeaf() {
    91  		return nil
    92  	}
    93  
    94  	return walkAddresses(ctx, nd, func(ctx context.Context, addr hash.Hash) error {
    95  		child, err := ns.Read(ctx, addr)
    96  		if err != nil {
    97  			return err
    98  		}
    99  		return WalkNodes(ctx, child, ns, cb)
   100  	})
   101  }
   102  
   103  // walkOpaqueNodes runs a callback function on every node found in the DFS of |nd|
   104  // including nested trees.
   105  func walkOpaqueNodes(ctx context.Context, nd Node, ns NodeStore, cb NodeCb) error {
   106  	if err := cb(ctx, nd); err != nil {
   107  		return err
   108  	}
   109  
   110  	return walkAddresses(ctx, nd, func(ctx context.Context, addr hash.Hash) error {
   111  		child, err := ns.Read(ctx, addr)
   112  		if err != nil {
   113  			return err
   114  		}
   115  		return WalkNodes(ctx, child, ns, cb)
   116  	})
   117  }
   118  
   119  func NodeFromBytes(msg []byte) (Node, error) {
   120  	keys, values, level, count, err := message.UnpackFields(msg)
   121  	return Node{
   122  		keys:   keys,
   123  		values: values,
   124  		count:  count,
   125  		level:  level,
   126  		msg:    msg,
   127  	}, err
   128  }
   129  
   130  func (nd Node) HashOf() hash.Hash {
   131  	return hash.Of(nd.bytes())
   132  }
   133  
   134  func (nd Node) Count() int {
   135  	return int(nd.count)
   136  }
   137  
   138  func (nd Node) TreeCount() (int, error) {
   139  	return message.GetTreeCount(nd.msg)
   140  }
   141  
   142  func (nd Node) Size() int {
   143  	return len(nd.bytes())
   144  }
   145  
   146  // Level returns the tree Level for this node
   147  func (nd Node) Level() int {
   148  	return int(nd.level)
   149  }
   150  
   151  // IsLeaf returns whether this node is a leaf
   152  func (nd Node) IsLeaf() bool {
   153  	return nd.level == 0
   154  }
   155  
   156  // GetKey returns the |ith| key of this node
   157  func (nd Node) GetKey(i int) Item {
   158  	return nd.keys.GetItem(i, nd.msg)
   159  }
   160  
   161  // GetValue returns the |ith| value of this node.
   162  func (nd Node) GetValue(i int) Item {
   163  	return nd.values.GetItem(i, nd.msg)
   164  }
   165  
   166  func (nd Node) loadSubtrees() (Node, error) {
   167  	var err error
   168  	if nd.subtrees == nil {
   169  		// deserializing subtree counts requires a malloc,
   170  		// we don't load them unless explicitly requested
   171  		sc, err := message.GetSubtrees(nd.msg)
   172  		if err != nil {
   173  			return Node{}, err
   174  		}
   175  		nd.subtrees = (*subtreeCounts)(&sc)
   176  	}
   177  	return nd, err
   178  }
   179  
   180  func (nd Node) getSubtreeCount(i int) (uint64, error) {
   181  	if nd.IsLeaf() {
   182  		return 1, nil
   183  	}
   184  	// this will panic unless subtrees were loaded.
   185  	return (*nd.subtrees)[i], nil
   186  }
   187  
   188  // getAddress returns the |ith| address of this node.
   189  // This method assumes values are 20-byte address hashes.
   190  func (nd Node) getAddress(i int) hash.Hash {
   191  	return hash.New(nd.GetValue(i))
   192  }
   193  
   194  func (nd Node) empty() bool {
   195  	return nd.bytes() == nil || nd.count == 0
   196  }
   197  
   198  func (nd Node) bytes() []byte {
   199  	return nd.msg
   200  }
   201  
   202  func walkAddresses(ctx context.Context, nd Node, cb AddressCb) (err error) {
   203  	return message.WalkAddresses(ctx, nd.msg, cb)
   204  }
   205  
   206  func getLastKey(nd Node) Item {
   207  	return nd.GetKey(int(nd.count) - 1)
   208  }
   209  
   210  // OutputProllyNode writes the node given to the writer given in a human-readable format, with values converted
   211  // to the type specified by the provided schema. All nodes have keys displayed in this manner. Interior nodes have
   212  // their child hash references spelled out, leaf nodes have value tuples delineated like the keys
   213  func OutputProllyNode(ctx context.Context, w io.Writer, node Node, ns NodeStore, schema schema.Schema) error {
   214  	kd := schema.GetKeyDescriptor()
   215  	vd := schema.GetValueDescriptor()
   216  	for i := 0; i < int(node.count); i++ {
   217  		k := node.GetKey(i)
   218  		kt := val.Tuple(k)
   219  
   220  		w.Write([]byte("\n    { key: "))
   221  		for j := 0; j < kt.Count(); j++ {
   222  			if j > 0 {
   223  				w.Write([]byte(", "))
   224  			}
   225  
   226  			isAddr := val.IsAddrEncoding(kd.Types[j].Enc)
   227  			if isAddr {
   228  				w.Write([]byte("#"))
   229  			}
   230  			w.Write([]byte(hex.EncodeToString(kd.GetField(j, kt))))
   231  			if isAddr {
   232  				w.Write([]byte(" ("))
   233  				key, err := GetField(ctx, kd, j, kt, ns)
   234  				if err != nil {
   235  					return err
   236  				}
   237  				w.Write([]byte(fmt.Sprint(key)))
   238  				w.Write([]byte(")"))
   239  			}
   240  
   241  		}
   242  
   243  		if node.IsLeaf() {
   244  			v := node.GetValue(i)
   245  			vt := val.Tuple(v)
   246  
   247  			w.Write([]byte(" value: "))
   248  			for j := 0; j < vt.Count(); j++ {
   249  				if j > 0 {
   250  					w.Write([]byte(", "))
   251  				}
   252  				isAddr := val.IsAddrEncoding(vd.Types[j].Enc)
   253  				if isAddr {
   254  					w.Write([]byte("#"))
   255  				}
   256  				w.Write([]byte(hex.EncodeToString(vd.GetField(j, vt))))
   257  				if isAddr {
   258  					w.Write([]byte(" ("))
   259  					value, err := GetField(ctx, vd, j, vt, ns)
   260  					if err != nil {
   261  						return err
   262  					}
   263  					w.Write([]byte(fmt.Sprint(value)))
   264  					w.Write([]byte(")"))
   265  				}
   266  			}
   267  
   268  			w.Write([]byte(" }"))
   269  		} else {
   270  			ref := node.getAddress(i)
   271  
   272  			w.Write([]byte(" ref: #"))
   273  			w.Write([]byte(ref.String()))
   274  			w.Write([]byte(" }"))
   275  		}
   276  	}
   277  
   278  	w.Write([]byte("\n"))
   279  	return nil
   280  }
   281  
   282  // OutputProllyNodeBytes writes the node given to the writer given in a semi-human-readable format, where values are still
   283  // displayed in hex-encoded byte strings, but are delineated into their fields. All nodes have keys displayed in this
   284  // manner. Interior nodes have their child hash references spelled out, leaf nodes have value tuples delineated like
   285  // the keys
   286  func OutputProllyNodeBytes(w io.Writer, node Node) error {
   287  	for i := 0; i < int(node.count); i++ {
   288  		k := node.GetKey(i)
   289  		kt := val.Tuple(k)
   290  
   291  		w.Write([]byte("\n    { key: "))
   292  		for j := 0; j < kt.Count(); j++ {
   293  			if j > 0 {
   294  				w.Write([]byte(", "))
   295  			}
   296  
   297  			w.Write([]byte(hex.EncodeToString(kt.GetField(j))))
   298  		}
   299  
   300  		if node.IsLeaf() {
   301  			v := node.GetValue(i)
   302  			vt := val.Tuple(v)
   303  
   304  			w.Write([]byte(" value: "))
   305  			for j := 0; j < vt.Count(); j++ {
   306  				if j > 0 {
   307  					w.Write([]byte(", "))
   308  				}
   309  				w.Write([]byte(hex.EncodeToString(vt.GetField(j))))
   310  			}
   311  
   312  			w.Write([]byte(" }"))
   313  		} else {
   314  			ref := node.getAddress(i)
   315  
   316  			w.Write([]byte(" ref: #"))
   317  			w.Write([]byte(ref.String()))
   318  			w.Write([]byte(" }"))
   319  		}
   320  	}
   321  
   322  	w.Write([]byte("\n"))
   323  	return nil
   324  }
   325  
   326  func OutputAddressMapNode(w io.Writer, node Node) error {
   327  	for i := 0; i < int(node.count); i++ {
   328  		k := node.GetKey(i)
   329  		w.Write([]byte("\n    { key: "))
   330  		w.Write(k)
   331  
   332  		ref := node.getAddress(i)
   333  
   334  		w.Write([]byte(" ref: #"))
   335  		w.Write([]byte(ref.String()))
   336  		w.Write([]byte(" }"))
   337  	}
   338  	w.Write([]byte("\n"))
   339  	return nil
   340  }
   341  
   342  func ValueFromNode(root Node) types.Value {
   343  	return types.SerialMessage(root.bytes())
   344  }