github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/message/prolly_map.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package message
    16  
    17  import (
    18  	"context"
    19  	"encoding/binary"
    20  	"fmt"
    21  
    22  	fb "github.com/dolthub/flatbuffers/v23/go"
    23  
    24  	"github.com/dolthub/dolt/go/gen/fb/serial"
    25  	"github.com/dolthub/dolt/go/store/hash"
    26  	"github.com/dolthub/dolt/go/store/pool"
    27  	"github.com/dolthub/dolt/go/store/val"
    28  )
    29  
    30  const (
    31  	// These constants are mirrored from serial.ProllyTreeNode.KeyOffsets()
    32  	// and serial.ProllyTreeNode.ValueOffsets() respectively.
    33  	// They are only as stable as the flatbuffers schema that define them.
    34  	prollyMapKeyItemBytesVOffset      fb.VOffsetT = 4
    35  	prollyMapKeyOffsetsVOffset        fb.VOffsetT = 6
    36  	prollyMapValueItemBytesVOffset    fb.VOffsetT = 10
    37  	prollyMapValueOffsetsVOffset      fb.VOffsetT = 12
    38  	prollyMapAddressArrayBytesVOffset fb.VOffsetT = 18
    39  )
    40  
    41  var prollyMapFileID = []byte(serial.ProllyTreeNodeFileID)
    42  
    43  func NewProllyMapSerializer(valueDesc val.TupleDesc, pool pool.BuffPool) ProllyMapSerializer {
    44  	return ProllyMapSerializer{valDesc: valueDesc, pool: pool}
    45  }
    46  
    47  type ProllyMapSerializer struct {
    48  	valDesc val.TupleDesc
    49  	pool    pool.BuffPool
    50  }
    51  
    52  var _ Serializer = ProllyMapSerializer{}
    53  
    54  func (s ProllyMapSerializer) Serialize(keys, values [][]byte, subtrees []uint64, level int) serial.Message {
    55  	var (
    56  		keyTups, keyOffs fb.UOffsetT
    57  		valTups, valOffs fb.UOffsetT
    58  		valAddrOffs      fb.UOffsetT
    59  		refArr, cardArr  fb.UOffsetT
    60  	)
    61  
    62  	keySz, valSz, bufSz := estimateProllyMapSize(keys, values, subtrees, s.valDesc.AddressFieldCount())
    63  	b := getFlatbufferBuilder(s.pool, bufSz)
    64  
    65  	// serialize keys and offStart
    66  	keyTups = writeItemBytes(b, keys, keySz)
    67  	serial.ProllyTreeNodeStartKeyOffsetsVector(b, len(keys)+1)
    68  	keyOffs = writeItemOffsets(b, keys, keySz)
    69  
    70  	if level == 0 {
    71  		// serialize value tuples for leaf nodes
    72  		valTups = writeItemBytes(b, values, valSz)
    73  		serial.ProllyTreeNodeStartValueOffsetsVector(b, len(values)+1)
    74  		valOffs = writeItemOffsets(b, values, valSz)
    75  		// serialize offStart of chunk addresses within |valTups|
    76  		if s.valDesc.AddressFieldCount() > 0 {
    77  			serial.ProllyTreeNodeStartValueAddressOffsetsVector(b, countAddresses(values, s.valDesc))
    78  			valAddrOffs = writeAddressOffsets(b, values, valSz, s.valDesc)
    79  		}
    80  	} else {
    81  		// serialize child refs and subtree counts for internal nodes
    82  		refArr = writeItemBytes(b, values, valSz)
    83  		cardArr = writeCountArray(b, subtrees)
    84  	}
    85  
    86  	// populate the node's vtable
    87  	serial.ProllyTreeNodeStart(b)
    88  	serial.ProllyTreeNodeAddKeyItems(b, keyTups)
    89  	serial.ProllyTreeNodeAddKeyOffsets(b, keyOffs)
    90  	if level == 0 {
    91  		serial.ProllyTreeNodeAddValueItems(b, valTups)
    92  		serial.ProllyTreeNodeAddValueOffsets(b, valOffs)
    93  		serial.ProllyTreeNodeAddTreeCount(b, uint64(len(keys)))
    94  		serial.ProllyTreeNodeAddValueAddressOffsets(b, valAddrOffs)
    95  	} else {
    96  		serial.ProllyTreeNodeAddAddressArray(b, refArr)
    97  		serial.ProllyTreeNodeAddSubtreeCounts(b, cardArr)
    98  		serial.ProllyTreeNodeAddTreeCount(b, sumSubtrees(subtrees))
    99  	}
   100  	serial.ProllyTreeNodeAddKeyType(b, serial.ItemTypeTupleFormatAlpha)
   101  	serial.ProllyTreeNodeAddValueType(b, serial.ItemTypeTupleFormatAlpha)
   102  	serial.ProllyTreeNodeAddTreeLevel(b, uint8(level))
   103  
   104  	return serial.FinishMessage(b, serial.ProllyTreeNodeEnd(b), prollyMapFileID)
   105  }
   106  
   107  func getProllyMapKeysAndValues(msg serial.Message) (keys, values ItemAccess, level, count uint16, err error) {
   108  	var pm serial.ProllyTreeNode
   109  	err = serial.InitProllyTreeNodeRoot(&pm, msg, serial.MessagePrefixSz)
   110  	if err != nil {
   111  		return
   112  	}
   113  	keys.bufStart = lookupVectorOffset(prollyMapKeyItemBytesVOffset, pm.Table())
   114  	keys.bufLen = uint16(pm.KeyItemsLength())
   115  	keys.offStart = lookupVectorOffset(prollyMapKeyOffsetsVOffset, pm.Table())
   116  	keys.offLen = uint16(pm.KeyOffsetsLength() * uint16Size)
   117  
   118  	count = (keys.offLen / 2) - 1
   119  	level = uint16(pm.TreeLevel())
   120  
   121  	vv := pm.ValueItemsBytes()
   122  	if vv != nil {
   123  		values.bufStart = lookupVectorOffset(prollyMapValueItemBytesVOffset, pm.Table())
   124  		values.bufLen = uint16(pm.ValueItemsLength())
   125  		values.offStart = lookupVectorOffset(prollyMapValueOffsetsVOffset, pm.Table())
   126  		values.offLen = uint16(pm.ValueOffsetsLength() * uint16Size)
   127  	} else {
   128  		values.bufStart = lookupVectorOffset(prollyMapAddressArrayBytesVOffset, pm.Table())
   129  		values.bufLen = uint16(pm.AddressArrayLength())
   130  		values.itemWidth = hash.ByteLen
   131  	}
   132  	return
   133  }
   134  
   135  func walkProllyMapAddresses(ctx context.Context, msg serial.Message, cb func(ctx context.Context, addr hash.Hash) error) error {
   136  	var pm serial.ProllyTreeNode
   137  	err := serial.InitProllyTreeNodeRoot(&pm, msg, serial.MessagePrefixSz)
   138  	if err != nil {
   139  		return err
   140  	}
   141  	arr := pm.AddressArrayBytes()
   142  	for i := 0; i < len(arr)/hash.ByteLen; i++ {
   143  		addr := hash.New(arr[i*addrSize : (i+1)*addrSize])
   144  		if err := cb(ctx, addr); err != nil {
   145  			return err
   146  		}
   147  	}
   148  
   149  	cnt := pm.ValueAddressOffsetsLength()
   150  	arr2 := pm.ValueItemsBytes()
   151  	for i := 0; i < cnt; i++ {
   152  		o := pm.ValueAddressOffsets(i)
   153  		addr := hash.New(arr2[o : o+addrSize])
   154  		if err := cb(ctx, addr); err != nil {
   155  			return err
   156  		}
   157  	}
   158  	assertFalse((arr != nil) && (arr2 != nil), "cannot WalkAddresses for ProllyTreeNode with both AddressArray and ValueAddressOffsets")
   159  	return nil
   160  }
   161  
   162  func getProllyMapCount(msg serial.Message) (uint16, error) {
   163  	var pm serial.ProllyTreeNode
   164  	err := serial.InitProllyTreeNodeRoot(&pm, msg, serial.MessagePrefixSz)
   165  	if err != nil {
   166  		return 0, err
   167  	}
   168  	return uint16(pm.KeyOffsetsLength() - 1), nil
   169  }
   170  
   171  func getProllyMapTreeLevel(msg serial.Message) (int, error) {
   172  	var pm serial.ProllyTreeNode
   173  	err := serial.InitProllyTreeNodeRoot(&pm, msg, serial.MessagePrefixSz)
   174  	if err != nil {
   175  		return 0, fb.ErrTableHasUnknownFields
   176  	}
   177  	return int(pm.TreeLevel()), nil
   178  }
   179  
   180  func getProllyMapTreeCount(msg serial.Message) (int, error) {
   181  	var pm serial.ProllyTreeNode
   182  	err := serial.InitProllyTreeNodeRoot(&pm, msg, serial.MessagePrefixSz)
   183  	if err != nil {
   184  		return 0, fb.ErrTableHasUnknownFields
   185  	}
   186  	return int(pm.TreeCount()), nil
   187  }
   188  
   189  func getProllyMapSubtrees(msg serial.Message) ([]uint64, error) {
   190  	sz, err := getProllyMapCount(msg)
   191  	if err != nil {
   192  		return nil, err
   193  	}
   194  
   195  	var pm serial.ProllyTreeNode
   196  	n := fb.GetUOffsetT(msg[serial.MessagePrefixSz:])
   197  	pm.Init(msg, serial.MessagePrefixSz+n)
   198  	if serial.ProllyTreeNodeNumFields < pm.Table().NumFields() {
   199  		return nil, fb.ErrTableHasUnknownFields
   200  	}
   201  
   202  	counts := make([]uint64, sz)
   203  
   204  	return decodeVarints(pm.SubtreeCountsBytes(), counts), nil
   205  }
   206  
   207  // estimateProllyMapSize returns the exact Size of the tuple vectors for keys and values,
   208  // and an estimate of the overall Size of the final flatbuffer.
   209  func estimateProllyMapSize(keys, values [][]byte, subtrees []uint64, valAddrsCnt int) (int, int, int) {
   210  	var keySz, valSz, bufSz int
   211  	for i := range keys {
   212  		keySz += len(keys[i])
   213  		valSz += len(values[i])
   214  	}
   215  	subtreesSz := len(subtrees) * binary.MaxVarintLen64
   216  
   217  	// constraints enforced upstream
   218  	if keySz > int(MaxVectorOffset) {
   219  		panic(fmt.Sprintf("key vector exceeds Size limit ( %d > %d )", keySz, MaxVectorOffset))
   220  	}
   221  	if valSz > int(MaxVectorOffset) {
   222  		panic(fmt.Sprintf("value vector exceeds Size limit ( %d > %d )", valSz, MaxVectorOffset))
   223  	}
   224  
   225  	bufSz += keySz + valSz               // tuples
   226  	bufSz += subtreesSz                  // subtree counts
   227  	bufSz += len(keys)*2 + len(values)*2 // offStart
   228  	bufSz += 8 + 1 + 1 + 1               // metadata
   229  	bufSz += 72                          // vtable (approx)
   230  	bufSz += 100                         // padding?
   231  	bufSz += valAddrsCnt * len(values) * 2
   232  	bufSz += serial.MessagePrefixSz
   233  
   234  	return keySz, valSz, bufSz
   235  }