github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/message/merge_artifacts.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package message
    16  
    17  import (
    18  	"context"
    19  	"encoding/binary"
    20  	"fmt"
    21  
    22  	fb "github.com/dolthub/flatbuffers/v23/go"
    23  
    24  	"github.com/dolthub/dolt/go/gen/fb/serial"
    25  	"github.com/dolthub/dolt/go/store/hash"
    26  	"github.com/dolthub/dolt/go/store/pool"
    27  	"github.com/dolthub/dolt/go/store/val"
    28  )
    29  
    30  const (
    31  	// These constants are mirrored from serial.MergeArtifacts.KeyOffsets()
    32  	// and serial.MergeArtifacts.ValueOffsets() respectively.
    33  	// They are only as stable as the flatbuffers schema that define them.
    34  
    35  	mergeArtifactKeyItemBytesVOffset   fb.VOffsetT = 4
    36  	mergeArtifactKeyOffsetsVOffset     fb.VOffsetT = 6
    37  	mergeArtifactValueItemBytesVOffset fb.VOffsetT = 10
    38  	mergeArtifactValueOffsetsVOffset   fb.VOffsetT = 12
    39  	mergeArtifactAddressArrayVOffset   fb.VOffsetT = 14
    40  )
    41  
    42  var mergeArtifactFileID = []byte(serial.MergeArtifactsFileID)
    43  
    44  func NewMergeArtifactSerializer(keyDesc val.TupleDesc, pool pool.BuffPool) MergeArtifactSerializer {
    45  	return MergeArtifactSerializer{
    46  		keyDesc: keyDesc,
    47  		pool:    pool,
    48  	}
    49  }
    50  
    51  type MergeArtifactSerializer struct {
    52  	keyDesc val.TupleDesc
    53  	pool    pool.BuffPool
    54  }
    55  
    56  var _ Serializer = MergeArtifactSerializer{}
    57  
    58  func (s MergeArtifactSerializer) Serialize(keys, values [][]byte, subtrees []uint64, level int) serial.Message {
    59  	var (
    60  		keyTups, keyOffs fb.UOffsetT
    61  		valTups, valOffs fb.UOffsetT
    62  		keyAddrOffs      fb.UOffsetT
    63  		refArr, cardArr  fb.UOffsetT
    64  	)
    65  
    66  	keySz, valSz, bufSz := estimateMergeArtifactSize(keys, values, subtrees, s.keyDesc.AddressFieldCount())
    67  	b := getFlatbufferBuilder(s.pool, bufSz)
    68  
    69  	// serialize keys and offStart
    70  	keyTups = writeItemBytes(b, keys, keySz)
    71  	serial.MergeArtifactsStartKeyOffsetsVector(b, len(keys)+1)
    72  	keyOffs = writeItemOffsets(b, keys, keySz)
    73  
    74  	if level == 0 {
    75  		// serialize value tuples for leaf nodes
    76  		valTups = writeItemBytes(b, values, valSz)
    77  		serial.MergeArtifactsStartValueOffsetsVector(b, len(values)+1)
    78  		valOffs = writeItemOffsets(b, values, valSz)
    79  		// serialize offStart of chunk addresses within |keyTups|
    80  		if s.keyDesc.AddressFieldCount() > 0 {
    81  			serial.MergeArtifactsStartKeyAddressOffsetsVector(b, countAddresses(keys, s.keyDesc))
    82  			keyAddrOffs = writeAddressOffsets(b, keys, keySz, s.keyDesc)
    83  		}
    84  	} else {
    85  		// serialize child refs and subtree counts for internal nodes
    86  		refArr = writeItemBytes(b, values, valSz)
    87  		cardArr = writeCountArray(b, subtrees)
    88  	}
    89  
    90  	// populate the node's vtable
    91  	serial.MergeArtifactsStart(b)
    92  	serial.MergeArtifactsAddKeyItems(b, keyTups)
    93  	serial.MergeArtifactsAddKeyOffsets(b, keyOffs)
    94  	if level == 0 {
    95  		serial.MergeArtifactsAddValueItems(b, valTups)
    96  		serial.MergeArtifactsAddValueOffsets(b, valOffs)
    97  		serial.MergeArtifactsAddTreeCount(b, uint64(len(keys)))
    98  		serial.MergeArtifactsAddKeyAddressOffsets(b, keyAddrOffs)
    99  	} else {
   100  		serial.MergeArtifactsAddAddressArray(b, refArr)
   101  		serial.MergeArtifactsAddSubtreeCounts(b, cardArr)
   102  		serial.MergeArtifactsAddTreeCount(b, sumSubtrees(subtrees))
   103  	}
   104  	serial.MergeArtifactsAddTreeLevel(b, uint8(level))
   105  
   106  	return serial.FinishMessage(b, serial.MergeArtifactsEnd(b), mergeArtifactFileID)
   107  }
   108  
   109  func getArtifactMapKeysAndValues(msg serial.Message) (keys, values ItemAccess, level, count uint16, err error) {
   110  	var ma serial.MergeArtifacts
   111  	err = serial.InitMergeArtifactsRoot(&ma, msg, serial.MessagePrefixSz)
   112  	if err != nil {
   113  		return
   114  	}
   115  	keys.bufStart = lookupVectorOffset(mergeArtifactKeyItemBytesVOffset, ma.Table())
   116  	keys.bufLen = uint16(ma.KeyItemsLength())
   117  	keys.offStart = lookupVectorOffset(mergeArtifactKeyOffsetsVOffset, ma.Table())
   118  	keys.offLen = uint16(ma.KeyOffsetsLength() * uint16Size)
   119  
   120  	count = (keys.offLen / 2) - 1
   121  	level = uint16(ma.TreeLevel())
   122  
   123  	vv := ma.ValueItemsBytes()
   124  	if vv != nil {
   125  		values.bufStart = lookupVectorOffset(mergeArtifactValueItemBytesVOffset, ma.Table())
   126  		values.bufLen = uint16(ma.ValueItemsLength())
   127  		values.offStart = lookupVectorOffset(mergeArtifactValueOffsetsVOffset, ma.Table())
   128  		values.offLen = uint16(ma.ValueOffsetsLength() * uint16Size)
   129  	} else {
   130  		values.bufStart = lookupVectorOffset(mergeArtifactAddressArrayVOffset, ma.Table())
   131  		values.bufLen = uint16(ma.AddressArrayLength())
   132  		values.itemWidth = hash.ByteLen
   133  	}
   134  	return
   135  }
   136  
   137  func walkMergeArtifactAddresses(ctx context.Context, msg serial.Message, cb func(ctx context.Context, addr hash.Hash) error) error {
   138  	var ma serial.MergeArtifacts
   139  	err := serial.InitMergeArtifactsRoot(&ma, msg, serial.MessagePrefixSz)
   140  	if err != nil {
   141  		return err
   142  	}
   143  	arr := ma.AddressArrayBytes()
   144  	for i := 0; i < len(arr)/hash.ByteLen; i++ {
   145  		addr := hash.New(arr[i*addrSize : (i+1)*addrSize])
   146  		if err := cb(ctx, addr); err != nil {
   147  			return err
   148  		}
   149  	}
   150  
   151  	cnt := ma.KeyAddressOffsetsLength()
   152  	arr2 := ma.KeyItemsBytes()
   153  	for i := 0; i < cnt; i++ {
   154  		o := ma.KeyAddressOffsets(i)
   155  		addr := hash.New(arr2[o : o+addrSize])
   156  		if err := cb(ctx, addr); err != nil {
   157  			return err
   158  		}
   159  	}
   160  
   161  	return nil
   162  }
   163  
   164  func getMergeArtifactCount(msg serial.Message) (uint16, error) {
   165  	var ma serial.MergeArtifacts
   166  	err := serial.InitMergeArtifactsRoot(&ma, msg, serial.MessagePrefixSz)
   167  	if err != nil {
   168  		return 0, err
   169  	}
   170  	if ma.KeyItemsLength() == 0 {
   171  		return 0, nil
   172  	}
   173  	// zeroth offset ommitted from array
   174  	return uint16(ma.KeyOffsetsLength() + 1), nil
   175  }
   176  
   177  func getMergeArtifactTreeLevel(msg serial.Message) (int, error) {
   178  	var ma serial.MergeArtifacts
   179  	err := serial.InitMergeArtifactsRoot(&ma, msg, serial.MessagePrefixSz)
   180  	if err != nil {
   181  		return 0, err
   182  	}
   183  	return int(ma.TreeLevel()), nil
   184  }
   185  
   186  func getMergeArtifactTreeCount(msg serial.Message) (int, error) {
   187  	var ma serial.MergeArtifacts
   188  	err := serial.InitMergeArtifactsRoot(&ma, msg, serial.MessagePrefixSz)
   189  	if err != nil {
   190  		return 0, err
   191  	}
   192  	return int(ma.TreeCount()), nil
   193  }
   194  
   195  func getMergeArtifactSubtrees(msg serial.Message) ([]uint64, error) {
   196  	sz, err := getMergeArtifactCount(msg)
   197  	if err != nil {
   198  		return nil, err
   199  	}
   200  	counts := make([]uint64, sz)
   201  	var ma serial.MergeArtifacts
   202  	err = serial.InitMergeArtifactsRoot(&ma, msg, serial.MessagePrefixSz)
   203  	if err != nil {
   204  		return nil, err
   205  	}
   206  	return decodeVarints(ma.SubtreeCountsBytes(), counts), nil
   207  }
   208  
   209  // estimateMergeArtifact>Size returns the exact Size of the tuple vectors for keys and values,
   210  // and an estimate of the overall Size of the final flatbuffer.
   211  func estimateMergeArtifactSize(keys, values [][]byte, subtrees []uint64, keyAddrs int) (int, int, int) {
   212  	var keySz, valSz, bufSz int
   213  	for i := range keys {
   214  		keySz += len(keys[i])
   215  		valSz += len(values[i])
   216  	}
   217  	refCntSz := len(subtrees) * binary.MaxVarintLen64
   218  
   219  	// constraints enforced upstream
   220  	if keySz > int(MaxVectorOffset) {
   221  		panic(fmt.Sprintf("key vector exceeds Size limit ( %d > %d )", keySz, MaxVectorOffset))
   222  	}
   223  	if valSz > int(MaxVectorOffset) {
   224  		panic(fmt.Sprintf("value vector exceeds Size limit ( %d > %d )", valSz, MaxVectorOffset))
   225  	}
   226  
   227  	bufSz += keySz + valSz               // tuples
   228  	bufSz += refCntSz                    // subtree counts
   229  	bufSz += len(keys)*2 + len(values)*2 // offStart
   230  	bufSz += 8 + 1 + 1 + 1               // metadata
   231  	bufSz += 72                          // vtable (approx)
   232  	bufSz += 100                         // padding?
   233  	bufSz += keyAddrs * len(keys) * 2
   234  	bufSz += serial.MessagePrefixSz
   235  
   236  	return keySz, valSz, bufSz
   237  }