github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/message/blob.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package message
    16  
    17  import (
    18  	"context"
    19  	"encoding/binary"
    20  
    21  	fb "github.com/dolthub/flatbuffers/v23/go"
    22  
    23  	"github.com/dolthub/dolt/go/gen/fb/serial"
    24  	"github.com/dolthub/dolt/go/store/hash"
    25  	"github.com/dolthub/dolt/go/store/pool"
    26  )
    27  
    28  const (
    29  	blobPayloadBytesVOffset fb.VOffsetT = 4
    30  	blobAddressArrayVOffset fb.VOffsetT = 6
    31  )
    32  
    33  var blobFileID = []byte(serial.BlobFileID)
    34  
    35  func NewBlobSerializer(pool pool.BuffPool) BlobSerializer {
    36  	return BlobSerializer{pool: pool}
    37  }
    38  
    39  type BlobSerializer struct {
    40  	pool pool.BuffPool
    41  }
    42  
    43  var _ Serializer = BlobSerializer{}
    44  
    45  func (s BlobSerializer) Serialize(keys, values [][]byte, subtrees []uint64, level int) serial.Message {
    46  	bufSz := estimateBlobSize(values, subtrees)
    47  	b := getFlatbufferBuilder(s.pool, bufSz)
    48  
    49  	if level == 0 {
    50  		assertTrue(len(values) == 1, "num values != 1 when serialize Blob")
    51  		assertTrue(len(subtrees) == 1, "num subtrees != 1 when serialize Blob")
    52  		payload := b.CreateByteVector(values[0])
    53  
    54  		serial.BlobStart(b)
    55  		serial.BlobAddPayload(b, payload)
    56  	} else {
    57  		addrs := writeItemBytes(b, values, len(values)*hash.ByteLen)
    58  		cards := writeCountArray(b, subtrees)
    59  
    60  		serial.BlobStart(b)
    61  		serial.BlobAddAddressArray(b, addrs)
    62  		serial.BlobAddSubtreeSizes(b, cards)
    63  	}
    64  	serial.BlobAddTreeSize(b, sumSubtrees(subtrees))
    65  	serial.BlobAddTreeLevel(b, uint8(level))
    66  	return serial.FinishMessage(b, serial.BlobEnd(b), blobFileID)
    67  }
    68  
    69  func getBlobKeys(msg serial.Message) (ItemAccess, error) {
    70  	return ItemAccess{}, nil
    71  }
    72  
    73  func getBlobValues(msg serial.Message) (values ItemAccess, err error) {
    74  	var b serial.Blob
    75  	err = serial.InitBlobRoot(&b, msg, serial.MessagePrefixSz)
    76  	if err != nil {
    77  		return ItemAccess{}, err
    78  	}
    79  	if b.TreeLevel() > 0 {
    80  		values.bufStart = lookupVectorOffset(blobAddressArrayVOffset, b.Table())
    81  		values.bufLen = uint16(b.AddressArrayLength() * uint16Size)
    82  		values.itemWidth = hash.ByteLen
    83  	} else {
    84  		values.bufStart = lookupVectorOffset(blobPayloadBytesVOffset, b.Table())
    85  		values.bufLen = uint16(b.PayloadLength())
    86  		values.itemWidth = uint16(b.PayloadLength())
    87  	}
    88  	return
    89  }
    90  
    91  func getBlobCount(msg serial.Message) (uint16, error) {
    92  	var b serial.Blob
    93  	err := serial.InitBlobRoot(&b, msg, serial.MessagePrefixSz)
    94  	if err != nil {
    95  		return 0, err
    96  	}
    97  	if b.TreeLevel() == 0 {
    98  		return 1, nil
    99  	}
   100  	return uint16(b.AddressArrayLength() / hash.ByteLen), nil
   101  }
   102  
   103  func walkBlobAddresses(ctx context.Context, msg serial.Message, cb func(ctx context.Context, addr hash.Hash) error) error {
   104  	var b serial.Blob
   105  	err := serial.InitBlobRoot(&b, msg, serial.MessagePrefixSz)
   106  	if err != nil {
   107  		return err
   108  	}
   109  	arr := b.AddressArrayBytes()
   110  	for i := 0; i < len(arr)/hash.ByteLen; i++ {
   111  		addr := hash.New(arr[i*addrSize : (i+1)*addrSize])
   112  		if err := cb(ctx, addr); err != nil {
   113  			return err
   114  		}
   115  	}
   116  	return nil
   117  }
   118  
   119  func getBlobTreeLevel(msg serial.Message) (uint16, error) {
   120  	var b serial.Blob
   121  	err := serial.InitBlobRoot(&b, msg, serial.MessagePrefixSz)
   122  	if err != nil {
   123  		return 0, err
   124  	}
   125  	return uint16(b.TreeLevel()), nil
   126  }
   127  
   128  func getBlobTreeCount(msg serial.Message) (int, error) {
   129  	var b serial.Blob
   130  	err := serial.InitBlobRoot(&b, msg, serial.MessagePrefixSz)
   131  	if err != nil {
   132  		return 0, err
   133  	}
   134  	return int(b.TreeSize()), nil
   135  }
   136  
   137  func getBlobSubtrees(msg serial.Message) ([]uint64, error) {
   138  	var b serial.Blob
   139  	err := serial.InitBlobRoot(&b, msg, serial.MessagePrefixSz)
   140  	if err != nil {
   141  		return nil, err
   142  	}
   143  	if b.TreeLevel() == 0 {
   144  		return nil, nil
   145  	}
   146  	counts := make([]uint64, b.AddressArrayLength()/hash.ByteLen)
   147  	return decodeVarints(b.SubtreeSizesBytes(), counts), nil
   148  }
   149  
   150  func estimateBlobSize(values [][]byte, subtrees []uint64) (bufSz int) {
   151  	for i := range values {
   152  		bufSz += len(values[i])
   153  	}
   154  	bufSz += len(subtrees) * binary.MaxVarintLen64
   155  	bufSz += 200 // overhead
   156  	return
   157  }