github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/table_writer.go

// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"crypto/sha512"
	"encoding/binary"
	"errors"
	"fmt"
	gohash "hash"
	"sort"

	"github.com/golang/snappy"

	"github.com/dolthub/dolt/go/store/d"
	"github.com/dolthub/dolt/go/store/hash"
)

// tableWriter encodes a collection of byte stream chunks into an NBS table. NOT goroutine safe.
type tableWriter struct {
	buff                  []byte
	pos                   uint64
	totalCompressedData   uint64
	totalUncompressedData uint64
	prefixes              prefixIndexSlice
	blockHash             gohash.Hash

	snapper snappyEncoder
}

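// snappyEncoder abstracts snappy.Encode so that an alternate implementation
// (presumably a test fake) can be injected via newTableWriter.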
type snappyEncoder interface {
	Encode(dst, src []byte) []byte
}

type realSnappyEncoder struct{}

func (r realSnappyEncoder) Encode(dst, src []byte) []byte {
	return snappy.Encode(dst, src)
}

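// maxTableSize returns a conservative upper bound on the encoded size of a
// table holding |numChunks| chunks totaling |totalData| uncompressed bytes:
// per chunk, one index entry (prefix tuple, length, hash suffix), one CRC
// checksum, and the worst-case snappy encoding of an average-sized chunk,
// plus the footer.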
func maxTableSize(numChunks, totalData uint64) uint64 {
	avgChunkSize := totalData / numChunks
	d.Chk.True(avgChunkSize < maxChunkSize)
	maxSnappySize := snappy.MaxEncodedLen(int(avgChunkSize))
	d.Chk.True(maxSnappySize > 0)
	return numChunks*(prefixTupleSize+lengthSize+hash.SuffixLen+checksumSize+uint64(maxSnappySize)) + footerSize
}

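// indexSize returns the size in bytes of a table index covering |numChunks|
// chunks: one prefix tuple, one length, and one hash suffix per chunk.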
func indexSize(numChunks uint32) uint64 {
	return uint64(numChunks) * (hash.SuffixLen + lengthSize + prefixTupleSize)
}

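// lengthsOffset returns the offset of the lengths section within the index,
// which begins after the prefix tuples.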
func lengthsOffset(numChunks uint32) uint64 {
	return uint64(numChunks) * prefixTupleSize
}

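// suffixesOffset returns the offset of the hash suffixes section within the
// index, which begins after the prefix tuples and lengths.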
func suffixesOffset(numChunks uint32) uint64 {
	return uint64(numChunks) * (prefixTupleSize + lengthSize)
}

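// newTableWriter returns a tableWriter that encodes chunks into |buff|,
// using realSnappyEncoder when |snapper| is nil.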
// len(buff) must be >= maxTableSize(numChunks, totalData)
func newTableWriter(buff []byte, snapper snappyEncoder) *tableWriter {
	if snapper == nil {
		snapper = realSnappyEncoder{}
	}
	return &tableWriter{
		buff:      buff,
		blockHash: sha512.New(),
		snapper:   snapper,
	}
}

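// addChunk compresses |data| into the table, appends its CRC checksum, and
// records an index entry for |h|. It always returns true.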
func (tw *tableWriter) addChunk(h hash.Hash, data []byte) bool {
	if len(data) == 0 {
		panic("NBS blocks cannot be zero length")
	}

	// Compress data straight into tw.buff
	compressed := tw.snapper.Encode(tw.buff[tw.pos:], data)
	dataLength := uint64(len(compressed))
	tw.totalCompressedData += dataLength

	// BUG 3156 indicated that, sometimes, snappy decided that there wasn't enough space in
	// tw.buff[tw.pos:] to encode into. This should no longer happen, because we iterate over
	// all chunks to be added and sum the maximum amount of space that snappy says it might
	// need. Since we know that |data| can't be 0-length, we also know that the compressed
	// version of |data| has length greater than zero. The first element in a snappy-encoded
	// blob is a Uvarint indicating how much data is present. Therefore, if there's a
	// Uvarint-encoded 0 at tw.buff[tw.pos:], we know that snappy did not write anything
	// there and we have a problem.
	if v, n := binary.Uvarint(tw.buff[tw.pos:]); v == 0 {
		d.Chk.True(n != 0)
		panic(fmt.Errorf("bug 3156: unbuffered chunk %s: uncompressed %d, compressed %d, snappy max %d, tw.buff %d", h.String(), len(data), dataLength, snappy.MaxEncodedLen(len(data)), len(tw.buff[tw.pos:])))
	}

	tw.pos += dataLength
	tw.totalUncompressedData += uint64(len(data))

	// checksum (4 LSBytes, big-endian)
	binary.BigEndian.PutUint32(tw.buff[tw.pos:], crc(compressed))
	tw.pos += checksumSize

	// Stored in insertion order
	tw.prefixes = append(tw.prefixes, prefixIndexRec{
		h,
		uint32(len(tw.prefixes)),
		uint32(checksumSize + dataLength),
	})

	return true
}

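// finish writes the index and footer, then returns the total length of the
// encoded table (tw.pos after the footer; the name uncompressedLength
// notwithstanding) and the table's address, the leading bytes of a SHA-512
// over the concatenated hash suffixes written by writeIndex.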
func (tw *tableWriter) finish() (uncompressedLength uint64, blockAddr hash.Hash, err error) {
	err = tw.writeIndex()

	if err != nil {
		return 0, hash.Hash{}, err
	}

	tw.writeFooter()
	uncompressedLength = tw.pos

	var h []byte
	h = tw.blockHash.Sum(h) // Appends hash to h
	copy(blockAddr[:], h)
	return
}

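// prefixIndexRec pairs a chunk's address with its insertion order (ordinal)
// and the size of its record: compressed data plus checksum.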
type prefixIndexRec struct {
	addr        hash.Hash
	order, size uint32
}

type prefixIndexSlice []prefixIndexRec

func (hs prefixIndexSlice) Len() int { return len(hs) }
func (hs prefixIndexSlice) Less(i, j int) bool {
	return hs[i].addr.Prefix() < hs[j].addr.Prefix()
}
func (hs prefixIndexSlice) Swap(i, j int) { hs[i], hs[j] = hs[j], hs[i] }

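// writeIndex writes the table index at tw.pos: prefix tuples (hash prefix +
// ordinal) sorted by prefix, then record lengths in insertion order, then
// hash suffixes in insertion order. The suffixes are also fed into
// tw.blockHash, from which finish derives the table's address.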
func (tw *tableWriter) writeIndex() error {
	sort.Sort(tw.prefixes)

	pfxScratch := [hash.PrefixLen]byte{}

	numRecords := uint32(len(tw.prefixes))
	lengthsOffset := tw.pos + lengthsOffset(numRecords)   // skip prefix and ordinal for each record
	suffixesOffset := tw.pos + suffixesOffset(numRecords) // skip size for each record
	for _, pi := range tw.prefixes {
		binary.BigEndian.PutUint64(pfxScratch[:], pi.addr.Prefix())

		// hash prefix
		n := uint64(copy(tw.buff[tw.pos:], pfxScratch[:]))
		if n != hash.PrefixLen {
			return errors.New("failed to copy all data")
		}

		tw.pos += n

		// order
		binary.BigEndian.PutUint32(tw.buff[tw.pos:], pi.order)
		tw.pos += ordinalSize

		// length
		offset := lengthsOffset + uint64(pi.order)*lengthSize
		binary.BigEndian.PutUint32(tw.buff[offset:], pi.size)

		// hash suffix
		offset = suffixesOffset + uint64(pi.order)*hash.SuffixLen
		n = uint64(copy(tw.buff[offset:], pi.addr.Suffix()))

		if n != hash.SuffixLen {
			return errors.New("failed to copy all bytes")
		}
	}
	suffixesLen := uint64(numRecords) * hash.SuffixLen
	tw.blockHash.Write(tw.buff[suffixesOffset : suffixesOffset+suffixesLen])
	tw.pos = suffixesOffset + suffixesLen

	return nil
}

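// writeFooter appends the table footer at tw.pos and advances the position.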
func (tw *tableWriter) writeFooter() {
	tw.pos += writeFooter(tw.buff[tw.pos:], uint32(len(tw.prefixes)), tw.totalUncompressedData)
}

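// writeFooter serializes the footer into dst: the chunk count, the total
// uncompressed chunk data length, and the magic number. It returns the
// number of bytes written.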
func writeFooter(dst []byte, chunkCount uint32, uncData uint64) (consumed uint64) {
	// chunk count
	binary.BigEndian.PutUint32(dst[consumed:], chunkCount)
	consumed += uint32Size

	// total uncompressed chunk data
	binary.BigEndian.PutUint64(dst[consumed:], uncData)
	consumed += uint64Size

	// magic number
	copy(dst[consumed:], magicNumber)
	consumed += magicNumberSize
	return
}
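
// Usage sketch (illustrative, not part of the original file): a caller is
// expected to size the buffer with maxTableSize, add each chunk, and then
// finish. |myChunks| and its Hash/Data accessors are hypothetical.
//
//	buff := make([]byte, maxTableSize(numChunks, totalData))
//	tw := newTableWriter(buff, nil)
//	for _, c := range myChunks {
//		tw.addChunk(c.Hash(), c.Data())
//	}
//	tableLen, addr, err := tw.finish()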