github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/table_writer.go

// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"crypto/sha512"
	"encoding/binary"
	"errors"
	"fmt"
	"hash"
	"sort"

	"github.com/golang/snappy"

	"github.com/dolthub/dolt/go/store/d"
)

// tableWriter encodes a collection of byte-stream chunks into an nbs table:
// a sequence of (snappy-compressed data, 4-byte big-endian checksum) records,
// followed by an index and a footer. NOT goroutine safe.
type tableWriter struct {
	buff                  []byte
	pos                   uint64
	totalCompressedData   uint64
	totalUncompressedData uint64
	prefixes              prefixIndexSlice // TODO: This is in danger of exploding memory
	blockHash             hash.Hash

	snapper snappyEncoder
}

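// snappyEncoder abstracts snappy compression behind an interface;
// newTableWriter falls back to realSnappyEncoder when given nil, so
// callers (tests, most likely) can substitute their own implementation.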
type snappyEncoder interface {
	Encode(dst, src []byte) []byte
}

// realSnappyEncoder delegates to github.com/golang/snappy.
type realSnappyEncoder struct{}

func (r realSnappyEncoder) Encode(dst, src []byte) []byte {
	return snappy.Encode(dst, src)
}

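// maxTableSize estimates the buffer size needed for a table of numChunks
// chunks totaling totalData bytes, assuming every chunk is average-sized
// and compresses to snappy's worst case, plus per-chunk index and checksum
// overhead and the footer.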
func maxTableSize(numChunks, totalData uint64) uint64 {
	avgChunkSize := totalData / numChunks
	d.Chk.True(avgChunkSize < maxChunkSize)
	maxSnappySize := snappy.MaxEncodedLen(int(avgChunkSize))
	d.Chk.True(maxSnappySize > 0)
	return numChunks*(prefixTupleSize+lengthSize+addrSuffixSize+checksumSize+uint64(maxSnappySize)) + footerSize
}

// indexSize returns the total byte size of the index for numChunks chunks.
func indexSize(numChunks uint32) uint64 {
	return uint64(numChunks) * (addrSuffixSize + lengthSize + prefixTupleSize)
}

// lengthsOffset returns the position of the lengths array within the index.
func lengthsOffset(numChunks uint32) uint64 {
	return uint64(numChunks) * prefixTupleSize
}

// suffixesOffset returns the position of the suffixes array within the index.
func suffixesOffset(numChunks uint32) uint64 {
	return uint64(numChunks) * (prefixTupleSize + lengthSize)
}

// newTableWriter returns a tableWriter that encodes into buff; len(buff)
// must be >= maxTableSize(numChunks, totalData). A nil snapper selects
// realSnappyEncoder.
func newTableWriter(buff []byte, snapper snappyEncoder) *tableWriter {
	if snapper == nil {
		snapper = realSnappyEncoder{}
	}
	return &tableWriter{
		buff:      buff,
		blockHash: sha512.New(),
		snapper:   snapper,
	}
}
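
// A sketch of the intended write path, pieced together from this file.
// computeAddr is hypothetical, standing in for however the caller derives
// a chunk's address:
//
//	buff := make([]byte, maxTableSize(numChunks, totalData))
//	tw := newTableWriter(buff, nil) // nil selects realSnappyEncoder
//	for _, c := range chunks {
//		tw.addChunk(computeAddr(c), c)
//	}
//	length, name, err := tw.finish()
//	// buff[:length] now holds the complete table, addressable by name.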

func (tw *tableWriter) addChunk(h addr, data []byte) bool {
	if len(data) == 0 {
		panic("NBS blocks cannot be zero length")
	}

	// Compress data straight into tw.buff
	compressed := tw.snapper.Encode(tw.buff[tw.pos:], data)
	dataLength := uint64(len(compressed))
	tw.totalCompressedData += dataLength

	// BUG 3156 indicated that, sometimes, snappy decided that there's not enough space in tw.buff[tw.pos:] to encode into.
	// This should never happen anymore, because we iterate over all chunks to be added and sum the max amount of space that snappy says it might need.
	// Since we know that |data| can't be 0-length, we also know that the compressed version of |data| has length greater than zero. The first element in a snappy-encoded blob is a Uvarint indicating how much data is present. Therefore, if there's a Uvarint-encoded 0 at tw.buff[tw.pos:], we know that snappy did not write anything there and we have a problem.
	if v, n := binary.Uvarint(tw.buff[tw.pos:]); v == 0 {
		d.Chk.True(n != 0)
		panic(fmt.Errorf("bug 3156: unbuffered chunk %s: uncompressed %d, compressed %d, snappy max %d, tw.buff %d", h.String(), len(data), dataLength, snappy.MaxEncodedLen(len(data)), len(tw.buff[tw.pos:])))
	}

	tw.pos += dataLength
	tw.totalUncompressedData += uint64(len(data))

	// checksum of the compressed bytes (4 bytes, big-endian)
	binary.BigEndian.PutUint32(tw.buff[tw.pos:], crc(compressed))
	tw.pos += checksumSize

	// Stored in insertion order
	tw.prefixes = append(tw.prefixes, prefixIndexRec{
		h.Prefix(),
		h[addrPrefixSize:],
		uint32(len(tw.prefixes)),
		uint32(checksumSize + dataLength),
	})

	return true
}

// finish writes the table index and footer, returning the final write
// position (the full byte length of the encoded table in buff) and the
// table's address, which is the truncated SHA-512 hash of the chunk-address
// suffixes written by writeIndex.
func (tw *tableWriter) finish() (uncompressedLength uint64, blockAddr addr, err error) {
	err = tw.writeIndex()

	if err != nil {
		return 0, addr{}, err
	}

	tw.writeFooter()
	uncompressedLength = tw.pos

	var h []byte
	h = tw.blockHash.Sum(h) // Appends hash to h
	copy(blockAddr[:], h)
	return
}

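// prefixIndexRec captures, for one chunk: the 8-byte big-endian prefix of
// its address, the remaining suffix bytes, its insertion order (ordinal),
// and the on-disk size of its record (compressed data plus checksum).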
type prefixIndexRec struct {
	prefix      uint64
	suffix      []byte
	order, size uint32
}

// prefixIndexSlice implements sort.Interface, ordering records by address prefix.
type prefixIndexSlice []prefixIndexRec

func (hs prefixIndexSlice) Len() int           { return len(hs) }
func (hs prefixIndexSlice) Less(i, j int) bool { return hs[i].prefix < hs[j].prefix }
func (hs prefixIndexSlice) Swap(i, j int)      { hs[i], hs[j] = hs[j], hs[i] }

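// writeIndex sorts the records by prefix and lays the index out as three
// consecutive arrays: (prefix, ordinal) tuples in prefix order, followed by
// record lengths and then address suffixes, both in insertion (ordinal)
// order. The suffix region is also fed into blockHash, from which finish
// derives the table's address.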
func (tw *tableWriter) writeIndex() error {
	sort.Sort(tw.prefixes)

	pfxScratch := [addrPrefixSize]byte{}

	numRecords := uint32(len(tw.prefixes))
	lengthsOffset := tw.pos + lengthsOffset(numRecords)   // skip prefix and ordinal for each record
	suffixesOffset := tw.pos + suffixesOffset(numRecords) // skip size for each record
	for _, pi := range tw.prefixes {
		binary.BigEndian.PutUint64(pfxScratch[:], pi.prefix)

		// hash prefix
		n := uint64(copy(tw.buff[tw.pos:], pfxScratch[:]))
		if n != addrPrefixSize {
			return errors.New("failed to copy all data")
		}

		tw.pos += n

		// order
		binary.BigEndian.PutUint32(tw.buff[tw.pos:], pi.order)
		tw.pos += ordinalSize

		// length
		offset := lengthsOffset + uint64(pi.order)*lengthSize
		binary.BigEndian.PutUint32(tw.buff[offset:], pi.size)

		// hash suffix
		offset = suffixesOffset + uint64(pi.order)*addrSuffixSize
		n = uint64(copy(tw.buff[offset:], pi.suffix))

		if n != addrSuffixSize {
			return errors.New("failed to copy all bytes")
		}
	}
	suffixesLen := uint64(numRecords) * addrSuffixSize
	tw.blockHash.Write(tw.buff[suffixesOffset : suffixesOffset+suffixesLen])
	tw.pos = suffixesOffset + suffixesLen

	return nil
}

func (tw *tableWriter) writeFooter() {
	tw.pos += writeFooter(tw.buff[tw.pos:], uint32(len(tw.prefixes)), tw.totalUncompressedData)
}

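// writeFooter serializes the footer (chunk count, total uncompressed data
// size, magic number) into dst and returns the number of bytes written.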
func writeFooter(dst []byte, chunkCount uint32, uncData uint64) (consumed uint64) {
	// chunk count
	binary.BigEndian.PutUint32(dst[consumed:], chunkCount)
	consumed += uint32Size

	// total uncompressed chunk data
	binary.BigEndian.PutUint64(dst[consumed:], uncData)
	consumed += uint64Size

	// magic number
	copy(dst[consumed:], magicNumber)
	consumed += magicNumberSize
	return
}