github.com/petermattis/pebble@v0.0.0-20190905164901-ab51a2166067/ptable/writer.go (about)

     1  // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package ptable
     6  
     7  import (
     8  	"bufio"
     9  	"encoding/binary"
    10  	"errors"
    11  	"io"
    12  
    13  	"github.com/golang/snappy"
    14  	"github.com/petermattis/pebble/internal/base"
    15  	"github.com/petermattis/pebble/internal/crc"
    16  	"github.com/petermattis/pebble/vfs"
    17  )
    18  
    19  const (
    20  	blockTrailerLen   = 5
    21  	blockHandleMaxLen = 10 + 10
    22  	footerLen         = 1 + 2*blockHandleMaxLen + 4 + 8
    23  	magicOffset       = footerLen - len(magic)
    24  	versionOffset     = magicOffset - 4
    25  
    26  	magic = "\xf7\xcf\xf4\x85\xb7\x41\xe2\x88"
    27  
    28  	noChecksum     = 0
    29  	checksumCRC32c = 1
    30  
    31  	formatVersion = 3
    32  
    33  	// The block type gives the per-block compression format.
    34  	// These constants are part of the file format and should not be changed.
    35  	// They are different from the db.Compression constants because the latter
    36  	// are designed so that the zero value of the db.Compression type means to
    37  	// use the default compression (which is snappy).
    38  	noCompressionBlockType     = 0
    39  	snappyCompressionBlockType = 1
    40  )
    41  
    42  // Silence unused warning.
    43  var _ = noChecksum
    44  
    45  // blockHandle is the file offset and length of a block.
    46  type blockHandle struct {
    47  	offset, length uint64
    48  }
    49  
    50  // decodeBlockHandle returns the block handle encoded at the start of src, as
    51  // well as the number of bytes it occupies. It returns zero if given invalid
    52  // input.
    53  func decodeBlockHandle(src []byte) (blockHandle, int) {
    54  	offset, n := binary.Uvarint(src)
    55  	length, m := binary.Uvarint(src[n:])
    56  	if n == 0 || m == 0 {
    57  		return blockHandle{}, 0
    58  	}
    59  	return blockHandle{offset, length}, n + m
    60  }
    61  
    62  func encodeBlockHandle(dst []byte, b blockHandle) int {
    63  	n := binary.PutUvarint(dst, b.offset)
    64  	m := binary.PutUvarint(dst[n:], b.length)
    65  	return n + m
    66  }
    67  
    68  // Writer ...
    69  type Writer struct {
    70  	env       *Env
    71  	writer    io.Writer
    72  	bufWriter *bufio.Writer
    73  	closer    io.Closer
    74  	err       error
    75  	// The next four fields are copied from a db.Options.
    76  	blockSize   int
    77  	compression base.Compression
    78  	// The data block and index block writers.
    79  	block      blockWriter
    80  	indexBlock blockWriter
    81  	// compressedBuf is the destination buffer for snappy compression. It is
    82  	// re-used over the lifetime of the writer, avoiding the allocation of a
    83  	// temporary buffer for each block.
    84  	compressedBuf []byte
    85  	// offset is the offset (relative to the table start) of the next block to be
    86  	// written.
    87  	offset uint64
    88  	// tmp is a scratch buffer, large enough to hold either footerLen bytes,
    89  	// blockTrailerLen bytes, or (5 * binary.MaxVarintLen64) bytes.
    90  	tmp [footerLen]byte
    91  }
    92  
    93  var indexColTypes = []ColumnType{ColumnTypeBytes, ColumnTypeInt64}
    94  
    95  // NewWriter ...
    96  func NewWriter(f vfs.File, env *Env, _ *base.Options, lo *base.LevelOptions) *Writer {
    97  	lo = lo.EnsureDefaults()
    98  	w := &Writer{
    99  		env:         env,
   100  		writer:      f,
   101  		closer:      f,
   102  		blockSize:   lo.BlockSize,
   103  		compression: lo.Compression,
   104  	}
   105  	if f == nil {
   106  		w.err = errors.New("pebble/table: nil file")
   107  		return w
   108  	}
   109  
   110  	// If f does not have a Flush method, do our own buffering.
   111  	type flusher interface {
   112  		Flush() error
   113  	}
   114  	if _, ok := f.(flusher); ok {
   115  		w.writer = f
   116  	} else {
   117  		w.bufWriter = bufio.NewWriter(f)
   118  		w.writer = w.bufWriter
   119  	}
   120  
   121  	colTypes := make([]ColumnType, len(w.env.Schema))
   122  	for i := range w.env.Schema {
   123  		colTypes[i] = w.env.Schema[i].Type
   124  	}
   125  	w.block.init(colTypes)
   126  	w.indexBlock.init(indexColTypes)
   127  	return w
   128  }
   129  
   130  // AddKV adds a row encoded in a key/value pair to the table. The encoded
   131  // column data must match the table schema. Data must be added in sorted order.
   132  func (w *Writer) AddKV(key, value []byte) error {
   133  	if w.err != nil {
   134  		return w.err
   135  	}
   136  	if w.block.cols[0].count == 0 {
   137  		w.addIndex(key)
   138  	}
   139  	w.env.Decode(key, value, nil, &w.block)
   140  	w.maybeFinishBlock()
   141  	return w.err
   142  }
   143  
   144  // AddRow adds a row to the table. The columns in the row must match the table
   145  // schema. Data must be added in sorted order.
   146  func (w *Writer) AddRow(row RowReader) error {
   147  	if w.err != nil {
   148  		return w.err
   149  	}
   150  	if w.block.cols[0].count == 0 {
   151  		key, _ := w.env.Encode(row, nil)
   152  		w.addIndex(key)
   153  	}
   154  	w.block.PutRow(row)
   155  	w.maybeFinishBlock()
   156  	return w.err
   157  }
   158  
   159  // EstimatedSize ...
   160  func (w *Writer) EstimatedSize() uint64 {
   161  	return w.offset + uint64(w.block.Size()+w.indexBlock.Size())
   162  }
   163  
   164  // Close ...
   165  func (w *Writer) Close() (err error) {
   166  	defer func() {
   167  		if w.closer == nil {
   168  			return
   169  		}
   170  		err1 := w.closer.Close()
   171  		if err == nil {
   172  			err = err1
   173  		}
   174  		w.closer = nil
   175  	}()
   176  
   177  	if w.err != nil {
   178  		return w.err
   179  	}
   180  
   181  	if w.block.cols[0].count > 0 {
   182  		_, err := w.finishBlock(&w.block)
   183  		if err != nil {
   184  			w.err = err
   185  			return w.err
   186  		}
   187  	}
   188  
   189  	// Add the dummy final index entry and write the index block.
   190  	w.addIndex(nil)
   191  	indexBlockHandle, err := w.finishBlock(&w.indexBlock)
   192  	if err != nil {
   193  		w.err = err
   194  		return w.err
   195  	}
   196  
   197  	// Write the table footer.
   198  	footer := w.tmp[:footerLen]
   199  	for i := range footer {
   200  		footer[i] = 0
   201  	}
   202  	footer[0] = checksumCRC32c
   203  	n := 1
   204  	n += encodeBlockHandle(footer[n:], blockHandle{})
   205  	n += encodeBlockHandle(footer[n:], indexBlockHandle)
   206  	binary.LittleEndian.PutUint32(footer[versionOffset:], formatVersion)
   207  	copy(footer[magicOffset:], magic)
   208  	if _, err := w.writer.Write(footer); err != nil {
   209  		w.err = err
   210  		return w.err
   211  	}
   212  
   213  	// Flush the buffer.
   214  	if w.bufWriter != nil {
   215  		if err := w.bufWriter.Flush(); err != nil {
   216  			w.err = err
   217  			return err
   218  		}
   219  	}
   220  
   221  	// Make any future calls to Set or Close return an error.
   222  	w.err = errors.New("pebble/table: writer is closed")
   223  	return nil
   224  }
   225  
   226  func (w *Writer) addIndex(key []byte) {
   227  	w.indexBlock.PutBytes(0, key)
   228  	w.indexBlock.PutInt64(1, int64(w.offset))
   229  }
   230  
   231  func (w *Writer) maybeFinishBlock() {
   232  	if int(w.block.Size()) < w.blockSize {
   233  		return
   234  	}
   235  	_, w.err = w.finishBlock(&w.block)
   236  }
   237  
   238  func (w *Writer) finishBlock(block *blockWriter) (blockHandle, error) {
   239  	b := block.Finish()
   240  	blockType := byte(noCompressionBlockType)
   241  	if w.compression == base.SnappyCompression {
   242  		compressed := snappy.Encode(w.compressedBuf, b)
   243  		w.compressedBuf = compressed[:cap(compressed)]
   244  		if len(compressed) < len(b)-len(b)/8 {
   245  			blockType = snappyCompressionBlockType
   246  			b = compressed
   247  		}
   248  	}
   249  
   250  	// Reset the per-block state.
   251  	block.reset()
   252  	return w.writeRawBlock(b, blockType)
   253  }
   254  
   255  func (w *Writer) writeRawBlock(b []byte, blockType byte) (blockHandle, error) {
   256  	w.tmp[0] = blockType
   257  
   258  	// Calculate the checksum.
   259  	checksum := crc.New(b).Update(w.tmp[:1]).Value()
   260  	binary.LittleEndian.PutUint32(w.tmp[1:5], checksum)
   261  
   262  	// Write the bytes to the file.
   263  	if _, err := w.writer.Write(b); err != nil {
   264  		return blockHandle{}, err
   265  	}
   266  	if _, err := w.writer.Write(w.tmp[:5]); err != nil {
   267  		return blockHandle{}, err
   268  	}
   269  	bh := blockHandle{w.offset, uint64(len(b))}
   270  	w.offset += uint64(len(b)) + blockTrailerLen
   271  	return bh, nil
   272  }