github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/third_party/code.google.com/p/leveldb-go/leveldb/table/writer.go (about)

     1  // Copyright 2011 The LevelDB-Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package table
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"encoding/binary"
    11  	"errors"
    12  	"fmt"
    13  	"io"
    14  
    15  	"camlistore.org/third_party/code.google.com/p/leveldb-go/leveldb/crc"
    16  	"camlistore.org/third_party/code.google.com/p/leveldb-go/leveldb/db"
    17  	"camlistore.org/third_party/code.google.com/p/snappy-go/snappy"
    18  )
    19  
    20  // indexEntry is a block handle and the length of the separator key.
    21  type indexEntry struct {
    22  	bh     blockHandle
    23  	keyLen int
    24  }
    25  
    26  // Writer is a table writer. It implements the DB interface, as documented
    27  // in the leveldb/db package.
    28  type Writer struct {
    29  	writer    io.Writer
    30  	bufWriter *bufio.Writer
    31  	closer    io.Closer
    32  	err       error
    33  	// The next four fields are copied from a db.Options.
    34  	blockRestartInterval int
    35  	blockSize            int
    36  	cmp                  db.Comparer
    37  	compression          db.Compression
    38  	// A table is a series of blocks and a block's index entry contains a
    39  	// separator key between one block and the next. Thus, a finished block
    40  	// cannot be written until the first key in the next block is seen.
    41  	// pendingBH is the blockHandle of a finished block that is waiting for
    42  	// the next call to Set. If the writer is not in this state, pendingBH
    43  	// is zero.
    44  	pendingBH blockHandle
    45  	// offset is the offset (relative to the table start) of the next block
    46  	// to be written.
    47  	offset uint64
    48  	// prevKey is a copy of the key most recently passed to Set.
    49  	prevKey []byte
    50  	// indexKeys and indexEntries hold the separator keys between each block
    51  	// and the successor key for the final block. indexKeys contains the key's
    52  	// bytes concatenated together. The keyLen field of each indexEntries
    53  	// element is the length of the respective separator key.
    54  	indexKeys    []byte
    55  	indexEntries []indexEntry
    56  	// The next three fields hold data for the current block:
    57  	//   - buf is the accumulated uncompressed bytes,
    58  	//   - nEntries is the number of entries,
    59  	//   - restarts are the offsets (relative to the block start) of each
    60  	//     restart point.
    61  	buf      bytes.Buffer
    62  	nEntries int
    63  	restarts []uint32
    64  	// compressedBuf is the destination buffer for snappy compression. It is
    65  	// re-used over the lifetime of the writer, avoiding the allocation of a
    66  	// temporary buffer for each block.
    67  	compressedBuf []byte
    68  	// tmp is a scratch buffer, large enough to hold either footerLen bytes,
    69  	// blockTrailerLen bytes, or (5 * binary.MaxVarintLen64) bytes.
    70  	tmp [50]byte
    71  }
    72  
    73  // Writer implements the db.DB interface.
    74  var _ db.DB = (*Writer)(nil)
    75  
    76  // Get is provided to implement the DB interface, but returns an error, as a
    77  // Writer cannot read from a table.
    78  func (w *Writer) Get(key []byte, o *db.ReadOptions) ([]byte, error) {
    79  	return nil, errors.New("leveldb/table: cannot Get from a write-only table")
    80  }
    81  
    82  // Delete is provided to implement the DB interface, but returns an error, as a
    83  // Writer can only append key/value pairs.
    84  func (w *Writer) Delete(key []byte, o *db.WriteOptions) error {
    85  	return errors.New("leveldb/table: cannot Delete from a table")
    86  }
    87  
    88  // Find is provided to implement the DB interface, but returns an error, as a
    89  // Writer cannot read from a table.
    90  func (w *Writer) Find(key []byte, o *db.ReadOptions) db.Iterator {
    91  	return &tableIter{
    92  		err: errors.New("leveldb/table: cannot Find from a write-only table"),
    93  	}
    94  }
    95  
    96  // Set implements DB.Set, as documented in the leveldb/db package. For a given
    97  // Writer, the keys passed to Set must be in increasing order.
    98  func (w *Writer) Set(key, value []byte, o *db.WriteOptions) error {
    99  	if w.err != nil {
   100  		return w.err
   101  	}
   102  	if w.cmp.Compare(w.prevKey, key) >= 0 {
   103  		w.err = fmt.Errorf("leveldb/table: Set called in non-increasing key order: %q, %q", w.prevKey, key)
   104  		return w.err
   105  	}
   106  	w.flushPendingBH(key)
   107  	w.append(key, value, w.nEntries%w.blockRestartInterval == 0)
   108  	// If the estimated block size is sufficiently large, finish the current block.
   109  	if w.buf.Len()+4*(len(w.restarts)+1) >= w.blockSize {
   110  		bh, err := w.finishBlock()
   111  		if err != nil {
   112  			w.err = err
   113  			return w.err
   114  		}
   115  		w.pendingBH = bh
   116  	}
   117  	return nil
   118  }
   119  
   120  // flushPendingBH adds any pending block handle to the index entries.
   121  func (w *Writer) flushPendingBH(key []byte) {
   122  	if w.pendingBH.length == 0 {
   123  		// A valid blockHandle must be non-zero.
   124  		// In particular, it must have a non-zero length.
   125  		return
   126  	}
   127  	n0 := len(w.indexKeys)
   128  	w.indexKeys = w.cmp.AppendSeparator(w.indexKeys, w.prevKey, key)
   129  	n1 := len(w.indexKeys)
   130  	w.indexEntries = append(w.indexEntries, indexEntry{w.pendingBH, n1 - n0})
   131  	w.pendingBH = blockHandle{}
   132  }
   133  
   134  // append appends a key/value pair, which may also be a restart point.
   135  func (w *Writer) append(key, value []byte, restart bool) {
   136  	nShared := 0
   137  	if restart {
   138  		w.restarts = append(w.restarts, uint32(w.buf.Len()))
   139  	} else {
   140  		nShared = db.SharedPrefixLen(w.prevKey, key)
   141  	}
   142  	w.prevKey = append(w.prevKey[:0], key...)
   143  	w.nEntries++
   144  	n := binary.PutUvarint(w.tmp[0:], uint64(nShared))
   145  	n += binary.PutUvarint(w.tmp[n:], uint64(len(key)-nShared))
   146  	n += binary.PutUvarint(w.tmp[n:], uint64(len(value)))
   147  	w.buf.Write(w.tmp[:n])
   148  	w.buf.Write(key[nShared:])
   149  	w.buf.Write(value)
   150  }
   151  
   152  // finishBlock finishes the current block and returns its block handle, which is
   153  // its offset and length in the table.
   154  func (w *Writer) finishBlock() (blockHandle, error) {
   155  	// Write the restart points to the buffer.
   156  	if w.nEntries == 0 {
   157  		// Every block must have at least one restart point.
   158  		w.restarts = w.restarts[:1]
   159  		w.restarts[0] = 0
   160  	}
   161  	tmp4 := w.tmp[:4]
   162  	for _, x := range w.restarts {
   163  		binary.LittleEndian.PutUint32(tmp4, x)
   164  		w.buf.Write(tmp4)
   165  	}
   166  	binary.LittleEndian.PutUint32(tmp4, uint32(len(w.restarts)))
   167  	w.buf.Write(tmp4)
   168  
   169  	// Compress the buffer, discarding the result if the improvement
   170  	// isn't at least 12.5%.
   171  	b := w.buf.Bytes()
   172  	w.tmp[0] = noCompressionBlockType
   173  	if w.compression == db.SnappyCompression {
   174  		compressed, err := snappy.Encode(w.compressedBuf, b)
   175  		if err != nil {
   176  			return blockHandle{}, err
   177  		}
   178  		w.compressedBuf = compressed[:cap(compressed)]
   179  		if len(compressed) < len(b)-len(b)/8 {
   180  			w.tmp[0] = snappyCompressionBlockType
   181  			b = compressed
   182  		}
   183  	}
   184  
   185  	// Calculate the checksum.
   186  	checksum := crc.New(b).Update(w.tmp[:1]).Value()
   187  	binary.LittleEndian.PutUint32(w.tmp[1:5], checksum)
   188  
   189  	// Write the bytes to the file.
   190  	if _, err := w.writer.Write(b); err != nil {
   191  		return blockHandle{}, err
   192  	}
   193  	if _, err := w.writer.Write(w.tmp[:5]); err != nil {
   194  		return blockHandle{}, err
   195  	}
   196  	bh := blockHandle{w.offset, uint64(len(b))}
   197  	w.offset += uint64(len(b)) + blockTrailerLen
   198  
   199  	// Reset the per-block state.
   200  	w.buf.Reset()
   201  	w.nEntries = 0
   202  	w.restarts = w.restarts[:0]
   203  	return bh, nil
   204  }
   205  
   206  // Close implements DB.Close, as documented in the leveldb/db package.
   207  func (w *Writer) Close() (err error) {
   208  	defer func() {
   209  		if w.closer == nil {
   210  			return
   211  		}
   212  		err1 := w.closer.Close()
   213  		if err == nil {
   214  			err = err1
   215  		}
   216  		w.closer = nil
   217  	}()
   218  	if w.err != nil {
   219  		return w.err
   220  	}
   221  
   222  	// Finish the last data block, or force an empty data block if there
   223  	// aren't any data blocks at all.
   224  	if w.nEntries > 0 || len(w.indexEntries) == 0 {
   225  		bh, err := w.finishBlock()
   226  		if err != nil {
   227  			w.err = err
   228  			return w.err
   229  		}
   230  		w.pendingBH = bh
   231  		w.flushPendingBH(nil)
   232  	}
   233  
   234  	// Write the (empty) metaindex block.
   235  	metaindexBlockHandle, err := w.finishBlock()
   236  	if err != nil {
   237  		w.err = err
   238  		return w.err
   239  	}
   240  
   241  	// Write the index block.
   242  	// writer.append uses w.tmp[:3*binary.MaxVarintLen64].
   243  	i0, tmp := 0, w.tmp[3*binary.MaxVarintLen64:5*binary.MaxVarintLen64]
   244  	for _, ie := range w.indexEntries {
   245  		n := encodeBlockHandle(tmp, ie.bh)
   246  		i1 := i0 + ie.keyLen
   247  		w.append(w.indexKeys[i0:i1], tmp[:n], true)
   248  		i0 = i1
   249  	}
   250  	indexBlockHandle, err := w.finishBlock()
   251  	if err != nil {
   252  		w.err = err
   253  		return w.err
   254  	}
   255  
   256  	// Write the table footer.
   257  	footer := w.tmp[:footerLen]
   258  	for i := range footer {
   259  		footer[i] = 0
   260  	}
   261  	n := encodeBlockHandle(footer, metaindexBlockHandle)
   262  	encodeBlockHandle(footer[n:], indexBlockHandle)
   263  	copy(footer[footerLen-len(magic):], magic)
   264  	if _, err := w.writer.Write(footer); err != nil {
   265  		w.err = err
   266  		return w.err
   267  	}
   268  
   269  	// Flush the buffer.
   270  	if w.bufWriter != nil {
   271  		if err := w.bufWriter.Flush(); err != nil {
   272  			w.err = err
   273  			return err
   274  		}
   275  	}
   276  
   277  	// Make any future calls to Set or Close return an error.
   278  	w.err = errors.New("leveldb/table: writer is closed")
   279  	return nil
   280  }
   281  
   282  // NewWriter returns a new table writer for the file. Closing the writer will
   283  // close the file.
   284  func NewWriter(f File, o *db.Options) *Writer {
   285  	w := &Writer{
   286  		closer:               f,
   287  		blockRestartInterval: o.GetBlockRestartInterval(),
   288  		blockSize:            o.GetBlockSize(),
   289  		cmp:                  o.GetComparer(),
   290  		compression:          o.GetCompression(),
   291  		prevKey:              make([]byte, 0, 256),
   292  		restarts:             make([]uint32, 0, 256),
   293  	}
   294  	if f == nil {
   295  		w.err = errors.New("leveldb/table: nil file")
   296  		return w
   297  	}
   298  	// If f does not have a Flush method, do our own buffering.
   299  	type flusher interface {
   300  		Flush() error
   301  	}
   302  	if _, ok := f.(flusher); ok {
   303  		w.writer = f
   304  	} else {
   305  		w.bufWriter = bufio.NewWriter(f)
   306  		w.writer = w.bufWriter
   307  	}
   308  	return w
   309  }