github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/storage/pebble_batch.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package storage
    12  
    13  import (
    14  	"sync"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    17  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    18  	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
    19  	"github.com/cockroachdb/errors"
    20  	"github.com/cockroachdb/pebble"
    21  )
    22  
    23  // Wrapper struct around a pebble.Batch.
    24  type pebbleBatch struct {
    25  	db           *pebble.DB
    26  	batch        *pebble.Batch
    27  	buf          []byte
    28  	prefixIter   pebbleIterator
    29  	normalIter   pebbleIterator
    30  	closed       bool
    31  	isDistinct   bool
    32  	distinctOpen bool
    33  	parentBatch  *pebbleBatch
    34  }
    35  
    36  var _ Batch = &pebbleBatch{}
    37  
    38  var pebbleBatchPool = sync.Pool{
    39  	New: func() interface{} {
    40  		return &pebbleBatch{}
    41  	},
    42  }
    43  
    44  // Instantiates a new pebbleBatch.
    45  func newPebbleBatch(db *pebble.DB, batch *pebble.Batch) *pebbleBatch {
    46  	pb := pebbleBatchPool.Get().(*pebbleBatch)
    47  	*pb = pebbleBatch{
    48  		db:    db,
    49  		batch: batch,
    50  		buf:   pb.buf,
    51  		prefixIter: pebbleIterator{
    52  			lowerBoundBuf: pb.prefixIter.lowerBoundBuf,
    53  			upperBoundBuf: pb.prefixIter.upperBoundBuf,
    54  			reusable:      true,
    55  		},
    56  		normalIter: pebbleIterator{
    57  			lowerBoundBuf: pb.normalIter.lowerBoundBuf,
    58  			upperBoundBuf: pb.normalIter.upperBoundBuf,
    59  			reusable:      true,
    60  		},
    61  	}
    62  	return pb
    63  }
    64  
    65  // Close implements the Batch interface.
    66  func (p *pebbleBatch) Close() {
    67  	if p.closed {
    68  		panic("closing an already-closed pebbleBatch")
    69  	}
    70  	p.closed = true
    71  
    72  	// Destroy the iterators before closing the batch.
    73  	p.prefixIter.destroy()
    74  	p.normalIter.destroy()
    75  
    76  	if !p.isDistinct {
    77  		_ = p.batch.Close()
    78  		p.batch = nil
    79  	} else {
    80  		p.parentBatch.distinctOpen = false
    81  		p.isDistinct = false
    82  	}
    83  
    84  	pebbleBatchPool.Put(p)
    85  }
    86  
    87  // Closed implements the Batch interface.
    88  func (p *pebbleBatch) Closed() bool {
    89  	return p.closed
    90  }
    91  
    92  // ExportToSst is part of the engine.Reader interface.
    93  func (p *pebbleBatch) ExportToSst(
    94  	startKey, endKey roachpb.Key,
    95  	startTS, endTS hlc.Timestamp,
    96  	exportAllRevisions bool,
    97  	targetSize, maxSize uint64,
    98  	io IterOptions,
    99  ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) {
   100  	panic("unimplemented")
   101  }
   102  
   103  // Get implements the Batch interface.
   104  func (p *pebbleBatch) Get(key MVCCKey) ([]byte, error) {
   105  	r := pebble.Reader(p.batch)
   106  	if !p.isDistinct {
   107  		if !p.batch.Indexed() {
   108  			panic("write-only batch")
   109  		}
   110  		if p.distinctOpen {
   111  			panic("distinct batch open")
   112  		}
   113  	} else if !p.batch.Indexed() {
   114  		r = p.db
   115  	}
   116  	if len(key.Key) == 0 {
   117  		return nil, emptyKeyError()
   118  	}
   119  	p.buf = EncodeKeyToBuf(p.buf[:0], key)
   120  	ret, closer, err := r.Get(p.buf)
   121  	if closer != nil {
   122  		retCopy := make([]byte, len(ret))
   123  		copy(retCopy, ret)
   124  		ret = retCopy
   125  		closer.Close()
   126  	}
   127  	if errors.Is(err, pebble.ErrNotFound) || len(ret) == 0 {
   128  		return nil, nil
   129  	}
   130  	return ret, err
   131  }
   132  
   133  // GetProto implements the Batch interface.
   134  func (p *pebbleBatch) GetProto(
   135  	key MVCCKey, msg protoutil.Message,
   136  ) (ok bool, keyBytes, valBytes int64, err error) {
   137  	r := pebble.Reader(p.batch)
   138  	if !p.isDistinct {
   139  		if !p.batch.Indexed() {
   140  			panic("write-only batch")
   141  		}
   142  		if p.distinctOpen {
   143  			panic("distinct batch open")
   144  		}
   145  	} else if !p.batch.Indexed() {
   146  		r = p.db
   147  	}
   148  	if len(key.Key) == 0 {
   149  		return false, 0, 0, emptyKeyError()
   150  	}
   151  	p.buf = EncodeKeyToBuf(p.buf[:0], key)
   152  	val, closer, err := r.Get(p.buf)
   153  	if closer != nil {
   154  		if msg != nil {
   155  			err = protoutil.Unmarshal(val, msg)
   156  		}
   157  		keyBytes = int64(len(p.buf))
   158  		valBytes = int64(len(val))
   159  		closer.Close()
   160  		return true, keyBytes, valBytes, err
   161  	}
   162  	if errors.Is(err, pebble.ErrNotFound) {
   163  		return false, 0, 0, nil
   164  	}
   165  	return false, 0, 0, err
   166  }
   167  
   168  // Iterate implements the Batch interface.
   169  func (p *pebbleBatch) Iterate(
   170  	start, end roachpb.Key, f func(MVCCKeyValue) (stop bool, err error),
   171  ) error {
   172  	if p.distinctOpen {
   173  		panic("distinct batch open")
   174  	}
   175  	return iterateOnReader(p, start, end, f)
   176  }
   177  
   178  // NewIterator implements the Batch interface.
   179  func (p *pebbleBatch) NewIterator(opts IterOptions) Iterator {
   180  	if !opts.Prefix && len(opts.UpperBound) == 0 && len(opts.LowerBound) == 0 {
   181  		panic("iterator must set prefix or upper bound or lower bound")
   182  	}
   183  
   184  	if !p.batch.Indexed() && !p.isDistinct {
   185  		panic("write-only batch")
   186  	}
   187  	if p.distinctOpen {
   188  		panic("distinct batch open")
   189  	}
   190  
   191  	if opts.MinTimestampHint != (hlc.Timestamp{}) {
   192  		// Iterators that specify timestamp bounds cannot be cached.
   193  		return newPebbleIterator(p.batch, opts)
   194  	}
   195  
   196  	iter := &p.normalIter
   197  	if opts.Prefix {
   198  		iter = &p.prefixIter
   199  	}
   200  	if iter.inuse {
   201  		panic("iterator already in use")
   202  	}
   203  
   204  	if iter.iter != nil {
   205  		iter.setOptions(opts)
   206  	} else if p.batch.Indexed() {
   207  		iter.init(p.batch, opts)
   208  	} else {
   209  		iter.init(p.db, opts)
   210  	}
   211  
   212  	iter.inuse = true
   213  	return iter
   214  }
   215  
   216  // NewIterator implements the Batch interface.
   217  func (p *pebbleBatch) ApplyBatchRepr(repr []byte, sync bool) error {
   218  	if p.distinctOpen {
   219  		panic("distinct batch open")
   220  	}
   221  
   222  	var batch pebble.Batch
   223  	if err := batch.SetRepr(repr); err != nil {
   224  		return err
   225  	}
   226  
   227  	return p.batch.Apply(&batch, nil)
   228  }
   229  
   230  // Clear implements the Batch interface.
   231  func (p *pebbleBatch) Clear(key MVCCKey) error {
   232  	if p.distinctOpen {
   233  		panic("distinct batch open")
   234  	}
   235  	if len(key.Key) == 0 {
   236  		return emptyKeyError()
   237  	}
   238  
   239  	p.buf = EncodeKeyToBuf(p.buf[:0], key)
   240  	return p.batch.Delete(p.buf, nil)
   241  }
   242  
   243  // SingleClear implements the Batch interface.
   244  func (p *pebbleBatch) SingleClear(key MVCCKey) error {
   245  	if p.distinctOpen {
   246  		panic("distinct batch open")
   247  	}
   248  	if len(key.Key) == 0 {
   249  		return emptyKeyError()
   250  	}
   251  
   252  	p.buf = EncodeKeyToBuf(p.buf[:0], key)
   253  	return p.batch.SingleDelete(p.buf, nil)
   254  }
   255  
   256  // ClearRange implements the Batch interface.
   257  func (p *pebbleBatch) ClearRange(start, end MVCCKey) error {
   258  	if p.distinctOpen {
   259  		panic("distinct batch open")
   260  	}
   261  
   262  	p.buf = EncodeKeyToBuf(p.buf[:0], start)
   263  	buf2 := EncodeKey(end)
   264  	return p.batch.DeleteRange(p.buf, buf2, nil)
   265  }
   266  
   267  // Clear implements the Batch interface.
   268  func (p *pebbleBatch) ClearIterRange(iter Iterator, start, end roachpb.Key) error {
   269  	if p.distinctOpen {
   270  		panic("distinct batch open")
   271  	}
   272  
   273  	type unsafeRawKeyGetter interface{ unsafeRawKey() []byte }
   274  	// Note that this method has the side effect of modifying iter's bounds.
   275  	// Since all calls to `ClearIterRange` are on new throwaway iterators with no
   276  	// lower bounds, calling SetUpperBound should be sufficient and safe.
   277  	// Furthermore, the start and end keys are always metadata keys (i.e.
   278  	// have zero timestamps), so we can ignore the bounds' MVCC timestamps.
   279  	iter.SetUpperBound(end)
   280  	iter.SeekGE(MakeMVCCMetadataKey(start))
   281  
   282  	for ; ; iter.Next() {
   283  		valid, err := iter.Valid()
   284  		if err != nil {
   285  			return err
   286  		} else if !valid {
   287  			break
   288  		}
   289  
   290  		err = p.batch.Delete(iter.(unsafeRawKeyGetter).unsafeRawKey(), nil)
   291  		if err != nil {
   292  			return err
   293  		}
   294  	}
   295  	return nil
   296  }
   297  
   298  // Merge implements the Batch interface.
   299  func (p *pebbleBatch) Merge(key MVCCKey, value []byte) error {
   300  	if p.distinctOpen {
   301  		panic("distinct batch open")
   302  	}
   303  	if len(key.Key) == 0 {
   304  		return emptyKeyError()
   305  	}
   306  
   307  	p.buf = EncodeKeyToBuf(p.buf[:0], key)
   308  	return p.batch.Merge(p.buf, value, nil)
   309  }
   310  
   311  // Put implements the Batch interface.
   312  func (p *pebbleBatch) Put(key MVCCKey, value []byte) error {
   313  	if p.distinctOpen {
   314  		panic("distinct batch open")
   315  	}
   316  	if len(key.Key) == 0 {
   317  		return emptyKeyError()
   318  	}
   319  
   320  	p.buf = EncodeKeyToBuf(p.buf[:0], key)
   321  	return p.batch.Set(p.buf, value, nil)
   322  }
   323  
   324  // LogData implements the Batch interface.
   325  func (p *pebbleBatch) LogData(data []byte) error {
   326  	return p.batch.LogData(data, nil)
   327  }
   328  
   329  func (p *pebbleBatch) LogLogicalOp(op MVCCLogicalOpType, details MVCCLogicalOpDetails) {
   330  	// No-op.
   331  }
   332  
   333  // Commit implements the Batch interface.
   334  func (p *pebbleBatch) Commit(sync bool) error {
   335  	opts := pebble.NoSync
   336  	if sync {
   337  		opts = pebble.Sync
   338  	}
   339  	if p.batch == nil {
   340  		panic("called with nil batch")
   341  	}
   342  	err := p.batch.Commit(opts)
   343  	if err != nil {
   344  		panic(err)
   345  	}
   346  	return err
   347  }
   348  
   349  // Distinct implements the Batch interface.
   350  func (p *pebbleBatch) Distinct() ReadWriter {
   351  	if p.distinctOpen {
   352  		panic("distinct batch already open")
   353  	}
   354  	// Distinct batches are regular batches with isDistinct set to true. The
   355  	// parent batch is stored in parentBatch, and all writes on it are disallowed
   356  	// while the distinct batch is open. Both the distinct batch and the parent
   357  	// batch share the same underlying pebble.Batch instance.
   358  	//
   359  	// The need for distinct batches is distinctly less in Pebble than
   360  	// RocksDB. In RocksDB, a distinct batch allows reading from a batch without
   361  	// flushing the buffered writes which is a significant performance
   362  	// optimization. In Pebble we're still using the same underlying batch and if
   363  	// it is indexed we'll still be indexing it as we Go.
   364  	p.distinctOpen = true
   365  	d := newPebbleBatch(p.db, p.batch)
   366  	d.parentBatch = p
   367  	d.isDistinct = true
   368  	return d
   369  }
   370  
   371  // Empty implements the Batch interface.
   372  func (p *pebbleBatch) Empty() bool {
   373  	return p.batch.Count() == 0
   374  }
   375  
   376  // Len implements the Batch interface.
   377  func (p *pebbleBatch) Len() int {
   378  	return len(p.batch.Repr())
   379  }
   380  
   381  // Repr implements the Batch interface.
   382  func (p *pebbleBatch) Repr() []byte {
   383  	// Repr expects a "safe" byte slice as its output. The return value of
   384  	// p.batch.Repr() is an unsafe byte slice owned by p.batch. Since we could be
   385  	// sending this slice over the wire, we need to make a copy.
   386  	repr := p.batch.Repr()
   387  	reprCopy := make([]byte, len(repr))
   388  	copy(reprCopy, repr)
   389  	return reprCopy
   390  }