github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/storage/pebble_batch.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package storage 12 13 import ( 14 "sync" 15 16 "github.com/cockroachdb/cockroach/pkg/roachpb" 17 "github.com/cockroachdb/cockroach/pkg/util/hlc" 18 "github.com/cockroachdb/cockroach/pkg/util/protoutil" 19 "github.com/cockroachdb/errors" 20 "github.com/cockroachdb/pebble" 21 ) 22 23 // Wrapper struct around a pebble.Batch. 24 type pebbleBatch struct { 25 db *pebble.DB 26 batch *pebble.Batch 27 buf []byte 28 prefixIter pebbleIterator 29 normalIter pebbleIterator 30 closed bool 31 isDistinct bool 32 distinctOpen bool 33 parentBatch *pebbleBatch 34 } 35 36 var _ Batch = &pebbleBatch{} 37 38 var pebbleBatchPool = sync.Pool{ 39 New: func() interface{} { 40 return &pebbleBatch{} 41 }, 42 } 43 44 // Instantiates a new pebbleBatch. 45 func newPebbleBatch(db *pebble.DB, batch *pebble.Batch) *pebbleBatch { 46 pb := pebbleBatchPool.Get().(*pebbleBatch) 47 *pb = pebbleBatch{ 48 db: db, 49 batch: batch, 50 buf: pb.buf, 51 prefixIter: pebbleIterator{ 52 lowerBoundBuf: pb.prefixIter.lowerBoundBuf, 53 upperBoundBuf: pb.prefixIter.upperBoundBuf, 54 reusable: true, 55 }, 56 normalIter: pebbleIterator{ 57 lowerBoundBuf: pb.normalIter.lowerBoundBuf, 58 upperBoundBuf: pb.normalIter.upperBoundBuf, 59 reusable: true, 60 }, 61 } 62 return pb 63 } 64 65 // Close implements the Batch interface. 66 func (p *pebbleBatch) Close() { 67 if p.closed { 68 panic("closing an already-closed pebbleBatch") 69 } 70 p.closed = true 71 72 // Destroy the iterators before closing the batch. 73 p.prefixIter.destroy() 74 p.normalIter.destroy() 75 76 if !p.isDistinct { 77 _ = p.batch.Close() 78 p.batch = nil 79 } else { 80 p.parentBatch.distinctOpen = false 81 p.isDistinct = false 82 } 83 84 pebbleBatchPool.Put(p) 85 } 86 87 // Closed implements the Batch interface. 88 func (p *pebbleBatch) Closed() bool { 89 return p.closed 90 } 91 92 // ExportToSst is part of the engine.Reader interface. 93 func (p *pebbleBatch) ExportToSst( 94 startKey, endKey roachpb.Key, 95 startTS, endTS hlc.Timestamp, 96 exportAllRevisions bool, 97 targetSize, maxSize uint64, 98 io IterOptions, 99 ) ([]byte, roachpb.BulkOpSummary, roachpb.Key, error) { 100 panic("unimplemented") 101 } 102 103 // Get implements the Batch interface. 104 func (p *pebbleBatch) Get(key MVCCKey) ([]byte, error) { 105 r := pebble.Reader(p.batch) 106 if !p.isDistinct { 107 if !p.batch.Indexed() { 108 panic("write-only batch") 109 } 110 if p.distinctOpen { 111 panic("distinct batch open") 112 } 113 } else if !p.batch.Indexed() { 114 r = p.db 115 } 116 if len(key.Key) == 0 { 117 return nil, emptyKeyError() 118 } 119 p.buf = EncodeKeyToBuf(p.buf[:0], key) 120 ret, closer, err := r.Get(p.buf) 121 if closer != nil { 122 retCopy := make([]byte, len(ret)) 123 copy(retCopy, ret) 124 ret = retCopy 125 closer.Close() 126 } 127 if errors.Is(err, pebble.ErrNotFound) || len(ret) == 0 { 128 return nil, nil 129 } 130 return ret, err 131 } 132 133 // GetProto implements the Batch interface. 134 func (p *pebbleBatch) GetProto( 135 key MVCCKey, msg protoutil.Message, 136 ) (ok bool, keyBytes, valBytes int64, err error) { 137 r := pebble.Reader(p.batch) 138 if !p.isDistinct { 139 if !p.batch.Indexed() { 140 panic("write-only batch") 141 } 142 if p.distinctOpen { 143 panic("distinct batch open") 144 } 145 } else if !p.batch.Indexed() { 146 r = p.db 147 } 148 if len(key.Key) == 0 { 149 return false, 0, 0, emptyKeyError() 150 } 151 p.buf = EncodeKeyToBuf(p.buf[:0], key) 152 val, closer, err := r.Get(p.buf) 153 if closer != nil { 154 if msg != nil { 155 err = protoutil.Unmarshal(val, msg) 156 } 157 keyBytes = int64(len(p.buf)) 158 valBytes = int64(len(val)) 159 closer.Close() 160 return true, keyBytes, valBytes, err 161 } 162 if errors.Is(err, pebble.ErrNotFound) { 163 return false, 0, 0, nil 164 } 165 return false, 0, 0, err 166 } 167 168 // Iterate implements the Batch interface. 169 func (p *pebbleBatch) Iterate( 170 start, end roachpb.Key, f func(MVCCKeyValue) (stop bool, err error), 171 ) error { 172 if p.distinctOpen { 173 panic("distinct batch open") 174 } 175 return iterateOnReader(p, start, end, f) 176 } 177 178 // NewIterator implements the Batch interface. 179 func (p *pebbleBatch) NewIterator(opts IterOptions) Iterator { 180 if !opts.Prefix && len(opts.UpperBound) == 0 && len(opts.LowerBound) == 0 { 181 panic("iterator must set prefix or upper bound or lower bound") 182 } 183 184 if !p.batch.Indexed() && !p.isDistinct { 185 panic("write-only batch") 186 } 187 if p.distinctOpen { 188 panic("distinct batch open") 189 } 190 191 if opts.MinTimestampHint != (hlc.Timestamp{}) { 192 // Iterators that specify timestamp bounds cannot be cached. 193 return newPebbleIterator(p.batch, opts) 194 } 195 196 iter := &p.normalIter 197 if opts.Prefix { 198 iter = &p.prefixIter 199 } 200 if iter.inuse { 201 panic("iterator already in use") 202 } 203 204 if iter.iter != nil { 205 iter.setOptions(opts) 206 } else if p.batch.Indexed() { 207 iter.init(p.batch, opts) 208 } else { 209 iter.init(p.db, opts) 210 } 211 212 iter.inuse = true 213 return iter 214 } 215 216 // NewIterator implements the Batch interface. 217 func (p *pebbleBatch) ApplyBatchRepr(repr []byte, sync bool) error { 218 if p.distinctOpen { 219 panic("distinct batch open") 220 } 221 222 var batch pebble.Batch 223 if err := batch.SetRepr(repr); err != nil { 224 return err 225 } 226 227 return p.batch.Apply(&batch, nil) 228 } 229 230 // Clear implements the Batch interface. 231 func (p *pebbleBatch) Clear(key MVCCKey) error { 232 if p.distinctOpen { 233 panic("distinct batch open") 234 } 235 if len(key.Key) == 0 { 236 return emptyKeyError() 237 } 238 239 p.buf = EncodeKeyToBuf(p.buf[:0], key) 240 return p.batch.Delete(p.buf, nil) 241 } 242 243 // SingleClear implements the Batch interface. 244 func (p *pebbleBatch) SingleClear(key MVCCKey) error { 245 if p.distinctOpen { 246 panic("distinct batch open") 247 } 248 if len(key.Key) == 0 { 249 return emptyKeyError() 250 } 251 252 p.buf = EncodeKeyToBuf(p.buf[:0], key) 253 return p.batch.SingleDelete(p.buf, nil) 254 } 255 256 // ClearRange implements the Batch interface. 257 func (p *pebbleBatch) ClearRange(start, end MVCCKey) error { 258 if p.distinctOpen { 259 panic("distinct batch open") 260 } 261 262 p.buf = EncodeKeyToBuf(p.buf[:0], start) 263 buf2 := EncodeKey(end) 264 return p.batch.DeleteRange(p.buf, buf2, nil) 265 } 266 267 // Clear implements the Batch interface. 268 func (p *pebbleBatch) ClearIterRange(iter Iterator, start, end roachpb.Key) error { 269 if p.distinctOpen { 270 panic("distinct batch open") 271 } 272 273 type unsafeRawKeyGetter interface{ unsafeRawKey() []byte } 274 // Note that this method has the side effect of modifying iter's bounds. 275 // Since all calls to `ClearIterRange` are on new throwaway iterators with no 276 // lower bounds, calling SetUpperBound should be sufficient and safe. 277 // Furthermore, the start and end keys are always metadata keys (i.e. 278 // have zero timestamps), so we can ignore the bounds' MVCC timestamps. 279 iter.SetUpperBound(end) 280 iter.SeekGE(MakeMVCCMetadataKey(start)) 281 282 for ; ; iter.Next() { 283 valid, err := iter.Valid() 284 if err != nil { 285 return err 286 } else if !valid { 287 break 288 } 289 290 err = p.batch.Delete(iter.(unsafeRawKeyGetter).unsafeRawKey(), nil) 291 if err != nil { 292 return err 293 } 294 } 295 return nil 296 } 297 298 // Merge implements the Batch interface. 299 func (p *pebbleBatch) Merge(key MVCCKey, value []byte) error { 300 if p.distinctOpen { 301 panic("distinct batch open") 302 } 303 if len(key.Key) == 0 { 304 return emptyKeyError() 305 } 306 307 p.buf = EncodeKeyToBuf(p.buf[:0], key) 308 return p.batch.Merge(p.buf, value, nil) 309 } 310 311 // Put implements the Batch interface. 312 func (p *pebbleBatch) Put(key MVCCKey, value []byte) error { 313 if p.distinctOpen { 314 panic("distinct batch open") 315 } 316 if len(key.Key) == 0 { 317 return emptyKeyError() 318 } 319 320 p.buf = EncodeKeyToBuf(p.buf[:0], key) 321 return p.batch.Set(p.buf, value, nil) 322 } 323 324 // LogData implements the Batch interface. 325 func (p *pebbleBatch) LogData(data []byte) error { 326 return p.batch.LogData(data, nil) 327 } 328 329 func (p *pebbleBatch) LogLogicalOp(op MVCCLogicalOpType, details MVCCLogicalOpDetails) { 330 // No-op. 331 } 332 333 // Commit implements the Batch interface. 334 func (p *pebbleBatch) Commit(sync bool) error { 335 opts := pebble.NoSync 336 if sync { 337 opts = pebble.Sync 338 } 339 if p.batch == nil { 340 panic("called with nil batch") 341 } 342 err := p.batch.Commit(opts) 343 if err != nil { 344 panic(err) 345 } 346 return err 347 } 348 349 // Distinct implements the Batch interface. 350 func (p *pebbleBatch) Distinct() ReadWriter { 351 if p.distinctOpen { 352 panic("distinct batch already open") 353 } 354 // Distinct batches are regular batches with isDistinct set to true. The 355 // parent batch is stored in parentBatch, and all writes on it are disallowed 356 // while the distinct batch is open. Both the distinct batch and the parent 357 // batch share the same underlying pebble.Batch instance. 358 // 359 // The need for distinct batches is distinctly less in Pebble than 360 // RocksDB. In RocksDB, a distinct batch allows reading from a batch without 361 // flushing the buffered writes which is a significant performance 362 // optimization. In Pebble we're still using the same underlying batch and if 363 // it is indexed we'll still be indexing it as we Go. 364 p.distinctOpen = true 365 d := newPebbleBatch(p.db, p.batch) 366 d.parentBatch = p 367 d.isDistinct = true 368 return d 369 } 370 371 // Empty implements the Batch interface. 372 func (p *pebbleBatch) Empty() bool { 373 return p.batch.Count() == 0 374 } 375 376 // Len implements the Batch interface. 377 func (p *pebbleBatch) Len() int { 378 return len(p.batch.Repr()) 379 } 380 381 // Repr implements the Batch interface. 382 func (p *pebbleBatch) Repr() []byte { 383 // Repr expects a "safe" byte slice as its output. The return value of 384 // p.batch.Repr() is an unsafe byte slice owned by p.batch. Since we could be 385 // sending this slice over the wire, we need to make a copy. 386 repr := p.batch.Repr() 387 reprCopy := make([]byte, len(repr)) 388 copy(reprCopy, repr) 389 return reprCopy 390 }