github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/bulk/kv_buf.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package bulk
    12  
    13  import (
    14  	"bytes"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    17  	"github.com/cockroachdb/errors"
    18  )
    19  
    20  // kvBuf collects []byte key-value pairs in a sortable buffer.
    21  //
    22  // the actual content is stored in a single large slab, instead of individual
    23  // key and value byte slices, reducing the slice header overhead from 48b/pair
    24  // to 16b/pair. The total buffer size cannot be more than 32gb and no one key
    25  // or value may be larger than 512mb.
    26  type kvBuf struct {
    27  	entries []kvBufEntry
    28  	slab    []byte
    29  	MemSize int // size of buffered data including per-entry overhead
    30  }
    31  
    32  // each entry in the buffer has a key and value -- the actual bytes of these are
    33  // stored in the large slab, so the entry only records the offset and length in
    34  // the slab, packing these together into a uint64 for each. The length is stored
    35  // in the lower `lenBits` and the offset in the higher `64-lenBits`.
    36  type kvBufEntry struct {
    37  	keySpan uint64
    38  	valSpan uint64
    39  }
    40  
    41  // entryOverhead is the slice header overhead per KV pair
    42  const entryOverhead = 16
    43  
    44  const (
    45  	lenBits, lenMask  = 28, 1<<lenBits - 1 // 512mb item limit, 32gb buffer limit.
    46  	maxLen, maxOffset = lenMask, 1<<(64-lenBits) - 1
    47  )
    48  
    49  func (b *kvBuf) append(k, v []byte) error {
    50  	if len(b.slab) > maxOffset {
    51  		return errors.Errorf("buffer size %d exceeds limit %d", len(b.slab), maxOffset)
    52  	}
    53  	if len(k) > maxLen {
    54  		return errors.Errorf("length %d exceeds limit %d", len(k), maxLen)
    55  	}
    56  	if len(v) > maxLen {
    57  		return errors.Errorf("length %d exceeds limit %d", len(v), maxLen)
    58  	}
    59  
    60  	b.MemSize += len(k) + len(v) + entryOverhead
    61  	var e kvBufEntry
    62  	e.keySpan = uint64(len(b.slab)<<lenBits) | uint64(len(k)&lenMask)
    63  	b.slab = append(b.slab, k...)
    64  	e.valSpan = uint64(len(b.slab)<<lenBits) | uint64(len(v)&lenMask)
    65  	b.slab = append(b.slab, v...)
    66  
    67  	b.entries = append(b.entries, e)
    68  	return nil
    69  }
    70  
    71  func (b *kvBuf) read(span uint64) []byte {
    72  	length := span & lenMask
    73  	if length == 0 {
    74  		return nil
    75  	}
    76  	offset := span >> lenBits
    77  	return b.slab[offset : offset+length]
    78  }
    79  
    80  func (b *kvBuf) Key(i int) roachpb.Key {
    81  	return b.read(b.entries[i].keySpan)
    82  }
    83  
    84  func (b *kvBuf) Value(i int) []byte {
    85  	return b.read(b.entries[i].valSpan)
    86  }
    87  
    88  // Len implements sort.Interface.
    89  func (b *kvBuf) Len() int {
    90  	return len(b.entries)
    91  }
    92  
    93  // Less implements sort.Interface.
    94  func (b *kvBuf) Less(i, j int) bool {
    95  
    96  	return bytes.Compare(b.read(b.entries[i].keySpan), b.read(b.entries[j].keySpan)) < 0
    97  }
    98  
    99  // Swap implements sort.Interface.
   100  func (b *kvBuf) Swap(i, j int) {
   101  	b.entries[i], b.entries[j] = b.entries[j], b.entries[i]
   102  }
   103  
   104  func (b *kvBuf) Reset() {
   105  	b.slab = b.slab[:0]
   106  	b.entries = b.entries[:0]
   107  	b.MemSize = 0
   108  }