github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/internal/keyspan/span.go (about)

     1  // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package keyspan // import "github.com/cockroachdb/pebble/internal/keyspan"
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"sort"
    11  	"strconv"
    12  	"strings"
    13  	"unicode"
    14  
    15  	"github.com/cockroachdb/pebble/internal/base"
    16  )
    17  
    18  // Span represents a set of keys over a span of user key space. All of the keys
    19  // within a Span are applied across the span's key span indicated by Start and
    20  // End. Each internal key applied over the user key span appears as a separate
    21  // Key, with its own kind and sequence number. Optionally, each Key may also
    22  // have a Suffix and/or Value.
    23  //
    24  // Note that the start user key is inclusive and the end user key is exclusive.
    25  //
    26  // Currently the only supported key kinds are:
    27  //
    28  //	RANGEDEL, RANGEKEYSET, RANGEKEYUNSET, RANGEKEYDEL.
    29  type Span struct {
    30  	// Start and End encode the user key range of all the contained items, with
    31  	// an inclusive start key and exclusive end key. Both Start and End must be
    32  	// non-nil, or both nil if representing an invalid Span.
    33  	Start, End []byte
    34  	// Keys holds the set of keys applied over the [Start, End) user key range.
    35  	// Keys is sorted by (SeqNum, Kind) descending, unless otherwise specified
    36  	// by the context. If SeqNum and Kind are equal, the order of Keys is
    37  	// undefined. Keys may be empty, even if Start and End are non-nil.
    38  	//
    39  	// Keys are a decoded representation of the internal keys stored in batches
    40  	// or sstable blocks. A single internal key in a range key block may produce
    41  	// several decoded Keys.
    42  	Keys      []Key
    43  	KeysOrder KeysOrder
    44  }
    45  
    46  // KeysOrder describes the ordering of Keys within a Span.
    47  type KeysOrder int8
    48  
    49  const (
    50  	// ByTrailerDesc indicates a Span's keys are sorted by Trailer descending.
    51  	// This is the default ordering, and the ordering used during physical
    52  	// storage.
    53  	ByTrailerDesc KeysOrder = iota
    54  	// BySuffixAsc indicates a Span's keys are sorted by Suffix ascending. This
    55  	// ordering is used during user iteration of range keys.
    56  	BySuffixAsc
    57  )
    58  
    59  // Key represents a single key applied over a span of user keys. A Key is
    60  // contained by a Span which specifies the span of user keys over which the Key
    61  // is applied.
    62  type Key struct {
    63  	// Trailer contains the key kind and sequence number.
    64  	Trailer uint64
    65  	// Suffix holds an optional suffix associated with the key. This is only
    66  	// non-nil for RANGEKEYSET and RANGEKEYUNSET keys.
    67  	Suffix []byte
    68  	// Value holds a logical value associated with the Key. It is NOT the
    69  	// internal value stored in a range key or range deletion block.  This is
    70  	// only non-nil for RANGEKEYSET keys.
    71  	Value []byte
    72  }
    73  
    74  // SeqNum returns the sequence number component of the key.
    75  func (k Key) SeqNum() uint64 {
    76  	return k.Trailer >> 8
    77  }
    78  
    79  // VisibleAt returns true if the provided key is visible at the provided
    80  // snapshot sequence number. It interprets batch sequence numbers as always
    81  // visible, because non-visible batch span keys are filtered when they're
    82  // fragmented.
    83  func (k Key) VisibleAt(snapshot uint64) bool {
    84  	seq := k.SeqNum()
    85  	return seq < snapshot || seq&base.InternalKeySeqNumBatch != 0
    86  }
    87  
    88  // Kind returns the kind component of the key.
    89  func (k Key) Kind() base.InternalKeyKind {
    90  	return base.InternalKeyKind(k.Trailer & 0xff)
    91  }
    92  
    93  // Equal returns true if this Key is equal to the given key. Two keys are said
    94  // to be equal if the two Keys have equal trailers, suffix and value. Suffix
    95  // comparison uses the provided base.Compare func. Value comparison is bytewise.
    96  func (k Key) Equal(equal base.Equal, b Key) bool {
    97  	return k.Trailer == b.Trailer &&
    98  		equal(k.Suffix, b.Suffix) &&
    99  		bytes.Equal(k.Value, b.Value)
   100  }
   101  
   102  // Valid returns true if the span is defined.
   103  func (s *Span) Valid() bool {
   104  	return s.Start != nil && s.End != nil
   105  }
   106  
   107  // Empty returns true if the span does not contain any keys. An empty span may
   108  // still be Valid. A non-empty span must be Valid.
   109  //
   110  // An Empty span may be produced by Visible, or be produced by iterators in
   111  // order to surface the gaps between keys.
   112  func (s *Span) Empty() bool {
   113  	return s == nil || len(s.Keys) == 0
   114  }
   115  
   116  // SmallestKey returns the smallest internal key defined by the span's keys.
   117  // It requires the Span's keys be in ByTrailerDesc order. It panics if the span
   118  // contains no keys or its keys are sorted in a different order.
   119  func (s *Span) SmallestKey() base.InternalKey {
   120  	if len(s.Keys) == 0 {
   121  		panic("pebble: Span contains no keys")
   122  	} else if s.KeysOrder != ByTrailerDesc {
   123  		panic("pebble: span's keys unexpectedly not in trailer order")
   124  	}
   125  	// The first key has the highest (sequence number,kind) tuple.
   126  	return base.InternalKey{
   127  		UserKey: s.Start,
   128  		Trailer: s.Keys[0].Trailer,
   129  	}
   130  }
   131  
   132  // LargestKey returns the largest internal key defined by the span's keys. The
   133  // returned key will always be a "sentinel key" at the end boundary. The
   134  // "sentinel key" models the exclusive end boundary by returning an InternalKey
   135  // with the maximal sequence number, ensuring all InternalKeys with the same
   136  // user key sort after the sentinel key.
   137  //
   138  // It requires the Span's keys be in ByTrailerDesc order. It panics if the span
   139  // contains no keys or its keys are sorted in a different order.
   140  func (s *Span) LargestKey() base.InternalKey {
   141  	if len(s.Keys) == 0 {
   142  		panic("pebble: Span contains no keys")
   143  	} else if s.KeysOrder != ByTrailerDesc {
   144  		panic("pebble: span's keys unexpectedly not in trailer order")
   145  	}
   146  	// The last key has the lowest (sequence number,kind) tuple.
   147  	kind := s.Keys[len(s.Keys)-1].Kind()
   148  	return base.MakeExclusiveSentinelKey(kind, s.End)
   149  }
   150  
   151  // SmallestSeqNum returns the smallest sequence number of a key contained within
   152  // the span. It requires the Span's keys be in ByTrailerDesc order. It panics if
   153  // the span contains no keys or its keys are sorted in a different order.
   154  func (s *Span) SmallestSeqNum() uint64 {
   155  	if len(s.Keys) == 0 {
   156  		panic("pebble: Span contains no keys")
   157  	} else if s.KeysOrder != ByTrailerDesc {
   158  		panic("pebble: span's keys unexpectedly not in trailer order")
   159  	}
   160  
   161  	return s.Keys[len(s.Keys)-1].SeqNum()
   162  }
   163  
   164  // LargestSeqNum returns the largest sequence number of a key contained within
   165  // the span. It requires the Span's keys be in ByTrailerDesc order. It panics if
   166  // the span contains no keys or its keys are sorted in a different order.
   167  func (s *Span) LargestSeqNum() uint64 {
   168  	if len(s.Keys) == 0 {
   169  		panic("pebble: Span contains no keys")
   170  	} else if s.KeysOrder != ByTrailerDesc {
   171  		panic("pebble: span's keys unexpectedly not in trailer order")
   172  	}
   173  	return s.Keys[0].SeqNum()
   174  }
   175  
   176  // TODO(jackson): Replace most of the calls to Visible with more targeted calls
   177  // that avoid the need to construct a new Span.
   178  
   179  // Visible returns a span with the subset of keys visible at the provided
   180  // sequence number. It requires the Span's keys be in ByTrailerDesc order. It
   181  // panics if the span's keys are sorted in a different order.
   182  //
   183  // Visible may incur an allocation, so callers should prefer targeted,
   184  // non-allocating methods when possible.
   185  func (s Span) Visible(snapshot uint64) Span {
   186  	if s.KeysOrder != ByTrailerDesc {
   187  		panic("pebble: span's keys unexpectedly not in trailer order")
   188  	}
   189  
   190  	ret := Span{Start: s.Start, End: s.End}
   191  	if len(s.Keys) == 0 {
   192  		return ret
   193  	}
   194  
   195  	// Keys from indexed batches may force an allocation. The Keys slice is
   196  	// ordered by sequence number, so ordinarily we can return the trailing
   197  	// subslice containing keys with sequence numbers less than `seqNum`.
   198  	//
   199  	// However, batch keys are special. Only visible batch keys are included
   200  	// when an Iterator's batch spans are fragmented. They must always be
   201  	// visible.
   202  	//
   203  	// Batch keys can create a sandwich of visible batch keys at the beginning
   204  	// of the slice and visible committed keys at the end of the slice, forcing
   205  	// us to allocate a new slice and copy the contents.
   206  	//
   207  	// Care is taking to only incur an allocation only when batch keys and
   208  	// visible keys actually sandwich non-visible keys.
   209  
   210  	// lastBatchIdx and lastNonVisibleIdx are set to the last index of a batch
   211  	// key and a non-visible key respectively.
   212  	lastBatchIdx := -1
   213  	lastNonVisibleIdx := -1
   214  	for i := range s.Keys {
   215  		if seqNum := s.Keys[i].SeqNum(); seqNum&base.InternalKeySeqNumBatch != 0 {
   216  			// Batch key. Always visible.
   217  			lastBatchIdx = i
   218  		} else if seqNum >= snapshot {
   219  			// This key is not visible.
   220  			lastNonVisibleIdx = i
   221  		}
   222  	}
   223  
   224  	// In the following comments: b = batch, h = hidden, v = visible (committed).
   225  	switch {
   226  	case lastNonVisibleIdx == -1:
   227  		// All keys are visible.
   228  		//
   229  		// [b b b], [v v v] and [b b b v v v]
   230  		ret.Keys = s.Keys
   231  	case lastBatchIdx == -1:
   232  		// There are no batch keys, so we can return the continuous subslice
   233  		// starting after the last non-visible Key.
   234  		//
   235  		// h h h [v v v]
   236  		ret.Keys = s.Keys[lastNonVisibleIdx+1:]
   237  	case lastNonVisibleIdx == len(s.Keys)-1:
   238  		// While we have a batch key and non-visible keys, there are no
   239  		// committed visible keys. The 'sandwich' is missing the bottom layer,
   240  		// so we can return the continuous sublice at the beginning.
   241  		//
   242  		// [b b b] h h h
   243  		ret.Keys = s.Keys[0 : lastBatchIdx+1]
   244  	default:
   245  		// This is the problematic sandwich case. Allocate a new slice, copying
   246  		// the batch keys and the visible keys into it.
   247  		//
   248  		// [b b b] h h h [v v v]
   249  		ret.Keys = make([]Key, (lastBatchIdx+1)+(len(s.Keys)-lastNonVisibleIdx-1))
   250  		copy(ret.Keys, s.Keys[:lastBatchIdx+1])
   251  		copy(ret.Keys[lastBatchIdx+1:], s.Keys[lastNonVisibleIdx+1:])
   252  	}
   253  	return ret
   254  }
   255  
   256  // VisibleAt returns true if the span contains a key visible at the provided
   257  // snapshot. Keys with sequence numbers with the batch bit set are treated as
   258  // always visible.
   259  //
   260  // VisibleAt requires the Span's keys be in ByTrailerDesc order. It panics if
   261  // the span's keys are sorted in a different order.
   262  func (s *Span) VisibleAt(snapshot uint64) bool {
   263  	if s.KeysOrder != ByTrailerDesc {
   264  		panic("pebble: span's keys unexpectedly not in trailer order")
   265  	}
   266  	if len(s.Keys) == 0 {
   267  		return false
   268  	} else if first := s.Keys[0].SeqNum(); first&base.InternalKeySeqNumBatch != 0 {
   269  		// Only visible batch keys are included when an Iterator's batch spans
   270  		// are fragmented. They must always be visible.
   271  		return true
   272  	} else {
   273  		// Otherwise we check the last key. Since keys are ordered decreasing in
   274  		// sequence number, the last key has the lowest sequence number of any
   275  		// of the span's keys. If any of the keys are visible, the last key must
   276  		// be visible. Or put differently: if the last key is not visible, then
   277  		// no key is visible.
   278  		return s.Keys[len(s.Keys)-1].SeqNum() < snapshot
   279  	}
   280  }
   281  
   282  // ShallowClone returns the span with a Keys slice owned by the span itself.
   283  // None of the key byte slices are cloned (see Span.DeepClone).
   284  func (s *Span) ShallowClone() Span {
   285  	c := Span{
   286  		Start:     s.Start,
   287  		End:       s.End,
   288  		Keys:      make([]Key, len(s.Keys)),
   289  		KeysOrder: s.KeysOrder,
   290  	}
   291  	copy(c.Keys, s.Keys)
   292  	return c
   293  }
   294  
   295  // DeepClone clones the span, creating copies of all contained slices. DeepClone
   296  // is intended for non-production code paths like tests, the level checker, etc
   297  // because it is allocation heavy.
   298  func (s *Span) DeepClone() Span {
   299  	c := Span{
   300  		Start:     make([]byte, len(s.Start)),
   301  		End:       make([]byte, len(s.End)),
   302  		Keys:      make([]Key, len(s.Keys)),
   303  		KeysOrder: s.KeysOrder,
   304  	}
   305  	copy(c.Start, s.Start)
   306  	copy(c.End, s.End)
   307  	for i := range s.Keys {
   308  		c.Keys[i].Trailer = s.Keys[i].Trailer
   309  		if len(s.Keys[i].Suffix) > 0 {
   310  			c.Keys[i].Suffix = make([]byte, len(s.Keys[i].Suffix))
   311  			copy(c.Keys[i].Suffix, s.Keys[i].Suffix)
   312  		}
   313  		if len(s.Keys[i].Value) > 0 {
   314  			c.Keys[i].Value = make([]byte, len(s.Keys[i].Value))
   315  			copy(c.Keys[i].Value, s.Keys[i].Value)
   316  		}
   317  	}
   318  	return c
   319  }
   320  
   321  // Contains returns true if the specified key resides within the span's bounds.
   322  func (s *Span) Contains(cmp base.Compare, key []byte) bool {
   323  	return cmp(s.Start, key) <= 0 && cmp(key, s.End) < 0
   324  }
   325  
   326  // Covers returns true if the span covers keys at seqNum.
   327  //
   328  // Covers requires the Span's keys be in ByTrailerDesc order. It panics if the
   329  // span's keys are sorted in a different order.
   330  func (s Span) Covers(seqNum uint64) bool {
   331  	if s.KeysOrder != ByTrailerDesc {
   332  		panic("pebble: span's keys unexpectedly not in trailer order")
   333  	}
   334  	return !s.Empty() && s.Keys[0].SeqNum() > seqNum
   335  }
   336  
   337  // CoversAt returns true if the span contains a key that is visible at the
   338  // provided snapshot sequence number, and that key's sequence number is higher
   339  // than seqNum.
   340  //
   341  // Keys with sequence numbers with the batch bit set are treated as always
   342  // visible.
   343  //
   344  // CoversAt requires the Span's keys be in ByTrailerDesc order. It panics if the
   345  // span's keys are sorted in a different order.
   346  func (s *Span) CoversAt(snapshot, seqNum uint64) bool {
   347  	if s.KeysOrder != ByTrailerDesc {
   348  		panic("pebble: span's keys unexpectedly not in trailer order")
   349  	}
   350  	// NB: A key is visible at `snapshot` if its sequence number is strictly
   351  	// less than `snapshot`. See base.Visible.
   352  	for i := range s.Keys {
   353  		if kseq := s.Keys[i].SeqNum(); kseq&base.InternalKeySeqNumBatch != 0 {
   354  			// Only visible batch keys are included when an Iterator's batch spans
   355  			// are fragmented. They must always be visible.
   356  			return kseq > seqNum
   357  		} else if kseq < snapshot {
   358  			return kseq > seqNum
   359  		}
   360  	}
   361  	return false
   362  }
   363  
   364  // String returns a string representation of the span.
   365  func (s Span) String() string {
   366  	return fmt.Sprint(prettySpan{Span: s, formatKey: base.DefaultFormatter})
   367  }
   368  
   369  // Pretty returns a formatter for the span.
   370  func (s Span) Pretty(f base.FormatKey) fmt.Formatter {
   371  	// TODO(jackson): Take a base.FormatValue to format Key.Value too.
   372  	return prettySpan{s, f}
   373  }
   374  
   375  type prettySpan struct {
   376  	Span
   377  	formatKey base.FormatKey
   378  }
   379  
   380  func (s prettySpan) Format(fs fmt.State, c rune) {
   381  	if !s.Valid() {
   382  		fmt.Fprintf(fs, "<invalid>")
   383  		return
   384  	}
   385  	fmt.Fprintf(fs, "%s-%s:{", s.formatKey(s.Start), s.formatKey(s.End))
   386  	for i, k := range s.Keys {
   387  		if i > 0 {
   388  			fmt.Fprint(fs, " ")
   389  		}
   390  		fmt.Fprintf(fs, "(#%d,%s", k.SeqNum(), k.Kind())
   391  		if len(k.Suffix) > 0 || len(k.Value) > 0 {
   392  			fmt.Fprintf(fs, ",%s", k.Suffix)
   393  		}
   394  		if len(k.Value) > 0 {
   395  			fmt.Fprintf(fs, ",%s", k.Value)
   396  		}
   397  		fmt.Fprint(fs, ")")
   398  	}
   399  	fmt.Fprintf(fs, "}")
   400  }
   401  
   402  // SortKeysByTrailer sorts a keys slice by trailer.
   403  func SortKeysByTrailer(keys *[]Key) {
   404  	// NB: keys is a pointer to a slice instead of a slice to avoid `sorted`
   405  	// escaping to the heap.
   406  	sorted := (*keysBySeqNumKind)(keys)
   407  	sort.Sort(sorted)
   408  }
   409  
   410  // KeysBySuffix implements sort.Interface, sorting its member Keys slice to by
   411  // Suffix in the order dictated by Cmp.
   412  type KeysBySuffix struct {
   413  	Cmp  base.Compare
   414  	Keys []Key
   415  }
   416  
   417  func (s *KeysBySuffix) Len() int           { return len(s.Keys) }
   418  func (s *KeysBySuffix) Less(i, j int) bool { return s.Cmp(s.Keys[i].Suffix, s.Keys[j].Suffix) < 0 }
   419  func (s *KeysBySuffix) Swap(i, j int)      { s.Keys[i], s.Keys[j] = s.Keys[j], s.Keys[i] }
   420  
   421  // ParseSpan parses the string representation of a Span. It's intended for
   422  // tests. ParseSpan panics if passed a malformed span representation.
   423  func ParseSpan(input string) Span {
   424  	var s Span
   425  	parts := strings.FieldsFunc(input, func(r rune) bool {
   426  		switch r {
   427  		case '-', ':', '{', '}':
   428  			return true
   429  		default:
   430  			return unicode.IsSpace(r)
   431  		}
   432  	})
   433  	s.Start, s.End = []byte(parts[0]), []byte(parts[1])
   434  
   435  	// Each of the remaining parts represents a single Key.
   436  	s.Keys = make([]Key, 0, len(parts)-2)
   437  	for _, p := range parts[2:] {
   438  		keyFields := strings.FieldsFunc(p, func(r rune) bool {
   439  			switch r {
   440  			case '#', ',', '(', ')':
   441  				return true
   442  			default:
   443  				return unicode.IsSpace(r)
   444  			}
   445  		})
   446  
   447  		var k Key
   448  		// Parse the sequence number.
   449  		seqNum, err := strconv.ParseUint(keyFields[0], 10, 64)
   450  		if err != nil {
   451  			panic(fmt.Sprintf("invalid sequence number: %q: %s", keyFields[0], err))
   452  		}
   453  		// Parse the key kind.
   454  		kind := base.ParseKind(keyFields[1])
   455  		k.Trailer = base.MakeTrailer(seqNum, kind)
   456  		// Parse the optional suffix.
   457  		if len(keyFields) >= 3 {
   458  			k.Suffix = []byte(keyFields[2])
   459  		}
   460  		// Parse the optional value.
   461  		if len(keyFields) >= 4 {
   462  			k.Value = []byte(keyFields[3])
   463  		}
   464  		s.Keys = append(s.Keys, k)
   465  	}
   466  	return s
   467  }