github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/internal/base/comparer.go (about)

     1  // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package base
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"fmt"
    11  	"strconv"
    12  	"unicode/utf8"
    13  )
    14  
// Compare returns -1, 0, or +1 depending on whether a is 'less than', 'equal
// to' or 'greater than' b. The two arguments can only be 'equal' if their
// contents are exactly equal. Furthermore, the empty slice must be 'less than'
// any non-empty slice.
//
// Compare is used to compare user keys, such as those passed as arguments to
// the various DB methods, as well as those returned from Separator, Successor,
// and Split.
type Compare func(a, b []byte) int
    22  
// Equal returns true if a and b are equivalent. For a given Compare,
// Equal(a, b) must return true if and only if Compare(a, b) returns zero;
// that is, Equal is a (potentially faster) specialization of Compare.
type Equal func(a, b []byte) bool
    27  
// AbbreviatedKey returns a fixed-length (uint64) abbreviation of a user key
// such that AbbreviatedKey(a) < AbbreviatedKey(b) implies a < b, and
// AbbreviatedKey(a) > AbbreviatedKey(b) implies a > b. The converse does not
// hold: distinct keys may share an abbreviation, so if AbbreviatedKey(a) ==
// AbbreviatedKey(b) an additional comparison is required to determine how the
// two keys are ordered and whether they are actually equal.
//
// This helps optimize indexed batch comparisons for cache locality. If a Split
// function is specified, AbbreviatedKey usually returns the first eight bytes
// of the user key prefix in the order that gives the correct ordering.
type AbbreviatedKey func(key []byte) uint64
    37  
// FormatKey returns a formatter for the user key. DefaultFormatter is the
// implementation used by DefaultComparer.
type FormatKey func(key []byte) fmt.Formatter
    40  
// FormatValue returns a formatter for the user value. The key is passed
// alongside the value in order to support value formatting that is dependent
// on the key.
type FormatValue func(key, value []byte) fmt.Formatter
    45  
// Separator is used to construct SSTable index blocks. A trivial implementation
// is `return append(dst, a...)`, but appending fewer bytes leads to smaller
// SSTables.
//
// Given keys a, b for which Compare(a, b) < 0, Separator appends to dst a key
// k such that:
//
// 1. Compare(a, k) <= 0, and
// 2. Compare(k, b) < 0.
//
// As a special case, b may be nil in which case the second condition is dropped.
//
// The returned slice is dst followed by k. For example, if dst, a and b are
// the []byte equivalents of the strings "aqua", "black" and "blue", then the
// result may be "aquablb". Similarly, if the arguments were "aqua", "green"
// and "", then the result may be "aquah".
type Separator func(dst, a, b []byte) []byte
    62  
// Successor returns a shortened key k given a key a, such that
// Compare(k, a) >= 0. A simple implementation may return a unchanged. The dst
// parameter may be used to store the returned key, though it is valid to pass
// nil. The returned key must be valid to pass to Compare.
type Successor func(dst, a []byte) []byte
    68  
// ImmediateSuccessor is invoked with a prefix key a (that is, a key for which
// Split(a) == len(a)) and returns the smallest prefix key that is larger than
// a. ImmediateSuccessor must return a prefix key k such that:
//
//	Split(k) == len(k) and Compare(k, a) > 0
//
// and there exists no representable k2 such that:
//
//	Split(k2) == len(k2) and Compare(k2, a) > 0 and Compare(k2, k) < 0
//
// As an example, an implementation built on the natural byte ordering using
// bytes.Compare could append a `\0` to `a`.
//
// The dst parameter may be used to store the returned key, though it is valid
// to pass nil. The returned key must be valid to pass to Compare.
type ImmediateSuccessor func(dst, a []byte) []byte
    85  
// Split returns the length of the prefix of the user key that corresponds to
// the key portion of an MVCC encoding scheme to enable the use of prefix bloom
// filters. In the properties below, prefix(a) denotes a[:Split(a)] and
// suffix(a) denotes a[Split(a):].
//
// The method will only ever be called with valid keys, that is, keys that the
// user could potentially store in the database. Pebble does not know which
// keys are MVCC keys and which are not, and may call Split on both MVCC keys
// and non-MVCC keys.
//
// A trivial MVCC scheme is one in which Split() returns len(a). This
// corresponds to assigning a constant version to each key in the database. For
// performance reasons, it is preferable to use a `nil` split in this case.
//
// The returned prefix must have the following properties:
//
//  1. The prefix must be a byte prefix:
//
//     bytes.HasPrefix(a, prefix(a))
//
//  2. A key consisting of just a prefix must sort before all other keys with
//     that prefix:
//
//     Compare(prefix(a), a) < 0 if len(suffix(a)) > 0
//
//  3. Prefixes must be used to order keys before suffixes:
//
//     If Compare(a, b) <= 0, then Compare(prefix(a), prefix(b)) <= 0
//
//  4. Suffixes themselves must be valid keys and comparable, respecting the same
//     ordering as within a key.
//
//     If Compare(prefix(a), prefix(b)) == 0, then Compare(suffix(a), suffix(b)) == Compare(a, b)
type Split func(a []byte) int
   119  
// Comparer defines a total ordering over the space of []byte keys: a 'less
// than' relationship. It bundles the ordering function with the auxiliary
// key operations; the contract of each function field is documented on the
// field's type.
type Comparer struct {
	Compare            Compare
	Equal              Equal
	AbbreviatedKey     AbbreviatedKey
	FormatKey          FormatKey
	FormatValue        FormatValue
	Separator          Separator
	Split              Split
	Successor          Successor
	ImmediateSuccessor ImmediateSuccessor

	// Name is the name of the comparer.
	//
	// The Level-DB on-disk format stores the comparer name, and opening a
	// database with a different comparer from the one it was created with
	// will result in an error.
	Name string
}
   140  
// DefaultFormatter is the default implementation of user key formatting: it
// wraps the key in FormatBytes, which emits printable ASCII bytes verbatim and
// every other byte (non-printable or non-ASCII) as an escaped hexadecimal
// value of the form `\xNN`.
var DefaultFormatter = func(key []byte) fmt.Formatter {
	return FormatBytes(key)
}
   146  
   147  // DefaultComparer is the default implementation of the Comparer interface.
   148  // It uses the natural ordering, consistent with bytes.Compare.
   149  var DefaultComparer = &Comparer{
   150  	Compare: bytes.Compare,
   151  	Equal:   bytes.Equal,
   152  
   153  	AbbreviatedKey: func(key []byte) uint64 {
   154  		if len(key) >= 8 {
   155  			return binary.BigEndian.Uint64(key)
   156  		}
   157  		var v uint64
   158  		for _, b := range key {
   159  			v <<= 8
   160  			v |= uint64(b)
   161  		}
   162  		return v << uint(8*(8-len(key)))
   163  	},
   164  
   165  	FormatKey: DefaultFormatter,
   166  
   167  	Separator: func(dst, a, b []byte) []byte {
   168  		i, n := SharedPrefixLen(a, b), len(dst)
   169  		dst = append(dst, a...)
   170  
   171  		min := len(a)
   172  		if min > len(b) {
   173  			min = len(b)
   174  		}
   175  		if i >= min {
   176  			// Do not shorten if one string is a prefix of the other.
   177  			return dst
   178  		}
   179  
   180  		if a[i] >= b[i] {
   181  			// b is smaller than a or a is already the shortest possible.
   182  			return dst
   183  		}
   184  
   185  		if i < len(b)-1 || a[i]+1 < b[i] {
   186  			i += n
   187  			dst[i]++
   188  			return dst[:i+1]
   189  		}
   190  
   191  		i += n + 1
   192  		for ; i < len(dst); i++ {
   193  			if dst[i] != 0xff {
   194  				dst[i]++
   195  				return dst[:i+1]
   196  			}
   197  		}
   198  		return dst
   199  	},
   200  
   201  	Successor: func(dst, a []byte) (ret []byte) {
   202  		for i := 0; i < len(a); i++ {
   203  			if a[i] != 0xff {
   204  				dst = append(dst, a[:i+1]...)
   205  				dst[len(dst)-1]++
   206  				return dst
   207  			}
   208  		}
   209  		// a is a run of 0xffs, leave it alone.
   210  		return append(dst, a...)
   211  	},
   212  
   213  	ImmediateSuccessor: func(dst, a []byte) (ret []byte) {
   214  		return append(append(dst, a...), 0x00)
   215  	},
   216  
   217  	// This name is part of the C++ Level-DB implementation's default file
   218  	// format, and should not be changed.
   219  	Name: "leveldb.BytewiseComparator",
   220  }
   221  
   222  // SharedPrefixLen returns the largest i such that a[:i] equals b[:i].
   223  // This function can be useful in implementing the Comparer interface.
   224  func SharedPrefixLen(a, b []byte) int {
   225  	i, n := 0, len(a)
   226  	if n > len(b) {
   227  		n = len(b)
   228  	}
   229  	asUint64 := func(c []byte, i int) uint64 {
   230  		return binary.LittleEndian.Uint64(c[i:])
   231  	}
   232  	for i < n-7 && asUint64(a, i) == asUint64(b, i) {
   233  		i += 8
   234  	}
   235  	for i < n && a[i] == b[i] {
   236  		i++
   237  	}
   238  	return i
   239  }
   240  
   241  // FormatBytes formats a byte slice using hexadecimal escapes for non-ASCII
   242  // data.
   243  type FormatBytes []byte
   244  
   245  const lowerhex = "0123456789abcdef"
   246  
   247  // Format implements the fmt.Formatter interface.
   248  func (p FormatBytes) Format(s fmt.State, c rune) {
   249  	buf := make([]byte, 0, len(p))
   250  	for _, b := range p {
   251  		if b < utf8.RuneSelf && strconv.IsPrint(rune(b)) {
   252  			buf = append(buf, b)
   253  			continue
   254  		}
   255  		buf = append(buf, `\x`...)
   256  		buf = append(buf, lowerhex[b>>4])
   257  		buf = append(buf, lowerhex[b&0xF])
   258  	}
   259  	s.Write(buf)
   260  }