github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/internal/base/comparer.go (about) 1 // Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package base 6 7 import ( 8 "bytes" 9 "encoding/binary" 10 "fmt" 11 "strconv" 12 "unicode/utf8" 13 ) 14 15 // Compare returns -1, 0, or +1 depending on whether a is 'less than', 'equal 16 // to' or 'greater than' b. The two arguments can only be 'equal' if their 17 // contents are exactly equal. Furthermore, the empty slice must be 'less than' 18 // any non-empty slice. Compare is used to compare user keys, such as those 19 // passed as arguments to the various DB methods, as well as those returned 20 // from Separator, Successor, and Split. 21 type Compare func(a, b []byte) int 22 23 // Equal returns true if a and b are equivalent. For a given Compare, 24 // Equal(a,b) must return true iff Compare(a,b) returns zero, that is, 25 // Equal is a (potentially faster) specialization of Compare. 26 type Equal func(a, b []byte) bool 27 28 // AbbreviatedKey returns a fixed length prefix of a user key such that AbbreviatedKey(a) 29 // < AbbreviatedKey(b) iff a < b and AbbreviatedKey(a) > AbbreviatedKey(b) iff a > b. If 30 // AbbreviatedKey(a) == AbbreviatedKey(b) an additional comparison is required to 31 // determine if the two keys are actually equal. 32 // 33 // This helps optimize indexed batch comparisons for cache locality. If a Split 34 // function is specified, AbbreviatedKey usually returns the first eight bytes 35 // of the user key prefix in the order that gives the correct ordering. 36 type AbbreviatedKey func(key []byte) uint64 37 38 // FormatKey returns a formatter for the user key. 39 type FormatKey func(key []byte) fmt.Formatter 40 41 // FormatValue returns a formatter for the user value. The key is also 42 // specified for the value formatter in order to support value formatting that 43 // is dependent on the key. 44 type FormatValue func(key, value []byte) fmt.Formatter 45 46 // Separator is used to construct SSTable index blocks. A trivial implementation 47 // is `return a`, but appending fewer bytes leads to smaller SSTables. 48 // 49 // Given keys a, b for which Compare(a, b) < 0, Separator returns a key k such 50 // that: 51 // 52 // 1. Compare(a, k) <= 0, and 53 // 2. Compare(k, b) < 0. 54 // 55 // As a special case, b may be nil in which case the second condition is dropped. 56 // 57 // For example, if dst, a and b are the []byte equivalents of the strings 58 // "aqua", "black" and "blue", then the result may be "aquablb". 59 // Similarly, if the arguments were "aqua", "green" and "", then the result 60 // may be "aquah". 61 type Separator func(dst, a, b []byte) []byte 62 63 // Successor returns a shortened key given a key a, such that Compare(k, a) >= 64 // 0. A simple implementation may return a unchanged. The dst parameter may be 65 // used to store the returned key, though it is valid to pass nil. The returned 66 // key must be valid to pass to Compare. 67 type Successor func(dst, a []byte) []byte 68 69 // ImmediateSuccessor is invoked with a prefix key ([Split(a) == len(a)]) and 70 // returns the smallest key that is larger than the given prefix a. 71 // ImmediateSuccessor must return a prefix key k such that: 72 // 73 // Split(k) == len(k) and Compare(k, a) > 0 74 // 75 // and there exists no representable k2 such that: 76 // 77 // Split(k2) == len(k2) and Compare(k2, a) > 0 and Compare(k2, k) < 0 78 // 79 // As an example, an implementation built on the natural byte ordering using 80 // bytes.Compare could append a `\0` to `a`. 81 // 82 // The dst parameter may be used to store the returned key, though it is valid 83 // to pass nil. The returned key must be valid to pass to Compare. 84 type ImmediateSuccessor func(dst, a []byte) []byte 85 86 // Split returns the length of the prefix of the user key that corresponds to 87 // the key portion of an MVCC encoding scheme to enable the use of prefix bloom 88 // filters. 89 // 90 // The method will only ever be called with valid MVCC keys, that is, keys that 91 // the user could potentially store in the database. Pebble does not know which 92 // keys are MVCC keys and which are not, and may call Split on both MVCC keys 93 // and non-MVCC keys. 94 // 95 // A trivial MVCC scheme is one in which Split() returns len(a). This 96 // corresponds to assigning a constant version to each key in the database. For 97 // performance reasons, it is preferable to use a `nil` split in this case. 98 // 99 // The returned prefix must have the following properties: 100 // 101 // 1. The prefix must be a byte prefix: 102 // 103 // bytes.HasPrefix(a, prefix(a)) 104 // 105 // 2. A key consisting of just a prefix must sort before all other keys with 106 // that prefix: 107 // 108 // Compare(prefix(a), a) < 0 if len(suffix(a)) > 0 109 // 110 // 3. Prefixes must be used to order keys before suffixes: 111 // 112 // If Compare(a, b) <= 0, then Compare(prefix(a), prefix(b)) <= 0 113 // 114 // 4. Suffixes themselves must be valid keys and comparable, respecting the same 115 // ordering as within a key. 116 // 117 // If Compare(prefix(a), prefix(b)) == 0, then Compare(suffix(a), suffix(b)) == Compare(a, b) 118 type Split func(a []byte) int 119 120 // Comparer defines a total ordering over the space of []byte keys: a 'less 121 // than' relationship. 122 type Comparer struct { 123 Compare Compare 124 Equal Equal 125 AbbreviatedKey AbbreviatedKey 126 FormatKey FormatKey 127 FormatValue FormatValue 128 Separator Separator 129 Split Split 130 Successor Successor 131 ImmediateSuccessor ImmediateSuccessor 132 133 // Name is the name of the comparer. 134 // 135 // The Level-DB on-disk format stores the comparer name, and opening a 136 // database with a different comparer from the one it was created with 137 // will result in an error. 138 Name string 139 } 140 141 // DefaultFormatter is the default implementation of user key formatting: 142 // non-ASCII data is formatted as escaped hexadecimal values. 143 var DefaultFormatter = func(key []byte) fmt.Formatter { 144 return FormatBytes(key) 145 } 146 147 // DefaultComparer is the default implementation of the Comparer interface. 148 // It uses the natural ordering, consistent with bytes.Compare. 149 var DefaultComparer = &Comparer{ 150 Compare: bytes.Compare, 151 Equal: bytes.Equal, 152 153 AbbreviatedKey: func(key []byte) uint64 { 154 if len(key) >= 8 { 155 return binary.BigEndian.Uint64(key) 156 } 157 var v uint64 158 for _, b := range key { 159 v <<= 8 160 v |= uint64(b) 161 } 162 return v << uint(8*(8-len(key))) 163 }, 164 165 FormatKey: DefaultFormatter, 166 167 Separator: func(dst, a, b []byte) []byte { 168 i, n := SharedPrefixLen(a, b), len(dst) 169 dst = append(dst, a...) 170 171 min := len(a) 172 if min > len(b) { 173 min = len(b) 174 } 175 if i >= min { 176 // Do not shorten if one string is a prefix of the other. 177 return dst 178 } 179 180 if a[i] >= b[i] { 181 // b is smaller than a or a is already the shortest possible. 182 return dst 183 } 184 185 if i < len(b)-1 || a[i]+1 < b[i] { 186 i += n 187 dst[i]++ 188 return dst[:i+1] 189 } 190 191 i += n + 1 192 for ; i < len(dst); i++ { 193 if dst[i] != 0xff { 194 dst[i]++ 195 return dst[:i+1] 196 } 197 } 198 return dst 199 }, 200 201 Successor: func(dst, a []byte) (ret []byte) { 202 for i := 0; i < len(a); i++ { 203 if a[i] != 0xff { 204 dst = append(dst, a[:i+1]...) 205 dst[len(dst)-1]++ 206 return dst 207 } 208 } 209 // a is a run of 0xffs, leave it alone. 210 return append(dst, a...) 211 }, 212 213 ImmediateSuccessor: func(dst, a []byte) (ret []byte) { 214 return append(append(dst, a...), 0x00) 215 }, 216 217 // This name is part of the C++ Level-DB implementation's default file 218 // format, and should not be changed. 219 Name: "leveldb.BytewiseComparator", 220 } 221 222 // SharedPrefixLen returns the largest i such that a[:i] equals b[:i]. 223 // This function can be useful in implementing the Comparer interface. 224 func SharedPrefixLen(a, b []byte) int { 225 i, n := 0, len(a) 226 if n > len(b) { 227 n = len(b) 228 } 229 asUint64 := func(c []byte, i int) uint64 { 230 return binary.LittleEndian.Uint64(c[i:]) 231 } 232 for i < n-7 && asUint64(a, i) == asUint64(b, i) { 233 i += 8 234 } 235 for i < n && a[i] == b[i] { 236 i++ 237 } 238 return i 239 } 240 241 // FormatBytes formats a byte slice using hexadecimal escapes for non-ASCII 242 // data. 243 type FormatBytes []byte 244 245 const lowerhex = "0123456789abcdef" 246 247 // Format implements the fmt.Formatter interface. 248 func (p FormatBytes) Format(s fmt.State, c rune) { 249 buf := make([]byte, 0, len(p)) 250 for _, b := range p { 251 if b < utf8.RuneSelf && strconv.IsPrint(rune(b)) { 252 buf = append(buf, b) 253 continue 254 } 255 buf = append(buf, `\x`...) 256 buf = append(buf, lowerhex[b>>4]) 257 buf = append(buf, lowerhex[b&0xF]) 258 } 259 s.Write(buf) 260 }