github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/rangekey/coalesce.go (about) 1 // Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package rangekey 6 7 import ( 8 "bytes" 9 "sort" 10 11 "github.com/zuoyebang/bitalostable/internal/base" 12 "github.com/zuoyebang/bitalostable/internal/invariants" 13 "github.com/zuoyebang/bitalostable/internal/keyspan" 14 "github.com/zuoyebang/bitalostable/internal/manifest" 15 ) 16 17 // UserIteratorConfig holds state for constructing the range key iterator stack 18 // for user iteration. 19 type UserIteratorConfig struct { 20 snapshot uint64 21 comparer *base.Comparer 22 miter keyspan.MergingIter 23 biter keyspan.BoundedIter 24 diter keyspan.DefragmentingIter 25 liters [manifest.NumLevels]keyspan.LevelIter 26 litersUsed int 27 sortBuf keysBySuffix 28 } 29 30 // Init initializes the range key iterator stack for user iteration. The 31 // resulting fragment iterator applies range key semantics, defragments spans 32 // according to their user-observable state and removes all Keys other than 33 // RangeKeySets describing the current state of range keys. The resulting spans 34 // contain Keys sorted by Suffix. 35 // 36 // The snapshot sequence number parameter determines which keys are visible. Any 37 // keys not visible at the provided snapshot are ignored. 38 func (ui *UserIteratorConfig) Init( 39 comparer *base.Comparer, 40 snapshot uint64, 41 lower, upper []byte, 42 hasPrefix *bool, 43 prefix *[]byte, 44 iters ...keyspan.FragmentIterator, 45 ) keyspan.FragmentIterator { 46 ui.snapshot = snapshot 47 ui.comparer = comparer 48 ui.miter.Init(comparer.Compare, ui, iters...) 49 ui.biter.Init(comparer.Compare, comparer.Split, &ui.miter, lower, upper, hasPrefix, prefix) 50 ui.diter.Init(comparer, &ui.biter, ui, keyspan.StaticDefragmentReducer) 51 ui.litersUsed = 0 52 return &ui.diter 53 } 54 55 // AddLevel adds a new level to the bottom of the iterator stack. AddLevel 56 // must be called after Init and before any other method on the iterator. 57 func (ui *UserIteratorConfig) AddLevel(iter keyspan.FragmentIterator) { 58 ui.miter.AddLevel(iter) 59 } 60 61 // NewLevelIter returns a pointer to a newly allocated or reused 62 // keyspan.LevelIter. The caller is responsible for calling Init() on this 63 // instance. 64 func (ui *UserIteratorConfig) NewLevelIter() *keyspan.LevelIter { 65 if ui.litersUsed >= len(ui.liters) { 66 return &keyspan.LevelIter{} 67 } 68 ui.litersUsed++ 69 return &ui.liters[ui.litersUsed-1] 70 } 71 72 // SetBounds propagates bounds to the iterator stack. The fragment iterator 73 // interface ordinarily doesn't enforce bounds, so this is exposed as an 74 // explicit method on the user iterator config. 75 func (ui *UserIteratorConfig) SetBounds(lower, upper []byte) { 76 ui.biter.SetBounds(lower, upper) 77 } 78 79 // Transform implements the keyspan.Transformer interface for use with a 80 // keyspan.MergingIter. It transforms spans by resolving range keys at the 81 // provided snapshot sequence number. Shadowing of keys is resolved (eg, removal 82 // of unset keys, removal of keys overwritten by a set at the same suffix, etc) 83 // and then non-RangeKeySet keys are removed. The resulting transformed spans 84 // only contain RangeKeySets describing the state visible at the provided 85 // sequence number, and hold their Keys sorted by Suffix. 86 func (ui *UserIteratorConfig) Transform(cmp base.Compare, s keyspan.Span, dst *keyspan.Span) error { 87 // Apply shadowing of keys. 88 dst.Start = s.Start 89 dst.End = s.End 90 ui.sortBuf = keysBySuffix{ 91 cmp: cmp, 92 keys: dst.Keys[:0], 93 } 94 if err := coalesce(&ui.sortBuf, s.Visible(ui.snapshot).Keys, &dst.Keys); err != nil { 95 return err 96 } 97 // During user iteration over range keys, unsets and deletes don't 98 // matter. Remove them. This step helps logical defragmentation during 99 // iteration. 100 keys := dst.Keys 101 dst.Keys = dst.Keys[:0] 102 for i := range keys { 103 switch keys[i].Kind() { 104 case base.InternalKeyKindRangeKeySet: 105 if invariants.Enabled && len(dst.Keys) > 0 && cmp(dst.Keys[len(dst.Keys)-1].Suffix, keys[i].Suffix) > 0 { 106 panic("bitalostable: keys unexpectedly not in ascending suffix order") 107 } 108 dst.Keys = append(dst.Keys, keys[i]) 109 case base.InternalKeyKindRangeKeyUnset: 110 if invariants.Enabled && len(dst.Keys) > 0 && cmp(dst.Keys[len(dst.Keys)-1].Suffix, keys[i].Suffix) > 0 { 111 panic("bitalostable: keys unexpectedly not in ascending suffix order") 112 } 113 // Skip. 114 continue 115 case base.InternalKeyKindRangeKeyDelete: 116 // Skip. 117 continue 118 default: 119 return base.CorruptionErrorf("bitalostable: unrecognized range key kind %s", keys[i].Kind()) 120 } 121 } 122 // coalesce results in dst.Keys being sorted by Suffix. 123 dst.KeysOrder = keyspan.BySuffixAsc 124 return nil 125 } 126 127 // ShouldDefragment implements the DefragmentMethod interface and configures a 128 // DefragmentingIter to defragment spans of range keys if their user-visible 129 // state is identical. This defragmenting method assumes the provided spans have 130 // already been transformed through (UserIterationConfig).Transform, so all 131 // RangeKeySets are user-visible sets and are already in Suffix order. This 132 // defragmenter checks for equality between set suffixes and values (ignoring 133 // sequence numbers). It's intended for use during user iteration, when the 134 // wrapped keyspan iterator is merging spans across all levels of the LSM. 135 func (ui *UserIteratorConfig) ShouldDefragment(equal base.Equal, a, b *keyspan.Span) bool { 136 // This implementation must only be used on spans that have transformed by 137 // ui.Transform. The transform applies shadowing, removes all keys besides 138 // the resulting Sets and sorts the keys by suffix. Since shadowing has been 139 // applied, each Set must set a unique suffix. If the two spans are 140 // equivalent, they must have the same number of range key sets. 141 if len(a.Keys) != len(b.Keys) || len(a.Keys) == 0 { 142 return false 143 } 144 if a.KeysOrder != keyspan.BySuffixAsc || b.KeysOrder != keyspan.BySuffixAsc { 145 panic("bitalostable: range key span's keys unexpectedly not in ascending suffix order") 146 } 147 148 ret := true 149 for i := range a.Keys { 150 if invariants.Enabled { 151 if a.Keys[i].Kind() != base.InternalKeyKindRangeKeySet || 152 b.Keys[i].Kind() != base.InternalKeyKindRangeKeySet { 153 panic("bitalostable: unexpected non-RangeKeySet during defragmentation") 154 } 155 if i > 0 && (ui.comparer.Compare(a.Keys[i].Suffix, a.Keys[i-1].Suffix) < 0 || 156 ui.comparer.Compare(b.Keys[i].Suffix, b.Keys[i-1].Suffix) < 0) { 157 panic("bitalostable: range keys not ordered by suffix during defragmentation") 158 } 159 } 160 if !equal(a.Keys[i].Suffix, b.Keys[i].Suffix) { 161 ret = false 162 break 163 } 164 if !bytes.Equal(a.Keys[i].Value, b.Keys[i].Value) { 165 ret = false 166 break 167 } 168 } 169 return ret 170 } 171 172 // Coalesce imposes range key semantics and coalesces range keys with the same 173 // bounds. Coalesce drops any keys shadowed by more recent sets, unsets or 174 // deletes. Coalesce modifies the provided span's Keys slice, reslicing the 175 // slice to remove dropped keys. 176 // 177 // Coalescence has subtle behavior with respect to sequence numbers. Coalesce 178 // depends on a keyspan.Span's Keys being sorted in sequence number descending 179 // order. The first key has the largest sequence number. The returned coalesced 180 // span includes only the largest sequence number. All other sequence numbers 181 // are forgotten. When a compaction constructs output range keys from a 182 // coalesced span, it produces at most one RANGEKEYSET, one RANGEKEYUNSET and 183 // one RANGEKEYDEL. Each one of these keys adopt the largest sequence number. 184 // 185 // This has the potentially surprising effect of 'promoting' a key to a higher 186 // sequence number. This is okay, because: 187 // - There are no other overlapping keys within the coalesced span of 188 // sequence numbers (otherwise they would be in the compaction, due to 189 // the LSM invariant). 190 // - Range key sequence numbers are never compared to point key sequence 191 // numbers. Range keys and point keys have parallel existences. 192 // - Compactions only coalesce within snapshot stripes. 193 // 194 // Additionally, internal range keys at the same sequence number have subtle 195 // mechanics: 196 // - RANGEKEYSETs shadow RANGEKEYUNSETs of the same suffix. 197 // - RANGEKEYDELs only apply to keys at lower sequence numbers. 198 // 199 // This is required for ingestion. Ingested sstables are assigned a single 200 // sequence number for the file, at which all of the file's keys are visible. 201 // The RANGEKEYSET, RANGEKEYUNSET and RANGEKEYDEL key kinds are ordered such 202 // that among keys with equal sequence numbers (thus ordered by their kinds) the 203 // keys do not affect one another. Ingested sstables are expected to be 204 // consistent with respect to the set/unset suffixes: A given suffix should be 205 // set or unset but not both. 206 // 207 // The resulting dst Keys slice is sorted by Trailer. 208 func Coalesce(cmp base.Compare, keys []keyspan.Key, dst *[]keyspan.Key) error { 209 // TODO(jackson): Currently, Coalesce doesn't actually perform the sequence 210 // number promotion described in the comment above. 211 keysBySuffix := keysBySuffix{ 212 cmp: cmp, 213 keys: (*dst)[:0], 214 } 215 if err := coalesce(&keysBySuffix, keys, dst); err != nil { 216 return err 217 } 218 // coalesce left the keys in *dst sorted by suffix. Re-sort them by trailer. 219 keyspan.SortKeysByTrailer(dst) 220 return nil 221 } 222 223 func coalesce(keysBySuffix *keysBySuffix, keys []keyspan.Key, dst *[]keyspan.Key) error { 224 var deleted bool 225 for i := 0; i < len(keys) && !deleted; i++ { 226 k := keys[i] 227 if invariants.Enabled && i > 0 && k.Trailer > keys[i-1].Trailer { 228 panic("bitalostable: invariant violation: span keys unordered") 229 } 230 231 // NB: Within a given sequence number, keys are ordered as: 232 // RangeKeySet > RangeKeyUnset > RangeKeyDelete 233 // This is significant, because this ensures that none of the range keys 234 // sharing a sequence number shadow each other. 235 switch k.Kind() { 236 case base.InternalKeyKindRangeKeySet: 237 n := len(keysBySuffix.keys) 238 239 if keysBySuffix.get(n, k.Suffix) < n { 240 // This suffix is already set or unset at a higher sequence 241 // number. Skip. 242 continue 243 } 244 keysBySuffix.keys = append(keysBySuffix.keys, k) 245 sort.Sort(keysBySuffix) 246 case base.InternalKeyKindRangeKeyUnset: 247 n := len(keysBySuffix.keys) 248 249 if keysBySuffix.get(n, k.Suffix) < n { 250 // This suffix is already set or unset at a higher sequence 251 // number. Skip. 252 continue 253 } 254 keysBySuffix.keys = append(keysBySuffix.keys, k) 255 sort.Sort(keysBySuffix) 256 case base.InternalKeyKindRangeKeyDelete: 257 // All remaining range keys in this span have been deleted by this 258 // RangeKeyDelete. There's no need to continue looping, because all 259 // the remaining keys are shadowed by this one. The for loop 260 // condition will terminate when it sees the last key is a 261 // range key deletion. 262 keysBySuffix.keys = append(keysBySuffix.keys, k) 263 deleted = true 264 default: 265 return base.CorruptionErrorf("bitalostable: unexpected range key kind %s", k.Kind()) 266 } 267 } 268 269 // Update the span with the (potentially reduced) keys slice. 270 // NB: We don't re-sort by Trailer. The exported Coalesce function however 271 // will. 272 *dst = keysBySuffix.keys 273 return nil 274 } 275 276 type keysBySuffix struct { 277 cmp base.Compare 278 keys []keyspan.Key 279 } 280 281 // get searches for suffix among the first n keys in keys. If the suffix is 282 // found, it returns the index of the item with the suffix. If the suffix is not 283 // found, it returns n. 284 func (s *keysBySuffix) get(n int, suffix []byte) (i int) { 285 // Binary search for the suffix to see if there's an existing key with the 286 // suffix. Only binary search among the first n items. get is called while 287 // appending new keys with suffixes that may sort before existing keys. 288 // The n parameter indicates what portion of the keys slice is sorted and 289 // may contain relevant keys. 290 291 i = sort.Search(n, func(i int) bool { 292 return s.cmp(s.keys[i].Suffix, suffix) >= 0 293 }) 294 if i < n && s.cmp(s.keys[i].Suffix, suffix) == 0 { 295 return i 296 } 297 return n 298 } 299 300 func (s *keysBySuffix) Len() int { return len(s.keys) } 301 func (s *keysBySuffix) Less(i, j int) bool { return s.cmp(s.keys[i].Suffix, s.keys[j].Suffix) < 0 } 302 func (s *keysBySuffix) Swap(i, j int) { s.keys[i], s.keys[j] = s.keys[j], s.keys[i] }