github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/keyspan/defragment.go (about) 1 // Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package keyspan 6 7 import ( 8 "bytes" 9 10 "github.com/zuoyebang/bitalostable/internal/base" 11 "github.com/zuoyebang/bitalostable/internal/invariants" 12 ) 13 14 // bufferReuseMaxCapacity is the maximum capacity of a DefragmentingIter buffer 15 // that DefragmentingIter will reuse. Buffers greater than this will be 16 // discarded and reallocated as necessary. 17 const bufferReuseMaxCapacity = 10 << 10 // 10 KB 18 19 // DefragmentMethod configures the defragmentation performed by the 20 // DefragmentingIter. 21 type DefragmentMethod interface { 22 // ShouldDefragment takes two abutting spans and returns whether the two 23 // spans should be combined into a single, defragmented Span. 24 ShouldDefragment(equal base.Equal, left, right *Span) bool 25 } 26 27 // The DefragmentMethodFunc type is an adapter to allow the use of ordinary 28 // functions as DefragmentMethods. If f is a function with the appropriate 29 // signature, DefragmentMethodFunc(f) is a DefragmentMethod that calls f. 30 type DefragmentMethodFunc func(equal base.Equal, left, right *Span) bool 31 32 // ShouldDefragment calls f(equal, left, right). 33 func (f DefragmentMethodFunc) ShouldDefragment(equal base.Equal, left, right *Span) bool { 34 return f(equal, left, right) 35 } 36 37 // DefragmentInternal configures a DefragmentingIter to defragment spans 38 // only if they have identical keys. It requires spans' keys to be sorted in 39 // trailer descending order. 40 // 41 // This defragmenting method is intended for use in compactions that may see 42 // internal range keys fragments that may now be joined, because the state that 43 // required their fragmentation has been dropped. 44 var DefragmentInternal DefragmentMethod = DefragmentMethodFunc(func(equal base.Equal, a, b *Span) bool { 45 if a.KeysOrder != ByTrailerDesc || b.KeysOrder != ByTrailerDesc { 46 panic("bitalostable: span keys unexpectedly not in trailer descending order") 47 } 48 if len(a.Keys) != len(b.Keys) { 49 return false 50 } 51 for i := range a.Keys { 52 if a.Keys[i].Trailer != b.Keys[i].Trailer { 53 return false 54 } 55 if !equal(a.Keys[i].Suffix, b.Keys[i].Suffix) { 56 return false 57 } 58 if !bytes.Equal(a.Keys[i].Value, b.Keys[i].Value) { 59 return false 60 } 61 } 62 return true 63 }) 64 65 // DefragmentReducer merges the current and next Key slices, returning a new Key 66 // slice. 67 // 68 // Implementations should modify and return `cur` to save on allocations, or 69 // consider allocating a new slice, as the `cur` slice may be retained by the 70 // DefragmentingIter and mutated. The `next` slice must not be mutated. 71 // 72 // The incoming slices are sorted by (SeqNum, Kind) descending. The output slice 73 // must also have this sort order. 74 type DefragmentReducer func(cur, next []Key) []Key 75 76 // StaticDefragmentReducer is a no-op DefragmentReducer that simply returns the 77 // current key slice, effectively retaining the first set of keys encountered 78 // for a defragmented span. 79 // 80 // This reducer can be used, for example, when the set of Keys for each Span 81 // being reduced is not expected to change, and therefore the keys from the 82 // first span encountered can be used without considering keys in subsequent 83 // spans. 84 var StaticDefragmentReducer DefragmentReducer = func(cur, _ []Key) []Key { 85 return cur 86 } 87 88 // iterPos is an enum indicating the position of the defragmenting iter's 89 // wrapped iter. The defragmenting iter must look ahead or behind when 90 // defragmenting forward or backwards respectively, and this enum records that 91 // current position. 92 type iterPos int8 93 94 const ( 95 iterPosPrev iterPos = -1 96 iterPosCurr iterPos = 0 97 iterPosNext iterPos = +1 98 ) 99 100 // DefragmentingIter wraps a key span iterator, defragmenting physical 101 // fragmentation during iteration. 102 // 103 // During flushes and compactions, keys applied over a span may be split at 104 // sstable boundaries. This fragmentation can produce internal key bounds that 105 // do not match any of the bounds ever supplied to a user operation. This 106 // physical fragmentation is necessary to avoid excessively wide sstables. 107 // 108 // The defragmenting iterator undoes this physical fragmentation, joining spans 109 // with abutting bounds and equal state. The defragmenting iterator takes a 110 // DefragmentMethod to determine what is "equal state" for a span. The 111 // DefragmentMethod is a function type, allowing arbitrary comparisons between 112 // Span keys. 113 // 114 // Seeking (SeekGE, SeekLT) poses an obstacle to defragmentation. A seek may 115 // land on a physical fragment in the middle of several fragments that must be 116 // defragmented. A seek first degfragments in the opposite direction of 117 // iteration to find the beginning of the defragmented span, and then 118 // defragments in the iteration direction, ensuring it's found a whole 119 // defragmented span. 120 type DefragmentingIter struct { 121 comparer *base.Comparer 122 equal base.Equal 123 iter FragmentIterator 124 iterSpan *Span 125 iterPos iterPos 126 127 // curr holds the span at the current iterator position. currBuf is a buffer 128 // for use when copying user keys for curr. keysBuf is a buffer for use when 129 // copying Keys for curr. currBuf is cleared between positioning methods. 130 // 131 // keyBuf is a buffer specifically for the defragmented start key when 132 // defragmenting backwards or the defragmented end key when defragmenting 133 // forwards. These bounds are overwritten repeatedly during defragmentation, 134 // and the defragmentation routines overwrite keyBuf repeatedly to store 135 // these extended bounds. 136 curr Span 137 currBuf []byte 138 keysBuf []Key 139 keyBuf []byte 140 141 // method is a comparison function for two spans. method is called when two 142 // spans are abutting to determine whether they may be defragmented. 143 // method does not itself check for adjacency for the two spans. 144 method DefragmentMethod 145 146 // reduce is the reducer function used to collect Keys across all spans that 147 // constitute a defragmented span. 148 reduce DefragmentReducer 149 } 150 151 // Assert that *DefragmentingIter implements the FragmentIterator interface. 152 var _ FragmentIterator = (*DefragmentingIter)(nil) 153 154 // Init initializes the defragmenting iter using the provided defragment 155 // method. 156 func (i *DefragmentingIter) Init( 157 comparer *base.Comparer, iter FragmentIterator, equal DefragmentMethod, reducer DefragmentReducer, 158 ) { 159 *i = DefragmentingIter{ 160 comparer: comparer, 161 equal: comparer.Equal, 162 iter: iter, 163 method: equal, 164 reduce: reducer, 165 } 166 } 167 168 // Error returns any accumulated error. 169 func (i *DefragmentingIter) Error() error { 170 return i.iter.Error() 171 } 172 173 // Close closes the underlying iterators. 174 func (i *DefragmentingIter) Close() error { 175 return i.iter.Close() 176 } 177 178 // SeekGE seeks the iterator to the first span with a start key greater than or 179 // equal to key and returns it. 180 func (i *DefragmentingIter) SeekGE(key []byte) *Span { 181 i.iterSpan = i.iter.SeekGE(key) 182 if i.iterSpan == nil { 183 i.iterPos = iterPosCurr 184 return nil 185 } else if i.iterSpan.Empty() { 186 i.iterPos = iterPosCurr 187 return i.iterSpan 188 } 189 // Save the current span and peek backwards. 190 i.saveCurrent() 191 i.iterSpan = i.iter.Prev() 192 if i.iterSpan != nil && i.equal(i.curr.Start, i.iterSpan.End) && i.checkEqual(i.iterSpan, &i.curr) { 193 // A continuation. The span we originally landed on and defragmented 194 // backwards has a true Start key < key. To obey the FragmentIterator 195 // contract, we must not return this defragmented span. Defragment 196 // forward to finish defragmenting the span in the forward direction. 197 i.defragmentForward() 198 199 // Now we must be on a span that truly has a defragmented Start key > 200 // key. 201 return i.defragmentForward() 202 } 203 204 // The span previous to i.curr does not defragment, so we should return it. 205 // Next the underlying iterator back onto the span we previously saved to 206 // i.curr and then defragment forward. 207 i.iterSpan = i.iter.Next() 208 return i.defragmentForward() 209 } 210 211 // SeekLT seeks the iterator to the last span with a start key less than 212 // key and returns it. 213 func (i *DefragmentingIter) SeekLT(key []byte) *Span { 214 i.iterSpan = i.iter.SeekLT(key) 215 if i.iterSpan == nil { 216 i.iterPos = iterPosCurr 217 return nil 218 } else if i.iterSpan.Empty() { 219 i.iterPos = iterPosCurr 220 return i.iterSpan 221 } 222 // Defragment forward to find the end of the defragmented span. 223 i.defragmentForward() 224 if i.iterPos == iterPosNext { 225 // Prev once back onto the span. 226 i.iterSpan = i.iter.Prev() 227 } 228 // Defragment the full span from its end. 229 return i.defragmentBackward() 230 } 231 232 // First seeks the iterator to the first span and returns it. 233 func (i *DefragmentingIter) First() *Span { 234 i.iterSpan = i.iter.First() 235 if i.iterSpan == nil { 236 i.iterPos = iterPosCurr 237 return nil 238 } 239 return i.defragmentForward() 240 } 241 242 // Last seeks the iterator to the last span and returns it. 243 func (i *DefragmentingIter) Last() *Span { 244 i.iterSpan = i.iter.Last() 245 if i.iterSpan == nil { 246 i.iterPos = iterPosCurr 247 return nil 248 } 249 return i.defragmentBackward() 250 } 251 252 // Next advances to the next span and returns it. 253 func (i *DefragmentingIter) Next() *Span { 254 switch i.iterPos { 255 case iterPosPrev: 256 // Switching directions; The iterator is currently positioned over the 257 // last span of the previous set of fragments. In the below diagram, 258 // the iterator is positioned over the last span that contributes to 259 // the defragmented x position. We want to be positioned over the first 260 // span that contributes to the z position. 261 // 262 // x x x y y y z z z 263 // ^ ^ 264 // old new 265 // 266 // Next once to move onto y, defragment forward to land on the first z 267 // position. 268 i.iterSpan = i.iter.Next() 269 if invariants.Enabled && i.iterSpan == nil { 270 panic("bitalostable: invariant violation: no next span while switching directions") 271 } 272 // We're now positioned on the first span that was defragmented into the 273 // current iterator position. Skip over the rest of the current iterator 274 // position's constitutent fragments. In the above example, this would 275 // land on the first 'z'. 276 i.defragmentForward() 277 if i.iterSpan == nil { 278 i.iterPos = iterPosCurr 279 return nil 280 } 281 282 // Now that we're positioned over the first of the next set of 283 // fragments, defragment forward. 284 return i.defragmentForward() 285 case iterPosCurr: 286 // iterPosCurr is only used when the iter is exhausted or when the iterator 287 // is at an empty span. 288 if invariants.Enabled && i.iterSpan != nil && !i.iterSpan.Empty() { 289 panic("bitalostable: invariant violation: iterPosCurr with valid iterSpan") 290 } 291 292 i.iterSpan = i.iter.Next() 293 if i.iterSpan == nil { 294 return nil 295 } 296 return i.defragmentForward() 297 case iterPosNext: 298 // Already at the next span. 299 if i.iterSpan == nil { 300 i.iterPos = iterPosCurr 301 return nil 302 } 303 return i.defragmentForward() 304 default: 305 panic("unreachable") 306 } 307 } 308 309 // Prev steps back to the previous span and returns it. 310 func (i *DefragmentingIter) Prev() *Span { 311 switch i.iterPos { 312 case iterPosPrev: 313 // Already at the previous span. 314 if i.iterSpan == nil { 315 i.iterPos = iterPosCurr 316 return nil 317 } 318 return i.defragmentBackward() 319 case iterPosCurr: 320 // iterPosCurr is only used when the iter is exhausted or when the iterator 321 // is at an empty span. 322 if invariants.Enabled && i.iterSpan != nil && !i.iterSpan.Empty() { 323 panic("bitalostable: invariant violation: iterPosCurr with valid iterSpan") 324 } 325 326 i.iterSpan = i.iter.Prev() 327 if i.iterSpan == nil { 328 return nil 329 } 330 return i.defragmentBackward() 331 case iterPosNext: 332 // Switching directions; The iterator is currently positioned over the 333 // first fragment of the next set of fragments. In the below diagram, 334 // the iterator is positioned over the first span that contributes to 335 // the defragmented z position. We want to be positioned over the last 336 // span that contributes to the x position. 337 // 338 // x x x y y y z z z 339 // ^ ^ 340 // new old 341 // 342 // Prev once to move onto y, defragment backward to land on the last x 343 // position. 344 i.iterSpan = i.iter.Prev() 345 if invariants.Enabled && i.iterSpan == nil { 346 panic("bitalostable: invariant violation: no previous span while switching directions") 347 } 348 // We're now positioned on the last span that was defragmented into the 349 // current iterator position. Skip over the rest of the current iterator 350 // position's constitutent fragments. In the above example, this would 351 // land on the last 'x'. 352 i.defragmentBackward() 353 354 // Now that we're positioned over the last of the prev set of 355 // fragments, defragment backward. 356 if i.iterSpan == nil { 357 i.iterPos = iterPosCurr 358 return nil 359 } 360 return i.defragmentBackward() 361 default: 362 panic("unreachable") 363 } 364 } 365 366 // checkEqual checks the two spans for logical equivalence. It uses the passed-in 367 // DefragmentMethod and ensures both spans are NOT empty; not defragmenting empty 368 // spans is an optimization that lets us load fewer sstable blocks. 369 func (i *DefragmentingIter) checkEqual(left, right *Span) bool { 370 return (!left.Empty() && !right.Empty()) && i.method.ShouldDefragment(i.equal, i.iterSpan, &i.curr) 371 } 372 373 // defragmentForward defragments spans in the forward direction, starting from 374 // i.iter's current position. The span at the current position must be non-nil, 375 // but may be Empty(). 376 func (i *DefragmentingIter) defragmentForward() *Span { 377 if i.iterSpan.Empty() { 378 // An empty span will never be equal to another span; see checkEqual for 379 // why. To avoid loading non-empty range keys further ahead by calling Next, 380 // return early. 381 i.iterPos = iterPosCurr 382 return i.iterSpan 383 } 384 i.saveCurrent() 385 386 i.iterPos = iterPosNext 387 i.iterSpan = i.iter.Next() 388 for i.iterSpan != nil { 389 if !i.equal(i.curr.End, i.iterSpan.Start) { 390 // Not a continuation. 391 break 392 } 393 if !i.checkEqual(i.iterSpan, &i.curr) { 394 // Not a continuation. 395 break 396 } 397 i.keyBuf = append(i.keyBuf[:0], i.iterSpan.End...) 398 i.curr.End = i.keyBuf 399 i.keysBuf = i.reduce(i.keysBuf, i.iterSpan.Keys) 400 i.iterSpan = i.iter.Next() 401 } 402 i.curr.Keys = i.keysBuf 403 return &i.curr 404 } 405 406 // defragmentBackward defragments spans in the backward direction, starting from 407 // i.iter's current position. The span at the current position must be non-nil, 408 // but may be Empty(). 409 func (i *DefragmentingIter) defragmentBackward() *Span { 410 if i.iterSpan.Empty() { 411 // An empty span will never be equal to another span; see checkEqual for 412 // why. To avoid loading non-empty range keys further ahead by calling Next, 413 // return early. 414 i.iterPos = iterPosCurr 415 return i.iterSpan 416 } 417 i.saveCurrent() 418 419 i.iterPos = iterPosPrev 420 i.iterSpan = i.iter.Prev() 421 for i.iterSpan != nil { 422 if !i.equal(i.curr.Start, i.iterSpan.End) { 423 // Not a continuation. 424 break 425 } 426 if !i.checkEqual(i.iterSpan, &i.curr) { 427 // Not a continuation. 428 break 429 } 430 i.keyBuf = append(i.keyBuf[:0], i.iterSpan.Start...) 431 i.curr.Start = i.keyBuf 432 i.keysBuf = i.reduce(i.keysBuf, i.iterSpan.Keys) 433 i.iterSpan = i.iter.Prev() 434 } 435 i.curr.Keys = i.keysBuf 436 return &i.curr 437 } 438 439 func (i *DefragmentingIter) saveCurrent() { 440 i.currBuf = i.currBuf[:0] 441 i.keysBuf = i.keysBuf[:0] 442 i.keyBuf = i.keyBuf[:0] 443 if cap(i.currBuf) > bufferReuseMaxCapacity { 444 i.currBuf = nil 445 } 446 if cap(i.keyBuf) > bufferReuseMaxCapacity { 447 i.keyBuf = nil 448 } 449 if i.iterSpan == nil { 450 return 451 } 452 i.curr = Span{ 453 Start: i.saveBytes(i.iterSpan.Start), 454 End: i.saveBytes(i.iterSpan.End), 455 KeysOrder: i.iterSpan.KeysOrder, 456 } 457 for j := range i.iterSpan.Keys { 458 i.keysBuf = append(i.keysBuf, Key{ 459 Trailer: i.iterSpan.Keys[j].Trailer, 460 Suffix: i.saveBytes(i.iterSpan.Keys[j].Suffix), 461 Value: i.saveBytes(i.iterSpan.Keys[j].Value), 462 }) 463 } 464 i.curr.Keys = i.keysBuf 465 } 466 467 func (i *DefragmentingIter) saveBytes(b []byte) []byte { 468 if b == nil { 469 return nil 470 } 471 ret := append(i.currBuf, b...) 472 i.currBuf = ret[len(ret):] 473 return ret 474 }