// Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

package sstable

import (
	"encoding/binary"
	"fmt"
	"math"
	"sync"

	"github.com/zuoyebang/bitalostable/internal/base"
	"github.com/zuoyebang/bitalostable/internal/rangekey"
)

// Block properties are an optional user-facing feature that can be used to
// filter data blocks (and whole sstables) from an Iterator before they are
// loaded. They do not apply to range delete blocks. These are expected to
// very concisely represent a set of some attribute value contained within the
// key or value, such that the set includes all the attribute values in the
// block. This has some similarities with OLAP pruning approaches that
// maintain min-max attribute values for some column (which concisely
// represent a set), that is then used to prune at query time. In Pebble's
// case, data blocks are small, typically 25-50KB, so these properties should
// reduce their precision in order to be concise -- a good rule of thumb is to
// not consume more than 50-100 bytes across all properties maintained for a
// block, i.e., a 500x reduction compared to loading the data block.
//
// A block property must be assigned a unique name, which is encoded and
// stored in the sstable. This name must be unique among all user-properties
// encoded in an sstable.
//
// A property is represented as a []byte. A nil value or empty byte slice are
// considered semantically identical. The caller is free to choose the
// semantics of an empty byte slice e.g. they could use it to represent the
// empty set or the universal set, whichever they think is more common and
// therefore better to encode more concisely. The serialization of the
// property for the various Finish*() calls in a BlockPropertyCollector
// implementation should be identical, since the corresponding
// BlockPropertyFilter implementation is not told the context in which it is
// deserializing the property.
//
// Block properties are more general than table properties and should be
// preferred over using table properties. A BlockPropertyCollector can achieve
// identical behavior to table properties by returning the nil slice from
// FinishDataBlock and FinishIndexBlock, and interpret them as the universal
// set in BlockPropertyFilter, and return a non-universal set in FinishTable.
//
// Block property filtering is nondeterministic because the separation of keys
// into blocks is nondeterministic. Clients use block-property filters to
// implement efficient application of a filter F that applies to key-value pairs
// (abbreviated as kv-filter). Consider correctness defined as surfacing exactly
// the same key-value pairs that would be surfaced if one applied the filter F
// above normal iteration. With this correctness definition, block property
// filtering may introduce two kinds of errors:
//
//   a) Block property filtering that uses a kv-filter may produce additional
//      key-value pairs that don't satisfy the filter because of the separation
//      of keys into blocks. Clients may remove these extra key-value pairs by
//      re-applying the kv filter while reading results back from Pebble.
//
//   b) Block property filtering may surface deleted key-value pairs if the
//      kv filter is not a strict function of the key's user key. A block
//      containing k.DEL may be filtered, while a block containing the deleted
//      key k.SET may not be filtered, if the kv filter applies to one but not
//      the other.
//
//      This error may be avoided trivially by using a kv filter that is a pure
//      function of the user key.
A filter that examines values or key kinds 71 // requires care to ensure F(k.SET, <value>) = F(k.DEL) = F(k.SINGLEDEL). 72 // 73 // The combination of range deletions and filtering by table-level properties 74 // add another opportunity for deleted point keys to be surfaced. The bitalostable 75 // Iterator stack takes care to correctly apply filtered tables' range deletions 76 // to lower tables, preventing this form of nondeterministic error. 77 78 // BlockPropertyCollector is used when writing a sstable. 79 // 80 // - All calls to Add are included in the next FinishDataBlock, after which 81 // the next data block is expected to start. 82 // 83 // - The index entry generated for the data block, which contains the return 84 // value from FinishDataBlock, is not immediately included in the current 85 // index block. It is included when AddPrevDataBlockToIndexBlock is called. 86 // An alternative would be to return an opaque handle from FinishDataBlock 87 // and pass it to a new AddToIndexBlock method, which requires more 88 // plumbing, and passing of an interface{} results in a undesirable heap 89 // allocation. AddPrevDataBlockToIndexBlock must be called before keys are 90 // added to the new data block. 91 type BlockPropertyCollector interface { 92 // Name returns the name of the block property collector. 93 Name() string 94 // Add is called with each new entry added to a data block in the sstable. 95 // The callee can assume that these are in sorted order. 96 Add(key InternalKey, value []byte) error 97 // FinishDataBlock is called when all the entries have been added to a 98 // data block. Subsequent Add calls will be for the next data block. It 99 // returns the property value for the finished block. 100 FinishDataBlock(buf []byte) ([]byte, error) 101 // AddPrevDataBlockToIndexBlock adds the entry corresponding to the 102 // previous FinishDataBlock to the current index block. 
103 AddPrevDataBlockToIndexBlock() 104 // FinishIndexBlock is called when an index block, containing all the 105 // key-value pairs since the last FinishIndexBlock, will no longer see new 106 // entries. It returns the property value for the index block. 107 FinishIndexBlock(buf []byte) ([]byte, error) 108 // FinishTable is called when the sstable is finished, and returns the 109 // property value for the sstable. 110 FinishTable(buf []byte) ([]byte, error) 111 } 112 113 // SuffixReplaceableBlockCollector is an extension to the BlockPropertyCollector 114 // interface that allows a block property collector to indicate the it supports 115 // being *updated* during suffix replacement, i.e. when an existing SST in which 116 // all keys have the same key suffix is updated to have a new suffix. 117 // 118 // A collector which supports being updated in such cases must be able to derive 119 // its updated value from its old value and the change being made to the suffix, 120 // without needing to be passed each updated K/V. 121 // 122 // For example, a collector that only inspects values would can simply copy its 123 // previously computed property as-is, since key-suffix replacement does not 124 // change values, while a collector that depends only on key suffixes, like one 125 // which collected mvcc-timestamp bounds from timestamp-suffixed keys, can just 126 // set its new bounds from the new suffix, as it is common to all keys, without 127 // needing to recompute it from every key. 128 // 129 // An implementation of DataBlockIntervalCollector can also implement this 130 // interface, in which case the BlockPropertyCollector returned by passing it to 131 // NewBlockIntervalCollector will also implement this interface automatically. 
132 type SuffixReplaceableBlockCollector interface { 133 // UpdateKeySuffixes is called when a block is updated to change the suffix of 134 // all keys in the block, and is passed the old value for that prop, if any, 135 // for that block as well as the old and new suffix. 136 UpdateKeySuffixes(oldProp []byte, oldSuffix, newSuffix []byte) error 137 } 138 139 // BlockPropertyFilter is used in an Iterator to filter sstables and blocks 140 // within the sstable. It should not maintain any per-sstable state, and must 141 // be thread-safe. 142 type BlockPropertyFilter = base.BlockPropertyFilter 143 144 // BoundLimitedBlockPropertyFilter implements the block-property filter but 145 // imposes an additional constraint on its usage, requiring that only blocks 146 // containing exclusively keys between its lower and upper bounds may be 147 // filtered. The bounds may be change during iteration, so the filter doesn't 148 // expose the bounds, instead implementing KeyIsWithin[Lower,Upper]Bound methods 149 // for performing bound comparisons. 150 // 151 // To be used, a BoundLimitedBlockPropertyFilter must be supplied directly 152 // through NewBlockPropertiesFilterer's dedicated parameter. If supplied through 153 // the ordinary slice of block property filters, this filter's bounds will be 154 // ignored. 155 // 156 // The current [lower,upper) bounds of the filter are unknown, because they may 157 // be changing. During forward iteration the lower bound is externally 158 // guaranteed, meaning Intersects only returns false if the sstable iterator is 159 // already known to be positioned at a key ≥ lower. The sstable iterator is then 160 // only responsible for ensuring filtered blocks also meet the upper bound, and 161 // should only allow a block to be filtered if all its keys are < upper. The 162 // sstable iterator may invoke KeyIsWithinUpperBound(key) to perform this check, 163 // where key is an inclusive upper bound on the block's keys. 
164 // 165 // During backward iteration the upper bound is externally guaranteed, and 166 // Intersects only returns false if the sstable iterator is already known to be 167 // positioned at a key < upper. The sstable iterator is responsible for ensuring 168 // filtered blocks also meet the lower bound, enforcing that a block is only 169 // filtered if all its keys are ≥ lower. This check is made through passing the 170 // block's inclusive lower bound to KeyIsWithinLowerBound. 171 // 172 // Implementations may become active or inactive through implementing Intersects 173 // to return true whenever the filter is disabled. 174 // 175 // Usage of BoundLimitedBlockPropertyFilter is subtle, and Pebble consumers 176 // should not implement this interface directly. This interface is an internal 177 // detail in the implementation of block-property range-key masking. 178 type BoundLimitedBlockPropertyFilter interface { 179 BlockPropertyFilter 180 181 // KeyIsWithinLowerBound tests whether the provided internal key falls 182 // within the current lower bound of the filter. A true return value 183 // indicates that the filter may be used to filter blocks that exclusively 184 // contain keys ≥ `key`, so long as the blocks' keys also satisfy the upper 185 // bound. 186 KeyIsWithinLowerBound(key *InternalKey) bool 187 // KeyIsWithinUpperBound tests whether the provided internal key falls 188 // within the current upper bound of the filter. A true return value 189 // indicates that the filter may be used to filter blocks that exclusively 190 // contain keys ≤ `key`, so long as the blocks' keys also satisfy the lower 191 // bound. 192 KeyIsWithinUpperBound(key *InternalKey) bool 193 } 194 195 // BlockIntervalCollector is a helper implementation of BlockPropertyCollector 196 // for users who want to represent a set of the form [lower,upper) where both 197 // lower and upper are uint64, and lower <= upper. 
198 // 199 // The set is encoded as: 200 // - Two varint integers, (lower,upper-lower), when upper-lower > 0 201 // - Nil, when upper-lower=0 202 // 203 // Users must not expect this to preserve differences between empty sets -- 204 // they will all get turned into the semantically equivalent [0,0). 205 // 206 // A BlockIntervalCollector that collects over point and range keys needs to 207 // have both the point and range DataBlockIntervalCollector specified, since 208 // point and range keys are fed to the BlockIntervalCollector in an interleaved 209 // fashion, independently of one another. This also implies that the 210 // DataBlockIntervalCollectors for point and range keys should be references to 211 // independent instances, rather than references to the same collector, as point 212 // and range keys are tracked independently. 213 type BlockIntervalCollector struct { 214 name string 215 points DataBlockIntervalCollector 216 ranges DataBlockIntervalCollector 217 218 blockInterval interval 219 indexInterval interval 220 tableInterval interval 221 } 222 223 var _ BlockPropertyCollector = &BlockIntervalCollector{} 224 225 // DataBlockIntervalCollector is the interface used by BlockIntervalCollector 226 // that contains the actual logic pertaining to the property. It only 227 // maintains state for the current data block, and resets that state in 228 // FinishDataBlock. This interface can be used to reduce parsing costs. 229 type DataBlockIntervalCollector interface { 230 // Add is called with each new entry added to a data block in the sstable. 231 // The callee can assume that these are in sorted order. 232 Add(key InternalKey, value []byte) error 233 // FinishDataBlock is called when all the entries have been added to a 234 // data block. Subsequent Add calls will be for the next data block. It 235 // returns the [lower, upper) for the finished block. 
236 FinishDataBlock() (lower uint64, upper uint64, err error) 237 } 238 239 // NewBlockIntervalCollector constructs a BlockIntervalCollector with the given 240 // name. The BlockIntervalCollector makes use of the given point and range key 241 // DataBlockIntervalCollectors when encountering point and range keys, 242 // respectively. 243 // 244 // The caller may pass a nil DataBlockIntervalCollector for one of the point or 245 // range key collectors, in which case keys of those types will be ignored. This 246 // allows for flexible construction of BlockIntervalCollectors that operate on 247 // just point keys, just range keys, or both point and range keys. 248 // 249 // If both point and range keys are to be tracked, two independent collectors 250 // should be provided, rather than the same collector passed in twice (see the 251 // comment on BlockIntervalCollector for more detail) 252 func NewBlockIntervalCollector( 253 name string, pointCollector, rangeCollector DataBlockIntervalCollector, 254 ) BlockPropertyCollector { 255 if pointCollector == nil && rangeCollector == nil { 256 panic("sstable: at least one interval collector must be provided") 257 } 258 bic := BlockIntervalCollector{ 259 name: name, 260 points: pointCollector, 261 ranges: rangeCollector, 262 } 263 if _, ok := pointCollector.(SuffixReplaceableBlockCollector); ok { 264 return &suffixReplacementBlockCollectorWrapper{bic} 265 } 266 return &bic 267 } 268 269 // Name implements the BlockPropertyCollector interface. 270 func (b *BlockIntervalCollector) Name() string { 271 return b.name 272 } 273 274 // Add implements the BlockPropertyCollector interface. 
275 func (b *BlockIntervalCollector) Add(key InternalKey, value []byte) error { 276 if rangekey.IsRangeKey(key.Kind()) { 277 if b.ranges != nil { 278 return b.ranges.Add(key, value) 279 } 280 } else if b.points != nil { 281 return b.points.Add(key, value) 282 } 283 return nil 284 } 285 286 // FinishDataBlock implements the BlockPropertyCollector interface. 287 func (b *BlockIntervalCollector) FinishDataBlock(buf []byte) ([]byte, error) { 288 if b.points == nil { 289 return buf, nil 290 } 291 var err error 292 b.blockInterval.lower, b.blockInterval.upper, err = b.points.FinishDataBlock() 293 if err != nil { 294 return buf, err 295 } 296 buf = b.blockInterval.encode(buf) 297 b.tableInterval.union(b.blockInterval) 298 return buf, nil 299 } 300 301 // AddPrevDataBlockToIndexBlock implements the BlockPropertyCollector 302 // interface. 303 func (b *BlockIntervalCollector) AddPrevDataBlockToIndexBlock() { 304 b.indexInterval.union(b.blockInterval) 305 b.blockInterval = interval{} 306 } 307 308 // FinishIndexBlock implements the BlockPropertyCollector interface. 309 func (b *BlockIntervalCollector) FinishIndexBlock(buf []byte) ([]byte, error) { 310 buf = b.indexInterval.encode(buf) 311 b.indexInterval = interval{} 312 return buf, nil 313 } 314 315 // FinishTable implements the BlockPropertyCollector interface. 316 func (b *BlockIntervalCollector) FinishTable(buf []byte) ([]byte, error) { 317 // If the collector is tracking range keys, the range key interval is union-ed 318 // with the point key interval for the table. 
319 if b.ranges != nil { 320 var rangeInterval interval 321 var err error 322 rangeInterval.lower, rangeInterval.upper, err = b.ranges.FinishDataBlock() 323 if err != nil { 324 return buf, err 325 } 326 b.tableInterval.union(rangeInterval) 327 } 328 return b.tableInterval.encode(buf), nil 329 } 330 331 type interval struct { 332 lower uint64 333 upper uint64 334 } 335 336 func (i interval) encode(buf []byte) []byte { 337 if i.lower < i.upper { 338 var encoded [binary.MaxVarintLen64 * 2]byte 339 n := binary.PutUvarint(encoded[:], i.lower) 340 n += binary.PutUvarint(encoded[n:], i.upper-i.lower) 341 buf = append(buf, encoded[:n]...) 342 } 343 return buf 344 } 345 346 func (i *interval) decode(buf []byte) error { 347 if len(buf) == 0 { 348 *i = interval{} 349 return nil 350 } 351 var n int 352 i.lower, n = binary.Uvarint(buf) 353 if n <= 0 || n >= len(buf) { 354 return base.CorruptionErrorf("cannot decode interval from buf %x", buf) 355 } 356 pos := n 357 i.upper, n = binary.Uvarint(buf[pos:]) 358 pos += n 359 if pos != len(buf) || n <= 0 { 360 return base.CorruptionErrorf("cannot decode interval from buf %x", buf) 361 } 362 // Delta decode. 363 i.upper += i.lower 364 if i.upper < i.lower { 365 return base.CorruptionErrorf("unexpected overflow, upper %d < lower %d", i.upper, i.lower) 366 } 367 return nil 368 } 369 370 func (i *interval) union(x interval) { 371 if x.lower >= x.upper { 372 // x is the empty set. 373 return 374 } 375 if i.lower >= i.upper { 376 // i is the empty set. 377 *i = x 378 return 379 } 380 // Both sets are non-empty. 381 if x.lower < i.lower { 382 i.lower = x.lower 383 } 384 if x.upper > i.upper { 385 i.upper = x.upper 386 } 387 } 388 389 func (i interval) intersects(x interval) bool { 390 if i.lower >= i.upper || x.lower >= x.upper { 391 // At least one of the sets is empty. 392 return false 393 } 394 // Neither set is empty. 
395 return i.upper > x.lower && i.lower < x.upper 396 } 397 398 type suffixReplacementBlockCollectorWrapper struct { 399 BlockIntervalCollector 400 } 401 402 // UpdateKeySuffixes implements the SuffixReplaceableBlockCollector interface. 403 func (w *suffixReplacementBlockCollectorWrapper) UpdateKeySuffixes( 404 oldProp []byte, from, to []byte, 405 ) error { 406 return w.BlockIntervalCollector.points.(SuffixReplaceableBlockCollector).UpdateKeySuffixes(oldProp, from, to) 407 } 408 409 // BlockIntervalFilter is an implementation of BlockPropertyFilter when the 410 // corresponding collector is a BlockIntervalCollector. That is, the set is of 411 // the form [lower, upper). 412 type BlockIntervalFilter struct { 413 name string 414 filterInterval interval 415 } 416 417 var _ BlockPropertyFilter = (*BlockIntervalFilter)(nil) 418 419 // NewBlockIntervalFilter constructs a BlockPropertyFilter that filters blocks 420 // based on an interval property collected by BlockIntervalCollector and the 421 // given [lower, upper) bounds. The given name specifies the 422 // BlockIntervalCollector's properties to read. 423 func NewBlockIntervalFilter(name string, lower uint64, upper uint64) *BlockIntervalFilter { 424 b := new(BlockIntervalFilter) 425 b.Init(name, lower, upper) 426 return b 427 } 428 429 // Init initializes (or re-initializes, clearing previous state) an existing 430 // BLockPropertyFilter to filter blocks based on an interval property collected 431 // by BlockIntervalCollector and the given [lower, upper) bounds. The given name 432 // specifies the BlockIntervalCollector's properties to read. 433 func (b *BlockIntervalFilter) Init(name string, lower, upper uint64) { 434 *b = BlockIntervalFilter{ 435 name: name, 436 filterInterval: interval{lower: lower, upper: upper}, 437 } 438 } 439 440 // Name implements the BlockPropertyFilter interface. 
441 func (b *BlockIntervalFilter) Name() string { 442 return b.name 443 } 444 445 // Intersects implements the BlockPropertyFilter interface. 446 func (b *BlockIntervalFilter) Intersects(prop []byte) (bool, error) { 447 var i interval 448 if err := i.decode(prop); err != nil { 449 return false, err 450 } 451 return i.intersects(b.filterInterval), nil 452 } 453 454 // SetInterval adjusts the [lower, upper) bounds used by the filter. It is not 455 // generally safe to alter the filter while it's in use, except as part of the 456 // implementation of BlockPropertyFilterMask.SetSuffix used for range-key 457 // masking. 458 func (b *BlockIntervalFilter) SetInterval(lower, upper uint64) { 459 b.filterInterval = interval{lower: lower, upper: upper} 460 } 461 462 // When encoding block properties for each block, we cannot afford to encode 463 // the name. Instead, the name is mapped to a shortID, in the scope of that 464 // sstable, and the shortID is encoded. Since we use a uint8, there is a limit 465 // of 256 block property collectors per sstable. 
466 type shortID uint8 467 468 type blockPropertiesEncoder struct { 469 propsBuf []byte 470 scratch []byte 471 } 472 473 func (e *blockPropertiesEncoder) getScratchForProp() []byte { 474 return e.scratch[:0] 475 } 476 477 func (e *blockPropertiesEncoder) resetProps() { 478 e.propsBuf = e.propsBuf[:0] 479 } 480 481 func (e *blockPropertiesEncoder) addProp(id shortID, scratch []byte) { 482 const lenID = 1 483 lenProp := uvarintLen(uint32(len(scratch))) 484 n := lenID + lenProp + len(scratch) 485 if cap(e.propsBuf)-len(e.propsBuf) < n { 486 size := len(e.propsBuf) + 2*n 487 if size < 2*cap(e.propsBuf) { 488 size = 2 * cap(e.propsBuf) 489 } 490 buf := make([]byte, len(e.propsBuf), size) 491 copy(buf, e.propsBuf) 492 e.propsBuf = buf 493 } 494 pos := len(e.propsBuf) 495 b := e.propsBuf[pos : pos+lenID] 496 b[0] = byte(id) 497 pos += lenID 498 b = e.propsBuf[pos : pos+lenProp] 499 n = binary.PutUvarint(b, uint64(len(scratch))) 500 pos += n 501 b = e.propsBuf[pos : pos+len(scratch)] 502 pos += len(scratch) 503 copy(b, scratch) 504 e.propsBuf = e.propsBuf[0:pos] 505 e.scratch = scratch 506 } 507 508 func (e *blockPropertiesEncoder) unsafeProps() []byte { 509 return e.propsBuf 510 } 511 512 func (e *blockPropertiesEncoder) props() []byte { 513 buf := make([]byte, len(e.propsBuf)) 514 copy(buf, e.propsBuf) 515 return buf 516 } 517 518 type blockPropertiesDecoder struct { 519 props []byte 520 } 521 522 func (d *blockPropertiesDecoder) done() bool { 523 return len(d.props) == 0 524 } 525 526 // REQUIRES: !done() 527 func (d *blockPropertiesDecoder) next() (id shortID, prop []byte, err error) { 528 const lenID = 1 529 id = shortID(d.props[0]) 530 propLen, m := binary.Uvarint(d.props[lenID:]) 531 n := lenID + m 532 if m <= 0 || propLen == 0 || (n+int(propLen)) > len(d.props) { 533 return 0, nil, base.CorruptionErrorf("corrupt block property length") 534 } 535 prop = d.props[n : n+int(propLen)] 536 d.props = d.props[n+int(propLen):] 537 return id, prop, nil 538 } 539 540 // 
BlockPropertiesFilterer provides filtering support when reading an sstable 541 // in the context of an iterator that has a slice of BlockPropertyFilters. 542 // After the call to NewBlockPropertiesFilterer, the caller must call 543 // IntersectsUserPropsAndFinishInit to check if the sstable intersects with 544 // the filters. If it does intersect, this function also finishes initializing 545 // the BlockPropertiesFilterer using the shortIDs for the relevant filters. 546 // Subsequent checks for relevance of a block should use the intersects 547 // method. 548 type BlockPropertiesFilterer struct { 549 filters []BlockPropertyFilter 550 // Maps shortID => index in filters. This can be sparse, and shortIDs for 551 // which there is no filter are represented with an index of -1. The 552 // length of this can be shorter than the shortIDs allocated in the 553 // sstable. e.g. if the sstable used shortIDs 0, 1, 2, 3, and the iterator 554 // has two filters, corresponding to shortIDs 2, 0, this would be: 555 // len(shortIDToFiltersIndex)==3, 0=>1, 1=>-1, 2=>0. 556 shortIDToFiltersIndex []int 557 558 // boundLimitedFilter, if non-nil, holds a single block-property filter with 559 // additional constraints on its filtering. A boundLimitedFilter may only 560 // filter blocks that are wholly contained within its bounds. During forward 561 // iteration the lower bound (and during backward iteration the upper bound) 562 // must be externally guaranteed, with Intersects only returning false if 563 // that bound is met. The opposite bound is verified during iteration by the 564 // sstable iterator. 565 // 566 // boundLimitedFilter is permitted to be defined on a property (`Name()`) 567 // for which another filter exists in filters. In this case both filters 568 // will be consulted, and either filter may exclude block(s). Only a single 569 // bound-limited block-property filter may be set. 
570 // 571 // The boundLimitedShortID field contains the shortID of the filter's 572 // property within the sstable. It's set to -1 if the property was not 573 // collected when the table was built. 574 boundLimitedFilter BoundLimitedBlockPropertyFilter 575 boundLimitedShortID int 576 } 577 578 var blockPropertiesFiltererPool = sync.Pool{ 579 New: func() interface{} { 580 return &BlockPropertiesFilterer{} 581 }, 582 } 583 584 // NewBlockPropertiesFilterer returns a partially initialized filterer. To complete 585 // initialization, call IntersectsUserPropsAndFinishInit. 586 func NewBlockPropertiesFilterer( 587 filters []BlockPropertyFilter, limited BoundLimitedBlockPropertyFilter, 588 ) *BlockPropertiesFilterer { 589 filterer := blockPropertiesFiltererPool.Get().(*BlockPropertiesFilterer) 590 *filterer = BlockPropertiesFilterer{ 591 filters: filters, 592 shortIDToFiltersIndex: filterer.shortIDToFiltersIndex[:0], 593 boundLimitedFilter: limited, 594 boundLimitedShortID: -1, 595 } 596 return filterer 597 } 598 599 func releaseBlockPropertiesFilterer(filterer *BlockPropertiesFilterer) { 600 *filterer = BlockPropertiesFilterer{ 601 shortIDToFiltersIndex: filterer.shortIDToFiltersIndex[:0], 602 } 603 blockPropertiesFiltererPool.Put(filterer) 604 } 605 606 // IntersectsUserPropsAndFinishInit is called with the user properties map for 607 // the sstable and returns whether the sstable intersects the filters. It 608 // additionally initializes the shortIDToFiltersIndex for the filters that are 609 // relevant to this sstable. 610 func (f *BlockPropertiesFilterer) IntersectsUserPropsAndFinishInit( 611 userProperties map[string]string, 612 ) (bool, error) { 613 for i := range f.filters { 614 props, ok := userProperties[f.filters[i].Name()] 615 if !ok { 616 // Collector was not used when writing this file, so it is 617 // considered intersecting. 
618 continue 619 } 620 byteProps := []byte(props) 621 if len(byteProps) < 1 { 622 return false, base.CorruptionErrorf( 623 "block properties for %s is corrupted", f.filters[i].Name()) 624 } 625 shortID := shortID(byteProps[0]) 626 intersects, err := f.filters[i].Intersects(byteProps[1:]) 627 if err != nil || !intersects { 628 return false, err 629 } 630 // Intersects the sstable, so need to use this filter when 631 // deciding whether to read blocks. 632 n := len(f.shortIDToFiltersIndex) 633 if n <= int(shortID) { 634 if cap(f.shortIDToFiltersIndex) <= int(shortID) { 635 index := make([]int, shortID+1, 2*(shortID+1)) 636 copy(index, f.shortIDToFiltersIndex) 637 f.shortIDToFiltersIndex = index 638 } else { 639 f.shortIDToFiltersIndex = f.shortIDToFiltersIndex[:shortID+1] 640 } 641 for j := n; j < int(shortID); j++ { 642 f.shortIDToFiltersIndex[j] = -1 643 } 644 } 645 f.shortIDToFiltersIndex[shortID] = i 646 } 647 if f.boundLimitedFilter == nil { 648 return true, nil 649 } 650 651 // There's a bound-limited filter. Find its shortID. It's possible that 652 // there's an existing filter in f.filters on the same property. That's 653 // okay. Both filters will be consulted whenever a relevant prop is decoded. 654 props, ok := userProperties[f.boundLimitedFilter.Name()] 655 if !ok { 656 // The collector was not used when writing this file, so it's 657 // intersecting. We leave f.boundLimitedShortID=-1, so the filter will 658 // be unused within this file. 659 return true, nil 660 } 661 byteProps := []byte(props) 662 if len(byteProps) < 1 { 663 return false, base.CorruptionErrorf( 664 "block properties for %s is corrupted", f.boundLimitedFilter.Name()) 665 } 666 f.boundLimitedShortID = int(byteProps[0]) 667 668 // We don't check for table-level intersection for the bound-limited filter. 669 // The bound-limited filter is treated as vacuously intersecting. 
670 // 671 // NB: If a block-property filter needs to be toggled inactive/active, it 672 // should be implemented within the Intersects implementation. 673 // 674 // TODO(jackson): We could filter at the table-level by threading the table 675 // smallest and largest bounds here. 676 677 // The bound-limited filter isn't included in shortIDToFiltersIndex. 678 // 679 // When determining intersection, we decode props only up to the shortID 680 // len(shortIDToFiltersIndex). If f.limitedShortID is greater than any of 681 // the existing filters' shortIDs, we need to grow shortIDToFiltersIndex. 682 // Growing the index with -1s ensures we're able to consult the index 683 // without length checks. 684 if n := len(f.shortIDToFiltersIndex); n <= f.boundLimitedShortID { 685 if cap(f.shortIDToFiltersIndex) <= f.boundLimitedShortID { 686 index := make([]int, f.boundLimitedShortID+1) 687 copy(index, f.shortIDToFiltersIndex) 688 f.shortIDToFiltersIndex = index 689 } else { 690 f.shortIDToFiltersIndex = f.shortIDToFiltersIndex[:f.boundLimitedShortID+1] 691 } 692 for j := n; j <= f.boundLimitedShortID; j++ { 693 f.shortIDToFiltersIndex[j] = -1 694 } 695 } 696 return true, nil 697 } 698 699 type intersectsResult int8 700 701 const ( 702 blockIntersects intersectsResult = iota 703 blockExcluded 704 // blockMaybeExcluded is returned by BlockPropertiesFilterer.intersects when 705 // no filters unconditionally exclude the block, but the bound-limited block 706 // property filter will exclude it if the block's bounds fall within the 707 // filter's current bounds. See the reader's 708 // {single,two}LevelIterator.resolveMaybeExcluded methods. 
709 blockMaybeExcluded 710 ) 711 712 func (f *BlockPropertiesFilterer) intersects(props []byte) (ret intersectsResult, err error) { 713 i := 0 714 decoder := blockPropertiesDecoder{props: props} 715 ret = blockIntersects 716 for i < len(f.shortIDToFiltersIndex) { 717 var id int 718 var prop []byte 719 if !decoder.done() { 720 var shortID shortID 721 var err error 722 shortID, prop, err = decoder.next() 723 if err != nil { 724 return ret, err 725 } 726 id = int(shortID) 727 } else { 728 id = math.MaxUint8 + 1 729 } 730 for i < len(f.shortIDToFiltersIndex) && id > i { 731 // The property for this id is not encoded for this block, but there 732 // may still be a filter for this id. 733 if intersects, err := f.intersectsFilter(i, nil); err != nil { 734 return ret, err 735 } else if intersects == blockExcluded { 736 return blockExcluded, nil 737 } else if intersects == blockMaybeExcluded { 738 ret = blockMaybeExcluded 739 } 740 i++ 741 } 742 if i >= len(f.shortIDToFiltersIndex) { 743 return ret, nil 744 } 745 // INVARIANT: id <= i. And since i is always incremented by 1, id==i. 746 if id != i { 747 panic(fmt.Sprintf("%d != %d", id, i)) 748 } 749 if intersects, err := f.intersectsFilter(i, prop); err != nil { 750 return ret, err 751 } else if intersects == blockExcluded { 752 return blockExcluded, nil 753 } else if intersects == blockMaybeExcluded { 754 ret = blockMaybeExcluded 755 } 756 i++ 757 } 758 // ret == blockIntersects || ret == blockMaybeExcluded 759 return ret, nil 760 } 761 762 func (f *BlockPropertiesFilterer) intersectsFilter(i int, prop []byte) (intersectsResult, error) { 763 if f.shortIDToFiltersIndex[i] >= 0 { 764 intersects, err := f.filters[f.shortIDToFiltersIndex[i]].Intersects(prop) 765 if err != nil { 766 return blockIntersects, err 767 } 768 if !intersects { 769 return blockExcluded, nil 770 } 771 } 772 if i == f.boundLimitedShortID { 773 // The bound-limited filter uses this id. 
774 // 775 // The bound-limited filter only applies within a keyspan interval. We 776 // expect the Intersects call to be cheaper than bounds checks. If 777 // Intersects determines that there is no intersection, we return 778 // `blockMaybeExcluded` if no other bpf unconditionally excludes the 779 // block. 780 intersects, err := f.boundLimitedFilter.Intersects(prop) 781 if err != nil { 782 return blockIntersects, err 783 } else if !intersects { 784 return blockMaybeExcluded, nil 785 } 786 } 787 return blockIntersects, nil 788 }