github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/metamorphic/key_manager.go (about) 1 package metamorphic 2 3 import ( 4 "bytes" 5 "cmp" 6 "fmt" 7 "slices" 8 "strings" 9 10 "github.com/cockroachdb/errors" 11 "github.com/cockroachdb/pebble" 12 "github.com/cockroachdb/pebble/internal/base" 13 "github.com/cockroachdb/pebble/internal/testkeys" 14 "github.com/stretchr/testify/require" 15 ) 16 17 // objKey is a tuple of (objID, key). This struct is used primarily as a map 18 // key for keyManager. Only writer objTags can occur here, i.e., dbTag and 19 // batchTag, since this is used for tracking the keys in a writer. 20 type objKey struct { 21 id objID 22 key []byte 23 } 24 25 // makeObjKey returns a new objKey given and id and key. 26 func makeObjKey(id objID, key []byte) objKey { 27 if id.tag() != dbTag && id.tag() != batchTag { 28 panic(fmt.Sprintf("unexpected non-writer tag %v", id.tag())) 29 } 30 return objKey{id, key} 31 } 32 33 // String implements fmt.Stringer, returning a stable string representation of 34 // the objKey. This string is used as map key. 35 func (o objKey) String() string { 36 return fmt.Sprintf("%s:%s", o.id, o.key) 37 } 38 39 // keyMeta is metadata associated with an (objID, key) pair, where objID is 40 // a writer containing the key. 41 type keyMeta struct { 42 objKey 43 // history provides the history of writer operations applied against this 44 // key on this object. history is always ordered by non-decreasing 45 // metaTimestamp. 46 history keyHistory 47 } 48 49 func (m *keyMeta) clear() { 50 m.history = m.history[:0] 51 } 52 53 // mergeInto merges this metadata into the metadata for other, appending all of 54 // its individual operations to dst at the provided timestamp. 55 func (m *keyMeta) mergeInto(dst *keyMeta, ts int) { 56 for _, op := range m.history { 57 // If the key is being merged into a database object and the operation 58 // is a delete, we can clear the destination history. Database objects 59 // are end points in the merging of keys and won't be the source of a 60 // future merge. Deletions cause all other operations to behave as 61 // though the key was never written to the database at all, so we don't 62 // need to consider it for maintaining single delete invariants. 63 // 64 // NB: There's a subtlety here in that isDelete() will return true if 65 // opType is a writerSingleDelete, but single deletes are capable of 66 // leaking information about the history of writes. However, that's 67 // okay, because as long as we're properly generating single deletes 68 // according to the W1 invariant described in keyManager's comment, a 69 // single delete is equivalent to delete for the current history. 70 if dst.objKey.id.tag() == dbTag && op.opType.isDelete() { 71 dst.clear() 72 continue 73 } 74 dst.history = append(dst.history, keyHistoryItem{ 75 opType: op.opType, 76 metaTimestamp: ts, 77 }) 78 } 79 } 80 81 type bounds struct { 82 smallest []byte 83 largest []byte 84 largestExcl bool // is largest exclusive? 85 } 86 87 func (b *bounds) String() string { 88 if b.largestExcl { 89 return fmt.Sprintf("[%q,%q)", b.smallest, b.largest) 90 } 91 return fmt.Sprintf("[%q,%q]", b.smallest, b.largest) 92 } 93 94 // overlaps returns true iff the bounds intersect. 95 func (b *bounds) overlaps(cmp base.Compare, other *bounds) bool { 96 // Is b strictly before other? 97 if v := cmp(b.largest, other.smallest); v < 0 || (v == 0 && b.largestExcl) { 98 return false 99 } 100 // Is b strictly after other? 101 if v := cmp(b.smallest, other.largest); v > 0 || (v == 0 && other.largestExcl) { 102 return false 103 } 104 return true 105 } 106 107 // mergeInto merges the receiver bounds into other, mutating other. 108 func (b bounds) mergeInto(cmp base.Compare, other *bounds) { 109 if cmp(other.smallest, b.smallest) > 0 { 110 other.smallest = b.smallest 111 } 112 if v := cmp(other.largest, b.largest); v < 0 || (v == 0 && other.largestExcl) { 113 other.largest = b.largest 114 other.largestExcl = b.largestExcl 115 } 116 } 117 118 // keyManager tracks the write operations performed on keys in the generation 119 // phase of the metamorphic test. It maintains histories of operations performed 120 // against every unique user key on every writer object. These histories inform 121 // operation generation in order to maintain invariants that Pebble requires of 122 // end users, mostly around single deletions. 123 // 124 // A single deletion has a subtle requirement of the writer: 125 // 126 // W1: The writer may only single delete a key `k` if `k` has been Set once 127 // (and never MergeD) since the last delete. 128 // 129 // When a SINGLEDEL key deletes a SET key within a compaction, both the SET and 130 // the SINGLEDEL keys are elided. If multiple SETs of the key exist within the 131 // LSM, the SINGLEDEL reveals the lower SET. This behavior is dependent on the 132 // internal LSM state and nondeterministic. To ensure determinism, the end user 133 // must satisfy W1 and use single delete only when they can guarantee that the 134 // key has been set at most once since the last delete, preventing this rollback 135 // to a previous value. 136 // 137 // This W1 invariant requires a delicate dance during operation generation, 138 // because independent batches may be independently built and committed. With 139 // multi-instance variants of the metamorphic tests, keys in batches may 140 // ultimately be committed to any of several DB instances. To satisfy these 141 // requirements, the key manager tracks the history of every key on every 142 // writable object. When generating a new single deletion operation, the 143 // generator asks the key manager for a set of keys for which a single delete 144 // maintains the W1 invariant within the object itself. This object-local W1 145 // invariant (OLW1) is equivalent to W1 if one only ever performs write 146 // operations directly against individual DB objects. 147 // 148 // However with the existence of batches that receive writes independent of DB 149 // objects, W1 may be violated by appending the histories of two objects that 150 // independently satisfy OLW1. Consider a sequence such as: 151 // 152 // 1. db1.Set("foo") 153 // 2. batch1.Set("foo") 154 // 3. batch1.SingleDelete("foo") 155 // 4. db1.Apply(batch1) 156 // 157 // Both db1 and batch1 satisfy the object-local invariant OLW1. However the 158 // composition of the histories created by appending batch1's operations to db1 159 // creates a history that now violates W1 on db1. To detect this violation, 160 // batch applications/commits and ingestions examine the tail of the destination 161 // object's history and the head of the source batch's history. When a violation 162 // is detected, these operations insert additional Delete operations to clear 163 // the conflicting keys before proceeding with the conflicting operation. These 164 // deletes reset the key history. 165 // 166 // Note that this generation-time key tracking requires that operations be 167 // infallible, because a runtime failure would cause the key manager's state to 168 // diverge from the runtime object state. Ingestion operations pose an obstacle, 169 // because the generator may generate ingestions that fail due to overlapping 170 // sstables. Today, this complication is sidestepped by avoiding ingestion of 171 // multiple batches containing deletes or single deletes since loss of those 172 // specific operations on a key are what we cannot tolerate (doing SingleDelete 173 // on a key that has not been written to because the Set was lost is harmless). 174 // 175 // TODO(jackson): Instead, compute smallest and largest bounds of batches so 176 // that we know at generation-time whether or not an ingestion operation will 177 // fail and can avoid updating key state. 178 type keyManager struct { 179 comparer *base.Comparer 180 181 // metaTimestamp is used to provide a ordering over certain operations like 182 // iter creation, updates to keys. Keeping track of the timestamp allows us 183 // to make determinations such as whether a key will be visible to an 184 // iterator. 185 metaTimestamp int 186 187 // byObjKey tracks the state for each (writer, key) pair. It refers to the 188 // same *keyMeta as in the byObj slices. Using a map allows for fast state 189 // lookups when changing the state based on a writer operation on the key. 190 byObjKey map[string]*keyMeta 191 // List of keys per writer, and what has happened to it in that writer. 192 // Will be transferred when needed. 193 byObj map[objID][]*keyMeta 194 // boundsByObj holds user key bounds encompassing all the keys set within an 195 // object. It's updated within `update` when a new op is generated. It's 196 // used when determining whether an ingestion should succeed or not. 197 boundsByObj map[objID]*bounds 198 199 // globalKeys represents all the keys that have been generated so far. Not 200 // all these keys have been written to. globalKeys is sorted. 201 globalKeys [][]byte 202 // globalKeysMap contains the same keys as globalKeys but in a map. It 203 // ensures no duplication. 204 globalKeysMap map[string]bool 205 // globalKeyPrefixes contains all the key prefixes (as defined by the 206 // comparer's Split) generated so far. globalKeyPrefixes is sorted. 207 globalKeyPrefixes [][]byte 208 // globalKeyPrefixesMap contains the same keys as globalKeyPrefixes. It 209 // ensures no duplication. 210 globalKeyPrefixesMap map[string]struct{} 211 } 212 213 func (k *keyManager) nextMetaTimestamp() int { 214 ret := k.metaTimestamp 215 k.metaTimestamp++ 216 return ret 217 } 218 219 // newKeyManager returns a pointer to a new keyManager. Callers should 220 // interact with this using addNewKey, knownKeys, update methods only. 221 func newKeyManager(numInstances int) *keyManager { 222 m := &keyManager{ 223 comparer: testkeys.Comparer, 224 byObjKey: make(map[string]*keyMeta), 225 byObj: make(map[objID][]*keyMeta), 226 boundsByObj: make(map[objID]*bounds), 227 globalKeysMap: make(map[string]bool), 228 globalKeyPrefixesMap: make(map[string]struct{}), 229 } 230 for i := 1; i <= max(numInstances, 1); i++ { 231 m.byObj[makeObjID(dbTag, uint32(i))] = []*keyMeta{} 232 } 233 return m 234 } 235 236 // addNewKey adds the given key to the key manager for global key tracking. 237 // Returns false iff this is not a new key. 238 func (k *keyManager) addNewKey(key []byte) bool { 239 if k.globalKeysMap[string(key)] { 240 return false 241 } 242 insertSorted(k.comparer.Compare, &k.globalKeys, key) 243 k.globalKeysMap[string(key)] = true 244 245 prefixLen := k.comparer.Split(key) 246 if _, ok := k.globalKeyPrefixesMap[string(key[:prefixLen])]; !ok { 247 insertSorted(k.comparer.Compare, &k.globalKeyPrefixes, key[:prefixLen]) 248 k.globalKeyPrefixesMap[string(key[:prefixLen])] = struct{}{} 249 } 250 return true 251 } 252 253 // getOrInit returns the keyMeta for the (objID, key) pair, if it exists, else 254 // allocates, initializes and returns a new value. 255 func (k *keyManager) getOrInit(id objID, key []byte) *keyMeta { 256 o := makeObjKey(id, key) 257 m, ok := k.byObjKey[o.String()] 258 if ok { 259 return m 260 } 261 m = &keyMeta{objKey: makeObjKey(id, key)} 262 // Initialize the key-to-meta index. 263 k.byObjKey[o.String()] = m 264 // Add to the id-to-metas slide. 265 k.byObj[o.id] = append(k.byObj[o.id], m) 266 267 // Expand the object's bounds to contain this key if they don't already. 268 k.expandBounds(id, bounds{ 269 smallest: key, 270 largest: key, 271 }) 272 return m 273 } 274 275 // mergeKeysInto merges all metadata for all keys associated with the "from" ID 276 // with the metadata for keys associated with the "to" ID. 277 func (k *keyManager) mergeKeysInto(from, to objID, mergeFunc func(src, dst *keyMeta, ts int)) { 278 msFrom, ok := k.byObj[from] 279 if !ok { 280 msFrom = []*keyMeta{} 281 k.byObj[from] = msFrom 282 } 283 msTo, ok := k.byObj[to] 284 if !ok { 285 msTo = []*keyMeta{} 286 k.byObj[to] = msTo 287 } 288 289 // Sort to facilitate a merge. 290 slices.SortFunc(msFrom, func(a, b *keyMeta) int { 291 return bytes.Compare(a.key, b.key) 292 }) 293 slices.SortFunc(msTo, func(a, b *keyMeta) int { 294 return bytes.Compare(a.key, b.key) 295 }) 296 297 ts := k.nextMetaTimestamp() 298 var msNew []*keyMeta 299 var iTo int 300 for _, m := range msFrom { 301 // Move cursor on mTo forward. 302 for iTo < len(msTo) && bytes.Compare(msTo[iTo].key, m.key) < 0 { 303 msNew = append(msNew, msTo[iTo]) 304 iTo++ 305 } 306 307 var mTo *keyMeta 308 if iTo < len(msTo) && bytes.Equal(msTo[iTo].key, m.key) { 309 mTo = msTo[iTo] 310 iTo++ 311 } else { 312 mTo = &keyMeta{objKey: makeObjKey(to, m.key)} 313 k.byObjKey[mTo.String()] = mTo 314 } 315 316 mergeFunc(m, mTo, ts) 317 msNew = append(msNew, mTo) 318 319 delete(k.byObjKey, m.String()) // Unlink "from". 320 } 321 322 // Add any remaining items from the "to" set. 323 for iTo < len(msTo) { 324 msNew = append(msNew, msTo[iTo]) 325 iTo++ 326 } 327 328 // All the keys in `from` have been merged into `to`. Expand `to`'s bounds 329 // to be at least as wide as `from`'s. 330 if fromBounds := k.boundsByObj[from]; fromBounds != nil { 331 k.expandBounds(to, *fromBounds) 332 } 333 k.byObj[to] = msNew // Update "to" obj. 334 delete(k.byObj, from) // Unlink "from" obj. 335 delete(k.boundsByObj, from) // Unlink "from" bounds. 336 } 337 338 // expandBounds expands the incrementally maintained bounds of o to be at least 339 // as wide as `b`. 340 func (k *keyManager) expandBounds(o objID, b bounds) { 341 existing, ok := k.boundsByObj[o] 342 if !ok { 343 existing = new(bounds) 344 *existing = b 345 k.boundsByObj[o] = existing 346 return 347 } 348 b.mergeInto(k.comparer.Compare, existing) 349 } 350 351 // doObjectBoundsOverlap returns true iff any of the named objects have key 352 // bounds that overlap any other named object. 353 func (k *keyManager) doObjectBoundsOverlap(objIDs []objID) bool { 354 for i := range objIDs { 355 ib, iok := k.boundsByObj[objIDs[i]] 356 if !iok { 357 continue 358 } 359 for j := i + 1; j < len(objIDs); j++ { 360 jb, jok := k.boundsByObj[objIDs[j]] 361 if !jok { 362 continue 363 } 364 if ib.overlaps(k.comparer.Compare, jb) { 365 return true 366 } 367 } 368 } 369 return false 370 } 371 372 // checkForSingleDelConflicts examines all the keys written to srcObj, and 373 // determines whether any of the contained single deletes would be 374 // nondeterministic if applied to dstObj in dstObj's current state. It returns a 375 // slice of all the keys that are found to conflict. In order to preserve 376 // determinism, the caller must delete the key from the destination before 377 // writing src's mutations to dst in order to ensure determinism. 378 // 379 // It takes a `srcCollapsed` parameter that determines whether the source 380 // history should be "collapsed" (see keyHistory.collapsed) before determining 381 // whether the applied state will conflict. This is required to facilitate 382 // ingestOps which are NOT equivalent to committing the batch, because they can 383 // only commit 1 internal point key at each unique user key. 384 func (k *keyManager) checkForSingleDelConflicts(srcObj, dstObj objID, srcCollapsed bool) [][]byte { 385 var conflicts [][]byte 386 for _, src := range k.byObj[srcObj] { 387 // Single delete generation logic already ensures that both srcObj and 388 // dstObj's single deletes are deterministic within the context of their 389 // existing writes. However, applying srcObj on top of dstObj may 390 // violate the invariants. Consider: 391 // 392 // src: a.SET; a.SINGLEDEL; 393 // dst: a.SET; 394 // 395 // The merged view is: 396 // 397 // a.SET; a.SET; a.SINGLEDEL; 398 // 399 // This is invalid, because there is more than 1 value mutation of the 400 // key before the single delete. 401 // 402 // We walk the source object's history in chronological order, looking 403 // for a single delete that was written before a DEL/RANGEDEL. (NB: We 404 // don't need to look beyond a DEL/RANGEDEL, because these deletes bound 405 // any subsequently-written single deletes to applying to the keys 406 // within src's history between the two tombstones. We already know from 407 // per-object history invariants that any such single delete must be 408 // deterministic with respect to src's keys.) 409 var srcHasUnboundedSingleDelete bool 410 var srcValuesBeforeSingleDelete int 411 412 // When the srcObj is being ingested (srcCollapsed=t), the semantics 413 // change. We must first "collapse" the key's history to represent the 414 // ingestion semantics. 415 srcHistory := src.history 416 if srcCollapsed { 417 srcHistory = src.history.collapsed() 418 } 419 420 srcloop: 421 for _, item := range srcHistory { 422 switch item.opType { 423 case writerDelete, writerDeleteRange: 424 // We found a DEL or RANGEDEL before any single delete. If src 425 // contains additional single deletes, their effects are limited 426 // to applying to later keys. Combining the two object histories 427 // doesn't pose any determinism risk. 428 break srcloop 429 case writerSingleDelete: 430 // We found a single delete. Since we found this single delete 431 // before a DEL or RANGEDEL, this delete has the potential to 432 // affect the visibility of keys in `dstObj`. We'll need to look 433 // for potential conflicts down below. 434 srcHasUnboundedSingleDelete = true 435 if srcValuesBeforeSingleDelete > 1 { 436 panic(errors.AssertionFailedf("unexpectedly found %d sets/merges within %s before single del", 437 srcValuesBeforeSingleDelete, srcObj)) 438 } 439 break srcloop 440 case writerSet, writerMerge: 441 // We found a SET or MERGE operation for this key. If there's a 442 // subsequent single delete, we'll need to make sure there's not 443 // a SET or MERGE in the dst too. 444 srcValuesBeforeSingleDelete++ 445 default: 446 panic(errors.AssertionFailedf("unexpected optype %d", item.opType)) 447 } 448 } 449 if !srcHasUnboundedSingleDelete { 450 continue 451 } 452 453 dst, ok := k.byObjKey[makeObjKey(dstObj, src.key).String()] 454 // If the destination writer has no record of the key, the combined key 455 // history is simply the src object's key history which is valid due to 456 // per-object single deletion invariants. 457 if !ok { 458 continue 459 } 460 461 // We need to examine the trailing key history on dst. 462 consecutiveValues := srcValuesBeforeSingleDelete 463 dstloop: 464 for i := len(dst.history) - 1; i >= 0; i-- { 465 switch dst.history[i].opType { 466 case writerSet, writerMerge: 467 // A SET/MERGE may conflict if there's more than 1 consecutive 468 // SET/MERGEs. 469 consecutiveValues++ 470 if consecutiveValues > 1 { 471 conflicts = append(conflicts, src.key) 472 break dstloop 473 } 474 case writerDelete, writerSingleDelete, writerDeleteRange: 475 // Dels clear the history, enabling use of single delete. 476 break dstloop 477 default: 478 panic(errors.AssertionFailedf("unexpected optype %d", dst.history[i].opType)) 479 } 480 } 481 } 482 return conflicts 483 } 484 485 // update updates the internal state of the keyManager according to the given 486 // op. 487 func (k *keyManager) update(o op) { 488 switch s := o.(type) { 489 case *setOp: 490 meta := k.getOrInit(s.writerID, s.key) 491 meta.history = append(meta.history, keyHistoryItem{ 492 opType: writerSet, 493 metaTimestamp: k.nextMetaTimestamp(), 494 }) 495 case *mergeOp: 496 meta := k.getOrInit(s.writerID, s.key) 497 meta.history = append(meta.history, keyHistoryItem{ 498 opType: writerMerge, 499 metaTimestamp: k.nextMetaTimestamp(), 500 }) 501 case *deleteOp: 502 meta := k.getOrInit(s.writerID, s.key) 503 if meta.objKey.id.tag() == dbTag { 504 meta.clear() 505 } else { 506 meta.history = append(meta.history, keyHistoryItem{ 507 opType: writerDelete, 508 metaTimestamp: k.nextMetaTimestamp(), 509 }) 510 } 511 case *deleteRangeOp: 512 // We track the history of discrete point keys, but a range deletion 513 // applies over a continuous key span of infinite keys. However, the key 514 // manager knows all keys that have been used in all operations, so we 515 // can discretize the range tombstone by adding it to every known key 516 // within the range. 517 ts := k.nextMetaTimestamp() 518 keyRange := pebble.KeyRange{Start: s.start, End: s.end} 519 for _, key := range k.knownKeysInRange(keyRange) { 520 meta := k.getOrInit(s.writerID, key) 521 if meta.objKey.id.tag() == dbTag { 522 meta.clear() 523 } else { 524 meta.history = append(meta.history, keyHistoryItem{ 525 opType: writerDeleteRange, 526 metaTimestamp: ts, 527 }) 528 } 529 } 530 k.expandBounds(s.writerID, bounds{ 531 smallest: s.start, 532 largest: s.end, 533 largestExcl: true, 534 }) 535 case *singleDeleteOp: 536 meta := k.getOrInit(s.writerID, s.key) 537 meta.history = append(meta.history, keyHistoryItem{ 538 opType: writerSingleDelete, 539 metaTimestamp: k.nextMetaTimestamp(), 540 }) 541 542 case *ingestOp: 543 // Some ingestion operations may attempt to ingest overlapping sstables 544 // which is prohibited. We know at generation time whether these 545 // ingestions will be successful. If they won't be successful, we should 546 // not update the key state because both the batch(es) and target DB 547 // will be left unmodified. 548 if k.doObjectBoundsOverlap(s.batchIDs) { 549 // This ingestion will fail. 550 return 551 } 552 553 // For each batch, merge the keys into the DB. We can't call 554 // keyMeta.mergeInto directly to merge, because ingest operations first 555 // "flatten" the batch (because you can't set the same key twice at a 556 // single sequence number). Instead we compute the collapsed history and 557 // merge that. 558 for _, batchID := range s.batchIDs { 559 k.mergeKeysInto(batchID, s.dbID, func(src, dst *keyMeta, ts int) { 560 collapsedSrc := keyMeta{ 561 objKey: src.objKey, 562 history: src.history.collapsed(), 563 } 564 collapsedSrc.mergeInto(dst, ts) 565 }) 566 } 567 // TODO(bilal): Handle ingestAndExciseOp and replicateOp here. 568 case *applyOp: 569 // Merge the keys from this writer into the parent writer. 570 k.mergeKeysInto(s.batchID, s.writerID, (*keyMeta).mergeInto) 571 case *batchCommitOp: 572 // Merge the keys from the batch with the keys from the DB. 573 k.mergeKeysInto(s.batchID, s.dbID, (*keyMeta).mergeInto) 574 } 575 } 576 577 func (k *keyManager) knownKeys() (keys [][]byte) { 578 return k.globalKeys 579 } 580 581 // knownKeysInRange returns all eligible read keys within the range 582 // [start,end). The returned slice is owned by the keyManager and must not be 583 // retained. 584 func (k *keyManager) knownKeysInRange(kr pebble.KeyRange) (keys [][]byte) { 585 s, _ := slices.BinarySearchFunc(k.globalKeys, kr.Start, k.comparer.Compare) 586 e, _ := slices.BinarySearchFunc(k.globalKeys, kr.End, k.comparer.Compare) 587 if s >= e { 588 return nil 589 } 590 return k.globalKeys[s:e] 591 } 592 593 func (k *keyManager) prefixes() (prefixes [][]byte) { 594 return k.globalKeyPrefixes 595 } 596 597 // prefixExists returns true if a key has been generated with the provided 598 // prefix before. 599 func (k *keyManager) prefixExists(prefix []byte) bool { 600 _, exists := k.globalKeyPrefixesMap[string(prefix)] 601 return exists 602 } 603 604 // eligibleSingleDeleteKeys returns a slice of keys that can be safely single 605 // deleted, given the writer id. Restricting single delete keys through this 606 // method is used to ensure the OLW1 guarantee (see the keyManager comment) for 607 // the provided object ID. 608 func (k *keyManager) eligibleSingleDeleteKeys(o objID) (keys [][]byte) { 609 // Creating a slice of keys is wasteful given that the caller will pick one, 610 // but makes it simpler for unit testing. 611 for _, key := range k.globalKeys { 612 objKey := makeObjKey(o, key) 613 meta, ok := k.byObjKey[objKey.String()] 614 if !ok { 615 keys = append(keys, key) 616 continue 617 } 618 // Examine the history within this object. 619 if meta.history.canSingleDelete() { 620 keys = append(keys, key) 621 } 622 } 623 return keys 624 } 625 626 // a keyHistoryItem describes an individual operation performed on a key. 627 type keyHistoryItem struct { 628 // opType may be writerSet, writerDelete, writerSingleDelete, 629 // writerDeleteRange or writerMerge only. No other opTypes may appear here. 630 opType opType 631 metaTimestamp int 632 } 633 634 // keyHistory captures the history of mutations to a key in chronological order. 635 type keyHistory []keyHistoryItem 636 637 // before returns the subslice of the key history that happened strictly before 638 // the provided meta timestamp. 639 func (h keyHistory) before(metaTimestamp int) keyHistory { 640 i, _ := slices.BinarySearchFunc(h, metaTimestamp, func(a keyHistoryItem, ts int) int { 641 return cmp.Compare(a.metaTimestamp, ts) 642 }) 643 return h[:i] 644 } 645 646 // canSingleDelete examines the tail of the history and returns true if a single 647 // delete appended to this history would satisfy the single delete invariants. 648 func (h keyHistory) canSingleDelete() bool { 649 if len(h) == 0 { 650 return true 651 } 652 switch o := h[len(h)-1].opType; o { 653 case writerDelete, writerDeleteRange, writerSingleDelete: 654 return true 655 case writerSet, writerMerge: 656 if len(h) == 1 { 657 return true 658 } 659 return h[len(h)-2].opType.isDelete() 660 default: 661 panic(errors.AssertionFailedf("unexpected writer op %v", o)) 662 } 663 } 664 665 func (h keyHistory) String() string { 666 var sb strings.Builder 667 for i, it := range h { 668 if i > 0 { 669 fmt.Fprint(&sb, ", ") 670 } 671 switch it.opType { 672 case writerDelete: 673 fmt.Fprint(&sb, "del") 674 case writerDeleteRange: 675 fmt.Fprint(&sb, "delrange") 676 case writerSingleDelete: 677 fmt.Fprint(&sb, "singledel") 678 case writerSet: 679 fmt.Fprint(&sb, "set") 680 case writerMerge: 681 fmt.Fprint(&sb, "merge") 682 default: 683 fmt.Fprintf(&sb, "optype[v=%d]", it.opType) 684 } 685 fmt.Fprintf(&sb, "(%d)", it.metaTimestamp) 686 } 687 return sb.String() 688 } 689 690 // hasVisibleKey examines the tail of the history and returns true if the 691 // history should end in a visible value for this key. 692 func (h keyHistory) hasVisibleValue() bool { 693 if len(h) == 0 { 694 return false 695 } 696 return !h[len(h)-1].opType.isDelete() 697 } 698 699 // collapsed returns a new key history that's equivalent to the history created 700 // by an ingestOp that "collapses" a batch's keys. See ingestOp.build. 701 func (h keyHistory) collapsed() keyHistory { 702 var ret keyHistory 703 // When collapsing a batch, any range deletes are semantically applied 704 // first. Look for any range deletes and apply them. 705 for _, op := range h { 706 if op.opType == writerDeleteRange { 707 ret = append(ret, op) 708 break 709 } 710 } 711 // Among point keys, the most recently written key wins. 712 for i := len(h) - 1; i >= 0; i-- { 713 if h[i].opType != writerDeleteRange { 714 ret = append(ret, h[i]) 715 break 716 } 717 } 718 return ret 719 } 720 721 func opWrittenKeys(untypedOp op) [][]byte { 722 switch t := untypedOp.(type) { 723 case *applyOp: 724 case *batchCommitOp: 725 case *checkpointOp: 726 case *closeOp: 727 case *compactOp: 728 case *dbRestartOp: 729 case *deleteOp: 730 return [][]byte{t.key} 731 case *deleteRangeOp: 732 return [][]byte{t.start, t.end} 733 case *flushOp: 734 case *getOp: 735 case *ingestOp: 736 case *initOp: 737 case *iterFirstOp: 738 case *iterLastOp: 739 case *iterNextOp: 740 case *iterNextPrefixOp: 741 case *iterCanSingleDelOp: 742 case *iterPrevOp: 743 case *iterSeekGEOp: 744 case *iterSeekLTOp: 745 case *iterSeekPrefixGEOp: 746 case *iterSetBoundsOp: 747 case *iterSetOptionsOp: 748 case *mergeOp: 749 return [][]byte{t.key} 750 case *newBatchOp: 751 case *newIndexedBatchOp: 752 case *newIterOp: 753 case *newIterUsingCloneOp: 754 case *newSnapshotOp: 755 case *rangeKeyDeleteOp: 756 case *rangeKeySetOp: 757 case *rangeKeyUnsetOp: 758 case *setOp: 759 return [][]byte{t.key} 760 case *singleDeleteOp: 761 return [][]byte{t.key} 762 case *replicateOp: 763 return [][]byte{t.start, t.end} 764 } 765 return nil 766 } 767 768 func loadPrecedingKeys(t TestingT, ops []op, cfg *config, m *keyManager) { 769 for _, op := range ops { 770 // Pretend we're generating all the operation's keys as potential new 771 // key, so that we update the key manager's keys and prefix sets. 772 for _, k := range opWrittenKeys(op) { 773 m.addNewKey(k) 774 775 // If the key has a suffix, ratchet up the suffix distribution if 776 // necessary. 777 if s := m.comparer.Split(k); s < len(k) { 778 suffix, err := testkeys.ParseSuffix(k[s:]) 779 require.NoError(t, err) 780 if uint64(suffix) > cfg.writeSuffixDist.Max() { 781 diff := int(uint64(suffix) - cfg.writeSuffixDist.Max()) 782 cfg.writeSuffixDist.IncMax(diff) 783 } 784 } 785 } 786 787 // Update key tracking state. 788 m.update(op) 789 } 790 } 791 792 func insertSorted(cmp base.Compare, dst *[][]byte, k []byte) { 793 s := *dst 794 i, _ := slices.BinarySearchFunc(s, k, cmp) 795 *dst = slices.Insert(s, i, k) 796 }