github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/testdata/iter_histories/prefix_iteration (about)

     1  # Regression test for a bug discovered in #1878.
     2  # A lazy-combined iterator triggers combined iteration during an initial
     3  # seek-prefix-ge call. The initial seek-prefix-ge call avoids defragmenting
     4  # fragments beyond the initial fragment [c,f). A subsequent seek-ge that seeks
     5  # within the bounds of the initial fragment [c,f) must not fall into the
     6  # optimization that reuses the span without reseeking the keyspan iterator,
     7  # because the span is not defragmented.
     8  #
     9  # In the bug surfaced by #1878, the initial seek-prefix-ge that switched to
    10  # combined iteration failed to record that the iterator was now in prefix mode,
    11  # allowing the subsequent seek-ge to incorrectly reuse the existing span.
    12  
    13  reset
    14  ----
    15  
    16  batch commit
    17  range-key-set a c @5 foo
    18  ----
    19  committed 1 keys
    20  
    21  flush
    22  ----
    23  
    24  batch commit
    25  range-key-set c f @5 foo
    26  ----
    27  committed 1 keys
    28  
    29  flush
    30  ----
    31  
    32  batch commit
    33  range-key-set f m @5 foo
    34  ----
    35  committed 1 keys
    36  
    37  flush
    38  ----
    39  
    40  lsm
    41  ----
    42  0.0:
    43    000005:[a#10,RANGEKEYSET-c#inf,RANGEKEYSET]
    44    000007:[c#11,RANGEKEYSET-f#inf,RANGEKEYSET]
    45    000009:[f#12,RANGEKEYSET-m#inf,RANGEKEYSET]
    46  
    47  combined-iter
    48  seek-prefix-ge d@5
    49  seek-ge d
    50  ----
    51  d@5: (., [d-"d\x00") @5=foo UPDATED)
    52  d: (., [a-m) @5=foo UPDATED)
    53  
    54  # Test that repeated SeekPrefixGEs correctly return truncated spans with
    55  # RangeKeyChanged() -> UPDATED.
    56  
    57  combined-iter
    58  seek-prefix-ge c@5
    59  seek-prefix-ge d@5
    60  seek-ge d@7
    61  seek-prefix-ge d@7
    62  ----
    63  c@5: (., [c-"c\x00") @5=foo UPDATED)
    64  d@5: (., [d-"d\x00") @5=foo UPDATED)
    65  d@7: (., [a-m) @5=foo UPDATED)
    66  d@7: (., [d-"d\x00") @5=foo UPDATED)
    67  
    68  # Test an LSM with range keys fragmented within a prefix.
    69  # This is a regression test for cockroachdb/cockroach#86102.
    70  
    71  reset target-file-size=1
    72  ----
    73  
    74  batch commit
    75  range-key-set a c @1 bar
    76  range-key-set c e @1 foo
    77  set c@9 c@9
    78  set c@8 c@8
    79  set c@7 c@7
    80  set c@6 c@6
    81  set c@5 c@5
    82  set c@4 c@4
    83  set c@3 c@3
    84  set c@2 c@2
    85  set d@0 d@0
    86  range-key-set y z @1 foo
    87  set z z
    88  ----
    89  committed 13 keys
    90  
    91  flush
    92  ----
    93  
    94  lsm
    95  ----
    96  0.0:
    97    000005:[a#10,RANGEKEYSET-c@8#inf,RANGEKEYSET]
    98    000006:[c@8#13,SET-c@7#inf,RANGEKEYSET]
    99    000007:[c@7#14,SET-c@6#inf,RANGEKEYSET]
   100    000008:[c@6#15,SET-c@5#inf,RANGEKEYSET]
   101    000009:[c@5#16,SET-c@4#inf,RANGEKEYSET]
   102    000010:[c@4#17,SET-c@3#inf,RANGEKEYSET]
   103    000011:[c@3#18,SET-c@2#inf,RANGEKEYSET]
   104    000012:[c@2#19,SET-d@0#inf,RANGEKEYSET]
   105    000013:[d@0#20,SET-e#inf,RANGEKEYSET]
   106    000014:[y#21,RANGEKEYSET-z#22,SET]
   107  
   108  # The first seek-prefix-ge y@1 converts the iterator from lazy combined iterator
   109  # to combined iteration.
   110  #
   111  # The second seek-prefix-ge d@1 does not fully defragment the range key. The
   112  # underlying range key is defragmented to [c@2,e). This incomplete
   113  # defragmentation is still hidden from the user at this point, since the range
   114  # key is truncated to [d,d\x00).
   115  #
   116  # The third seek-prefix-ge c@0 seeks to a key that falls within the
   117  # range key currently defragmented on interleaving iterator. A previous bug
   118  # would use this span without defragmenting the span to include the full
   119  # span of the prefix [c,c\x00).
   120  
   121  combined-iter
   122  seek-prefix-ge y@1
   123  seek-prefix-ge d@1
   124  seek-prefix-ge c@0
   125  ----
   126  y@1: (., [y-"y\x00") @1=foo UPDATED)
   127  d@1: (., [d-"d\x00") @1=foo UPDATED)
   128  c@0: (., [c-"c\x00") @1=foo UPDATED)
   129  
   130  # Test an LSM with range keys fragmented within a prefix.
   131  # This is a regression test for cockroachdb/cockroach#86102.
   132  
   133  reset
   134  ----
   135  
   136  ingest ext1
   137  range-key-set a c@8 @1 bar
   138  set c@9 c@9
   139  ----
   140  
   141  ingest ext2
   142  range-key-set c@8 e @1 bar
   143  set c@8 c@8
   144  set c@7 c@7
   145  set c@6 c@6
   146  set c@5 c@5
   147  set c@4 c@4
   148  set c@3 c@3
   149  set c@2 c@2
   150  ----
   151  
   152  ingest ext2
   153  range-key-set y z @1 foo
   154  set z z
   155  ----
   156  
   157  lsm
   158  ----
   159  6:
   160    000004:[a#10,RANGEKEYSET-c@8#inf,RANGEKEYSET]
   161    000005:[c@8#11,RANGEKEYSET-e#inf,RANGEKEYSET]
   162    000006:[y#12,RANGEKEYSET-z#12,SET]
   163  
   164  
   165  # The first seek-prefix-ge y@1 converts the iterator from lazy combined iterator
   166  # to combined iteration.
   167  #
   168  # The second seek-prefix-ge a@1 does not fully defragment the range key. The
   169  # underlying range key is defragmented to [a,c@8). This incomplete
   170  # defragmentation is still hidden from the user at this point, since the range
   171  # key is truncated to [a,a\x00).
   172  #
   173  # The third seek-prefix-ge c@10 seeks to a key that falls within the
   174  # range key currently defragmented on interleaving iterator. A previous bug
   175  # would use this span without defragmenting the span to include the full
   176  # span of the prefix [c,c\x00).
   177  
   178  combined-iter
   179  seek-prefix-ge y@1
   180  seek-prefix-ge a@1
   181  seek-prefix-ge c@10
   182  ----
   183  y@1: (., [y-"y\x00") @1=foo UPDATED)
   184  a@1: (., [a-"a\x00") @1=bar UPDATED)
   185  c@10: (., [c-"c\x00") @1=bar UPDATED)
   186  
   187  # Regression test for an invariant violation in the range key defragmenting
   188  # iterator during prefix iteration. [Related to #1893]. There is a lot of
   189  # subtlety here. Do not modify this test case without verifying that it still
   190  # exercises the right conditions.
   191  #
   192  # Normally during forward iteration, if a switch to combined iteration is
   193  # triggered, the lazy-combined iterator establishes a seek key for the range key
   194  # iterator such that the seek key is:
   195  #   1. greater than or equal to the key at previous iterator position.
   196  #   2. less than or equal to the first range key with a start key greater than
   197  #       or equal to the previous iterator position.
   198  # These invariants are important so that the range key iterator is positioned
   199  # appropriately after the switch to combined iteration and no range keys are
   200  # missed.
   201  #
   202  # Parts of the iterator stack depend on the above invariants. For example,
   203  # during forward iteration the BoundedIter only checks span start keys against
   204  # iterator bounds and the configured prefix, with the expectation that the seek
   205  # is always already greater than or equal to the lower bound. In turn, the
   206  # DefragmentingIter indirectly relies on the same invariant, because it requires
   207  # a consistent view of the fragments. If the BoundedIter returns a span in one
   208  # direction, but skips it when iterating back, the defragmenting iterator will
   209  # end up on a different fragment.
   210  #
   211  # This test exercises a case in which previously, during prefix iteration, it
   212  # was possible for the switch to combined iteration to trigger using a seek
   213  # key k, such that there exist range key fragments between the current iterator
   214  # position and k (violating the 2nd invariant up above).
   215  #
   216  # The sequence of events is:
   217  #   1. SeekPrefixGE("b@9") = 'b@4':
   218  #      a. This seek positions the two levels, L0 and L6. The L0 iterator seeks
   219  #         to file 000006. This file does not contain any keys with the prefix
   220  #         "b", and the bloom filter must succeed in excluding the file. Since the
   221  #         file contains a range deletion, SeekPrefixGE returns the level's
   222  #         largest point key (`d#inf,RANGEDEL`) to ensure the file stays open until
   223  #         the iterator advances past the range deletion.
   224  #      b. In L6, the level iterator seeks to 000004 which contains a key with
   225  #         the prefix, returning 'b@4'.
   226  #   2. Next():
   227  #      a. Next advances the L6 iterator to file 000005. This file contains a
   228  #         range key [e,f)@1=bar, which updates the lazy-combined iterator's
   229  #         state, recording the earliest observed range key as 'e'. The L6 level
   230  #         iterator then returns the file's single point key 'c'.
   231  #      b. The merging iterator checks whether point key 'c' is deleted by any
   232  #         range deletions. It is. It's deleted by L0's [c,d) range deletion.
   233  #         The merging iterator then seeks the iterator to the tombstone's end
   234  #         key 'd'.
   235  #      c. After seeking, the range deletion sentinel d is at the top of the
   236  #         heap. At this point, the merging iterator checks whether the keyspace
   237  #         of the prefix has been exceeded, and it has. It returns nil.
   238  #   3. Switch to combined iteration:
   239  #      a. The Next has completed and triggered combined iteration. The only file
   240  #         containing range keys that was observed was 000005, containing the
   241  #         range key [e,f). The switch to combined iteration seeks the keyspan
   242  #         iterator to 'e'. Note that the iterator never observed L0's [d,e)
   243  #         range key that precedes [e,f) in the keyspace.
   244  #      b. Seeking the keyspan iterator calls DefragmentingIter.SeekLT('e'),
   245  #         which lands on the [d,e) fragment. This fragment does NOT check to see
   246  #         if the span starts at a prefix greater than the current prefix 'b',
   247  #         because only bounds in the direction of iteration are checked.
   248  #      c. The DefragmentingIter observes disappearing range key fragments when
   249  #         it switches directions, as a result of (b).
   250  #
   251  
   252  # Use 100-bits per key to ensure the bloom filter provides total recall.
   253  reset bloom-bits-per-key=100
   254  ----
   255  
   256  # Ingest L6 files:
   257  #
   258  # 000004: b@4
   259  # 000005: c, [e,f)@1=bar
   260  
   261  ingest ext1
   262  set b@4 b@4
   263  ----
   264  
   265  ingest ext1
   266  set c c
   267  range-key-set e f @1 bar
   268  ----
   269  
   270  # Ingest L0 files:
   271  #
   272  # 000006: a, del-range(c, d)
   273  # 000007: [d,e)@1=bar
   274  
   275  ingest ext2
   276  set a a
   277  del-range c d
   278  ----
   279  
   280  ingest ext3
   281  range-key-set d e @1 bar
   282  ----
   283  
   284  lsm
   285  ----
   286  0.0:
   287    000006:[a#12,SET-d#inf,RANGEDEL]
   288    000007:[d#13,RANGEKEYSET-e#inf,RANGEKEYSET]
   289  6:
   290    000004:[b@4#10,SET-b@4#10,SET]
   291    000005:[c#11,SET-f#inf,RANGEKEYSET]
   292  
   293  combined-iter
   294  seek-prefix-ge b@9
   295  next
   296  ----
   297  b@4: (b@4, .)
   298  .
   299  
   300  # Regression test for #2151.
   301  #
   302  # This test consists of two SeekPrefixGEs for ascending keys, which results in
   303  # TrySeekUsingNext()=true for the second seek. The entirety of both seeked
   304  # prefixes is deleted by the range deletion [b-d). The iterator being used is
   305  # created from a snapshot at sequence number #4. At that sequence number, the
   306  # iterator observes the range deletion and all of L6's point keys, but none of
   307  # the point keys in L5.
   308  #
   309  # Previously, a bug existed where the SeekPrefixGE("b@9") would cause the
   310  # iterator to next beyond the L5 sstable. The subsequent SeekPrefixGE with
   311  # TrySeekUsingNext would mistakenly miss the range deletion [b-d) because it had
   312  # already proceeded beyond the file.
   313  
   314  define snapshots=(4)
   315  L5
   316    b.RANGEDEL.3:d
   317    b@9.SET.9:v
   318    c@9.SET.9:v
   319    d@9.SET.9:v
   320  L6
   321    b@2.SET.2:v
   322    c@2.SET.2:v
   323    d@2.SET.2:v
   324  ----
   325  5:
   326    000004:[b#3,RANGEDEL-d@9#9,SET]
   327  6:
   328    000005:[b@2#2,SET-d@2#2,SET]
   329  
   330  combined-iter snapshot=4
   331  seek-prefix-ge b@9
   332  seek-prefix-ge c@9
   333  ----
   334  .
   335  .