github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/testdata/iter_histories/iter_optimizations (about)

     1  # Test repeated seeks into the same range key, while TrySeekUsingNext=true.
     2  # Test for regression fixed in #1849.
     3  
     4  reset
     5  ----
     6  
     7  batch commit
     8  range-key-set a c @5 boop
     9  range-key-set c e @5 beep
    10  ----
    11  committed 2 keys
    12  
    13  combined-iter
    14  seek-ge a
    15  seek-ge b
    16  ----
    17  a: (., [a-c) @5=boop UPDATED)
    18  b: (., [a-c) @5=boop)
    19  
    20  # Ensure that no-op optimizations do not reuse range key iterator state across
    21  # SetOptions calls. No-op optimizations have the potential to fail to update
    22  # RangeKeyChanged().
    23  
    24  reset
    25  ----
    26  
    27  batch commit
    28  range-key-set p s @1 foo
    29  ----
    30  committed 1 keys
    31  
    32  combined-iter lower=n@9 upper=x@5
    33  seek-lt y@3
    34  set-options lower=n@9 upper=x@5
    35  seek-lt-limit t o
    36  ----
    37  p: (., [p-s) @1=foo UPDATED)
    38  .
    39  p: valid (., [p-s) @1=foo UPDATED)
    40  
    41  combined-iter lower=n@9 upper=x@5
    42  seek-ge o
    43  set-options lower=n@9 upper=x@5
    44  seek-ge oat
    45  ----
    46  p: (., [p-s) @1=foo UPDATED)
    47  .
    48  p: (., [p-s) @1=foo UPDATED)
    49  
    50  combined-iter lower=n@9 upper=x@5
    51  seek-prefix-ge p@5
    52  set-options lower=n@9 upper=x@5
    53  seek-prefix-ge p
    54  ----
    55  p@5: (., [p-"p\x00") @1=foo UPDATED)
    56  .
    57  p: (., [p-"p\x00") @1=foo UPDATED)
    58  
    59  # Regression test for #1963 / cockroachdb/cockroach#88296.
    60  #
    61  # The iterators in this test move their bounds monotonically forward
    62  # [a,b)→[b,e). This enables the sstable iterator optimization for monotonically
    63  # moving bounds (see boundsCmp in sstable/reader.go). With this optimization,
    64  # the first seek after the SetBounds may use the fact that the bounds moved
    65  # forward monotonically to avoid re-seeking within the index.
    66  #
    67  # The test cases below exercise a seek to a key, followed by a seek to a smaller
    68  # key. The second seek should not make use of the bounds optimization because
    69  # doing so may incorrectly skip all keys between the lower bound and the first
    70  # seek key. Previously, the code paths that handled block-property filtering on
    71  # a two-level iterator could leave the iterator in a state such that the second
    72  # seek would improperly also exercise the monotonic bounds optimization. In the
    73  # test cases below, this would result in the key 'b' not being found. Each test
    74  # case exercises a different combination of seek-ge and seek-prefix-ge.
    75  
    76  reset block-size=1 index-block-size=1
    77  ----
    78  
    79  batch commit
    80  set a a
    81  set b b
    82  set b@4 b@4
    83  set z@6 z@6
    84  ----
    85  committed 4 keys
    86  
    87  flush
    88  ----
    89  
    90  combined-iter lower=a upper=b point-key-filter=(1,4)
    91  seek-ge a
    92  set-bounds lower=b upper=e
    93  seek-prefix-ge d@5
    94  seek-prefix-ge b
    95  ----
    96  a: (a, .)
    97  .
    98  .
    99  b: (b, .)
   100  
   101  combined-iter lower=a upper=b point-key-filter=(1,4)
   102  seek-ge a
   103  set-bounds lower=b upper=e
   104  seek-ge d@5
   105  seek-prefix-ge b
   106  ----
   107  a: (a, .)
   108  .
   109  .
   110  b: (b, .)
   111  
   112  combined-iter lower=a upper=b point-key-filter=(1,4)
   113  seek-ge a
   114  set-bounds lower=b upper=e
   115  seek-ge d@5
   116  seek-ge b
   117  ----
   118  a: (a, .)
   119  .
   120  .
   121  b: (b, .)
   122  
   123  combined-iter lower=a upper=b point-key-filter=(1,4)
   124  seek-ge a
   125  set-bounds lower=b upper=e
   126  seek-prefix-ge d@5
   127  seek-ge b
   128  ----
   129  a: (a, .)
   130  .
   131  .
   132  b: (b, .)
   133  
   134  # Test a similar case with range key masking. The previous bug did not apply to
   135  # this case, because range-key masking never skips blocks on a seek.
   136  
   137  reset block-size=1 index-block-size=1
   138  ----
   139  
   140  batch commit
   141  set a a
   142  set b b
   143  set b@4 b@4
   144  set z@6 z@6
   145  range-key-set a z @9 v
   146  ----
   147  committed 5 keys
   148  
   149  flush
   150  ----
   151  
   152  combined-iter lower=a upper=b mask-suffix=@10 mask-filter
   153  seek-ge a
   154  set-bounds lower=b upper=e
   155  seek-prefix-ge d@5
   156  seek-ge b
   157  ----
   158  a: (a, [a-b) @9=v UPDATED)
   159  .
   160  d@5: (., [d-"d\x00") @9=v UPDATED)
   161  b: (b, [b-e) @9=v UPDATED)
   162  
   163  # Test TrySeekUsingNext across no-op SetOptions when reading through an indexed
   164  # batch with modifications. The seek-prefix-ges after the first should make use
   165  # of the TrySeekUsingNext optimization.
   166  #
   167  # TODO(jackson): The iterator stats don't signal the use of try-seek-using-next,
   168  # so we inspect lastPositioningOp as a proxy since that's the
   169  # try-seek-using-next prerequisite that previously regressed. Is there a way to
   170  # adapt this test so that the absence of the try-seek-using-next optimization
   171  # is visible in the iterator statistics?
   172  #
   173  # Regression test for cockroachdb/cockroach#88819.
   174  
   175  reset
   176  ----
   177  
   178  batch commit
   179  set b@5 b@5
   180  set c@3 c@3
   181  set d@9 d@9
   182  set e@8 e@8
   183  set f@8 f@8
   184  ----
   185  committed 5 keys
   186  
   187  flush
   188  ----
   189  
   190  batch name=foo
   191  set g@4 g@4
   192  ----
   193  wrote 1 keys to batch "foo"
   194  
   195  combined-iter reader=foo name=fooiter
   196  inspect lastPositioningOp
   197  seek-prefix-ge b@10
   198  stats
   199  ----
   200  lastPositioningOp="unknown"
   201  b@5: (b@5, .)
   202  stats: (interface (dir, seek, step): (fwd, 1, 0), (rev, 0, 0)), (internal (dir, seek, step): (fwd, 1, 0), (rev, 0, 0)),
   203  (internal-stats: (block-bytes: (total 119B, cached 0B, read-time 0s)), (points: (count 1, key-bytes 3B, value-bytes 3B, tombstoned 0)))
   204  
   205  mutate batch=foo
   206  set h@2 h@2
   207  ----
   208  
   209  iter iter=fooiter
   210  set-options
   211  inspect lastPositioningOp
   212  seek-prefix-ge c@10
   213  stats
   214  ----
   215  .
   216  lastPositioningOp="seekprefixge"
   217  c@3: (c@3, .)
   218  stats: (interface (dir, seek, step): (fwd, 2, 0), (rev, 0, 0)), (internal (dir, seek, step): (fwd, 2, 0), (rev, 0, 0)),
   219  (internal-stats: (block-bytes: (total 119B, cached 0B, read-time 0s)), (points: (count 2, key-bytes 6B, value-bytes 6B, tombstoned 0)))
   220  
   221  mutate batch=foo
   222  set i@1 i@1
   223  ----
   224  
   225  iter iter=fooiter
   226  set-options
   227  inspect lastPositioningOp
   228  seek-prefix-ge d@10
   229  stats
   230  ----
   231  .
   232  lastPositioningOp="seekprefixge"
   233  d@9: (d@9, .)
   234  stats: (interface (dir, seek, step): (fwd, 3, 0), (rev, 0, 0)), (internal (dir, seek, step): (fwd, 3, 0), (rev, 0, 0)),
   235  (internal-stats: (block-bytes: (total 119B, cached 0B, read-time 0s)), (points: (count 3, key-bytes 9B, value-bytes 9B, tombstoned 0)))
   236  
   237  mutate batch=foo
   238  set j@6 j@6
   239  ----
   240  
   241  iter iter=fooiter
   242  set-options
   243  inspect lastPositioningOp
   244  seek-prefix-ge e@10
   245  stats
   246  ----
   247  .
   248  lastPositioningOp="seekprefixge"
   249  e@8: (e@8, .)
   250  stats: (interface (dir, seek, step): (fwd, 4, 0), (rev, 0, 0)), (internal (dir, seek, step): (fwd, 4, 0), (rev, 0, 0)),
   251  (internal-stats: (block-bytes: (total 119B, cached 0B, read-time 0s)), (points: (count 4, key-bytes 12B, value-bytes 12B, tombstoned 0)))
   252  
   253  # Ensure that a case eligible for TrySeekUsingNext across a SetOptions correctly
   254  # sees new batch mutations. The batch iterator should ignore the
   255  # TrySeekUsingNext designation.
   256  
   257  reset
   258  ----
   259  
   260  batch commit
   261  set b@3 b@3
   262  set c@3 c@3
   263  ----
   264  committed 2 keys
   265  
   266  batch name=b1
   267  ----
   268  wrote 0 keys to batch "b1"
   269  
   270  combined-iter name=i1 reader=b1
   271  seek-prefix-ge b@6
   272  ----
   273  b@3: (b@3, .)
   274  
   275  mutate batch=b1
   276  set b@4 b@4
   277  ----
   278  
   279  iter iter=i1
   280  set-options
   281  inspect lastPositioningOp
   282  seek-prefix-ge b@5
   283  ----
   284  .
   285  lastPositioningOp="seekprefixge"
   286  b@4: (b@4, .)
   287  
   288  # Similar case with SeekGE.
   289  
   290  iter iter=i1
   291  seek-ge b@2
   292  ----
   293  c@3: (c@3, .)
   294  
   295  mutate batch=b1
   296  set c@9 c@9
   297  ----
   298  
   299  iter iter=i1
   300  set-options
   301  inspect lastPositioningOp
   302  seek-ge b@1
   303  ----
   304  .
   305  lastPositioningOp="seekge"
   306  c@9: (c@9, .)
   307  
   308  # Test a case similar to the above, but with an intermediate switch to
   309  # range-key-only iteration, so that the batchIter is not re-seeked.
   310  
   311  reset
   312  ----
   313  
   314  batch commit
   315  set b@5 b@5
   316  set c@3 c@3
   317  ----
   318  committed 2 keys
   319  
   320  batch name=b1
   321  ----
   322  wrote 0 keys to batch "b1"
   323  
   324  combined-iter name=i1 reader=b1
   325  seek-ge b@9
   326  ----
   327  b@5: (b@5, .)
   328  
   329  mutate batch=b1
   330  set b@6 b@6
   331  ----
   332  
   333  iter iter=i1
   334  set-options key-types=range
   335  seek-ge b@8
   336  set-options key-types=both
   337  inspect lastPositioningOp
   338  seek-ge b@7
   339  ----
   340  .
   341  .
   342  .
   343  lastPositioningOp="invalidate"
   344  b@6: (b@6, .)
   345  
   346  reset
   347  ----
   348  
   349  batch commit
   350  set b@2 b@2
   351  set c@3 c@3
   352  ----
   353  committed 2 keys
   354  
   355  batch name=b1
   356  ----
   357  wrote 0 keys to batch "b1"
   358  
   359  combined-iter name=i1 reader=b1
   360  seek-prefix-ge b@1
   361  ----
   362  .
   363  
   364  mutate batch=b1
   365  set c@4 c@4
   366  ----
   367  
   368  iter iter=i1
   369  set-options
   370  inspect lastPositioningOp
   371  seek-prefix-ge c@8
   372  ----
   373  .
   374  lastPositioningOp="seekprefixge"
   375  c@4: (c@4, .)
   376  
   377  # Regression test for #2084.
   378  #
   379  # The optimization added in #2058 began using an enabled TrySeekUsingNext flag
   380  # to avoid re-seeking within a level's file metadata. This optimization was
   381  # dependent on the invariant that the iterator remained positioned at the
   382  # previous seek key, so that a subsequent seek to a larger key does not need to
   383  # backtrack.
   384  #
   385  # This invariant wasn't strictly preserved by the levelIter during SeekPrefixGE
   386  # calls. During a SeekPrefixGE, the sstable iterator may return nil despite the
   387  # existence of sstable keys greater than the seek key if the sstable's bloom
   388  # filter excludes the seek prefix. If the sstable DOES NOT contain any range
   389  # tombstones, then: if the file's largest bound has a prefix larger than the seek
   390  # prefix, the levelIter returns nil without advancing to the next file; if not,
   391  # it does advance, since the next file could contain the seek prefix.
   392  #
   393  # However, if the file DOES contain range tombstones, the levelIter returns a
   394  # synthetic largest boundary key so that the file remains open until the merging
   395  # iterator passes beyond its bounds. This ensures the file's range deletions'
   396  # effects on other keys are observed. If another level returned a key greater
   397  # than this largest boundary key (eg, because the other level doesn't restrict
   398  # results to the seek prefix), the merging iterator could step beyond the
   399  # level's synthetic boundary key.  This step could advance the levelIter to the
   400  # next file, despite its irrelevance to the current prefix. This step would also
   401  # break the invariant that the level iterator remained positioned at the seek
   402  # key.
   403  #
   404  # The bug was fixed by comparing the synthetic boundary key to the seek prefix,
   405  # avoiding ever Next-ing the level iterator beyond the seek prefix.
   406  
   407  # Set 100 bloom-filter bits per key to ensure the bloom-filter exclusivity
   408  # checks successfully exclude prefixes that aren't present.
   409  reset bloom-bits-per-key=100
   410  ----
   411  
   412  # [a           -d)
   413  #    b@3          d@1
   414  batch commit
   415  del-range a d
   416  set b@3 b@3
   417  set d@1 d@1
   418  ----
   419  committed 3 keys
   420  
   421  flush
   422  ----
   423  
   424  # c@0 e@0
   425  batch commit
   426  del c@0
   427  set e@0 e@0
   428  ----
   429  committed 2 keys
   430  
   431  flush
   432  ----
   433  
   434  lsm
   435  ----
   436  0.1:
   437    000007:[c@0#13,DEL-e@0#14,SET]
   438  0.0:
   439    000005:[a#10,RANGEDEL-d@1#12,SET]
   440  
   441  # The first SeekPrefixGE(b@3) positions each level iterator over their
   442  # respective files and correctly finds b@3.
   443  #
   444  # The second SeekPrefixGE(c@5) seeks in both files. The 0.0 level iterator finds
   445  # that its file does not contain the prefix 'c', so it returns nil. Since the file
   446  # contains a range deletion, it returns a synthetic boundary key with user key
   447  # d@1 to ensure the file stays open until the iterator has moved beyond the
   448  # file's bounds. The seek in level 0.1 finds a key with the prefix 'c': a point
   449  # tombstone c@0#13,DEL. This gets bubbled up to the Iterator, which skips it
   450  # because it's a point tombstone, nexting within 000007 to e@0#14.
   451  #
   452  # Previously, in the bug highlighted by #2084, the merging iterator would then
   453  # see that level 0.0's synthetic boundary key at d@1 was at the top of the heap
   454  # and move to the next file in 0.0. The subsequent call to SeekPrefixGE(d@1,
   455  # TrySeekUsingNext=true) would incorrectly use the current position within the
   456  # 0.0 file metadata (nil), and miss the d@1 key.
   457  
   458  combined-iter
   459  seek-prefix-ge b@3
   460  seek-prefix-ge c@5
   461  seek-prefix-ge d@1
   462  ----
   463  b@3: (b@3, .)
   464  .
   465  d@1: (d@1, .)
   466  
   467  
   468  # Test an instance where unequal application of TrySeekUsingNext optimizations
   469  # among a merging iterator's levels can result in surfacing deleted keys.
   470  # Regression test for #2101.
   471  
   472  reset
   473  ----
   474  
   475  batch commit
   476  set b b
   477  ----
   478  committed 1 keys
   479  
   480  flush
   481  ----
   482  
   483  compact a-h
   484  ----
   485  6:
   486    000005:[b#10,SET-b#10,SET]
   487  
   488  batch commit
   489  set g g
   490  ----
   491  committed 1 keys
   492  
   493  flush
   494  ----
   495  
   496  compact a-h
   497  ----
   498  6:
   499    000005:[b#10,SET-b#10,SET]
   500    000007:[g#11,SET-g#11,SET]
   501  
   502  batch commit
   503  del-range b d
   504  ----
   505  committed 1 keys
   506  
   507  flush
   508  ----
   509  
   510  batch commit
   511  set e e
   512  ----
   513  committed 1 keys
   514  
   515  flush
   516  ----
   517  
   518  lsm
   519  ----
   520  0.0:
   521    000009:[b#12,RANGEDEL-d#inf,RANGEDEL]
   522    000011:[e#13,SET-e#13,SET]
   523  6:
   524    000005:[b#10,SET-b#10,SET]
   525    000007:[g#11,SET-g#11,SET]
   526  
   527  # The `seek-ge b` could incorrectly return `b` if the level 0.0 levelIter obeys
   528  # the TrySeekUsingNext optimization but the level 6 levelIter does not. The
   529  # TrySeekUsingNext optimization must be applied equally across all the levels of
   530  # a merging iterator.
   531  
   532  combined-iter
   533  seek-ge a
   534  seek-ge b
   535  ----
   536  e: (e, .)
   537  e: (e, .)
   538  
   539  # Regression test for #2118, where a MERGE pushes child iterators to the next
   540  # key, and possibly past a file that contained a range tombstone that we
   541  # should have paused at in a SeekPrefixGE, affecting future TrySeekUsingNexts.
   542  # This test constructs this example (suffixes ignored), where each pair of
   543  # square brackets denotes one SST:
   544  #
   545  # L0: [(b, MERGE)  (c-d, RANGEDEL)] [(m, DEL)]
   546  # L6: [(c, SET) (c-e, RANGEKEYSET)] [(j, SET)]
   547  #
   548  # We create an iterator with L6 filters enabled and create relatively large
   549  # bloom filter blocks to reduce the false positive rate. Then we SeekPrefixGE(b)
   550  # and end up with the L0 levelIter landing on the (b, MERGE), and the L6 iterator
   551  # is exhausted as no SST filter blocks match the prefix. The top-level iterator
   552  # then Next()s to find the next internal key at b, if there is any, and lands
   553  # on the pause key at (d, RANGEDELSENTINEL). Crucially, since there are no
   554  # more items in the mergingIter heap and the merging iter is set to elide
   555  # range tombstones, we Next() the level iter again as part of the same top-level
   556  # iterator Next(), and land on (m, DEL). The type of the key here doesn't really
   557  # matter.
   558  #
   559  # We then do a SeekPrefixGE(c), and since c > b, in the buggy scenario we
   560  # TrySeekUsingNext. The bottom levelIter correctly finds the sstable containing
   561  # the set, but the upper levelIter is already past the sstable containing the
   562  # rangedel, so it just returns (m, DEL) again, and we surface the (c, SET) that
   563  # should have been deleted.
   564  
   565  reset bloom-bits-per-key=100
   566  ----
   567  
   568  batch commit
   569  set c@2 foo
   570  range-key-set c e @5 bar
   571  ----
   572  committed 2 keys
   573  
   574  flush
   575  ----
   576  
   577  compact a-z
   578  ----
   579  6:
   580    000005:[c#11,RANGEKEYSET-e#inf,RANGEKEYSET]
   581  
   582  batch commit
   583  set j k
   584  ----
   585  committed 1 keys
   586  
   587  flush
   588  ----
   589  
   590  compact a-z
   591  ----
   592  6:
   593    000005:[c#11,RANGEKEYSET-e#inf,RANGEKEYSET]
   594    000007:[j#12,SET-j#12,SET]
   595  
   596  batch commit
   597  del-range c@2 d
   598  merge b@2 g
   599  ----
   600  committed 2 keys
   601  
   602  flush
   603  ----
   604  
   605  batch commit
   606  del m
   607  ----
   608  committed 1 keys
   609  
   610  flush
   611  ----
   612  
   613  lsm
   614  ----
   615  0.0:
   616    000009:[b@2#14,MERGE-d#inf,RANGEDEL]
   617    000011:[m#15,DEL-m#15,DEL]
   618  6:
   619    000005:[c#11,RANGEKEYSET-e#inf,RANGEKEYSET]
   620    000007:[j#12,SET-j#12,SET]
   621  
   622  combined-iter upper=z@3 mask-suffix=@3 mask-filter use-l6-filter
   623  seek-prefix-ge b@2
   624  seek-prefix-ge c@2
   625  ----
   626  b@2: (g, .)
   627  c@2: (., [c-"c\x00") @5=bar UPDATED)
   628  
   629  # Regression test for cockroachdb/cockroach#92205. This test constructs this scenario:
   630  #
   631  # A DEL in a middle level (L0.0) that we SeekPrefixGE directly for. Note that
   632  # this DEL is not deleted by any range deletes; it gets exposed to the
   633  # Iterator. There's a key after this DEL in the L0.0 levelIter, and there's a
   634  # level above it (L0.1) that has a rangedel deleting that key, but not the DEL
   635  # we SeekPrefixGE for. In the lowest level, there's a SET at L6 that is to the
   636  # right of the DEL in L0.0, but is also not deleted by the RANGEDEL in L0.1.
   637  # Our second SeekPrefixGE will be for this SET. Visualization, where square
   638  # brackets are files:
   639  #
   640  # L0.1                 [dd-ee#RANGEDEL]
   641  # L0.0    [b#DEL          e#SET]
   642  # L6            [d#SET]       [f#SET g#SET]
   643  #
   644  # When the Iterator encounters the above DEL internal key in the SeekPrefixGE, it
   645  # calls Iterator.nextUserKey in the Iterator.findNextEntry call that was part of the
   646  # SeekPrefixGE call. While Iterator.findNextEntry has a conditional to exit
   647  # out of the loop if we're in prefix iteration and have gone past the prefix,
   648  # this break only happens _after_ nextUserKey() has run. As a result we Next()
   649  # the levelIter in L0.0, land on e#SET, and the mergingIter realizes that it
   650  # is deleted by the rangedel in a higher level (L0.1). The mergingIter does not
   651  # see d#SET because that sstable was excluded by the bloom filter. We then do a relative
   652  # seek of all levels below L0.1 to ee (the end key of the rangedel), and in that
   653  # process we advance the L6 levelIter to the second file.
   654  #
   655  # When we do the second SeekPrefixGE for d, the outer Iterator thinks d > b and
   656  # so TrySeekUsingNext can work. However, the L6 levelIter has already advanced
   657  # past the file containing d#SET, so we don't surface it even though we should
   658  # have.
   659  
   660  reset bloom-bits-per-key=100
   661  ----
   662  
   663  batch commit
   664  set d@4 foo
   665  ----
   666  committed 1 keys
   667  
   668  flush
   669  ----
   670  
   671  compact a-f
   672  ----
   673  6:
   674    000005:[d@4#10,SET-d@4#10,SET]
   675  
   676  batch commit
   677  set f@5 bar
   678  set g@5 baz
   679  ----
   680  committed 2 keys
   681  
   682  flush
   683  ----
   684  
   685  compact e-k
   686  ----
   687  6:
   688    000005:[d@4#10,SET-d@4#10,SET]
   689    000007:[f@5#11,SET-g@5#12,SET]
   690  
   691  batch commit
   692  del b@5
   693  set e@4 foobar
   694  ----
   695  committed 2 keys
   696  
   697  flush
   698  ----
   699  
   700  batch commit
   701  del-range dd ee
   702  ----
   703  committed 1 keys
   704  
   705  flush
   706  ----
   707  
   708  lsm
   709  ----
   710  0.1:
   711    000011:[dd#15,RANGEDEL-ee#inf,RANGEDEL]
   712  0.0:
   713    000009:[b@5#13,DEL-e@4#14,SET]
   714  6:
   715    000005:[d@4#10,SET-d@4#10,SET]
   716    000007:[f@5#11,SET-g@5#12,SET]
   717  
   718  combined-iter upper=z@3 use-l6-filter
   719  seek-prefix-ge b@6
   720  seek-prefix-ge d@5
   721  ----
   722  .
   723  d@4: (foo, .)