github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/suffix_rewriter_test.go (about)

     1  package sstable
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"fmt"
     7  	"math/rand"
     8  	"strconv"
     9  	"testing"
    10  
    11  	"github.com/cockroachdb/pebble/bloom"
    12  	"github.com/cockroachdb/pebble/internal/base"
    13  	"github.com/cockroachdb/pebble/objstorage"
    14  	"github.com/stretchr/testify/require"
    15  )
    16  
    17  func TestRewriteSuffixProps(t *testing.T) {
    18  	from, to := []byte("_212"), []byte("_646")
    19  	for format := TableFormatPebblev2; format <= TableFormatMax; format++ {
    20  		t.Run(format.String(), func(t *testing.T) {
    21  			wOpts := WriterOptions{
    22  				FilterPolicy: bloom.FilterPolicy(10),
    23  				Comparer:     test4bSuffixComparer,
    24  				TablePropertyCollectors: []func() TablePropertyCollector{
    25  					intSuffixTablePropCollectorFn("ts3", 3), intSuffixTablePropCollectorFn("ts2", 2),
    26  				},
    27  				BlockPropertyCollectors: []func() BlockPropertyCollector{
    28  					keyCountCollectorFn("count"),
    29  					intSuffixIntervalCollectorFn("bp3", 3),
    30  					intSuffixIntervalCollectorFn("bp2", 2),
    31  					intSuffixIntervalCollectorFn("bp1", 1),
    32  				},
    33  				TableFormat: format,
    34  			}
    35  			if format >= TableFormatPebblev4 {
    36  				wOpts.IsStrictObsolete = true
    37  			}
    38  
    39  			const keyCount = 1e5
    40  			const rangeKeyCount = 100
    41  			// Setup our test SST.
    42  			sst := make4bSuffixTestSST(t, wOpts, []byte(from), keyCount, rangeKeyCount)
    43  
    44  			expectedProps := make(map[string]string)
    45  			expectedProps["ts2.min"] = "46"
    46  			expectedProps["ts2.max"] = "46"
    47  			expectedProps["ts3.min"] = "646"
    48  			expectedProps["ts3.max"] = "646"
    49  
    50  			// Also expect to see the aggregated block properties with their updated value
    51  			// at the correct (new) shortIDs. Seeing the rolled up value here is almost an
    52  			// end-to-end test since we only fed them each block during rewrite.
    53  			expectedProps["count"] = string(append([]byte{1}, strconv.Itoa(keyCount+rangeKeyCount)...))
    54  			expectedProps["bp2"] = string(interval{46, 47}.encode([]byte{2}))
    55  			expectedProps["bp3"] = string(interval{646, 647}.encode([]byte{0}))
    56  
    57  			// Swap the order of two of the props so they have new shortIDs, and remove
    58  			// one. rwOpts inherits the IsStrictObsolete value from wOpts.
    59  			rwOpts := wOpts
    60  			if rand.Intn(2) != 0 {
    61  				rwOpts.TableFormat = TableFormatPebblev2
    62  				rwOpts.IsStrictObsolete = false
    63  				t.Log("table format set to TableFormatPebblev2")
    64  			}
    65  			fmt.Printf("from format %s, to format %s\n", format.String(), rwOpts.TableFormat.String())
    66  			rwOpts.BlockPropertyCollectors = rwOpts.BlockPropertyCollectors[:3]
    67  			rwOpts.BlockPropertyCollectors[0], rwOpts.BlockPropertyCollectors[1] = rwOpts.BlockPropertyCollectors[1], rwOpts.BlockPropertyCollectors[0]
    68  
    69  			// Rewrite the SST using updated options and check the returned props.
    70  			readerOpts := ReaderOptions{
    71  				Comparer: test4bSuffixComparer,
    72  				Filters:  map[string]base.FilterPolicy{wOpts.FilterPolicy.Name(): wOpts.FilterPolicy},
    73  			}
    74  			r, err := NewMemReader(sst, readerOpts)
    75  			require.NoError(t, err)
    76  			defer r.Close()
    77  
    78  			var sstBytes [2][]byte
    79  			adjustPropsForEffectiveFormat := func(effectiveFormat TableFormat) {
    80  				if effectiveFormat == TableFormatPebblev4 {
    81  					expectedProps["obsolete-key"] = string([]byte{3})
    82  				} else {
    83  					delete(expectedProps, "obsolete-key")
    84  				}
    85  			}
    86  			for i, byBlocks := range []bool{false, true} {
    87  				t.Run(fmt.Sprintf("byBlocks=%v", byBlocks), func(t *testing.T) {
    88  					rewrittenSST := &memFile{}
    89  					if byBlocks {
    90  						_, rewriteFormat, err := rewriteKeySuffixesInBlocks(
    91  							r, rewrittenSST, rwOpts, from, to, 8)
    92  						// rewriteFormat is equal to the original format, since
    93  						// rwOpts.TableFormat is ignored.
    94  						require.Equal(t, wOpts.TableFormat, rewriteFormat)
    95  						require.NoError(t, err)
    96  						adjustPropsForEffectiveFormat(rewriteFormat)
    97  					} else {
    98  						_, err := RewriteKeySuffixesViaWriter(r, rewrittenSST, rwOpts, from, to)
    99  						require.NoError(t, err)
   100  						adjustPropsForEffectiveFormat(rwOpts.TableFormat)
   101  					}
   102  
   103  					sstBytes[i] = rewrittenSST.Data()
   104  					// Check that a reader on the rewritten STT has the expected props.
   105  					rRewritten, err := NewMemReader(rewrittenSST.Data(), readerOpts)
   106  					require.NoError(t, err)
   107  					defer rRewritten.Close()
   108  					require.Equal(t, expectedProps, rRewritten.Properties.UserProperties)
   109  					require.False(t, rRewritten.Properties.IsStrictObsolete)
   110  
   111  					// Compare the block level props from the data blocks in the layout,
   112  					// only if we did not do a rewrite from one format to another. If the
   113  					// format changes, the block boundaries change slightly.
   114  					if !byBlocks && wOpts.TableFormat != rwOpts.TableFormat {
   115  						return
   116  					}
   117  					layout, err := r.Layout()
   118  					require.NoError(t, err)
   119  					newLayout, err := rRewritten.Layout()
   120  					require.NoError(t, err)
   121  
   122  					ival := interval{}
   123  					for i := range layout.Data {
   124  						oldProps := make([][]byte, len(wOpts.BlockPropertyCollectors))
   125  						oldDecoder := blockPropertiesDecoder{layout.Data[i].Props}
   126  						for !oldDecoder.done() {
   127  							id, val, err := oldDecoder.next()
   128  							require.NoError(t, err)
   129  							oldProps[id] = val
   130  						}
   131  						newProps := make([][]byte, len(rwOpts.BlockPropertyCollectors))
   132  						newDecoder := blockPropertiesDecoder{newLayout.Data[i].Props}
   133  						for !newDecoder.done() {
   134  							id, val, err := newDecoder.next()
   135  							require.NoError(t, err)
   136  							if int(id) < len(newProps) {
   137  								newProps[id] = val
   138  							}
   139  						}
   140  						require.Equal(t, oldProps[0], newProps[1])
   141  						ival.decode(newProps[0])
   142  						require.Equal(t, interval{646, 647}, ival)
   143  						ival.decode(newProps[2])
   144  						require.Equal(t, interval{46, 47}, ival)
   145  					}
   146  				})
   147  			}
   148  			if wOpts.TableFormat == rwOpts.TableFormat {
   149  				// Both methods of rewriting should produce the same result.
   150  				require.Equal(t, sstBytes[0], sstBytes[1])
   151  			}
   152  		})
   153  	}
   154  }
   155  
   156  // memFile is a file-like struct that buffers all data written to it in memory.
   157  // Implements the objstorage.Writable interface.
   158  type memFile struct {
   159  	buf bytes.Buffer
   160  }
   161  
   162  var _ objstorage.Writable = (*memFile)(nil)
   163  
   164  // Finish is part of the objstorage.Writable interface.
   165  func (*memFile) Finish() error {
   166  	return nil
   167  }
   168  
   169  // Abort is part of the objstorage.Writable interface.
   170  func (*memFile) Abort() {}
   171  
   172  // Write is part of the objstorage.Writable interface.
   173  func (f *memFile) Write(p []byte) error {
   174  	_, err := f.buf.Write(p)
   175  	return err
   176  }
   177  
   178  // Data returns the in-memory buffer behind this MemFile.
   179  func (f *memFile) Data() []byte {
   180  	return f.buf.Bytes()
   181  }
   182  
   183  func make4bSuffixTestSST(
   184  	t testing.TB, writerOpts WriterOptions, suffix []byte, keys int, rangeKeys int,
   185  ) []byte {
   186  	key := make([]byte, 28)
   187  	endKey := make([]byte, 24)
   188  	copy(key[24:], suffix)
   189  
   190  	f := &memFile{}
   191  	w := NewWriter(f, writerOpts)
   192  	for i := 0; i < keys; i++ {
   193  		binary.BigEndian.PutUint64(key[:8], 123) // 16-byte shared prefix
   194  		binary.BigEndian.PutUint64(key[8:16], 456)
   195  		binary.BigEndian.PutUint64(key[16:], uint64(i))
   196  		err := w.AddWithForceObsolete(
   197  			base.MakeInternalKey(key, 0, InternalKeyKindSet), key, false)
   198  		if err != nil {
   199  			t.Fatal(err)
   200  		}
   201  	}
   202  	for i := 0; i < rangeKeys; i++ {
   203  		binary.BigEndian.PutUint64(key[:8], 123) // 16-byte shared prefix
   204  		binary.BigEndian.PutUint64(key[8:16], 456)
   205  		binary.BigEndian.PutUint64(key[16:], uint64(i))
   206  		binary.BigEndian.PutUint64(endKey[:8], 123) // 16-byte shared prefix
   207  		binary.BigEndian.PutUint64(endKey[8:16], 456)
   208  		binary.BigEndian.PutUint64(endKey[16:], uint64(i+1))
   209  		if err := w.RangeKeySet(key[:24], endKey[:24], suffix, key); err != nil {
   210  			t.Fatal(err)
   211  		}
   212  	}
   213  	if err := w.Close(); err != nil {
   214  		t.Fatal(err)
   215  	}
   216  
   217  	return f.buf.Bytes()
   218  }
   219  
   220  func BenchmarkRewriteSST(b *testing.B) {
   221  	from, to := []byte("_123"), []byte("_456")
   222  	writerOpts := WriterOptions{
   223  		FilterPolicy: bloom.FilterPolicy(10),
   224  		Comparer:     test4bSuffixComparer,
   225  		TableFormat:  TableFormatPebblev2,
   226  	}
   227  
   228  	sizes := []int{100, 10000, 1e6}
   229  	compressions := []Compression{NoCompression, SnappyCompression}
   230  
   231  	files := make([][]*Reader, len(compressions))
   232  
   233  	for comp := range compressions {
   234  		files[comp] = make([]*Reader, len(sizes))
   235  
   236  		for size := range sizes {
   237  			writerOpts.Compression = compressions[comp]
   238  			sst := make4bSuffixTestSST(b, writerOpts, from, sizes[size], 0 /* rangeKeys */)
   239  			r, err := NewMemReader(sst, ReaderOptions{
   240  				Comparer: test4bSuffixComparer,
   241  				Filters:  map[string]base.FilterPolicy{writerOpts.FilterPolicy.Name(): writerOpts.FilterPolicy},
   242  			})
   243  			if err != nil {
   244  				b.Fatal(err)
   245  			}
   246  			files[comp][size] = r
   247  		}
   248  	}
   249  
   250  	b.ResetTimer()
   251  	for comp := range compressions {
   252  		b.Run(compressions[comp].String(), func(b *testing.B) {
   253  			for sz := range sizes {
   254  				r := files[comp][sz]
   255  				b.Run(fmt.Sprintf("keys=%d", sizes[sz]), func(b *testing.B) {
   256  					b.Run("ReaderWriterLoop", func(b *testing.B) {
   257  						b.SetBytes(r.readable.Size())
   258  						for i := 0; i < b.N; i++ {
   259  							if _, err := RewriteKeySuffixesViaWriter(r, &discardFile{}, writerOpts, from, to); err != nil {
   260  								b.Fatal(err)
   261  							}
   262  						}
   263  					})
   264  					for _, concurrency := range []int{1, 2, 4, 8, 16} {
   265  						b.Run(fmt.Sprintf("RewriteKeySuffixes,concurrency=%d", concurrency), func(b *testing.B) {
   266  							b.SetBytes(r.readable.Size())
   267  							for i := 0; i < b.N; i++ {
   268  								if _, _, err := rewriteKeySuffixesInBlocks(r, &discardFile{}, writerOpts, []byte("_123"), []byte("_456"), concurrency); err != nil {
   269  									b.Fatal(err)
   270  								}
   271  							}
   272  						})
   273  					}
   274  				})
   275  			}
   276  		})
   277  	}
   278  }