github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/sstable/suffix_rewriter_test.go (about)

     1  package sstable
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/binary"
     6  	"fmt"
     7  	"strconv"
     8  	"testing"
     9  
    10  	"github.com/stretchr/testify/require"
    11  	"github.com/zuoyebang/bitalostable/bloom"
    12  	"github.com/zuoyebang/bitalostable/internal/base"
    13  )
    14  
    15  func TestRewriteSuffixProps(t *testing.T) {
    16  	from, to := []byte("_212"), []byte("_646")
    17  
    18  	wOpts := WriterOptions{
    19  		FilterPolicy: bloom.FilterPolicy(10),
    20  		Comparer:     test4bSuffixComparer,
    21  		TablePropertyCollectors: []func() TablePropertyCollector{
    22  			intSuffixTablePropCollectorFn("ts3", 3), intSuffixTablePropCollectorFn("ts2", 2),
    23  		},
    24  		BlockPropertyCollectors: []func() BlockPropertyCollector{
    25  			keyCountCollectorFn("count"),
    26  			intSuffixIntervalCollectorFn("bp3", 3),
    27  			intSuffixIntervalCollectorFn("bp2", 2),
    28  			intSuffixIntervalCollectorFn("bp1", 1),
    29  		},
    30  		TableFormat: TableFormatPebblev2,
    31  	}
    32  
    33  	const keyCount = 1e5
    34  	const rangeKeyCount = 100
    35  	// Setup our test SST.
    36  	sst := make4bSuffixTestSST(t, wOpts, []byte(from), keyCount, rangeKeyCount)
    37  
    38  	expectedProps := make(map[string]string)
    39  	expectedProps["ts2.min"] = "46"
    40  	expectedProps["ts2.max"] = "46"
    41  	expectedProps["ts3.min"] = "646"
    42  	expectedProps["ts3.max"] = "646"
    43  
    44  	// Also expect to see the aggregated block properties with their updated value
    45  	// at the correct (new) shortIDs. Seeing the rolled up value here is almost an
    46  	// end-to-end test since we only fed them each block during rewrite.
    47  	expectedProps["count"] = string(append([]byte{1}, strconv.Itoa(keyCount+rangeKeyCount)...))
    48  	expectedProps["bp2"] = string(interval{46, 47}.encode([]byte{2}))
    49  	expectedProps["bp3"] = string(interval{646, 647}.encode([]byte{0}))
    50  
    51  	// Swap the order of two of the props so they have new shortIDs, and remove
    52  	// one.
    53  	rwOpts := wOpts
    54  	rwOpts.BlockPropertyCollectors = rwOpts.BlockPropertyCollectors[:3]
    55  	rwOpts.BlockPropertyCollectors[0], rwOpts.BlockPropertyCollectors[1] = rwOpts.BlockPropertyCollectors[1], rwOpts.BlockPropertyCollectors[0]
    56  
    57  	// Rewrite the SST using updated options and check the returned props.
    58  	readerOpts := ReaderOptions{
    59  		Comparer: test4bSuffixComparer,
    60  		Filters:  map[string]base.FilterPolicy{wOpts.FilterPolicy.Name(): wOpts.FilterPolicy},
    61  	}
    62  	r, err := NewMemReader(sst, readerOpts)
    63  	require.NoError(t, err)
    64  	defer r.Close()
    65  
    66  	for _, byBlocks := range []bool{false, true} {
    67  		t.Run(fmt.Sprintf("byBlocks=%v", byBlocks), func(t *testing.T) {
    68  			rewrittenSST := &memFile{}
    69  			if byBlocks {
    70  				_, err := rewriteKeySuffixesInBlocks(r, rewrittenSST, rwOpts, from, to, 8)
    71  				require.NoError(t, err)
    72  			} else {
    73  				_, err := RewriteKeySuffixesViaWriter(r, rewrittenSST, rwOpts, from, to)
    74  				require.NoError(t, err)
    75  			}
    76  
    77  			// Check that a reader on the rewritten STT has the expected props.
    78  			rRewritten, err := NewMemReader(rewrittenSST.Bytes(), readerOpts)
    79  			require.NoError(t, err)
    80  			defer rRewritten.Close()
    81  			require.Equal(t, expectedProps, rRewritten.Properties.UserProperties)
    82  
    83  			// Compare the block level props from the data blocks in the layout.
    84  			layout, err := r.Layout()
    85  			require.NoError(t, err)
    86  			newLayout, err := rRewritten.Layout()
    87  			require.NoError(t, err)
    88  
    89  			ival := interval{}
    90  			for i := range layout.Data {
    91  				oldProps := make([][]byte, len(wOpts.BlockPropertyCollectors))
    92  				oldDecoder := blockPropertiesDecoder{layout.Data[i].Props}
    93  				for !oldDecoder.done() {
    94  					id, val, err := oldDecoder.next()
    95  					require.NoError(t, err)
    96  					oldProps[id] = val
    97  				}
    98  				newProps := make([][]byte, len(rwOpts.BlockPropertyCollectors))
    99  				newDecoder := blockPropertiesDecoder{newLayout.Data[i].Props}
   100  				for !newDecoder.done() {
   101  					id, val, err := newDecoder.next()
   102  					require.NoError(t, err)
   103  					newProps[id] = val
   104  				}
   105  				require.Equal(t, oldProps[0], newProps[1])
   106  				ival.decode(newProps[0])
   107  				require.Equal(t, interval{646, 647}, ival)
   108  				ival.decode(newProps[2])
   109  				require.Equal(t, interval{46, 47}, ival)
   110  			}
   111  		})
   112  	}
   113  }
   114  
// memFile is a file-like struct that buffers all data written to it in memory.
// Implements the writeCloseSyncer interface. The zero value is ready to use
// (it embeds a bytes.Buffer, whose zero value is an empty buffer).
type memFile struct {
	bytes.Buffer
}
   120  
   121  // Close implements the writeCloseSyncer interface.
   122  func (*memFile) Close() error {
   123  	return nil
   124  }
   125  
   126  // Sync implements the writeCloseSyncer interface.
   127  func (*memFile) Sync() error {
   128  	return nil
   129  }
   130  
   131  // Data returns the in-memory buffer behind this MemFile.
   132  func (f *memFile) Data() []byte {
   133  	return f.Bytes()
   134  }
   135  
   136  // Flush is implemented so it prevents buffering inside Writter.
   137  func (f *memFile) Flush() error {
   138  	return nil
   139  }
   140  
   141  func make4bSuffixTestSST(
   142  	t testing.TB, writerOpts WriterOptions, suffix []byte, keys int, rangeKeys int,
   143  ) []byte {
   144  	key := make([]byte, 28)
   145  	endKey := make([]byte, 24)
   146  	copy(key[24:], suffix)
   147  
   148  	f := &memFile{}
   149  	w := NewWriter(f, writerOpts)
   150  	for i := 0; i < keys; i++ {
   151  		binary.BigEndian.PutUint64(key[:8], 123) // 16-byte shared prefix
   152  		binary.BigEndian.PutUint64(key[8:16], 456)
   153  		binary.BigEndian.PutUint64(key[16:], uint64(i))
   154  		if err := w.Set(key, key); err != nil {
   155  			t.Fatal(err)
   156  		}
   157  	}
   158  	for i := 0; i < rangeKeys; i++ {
   159  		binary.BigEndian.PutUint64(key[:8], 123) // 16-byte shared prefix
   160  		binary.BigEndian.PutUint64(key[8:16], 456)
   161  		binary.BigEndian.PutUint64(key[16:], uint64(i))
   162  		binary.BigEndian.PutUint64(endKey[:8], 123) // 16-byte shared prefix
   163  		binary.BigEndian.PutUint64(endKey[8:16], 456)
   164  		binary.BigEndian.PutUint64(endKey[16:], uint64(i+1))
   165  		if err := w.RangeKeySet(key[:24], endKey[:24], suffix, key); err != nil {
   166  			t.Fatal(err)
   167  		}
   168  	}
   169  	if err := w.Close(); err != nil {
   170  		t.Fatal(err)
   171  	}
   172  
   173  	return f.Bytes()
   174  }
   175  
// BenchmarkRewriteSST measures suffix-rewrite throughput, comparing the
// Reader+Writer loop implementation against the block-level rewriter at
// several concurrency levels, across table sizes and compression settings.
func BenchmarkRewriteSST(b *testing.B) {
	from, to := []byte("_123"), []byte("_456")
	writerOpts := WriterOptions{
		FilterPolicy: bloom.FilterPolicy(10),
		Comparer:     test4bSuffixComparer,
		TableFormat:  TableFormatPebblev2,
	}

	sizes := []int{100, 10000, 1e6}
	compressions := []Compression{NoCompression, SnappyCompression}

	// files[comp][size] holds a Reader over a pre-built test sstable for each
	// compression/size combination, built once up front (before ResetTimer)
	// so setup cost stays out of the timed region.
	files := make([][]*Reader, len(compressions))

	for comp := range compressions {
		files[comp] = make([]*Reader, len(sizes))

		for size := range sizes {
			writerOpts.Compression = compressions[comp]
			// Point keys only; no range keys in the benchmark tables.
			sst := make4bSuffixTestSST(b, writerOpts, from, sizes[size], 0 /* rangeKeys */)
			r, err := NewMemReader(sst, ReaderOptions{
				Comparer: test4bSuffixComparer,
				Filters:  map[string]base.FilterPolicy{writerOpts.FilterPolicy.Name(): writerOpts.FilterPolicy},
			})
			if err != nil {
				b.Fatal(err)
			}
			// NOTE(review): these readers are never closed; likely fine for a
			// short-lived benchmark process, but confirm Close isn't required
			// to release cache handles.
			files[comp][size] = r
		}
	}

	b.ResetTimer()
	for comp := range compressions {
		b.Run(compressions[comp].String(), func(b *testing.B) {
			for sz := range sizes {
				r := files[comp][sz]
				b.Run(fmt.Sprintf("keys=%d", sizes[sz]), func(b *testing.B) {
					b.Run("ReaderWriterLoop", func(b *testing.B) {
						// Report throughput relative to the input table's size.
						stat, _ := r.file.Stat()
						b.SetBytes(stat.Size())
						for i := 0; i < b.N; i++ {
							if _, err := RewriteKeySuffixesViaWriter(r, &discardFile{}, writerOpts, from, to); err != nil {
								b.Fatal(err)
							}
						}
					})
					for _, concurrency := range []int{1, 2, 4, 8, 16} {
						b.Run(fmt.Sprintf("RewriteKeySuffixes,concurrency=%d", concurrency), func(b *testing.B) {
							stat, _ := r.file.Stat()
							b.SetBytes(stat.Size())
							for i := 0; i < b.N; i++ {
								if _, err := rewriteKeySuffixesInBlocks(r, &discardFile{}, writerOpts, []byte("_123"), []byte("_456"), concurrency); err != nil {
									b.Fatal(err)
								}
							}
						})
					}
				})
			}
		})
	}
}