github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/suffix_rewriter_test.go (about) 1 package sstable 2 3 import ( 4 "bytes" 5 "encoding/binary" 6 "fmt" 7 "math/rand" 8 "strconv" 9 "testing" 10 11 "github.com/cockroachdb/pebble/bloom" 12 "github.com/cockroachdb/pebble/internal/base" 13 "github.com/cockroachdb/pebble/objstorage" 14 "github.com/stretchr/testify/require" 15 ) 16 17 func TestRewriteSuffixProps(t *testing.T) { 18 from, to := []byte("_212"), []byte("_646") 19 for format := TableFormatPebblev2; format <= TableFormatMax; format++ { 20 t.Run(format.String(), func(t *testing.T) { 21 wOpts := WriterOptions{ 22 FilterPolicy: bloom.FilterPolicy(10), 23 Comparer: test4bSuffixComparer, 24 TablePropertyCollectors: []func() TablePropertyCollector{ 25 intSuffixTablePropCollectorFn("ts3", 3), intSuffixTablePropCollectorFn("ts2", 2), 26 }, 27 BlockPropertyCollectors: []func() BlockPropertyCollector{ 28 keyCountCollectorFn("count"), 29 intSuffixIntervalCollectorFn("bp3", 3), 30 intSuffixIntervalCollectorFn("bp2", 2), 31 intSuffixIntervalCollectorFn("bp1", 1), 32 }, 33 TableFormat: format, 34 } 35 if format >= TableFormatPebblev4 { 36 wOpts.IsStrictObsolete = true 37 } 38 39 const keyCount = 1e5 40 const rangeKeyCount = 100 41 // Setup our test SST. 42 sst := make4bSuffixTestSST(t, wOpts, []byte(from), keyCount, rangeKeyCount) 43 44 expectedProps := make(map[string]string) 45 expectedProps["ts2.min"] = "46" 46 expectedProps["ts2.max"] = "46" 47 expectedProps["ts3.min"] = "646" 48 expectedProps["ts3.max"] = "646" 49 50 // Also expect to see the aggregated block properties with their updated value 51 // at the correct (new) shortIDs. Seeing the rolled up value here is almost an 52 // end-to-end test since we only fed them each block during rewrite. 53 expectedProps["count"] = string(append([]byte{1}, strconv.Itoa(keyCount+rangeKeyCount)...)) 54 expectedProps["bp2"] = string(interval{46, 47}.encode([]byte{2})) 55 expectedProps["bp3"] = string(interval{646, 647}.encode([]byte{0})) 56 57 // Swap the order of two of the props so they have new shortIDs, and remove 58 // one. rwOpts inherits the IsStrictObsolete value from wOpts. 59 rwOpts := wOpts 60 if rand.Intn(2) != 0 { 61 rwOpts.TableFormat = TableFormatPebblev2 62 rwOpts.IsStrictObsolete = false 63 t.Log("table format set to TableFormatPebblev2") 64 } 65 fmt.Printf("from format %s, to format %s\n", format.String(), rwOpts.TableFormat.String()) 66 rwOpts.BlockPropertyCollectors = rwOpts.BlockPropertyCollectors[:3] 67 rwOpts.BlockPropertyCollectors[0], rwOpts.BlockPropertyCollectors[1] = rwOpts.BlockPropertyCollectors[1], rwOpts.BlockPropertyCollectors[0] 68 69 // Rewrite the SST using updated options and check the returned props. 70 readerOpts := ReaderOptions{ 71 Comparer: test4bSuffixComparer, 72 Filters: map[string]base.FilterPolicy{wOpts.FilterPolicy.Name(): wOpts.FilterPolicy}, 73 } 74 r, err := NewMemReader(sst, readerOpts) 75 require.NoError(t, err) 76 defer r.Close() 77 78 var sstBytes [2][]byte 79 adjustPropsForEffectiveFormat := func(effectiveFormat TableFormat) { 80 if effectiveFormat == TableFormatPebblev4 { 81 expectedProps["obsolete-key"] = string([]byte{3}) 82 } else { 83 delete(expectedProps, "obsolete-key") 84 } 85 } 86 for i, byBlocks := range []bool{false, true} { 87 t.Run(fmt.Sprintf("byBlocks=%v", byBlocks), func(t *testing.T) { 88 rewrittenSST := &memFile{} 89 if byBlocks { 90 _, rewriteFormat, err := rewriteKeySuffixesInBlocks( 91 r, rewrittenSST, rwOpts, from, to, 8) 92 // rewriteFormat is equal to the original format, since 93 // rwOpts.TableFormat is ignored. 94 require.Equal(t, wOpts.TableFormat, rewriteFormat) 95 require.NoError(t, err) 96 adjustPropsForEffectiveFormat(rewriteFormat) 97 } else { 98 _, err := RewriteKeySuffixesViaWriter(r, rewrittenSST, rwOpts, from, to) 99 require.NoError(t, err) 100 adjustPropsForEffectiveFormat(rwOpts.TableFormat) 101 } 102 103 sstBytes[i] = rewrittenSST.Data() 104 // Check that a reader on the rewritten STT has the expected props. 105 rRewritten, err := NewMemReader(rewrittenSST.Data(), readerOpts) 106 require.NoError(t, err) 107 defer rRewritten.Close() 108 require.Equal(t, expectedProps, rRewritten.Properties.UserProperties) 109 require.False(t, rRewritten.Properties.IsStrictObsolete) 110 111 // Compare the block level props from the data blocks in the layout, 112 // only if we did not do a rewrite from one format to another. If the 113 // format changes, the block boundaries change slightly. 114 if !byBlocks && wOpts.TableFormat != rwOpts.TableFormat { 115 return 116 } 117 layout, err := r.Layout() 118 require.NoError(t, err) 119 newLayout, err := rRewritten.Layout() 120 require.NoError(t, err) 121 122 ival := interval{} 123 for i := range layout.Data { 124 oldProps := make([][]byte, len(wOpts.BlockPropertyCollectors)) 125 oldDecoder := blockPropertiesDecoder{layout.Data[i].Props} 126 for !oldDecoder.done() { 127 id, val, err := oldDecoder.next() 128 require.NoError(t, err) 129 oldProps[id] = val 130 } 131 newProps := make([][]byte, len(rwOpts.BlockPropertyCollectors)) 132 newDecoder := blockPropertiesDecoder{newLayout.Data[i].Props} 133 for !newDecoder.done() { 134 id, val, err := newDecoder.next() 135 require.NoError(t, err) 136 if int(id) < len(newProps) { 137 newProps[id] = val 138 } 139 } 140 require.Equal(t, oldProps[0], newProps[1]) 141 ival.decode(newProps[0]) 142 require.Equal(t, interval{646, 647}, ival) 143 ival.decode(newProps[2]) 144 require.Equal(t, interval{46, 47}, ival) 145 } 146 }) 147 } 148 if wOpts.TableFormat == rwOpts.TableFormat { 149 // Both methods of rewriting should produce the same result. 150 require.Equal(t, sstBytes[0], sstBytes[1]) 151 } 152 }) 153 } 154 } 155 156 // memFile is a file-like struct that buffers all data written to it in memory. 157 // Implements the objstorage.Writable interface. 158 type memFile struct { 159 buf bytes.Buffer 160 } 161 162 var _ objstorage.Writable = (*memFile)(nil) 163 164 // Finish is part of the objstorage.Writable interface. 165 func (*memFile) Finish() error { 166 return nil 167 } 168 169 // Abort is part of the objstorage.Writable interface. 170 func (*memFile) Abort() {} 171 172 // Write is part of the objstorage.Writable interface. 173 func (f *memFile) Write(p []byte) error { 174 _, err := f.buf.Write(p) 175 return err 176 } 177 178 // Data returns the in-memory buffer behind this MemFile. 179 func (f *memFile) Data() []byte { 180 return f.buf.Bytes() 181 } 182 183 func make4bSuffixTestSST( 184 t testing.TB, writerOpts WriterOptions, suffix []byte, keys int, rangeKeys int, 185 ) []byte { 186 key := make([]byte, 28) 187 endKey := make([]byte, 24) 188 copy(key[24:], suffix) 189 190 f := &memFile{} 191 w := NewWriter(f, writerOpts) 192 for i := 0; i < keys; i++ { 193 binary.BigEndian.PutUint64(key[:8], 123) // 16-byte shared prefix 194 binary.BigEndian.PutUint64(key[8:16], 456) 195 binary.BigEndian.PutUint64(key[16:], uint64(i)) 196 err := w.AddWithForceObsolete( 197 base.MakeInternalKey(key, 0, InternalKeyKindSet), key, false) 198 if err != nil { 199 t.Fatal(err) 200 } 201 } 202 for i := 0; i < rangeKeys; i++ { 203 binary.BigEndian.PutUint64(key[:8], 123) // 16-byte shared prefix 204 binary.BigEndian.PutUint64(key[8:16], 456) 205 binary.BigEndian.PutUint64(key[16:], uint64(i)) 206 binary.BigEndian.PutUint64(endKey[:8], 123) // 16-byte shared prefix 207 binary.BigEndian.PutUint64(endKey[8:16], 456) 208 binary.BigEndian.PutUint64(endKey[16:], uint64(i+1)) 209 if err := w.RangeKeySet(key[:24], endKey[:24], suffix, key); err != nil { 210 t.Fatal(err) 211 } 212 } 213 if err := w.Close(); err != nil { 214 t.Fatal(err) 215 } 216 217 return f.buf.Bytes() 218 } 219 220 func BenchmarkRewriteSST(b *testing.B) { 221 from, to := []byte("_123"), []byte("_456") 222 writerOpts := WriterOptions{ 223 FilterPolicy: bloom.FilterPolicy(10), 224 Comparer: test4bSuffixComparer, 225 TableFormat: TableFormatPebblev2, 226 } 227 228 sizes := []int{100, 10000, 1e6} 229 compressions := []Compression{NoCompression, SnappyCompression} 230 231 files := make([][]*Reader, len(compressions)) 232 233 for comp := range compressions { 234 files[comp] = make([]*Reader, len(sizes)) 235 236 for size := range sizes { 237 writerOpts.Compression = compressions[comp] 238 sst := make4bSuffixTestSST(b, writerOpts, from, sizes[size], 0 /* rangeKeys */) 239 r, err := NewMemReader(sst, ReaderOptions{ 240 Comparer: test4bSuffixComparer, 241 Filters: map[string]base.FilterPolicy{writerOpts.FilterPolicy.Name(): writerOpts.FilterPolicy}, 242 }) 243 if err != nil { 244 b.Fatal(err) 245 } 246 files[comp][size] = r 247 } 248 } 249 250 b.ResetTimer() 251 for comp := range compressions { 252 b.Run(compressions[comp].String(), func(b *testing.B) { 253 for sz := range sizes { 254 r := files[comp][sz] 255 b.Run(fmt.Sprintf("keys=%d", sizes[sz]), func(b *testing.B) { 256 b.Run("ReaderWriterLoop", func(b *testing.B) { 257 b.SetBytes(r.readable.Size()) 258 for i := 0; i < b.N; i++ { 259 if _, err := RewriteKeySuffixesViaWriter(r, &discardFile{}, writerOpts, from, to); err != nil { 260 b.Fatal(err) 261 } 262 } 263 }) 264 for _, concurrency := range []int{1, 2, 4, 8, 16} { 265 b.Run(fmt.Sprintf("RewriteKeySuffixes,concurrency=%d", concurrency), func(b *testing.B) { 266 b.SetBytes(r.readable.Size()) 267 for i := 0; i < b.N; i++ { 268 if _, _, err := rewriteKeySuffixesInBlocks(r, &discardFile{}, writerOpts, []byte("_123"), []byte("_456"), concurrency); err != nil { 269 b.Fatal(err) 270 } 271 } 272 }) 273 } 274 }) 275 } 276 }) 277 } 278 }