github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/sstable/suffix_rewriter_test.go

package sstable

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"strconv"
	"testing"

	"github.com/stretchr/testify/require"
	"github.com/zuoyebang/bitalostable/bloom"
	"github.com/zuoyebang/bitalostable/internal/base"
)

func TestRewriteSuffixProps(t *testing.T) {
	from, to := []byte("_212"), []byte("_646")

	wOpts := WriterOptions{
		FilterPolicy: bloom.FilterPolicy(10),
		Comparer:     test4bSuffixComparer,
		TablePropertyCollectors: []func() TablePropertyCollector{
			intSuffixTablePropCollectorFn("ts3", 3), intSuffixTablePropCollectorFn("ts2", 2),
		},
		BlockPropertyCollectors: []func() BlockPropertyCollector{
			keyCountCollectorFn("count"),
			intSuffixIntervalCollectorFn("bp3", 3),
			intSuffixIntervalCollectorFn("bp2", 2),
			intSuffixIntervalCollectorFn("bp1", 1),
		},
		TableFormat: TableFormatPebblev2,
	}

	const keyCount = 1e5
	const rangeKeyCount = 100
	// Set up our test SST.
	sst := make4bSuffixTestSST(t, wOpts, []byte(from), keyCount, rangeKeyCount)

	expectedProps := make(map[string]string)
	expectedProps["ts2.min"] = "46"
	expectedProps["ts2.max"] = "46"
	expectedProps["ts3.min"] = "646"
	expectedProps["ts3.max"] = "646"

	// Also expect to see the aggregated block properties with their updated value
	// at the correct (new) shortIDs. Seeing the rolled-up value here is almost an
	// end-to-end test, since we only fed them each block during the rewrite.
	expectedProps["count"] = string(append([]byte{1}, strconv.Itoa(keyCount+rangeKeyCount)...))
	expectedProps["bp2"] = string(interval{46, 47}.encode([]byte{2}))
	expectedProps["bp3"] = string(interval{646, 647}.encode([]byte{0}))

	// Swap the order of two of the props so they have new shortIDs, and remove
	// one.
	rwOpts := wOpts
	rwOpts.BlockPropertyCollectors = rwOpts.BlockPropertyCollectors[:3]
	rwOpts.BlockPropertyCollectors[0], rwOpts.BlockPropertyCollectors[1] = rwOpts.BlockPropertyCollectors[1], rwOpts.BlockPropertyCollectors[0]

	// Rewrite the SST using the updated options and check the returned props.
	readerOpts := ReaderOptions{
		Comparer: test4bSuffixComparer,
		Filters:  map[string]base.FilterPolicy{wOpts.FilterPolicy.Name(): wOpts.FilterPolicy},
	}
	r, err := NewMemReader(sst, readerOpts)
	require.NoError(t, err)
	defer r.Close()

	for _, byBlocks := range []bool{false, true} {
		t.Run(fmt.Sprintf("byBlocks=%v", byBlocks), func(t *testing.T) {
			rewrittenSST := &memFile{}
			if byBlocks {
				_, err := rewriteKeySuffixesInBlocks(r, rewrittenSST, rwOpts, from, to, 8)
				require.NoError(t, err)
			} else {
				_, err := RewriteKeySuffixesViaWriter(r, rewrittenSST, rwOpts, from, to)
				require.NoError(t, err)
			}

			// Check that a reader on the rewritten SST has the expected props.
			rRewritten, err := NewMemReader(rewrittenSST.Bytes(), readerOpts)
			require.NoError(t, err)
			defer rRewritten.Close()
			require.Equal(t, expectedProps, rRewritten.Properties.UserProperties)

			// Compare the block-level props from the data blocks in the layout.
			layout, err := r.Layout()
			require.NoError(t, err)
			newLayout, err := rRewritten.Layout()
			require.NoError(t, err)

			ival := interval{}
			for i := range layout.Data {
				oldProps := make([][]byte, len(wOpts.BlockPropertyCollectors))
				oldDecoder := blockPropertiesDecoder{layout.Data[i].Props}
				for !oldDecoder.done() {
					id, val, err := oldDecoder.next()
					require.NoError(t, err)
					oldProps[id] = val
				}
				newProps := make([][]byte, len(rwOpts.BlockPropertyCollectors))
				newDecoder := blockPropertiesDecoder{newLayout.Data[i].Props}
				for !newDecoder.done() {
					id, val, err := newDecoder.next()
					require.NoError(t, err)
					newProps[id] = val
				}
				require.Equal(t, oldProps[0], newProps[1])
				ival.decode(newProps[0])
				require.Equal(t, interval{646, 647}, ival)
				ival.decode(newProps[2])
				require.Equal(t, interval{46, 47}, ival)
			}
		})
	}
}

// memFile is a file-like struct that buffers all data written to it in memory.
// Implements the writeCloseSyncer interface.
type memFile struct {
	bytes.Buffer
}

// Close implements the writeCloseSyncer interface.
func (*memFile) Close() error {
	return nil
}

// Sync implements the writeCloseSyncer interface.
func (*memFile) Sync() error {
	return nil
}

// Data returns the in-memory buffer behind this memFile.
func (f *memFile) Data() []byte {
	return f.Bytes()
}

// Flush is implemented so it prevents buffering inside the Writer.
func (f *memFile) Flush() error {
	return nil
}

func make4bSuffixTestSST(
	t testing.TB, writerOpts WriterOptions, suffix []byte, keys int, rangeKeys int,
) []byte {
	key := make([]byte, 28)
	endKey := make([]byte, 24)
	copy(key[24:], suffix)

	f := &memFile{}
	w := NewWriter(f, writerOpts)
	for i := 0; i < keys; i++ {
		binary.BigEndian.PutUint64(key[:8], 123) // 16-byte shared prefix
		binary.BigEndian.PutUint64(key[8:16], 456)
		binary.BigEndian.PutUint64(key[16:], uint64(i))
		if err := w.Set(key, key); err != nil {
			t.Fatal(err)
		}
	}
	for i := 0; i < rangeKeys; i++ {
		binary.BigEndian.PutUint64(key[:8], 123) // 16-byte shared prefix
		binary.BigEndian.PutUint64(key[8:16], 456)
		binary.BigEndian.PutUint64(key[16:], uint64(i))
		binary.BigEndian.PutUint64(endKey[:8], 123) // 16-byte shared prefix
		binary.BigEndian.PutUint64(endKey[8:16], 456)
		binary.BigEndian.PutUint64(endKey[16:], uint64(i+1))
		if err := w.RangeKeySet(key[:24], endKey[:24], suffix, key); err != nil {
			t.Fatal(err)
		}
	}
	if err := w.Close(); err != nil {
		t.Fatal(err)
	}

	return f.Bytes()
}

func BenchmarkRewriteSST(b *testing.B) {
	from, to := []byte("_123"), []byte("_456")
	writerOpts := WriterOptions{
		FilterPolicy: bloom.FilterPolicy(10),
		Comparer:     test4bSuffixComparer,
		TableFormat:  TableFormatPebblev2,
	}

	sizes := []int{100, 10000, 1e6}
	compressions := []Compression{NoCompression, SnappyCompression}

	files := make([][]*Reader, len(compressions))

	for comp := range compressions {
		files[comp] = make([]*Reader, len(sizes))

		for size := range sizes {
			writerOpts.Compression = compressions[comp]
			sst := make4bSuffixTestSST(b, writerOpts, from, sizes[size], 0 /* rangeKeys */)
			r, err := NewMemReader(sst, ReaderOptions{
				Comparer: test4bSuffixComparer,
				Filters:  map[string]base.FilterPolicy{writerOpts.FilterPolicy.Name(): writerOpts.FilterPolicy},
			})
			if err != nil {
				b.Fatal(err)
			}
			files[comp][size] = r
		}
	}

	b.ResetTimer()
	for comp := range compressions {
		b.Run(compressions[comp].String(), func(b *testing.B) {
			for sz := range sizes {
				r := files[comp][sz]
				b.Run(fmt.Sprintf("keys=%d", sizes[sz]), func(b *testing.B) {
					b.Run("ReaderWriterLoop", func(b *testing.B) {
						stat, _ := r.file.Stat()
						b.SetBytes(stat.Size())
						for i := 0; i < b.N; i++ {
							if _, err := RewriteKeySuffixesViaWriter(r, &discardFile{}, writerOpts, from, to); err != nil {
								b.Fatal(err)
							}
						}
					})
					for _, concurrency := range []int{1, 2, 4, 8, 16} {
						b.Run(fmt.Sprintf("RewriteKeySuffixes,concurrency=%d", concurrency), func(b *testing.B) {
							stat, _ := r.file.Stat()
							b.SetBytes(stat.Size())
							for i := 0; i < b.N; i++ {
								if _, err := rewriteKeySuffixesInBlocks(r, &discardFile{}, writerOpts, []byte("_123"), []byte("_456"), concurrency); err != nil {
									b.Fatal(err)
								}
							}
						})
					}
				})
			}
		})
	}
}
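
// rewriteSuffixSketch is an illustrative sketch added for exposition and is
// not part of the upstream file. It reuses only helpers and options that
// appear above (make4bSuffixTestSST, memFile, test4bSuffixComparer, the
// benchmark's filter/reader options) to show the minimal round trip the tests
// exercise: build an in-memory SST whose keys carry one 4-byte suffix, rewrite
// that suffix via RewriteKeySuffixesViaWriter, and reopen the result.
func rewriteSuffixSketch(t testing.TB) {
	writerOpts := WriterOptions{
		FilterPolicy: bloom.FilterPolicy(10),
		Comparer:     test4bSuffixComparer,
		TableFormat:  TableFormatPebblev2,
	}
	readerOpts := ReaderOptions{
		Comparer: test4bSuffixComparer,
		Filters:  map[string]base.FilterPolicy{writerOpts.FilterPolicy.Name(): writerOpts.FilterPolicy},
	}
	from, to := []byte("_123"), []byte("_456")

	// Build a small SST with the "from" suffix and open a reader on it.
	sst := make4bSuffixTestSST(t, writerOpts, from, 1000 /* keys */, 0 /* rangeKeys */)
	r, err := NewMemReader(sst, readerOpts)
	if err != nil {
		t.Fatal(err)
	}
	defer r.Close()

	// Rewrite every key's suffix into an in-memory buffer, then reopen it to
	// confirm the rewritten table is readable with the same options.
	rewritten := &memFile{}
	if _, err := RewriteKeySuffixesViaWriter(r, rewritten, writerOpts, from, to); err != nil {
		t.Fatal(err)
	}
	rRewritten, err := NewMemReader(rewritten.Bytes(), readerOpts)
	if err != nil {
		t.Fatal(err)
	}
	defer rRewritten.Close()
}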