github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/surf/surf_bench_test.go (about) 1 package surf 2 3 import ( 4 "bufio" 5 "compress/gzip" 6 "encoding/binary" 7 "fmt" 8 "os" 9 "path/filepath" 10 "strconv" 11 "strings" 12 "testing" 13 ) 14 15 func BenchmarkGet(b *testing.B) { 16 forEachDataset(func(name string, data [][]byte) { 17 b.Run(name, func(b *testing.B) { 18 b.StopTimer() 19 insert, vals, others := splitKeys(data) 20 b.StartTimer() 21 buildAndBenchSuRF(b, insert, vals, func(b *testing.B, surf *SuRF) { 22 b.Run("exist", func(b *testing.B) { 23 for n := 0; n < b.N; n++ { 24 i := n % len(insert) 25 surf.Get(insert[i]) 26 } 27 }) 28 29 var total, fp int 30 b.Run("nonexist", func(b *testing.B) { 31 var localFp int 32 for n := 0; n < b.N; n++ { 33 i := n % len(others) 34 if _, ok := surf.Get(others[i]); ok { 35 localFp++ 36 } 37 } 38 fp += localFp 39 total += b.N 40 }) 41 42 b.Logf("\nSuRF size is %d bytes\nnumber of keys %d\nfalse positive rate is %.2f", surf.MarshalSize(), len(insert), float64(fp)/float64(total)*100) 43 }) 44 }) 45 }) 46 } 47 48 func BenchmarkSeek(b *testing.B) { 49 forEachDataset(func(name string, data [][]byte) { 50 b.Run(name, func(b *testing.B) { 51 b.StopTimer() 52 insert, vals, others := splitKeys(data) 53 b.StartTimer() 54 buildAndBenchSuRF(b, insert, vals, func(b *testing.B, surf *SuRF) { 55 b.Run("exist", func(b *testing.B) { 56 it := surf.NewIterator() 57 for n := 0; n < b.N; n++ { 58 i := n % len(insert) 59 it.Seek(insert[i]) 60 } 61 }) 62 63 var total, fp int 64 b.Run("nonexist", func(b *testing.B) { 65 var localFp int 66 it := surf.NewIterator() 67 for n := 0; n < b.N; n++ { 68 i := n % len(others) 69 it.Seek(others[i]) 70 if endian.Uint32(it.Value()) < endian.Uint32(vals[i]) { 71 localFp++ 72 } 73 } 74 fp += localFp 75 total += b.N 76 }) 77 78 b.Logf("\nSuRF size is %d bytes\nnumber of keys %d\nfalse positive rate is %.2f", surf.MarshalSize(), len(insert), float64(fp)/float64(total)*100) 79 }) 80 }) 81 }) 82 } 83 84 func buildAndBenchSuRF(b *testing.B, keys, vals [][]byte, run func(t *testing.B, surf *SuRF)) { 85 suffixLens := [][]uint32{ 86 {0, 0}, 87 {16, 0}, 88 {0, 16}, 89 {8, 8}, 90 } 91 92 for _, sl := range suffixLens { 93 builder := NewBuilder(4, sl[0], sl[1]) 94 95 builder.totalCount = len(keys) 96 builder.buildNodes(keys, vals, 0, 0, 0) 97 for i := 0; i < builder.treeHeight(); i++ { 98 builder.sparseStartLevel = uint32(i) 99 builder.ldLabels = builder.ldLabels[:0] 100 builder.ldHasChild = builder.ldHasChild[:0] 101 builder.ldIsPrefix = builder.ldIsPrefix[:0] 102 builder.buildDense() 103 104 surf := new(SuRF) 105 surf.ld.Init(builder) 106 surf.ls.Init(builder) 107 108 b.ResetTimer() 109 b.Run(fmt.Sprintf("cutoff=%d,hashLen=%d,realLen=%d", i, sl[0], sl[1]), func(b *testing.B) { 110 run(b, surf) 111 }) 112 } 113 } 114 } 115 116 func forEachDataset(fn func(string, [][]byte)) { 117 err := filepath.Walk("testdata", func(path string, info os.FileInfo, err error) error { 118 if err != nil { 119 return err 120 } 121 122 if info.IsDir() || filepath.Ext(info.Name()) != ".gz" { 123 return nil 124 } 125 f, err := os.Open(path) 126 if err != nil { 127 return err 128 } 129 fn(loadData(f)) 130 return nil 131 }) 132 if err != nil { 133 panic(err) 134 } 135 } 136 137 func loadData(f *os.File) (string, [][]byte) { 138 decompressor, err := gzip.NewReader(f) 139 if err != nil { 140 panic(err) 141 } 142 sc := bufio.NewScanner(decompressor) 143 sc.Split(dataSplitFunc) 144 145 nameSize := strings.Split(strings.TrimSuffix(f.Name(), filepath.Ext(f.Name())), "_") 146 size, _ := strconv.Atoi(nameSize[1]) 147 keys := make([][]byte, 0, size) 148 for sc.Scan() { 149 keys = append(keys, append([]byte{}, sc.Bytes()...)) 150 } 151 152 return nameSize[0], keys 153 } 154 155 func dataSplitFunc(data []byte, atEOF bool) (advance int, token []byte, err error) { 156 if len(data) < 2 { 157 return 0, nil, nil 158 } 159 l := int(binary.LittleEndian.Uint16(data[:2])) 160 if len(data[2:]) < l { 161 return 0, nil, nil 162 } 163 164 return 2 + l, data[2 : 2+l], nil 165 }