github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/surf/surf_bench_test.go (about)

     1  package surf
     2  
     3  import (
     4  	"bufio"
     5  	"compress/gzip"
     6  	"encoding/binary"
     7  	"fmt"
     8  	"os"
     9  	"path/filepath"
    10  	"strconv"
    11  	"strings"
    12  	"testing"
    13  )
    14  
    15  func BenchmarkGet(b *testing.B) {
    16  	forEachDataset(func(name string, data [][]byte) {
    17  		b.Run(name, func(b *testing.B) {
    18  			b.StopTimer()
    19  			insert, vals, others := splitKeys(data)
    20  			b.StartTimer()
    21  			buildAndBenchSuRF(b, insert, vals, func(b *testing.B, surf *SuRF) {
    22  				b.Run("exist", func(b *testing.B) {
    23  					for n := 0; n < b.N; n++ {
    24  						i := n % len(insert)
    25  						surf.Get(insert[i])
    26  					}
    27  				})
    28  
    29  				var total, fp int
    30  				b.Run("nonexist", func(b *testing.B) {
    31  					var localFp int
    32  					for n := 0; n < b.N; n++ {
    33  						i := n % len(others)
    34  						if _, ok := surf.Get(others[i]); ok {
    35  							localFp++
    36  						}
    37  					}
    38  					fp += localFp
    39  					total += b.N
    40  				})
    41  
    42  				b.Logf("\nSuRF size is %d bytes\nnumber of keys %d\nfalse positive rate is %.2f", surf.MarshalSize(), len(insert), float64(fp)/float64(total)*100)
    43  			})
    44  		})
    45  	})
    46  }
    47  
    48  func BenchmarkSeek(b *testing.B) {
    49  	forEachDataset(func(name string, data [][]byte) {
    50  		b.Run(name, func(b *testing.B) {
    51  			b.StopTimer()
    52  			insert, vals, others := splitKeys(data)
    53  			b.StartTimer()
    54  			buildAndBenchSuRF(b, insert, vals, func(b *testing.B, surf *SuRF) {
    55  				b.Run("exist", func(b *testing.B) {
    56  					it := surf.NewIterator()
    57  					for n := 0; n < b.N; n++ {
    58  						i := n % len(insert)
    59  						it.Seek(insert[i])
    60  					}
    61  				})
    62  
    63  				var total, fp int
    64  				b.Run("nonexist", func(b *testing.B) {
    65  					var localFp int
    66  					it := surf.NewIterator()
    67  					for n := 0; n < b.N; n++ {
    68  						i := n % len(others)
    69  						it.Seek(others[i])
    70  						if endian.Uint32(it.Value()) < endian.Uint32(vals[i]) {
    71  							localFp++
    72  						}
    73  					}
    74  					fp += localFp
    75  					total += b.N
    76  				})
    77  
    78  				b.Logf("\nSuRF size is %d bytes\nnumber of keys %d\nfalse positive rate is %.2f", surf.MarshalSize(), len(insert), float64(fp)/float64(total)*100)
    79  			})
    80  		})
    81  	})
    82  }
    83  
    84  func buildAndBenchSuRF(b *testing.B, keys, vals [][]byte, run func(t *testing.B, surf *SuRF)) {
    85  	suffixLens := [][]uint32{
    86  		{0, 0},
    87  		{16, 0},
    88  		{0, 16},
    89  		{8, 8},
    90  	}
    91  
    92  	for _, sl := range suffixLens {
    93  		builder := NewBuilder(4, sl[0], sl[1])
    94  
    95  		builder.totalCount = len(keys)
    96  		builder.buildNodes(keys, vals, 0, 0, 0)
    97  		for i := 0; i < builder.treeHeight(); i++ {
    98  			builder.sparseStartLevel = uint32(i)
    99  			builder.ldLabels = builder.ldLabels[:0]
   100  			builder.ldHasChild = builder.ldHasChild[:0]
   101  			builder.ldIsPrefix = builder.ldIsPrefix[:0]
   102  			builder.buildDense()
   103  
   104  			surf := new(SuRF)
   105  			surf.ld.Init(builder)
   106  			surf.ls.Init(builder)
   107  
   108  			b.ResetTimer()
   109  			b.Run(fmt.Sprintf("cutoff=%d,hashLen=%d,realLen=%d", i, sl[0], sl[1]), func(b *testing.B) {
   110  				run(b, surf)
   111  			})
   112  		}
   113  	}
   114  }
   115  
   116  func forEachDataset(fn func(string, [][]byte)) {
   117  	err := filepath.Walk("testdata", func(path string, info os.FileInfo, err error) error {
   118  		if err != nil {
   119  			return err
   120  		}
   121  
   122  		if info.IsDir() || filepath.Ext(info.Name()) != ".gz" {
   123  			return nil
   124  		}
   125  		f, err := os.Open(path)
   126  		if err != nil {
   127  			return err
   128  		}
   129  		fn(loadData(f))
   130  		return nil
   131  	})
   132  	if err != nil {
   133  		panic(err)
   134  	}
   135  }
   136  
   137  func loadData(f *os.File) (string, [][]byte) {
   138  	decompressor, err := gzip.NewReader(f)
   139  	if err != nil {
   140  		panic(err)
   141  	}
   142  	sc := bufio.NewScanner(decompressor)
   143  	sc.Split(dataSplitFunc)
   144  
   145  	nameSize := strings.Split(strings.TrimSuffix(f.Name(), filepath.Ext(f.Name())), "_")
   146  	size, _ := strconv.Atoi(nameSize[1])
   147  	keys := make([][]byte, 0, size)
   148  	for sc.Scan() {
   149  		keys = append(keys, append([]byte{}, sc.Bytes()...))
   150  	}
   151  
   152  	return nameSize[0], keys
   153  }
   154  
   155  func dataSplitFunc(data []byte, atEOF bool) (advance int, token []byte, err error) {
   156  	if len(data) < 2 {
   157  		return 0, nil, nil
   158  	}
   159  	l := int(binary.LittleEndian.Uint16(data[:2]))
   160  	if len(data[2:]) < l {
   161  		return 0, nil, nil
   162  	}
   163  
   164  	return 2 + l, data[2 : 2+l], nil
   165  }