github.com/coocood/badger@v1.5.1-0.20200528065104-c02ac3616d04/surf/surf_test.go (about)

     1  package surf
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"math/rand"
     7  	"sort"
     8  	"testing"
     9  	"time"
    10  
    11  	"github.com/pingcap/log"
    12  	"github.com/stretchr/testify/require"
    13  	"go.uber.org/zap"
    14  )
    15  
    16  func TestBuildPrefixKeys(t *testing.T) {
    17  	keys := [][]byte{
    18  		{1},
    19  		{1, 1},
    20  		{1, 1, 1},
    21  		{1, 1, 1, 1},
    22  		{2},
    23  		{2, 2},
    24  		{2, 2, 2},
    25  	}
    26  	vals := genSeqVals(len(keys))
    27  	checker := newFullSuRFChecker(keys, vals)
    28  	buildAndCheckSuRF(t, keys, vals, checker)
    29  }
    30  
    31  func TestBuildCompressPath(t *testing.T) {
    32  	keys := [][]byte{
    33  		{1, 1, 1},
    34  		{1, 1, 1, 2, 2},
    35  		{1, 1, 1, 2, 2, 2},
    36  		{1, 1, 1, 2, 2, 3},
    37  		{2, 1, 3},
    38  		{2, 2, 3},
    39  		{2, 3, 1, 1, 1, 1, 1, 1, 1},
    40  		{2, 3, 1, 1, 1, 2, 2, 2, 2},
    41  	}
    42  	vals := genSeqVals(len(keys))
    43  	checker := newFullSuRFChecker(keys, vals)
    44  	buildAndCheckSuRF(t, keys, vals, checker)
    45  }
    46  
    47  func TestBuildSuffixKeys(t *testing.T) {
    48  	keys := [][]byte{
    49  		bytes.Repeat([]byte{1}, 30),
    50  		bytes.Repeat([]byte{2}, 30),
    51  		bytes.Repeat([]byte{3}, 30),
    52  		bytes.Repeat([]byte{4}, 30),
    53  	}
    54  	vals := genSeqVals(len(keys))
    55  	checker := newFullSuRFChecker(keys, vals)
    56  	buildAndCheckSuRF(t, keys, vals, checker)
    57  }
    58  
    59  func TestRandomKeysSparse(t *testing.T) {
    60  	keys := genRandomKeys(2000000, 60, 0)
    61  	vals := genSeqVals(len(keys))
    62  	checker := newFullSuRFChecker(keys, vals)
    63  	buildAndCheckSuRF(t, keys, vals, checker)
    64  }
    65  
    66  func TestRandomKeysPrefixGrowth(t *testing.T) {
    67  	keys := genRandomKeys(100, 10, 200)
    68  	vals := genSeqVals(len(keys))
    69  	checker := newFullSuRFChecker(keys, vals)
    70  	buildAndCheckSuRF(t, keys, vals, checker)
    71  }
    72  
    73  func TestSeekKeys(t *testing.T) {
    74  	keys := genRandomKeys(50, 10, 300)
    75  	insert, vals, seek := splitKeys(keys)
    76  	checker := func(t *testing.T, surf *SuRF) {
    77  		it := surf.NewIterator()
    78  		for i, k := range seek {
    79  			it.Seek(k)
    80  			require.True(t, it.Valid())
    81  			require.True(t, endian.Uint32(it.Value()) <= endian.Uint32(vals[i]))
    82  		}
    83  	}
    84  
    85  	buildAndCheckSuRF(t, insert, vals, checker)
    86  }
    87  
    88  func TestMarshal(t *testing.T) {
    89  	keys := genRandomKeys(30, 20, 300)
    90  	vals := make([][]byte, len(keys))
    91  	for i := range keys {
    92  		vals[i] = make([]byte, 4)
    93  		endian.PutUint32(vals[i], uint32(i))
    94  	}
    95  	b := NewBuilder(4, 13, 13)
    96  	s1 := b.Build(keys, vals, 60)
    97  	var s2 SuRF
    98  	buf := s1.Marshal()
    99  	s2.Unmarshal(buf)
   100  	s1.checkEquals(t, &s2)
   101  	newFullSuRFChecker(keys, vals)(t, &s2)
   102  }
   103  
   104  func splitKeys(keys [][]byte) (a, aIdx, b [][]byte) {
   105  	a = keys[:0]
   106  	b = make([][]byte, 0, len(keys)/2)
   107  	aIdx = make([][]byte, 0, len(keys)/2)
   108  	for i := 0; i < len(keys) & ^1; i += 2 {
   109  		b = append(b, keys[i])
   110  		a = append(a, keys[i+1])
   111  		val := make([]byte, 4)
   112  		endian.PutUint32(val, uint32(i+1))
   113  		aIdx = append(aIdx, val)
   114  	}
   115  	return
   116  }
   117  
   118  // max key length is `initLen * (round + 1)`
   119  // max result size is (initSize + initSize * (round + 1)) * (round + 1) / 2
   120  // you can use small round (0 is allowed) to generate a sparse key set,
   121  // or use a large round to generate a key set which has many common prefixes.
   122  func genRandomKeys(initSize, initLen, round int) [][]byte {
   123  	start := time.Now()
   124  	keys := make([][]byte, initSize)
   125  	rand := rand.New(rand.NewSource(start.Unix()))
   126  	for i := range keys {
   127  		keys[i] = make([]byte, rand.Intn(initLen)+1)
   128  		rand.Read(keys[i])
   129  	}
   130  
   131  	for r := 1; r <= round; r++ {
   132  		for i := 0; i < initSize*r; i++ {
   133  			k := make([]byte, len(keys[i])+rand.Intn(initLen)+1)
   134  			copy(k, keys[i])
   135  			rand.Read(k[len(keys[i]):])
   136  			keys = append(keys, k)
   137  		}
   138  	}
   139  
   140  	sort.Slice(keys, func(i, j int) bool {
   141  		return bytes.Compare(keys[i], keys[j]) < 0
   142  	})
   143  
   144  	var prev []byte
   145  	result := keys[:0]
   146  	for _, k := range keys {
   147  		if bytes.Equal(prev, k) {
   148  			continue
   149  		}
   150  		prev = k
   151  		result = append(result, k)
   152  	}
   153  	for i := len(result); i < len(keys); i++ {
   154  		keys[i] = nil
   155  	}
   156  	log.Info("keys generated", zap.Int("count", len(result)), zap.Duration("time", time.Since(start)), zap.Int64("seed", start.Unix()))
   157  
   158  	return result
   159  }
   160  
   161  func genSeqVals(n int) [][]byte {
   162  	vals := make([][]byte, n)
   163  	for i := 0; i < n; i++ {
   164  		vals[i] = make([]byte, 4)
   165  		endian.PutUint32(vals[i], uint32(i))
   166  	}
   167  	return vals
   168  }
   169  
   170  func buildAndCheckSuRF(t *testing.T, keys, vals [][]byte, checker func(t *testing.T, surf *SuRF)) {
   171  	suffixLens := [][]uint32{
   172  		{0, 0},
   173  		{4, 0},
   174  		{13, 0},
   175  		{32, 0},
   176  		{0, 4},
   177  		{0, 13},
   178  		{0, 32},
   179  		{3, 3},
   180  		{8, 8},
   181  	}
   182  
   183  	for _, sl := range suffixLens {
   184  		b := NewBuilder(4, sl[0], sl[1])
   185  
   186  		b.totalCount = len(keys)
   187  		b.buildNodes(keys, vals, 0, 0, 0)
   188  		for i := 0; i < b.treeHeight(); i++ {
   189  			b.sparseStartLevel = uint32(i)
   190  			b.ldLabels = b.ldLabels[:0]
   191  			b.ldHasChild = b.ldHasChild[:0]
   192  			b.ldIsPrefix = b.ldIsPrefix[:0]
   193  			b.buildDense()
   194  
   195  			surf := new(SuRF)
   196  			surf.ld.Init(b)
   197  			surf.ls.Init(b)
   198  
   199  			t.Run(fmt.Sprintf("cutoff=%d,hashLen=%d,realLen=%d", i, sl[0], sl[1]), func(t *testing.T) {
   200  				t.Parallel()
   201  				checker(t, surf)
   202  			})
   203  		}
   204  	}
   205  }
   206  
   207  func newFullSuRFChecker(keys, vals [][]byte) func(t *testing.T, surf *SuRF) {
   208  	return func(t *testing.T, surf *SuRF) {
   209  		for i, k := range keys {
   210  			val, ok := surf.Get(k)
   211  			require.True(t, ok)
   212  			require.EqualValues(t, vals[i], val)
   213  		}
   214  
   215  		var i int
   216  		it := surf.NewIterator()
   217  		for it.SeekToFirst(); it.Valid(); it.Next() {
   218  			require.Truef(t, bytes.HasPrefix(keys[i], it.Key()), "%v %v %d", keys[i], it.Key(), i)
   219  			require.EqualValues(t, vals[i], it.Value())
   220  			i++
   221  		}
   222  		require.Equal(t, len(keys), i)
   223  
   224  		i = len(keys) - 1
   225  		for it.SeekToLast(); it.Valid(); it.Prev() {
   226  			require.True(t, bytes.HasPrefix(keys[i], it.Key()))
   227  			require.EqualValues(t, vals[i], it.Value())
   228  			i--
   229  		}
   230  		require.Equal(t, -1, i)
   231  
   232  		for i, k := range keys {
   233  			it.Seek(k)
   234  			if i != 0 {
   235  				cmp := it.compare(keys[i-1])
   236  				require.True(t, cmp > 0)
   237  			}
   238  			if i != len(keys)-1 {
   239  				cmp := it.compare(keys[i+1])
   240  				require.True(t, cmp < 0 || cmp == couldBePositive)
   241  			}
   242  			cmp := it.compare(k)
   243  			require.True(t, cmp >= 0)
   244  			require.EqualValues(t, vals[i], it.Value())
   245  		}
   246  	}
   247  }
   248  
   249  func (v *rankVector) checkEquals(t *testing.T, o *rankVector) {
   250  	require.Equal(t, v.numBits, o.numBits)
   251  	require.Equal(t, v.lutSize(), o.lutSize())
   252  	if v.numBits != 0 {
   253  		require.Equal(t, v.bits, o.bits)
   254  	}
   255  	require.Equal(t, v.rankLut, o.rankLut)
   256  }
   257  
   258  func (v *selectVector) checkEquals(t *testing.T, o *selectVector) {
   259  	require.Equal(t, v.numBits, o.numBits)
   260  	require.Equal(t, v.numOnes, o.numOnes)
   261  	require.Equal(t, v.lutSize(), o.lutSize())
   262  	require.Equal(t, v.bits, o.bits)
   263  	require.Equal(t, v.selectLut, o.selectLut)
   264  }
   265  
   266  func (v *suffixVector) checkEquals(t *testing.T, o *suffixVector) {
   267  	require.Equal(t, v.numBits, o.numBits)
   268  	if v.numBits != 0 {
   269  		require.Equal(t, v.bits, o.bits)
   270  	}
   271  	require.Equal(t, v.hashSuffixLen, o.hashSuffixLen)
   272  	require.Equal(t, v.realSuffixLen, o.realSuffixLen)
   273  }
   274  
   275  func (v *valueVector) checkEquals(t *testing.T, o *valueVector) {
   276  	require.Equal(t, v.bytes, o.bytes)
   277  	require.Equal(t, v.valueSize, o.valueSize)
   278  }
   279  
   280  func (v *labelVector) checkEquals(t *testing.T, o *labelVector) {
   281  	require.Equal(t, v.labels, o.labels)
   282  }
   283  
   284  func (ld *loudsDense) checkEquals(t *testing.T, o *loudsDense) {
   285  	require.Equal(t, ld.height, o.height)
   286  	ld.labelVec.checkEquals(t, &o.labelVec.rankVector)
   287  	ld.hasChildVec.checkEquals(t, &o.hasChildVec.rankVector)
   288  	ld.isPrefixVec.checkEquals(t, &o.isPrefixVec.rankVector)
   289  	ld.suffixes.checkEquals(t, &o.suffixes)
   290  	ld.values.checkEquals(t, &o.values)
   291  }
   292  
   293  func (ls *loudsSparse) checkEquals(t *testing.T, o *loudsSparse) {
   294  	require.Equal(t, ls.height, o.height)
   295  	require.Equal(t, ls.startLevel, o.startLevel)
   296  	require.Equal(t, ls.denseChildCount, o.denseChildCount)
   297  	require.Equal(t, ls.denseNodeCount, o.denseNodeCount)
   298  	ls.labelVec.checkEquals(t, &o.labelVec)
   299  	ls.hasChildVec.checkEquals(t, &o.hasChildVec.rankVector)
   300  	ls.loudsVec.checkEquals(t, &o.loudsVec)
   301  	ls.suffixes.checkEquals(t, &o.suffixes)
   302  	ls.values.checkEquals(t, &o.values)
   303  }
   304  
   305  func (s *SuRF) checkEquals(t *testing.T, o *SuRF) {
   306  	s.ld.checkEquals(t, &o.ld)
   307  	s.ls.checkEquals(t, &o.ls)
   308  }