github.com/pingcap/badger@v1.5.1-0.20230103063557-828f39b09b6d/surf/surf_test.go (about) 1 package surf 2 3 import ( 4 "bytes" 5 "fmt" 6 "math/rand" 7 "sort" 8 "testing" 9 "time" 10 11 "github.com/pingcap/log" 12 "github.com/stretchr/testify/require" 13 "go.uber.org/zap" 14 ) 15 16 func TestBuildPrefixKeys(t *testing.T) { 17 keys := [][]byte{ 18 {1}, 19 {1, 1}, 20 {1, 1, 1}, 21 {1, 1, 1, 1}, 22 {2}, 23 {2, 2}, 24 {2, 2, 2}, 25 } 26 vals := genSeqVals(len(keys)) 27 checker := newFullSuRFChecker(keys, vals) 28 buildAndCheckSuRF(t, keys, vals, checker) 29 } 30 31 func TestBuildCompressPath(t *testing.T) { 32 keys := [][]byte{ 33 {1, 1, 1}, 34 {1, 1, 1, 2, 2}, 35 {1, 1, 1, 2, 2, 2}, 36 {1, 1, 1, 2, 2, 3}, 37 {2, 1, 3}, 38 {2, 2, 3}, 39 {2, 3, 1, 1, 1, 1, 1, 1, 1}, 40 {2, 3, 1, 1, 1, 2, 2, 2, 2}, 41 } 42 vals := genSeqVals(len(keys)) 43 checker := newFullSuRFChecker(keys, vals) 44 buildAndCheckSuRF(t, keys, vals, checker) 45 } 46 47 func TestBuildSuffixKeys(t *testing.T) { 48 keys := [][]byte{ 49 bytes.Repeat([]byte{1}, 30), 50 bytes.Repeat([]byte{2}, 30), 51 bytes.Repeat([]byte{3}, 30), 52 bytes.Repeat([]byte{4}, 30), 53 } 54 vals := genSeqVals(len(keys)) 55 checker := newFullSuRFChecker(keys, vals) 56 buildAndCheckSuRF(t, keys, vals, checker) 57 } 58 59 func TestRandomKeysSparse(t *testing.T) { 60 keys := genRandomKeys(2000000, 60, 0) 61 vals := genSeqVals(len(keys)) 62 checker := newFullSuRFChecker(keys, vals) 63 buildAndCheckSuRF(t, keys, vals, checker) 64 } 65 66 func TestRandomKeysPrefixGrowth(t *testing.T) { 67 keys := genRandomKeys(100, 10, 200) 68 vals := genSeqVals(len(keys)) 69 checker := newFullSuRFChecker(keys, vals) 70 buildAndCheckSuRF(t, keys, vals, checker) 71 } 72 73 func TestSeekKeys(t *testing.T) { 74 keys := genRandomKeys(50, 10, 300) 75 insert, vals, seek := splitKeys(keys) 76 checker := func(t *testing.T, surf *SuRF) { 77 it := surf.NewIterator() 78 for i, k := range seek { 79 it.Seek(k) 80 require.True(t, it.Valid()) 81 require.True(t, endian.Uint32(it.Value()) <= endian.Uint32(vals[i])) 82 } 83 } 84 85 buildAndCheckSuRF(t, insert, vals, checker) 86 } 87 88 func TestMarshal(t *testing.T) { 89 keys := genRandomKeys(30, 20, 300) 90 vals := make([][]byte, len(keys)) 91 for i := range keys { 92 vals[i] = make([]byte, 4) 93 endian.PutUint32(vals[i], uint32(i)) 94 } 95 b := NewBuilder(4, 13, 13) 96 s1 := b.Build(keys, vals, 60) 97 var s2 SuRF 98 buf := s1.Marshal() 99 s2.Unmarshal(buf) 100 s1.checkEquals(t, &s2) 101 newFullSuRFChecker(keys, vals)(t, &s2) 102 } 103 104 func splitKeys(keys [][]byte) (a, aIdx, b [][]byte) { 105 a = keys[:0] 106 b = make([][]byte, 0, len(keys)/2) 107 aIdx = make([][]byte, 0, len(keys)/2) 108 for i := 0; i < len(keys) & ^1; i += 2 { 109 b = append(b, keys[i]) 110 a = append(a, keys[i+1]) 111 val := make([]byte, 4) 112 endian.PutUint32(val, uint32(i+1)) 113 aIdx = append(aIdx, val) 114 } 115 return 116 } 117 118 // max key length is `initLen * (round + 1)` 119 // max result size is (initSize + initSize * (round + 1)) * (round + 1) / 2 120 // you can use small round (0 is allowed) to generate a sparse key set, 121 // or use a large round to generate a key set which has many common prefixes. 122 func genRandomKeys(initSize, initLen, round int) [][]byte { 123 start := time.Now() 124 keys := make([][]byte, initSize) 125 rand := rand.New(rand.NewSource(start.Unix())) 126 for i := range keys { 127 keys[i] = make([]byte, rand.Intn(initLen)+1) 128 rand.Read(keys[i]) 129 } 130 131 for r := 1; r <= round; r++ { 132 for i := 0; i < initSize*r; i++ { 133 k := make([]byte, len(keys[i])+rand.Intn(initLen)+1) 134 copy(k, keys[i]) 135 rand.Read(k[len(keys[i]):]) 136 keys = append(keys, k) 137 } 138 } 139 140 sort.Slice(keys, func(i, j int) bool { 141 return bytes.Compare(keys[i], keys[j]) < 0 142 }) 143 144 var prev []byte 145 result := keys[:0] 146 for _, k := range keys { 147 if bytes.Equal(prev, k) { 148 continue 149 } 150 prev = k 151 result = append(result, k) 152 } 153 for i := len(result); i < len(keys); i++ { 154 keys[i] = nil 155 } 156 log.Info("keys generated", zap.Int("count", len(result)), zap.Duration("time", time.Since(start)), zap.Int64("seed", start.Unix())) 157 158 return result 159 } 160 161 func genSeqVals(n int) [][]byte { 162 vals := make([][]byte, n) 163 for i := 0; i < n; i++ { 164 vals[i] = make([]byte, 4) 165 endian.PutUint32(vals[i], uint32(i)) 166 } 167 return vals 168 } 169 170 func buildAndCheckSuRF(t *testing.T, keys, vals [][]byte, checker func(t *testing.T, surf *SuRF)) { 171 suffixLens := [][]uint32{ 172 {0, 0}, 173 {4, 0}, 174 {13, 0}, 175 {32, 0}, 176 {0, 4}, 177 {0, 13}, 178 {0, 32}, 179 {3, 3}, 180 {8, 8}, 181 } 182 183 for _, sl := range suffixLens { 184 b := NewBuilder(4, sl[0], sl[1]) 185 186 b.totalCount = len(keys) 187 b.buildNodes(keys, vals, 0, 0, 0) 188 for i := 0; i < b.treeHeight(); i++ { 189 b.sparseStartLevel = uint32(i) 190 b.ldLabels = b.ldLabels[:0] 191 b.ldHasChild = b.ldHasChild[:0] 192 b.ldIsPrefix = b.ldIsPrefix[:0] 193 b.buildDense() 194 195 surf := new(SuRF) 196 surf.ld.Init(b) 197 surf.ls.Init(b) 198 199 t.Run(fmt.Sprintf("cutoff=%d,hashLen=%d,realLen=%d", i, sl[0], sl[1]), func(t *testing.T) { 200 t.Parallel() 201 checker(t, surf) 202 }) 203 } 204 } 205 } 206 207 func newFullSuRFChecker(keys, vals [][]byte) func(t *testing.T, surf *SuRF) { 208 return func(t *testing.T, surf *SuRF) { 209 for i, k := range keys { 210 val, ok := surf.Get(k) 211 require.True(t, ok) 212 require.EqualValues(t, vals[i], val) 213 } 214 215 var i int 216 it := surf.NewIterator() 217 for it.SeekToFirst(); it.Valid(); it.Next() { 218 require.Truef(t, bytes.HasPrefix(keys[i], it.Key()), "%v %v %d", keys[i], it.Key(), i) 219 require.EqualValues(t, vals[i], it.Value()) 220 i++ 221 } 222 require.Equal(t, len(keys), i) 223 224 i = len(keys) - 1 225 for it.SeekToLast(); it.Valid(); it.Prev() { 226 require.True(t, bytes.HasPrefix(keys[i], it.Key())) 227 require.EqualValues(t, vals[i], it.Value()) 228 i-- 229 } 230 require.Equal(t, -1, i) 231 232 for i, k := range keys { 233 it.Seek(k) 234 if i != 0 { 235 cmp := it.compare(keys[i-1]) 236 require.True(t, cmp > 0) 237 } 238 if i != len(keys)-1 { 239 cmp := it.compare(keys[i+1]) 240 require.True(t, cmp < 0 || cmp == couldBePositive) 241 } 242 cmp := it.compare(k) 243 require.True(t, cmp >= 0) 244 require.EqualValues(t, vals[i], it.Value()) 245 } 246 } 247 } 248 249 func (v *rankVector) checkEquals(t *testing.T, o *rankVector) { 250 require.Equal(t, v.numBits, o.numBits) 251 require.Equal(t, v.lutSize(), o.lutSize()) 252 if v.numBits != 0 { 253 require.Equal(t, v.bits, o.bits) 254 } 255 require.Equal(t, v.rankLut, o.rankLut) 256 } 257 258 func (v *selectVector) checkEquals(t *testing.T, o *selectVector) { 259 require.Equal(t, v.numBits, o.numBits) 260 require.Equal(t, v.numOnes, o.numOnes) 261 require.Equal(t, v.lutSize(), o.lutSize()) 262 require.Equal(t, v.bits, o.bits) 263 require.Equal(t, v.selectLut, o.selectLut) 264 } 265 266 func (v *suffixVector) checkEquals(t *testing.T, o *suffixVector) { 267 require.Equal(t, v.numBits, o.numBits) 268 if v.numBits != 0 { 269 require.Equal(t, v.bits, o.bits) 270 } 271 require.Equal(t, v.hashSuffixLen, o.hashSuffixLen) 272 require.Equal(t, v.realSuffixLen, o.realSuffixLen) 273 } 274 275 func (v *valueVector) checkEquals(t *testing.T, o *valueVector) { 276 require.Equal(t, v.bytes, o.bytes) 277 require.Equal(t, v.valueSize, o.valueSize) 278 } 279 280 func (v *labelVector) checkEquals(t *testing.T, o *labelVector) { 281 require.Equal(t, v.labels, o.labels) 282 } 283 284 func (ld *loudsDense) checkEquals(t *testing.T, o *loudsDense) { 285 require.Equal(t, ld.height, o.height) 286 ld.labelVec.checkEquals(t, &o.labelVec.rankVector) 287 ld.hasChildVec.checkEquals(t, &o.hasChildVec.rankVector) 288 ld.isPrefixVec.checkEquals(t, &o.isPrefixVec.rankVector) 289 ld.suffixes.checkEquals(t, &o.suffixes) 290 ld.values.checkEquals(t, &o.values) 291 } 292 293 func (ls *loudsSparse) checkEquals(t *testing.T, o *loudsSparse) { 294 require.Equal(t, ls.height, o.height) 295 require.Equal(t, ls.startLevel, o.startLevel) 296 require.Equal(t, ls.denseChildCount, o.denseChildCount) 297 require.Equal(t, ls.denseNodeCount, o.denseNodeCount) 298 ls.labelVec.checkEquals(t, &o.labelVec) 299 ls.hasChildVec.checkEquals(t, &o.hasChildVec.rankVector) 300 ls.loudsVec.checkEquals(t, &o.loudsVec) 301 ls.suffixes.checkEquals(t, &o.suffixes) 302 ls.values.checkEquals(t, &o.values) 303 } 304 305 func (s *SuRF) checkEquals(t *testing.T, o *SuRF) { 306 s.ld.checkEquals(t, &o.ld) 307 s.ls.checkEquals(t, &o.ls) 308 }