github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/table_test.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package nbs 23 24 import ( 25 "context" 26 "encoding/binary" 27 "fmt" 28 "sort" 29 "testing" 30 31 "github.com/stretchr/testify/assert" 32 "github.com/stretchr/testify/require" 33 "golang.org/x/sync/errgroup" 34 35 "github.com/dolthub/dolt/go/store/chunks" 36 "github.com/dolthub/dolt/go/store/hash" 37 ) 38 39 func buildTable(chunks [][]byte) ([]byte, addr, error) { 40 totalData := uint64(0) 41 for _, chunk := range chunks { 42 totalData += uint64(len(chunk)) 43 } 44 capacity := maxTableSize(uint64(len(chunks)), totalData) 45 46 buff := make([]byte, capacity) 47 48 tw := newTableWriter(buff, nil) 49 50 for _, chunk := range chunks { 51 tw.addChunk(computeAddr(chunk), chunk) 52 } 53 54 length, blockHash, err := tw.finish() 55 56 if err != nil { 57 return nil, addr{}, err 58 } 59 60 return buff[:length], blockHash, nil 61 } 62 63 func mustGetString(assert *assert.Assertions, ctx context.Context, tr tableReader, data []byte) string { 64 bytes, err := tr.get(ctx, computeAddr(data), &Stats{}) 65 assert.NoError(err) 66 return string(bytes) 67 } 68 69 func TestSimple(t *testing.T) { 70 assert := assert.New(t) 71 72 chunks := [][]byte{ 73 []byte("hello2"), 74 []byte("goodbye2"), 75 []byte("badbye2"), 76 } 77 78 tableData, _, err := buildTable(chunks) 79 require.NoError(t, err) 80 ti, err := parseTableIndex(tableData) 81 require.NoError(t, err) 82 tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) 83 84 assertChunksInReader(chunks, tr, assert) 85 86 assert.Equal(string(chunks[0]), mustGetString(assert, context.Background(), tr, chunks[0])) 87 assert.Equal(string(chunks[1]), mustGetString(assert, context.Background(), tr, chunks[1])) 88 assert.Equal(string(chunks[2]), mustGetString(assert, context.Background(), tr, chunks[2])) 89 90 notPresent := [][]byte{ 91 []byte("yo"), 92 []byte("do"), 93 []byte("so much to do"), 94 } 95 96 assertChunksNotInReader(notPresent, tr, assert) 97 98 assert.NotEqual(string(notPresent[0]), mustGetString(assert, context.Background(), tr, notPresent[0])) 99 assert.NotEqual(string(notPresent[1]), mustGetString(assert, context.Background(), tr, notPresent[1])) 100 assert.NotEqual(string(notPresent[2]), mustGetString(assert, context.Background(), tr, notPresent[2])) 101 } 102 103 func assertChunksInReader(chunks [][]byte, r chunkReader, assert *assert.Assertions) { 104 for _, c := range chunks { 105 assert.True(r.has(computeAddr(c))) 106 } 107 } 108 109 func assertChunksNotInReader(chunks [][]byte, r chunkReader, assert *assert.Assertions) { 110 for _, c := range chunks { 111 assert.False(r.has(computeAddr(c))) 112 } 113 } 114 115 func TestHasMany(t *testing.T) { 116 assert := assert.New(t) 117 118 chunks := [][]byte{ 119 []byte("hello2"), 120 []byte("goodbye2"), 121 []byte("badbye2"), 122 } 123 124 tableData, _, err := buildTable(chunks) 125 require.NoError(t, err) 126 ti, err := parseTableIndex(tableData) 127 require.NoError(t, err) 128 tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) 129 130 addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])} 131 hasAddrs := []hasRecord{ 132 {&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), 0, false}, 133 {&addrs[1], binary.BigEndian.Uint64(addrs[1][:addrPrefixSize]), 1, false}, 134 {&addrs[2], binary.BigEndian.Uint64(addrs[2][:addrPrefixSize]), 2, false}, 135 } 136 sort.Sort(hasRecordByPrefix(hasAddrs)) 137 138 _, err = tr.hasMany(hasAddrs) 139 require.NoError(t, err) 140 for _, ha := range hasAddrs { 141 assert.True(ha.has, "Nothing for prefix %d", ha.prefix) 142 } 143 } 144 145 func TestHasManySequentialPrefix(t *testing.T) { 146 assert := assert.New(t) 147 148 // Use bogus addrs so we can generate the case of sequentially non-unique prefixes in the index 149 // Note that these are already sorted 150 addrStrings := []string{ 151 "0rfgadopg6h3fk7d253ivbjsij4qo3nv", 152 "0rfgadopg6h3fk7d253ivbjsij4qo4nv", 153 "0rfgadopg6h3fk7d253ivbjsij4qo9nv", 154 } 155 156 addrs := make([]addr, len(addrStrings)) 157 for i, s := range addrStrings { 158 addrs[i] = addr(hash.Parse(s)) 159 } 160 161 bogusData := []byte("bogus") // doesn't matter what this is. hasMany() won't check chunkRecords 162 totalData := uint64(len(bogusData) * len(addrs)) 163 164 capacity := maxTableSize(uint64(len(addrs)), totalData) 165 buff := make([]byte, capacity) 166 tw := newTableWriter(buff, nil) 167 168 for _, a := range addrs { 169 tw.addChunk(a, bogusData) 170 } 171 172 length, _, err := tw.finish() 173 require.NoError(t, err) 174 buff = buff[:length] 175 176 ti, err := parseTableIndex(buff) 177 require.NoError(t, err) 178 tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize) 179 180 hasAddrs := make([]hasRecord, 2) 181 // Leave out the first address 182 hasAddrs[0] = hasRecord{&addrs[1], addrs[1].Prefix(), 1, false} 183 hasAddrs[1] = hasRecord{&addrs[2], addrs[2].Prefix(), 2, false} 184 185 _, err = tr.hasMany(hasAddrs) 186 require.NoError(t, err) 187 188 for _, ha := range hasAddrs { 189 assert.True(ha.has, fmt.Sprintf("Nothing for prefix %x\n", ha.prefix)) 190 } 191 } 192 193 func TestGetMany(t *testing.T) { 194 assert := assert.New(t) 195 196 data := [][]byte{ 197 []byte("hello2"), 198 []byte("goodbye2"), 199 []byte("badbye2"), 200 } 201 202 tableData, _, err := buildTable(data) 203 require.NoError(t, err) 204 ti, err := parseTableIndex(tableData) 205 require.NoError(t, err) 206 tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) 207 208 addrs := addrSlice{computeAddr(data[0]), computeAddr(data[1]), computeAddr(data[2])} 209 getBatch := []getRecord{ 210 {&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), false}, 211 {&addrs[1], binary.BigEndian.Uint64(addrs[1][:addrPrefixSize]), false}, 212 {&addrs[2], binary.BigEndian.Uint64(addrs[2][:addrPrefixSize]), false}, 213 } 214 sort.Sort(getRecordByPrefix(getBatch)) 215 216 eg, ctx := errgroup.WithContext(context.Background()) 217 218 got := make([]*chunks.Chunk, 0) 219 _, err = tr.getMany(ctx, eg, getBatch, func(c *chunks.Chunk) { got = append(got, c) }, &Stats{}) 220 require.NoError(t, err) 221 require.NoError(t, eg.Wait()) 222 223 assert.True(len(got) == len(getBatch)) 224 } 225 226 func TestCalcReads(t *testing.T) { 227 assert := assert.New(t) 228 229 chunks := [][]byte{ 230 []byte("hello2"), 231 []byte("goodbye2"), 232 []byte("badbye2"), 233 } 234 235 tableData, _, err := buildTable(chunks) 236 require.NoError(t, err) 237 ti, err := parseTableIndex(tableData) 238 require.NoError(t, err) 239 tr := newTableReader(ti, tableReaderAtFromBytes(tableData), 0) 240 addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])} 241 getBatch := []getRecord{ 242 {&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), false}, 243 {&addrs[1], binary.BigEndian.Uint64(addrs[1][:addrPrefixSize]), false}, 244 {&addrs[2], binary.BigEndian.Uint64(addrs[2][:addrPrefixSize]), false}, 245 } 246 247 gb2 := []getRecord{getBatch[0], getBatch[2]} 248 sort.Sort(getRecordByPrefix(getBatch)) 249 250 reads, remaining, err := tr.calcReads(getBatch, 0) 251 require.NoError(t, err) 252 assert.False(remaining) 253 assert.Equal(1, reads) 254 255 sort.Sort(getRecordByPrefix(gb2)) 256 reads, remaining, err = tr.calcReads(gb2, 0) 257 require.NoError(t, err) 258 assert.False(remaining) 259 assert.Equal(2, reads) 260 } 261 262 func TestExtract(t *testing.T) { 263 assert := assert.New(t) 264 265 chunks := [][]byte{ 266 []byte("hello2"), 267 []byte("goodbye2"), 268 []byte("badbye2"), 269 } 270 271 tableData, _, err := buildTable(chunks) 272 require.NoError(t, err) 273 ti, err := parseTableIndex(tableData) 274 require.NoError(t, err) 275 tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) 276 277 addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])} 278 279 chunkChan := make(chan extractRecord) 280 go func() { 281 err := tr.extract(context.Background(), chunkChan) 282 require.NoError(t, err) 283 close(chunkChan) 284 }() 285 286 i := 0 287 for rec := range chunkChan { 288 assert.NotNil(rec.data, "Nothing for", addrs[i]) 289 assert.Equal(addrs[i], rec.a) 290 assert.Equal(chunks[i], rec.data) 291 i++ 292 } 293 } 294 295 func Test65k(t *testing.T) { 296 assert := assert.New(t) 297 298 count := 1 << 16 299 chunks := make([][]byte, count) 300 301 dataFn := func(i int) []byte { 302 return []byte(fmt.Sprintf("data%d", i*2)) 303 } 304 305 for i := 0; i < count; i++ { 306 chunks[i] = dataFn(i) 307 } 308 309 tableData, _, err := buildTable(chunks) 310 require.NoError(t, err) 311 ti, err := parseTableIndex(tableData) 312 require.NoError(t, err) 313 tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) 314 315 for i := 0; i < count; i++ { 316 data := dataFn(i) 317 h := computeAddr(data) 318 assert.True(tr.has(computeAddr(data))) 319 bytes, err := tr.get(context.Background(), h, &Stats{}) 320 require.NoError(t, err) 321 assert.Equal(string(data), string(bytes)) 322 } 323 324 for i := count; i < count*2; i++ { 325 data := dataFn(i) 326 h := computeAddr(data) 327 assert.False(tr.has(computeAddr(data))) 328 bytes, err := tr.get(context.Background(), h, &Stats{}) 329 require.NoError(t, err) 330 assert.NotEqual(string(data), string(bytes)) 331 } 332 } 333 334 // Ensure all addresses share the first 7 bytes. Useful for easily generating tests which have 335 // "prefix" collisions. 336 func computeAddrCommonPrefix(data []byte) addr { 337 a := computeAddrDefault(data) 338 a[0] = 0x01 339 a[1] = 0x23 340 a[2] = 0x45 341 a[3] = 0x67 342 a[4] = 0x89 343 a[5] = 0xab 344 a[6] = 0xcd 345 return a 346 } 347 348 func doTestNGetMany(t *testing.T, count int) { 349 assert := assert.New(t) 350 351 data := make([][]byte, count) 352 353 dataFn := func(i int) []byte { 354 return []byte(fmt.Sprintf("data%d", i*2)) 355 } 356 357 for i := 0; i < count; i++ { 358 data[i] = dataFn(i) 359 } 360 361 tableData, _, err := buildTable(data) 362 require.NoError(t, err) 363 ti, err := parseTableIndex(tableData) 364 require.NoError(t, err) 365 tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize) 366 367 getBatch := make([]getRecord, len(data)) 368 for i := 0; i < count; i++ { 369 a := computeAddr(dataFn(i)) 370 getBatch[i] = getRecord{&a, a.Prefix(), false} 371 } 372 373 sort.Sort(getRecordByPrefix(getBatch)) 374 375 eg, ctx := errgroup.WithContext(context.Background()) 376 377 got := make([]*chunks.Chunk, 0) 378 _, err = tr.getMany(ctx, eg, getBatch, func(c *chunks.Chunk) { got = append(got, c) }, &Stats{}) 379 require.NoError(t, err) 380 require.NoError(t, eg.Wait()) 381 382 assert.True(len(got) == len(getBatch)) 383 } 384 385 func Test65kGetMany(t *testing.T) { 386 doTestNGetMany(t, 1<<16) 387 } 388 389 func Test2kGetManyCommonPrefix(t *testing.T) { 390 computeAddr = computeAddrCommonPrefix 391 defer func() { 392 computeAddr = computeAddrDefault 393 }() 394 395 doTestNGetMany(t, 1<<11) 396 } 397 398 func TestEmpty(t *testing.T) { 399 assert := assert.New(t) 400 401 buff := make([]byte, footerSize) 402 tw := newTableWriter(buff, nil) 403 length, _, err := tw.finish() 404 require.NoError(t, err) 405 assert.True(length == footerSize) 406 }