github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/table_test.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package nbs
    23  
    24  import (
    25  	"context"
    26  	"encoding/binary"
    27  	"fmt"
    28  	"sort"
    29  	"testing"
    30  
    31  	"github.com/stretchr/testify/assert"
    32  	"github.com/stretchr/testify/require"
    33  	"golang.org/x/sync/errgroup"
    34  
    35  	"github.com/dolthub/dolt/go/store/chunks"
    36  	"github.com/dolthub/dolt/go/store/hash"
    37  )
    38  
    39  func buildTable(chunks [][]byte) ([]byte, addr, error) {
    40  	totalData := uint64(0)
    41  	for _, chunk := range chunks {
    42  		totalData += uint64(len(chunk))
    43  	}
    44  	capacity := maxTableSize(uint64(len(chunks)), totalData)
    45  
    46  	buff := make([]byte, capacity)
    47  
    48  	tw := newTableWriter(buff, nil)
    49  
    50  	for _, chunk := range chunks {
    51  		tw.addChunk(computeAddr(chunk), chunk)
    52  	}
    53  
    54  	length, blockHash, err := tw.finish()
    55  
    56  	if err != nil {
    57  		return nil, addr{}, err
    58  	}
    59  
    60  	return buff[:length], blockHash, nil
    61  }
    62  
    63  func mustGetString(assert *assert.Assertions, ctx context.Context, tr tableReader, data []byte) string {
    64  	bytes, err := tr.get(ctx, computeAddr(data), &Stats{})
    65  	assert.NoError(err)
    66  	return string(bytes)
    67  }
    68  
    69  func TestSimple(t *testing.T) {
    70  	assert := assert.New(t)
    71  
    72  	chunks := [][]byte{
    73  		[]byte("hello2"),
    74  		[]byte("goodbye2"),
    75  		[]byte("badbye2"),
    76  	}
    77  
    78  	tableData, _, err := buildTable(chunks)
    79  	require.NoError(t, err)
    80  	ti, err := parseTableIndex(tableData)
    81  	require.NoError(t, err)
    82  	tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
    83  
    84  	assertChunksInReader(chunks, tr, assert)
    85  
    86  	assert.Equal(string(chunks[0]), mustGetString(assert, context.Background(), tr, chunks[0]))
    87  	assert.Equal(string(chunks[1]), mustGetString(assert, context.Background(), tr, chunks[1]))
    88  	assert.Equal(string(chunks[2]), mustGetString(assert, context.Background(), tr, chunks[2]))
    89  
    90  	notPresent := [][]byte{
    91  		[]byte("yo"),
    92  		[]byte("do"),
    93  		[]byte("so much to do"),
    94  	}
    95  
    96  	assertChunksNotInReader(notPresent, tr, assert)
    97  
    98  	assert.NotEqual(string(notPresent[0]), mustGetString(assert, context.Background(), tr, notPresent[0]))
    99  	assert.NotEqual(string(notPresent[1]), mustGetString(assert, context.Background(), tr, notPresent[1]))
   100  	assert.NotEqual(string(notPresent[2]), mustGetString(assert, context.Background(), tr, notPresent[2]))
   101  }
   102  
   103  func assertChunksInReader(chunks [][]byte, r chunkReader, assert *assert.Assertions) {
   104  	for _, c := range chunks {
   105  		assert.True(r.has(computeAddr(c)))
   106  	}
   107  }
   108  
   109  func assertChunksNotInReader(chunks [][]byte, r chunkReader, assert *assert.Assertions) {
   110  	for _, c := range chunks {
   111  		assert.False(r.has(computeAddr(c)))
   112  	}
   113  }
   114  
   115  func TestHasMany(t *testing.T) {
   116  	assert := assert.New(t)
   117  
   118  	chunks := [][]byte{
   119  		[]byte("hello2"),
   120  		[]byte("goodbye2"),
   121  		[]byte("badbye2"),
   122  	}
   123  
   124  	tableData, _, err := buildTable(chunks)
   125  	require.NoError(t, err)
   126  	ti, err := parseTableIndex(tableData)
   127  	require.NoError(t, err)
   128  	tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
   129  
   130  	addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
   131  	hasAddrs := []hasRecord{
   132  		{&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), 0, false},
   133  		{&addrs[1], binary.BigEndian.Uint64(addrs[1][:addrPrefixSize]), 1, false},
   134  		{&addrs[2], binary.BigEndian.Uint64(addrs[2][:addrPrefixSize]), 2, false},
   135  	}
   136  	sort.Sort(hasRecordByPrefix(hasAddrs))
   137  
   138  	_, err = tr.hasMany(hasAddrs)
   139  	require.NoError(t, err)
   140  	for _, ha := range hasAddrs {
   141  		assert.True(ha.has, "Nothing for prefix %d", ha.prefix)
   142  	}
   143  }
   144  
   145  func TestHasManySequentialPrefix(t *testing.T) {
   146  	assert := assert.New(t)
   147  
   148  	// Use bogus addrs so we can generate the case of sequentially non-unique prefixes in the index
   149  	// Note that these are already sorted
   150  	addrStrings := []string{
   151  		"0rfgadopg6h3fk7d253ivbjsij4qo3nv",
   152  		"0rfgadopg6h3fk7d253ivbjsij4qo4nv",
   153  		"0rfgadopg6h3fk7d253ivbjsij4qo9nv",
   154  	}
   155  
   156  	addrs := make([]addr, len(addrStrings))
   157  	for i, s := range addrStrings {
   158  		addrs[i] = addr(hash.Parse(s))
   159  	}
   160  
   161  	bogusData := []byte("bogus") // doesn't matter what this is. hasMany() won't check chunkRecords
   162  	totalData := uint64(len(bogusData) * len(addrs))
   163  
   164  	capacity := maxTableSize(uint64(len(addrs)), totalData)
   165  	buff := make([]byte, capacity)
   166  	tw := newTableWriter(buff, nil)
   167  
   168  	for _, a := range addrs {
   169  		tw.addChunk(a, bogusData)
   170  	}
   171  
   172  	length, _, err := tw.finish()
   173  	require.NoError(t, err)
   174  	buff = buff[:length]
   175  
   176  	ti, err := parseTableIndex(buff)
   177  	require.NoError(t, err)
   178  	tr := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
   179  
   180  	hasAddrs := make([]hasRecord, 2)
   181  	// Leave out the first address
   182  	hasAddrs[0] = hasRecord{&addrs[1], addrs[1].Prefix(), 1, false}
   183  	hasAddrs[1] = hasRecord{&addrs[2], addrs[2].Prefix(), 2, false}
   184  
   185  	_, err = tr.hasMany(hasAddrs)
   186  	require.NoError(t, err)
   187  
   188  	for _, ha := range hasAddrs {
   189  		assert.True(ha.has, fmt.Sprintf("Nothing for prefix %x\n", ha.prefix))
   190  	}
   191  }
   192  
   193  func TestGetMany(t *testing.T) {
   194  	assert := assert.New(t)
   195  
   196  	data := [][]byte{
   197  		[]byte("hello2"),
   198  		[]byte("goodbye2"),
   199  		[]byte("badbye2"),
   200  	}
   201  
   202  	tableData, _, err := buildTable(data)
   203  	require.NoError(t, err)
   204  	ti, err := parseTableIndex(tableData)
   205  	require.NoError(t, err)
   206  	tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
   207  
   208  	addrs := addrSlice{computeAddr(data[0]), computeAddr(data[1]), computeAddr(data[2])}
   209  	getBatch := []getRecord{
   210  		{&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), false},
   211  		{&addrs[1], binary.BigEndian.Uint64(addrs[1][:addrPrefixSize]), false},
   212  		{&addrs[2], binary.BigEndian.Uint64(addrs[2][:addrPrefixSize]), false},
   213  	}
   214  	sort.Sort(getRecordByPrefix(getBatch))
   215  
   216  	eg, ctx := errgroup.WithContext(context.Background())
   217  
   218  	got := make([]*chunks.Chunk, 0)
   219  	_, err = tr.getMany(ctx, eg, getBatch, func(c *chunks.Chunk) { got = append(got, c) }, &Stats{})
   220  	require.NoError(t, err)
   221  	require.NoError(t, eg.Wait())
   222  
   223  	assert.True(len(got) == len(getBatch))
   224  }
   225  
   226  func TestCalcReads(t *testing.T) {
   227  	assert := assert.New(t)
   228  
   229  	chunks := [][]byte{
   230  		[]byte("hello2"),
   231  		[]byte("goodbye2"),
   232  		[]byte("badbye2"),
   233  	}
   234  
   235  	tableData, _, err := buildTable(chunks)
   236  	require.NoError(t, err)
   237  	ti, err := parseTableIndex(tableData)
   238  	require.NoError(t, err)
   239  	tr := newTableReader(ti, tableReaderAtFromBytes(tableData), 0)
   240  	addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
   241  	getBatch := []getRecord{
   242  		{&addrs[0], binary.BigEndian.Uint64(addrs[0][:addrPrefixSize]), false},
   243  		{&addrs[1], binary.BigEndian.Uint64(addrs[1][:addrPrefixSize]), false},
   244  		{&addrs[2], binary.BigEndian.Uint64(addrs[2][:addrPrefixSize]), false},
   245  	}
   246  
   247  	gb2 := []getRecord{getBatch[0], getBatch[2]}
   248  	sort.Sort(getRecordByPrefix(getBatch))
   249  
   250  	reads, remaining, err := tr.calcReads(getBatch, 0)
   251  	require.NoError(t, err)
   252  	assert.False(remaining)
   253  	assert.Equal(1, reads)
   254  
   255  	sort.Sort(getRecordByPrefix(gb2))
   256  	reads, remaining, err = tr.calcReads(gb2, 0)
   257  	require.NoError(t, err)
   258  	assert.False(remaining)
   259  	assert.Equal(2, reads)
   260  }
   261  
   262  func TestExtract(t *testing.T) {
   263  	assert := assert.New(t)
   264  
   265  	chunks := [][]byte{
   266  		[]byte("hello2"),
   267  		[]byte("goodbye2"),
   268  		[]byte("badbye2"),
   269  	}
   270  
   271  	tableData, _, err := buildTable(chunks)
   272  	require.NoError(t, err)
   273  	ti, err := parseTableIndex(tableData)
   274  	require.NoError(t, err)
   275  	tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
   276  
   277  	addrs := addrSlice{computeAddr(chunks[0]), computeAddr(chunks[1]), computeAddr(chunks[2])}
   278  
   279  	chunkChan := make(chan extractRecord)
   280  	go func() {
   281  		err := tr.extract(context.Background(), chunkChan)
   282  		require.NoError(t, err)
   283  		close(chunkChan)
   284  	}()
   285  
   286  	i := 0
   287  	for rec := range chunkChan {
   288  		assert.NotNil(rec.data, "Nothing for", addrs[i])
   289  		assert.Equal(addrs[i], rec.a)
   290  		assert.Equal(chunks[i], rec.data)
   291  		i++
   292  	}
   293  }
   294  
   295  func Test65k(t *testing.T) {
   296  	assert := assert.New(t)
   297  
   298  	count := 1 << 16
   299  	chunks := make([][]byte, count)
   300  
   301  	dataFn := func(i int) []byte {
   302  		return []byte(fmt.Sprintf("data%d", i*2))
   303  	}
   304  
   305  	for i := 0; i < count; i++ {
   306  		chunks[i] = dataFn(i)
   307  	}
   308  
   309  	tableData, _, err := buildTable(chunks)
   310  	require.NoError(t, err)
   311  	ti, err := parseTableIndex(tableData)
   312  	require.NoError(t, err)
   313  	tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
   314  
   315  	for i := 0; i < count; i++ {
   316  		data := dataFn(i)
   317  		h := computeAddr(data)
   318  		assert.True(tr.has(computeAddr(data)))
   319  		bytes, err := tr.get(context.Background(), h, &Stats{})
   320  		require.NoError(t, err)
   321  		assert.Equal(string(data), string(bytes))
   322  	}
   323  
   324  	for i := count; i < count*2; i++ {
   325  		data := dataFn(i)
   326  		h := computeAddr(data)
   327  		assert.False(tr.has(computeAddr(data)))
   328  		bytes, err := tr.get(context.Background(), h, &Stats{})
   329  		require.NoError(t, err)
   330  		assert.NotEqual(string(data), string(bytes))
   331  	}
   332  }
   333  
   334  // Ensure all addresses share the first 7 bytes. Useful for easily generating tests which have
   335  // "prefix" collisions.
   336  func computeAddrCommonPrefix(data []byte) addr {
   337  	a := computeAddrDefault(data)
   338  	a[0] = 0x01
   339  	a[1] = 0x23
   340  	a[2] = 0x45
   341  	a[3] = 0x67
   342  	a[4] = 0x89
   343  	a[5] = 0xab
   344  	a[6] = 0xcd
   345  	return a
   346  }
   347  
   348  func doTestNGetMany(t *testing.T, count int) {
   349  	assert := assert.New(t)
   350  
   351  	data := make([][]byte, count)
   352  
   353  	dataFn := func(i int) []byte {
   354  		return []byte(fmt.Sprintf("data%d", i*2))
   355  	}
   356  
   357  	for i := 0; i < count; i++ {
   358  		data[i] = dataFn(i)
   359  	}
   360  
   361  	tableData, _, err := buildTable(data)
   362  	require.NoError(t, err)
   363  	ti, err := parseTableIndex(tableData)
   364  	require.NoError(t, err)
   365  	tr := newTableReader(ti, tableReaderAtFromBytes(tableData), fileBlockSize)
   366  
   367  	getBatch := make([]getRecord, len(data))
   368  	for i := 0; i < count; i++ {
   369  		a := computeAddr(dataFn(i))
   370  		getBatch[i] = getRecord{&a, a.Prefix(), false}
   371  	}
   372  
   373  	sort.Sort(getRecordByPrefix(getBatch))
   374  
   375  	eg, ctx := errgroup.WithContext(context.Background())
   376  
   377  	got := make([]*chunks.Chunk, 0)
   378  	_, err = tr.getMany(ctx, eg, getBatch, func(c *chunks.Chunk) { got = append(got, c) }, &Stats{})
   379  	require.NoError(t, err)
   380  	require.NoError(t, eg.Wait())
   381  
   382  	assert.True(len(got) == len(getBatch))
   383  }
   384  
   385  func Test65kGetMany(t *testing.T) {
   386  	doTestNGetMany(t, 1<<16)
   387  }
   388  
   389  func Test2kGetManyCommonPrefix(t *testing.T) {
   390  	computeAddr = computeAddrCommonPrefix
   391  	defer func() {
   392  		computeAddr = computeAddrDefault
   393  	}()
   394  
   395  	doTestNGetMany(t, 1<<11)
   396  }
   397  
   398  func TestEmpty(t *testing.T) {
   399  	assert := assert.New(t)
   400  
   401  	buff := make([]byte, footerSize)
   402  	tw := newTableWriter(buff, nil)
   403  	length, _, err := tw.finish()
   404  	require.NoError(t, err)
   405  	assert.True(length == footerSize)
   406  }