github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/mem_table_test.go

// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"bytes"
	"context"
	"io"
	"os"
	"path/filepath"
	"testing"

	"github.com/golang/snappy"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"golang.org/x/sync/errgroup"

	"github.com/dolthub/dolt/go/store/chunks"
	"github.com/dolthub/dolt/go/store/d"
	"github.com/dolthub/dolt/go/store/hash"
	"github.com/dolthub/dolt/go/store/types"
)

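// testMDChunks is a handful of small chunks built from the opening lines of
// Moby-Dick, giving the tests below realistic, non-trivial chunk data.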
var testMDChunks = []chunks.Chunk{
	mustChunk(types.EncodeValue(types.String("Call me Ishmael. Some years ago—never mind how long precisely—having little or no money in my purse, "), types.Format_Default)),
	mustChunk(types.EncodeValue(types.String("and nothing particular to interest me on shore, I thought I would sail about a little and see the watery "), types.Format_Default)),
	mustChunk(types.EncodeValue(types.String("part of the world. It is a way I have of driving off the spleen and regulating the "), types.Format_Default)),
	mustChunk(types.EncodeValue(types.String("circulation. Whenever I find myself growing grim about the mouth; whenever it is a damp, drizzly "), types.Format_Default)),
	mustChunk(types.EncodeValue(types.String("November in my soul; whenever I find myself involuntarily pausing before coffin warehouses, and bringing "), types.Format_Default)),
	mustChunk(types.EncodeValue(types.String("funeral I meet; and especially whenever my hypos get such an upper hand of me, that it requires "), types.Format_Default)),
	mustChunk(types.EncodeValue(types.String("a strong moral principle to prevent me from deliberately stepping into the street, and methodically "), types.Format_Default)),
	mustChunk(types.EncodeValue(types.String("knocking people’s hats off—then, I account it high time to get to sea as soon as I can."), types.Format_Default)),
}

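// testMDChunksSize is the total raw byte size of testMDChunks, computed once at
// package init.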
var testMDChunksSize uint64

func init() {
	for _, chunk := range testMDChunks {
		testMDChunksSize += uint64(len(chunk.Data()))
	}
}

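// mustChunk panics if err is non-nil, allowing chunk construction to be used
// inline in the var block above.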
func mustChunk(chunk chunks.Chunk, err error) chunks.Chunk {
	d.PanicIfError(err)
	return chunk
}

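// TestWriteChunks checks that WriteChunks produces table data for testMDChunks
// that can be written out to a file without error.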
func TestWriteChunks(t *testing.T) {
	name, data, err := WriteChunks(testMDChunks)
	require.NoError(t, err)

	dir, err := os.MkdirTemp("", "write_chunks_test")
	require.NoError(t, err)
	defer os.RemoveAll(dir)

	// Join the directory and file name so the table file lands inside the temp dir.
	err = os.WriteFile(filepath.Join(dir, name), data, os.ModePerm)
	require.NoError(t, err)
}

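// TestMemTableAddHasGetChunk verifies that chunks added to a memTable are
// visible via has and get, and that unknown addresses are not.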
func TestMemTableAddHasGetChunk(t *testing.T) {
	assert := assert.New(t)
	mt := newMemTable(1024)

	chunks := [][]byte{
		[]byte("hello2"),
		[]byte("goodbye2"),
		[]byte("badbye2"),
	}

	for _, c := range chunks {
		assert.Equal(mt.addChunk(computeAddr(c), c), chunkAdded)
	}

	assertChunksInReader(chunks, mt, assert)

	for _, c := range chunks {
		data, err := mt.get(context.Background(), computeAddr(c), &Stats{})
		require.NoError(t, err)
		assert.Equal(bytes.Compare(c, data), 0)
	}

	notPresent := []byte("nope")
	assert.False(mt.has(computeAddr(notPresent)))
	data, err := mt.get(context.Background(), computeAddr(notPresent), &Stats{})
	require.NoError(t, err)
	assert.Nil(data)
}

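// TestMemTableAddOverflowChunk verifies that a memTable stops accepting chunks
// once its size budget is exhausted.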
func TestMemTableAddOverflowChunk(t *testing.T) {
	memTableSize := uint64(1024)

	assert := assert.New(t)
	big := make([]byte, memTableSize)
	little := []byte{0x01}
	{
		bigAddr := computeAddr(big)
		mt := newMemTable(memTableSize)
		assert.Equal(mt.addChunk(bigAddr, big), chunkAdded)
		assert.True(mt.has(bigAddr))
		assert.Equal(mt.addChunk(computeAddr(little), little), chunkNotAdded)
		assert.False(mt.has(computeAddr(little)))
	}

	{
		big := big[:memTableSize-1]
		bigAddr := computeAddr(big)
		mt := newMemTable(memTableSize)
		assert.Equal(mt.addChunk(bigAddr, big), chunkAdded)
		assert.True(mt.has(bigAddr))
		assert.Equal(mt.addChunk(computeAddr(little), little), chunkAdded)
		assert.True(mt.has(computeAddr(little)))
		other := []byte("o")
		assert.Equal(mt.addChunk(computeAddr(other), other), chunkNotAdded)
		assert.False(mt.has(computeAddr(other)))
	}
}

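// TestMemTableWrite verifies that mt.write skips chunks that are already
// present in the supplied chunk sources and only persists novel chunks.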
func TestMemTableWrite(t *testing.T) {
	ctx := context.Background()
	assert := assert.New(t)
	mt := newMemTable(1024)

	chunks := [][]byte{
		[]byte("hello2"),
		[]byte("goodbye2"),
		[]byte("badbye2"),
	}

	for _, c := range chunks {
		assert.Equal(mt.addChunk(computeAddr(c), c), chunkAdded)
	}

	td1, _, err := buildTable(chunks[1:2])
	require.NoError(t, err)
	ti1, err := parseTableIndexByCopy(ctx, td1, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	tr1, err := newTableReader(ti1, tableReaderAtFromBytes(td1), fileBlockSize)
	require.NoError(t, err)
	defer tr1.close()
	assert.True(tr1.has(computeAddr(chunks[1])))

	td2, _, err := buildTable(chunks[2:])
	require.NoError(t, err)
	ti2, err := parseTableIndexByCopy(ctx, td2, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	tr2, err := newTableReader(ti2, tableReaderAtFromBytes(td2), fileBlockSize)
	require.NoError(t, err)
	defer tr2.close()
	assert.True(tr2.has(computeAddr(chunks[2])))

	_, data, count, err := mt.write(chunkReaderGroup{tr1, tr2}, &Stats{})
	require.NoError(t, err)
	assert.Equal(uint32(1), count)

	ti, err := parseTableIndexByCopy(ctx, data, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	outReader, err := newTableReader(ti, tableReaderAtFromBytes(data), fileBlockSize)
	require.NoError(t, err)
	defer outReader.close()
	assert.True(outReader.has(computeAddr(chunks[0])))
	assert.False(outReader.has(computeAddr(chunks[1])))
	assert.False(outReader.has(computeAddr(chunks[2])))
}

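// tableReaderAtAdapter adapts an in-memory bytes.Reader to the tableReaderAt
// interface so freshly built table data can be read back in these tests.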
type tableReaderAtAdapter struct {
	br *bytes.Reader
}

func tableReaderAtFromBytes(b []byte) tableReaderAt {
	return tableReaderAtAdapter{bytes.NewReader(b)}
}

func (adapter tableReaderAtAdapter) Close() error {
	return nil
}

func (adapter tableReaderAtAdapter) clone() (tableReaderAt, error) {
	return adapter, nil
}

func (adapter tableReaderAtAdapter) Reader(ctx context.Context) (io.ReadCloser, error) {
	r := *adapter.br
	return io.NopCloser(&r), nil
}

func (adapter tableReaderAtAdapter) ReadAtWithStats(ctx context.Context, p []byte, off int64, stats *Stats) (n int, err error) {
	return adapter.br.ReadAt(p, off)
}

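// TestMemTableSnappyWriteOutOfLine verifies that mt.write panics if the snappy
// encoder hands back a buffer other than the destination it was given.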
func TestMemTableSnappyWriteOutOfLine(t *testing.T) {
	assert := assert.New(t)
	mt := newMemTable(1024)

	chunks := [][]byte{
		[]byte("hello2"),
		[]byte("goodbye2"),
		[]byte("badbye2"),
	}

	for _, c := range chunks {
		assert.Equal(mt.addChunk(computeAddr(c), c), chunkAdded)
	}
	mt.snapper = &outOfLineSnappy{[]bool{false, true, false}} // chunks[1] should trigger a panic

	assert.Panics(func() { mt.write(nil, &Stats{}) })
}

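// outOfLineSnappy wraps snappy.Encode; for each call it pops a bool from policy
// and, when true, encodes into a freshly allocated buffer instead of dst,
// simulating an out-of-line encode.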
type outOfLineSnappy struct {
	policy []bool
}

func (o *outOfLineSnappy) Encode(dst, src []byte) []byte {
	outOfLine := false
	if len(o.policy) > 0 {
		outOfLine = o.policy[0]
		o.policy = o.policy[1:]
	}
	if outOfLine {
		return snappy.Encode(nil, src)
	}
	return snappy.Encode(dst, src)
}

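// chunkReaderGroup implements chunkReader by fanning each call out across its
// member readers.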
type chunkReaderGroup []chunkReader

func (crg chunkReaderGroup) has(h hash.Hash) (bool, error) {
	for _, haver := range crg {
		ok, err := haver.has(h)

		if err != nil {
			return false, err
		}
		if ok {
			return true, nil
		}
	}

	return false, nil
}

func (crg chunkReaderGroup) get(ctx context.Context, h hash.Hash, stats *Stats) ([]byte, error) {
	for _, haver := range crg {
		if data, err := haver.get(ctx, h, stats); err != nil {
			return nil, err
		} else if data != nil {
			return data, nil
		}
	}

	return nil, nil
}

func (crg chunkReaderGroup) hasMany(addrs []hasRecord) (bool, error) {
	for _, haver := range crg {
		remaining, err := haver.hasMany(addrs)

		if err != nil {
			return false, err
		}

		if !remaining {
			return false, nil
		}
	}
	return true, nil
}

func (crg chunkReaderGroup) getMany(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, *chunks.Chunk), stats *Stats) (bool, error) {
	for _, haver := range crg {
		remaining, err := haver.getMany(ctx, eg, reqs, found, stats)
		if err != nil {
			return true, err
		}
		if !remaining {
			return false, nil
		}
	}
	return true, nil
}

func (crg chunkReaderGroup) getManyCompressed(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, CompressedChunk), stats *Stats) (bool, error) {
	for _, haver := range crg {
		remaining, err := haver.getManyCompressed(ctx, eg, reqs, found, stats)
		if err != nil {
			return true, err
		}
		if !remaining {
			return false, nil
		}
	}
	return true, nil
}

func (crg chunkReaderGroup) count() (count uint32, err error) {
	for _, haver := range crg {
		count += mustUint32(haver.count())
	}
	return
}

func (crg chunkReaderGroup) uncompressedLen() (data uint64, err error) {
	for _, haver := range crg {
		data += mustUint64(haver.uncompressedLen())
	}
	return
}

func (crg chunkReaderGroup) close() error {
	var firstErr error
	for _, c := range crg {
		err := c.close()
		if err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}