github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/cmp_chunk_table_writer_test.go

// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nbs

import (
	"bytes"
	"context"
	"sort"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"golang.org/x/sync/errgroup"

	"github.com/dolthub/dolt/go/store/chunks"
	"github.com/dolthub/dolt/go/store/hash"
)

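// TestCmpChunkTableWriter round-trips chunks through a CmpChunkTableWriter:
// it writes chunks to a table file, reads them back out as compressed chunks,
// re-writes those with the compressed writer, and verifies the resulting
// table file has the same id and contents as the original.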
func TestCmpChunkTableWriter(t *testing.T) {
	// Write some chunks to a table file and get back the buffer containing the table file data
	ctx := context.Background()

	expectedId, buff, err := WriteChunks(testMDChunks)
	require.NoError(t, err)

	// Set up a tableReader to read compressed chunks out of
	ti, err := parseTableIndexByCopy(ctx, buff, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
	require.NoError(t, err)
	defer tr.close()

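	// Collect the addresses of the chunks we just wrote.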
	hashes := make(hash.HashSet)
	for _, chnk := range testMDChunks {
		hashes.Insert(chnk.Hash())
	}

	reqs := toGetRecords(hashes)
	found := make([]CompressedChunk, 0)

	eg, egCtx := errgroup.WithContext(ctx)
	_, err = tr.getManyCompressed(egCtx, eg, reqs, func(ctx context.Context, c CompressedChunk) { found = append(found, c) }, &Stats{})
	require.NoError(t, err)
	require.NoError(t, eg.Wait())

	// For all the chunks we found, write them using the compressed chunk writer
	tw, err := NewCmpChunkTableWriter("")
	require.NoError(t, err)
	for _, cmpChnk := range found {
		err = tw.AddCmpChunk(cmpChnk)
		require.NoError(t, err)
	}

	id, err := tw.Finish()
	require.NoError(t, err)

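	// Finishing a writer that was handed the same chunk twice must fail with ErrDuplicateChunkWritten.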
	t.Run("ErrDuplicateChunkWritten", func(t *testing.T) {
		tw, err := NewCmpChunkTableWriter("")
		require.NoError(t, err)
		for _, cmpChnk := range found {
			err = tw.AddCmpChunk(cmpChnk)
			require.NoError(t, err)
			err = tw.AddCmpChunk(cmpChnk)
			require.NoError(t, err)
		}
		_, err = tw.Finish()
		require.ErrorIs(t, err, ErrDuplicateChunkWritten)
	})

	assert.Equal(t, expectedId, id)

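	// Flush the finished table file into a buffer and verify that reading it back yields the same chunks.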
	output := bytes.NewBuffer(nil)
	err = tw.Flush(output)
	require.NoError(t, err)

	outputBuff := output.Bytes()
	outputTI, err := parseTableIndexByCopy(ctx, outputBuff, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	outputTR, err := newTableReader(outputTI, tableReaderAtFromBytes(outputBuff), fileBlockSize)
	require.NoError(t, err)
	defer outputTR.close()

	compareContentsOfTables(t, ctx, hashes, tr, outputTR)
}

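// TestContainsDuplicates exercises containsDuplicates, which scans a sorted
// prefixIndexSlice for records that share a full address.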
func TestContainsDuplicates(t *testing.T) {
	t.Run("Empty", func(t *testing.T) {
		require.False(t, containsDuplicates(prefixIndexSlice{}))
	})
	t.Run("ManyUniqueMatchingPrefixes", func(t *testing.T) {
		// 256 records share the same address prefix but differ in the final
		// byte, so they match by prefix without being duplicates.
		var recs prefixIndexSlice
		for i := 0; i < 256; i++ {
			var rec prefixIndexRec
			rec.addr[19] = byte(i)
			recs = append(recs, rec)
		}
		sort.Sort(recs)
		require.False(t, containsDuplicates(recs))
	})
	t.Run("OneDuplicate", func(t *testing.T) {
		var recs prefixIndexSlice
		for i := 0; i < 256; i++ {
			var rec prefixIndexRec
			rec.addr[19] = byte(i)
			recs = append(recs, rec)
		}
		// Append a second record with the same address as an existing one.
		{
			var rec prefixIndexRec
			rec.addr[19] = byte(128)
			recs = append(recs, rec)
		}
		sort.Sort(recs)
		require.True(t, containsDuplicates(recs))
	})
}

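// compareContentsOfTables asserts that two table readers return identical data
// for every address in hashes.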
func compareContentsOfTables(t *testing.T, ctx context.Context, hashes hash.HashSet, expectedRd, actualRd tableReader) {
	expected, err := readAllChunks(ctx, hashes, expectedRd)
	require.NoError(t, err)
	actual, err := readAllChunks(ctx, hashes, actualRd)
	require.NoError(t, err)

	assert.Equal(t, len(expected), len(actual))
	assert.Equal(t, expected, actual)
}

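// readAllChunks fetches every chunk named in hashes from the reader and
// returns a map from chunk address to chunk data.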
func readAllChunks(ctx context.Context, hashes hash.HashSet, reader tableReader) (map[hash.Hash][]byte, error) {
	reqs := toGetRecords(hashes)
	found := make([]*chunks.Chunk, 0)
	eg, ctx := errgroup.WithContext(ctx)
	_, err := reader.getMany(ctx, eg, reqs, func(ctx context.Context, c *chunks.Chunk) { found = append(found, c) }, &Stats{})
	if err != nil {
		return nil, err
	}
	err = eg.Wait()
	if err != nil {
		return nil, err
	}

	hashToData := make(map[hash.Hash][]byte)
	for _, c := range found {
		hashToData[c.Hash()] = c.Data()
	}

	return hashToData, nil
}