github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/cmp_chunk_table_writer_test.go

// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nbs

import (
	"bytes"
	"context"
	"sort"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"golang.org/x/sync/errgroup"

	"github.com/dolthub/dolt/go/store/chunks"
	"github.com/dolthub/dolt/go/store/hash"
)

func TestCmpChunkTableWriter(t *testing.T) {
	// Put some chunks in a table file and get back the buffer containing the table file data.
	ctx := context.Background()

	expectedId, buff, err := WriteChunks(testMDChunks)
	require.NoError(t, err)

	// Set up a tableReader to read compressed chunks out of the buffer.
	ti, err := parseTableIndexByCopy(ctx, buff, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	tr, err := newTableReader(ti, tableReaderAtFromBytes(buff), fileBlockSize)
	require.NoError(t, err)
	defer tr.close()

	hashes := make(hash.HashSet)
	for _, chnk := range testMDChunks {
		hashes.Insert(chnk.Hash())
	}

	reqs := toGetRecords(hashes)
	found := make([]CompressedChunk, 0)

	eg, egCtx := errgroup.WithContext(ctx)
	_, err = tr.getManyCompressed(egCtx, eg, reqs, func(ctx context.Context, c CompressedChunk) { found = append(found, c) }, &Stats{})
	require.NoError(t, err)
	require.NoError(t, eg.Wait())

	// For all the chunks we found, write them using the compressed writer.
	tw, err := NewCmpChunkTableWriter("")
	require.NoError(t, err)
	for _, cmpChnk := range found {
		err = tw.AddCmpChunk(cmpChnk)
		require.NoError(t, err)
	}

	id, err := tw.Finish()
	require.NoError(t, err)

	t.Run("ErrDuplicateChunkWritten", func(t *testing.T) {
		tw, err := NewCmpChunkTableWriter("")
		require.NoError(t, err)
		for _, cmpChnk := range found {
			// AddCmpChunk accepts duplicates without error; the
			// duplicate is only detected when the table is finished.
			err = tw.AddCmpChunk(cmpChnk)
			require.NoError(t, err)
			err = tw.AddCmpChunk(cmpChnk)
			require.NoError(t, err)
		}
		_, err = tw.Finish()
		require.ErrorIs(t, err, ErrDuplicateChunkWritten)
	})

	assert.Equal(t, expectedId, id)

	output := bytes.NewBuffer(nil)
	err = tw.Flush(output)
	require.NoError(t, err)

	// Read the flushed table file back and verify it holds the same chunks.
	outputBuff := output.Bytes()
	outputTI, err := parseTableIndexByCopy(ctx, outputBuff, &UnlimitedQuotaProvider{})
	require.NoError(t, err)
	outputTR, err := newTableReader(outputTI, tableReaderAtFromBytes(outputBuff), fileBlockSize)
	require.NoError(t, err)
	defer outputTR.close()

	compareContentsOfTables(t, ctx, hashes, tr, outputTR)
}

func TestContainsDuplicates(t *testing.T) {
	t.Run("Empty", func(t *testing.T) {
		require.False(t, containsDuplicates(prefixIndexSlice{}))
	})
	t.Run("ManyUniqueMatchingPrefixes", func(t *testing.T) {
		// All records share the same (zero) address prefix but differ in
		// the final address byte, so none are true duplicates.
		var recs prefixIndexSlice
		for i := 0; i < 256; i++ {
			var rec prefixIndexRec
			rec.addr[19] = byte(i)
			recs = append(recs, rec)
		}
		sort.Sort(recs)
		require.False(t, containsDuplicates(recs))
	})
	t.Run("OneDuplicate", func(t *testing.T) {
		var recs prefixIndexSlice
		for i := 0; i < 256; i++ {
			var rec prefixIndexRec
			rec.addr[19] = byte(i)
			recs = append(recs, rec)
		}
		{
			// Re-add an address that is already present.
			var rec prefixIndexRec
			rec.addr[19] = byte(128)
			recs = append(recs, rec)
		}
		sort.Sort(recs)
		require.True(t, containsDuplicates(recs))
	})
}

// compareContentsOfTables asserts that two table readers return identical
// chunk data for the given set of hashes.
func compareContentsOfTables(t *testing.T, ctx context.Context, hashes hash.HashSet, expectedRd, actualRd tableReader) {
	expected, err := readAllChunks(ctx, hashes, expectedRd)
	require.NoError(t, err)
	actual, err := readAllChunks(ctx, hashes, actualRd)
	require.NoError(t, err)

	assert.Equal(t, len(expected), len(actual))
	assert.Equal(t, expected, actual)
}

// readAllChunks reads every chunk named in hashes from reader and returns the
// chunk data keyed by chunk hash.
func readAllChunks(ctx context.Context, hashes hash.HashSet, reader tableReader) (map[hash.Hash][]byte, error) {
	reqs := toGetRecords(hashes)
	found := make([]*chunks.Chunk, 0)
	eg, ctx := errgroup.WithContext(ctx)
	_, err := reader.getMany(ctx, eg, reqs, func(ctx context.Context, c *chunks.Chunk) { found = append(found, c) }, &Stats{})
	if err != nil {
		return nil, err
	}
	err = eg.Wait()
	if err != nil {
		return nil, err
	}

	hashToData := make(map[hash.Hash][]byte)
	for _, c := range found {
		hashToData[c.Hash()] = c.Data()
	}

	return hashToData, nil
}
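
// The test below is a minimal sketch of the happy-path CmpChunkTableWriter
// flow without the tableReader round-trip used above. It assumes this
// package's ChunkToCompressedChunk helper, which compresses a raw chunk into
// the CompressedChunk form that AddCmpChunk expects; if that helper is named
// or shaped differently at this commit, treat the body as illustrative only.
func TestCmpChunkTableWriterSketch(t *testing.T) {
	tw, err := NewCmpChunkTableWriter("")
	require.NoError(t, err)

	// Build a compressed chunk directly from raw bytes rather than reading
	// one back out of an existing table file.
	c := chunks.NewChunk([]byte("example chunk data"))
	require.NoError(t, tw.AddCmpChunk(ChunkToCompressedChunk(c)))

	// Finish finalizes the table and returns its name.
	_, err = tw.Finish()
	require.NoError(t, err)

	// Flush writes the finished table file bytes to any io.Writer.
	var out bytes.Buffer
	require.NoError(t, tw.Flush(&out))
	require.NotZero(t, out.Len())
}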