github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/conjoiner_test.go

// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2017 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"bytes"
	"context"
	"encoding/binary"
	"sort"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/dolthub/dolt/go/store/blobstore"
	"github.com/dolthub/dolt/go/store/constants"
	"github.com/dolthub/dolt/go/store/hash"
)

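// tableSpecsByAscendingCount orders tableSpecs by ascending chunk count, breaking ties by
// table name.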
type tableSpecsByAscendingCount []tableSpec

func (ts tableSpecsByAscendingCount) Len() int { return len(ts) }
func (ts tableSpecsByAscendingCount) Less(i, j int) bool {
	tsI, tsJ := ts[i], ts[j]
	if tsI.chunkCount == tsJ.chunkCount {
		return bytes.Compare(tsI.name[:], tsJ.name[:]) < 0
	}
	return tsI.chunkCount < tsJ.chunkCount
}
func (ts tableSpecsByAscendingCount) Swap(i, j int) { ts[i], ts[j] = ts[j], ts[i] }

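// makeTestSrcs persists len(tableSizes) tables through p, each holding tableSizes[i] unique
// four-byte chunks, and returns a cloned chunkSource for every persisted table.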
func makeTestSrcs(t *testing.T, tableSizes []uint32, p tablePersister) (srcs chunkSources) {
	count := uint32(0)
	nextChunk := func() (chunk []byte) {
		chunk = make([]byte, 4)
		binary.BigEndian.PutUint32(chunk, count)
		count++
		return chunk
	}

	for _, s := range tableSizes {
		mt := newMemTable(testMemTableSize)
		for i := uint32(0); i < s; i++ {
			c := nextChunk()
			mt.addChunk(computeAddr(c), c)
		}
		cs, err := p.Persist(context.Background(), mt, nil, &Stats{})
		require.NoError(t, err)
		c, err := cs.clone()
		require.NoError(t, err)
		srcs = append(srcs, c)
		cs.close()
	}
	return
}

// makeTestTableSpecs persists len(tableSizes) tables via p, each containing tableSizes[N]
// unique chunks, and returns a tableSpec for each one.
func makeTestTableSpecs(t *testing.T, tableSizes []uint32, p tablePersister) (specs []tableSpec) {
	for _, src := range makeTestSrcs(t, tableSizes, p) {
		specs = append(specs, tableSpec{src.hash(), mustUint32(src.count())})
		err := src.close()
		require.NoError(t, err)
	}
	return
}

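// TestConjoin runs the conjoin test suite against several tablePersister implementations:
// an in-memory fake, an in-memory blobstore, and a local-filesystem blobstore.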
func TestConjoin(t *testing.T) {
	t.Run("fake table persister", func(t *testing.T) {
		testConjoin(t, func(*testing.T) tablePersister {
			return newFakeTablePersister(&UnlimitedQuotaProvider{})
		})
	})
	t.Run("in-memory blobstore persister", func(t *testing.T) {
		testConjoin(t, func(*testing.T) tablePersister {
			return &blobstorePersister{
				bs:        blobstore.NewInMemoryBlobstore(""),
				blockSize: 4096,
				q:         &UnlimitedQuotaProvider{},
			}
		})
	})
	t.Run("local fs blobstore persister", func(t *testing.T) {
		testConjoin(t, func(*testing.T) tablePersister {
			return &blobstorePersister{
				bs:        blobstore.NewLocalBlobstore(t.TempDir()),
				blockSize: 4096,
				q:         &UnlimitedQuotaProvider{},
			}
		})
	})
}

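// testConjoin exercises conjoin against the persister produced by factory: uninterrupted
// compaction, compaction racing a concurrent manifest update, compaction whose inputs are
// dropped upstream, and the same scenarios with appendix tables present.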
func testConjoin(t *testing.T, factory func(t *testing.T) tablePersister) {
	stats := &Stats{}
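	// setup persists tables of the given sizes through a fresh persister, installs them in a
	// fakeManifest, and returns the parsed upstream manifest contents.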
	setup := func(lock hash.Hash, root hash.Hash, sizes []uint32) (fm *fakeManifest, p tablePersister, upstream manifestContents) {
		p = factory(t)
		fm = &fakeManifest{}
		fm.set(constants.FormatLD1String, lock, root, makeTestTableSpecs(t, sizes, p), nil)
		var err error
		_, upstream, err = fm.ParseIfExists(context.Background(), nil, nil)
		require.NoError(t, err)
		return
	}

	// getSortedSizes returns the chunk counts of the given table specs in ascending order.
	getSortedSizes := func(specs []tableSpec) (sorted []uint32) {
		all := append([]tableSpec{}, specs...)
		sort.Sort(tableSpecsByAscendingCount(all))
		for _, ts := range all {
			sorted = append(sorted, ts.chunkCount)
		}
		return
	}

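	// assertContainAll opens the expect and actual tables and verifies that every chunk present
	// in the expect tables also appears, byte for byte, in at least one of the actual tables.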
	assertContainAll := func(t *testing.T, p tablePersister, expect, actual []tableSpec) {
		open := func(specs []tableSpec) (sources chunkSources) {
			for _, sp := range specs {
				cs, err := p.Open(context.Background(), sp.name, sp.chunkCount, stats)
				require.NoError(t, err)
				sources = append(sources, cs)
			}
			return
		}

		expectSrcs, actualSrcs := open(expect), open(actual)
		defer func() {
			for _, s := range expectSrcs {
				s.close()
			}
			for _, s := range actualSrcs {
				s.close()
			}
		}()

		ctx := context.Background()
		for _, src := range expectSrcs {
			err := extractAllChunks(ctx, src, func(rec extractRecord) {
				var ok bool
				for _, act := range actualSrcs {
					var err error
					ok, err = act.has(rec.a)
					require.NoError(t, err)
					var buf []byte
					if ok {
						buf, err = act.get(ctx, rec.a, stats)
						require.NoError(t, err)
						assert.Equal(t, rec.data, buf)
						break
					}
				}
				assert.True(t, ok)
			})
			require.NoError(t, err)
		}
	}

	// makeExtra persists a single-chunk table and returns its spec; the retry tests use it to
	// simulate an interloper slipping a new table into the manifest mid-conjoin.
	makeExtra := func(p tablePersister) tableSpec {
		mt := newMemTable(testMemTableSize)
		data := []byte{0xde, 0xad}
		mt.addChunk(computeAddr(data), data)
		src, err := p.Persist(context.Background(), mt, nil, &Stats{})
		require.NoError(t, err)
		defer src.close()
		return tableSpec{src.hash(), mustUint32(src.count())}
	}

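	// Each case lists the upstream table sizes before conjoining and the table sizes expected
	// once the chosen tables have been compacted into one.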
	tc := []struct {
		name        string
		precompact  []uint32
		postcompact []uint32
	}{
		{"uniform", []uint32{1, 1, 1, 1, 1}, []uint32{5}},
		{"all but last", []uint32{1, 1, 1, 1, 5}, []uint32{4, 5}},
		{"all", []uint32{5, 5, 5}, []uint32{15}},
		{"first four", []uint32{5, 6, 10, 11, 35, 64}, []uint32{32, 35, 64}},
		{"log, first two", []uint32{1, 2, 4, 8, 16, 32, 64}, []uint32{3, 4, 8, 16, 32, 64}},
		{"log, all", []uint32{2, 3, 4, 8, 16, 32, 64}, []uint32{129}},
	}

	startLock, startRoot := computeAddr([]byte("lock")), hash.Of([]byte("root"))
	t.Run("Success", func(t *testing.T) {
		// Compact some tables, no one interrupts
		for _, c := range tc {
			t.Run(c.name, func(t *testing.T) {
				fm, p, upstream := setup(startLock, startRoot, c.precompact)

				_, _, err := conjoin(context.Background(), inlineConjoiner{}, upstream, fm, p, stats)
				require.NoError(t, err)
				exists, newUpstream, err := fm.ParseIfExists(context.Background(), stats, nil)
				require.NoError(t, err)
				assert.True(t, exists)
				assert.Equal(t, c.postcompact, getSortedSizes(newUpstream.specs))
				assertContainAll(t, p, upstream.specs, newUpstream.specs)
			})
		}
	})

	t.Run("Retry", func(t *testing.T) {
		for _, c := range tc {
			t.Run(c.name, func(t *testing.T) {
				fm, p, upstream := setup(startLock, startRoot, c.precompact)

				newTable := makeExtra(p)
				u := updatePreemptManifest{fm, func() {
					specs := append([]tableSpec{}, upstream.specs...)
					fm.set(constants.FormatLD1String, computeAddr([]byte("lock2")), startRoot, append(specs, newTable), nil)
				}}
				_, _, err := conjoin(context.Background(), inlineConjoiner{}, upstream, u, p, stats)
				require.NoError(t, err)
				exists, newUpstream, err := fm.ParseIfExists(context.Background(), stats, nil)
				require.NoError(t, err)
				assert.True(t, exists)
				assert.Equal(t, append([]uint32{1}, c.postcompact...), getSortedSizes(newUpstream.specs))
				assertContainAll(t, p, append(upstream.specs, newTable), newUpstream.specs)
			})
		}
	})

	t.Run("TablesDroppedUpstream", func(t *testing.T) {
		// Interloper drops some compactees
		for _, c := range tc {
			t.Run(c.name, func(t *testing.T) {
				fm, p, upstream := setup(startLock, startRoot, c.precompact)

				u := updatePreemptManifest{fm, func() {
					fm.set(constants.FormatLD1String, computeAddr([]byte("lock2")), startRoot, upstream.specs[1:], nil)
				}}
				_, _, err := conjoin(context.Background(), inlineConjoiner{}, upstream, u, p, stats)
				require.NoError(t, err)
				exists, newUpstream, err := fm.ParseIfExists(context.Background(), stats, nil)
				require.NoError(t, err)
				assert.True(t, exists)
				assert.Equal(t, c.precompact[1:], getSortedSizes(newUpstream.specs))
			})
		}
	})

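	// setupAppendix is like setup, but also installs appendix tables of the given sizes in the
	// manifest; it always uses the in-memory fake persister.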
	setupAppendix := func(lock hash.Hash, root hash.Hash, specSizes, appendixSizes []uint32) (fm *fakeManifest, p tablePersister, upstream manifestContents) {
		p = newFakeTablePersister(&UnlimitedQuotaProvider{})
		fm = &fakeManifest{}
		fm.set(constants.FormatLD1String, lock, root, makeTestTableSpecs(t, specSizes, p), makeTestTableSpecs(t, appendixSizes, p))

		var err error
		_, upstream, err = fm.ParseIfExists(context.Background(), nil, nil)
		require.NoError(t, err)

		return
	}

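	// Appendix cases repeat the appendix sizes at the front of precompact; conjoin should leave
	// the appendix tables untouched and compact only the remaining tables.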
	tca := []struct {
		name        string
		appendix    []uint32
		precompact  []uint32
		postcompact []uint32
	}{
		{"uniform", []uint32{1}, []uint32{1, 1, 1, 1, 1}, []uint32{1, 4}},
		{"all but last", []uint32{2}, []uint32{2, 1, 1, 1, 1, 5}, []uint32{2, 4, 5}},
		{"all", []uint32{1, 2, 3}, []uint32{1, 2, 3, 5, 5, 5}, []uint32{1, 2, 3, 15}},
		{"first four", []uint32{8, 9, 10}, []uint32{8, 9, 10, 5, 6, 10, 11, 35, 64}, []uint32{8, 9, 10, 32, 35, 64}},
		{"log, first two", nil, []uint32{1, 2, 4, 8, 16, 32, 64}, []uint32{3, 4, 8, 16, 32, 64}},
		{"log, all", []uint32{9, 10, 11, 12}, []uint32{9, 10, 11, 12, 2, 3, 4, 8, 16, 32, 64}, []uint32{9, 10, 11, 12, 129}},
	}

	t.Run("SuccessAppendix", func(t *testing.T) {
		// Compact some tables, no one interrupts
		for _, c := range tca {
			t.Run(c.name, func(t *testing.T) {
				fm, p, upstream := setupAppendix(startLock, startRoot, c.precompact, c.appendix)

				_, _, err := conjoin(context.Background(), inlineConjoiner{}, upstream, fm, p, stats)
				require.NoError(t, err)
				exists, newUpstream, err := fm.ParseIfExists(context.Background(), stats, nil)
				require.NoError(t, err)
				assert.True(t, exists)
				assert.Equal(t, c.postcompact, getSortedSizes(newUpstream.specs))
				assert.Equal(t, c.appendix, getSortedSizes(newUpstream.appendix))
				assertContainAll(t, p, upstream.specs, newUpstream.specs)
				assertContainAll(t, p, upstream.appendix, newUpstream.appendix)
			})
		}
	})

	t.Run("RetryAppendixSpecsChange", func(t *testing.T) {
		for _, c := range tca {
			t.Run(c.name, func(t *testing.T) {
				fm, p, upstream := setupAppendix(startLock, startRoot, c.precompact, c.appendix)

				newTable := makeExtra(p)
				u := updatePreemptManifest{fm, func() {
					specs := append([]tableSpec{}, upstream.specs...)
					fm.set(constants.FormatLD1String, computeAddr([]byte("lock2")), startRoot, append(specs, newTable), upstream.appendix)
				}}

				_, _, err := conjoin(context.Background(), inlineConjoiner{}, upstream, u, p, stats)
				require.NoError(t, err)
				exists, newUpstream, err := fm.ParseIfExists(context.Background(), stats, nil)
				require.NoError(t, err)
				assert.True(t, exists)
				assert.Equal(t, append([]uint32{1}, c.postcompact...), getSortedSizes(newUpstream.specs))
				assert.Equal(t, c.appendix, getSortedSizes(newUpstream.appendix))
				assertContainAll(t, p, append(upstream.specs, newTable), newUpstream.specs)
				assertContainAll(t, p, upstream.appendix, newUpstream.appendix)
			})
		}
	})

	t.Run("RetryAppendixAppendixChange", func(t *testing.T) {
		for _, c := range tca {
			t.Run(c.name, func(t *testing.T) {
				fm, p, upstream := setupAppendix(startLock, startRoot, c.precompact, c.appendix)

				newTable := makeExtra(p)
				u := updatePreemptManifest{fm, func() {
					app := append([]tableSpec{}, upstream.appendix...)
					specs := append([]tableSpec{}, newTable)
					fm.set(constants.FormatLD1String, computeAddr([]byte("lock2")), startRoot, append(specs, upstream.specs...), append(app, newTable))
				}}

				_, _, err := conjoin(context.Background(), inlineConjoiner{}, upstream, u, p, stats)
				require.NoError(t, err)
				exists, newUpstream, err := fm.ParseIfExists(context.Background(), stats, nil)
				require.NoError(t, err)
				assert.True(t, exists)
				if newUpstream.appendix != nil {
					assert.Equal(t, append([]uint32{1}, c.appendix...), getSortedSizes(newUpstream.appendix))
					assertContainAll(t, p, append(upstream.appendix, newTable), newUpstream.appendix)
				} else {
					assert.Equal(t, upstream.appendix, newUpstream.appendix)
				}
			})
		}
	})

	t.Run("TablesDroppedUpstreamAppendixSpecChanges", func(t *testing.T) {
		// Interloper drops some compactees
		for _, c := range tca {
			t.Run(c.name, func(t *testing.T) {
				fm, p, upstream := setupAppendix(startLock, startRoot, c.precompact, c.appendix)

				u := updatePreemptManifest{fm, func() {
					fm.set(constants.FormatLD1String, computeAddr([]byte("lock2")), startRoot, upstream.specs[len(c.appendix)+1:], upstream.appendix[:])
				}}
				_, _, err := conjoin(context.Background(), inlineConjoiner{}, upstream, u, p, stats)
				require.NoError(t, err)
				exists, newUpstream, err := fm.ParseIfExists(context.Background(), stats, nil)
				require.NoError(t, err)
				assert.True(t, exists)
				assert.Equal(t, c.precompact[len(c.appendix)+1:], getSortedSizes(newUpstream.specs))
				assert.Equal(t, c.appendix, getSortedSizes(newUpstream.appendix))
			})
		}
	})

	t.Run("TablesDroppedUpstreamAppendixAppendixChanges", func(t *testing.T) {
		// Interloper drops some compactees
		for _, c := range tca {
			t.Run(c.name, func(t *testing.T) {
				fm, p, upstream := setupAppendix(startLock, startRoot, c.precompact, c.appendix)

				newTable := makeExtra(p)
				u := updatePreemptManifest{fm, func() {
					specs := append([]tableSpec{}, newTable)
					specs = append(specs, upstream.specs[len(c.appendix)+1:]...)
					fm.set(constants.FormatLD1String, computeAddr([]byte("lock2")), startRoot, specs, append([]tableSpec{}, newTable))
				}}

				_, _, err := conjoin(context.Background(), inlineConjoiner{}, upstream, u, p, stats)
				require.NoError(t, err)
				exists, newUpstream, err := fm.ParseIfExists(context.Background(), stats, nil)
				require.NoError(t, err)
				assert.True(t, exists)
				assert.Equal(t, append([]uint32{1}, c.precompact[len(c.appendix)+1:]...), getSortedSizes(newUpstream.specs))
				assert.Equal(t, []uint32{1}, getSortedSizes(newUpstream.appendix))
			})
		}
	})
}

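// updatePreemptManifest wraps a manifest and invokes preUpdate immediately before delegating
// each Update call, letting tests mutate the manifest as if a concurrent writer had raced the
// in-flight conjoin.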
type updatePreemptManifest struct {
	manifest
	preUpdate func()
}

func (u updatePreemptManifest) Update(ctx context.Context, lastLock hash.Hash, newContents manifestContents, stats *Stats, writeHook func() error) (manifestContents, error) {
	if u.preUpdate != nil {
		u.preUpdate()
	}
	return u.manifest.Update(ctx, lastLock, newContents, stats, writeHook)
}