github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/store_test.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package nbs
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"io/ioutil"
    22  	"math/rand"
    23  	"os"
    24  	"path/filepath"
    25  	"sync"
    26  	"testing"
    27  	"time"
    28  
    29  	"github.com/google/uuid"
    30  	"github.com/stretchr/testify/assert"
    31  	"github.com/stretchr/testify/require"
    32  
    33  	"github.com/dolthub/dolt/go/libraries/utils/set"
    34  	"github.com/dolthub/dolt/go/store/chunks"
    35  	"github.com/dolthub/dolt/go/store/hash"
    36  	"github.com/dolthub/dolt/go/store/types"
    37  	"github.com/dolthub/dolt/go/store/util/tempfiles"
    38  )
    39  
    40  func makeTestLocalStore(t *testing.T, maxTableFiles int) (st *NomsBlockStore, nomsDir string) {
    41  	ctx := context.Background()
    42  	nomsDir = filepath.Join(tempfiles.MovableTempFileProvider.GetTempDir(), "noms_"+uuid.New().String()[:8])
    43  	err := os.MkdirAll(nomsDir, os.ModePerm)
    44  	require.NoError(t, err)
    45  
    46  	// create a v5 manifest
    47  	_, err = fileManifestV5{nomsDir}.Update(ctx, addr{}, manifestContents{}, &Stats{}, nil)
    48  	require.NoError(t, err)
    49  
    50  	st, err = newLocalStore(ctx, types.Format_Default.VersionString(), nomsDir, defaultMemTableSize, maxTableFiles)
    51  	require.NoError(t, err)
    52  	return st, nomsDir
    53  }
    54  
    55  type fileToData map[string][]byte
    56  
    57  func populateLocalStore(t *testing.T, st *NomsBlockStore, numTableFiles int) fileToData {
    58  	ctx := context.Background()
    59  	fileToData := make(fileToData, numTableFiles)
    60  	for i := 0; i < numTableFiles; i++ {
    61  		var chunkData [][]byte
    62  		for j := 0; j < i+1; j++ {
    63  			chunkData = append(chunkData, []byte(fmt.Sprintf("%d:%d", i, j)))
    64  		}
    65  		data, addr, err := buildTable(chunkData)
    66  		require.NoError(t, err)
    67  		fileID := addr.String()
    68  		fileToData[fileID] = data
    69  		err = st.WriteTableFile(ctx, fileID, i+1, bytes.NewReader(data), 0, nil)
    70  		require.NoError(t, err)
    71  	}
    72  	return fileToData
    73  }
    74  
    75  func TestNBSAsTableFileStore(t *testing.T) {
    76  	ctx := context.Background()
    77  
    78  	numTableFiles := 128
    79  	assert.Greater(t, defaultMaxTables, numTableFiles)
    80  	st, _ := makeTestLocalStore(t, defaultMaxTables)
    81  	fileToData := populateLocalStore(t, st, numTableFiles)
    82  
    83  	_, sources, _, err := st.Sources(ctx)
    84  	require.NoError(t, err)
    85  
    86  	assert.Equal(t, numTableFiles, len(sources))
    87  
    88  	for _, src := range sources {
    89  		fileID := src.FileID()
    90  		expected, ok := fileToData[fileID]
    91  		require.True(t, ok)
    92  
    93  		rd, err := src.Open(context.Background())
    94  		require.NoError(t, err)
    95  
    96  		data, err := ioutil.ReadAll(rd)
    97  		require.NoError(t, err)
    98  
    99  		err = rd.Close()
   100  		require.NoError(t, err)
   101  
   102  		assert.Equal(t, expected, data)
   103  	}
   104  
   105  	size, err := st.Size(ctx)
   106  	require.NoError(t, err)
   107  	require.Greater(t, size, uint64(0))
   108  }
   109  
   110  type tableFileSet map[string]TableFile
   111  
   112  func (s tableFileSet) contains(fileName string) (ok bool) {
   113  	_, ok = s[fileName]
   114  	return ok
   115  }
   116  
   117  // findAbsent returns the table file names in |ftd| that don't exist in |s|
   118  func (s tableFileSet) findAbsent(ftd fileToData) (absent []string) {
   119  	for fileID := range ftd {
   120  		if !s.contains(fileID) {
   121  			absent = append(absent, fileID)
   122  		}
   123  	}
   124  	return absent
   125  }
   126  
   127  func tableFileSetFromSources(sources []TableFile) (s tableFileSet) {
   128  	s = make(tableFileSet, len(sources))
   129  	for _, src := range sources {
   130  		s[src.FileID()] = src
   131  	}
   132  	return s
   133  }
   134  
   135  func TestNBSPruneTableFiles(t *testing.T) {
   136  	ctx := context.Background()
   137  
   138  	// over populate table files
   139  	numTableFiles := 64
   140  	maxTableFiles := 16
   141  	st, nomsDir := makeTestLocalStore(t, maxTableFiles)
   142  	fileToData := populateLocalStore(t, st, numTableFiles)
   143  
   144  	// add a chunk and flush to trigger a conjoin
   145  	c := []byte("it's a boy!")
   146  	ok := st.addChunk(ctx, computeAddr(c), c)
   147  	require.True(t, ok)
   148  	ok, err := st.Commit(ctx, st.upstream.root, st.upstream.root)
   149  	require.True(t, ok)
   150  	require.NoError(t, err)
   151  
   152  	_, sources, _, err := st.Sources(ctx)
   153  	require.NoError(t, err)
   154  	assert.Greater(t, numTableFiles, len(sources))
   155  
   156  	// find which input table files were conjoined
   157  	tfSet := tableFileSetFromSources(sources)
   158  	absent := tfSet.findAbsent(fileToData)
   159  	// assert some input table files were conjoined
   160  	assert.NotEmpty(t, absent)
   161  
   162  	currTableFiles := func(dirName string) *set.StrSet {
   163  		infos, err := ioutil.ReadDir(dirName)
   164  		require.NoError(t, err)
   165  		curr := set.NewStrSet(nil)
   166  		for _, fi := range infos {
   167  			if fi.Name() != manifestFileName && fi.Name() != lockFileName {
   168  				curr.Add(fi.Name())
   169  			}
   170  		}
   171  		return curr
   172  	}
   173  
   174  	preGC := currTableFiles(nomsDir)
   175  	for _, tf := range sources {
   176  		assert.True(t, preGC.Contains(tf.FileID()))
   177  	}
   178  	for _, fileName := range absent {
   179  		assert.True(t, preGC.Contains(fileName))
   180  	}
   181  
   182  	err = st.PruneTableFiles(ctx)
   183  	require.NoError(t, err)
   184  
   185  	postGC := currTableFiles(nomsDir)
   186  	for _, tf := range sources {
   187  		assert.True(t, preGC.Contains(tf.FileID()))
   188  	}
   189  	for _, fileName := range absent {
   190  		assert.False(t, postGC.Contains(fileName))
   191  	}
   192  	infos, err := ioutil.ReadDir(nomsDir)
   193  	require.NoError(t, err)
   194  
   195  	// assert that we only have files for current sources,
   196  	// the manifest, and the lock file
   197  	assert.Equal(t, len(sources)+2, len(infos))
   198  
   199  	size, err := st.Size(ctx)
   200  	require.NoError(t, err)
   201  	require.Greater(t, size, uint64(0))
   202  }
   203  
   204  func makeChunkSet(N, size int) (s map[hash.Hash]chunks.Chunk) {
   205  	bb := make([]byte, size*N)
   206  	time.Sleep(10)
   207  	rand.Seed(time.Now().UnixNano())
   208  	rand.Read(bb)
   209  
   210  	s = make(map[hash.Hash]chunks.Chunk, N)
   211  	offset := 0
   212  	for i := 0; i < N; i++ {
   213  		c := chunks.NewChunk(bb[offset : offset+size])
   214  		s[c.Hash()] = c
   215  		offset += size
   216  	}
   217  
   218  	return
   219  }
   220  
// TestNBSCopyGC exercises MarkAndSweepChunks: chunks whose hashes are fed to
// the keep channel survive garbage collection, while everything else becomes
// unreadable (Get returns the empty chunk).
func TestNBSCopyGC(t *testing.T) {
	ctx := context.Background()
	st, _ := makeTestLocalStore(t, 8)

	// two sets of random chunks: one to retain, one to collect
	keepers := makeChunkSet(64, 64)
	tossers := makeChunkSet(64, 64)

	for _, c := range keepers {
		err := st.Put(ctx, c)
		require.NoError(t, err)
	}
	for h, c := range keepers {
		out, err := st.Get(ctx, h)
		require.NoError(t, err)
		assert.Equal(t, c, out)
	}

	for h := range tossers {
		// assert mutually exclusive chunk sets
		c, ok := keepers[h]
		require.False(t, ok)
		assert.Equal(t, chunks.Chunk{}, c)
	}
	for _, c := range tossers {
		err := st.Put(ctx, c)
		require.NoError(t, err)
	}
	for h, c := range tossers {
		out, err := st.Get(ctx, h)
		require.NoError(t, err)
		assert.Equal(t, c, out)
	}

	r, err := st.Root(ctx)
	require.NoError(t, err)

	keepChan := make(chan []hash.Hash, 16)
	var msErr error
	wg := &sync.WaitGroup{}
	wg.Add(1)
	// MarkAndSweepChunks consumes keepChan until it is closed, so it must run
	// concurrently with the sends below; its error is checked after wg.Wait().
	go func() {
		msErr = st.MarkAndSweepChunks(ctx, r, keepChan)
		wg.Done()
	}()
	// feed every keeper hash to the collector, then signal completion by closing
	for h := range keepers {
		keepChan <- []hash.Hash{h}
	}
	close(keepChan)
	wg.Wait()
	require.NoError(t, msErr)

	// keepers survive the sweep intact
	for h, c := range keepers {
		out, err := st.Get(ctx, h)
		require.NoError(t, err)
		assert.Equal(t, c, out)
	}
	// tossers are gone; Get yields the empty chunk for absent hashes
	for h := range tossers {
		out, err := st.Get(ctx, h)
		require.NoError(t, err)
		assert.Equal(t, chunks.EmptyChunk, out)
	}
}
   283  
   284  func persistTableFileSources(t *testing.T, p tablePersister, numTableFiles int) (map[hash.Hash]uint32, []hash.Hash) {
   285  	tableFileMap := make(map[hash.Hash]uint32, numTableFiles)
   286  	mapIds := make([]hash.Hash, numTableFiles)
   287  
   288  	for i := 0; i < numTableFiles; i++ {
   289  		var chunkData [][]byte
   290  		for j := 0; j < i+1; j++ {
   291  			chunkData = append(chunkData, []byte(fmt.Sprintf("%d:%d", i, j)))
   292  		}
   293  		_, addr, err := buildTable(chunkData)
   294  		require.NoError(t, err)
   295  		fileIDHash, ok := hash.MaybeParse(addr.String())
   296  		require.True(t, ok)
   297  		tableFileMap[fileIDHash] = uint32(i + 1)
   298  		mapIds[i] = fileIDHash
   299  		_, err = p.Persist(context.Background(), createMemTable(chunkData), nil, &Stats{})
   300  		require.NoError(t, err)
   301  	}
   302  	return tableFileMap, mapIds
   303  }
   304  
   305  func prepStore(ctx context.Context, t *testing.T, assert *assert.Assertions) (*fakeManifest, tablePersister, *NomsBlockStore, *Stats, chunks.Chunk) {
   306  	fm, p, store := makeStoreWithFakes(t)
   307  	h, err := store.Root(ctx)
   308  	require.NoError(t, err)
   309  	assert.Equal(hash.Hash{}, h)
   310  
   311  	rootChunk := chunks.NewChunk([]byte("root"))
   312  	rootHash := rootChunk.Hash()
   313  	err = store.Put(ctx, rootChunk)
   314  	require.NoError(t, err)
   315  	success, err := store.Commit(ctx, rootHash, hash.Hash{})
   316  	require.NoError(t, err)
   317  	if assert.True(success) {
   318  		has, err := store.Has(ctx, rootHash)
   319  		require.NoError(t, err)
   320  		assert.True(has)
   321  		h, err := store.Root(ctx)
   322  		require.NoError(t, err)
   323  		assert.Equal(rootHash, h)
   324  	}
   325  
   326  	stats := &Stats{}
   327  
   328  	_, upstream, err := fm.ParseIfExists(ctx, stats, nil)
   329  	require.NoError(t, err)
   330  	// expect single spec for initial commit
   331  	assert.Equal(1, upstream.NumTableSpecs())
   332  	// Start with no appendixes
   333  	assert.Equal(0, upstream.NumAppendixSpecs())
   334  	return fm, p, store, stats, rootChunk
   335  }
   336  
// TestNBSUpdateManifestWithAppendixOptions verifies UpdateManifestWithAppendix
// for each ManifestAppendixOption: the zero-value option is rejected, Append
// adds to the existing appendix, and Set replaces it (including replacing it
// with nothing).
//
// NOTE(review): the subtests share one store and assert against the manifest
// state accumulated by earlier subtests, so they are order-dependent and must
// not be run in parallel.
func TestNBSUpdateManifestWithAppendixOptions(t *testing.T) {
	assert := assert.New(t)
	ctx := context.Background()

	_, p, store, _, _ := prepStore(ctx, t, assert)
	defer store.Close()

	// persist tablefiles to tablePersister
	appendixUpdates, appendixIds := persistTableFileSources(t, p, 4)

	tests := []struct {
		description                   string
		option                        ManifestAppendixOption
		appendixSpecIds               []hash.Hash
		expectedNumberOfSpecs         int
		expectedNumberOfAppendixSpecs int
		expectedError                 error
	}{
		{
			// option left at its zero value: unsupported
			description:     "should error on unsupported appendix option",
			appendixSpecIds: appendixIds[:1],
			expectedError:   ErrUnsupportedManifestAppendixOption,
		},
		{
			description:                   "should append to appendix",
			option:                        ManifestAppendixOption_Append,
			appendixSpecIds:               appendixIds[:2],
			expectedNumberOfSpecs:         3,
			expectedNumberOfAppendixSpecs: 2,
		},
		{
			description:                   "should replace appendix",
			option:                        ManifestAppendixOption_Set,
			appendixSpecIds:               appendixIds[3:],
			expectedNumberOfSpecs:         2,
			expectedNumberOfAppendixSpecs: 1,
		},
		{
			// Set with an empty id list clears the appendix entirely
			description:                   "should set appendix to nil",
			option:                        ManifestAppendixOption_Set,
			appendixSpecIds:               []hash.Hash{},
			expectedNumberOfSpecs:         1,
			expectedNumberOfAppendixSpecs: 0,
		},
	}

	for _, test := range tests {
		t.Run(test.description, func(t *testing.T) {
			updates := make(map[hash.Hash]uint32)
			for _, id := range test.appendixSpecIds {
				updates[id] = appendixUpdates[id]
			}

			if test.expectedError == nil {
				info, err := store.UpdateManifestWithAppendix(ctx, updates, test.option)
				require.NoError(t, err)
				assert.Equal(test.expectedNumberOfSpecs, info.NumTableSpecs())
				assert.Equal(test.expectedNumberOfAppendixSpecs, info.NumAppendixSpecs())
			} else {
				_, err := store.UpdateManifestWithAppendix(ctx, updates, test.option)
				assert.Equal(test.expectedError, err)
			}
		})
	}
}
   402  
   403  func TestNBSUpdateManifestWithAppendix(t *testing.T) {
   404  	assert := assert.New(t)
   405  	ctx := context.Background()
   406  
   407  	fm, p, store, stats, _ := prepStore(ctx, t, assert)
   408  	defer store.Close()
   409  
   410  	_, upstream, err := fm.ParseIfExists(ctx, stats, nil)
   411  	require.NoError(t, err)
   412  
   413  	// persist tablefile to tablePersister
   414  	appendixUpdates, appendixIds := persistTableFileSources(t, p, 1)
   415  
   416  	// Ensure appendix (and specs) are updated
   417  	appendixFileId := appendixIds[0]
   418  	updates := map[hash.Hash]uint32{appendixFileId: appendixUpdates[appendixFileId]}
   419  	newContents, err := store.UpdateManifestWithAppendix(ctx, updates, ManifestAppendixOption_Append)
   420  	require.NoError(t, err)
   421  	assert.Equal(upstream.NumTableSpecs()+1, newContents.NumTableSpecs())
   422  	assert.Equal(1, newContents.NumAppendixSpecs())
   423  	assert.Equal(newContents.GetTableSpecInfo(0), newContents.GetAppendixTableSpecInfo(0))
   424  }
   425  
// TestNBSUpdateManifestRetainsAppendix performs three sequential manifest
// mutations — a plain spec update, an appendix append, then another plain
// spec update — re-parsing the upstream manifest between each, to show that
// UpdateManifest preserves the appendix and keeps appendix specs prepended
// to |manifestContents.specs|.
func TestNBSUpdateManifestRetainsAppendix(t *testing.T) {
	assert := assert.New(t)
	ctx := context.Background()

	fm, p, store, stats, _ := prepStore(ctx, t, assert)
	defer store.Close()

	_, upstream, err := fm.ParseIfExists(ctx, stats, nil)
	require.NoError(t, err)

	// persist tablefile to tablePersister
	specUpdates, specIds := persistTableFileSources(t, p, 3)

	// Update the manifest
	firstSpecId := specIds[0]
	newContents, err := store.UpdateManifest(ctx, map[hash.Hash]uint32{firstSpecId: specUpdates[firstSpecId]})
	require.NoError(t, err)
	assert.Equal(1+upstream.NumTableSpecs(), newContents.NumTableSpecs())
	assert.Equal(0, upstream.NumAppendixSpecs())

	// re-read upstream so the next deltas are measured against current state
	_, upstream, err = fm.ParseIfExists(ctx, stats, nil)
	require.NoError(t, err)

	// Update the appendix
	appendixSpecId := specIds[1]
	updates := map[hash.Hash]uint32{appendixSpecId: specUpdates[appendixSpecId]}
	newContents, err = store.UpdateManifestWithAppendix(ctx, updates, ManifestAppendixOption_Append)
	require.NoError(t, err)
	assert.Equal(1+upstream.NumTableSpecs(), newContents.NumTableSpecs())
	assert.Equal(1+upstream.NumAppendixSpecs(), newContents.NumAppendixSpecs())
	// appendix specs lead the table spec list
	assert.Equal(newContents.GetAppendixTableSpecInfo(0), newContents.GetTableSpecInfo(0))

	_, upstream, err = fm.ParseIfExists(ctx, stats, nil)
	require.NoError(t, err)

	// Update the manifest again to show
	// it successfully retains the appendix
	// and the appendix specs are properly prepended
	// to the |manifestContents.specs|
	secondSpecId := specIds[2]
	newContents, err = store.UpdateManifest(ctx, map[hash.Hash]uint32{secondSpecId: specUpdates[secondSpecId]})
	require.NoError(t, err)
	assert.Equal(1+upstream.NumTableSpecs(), newContents.NumTableSpecs())
	assert.Equal(upstream.NumAppendixSpecs(), newContents.NumAppendixSpecs())
	assert.Equal(newContents.GetAppendixTableSpecInfo(0), newContents.GetTableSpecInfo(0))
}
   472  
// TestNBSCommitRetainsAppendix appends an appendix spec, then performs a
// second Commit, and verifies the commit neither drops the appendix nor
// disturbs its position at the front of the manifest's table specs.
func TestNBSCommitRetainsAppendix(t *testing.T) {
	assert := assert.New(t)
	ctx := context.Background()

	fm, p, store, stats, rootChunk := prepStore(ctx, t, assert)
	defer store.Close()

	_, upstream, err := fm.ParseIfExists(ctx, stats, nil)
	require.NoError(t, err)

	// persist tablefile to tablePersister
	appendixUpdates, appendixIds := persistTableFileSources(t, p, 1)

	// Update the appendix
	appendixFileId := appendixIds[0]
	updates := map[hash.Hash]uint32{appendixFileId: appendixUpdates[appendixFileId]}
	newContents, err := store.UpdateManifestWithAppendix(ctx, updates, ManifestAppendixOption_Append)
	require.NoError(t, err)
	assert.Equal(1+upstream.NumTableSpecs(), newContents.NumTableSpecs())
	assert.Equal(1, newContents.NumAppendixSpecs())

	// re-read upstream so post-commit deltas are measured from this state
	_, upstream, err = fm.ParseIfExists(ctx, stats, nil)
	require.NoError(t, err)

	// Make second Commit
	secondRootChunk := chunks.NewChunk([]byte("newer root"))
	secondRoot := secondRootChunk.Hash()
	err = store.Put(ctx, secondRootChunk)
	require.NoError(t, err)
	success, err := store.Commit(ctx, secondRoot, rootChunk.Hash())
	require.NoError(t, err)
	if assert.True(success) {
		h, err := store.Root(ctx)
		require.NoError(t, err)
		assert.Equal(secondRoot, h)
		// both the old and new roots remain readable after the commit
		has, err := store.Has(context.Background(), rootChunk.Hash())
		require.NoError(t, err)
		assert.True(has)
		has, err = store.Has(context.Background(), secondRoot)
		require.NoError(t, err)
		assert.True(has)
	}

	// Ensure commit did not blow away appendix
	_, newUpstream, err := fm.ParseIfExists(ctx, stats, nil)
	require.NoError(t, err)
	assert.Equal(1+upstream.NumTableSpecs(), newUpstream.NumTableSpecs())
	assert.Equal(upstream.NumAppendixSpecs(), newUpstream.NumAppendixSpecs())
	// appendix spec still leads the table spec list
	assert.Equal(upstream.GetAppendixTableSpecInfo(0), newUpstream.GetTableSpecInfo(0))
	assert.Equal(newUpstream.GetTableSpecInfo(0), newUpstream.GetAppendixTableSpecInfo(0))
}