github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/doltdb/gc_test.go

// Copyright 2020 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package doltdb_test

import (
	"context"
	"errors"
	"os"
	"testing"

	"github.com/dolthub/go-mysql-server/sql"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"

	"github.com/dolthub/dolt/go/cmd/dolt/commands"
	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
	"github.com/dolthub/dolt/go/libraries/doltcore/dtestutils"
	"github.com/dolthub/dolt/go/libraries/doltcore/env"
	"github.com/dolthub/dolt/go/libraries/doltcore/ref"
	"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
	"github.com/dolthub/dolt/go/libraries/utils/filesys"
	"github.com/dolthub/dolt/go/store/hash"
	"github.com/dolthub/dolt/go/store/nbs"
	"github.com/dolthub/dolt/go/store/prolly"
	"github.com/dolthub/dolt/go/store/prolly/tree"
	"github.com/dolthub/dolt/go/store/types"
	"github.com/dolthub/dolt/go/store/val"
)

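// TestGarbageCollection runs each gcTest as a subtest, followed by the
// regression test for the hasCache data corruption bug described further down.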
func TestGarbageCollection(t *testing.T) {
	require.True(t, true)
	assert.True(t, true)

	for _, gct := range gcTests {
		t.Run(gct.name, func(t *testing.T) {
			testGarbageCollection(t, gct)
		})
	}

	t.Run("HasCacheDataCorruption", testGarbageCollectionHasCacheDataCorruptionBugFix)
}

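// stage is one step of a gcTest: preStageFunc runs first, receiving the
// previous stage's result, then each entry in commands is executed through
// the dolt CLI command implementations.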
type stage struct {
	commands     []testCommand
	preStageFunc func(ctx context.Context, t *testing.T, ddb *doltdb.DoltDB, prevRes interface{}) interface{}
}

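// gcTest describes a garbage collection scenario: the stages to run before
// GC, a query whose results must survive the collection, and a postGCFunc for
// asserting what should no longer be reachable afterwards.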
type gcTest struct {
	name       string
	stages     []stage
	query      string
	expected   []sql.Row
	postGCFunc func(ctx context.Context, t *testing.T, ddb *doltdb.DoltDB, prevRes interface{})
}

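// gcTests exercises GC after a branch has been created, committed to, and
// deleted: rows committed on the surviving branch must remain, while the
// deleted branch's commit must become unresolvable.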
var gcTests = []gcTest{
	{
		name: "gc test",
		stages: []stage{
			{
				preStageFunc: func(ctx context.Context, t *testing.T, ddb *doltdb.DoltDB, i interface{}) interface{} {
					return nil
				},
				commands: []testCommand{
					{commands.CheckoutCmd{}, []string{"-b", "temp"}},
					{commands.SqlCmd{}, []string{"-q", "INSERT INTO test VALUES (0),(1),(2);"}},
					{commands.AddCmd{}, []string{"."}},
					{commands.CommitCmd{}, []string{"-m", "commit"}},
				},
			},
			{
				preStageFunc: func(ctx context.Context, t *testing.T, ddb *doltdb.DoltDB, i interface{}) interface{} {
					cm, err := ddb.ResolveCommitRef(ctx, ref.NewBranchRef("temp"))
					require.NoError(t, err)
					h, err := cm.HashOf()
					require.NoError(t, err)
					cs, err := doltdb.NewCommitSpec(h.String())
					require.NoError(t, err)
					_, err = ddb.Resolve(ctx, cs, nil)
					require.NoError(t, err)
					return h
				},
				commands: []testCommand{
					{commands.CheckoutCmd{}, []string{env.DefaultInitBranch}},
					{commands.BranchCmd{}, []string{"-D", "temp"}},
					{commands.SqlCmd{}, []string{"-q", "INSERT INTO test VALUES (4),(5),(6);"}},
				},
			},
		},
		query:    "select * from test;",
		expected: []sql.Row{{int32(4)}, {int32(5)}, {int32(6)}},
		postGCFunc: func(ctx context.Context, t *testing.T, ddb *doltdb.DoltDB, prevRes interface{}) {
			h := prevRes.(hash.Hash)
			cs, err := doltdb.NewCommitSpec(h.String())
			require.NoError(t, err)
			_, err = ddb.Resolve(ctx, cs, nil)
			require.Error(t, err)
		},
	},
}

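// gcSetupCommon creates and commits the test table that every gcTest builds on.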
var gcSetupCommon = []testCommand{
	{commands.SqlCmd{}, []string{"-q", "CREATE TABLE test (pk int PRIMARY KEY)"}},
	{commands.AddCmd{}, []string{"."}},
	{commands.CommitCmd{}, []string{"-m", "created test table"}},
}

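// testGarbageCollection runs the common setup and the test's stages, garbage
// collects the database, and then asserts both the test's post-GC expectations
// and that the expected rows are still present.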
func testGarbageCollection(t *testing.T, test gcTest) {
	ctx := context.Background()
	dEnv := dtestutils.CreateTestEnv()
	defer dEnv.DoltDB.Close()

	cliCtx, verr := commands.NewArgFreeCliContext(ctx, dEnv)
	require.NoError(t, verr)

	for _, c := range gcSetupCommon {
		exitCode := c.cmd.Exec(ctx, c.cmd.Name(), c.args, dEnv, cliCtx)
		require.Equal(t, 0, exitCode)
	}

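	// each stage's preStageFunc receives the previous stage's result, so state
	// (e.g. a commit hash) is threaded through the stages and on to postGCFunc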
	var res interface{}
	for _, stage := range test.stages {
		res = stage.preStageFunc(ctx, t, dEnv.DoltDB, res)
		for _, c := range stage.commands {
			exitCode := c.cmd.Exec(ctx, c.cmd.Name(), c.args, dEnv, cliCtx)
			require.Equal(t, 0, exitCode)
		}
	}

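	// collect garbage, then let the test assert its post-GC expectations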
	err := dEnv.DoltDB.GC(ctx, nil)
	require.NoError(t, err)
	test.postGCFunc(ctx, t, dEnv.DoltDB, res)

	working, err := dEnv.WorkingRoot(ctx)
	require.NoError(t, err)
	// assert all our rows are still present after gc
	actual, err := sqle.ExecuteSelect(dEnv, working, test.query)
	require.NoError(t, err)
	assert.Equal(t, test.expected, actual)
}

// In September 2023, we found that a failure to handle the `hasCache` in
// `*NomsBlockStore` appropriately, while cleaning up a memtable into which
// dangling references had been written, could result in writing chunks to a
// database which referenced non-existent chunks.
//
// The general pattern was to get new chunk addresses into the hasCache, but
// not written to the store, and then to have an incoming chunk add a reference
// to a missing chunk. At that time, we would clear the memtable, since it had
// invalid chunks in it, but we wouldn't purge the hasCache. Later writes which
// attempted to reference the chunks which had made it into the hasCache would
// succeed.
//
// One such concrete pattern for doing this is implemented below. We do:
//
// 1) Put a new chunk to the database -- C1.
//
// 2) Run a GC.
//
// 3) Put a new chunk to the database -- C2.
//
// 4) Call NBS.Commit() with a stale last hash.Hash. This causes us to cache C2
// as present in the store, but it does not get written to disk, because the
// optimistic concurrency control on the value of the current root hash fails.
//
// 5) Put a chunk referencing C1 to the database -- R1.
//
// 6) Call NBS.Commit(). This causes ErrDanglingRef. C1 was written before the
// GC and is no longer in the store. C2 is also cleared from the pending write
// set.
//
// 7) Put a chunk referencing C2 to the database -- R2.
//
// 8) Call NBS.Commit(). This should fail, since R2 references C2 and C2 is not
// in the store. However, C2 is in the cache as a result of step #4, and so
// before the fix this did not fail: R2 got written to disk with a dangling
// reference to C2.
func testGarbageCollectionHasCacheDataCorruptionBugFix(t *testing.T) {
	ctx := context.Background()

	d, err := os.MkdirTemp(t.TempDir(), "hascachetest-")
	require.NoError(t, err)

	ddb, err := doltdb.LoadDoltDB(ctx, types.Format_DOLT, "file://"+d, filesys.LocalFS)
	require.NoError(t, err)
	defer ddb.Close()

	err = ddb.WriteEmptyRepo(ctx, "main", "Aaron Son", "aaron@dolthub.com")
	require.NoError(t, err)

	root, err := ddb.NomsRoot(ctx)
	require.NoError(t, err)

	ns := ddb.NodeStore()

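	// Step 1: write a new chunk, C1.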
	c1 := newIntMap(t, ctx, ns, 1, 1)
	_, err = ns.Write(ctx, c1.Node())
	require.NoError(t, err)

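	// Step 2: run a GC; C1 is unreferenced and gets collected.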
	err = ddb.GC(ctx, nil)
	require.NoError(t, err)

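	// Step 3: write a new chunk, C2.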
	c2 := newIntMap(t, ctx, ns, 2, 2)
	_, err = ns.Write(ctx, c2.Node())
	require.NoError(t, err)

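	// Step 4: commit with a stale last root hash. The commit is rejected, so
	// C2 is never persisted, but its address lands in the hasCache.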
	success, err := ddb.CommitRoot(ctx, c2.HashOf(), c2.HashOf())
	require.NoError(t, err)
	require.False(t, success, "committing the root with a last hash which does not match the current root must fail")

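	// Step 5: write R1, which references the just-collected C1.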
	r1 := newAddrMap(t, ctx, ns, "r1", c1.HashOf())
	_, err = ns.Write(ctx, r1.Node())
	require.NoError(t, err)

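	// Step 6: this commit fails with ErrDanglingRef because C1 is gone; the
	// pending write set, including C2, is cleared as a result.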
	success, err = ddb.CommitRoot(ctx, root, root)
	require.True(t, errors.Is(err, nbs.ErrDanglingRef), "committing a reference to just-collected c1 must fail with ErrDanglingRef")

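	// Step 7: write R2, which references C2.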
	r2 := newAddrMap(t, ctx, ns, "r2", c2.HashOf())
	_, err = ns.Write(ctx, r2.Node())
	require.NoError(t, err)

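	// Step 8: this commit must also fail with ErrDanglingRef; before the fix,
	// the stale hasCache entry for C2 let it succeed, persisting a dangling
	// reference.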
	success, err = ddb.CommitRoot(ctx, root, root)
	require.True(t, errors.Is(err, nbs.ErrDanglingRef), "committing a reference to c2, which was erased with the ErrDanglingRef above, must also fail with ErrDanglingRef")
}

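// newIntMap builds a single-entry prolly.Map with an int8 key and value; the
// tests use it only to produce a fresh chunk with a predictable address.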
func newIntMap(t *testing.T, ctx context.Context, ns tree.NodeStore, k, v int8) prolly.Map {
	desc := val.NewTupleDescriptor(val.Type{
		Enc:      val.Int8Enc,
		Nullable: false,
	})

	tb := val.NewTupleBuilder(desc)
	tb.PutInt8(0, k)
	keyTuple := tb.Build(ns.Pool())

	tb.PutInt8(0, v)
	valueTuple := tb.Build(ns.Pool())

	m, err := prolly.NewMapFromTuples(ctx, ns, desc, desc, keyTuple, valueTuple)
	require.NoError(t, err)
	return m
}

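// newAddrMap builds a single-entry prolly.AddressMap mapping key to h; the
// tests use it to create a chunk that references another chunk by address.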
func newAddrMap(t *testing.T, ctx context.Context, ns tree.NodeStore, key string, h hash.Hash) prolly.AddressMap {
	m, err := prolly.NewEmptyAddressMap(ns)
	require.NoError(t, err)

	editor := m.Editor()
	err = editor.Add(ctx, key, h)
	require.NoError(t, err)

	m, err = editor.Flush(ctx)
	require.NoError(t, err)

	return m
}