github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/doltdb/gc_test.go (about) 1 // Copyright 2020 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package doltdb_test 16 17 import ( 18 "context" 19 "errors" 20 "os" 21 "testing" 22 23 "github.com/dolthub/go-mysql-server/sql" 24 "github.com/stretchr/testify/assert" 25 "github.com/stretchr/testify/require" 26 27 "github.com/dolthub/dolt/go/cmd/dolt/commands" 28 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 29 "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils" 30 "github.com/dolthub/dolt/go/libraries/doltcore/env" 31 "github.com/dolthub/dolt/go/libraries/doltcore/ref" 32 "github.com/dolthub/dolt/go/libraries/doltcore/sqle" 33 "github.com/dolthub/dolt/go/libraries/utils/filesys" 34 "github.com/dolthub/dolt/go/store/hash" 35 "github.com/dolthub/dolt/go/store/nbs" 36 "github.com/dolthub/dolt/go/store/prolly" 37 "github.com/dolthub/dolt/go/store/prolly/tree" 38 "github.com/dolthub/dolt/go/store/types" 39 "github.com/dolthub/dolt/go/store/val" 40 ) 41 42 func TestGarbageCollection(t *testing.T) { 43 require.True(t, true) 44 assert.True(t, true) 45 46 for _, gct := range gcTests { 47 t.Run(gct.name, func(t *testing.T) { 48 testGarbageCollection(t, gct) 49 }) 50 } 51 52 t.Run("HasCacheDataCorruption", testGarbageCollectionHasCacheDataCorruptionBugFix) 53 } 54 55 type stage struct { 56 commands []testCommand 57 preStageFunc func(ctx context.Context, t *testing.T, ddb *doltdb.DoltDB, prevRes interface{}) interface{} 58 } 59 60 type gcTest struct { 61 name string 62 stages []stage 63 query string 64 expected []sql.Row 65 postGCFunc func(ctx context.Context, t *testing.T, ddb *doltdb.DoltDB, prevRes interface{}) 66 } 67 68 var gcTests = []gcTest{ 69 { 70 name: "gc test", 71 stages: []stage{ 72 { 73 preStageFunc: func(ctx context.Context, t *testing.T, ddb *doltdb.DoltDB, i interface{}) interface{} { 74 return nil 75 }, 76 commands: []testCommand{ 77 {commands.CheckoutCmd{}, []string{"-b", "temp"}}, 78 {commands.SqlCmd{}, []string{"-q", "INSERT INTO test VALUES (0),(1),(2);"}}, 79 {commands.AddCmd{}, []string{"."}}, 80 {commands.CommitCmd{}, []string{"-m", "commit"}}, 81 }, 82 }, 83 { 84 preStageFunc: func(ctx context.Context, t *testing.T, ddb *doltdb.DoltDB, i interface{}) interface{} { 85 cm, err := ddb.ResolveCommitRef(ctx, ref.NewBranchRef("temp")) 86 require.NoError(t, err) 87 h, err := cm.HashOf() 88 require.NoError(t, err) 89 cs, err := doltdb.NewCommitSpec(h.String()) 90 require.NoError(t, err) 91 _, err = ddb.Resolve(ctx, cs, nil) 92 require.NoError(t, err) 93 return h 94 }, 95 commands: []testCommand{ 96 {commands.CheckoutCmd{}, []string{env.DefaultInitBranch}}, 97 {commands.BranchCmd{}, []string{"-D", "temp"}}, 98 {commands.SqlCmd{}, []string{"-q", "INSERT INTO test VALUES (4),(5),(6);"}}, 99 }, 100 }, 101 }, 102 query: "select * from test;", 103 expected: []sql.Row{{int32(4)}, {int32(5)}, {int32(6)}}, 104 postGCFunc: func(ctx context.Context, t *testing.T, ddb *doltdb.DoltDB, prevRes interface{}) { 105 h := prevRes.(hash.Hash) 106 cs, err := doltdb.NewCommitSpec(h.String()) 107 require.NoError(t, err) 108 _, err = ddb.Resolve(ctx, cs, nil) 109 require.Error(t, err) 110 }, 111 }, 112 } 113 114 var gcSetupCommon = []testCommand{ 115 {commands.SqlCmd{}, []string{"-q", "CREATE TABLE test (pk int PRIMARY KEY)"}}, 116 {commands.AddCmd{}, []string{"."}}, 117 {commands.CommitCmd{}, []string{"-m", "created test table"}}, 118 } 119 120 func testGarbageCollection(t *testing.T, test gcTest) { 121 ctx := context.Background() 122 dEnv := dtestutils.CreateTestEnv() 123 defer dEnv.DoltDB.Close() 124 125 cliCtx, verr := commands.NewArgFreeCliContext(ctx, dEnv) 126 require.NoError(t, verr) 127 128 for _, c := range gcSetupCommon { 129 exitCode := c.cmd.Exec(ctx, c.cmd.Name(), c.args, dEnv, cliCtx) 130 require.Equal(t, 0, exitCode) 131 } 132 133 var res interface{} 134 for _, stage := range test.stages { 135 res = stage.preStageFunc(ctx, t, dEnv.DoltDB, res) 136 for _, c := range stage.commands { 137 exitCode := c.cmd.Exec(ctx, c.cmd.Name(), c.args, dEnv, cliCtx) 138 require.Equal(t, 0, exitCode) 139 } 140 } 141 142 err := dEnv.DoltDB.GC(ctx, nil) 143 require.NoError(t, err) 144 test.postGCFunc(ctx, t, dEnv.DoltDB, res) 145 146 working, err := dEnv.WorkingRoot(ctx) 147 require.NoError(t, err) 148 // assert all out rows are present after gc 149 actual, err := sqle.ExecuteSelect(dEnv, working, test.query) 150 require.NoError(t, err) 151 assert.Equal(t, test.expected, actual) 152 } 153 154 // In September 2023, we found a failure to handle the `hasCache` in 155 // `*NomsBlockStore` appropriately while cleaning up a memtable into which 156 // dangling references had been written could result in writing chunks to a 157 // database which referenced non-existent chunks. 158 // 159 // The general pattern was to get new chunk addresses into the hasCache, but 160 // not written to the store, and then to have an incoming chunk add a reference 161 // to missing chunk. At that time, we would clear the memtable, since it had 162 // invalid chunks in it, but we wouldn't purge the hasCache. Later writes which 163 // attempted to reference the chunks which had made it into the hasCache would 164 // succeed. 165 // 166 // One such concrete pattern for doing this is implemented below. We do: 167 // 168 // 1) Put a new chunk to the database -- C1. 169 // 170 // 2) Run a GC. 171 // 172 // 3) Put a new chunk to the database -- C2. 173 // 174 // 4) Call NBS.Commit() with a stale last hash.Hash. This causes us to cache C2 175 // as present in the store, but it does not get written to disk, because the 176 // optimistic concurrency control on the value of the current root hash fails. 177 // 178 // 5) Put a chunk referencing C1 to the database -- R1. 179 // 180 // 5) Call NBS.Commit(). This causes ErrDanglingRef. C1 was written before the 181 // GC and is no longer in the store. C2 is also cleared from the pending write 182 // set. 183 // 184 // 6) Put a chunk referencing C2 to the database -- R2. 185 // 186 // 7) Call NBS.Commit(). This should fail, since R2 references C2 and C2 is not 187 // in the store. However, C2 is in the cache as a result of step #4, and so 188 // this does not fail. R2 gets written to disk with a dangling reference to C2. 189 func testGarbageCollectionHasCacheDataCorruptionBugFix(t *testing.T) { 190 ctx := context.Background() 191 192 d, err := os.MkdirTemp(t.TempDir(), "hascachetest-") 193 require.NoError(t, err) 194 195 ddb, err := doltdb.LoadDoltDB(ctx, types.Format_DOLT, "file://"+d, filesys.LocalFS) 196 require.NoError(t, err) 197 defer ddb.Close() 198 199 err = ddb.WriteEmptyRepo(ctx, "main", "Aaron Son", "aaron@dolthub.com") 200 require.NoError(t, err) 201 202 root, err := ddb.NomsRoot(ctx) 203 require.NoError(t, err) 204 205 ns := ddb.NodeStore() 206 207 c1 := newIntMap(t, ctx, ns, 1, 1) 208 _, err = ns.Write(ctx, c1.Node()) 209 require.NoError(t, err) 210 211 err = ddb.GC(ctx, nil) 212 require.NoError(t, err) 213 214 c2 := newIntMap(t, ctx, ns, 2, 2) 215 _, err = ns.Write(ctx, c2.Node()) 216 require.NoError(t, err) 217 218 success, err := ddb.CommitRoot(ctx, c2.HashOf(), c2.HashOf()) 219 require.NoError(t, err) 220 require.False(t, success, "committing the root with a last hash which does not match the current root must fail") 221 222 r1 := newAddrMap(t, ctx, ns, "r1", c1.HashOf()) 223 _, err = ns.Write(ctx, r1.Node()) 224 require.NoError(t, err) 225 226 success, err = ddb.CommitRoot(ctx, root, root) 227 require.True(t, errors.Is(err, nbs.ErrDanglingRef), "committing a reference to just-collected c1 must fail with ErrDanglingRef") 228 229 r2 := newAddrMap(t, ctx, ns, "r2", c2.HashOf()) 230 _, err = ns.Write(ctx, r2.Node()) 231 require.NoError(t, err) 232 233 success, err = ddb.CommitRoot(ctx, root, root) 234 require.True(t, errors.Is(err, nbs.ErrDanglingRef), "committing a reference to c2, which was erased with the ErrDanglingRef above, must also fail with ErrDanglingRef") 235 } 236 237 func newIntMap(t *testing.T, ctx context.Context, ns tree.NodeStore, k, v int8) prolly.Map { 238 desc := val.NewTupleDescriptor(val.Type{ 239 Enc: val.Int8Enc, 240 Nullable: false, 241 }) 242 243 tb := val.NewTupleBuilder(desc) 244 tb.PutInt8(0, k) 245 keyTuple := tb.Build(ns.Pool()) 246 247 tb.PutInt8(0, v) 248 valueTuple := tb.Build(ns.Pool()) 249 250 m, err := prolly.NewMapFromTuples(ctx, ns, desc, desc, keyTuple, valueTuple) 251 require.NoError(t, err) 252 return m 253 } 254 255 func newAddrMap(t *testing.T, ctx context.Context, ns tree.NodeStore, key string, h hash.Hash) prolly.AddressMap { 256 m, err := prolly.NewEmptyAddressMap(ns) 257 require.NoError(t, err) 258 259 editor := m.Editor() 260 err = editor.Add(ctx, key, h) 261 require.NoError(t, err) 262 263 m, err = editor.Flush(ctx) 264 require.NoError(t, err) 265 266 return m 267 }