github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/datas/pull/puller_test.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package pull 16 17 import ( 18 "context" 19 "encoding/json" 20 "errors" 21 "os" 22 "path/filepath" 23 "sync" 24 "testing" 25 26 "github.com/google/uuid" 27 "github.com/stretchr/testify/assert" 28 "github.com/stretchr/testify/require" 29 30 "github.com/dolthub/dolt/go/store/d" 31 "github.com/dolthub/dolt/go/store/datas" 32 "github.com/dolthub/dolt/go/store/hash" 33 "github.com/dolthub/dolt/go/store/nbs" 34 "github.com/dolthub/dolt/go/store/prolly/tree" 35 "github.com/dolthub/dolt/go/store/types" 36 "github.com/dolthub/dolt/go/store/util/clienttest" 37 ) 38 39 func TestNbsPuller(t *testing.T) { 40 testPuller(t, func(ctx context.Context) (types.ValueReadWriter, datas.Database) { 41 dir := filepath.Join(os.TempDir(), uuid.New().String()) 42 err := os.MkdirAll(dir, os.ModePerm) 43 require.NoError(t, err) 44 45 nbf := types.Format_Default.VersionString() 46 q := nbs.NewUnlimitedMemQuotaProvider() 47 st, err := nbs.NewLocalStore(ctx, nbf, dir, clienttest.DefaultMemTableSize, q) 48 require.NoError(t, err) 49 50 ns := tree.NewNodeStore(st) 51 vs := types.NewValueStore(st) 52 return vs, datas.NewTypesDatabase(vs, ns) 53 }) 54 } 55 56 func TestChunkJournalPuller(t *testing.T) { 57 testPuller(t, func(ctx context.Context) (types.ValueReadWriter, datas.Database) { 58 dir := filepath.Join(os.TempDir(), uuid.New().String()) 59 err := os.MkdirAll(dir, os.ModePerm) 60 require.NoError(t, err) 61 62 nbf := types.Format_Default.VersionString() 63 q := nbs.NewUnlimitedMemQuotaProvider() 64 65 st, err := nbs.NewLocalJournalingStore(ctx, nbf, dir, q) 66 require.NoError(t, err) 67 68 ns := tree.NewNodeStore(st) 69 vs := types.NewValueStore(st) 70 return vs, datas.NewTypesDatabase(vs, ns) 71 }) 72 } 73 74 func addTableValues(ctx context.Context, vrw types.ValueReadWriter, m types.Map, tableName string, alternatingKeyVals ...types.Value) (types.Map, error) { 75 val, ok, err := m.MaybeGet(ctx, types.String(tableName)) 76 77 if err != nil { 78 return types.EmptyMap, err 79 } 80 81 var tblMap types.Map 82 if ok { 83 mv, err := val.(types.Ref).TargetValue(ctx, vrw) 84 85 if err != nil { 86 return types.EmptyMap, err 87 } 88 89 me := mv.(types.Map).Edit() 90 91 for i := 0; i < len(alternatingKeyVals); i += 2 { 92 me.Set(alternatingKeyVals[i], alternatingKeyVals[i+1]) 93 } 94 95 tblMap, err = me.Map(ctx) 96 97 if err != nil { 98 return types.EmptyMap, err 99 } 100 } else { 101 tblMap, err = types.NewMap(ctx, vrw, alternatingKeyVals...) 102 103 if err != nil { 104 return types.EmptyMap, err 105 } 106 } 107 108 tblRef, err := writeValAndGetRef(ctx, vrw, tblMap) 109 110 if err != nil { 111 return types.EmptyMap, err 112 } 113 114 me := m.Edit() 115 me.Set(types.String(tableName), tblRef) 116 return me.Map(ctx) 117 } 118 119 func deleteTableValues(ctx context.Context, vrw types.ValueReadWriter, m types.Map, tableName string, keys ...types.Value) (types.Map, error) { 120 if len(keys) == 0 { 121 return m, nil 122 } 123 124 val, ok, err := m.MaybeGet(ctx, types.String(tableName)) 125 126 if err != nil { 127 return types.EmptyMap, err 128 } 129 130 if !ok { 131 return types.EmptyMap, errors.New("can't delete from table that wasn't created") 132 } 133 134 mv, err := val.(types.Ref).TargetValue(ctx, vrw) 135 136 if err != nil { 137 return types.EmptyMap, err 138 } 139 140 me := mv.(types.Map).Edit() 141 for _, k := range keys { 142 me.Remove(k) 143 } 144 145 tblMap, err := me.Map(ctx) 146 147 if err != nil { 148 return types.EmptyMap, err 149 } 150 151 tblRef, err := writeValAndGetRef(ctx, vrw, tblMap) 152 153 if err != nil { 154 return types.EmptyMap, err 155 } 156 157 me = m.Edit() 158 me.Set(types.String(tableName), tblRef) 159 return me.Map(ctx) 160 } 161 162 type datasFactory func(context.Context) (types.ValueReadWriter, datas.Database) 163 164 func testPuller(t *testing.T, makeDB datasFactory) { 165 ctx := context.Background() 166 vs, db := makeDB(ctx) 167 defer db.Close() 168 169 deltas := []struct { 170 name string 171 sets map[string][]types.Value 172 deletes map[string][]types.Value 173 tblDeletes []string 174 }{ 175 { 176 "empty", 177 map[string][]types.Value{}, 178 map[string][]types.Value{}, 179 []string{}, 180 }, 181 { 182 "employees", 183 map[string][]types.Value{ 184 "employees": { 185 mustTuple(types.NewTuple(vs.Format(), types.String("Hendriks"), types.String("Brian"))), 186 mustTuple(types.NewTuple(vs.Format(), types.String("Software Engineer"), types.Int(39))), 187 mustTuple(types.NewTuple(vs.Format(), types.String("Sehn"), types.String("Timothy"))), 188 mustTuple(types.NewTuple(vs.Format(), types.String("CEO"), types.Int(39))), 189 mustTuple(types.NewTuple(vs.Format(), types.String("Son"), types.String("Aaron"))), 190 mustTuple(types.NewTuple(vs.Format(), types.String("Software Engineer"), types.Int(36))), 191 }, 192 }, 193 map[string][]types.Value{}, 194 []string{}, 195 }, 196 { 197 "ip to country", 198 map[string][]types.Value{ 199 "ip_to_country": { 200 types.String("5.183.230.1"), types.String("BZ"), 201 types.String("5.180.188.1"), types.String("AU"), 202 types.String("2.56.9.244"), types.String("GB"), 203 types.String("20.175.7.56"), types.String("US"), 204 }, 205 }, 206 map[string][]types.Value{}, 207 []string{}, 208 }, 209 { 210 "more ips", 211 map[string][]types.Value{ 212 "ip_to_country": { 213 types.String("20.175.193.85"), types.String("US"), 214 types.String("5.196.110.191"), types.String("FR"), 215 types.String("4.14.242.160"), types.String("CA"), 216 }, 217 }, 218 map[string][]types.Value{}, 219 []string{}, 220 }, 221 { 222 "more employees", 223 map[string][]types.Value{ 224 "employees": { 225 mustTuple(types.NewTuple(vs.Format(), types.String("Jesuele"), types.String("Matt"))), 226 mustTuple(types.NewTuple(vs.Format(), types.String("Software Engineer"), types.NullValue)), 227 mustTuple(types.NewTuple(vs.Format(), types.String("Wilkins"), types.String("Daylon"))), 228 mustTuple(types.NewTuple(vs.Format(), types.String("Software Engineer"), types.NullValue)), 229 mustTuple(types.NewTuple(vs.Format(), types.String("Katie"), types.String("McCulloch"))), 230 mustTuple(types.NewTuple(vs.Format(), types.String("Software Engineer"), types.NullValue)), 231 }, 232 }, 233 map[string][]types.Value{}, 234 []string{}, 235 }, 236 { 237 "delete ips table", 238 map[string][]types.Value{}, 239 map[string][]types.Value{}, 240 []string{"ip_to_country"}, 241 }, 242 { 243 "delete some employees", 244 map[string][]types.Value{}, 245 map[string][]types.Value{ 246 "employees": { 247 mustTuple(types.NewTuple(vs.Format(), types.String("Hendriks"), types.String("Brian"))), 248 mustTuple(types.NewTuple(vs.Format(), types.String("Sehn"), types.String("Timothy"))), 249 mustTuple(types.NewTuple(vs.Format(), types.String("Son"), types.String("Aaron"))), 250 }, 251 }, 252 []string{}, 253 }, 254 } 255 256 ds, err := db.GetDataset(ctx, "ds") 257 require.NoError(t, err) 258 rootMap, err := types.NewMap(ctx, vs) 259 require.NoError(t, err) 260 261 var parent []hash.Hash 262 states := map[string]hash.Hash{} 263 for _, delta := range deltas { 264 for tbl, sets := range delta.sets { 265 rootMap, err = addTableValues(ctx, vs, rootMap, tbl, sets...) 266 require.NoError(t, err) 267 } 268 269 for tbl, dels := range delta.deletes { 270 rootMap, err = deleteTableValues(ctx, vs, rootMap, tbl, dels...) 271 require.NoError(t, err) 272 } 273 274 me := rootMap.Edit() 275 for _, tbl := range delta.tblDeletes { 276 me.Remove(types.String(tbl)) 277 } 278 rootMap, err = me.Map(ctx) 279 require.NoError(t, err) 280 281 commitOpts := datas.CommitOptions{Parents: parent} 282 ds, err = db.Commit(ctx, ds, rootMap, commitOpts) 283 require.NoError(t, err) 284 285 dsAddr, ok := ds.MaybeHeadAddr() 286 require.True(t, ok) 287 288 parent = []hash.Hash{dsAddr} 289 290 states[delta.name] = dsAddr 291 } 292 293 tbl, err := makeABigTable(ctx, vs) 294 require.NoError(t, err) 295 296 tblRef, err := writeValAndGetRef(ctx, vs, tbl) 297 require.NoError(t, err) 298 299 me := rootMap.Edit() 300 me.Set(types.String("big_table"), tblRef) 301 rootMap, err = me.Map(ctx) 302 require.NoError(t, err) 303 304 commitOpts := datas.CommitOptions{Parents: parent} 305 ds, err = db.Commit(ctx, ds, rootMap, commitOpts) 306 require.NoError(t, err) 307 308 addr, ok := ds.MaybeHeadAddr() 309 require.True(t, ok) 310 311 states["add big table"] = addr 312 313 for k, rootAddr := range states { 314 t.Run(k, func(t *testing.T) { 315 statsCh := make(chan Stats, 16) 316 wg := new(sync.WaitGroup) 317 wg.Add(1) 318 go func() { 319 defer wg.Done() 320 for evt := range statsCh { 321 jsonBytes, err := json.Marshal(evt) 322 if err == nil { 323 t.Logf("stats: %s\n", string(jsonBytes)) 324 } 325 } 326 }() 327 328 sinkvs, sinkdb := makeDB(ctx) 329 defer sinkdb.Close() 330 331 tmpDir := filepath.Join(os.TempDir(), uuid.New().String()) 332 err = os.MkdirAll(tmpDir, os.ModePerm) 333 require.NoError(t, err) 334 waf, err := types.WalkAddrsForChunkStore(datas.ChunkStoreFromDatabase(db)) 335 require.NoError(t, err) 336 plr, err := NewPuller(ctx, tmpDir, 128, datas.ChunkStoreFromDatabase(db), datas.ChunkStoreFromDatabase(sinkdb), waf, []hash.Hash{rootAddr}, statsCh) 337 require.NoError(t, err) 338 339 err = plr.Pull(ctx) 340 close(statsCh) 341 require.NoError(t, err) 342 wg.Wait() 343 344 sinkDS, err := sinkdb.GetDataset(ctx, "ds") 345 require.NoError(t, err) 346 sinkDS, err = sinkdb.FastForward(ctx, sinkDS, rootAddr, "") 347 require.NoError(t, err) 348 349 require.NoError(t, err) 350 sinkRootAddr, ok := sinkDS.MaybeHeadAddr() 351 require.True(t, ok) 352 353 eq, err := pullerAddrEquality(ctx, rootAddr, sinkRootAddr, vs, sinkvs) 354 require.NoError(t, err) 355 assert.True(t, eq) 356 }) 357 } 358 } 359 360 func makeABigTable(ctx context.Context, vrw types.ValueReadWriter) (types.Map, error) { 361 m, err := types.NewMap(ctx, vrw) 362 363 if err != nil { 364 return types.EmptyMap, nil 365 } 366 367 me := m.Edit() 368 369 for i := 0; i < 256*1024; i++ { 370 tpl, err := types.NewTuple(vrw.Format(), types.UUID(uuid.New()), types.String(uuid.New().String()), types.Float(float64(i))) 371 372 if err != nil { 373 return types.EmptyMap, err 374 } 375 376 me.Set(types.Int(i), tpl) 377 } 378 379 return me.Map(ctx) 380 } 381 382 func pullerAddrEquality(ctx context.Context, expected, actual hash.Hash, src, sink types.ValueReadWriter) (bool, error) { 383 if expected != actual { 384 return false, nil 385 } 386 387 expectedVal, err := src.ReadValue(ctx, expected) 388 if err != nil { 389 return false, err 390 } 391 actualVal, err := sink.ReadValue(ctx, actual) 392 if err != nil { 393 return false, err 394 } 395 396 return expectedVal.Equals(actualVal), nil 397 } 398 399 func writeValAndGetRef(ctx context.Context, vrw types.ValueReadWriter, val types.Value) (types.Ref, error) { 400 valRef, err := types.NewRef(val, vrw.Format()) 401 402 if err != nil { 403 return types.Ref{}, err 404 } 405 406 targetVal, err := valRef.TargetValue(ctx, vrw) 407 408 if err != nil { 409 return types.Ref{}, err 410 } 411 412 if targetVal == nil { 413 _, err = vrw.WriteValue(ctx, val) 414 415 if err != nil { 416 return types.Ref{}, err 417 } 418 } 419 420 return valRef, err 421 } 422 423 func mustTuple(val types.Tuple, err error) types.Tuple { 424 d.PanicIfError(err) 425 return val 426 }