github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/datas/pull/puller_test.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package pull
    16  
    17  import (
    18  	"context"
    19  	"encoding/json"
    20  	"errors"
    21  	"os"
    22  	"path/filepath"
    23  	"sync"
    24  	"testing"
    25  
    26  	"github.com/google/uuid"
    27  	"github.com/stretchr/testify/assert"
    28  	"github.com/stretchr/testify/require"
    29  
    30  	"github.com/dolthub/dolt/go/store/d"
    31  	"github.com/dolthub/dolt/go/store/datas"
    32  	"github.com/dolthub/dolt/go/store/hash"
    33  	"github.com/dolthub/dolt/go/store/nbs"
    34  	"github.com/dolthub/dolt/go/store/prolly/tree"
    35  	"github.com/dolthub/dolt/go/store/types"
    36  	"github.com/dolthub/dolt/go/store/util/clienttest"
    37  )
    38  
    39  func TestNbsPuller(t *testing.T) {
    40  	testPuller(t, func(ctx context.Context) (types.ValueReadWriter, datas.Database) {
    41  		dir := filepath.Join(os.TempDir(), uuid.New().String())
    42  		err := os.MkdirAll(dir, os.ModePerm)
    43  		require.NoError(t, err)
    44  
    45  		nbf := types.Format_Default.VersionString()
    46  		q := nbs.NewUnlimitedMemQuotaProvider()
    47  		st, err := nbs.NewLocalStore(ctx, nbf, dir, clienttest.DefaultMemTableSize, q)
    48  		require.NoError(t, err)
    49  
    50  		ns := tree.NewNodeStore(st)
    51  		vs := types.NewValueStore(st)
    52  		return vs, datas.NewTypesDatabase(vs, ns)
    53  	})
    54  }
    55  
    56  func TestChunkJournalPuller(t *testing.T) {
    57  	testPuller(t, func(ctx context.Context) (types.ValueReadWriter, datas.Database) {
    58  		dir := filepath.Join(os.TempDir(), uuid.New().String())
    59  		err := os.MkdirAll(dir, os.ModePerm)
    60  		require.NoError(t, err)
    61  
    62  		nbf := types.Format_Default.VersionString()
    63  		q := nbs.NewUnlimitedMemQuotaProvider()
    64  
    65  		st, err := nbs.NewLocalJournalingStore(ctx, nbf, dir, q)
    66  		require.NoError(t, err)
    67  
    68  		ns := tree.NewNodeStore(st)
    69  		vs := types.NewValueStore(st)
    70  		return vs, datas.NewTypesDatabase(vs, ns)
    71  	})
    72  }
    73  
    74  func addTableValues(ctx context.Context, vrw types.ValueReadWriter, m types.Map, tableName string, alternatingKeyVals ...types.Value) (types.Map, error) {
    75  	val, ok, err := m.MaybeGet(ctx, types.String(tableName))
    76  
    77  	if err != nil {
    78  		return types.EmptyMap, err
    79  	}
    80  
    81  	var tblMap types.Map
    82  	if ok {
    83  		mv, err := val.(types.Ref).TargetValue(ctx, vrw)
    84  
    85  		if err != nil {
    86  			return types.EmptyMap, err
    87  		}
    88  
    89  		me := mv.(types.Map).Edit()
    90  
    91  		for i := 0; i < len(alternatingKeyVals); i += 2 {
    92  			me.Set(alternatingKeyVals[i], alternatingKeyVals[i+1])
    93  		}
    94  
    95  		tblMap, err = me.Map(ctx)
    96  
    97  		if err != nil {
    98  			return types.EmptyMap, err
    99  		}
   100  	} else {
   101  		tblMap, err = types.NewMap(ctx, vrw, alternatingKeyVals...)
   102  
   103  		if err != nil {
   104  			return types.EmptyMap, err
   105  		}
   106  	}
   107  
   108  	tblRef, err := writeValAndGetRef(ctx, vrw, tblMap)
   109  
   110  	if err != nil {
   111  		return types.EmptyMap, err
   112  	}
   113  
   114  	me := m.Edit()
   115  	me.Set(types.String(tableName), tblRef)
   116  	return me.Map(ctx)
   117  }
   118  
   119  func deleteTableValues(ctx context.Context, vrw types.ValueReadWriter, m types.Map, tableName string, keys ...types.Value) (types.Map, error) {
   120  	if len(keys) == 0 {
   121  		return m, nil
   122  	}
   123  
   124  	val, ok, err := m.MaybeGet(ctx, types.String(tableName))
   125  
   126  	if err != nil {
   127  		return types.EmptyMap, err
   128  	}
   129  
   130  	if !ok {
   131  		return types.EmptyMap, errors.New("can't delete from table that wasn't created")
   132  	}
   133  
   134  	mv, err := val.(types.Ref).TargetValue(ctx, vrw)
   135  
   136  	if err != nil {
   137  		return types.EmptyMap, err
   138  	}
   139  
   140  	me := mv.(types.Map).Edit()
   141  	for _, k := range keys {
   142  		me.Remove(k)
   143  	}
   144  
   145  	tblMap, err := me.Map(ctx)
   146  
   147  	if err != nil {
   148  		return types.EmptyMap, err
   149  	}
   150  
   151  	tblRef, err := writeValAndGetRef(ctx, vrw, tblMap)
   152  
   153  	if err != nil {
   154  		return types.EmptyMap, err
   155  	}
   156  
   157  	me = m.Edit()
   158  	me.Set(types.String(tableName), tblRef)
   159  	return me.Map(ctx)
   160  }
   161  
   162  type datasFactory func(context.Context) (types.ValueReadWriter, datas.Database)
   163  
   164  func testPuller(t *testing.T, makeDB datasFactory) {
   165  	ctx := context.Background()
   166  	vs, db := makeDB(ctx)
   167  	defer db.Close()
   168  
   169  	deltas := []struct {
   170  		name       string
   171  		sets       map[string][]types.Value
   172  		deletes    map[string][]types.Value
   173  		tblDeletes []string
   174  	}{
   175  		{
   176  			"empty",
   177  			map[string][]types.Value{},
   178  			map[string][]types.Value{},
   179  			[]string{},
   180  		},
   181  		{
   182  			"employees",
   183  			map[string][]types.Value{
   184  				"employees": {
   185  					mustTuple(types.NewTuple(vs.Format(), types.String("Hendriks"), types.String("Brian"))),
   186  					mustTuple(types.NewTuple(vs.Format(), types.String("Software Engineer"), types.Int(39))),
   187  					mustTuple(types.NewTuple(vs.Format(), types.String("Sehn"), types.String("Timothy"))),
   188  					mustTuple(types.NewTuple(vs.Format(), types.String("CEO"), types.Int(39))),
   189  					mustTuple(types.NewTuple(vs.Format(), types.String("Son"), types.String("Aaron"))),
   190  					mustTuple(types.NewTuple(vs.Format(), types.String("Software Engineer"), types.Int(36))),
   191  				},
   192  			},
   193  			map[string][]types.Value{},
   194  			[]string{},
   195  		},
   196  		{
   197  			"ip to country",
   198  			map[string][]types.Value{
   199  				"ip_to_country": {
   200  					types.String("5.183.230.1"), types.String("BZ"),
   201  					types.String("5.180.188.1"), types.String("AU"),
   202  					types.String("2.56.9.244"), types.String("GB"),
   203  					types.String("20.175.7.56"), types.String("US"),
   204  				},
   205  			},
   206  			map[string][]types.Value{},
   207  			[]string{},
   208  		},
   209  		{
   210  			"more ips",
   211  			map[string][]types.Value{
   212  				"ip_to_country": {
   213  					types.String("20.175.193.85"), types.String("US"),
   214  					types.String("5.196.110.191"), types.String("FR"),
   215  					types.String("4.14.242.160"), types.String("CA"),
   216  				},
   217  			},
   218  			map[string][]types.Value{},
   219  			[]string{},
   220  		},
   221  		{
   222  			"more employees",
   223  			map[string][]types.Value{
   224  				"employees": {
   225  					mustTuple(types.NewTuple(vs.Format(), types.String("Jesuele"), types.String("Matt"))),
   226  					mustTuple(types.NewTuple(vs.Format(), types.String("Software Engineer"), types.NullValue)),
   227  					mustTuple(types.NewTuple(vs.Format(), types.String("Wilkins"), types.String("Daylon"))),
   228  					mustTuple(types.NewTuple(vs.Format(), types.String("Software Engineer"), types.NullValue)),
   229  					mustTuple(types.NewTuple(vs.Format(), types.String("Katie"), types.String("McCulloch"))),
   230  					mustTuple(types.NewTuple(vs.Format(), types.String("Software Engineer"), types.NullValue)),
   231  				},
   232  			},
   233  			map[string][]types.Value{},
   234  			[]string{},
   235  		},
   236  		{
   237  			"delete ips table",
   238  			map[string][]types.Value{},
   239  			map[string][]types.Value{},
   240  			[]string{"ip_to_country"},
   241  		},
   242  		{
   243  			"delete some employees",
   244  			map[string][]types.Value{},
   245  			map[string][]types.Value{
   246  				"employees": {
   247  					mustTuple(types.NewTuple(vs.Format(), types.String("Hendriks"), types.String("Brian"))),
   248  					mustTuple(types.NewTuple(vs.Format(), types.String("Sehn"), types.String("Timothy"))),
   249  					mustTuple(types.NewTuple(vs.Format(), types.String("Son"), types.String("Aaron"))),
   250  				},
   251  			},
   252  			[]string{},
   253  		},
   254  	}
   255  
   256  	ds, err := db.GetDataset(ctx, "ds")
   257  	require.NoError(t, err)
   258  	rootMap, err := types.NewMap(ctx, vs)
   259  	require.NoError(t, err)
   260  
   261  	var parent []hash.Hash
   262  	states := map[string]hash.Hash{}
   263  	for _, delta := range deltas {
   264  		for tbl, sets := range delta.sets {
   265  			rootMap, err = addTableValues(ctx, vs, rootMap, tbl, sets...)
   266  			require.NoError(t, err)
   267  		}
   268  
   269  		for tbl, dels := range delta.deletes {
   270  			rootMap, err = deleteTableValues(ctx, vs, rootMap, tbl, dels...)
   271  			require.NoError(t, err)
   272  		}
   273  
   274  		me := rootMap.Edit()
   275  		for _, tbl := range delta.tblDeletes {
   276  			me.Remove(types.String(tbl))
   277  		}
   278  		rootMap, err = me.Map(ctx)
   279  		require.NoError(t, err)
   280  
   281  		commitOpts := datas.CommitOptions{Parents: parent}
   282  		ds, err = db.Commit(ctx, ds, rootMap, commitOpts)
   283  		require.NoError(t, err)
   284  
   285  		dsAddr, ok := ds.MaybeHeadAddr()
   286  		require.True(t, ok)
   287  
   288  		parent = []hash.Hash{dsAddr}
   289  
   290  		states[delta.name] = dsAddr
   291  	}
   292  
   293  	tbl, err := makeABigTable(ctx, vs)
   294  	require.NoError(t, err)
   295  
   296  	tblRef, err := writeValAndGetRef(ctx, vs, tbl)
   297  	require.NoError(t, err)
   298  
   299  	me := rootMap.Edit()
   300  	me.Set(types.String("big_table"), tblRef)
   301  	rootMap, err = me.Map(ctx)
   302  	require.NoError(t, err)
   303  
   304  	commitOpts := datas.CommitOptions{Parents: parent}
   305  	ds, err = db.Commit(ctx, ds, rootMap, commitOpts)
   306  	require.NoError(t, err)
   307  
   308  	addr, ok := ds.MaybeHeadAddr()
   309  	require.True(t, ok)
   310  
   311  	states["add big table"] = addr
   312  
   313  	for k, rootAddr := range states {
   314  		t.Run(k, func(t *testing.T) {
   315  			statsCh := make(chan Stats, 16)
   316  			wg := new(sync.WaitGroup)
   317  			wg.Add(1)
   318  			go func() {
   319  				defer wg.Done()
   320  				for evt := range statsCh {
   321  					jsonBytes, err := json.Marshal(evt)
   322  					if err == nil {
   323  						t.Logf("stats: %s\n", string(jsonBytes))
   324  					}
   325  				}
   326  			}()
   327  
   328  			sinkvs, sinkdb := makeDB(ctx)
   329  			defer sinkdb.Close()
   330  
   331  			tmpDir := filepath.Join(os.TempDir(), uuid.New().String())
   332  			err = os.MkdirAll(tmpDir, os.ModePerm)
   333  			require.NoError(t, err)
   334  			waf, err := types.WalkAddrsForChunkStore(datas.ChunkStoreFromDatabase(db))
   335  			require.NoError(t, err)
   336  			plr, err := NewPuller(ctx, tmpDir, 128, datas.ChunkStoreFromDatabase(db), datas.ChunkStoreFromDatabase(sinkdb), waf, []hash.Hash{rootAddr}, statsCh)
   337  			require.NoError(t, err)
   338  
   339  			err = plr.Pull(ctx)
   340  			close(statsCh)
   341  			require.NoError(t, err)
   342  			wg.Wait()
   343  
   344  			sinkDS, err := sinkdb.GetDataset(ctx, "ds")
   345  			require.NoError(t, err)
   346  			sinkDS, err = sinkdb.FastForward(ctx, sinkDS, rootAddr, "")
   347  			require.NoError(t, err)
   348  
   349  			require.NoError(t, err)
   350  			sinkRootAddr, ok := sinkDS.MaybeHeadAddr()
   351  			require.True(t, ok)
   352  
   353  			eq, err := pullerAddrEquality(ctx, rootAddr, sinkRootAddr, vs, sinkvs)
   354  			require.NoError(t, err)
   355  			assert.True(t, eq)
   356  		})
   357  	}
   358  }
   359  
   360  func makeABigTable(ctx context.Context, vrw types.ValueReadWriter) (types.Map, error) {
   361  	m, err := types.NewMap(ctx, vrw)
   362  
   363  	if err != nil {
   364  		return types.EmptyMap, nil
   365  	}
   366  
   367  	me := m.Edit()
   368  
   369  	for i := 0; i < 256*1024; i++ {
   370  		tpl, err := types.NewTuple(vrw.Format(), types.UUID(uuid.New()), types.String(uuid.New().String()), types.Float(float64(i)))
   371  
   372  		if err != nil {
   373  			return types.EmptyMap, err
   374  		}
   375  
   376  		me.Set(types.Int(i), tpl)
   377  	}
   378  
   379  	return me.Map(ctx)
   380  }
   381  
   382  func pullerAddrEquality(ctx context.Context, expected, actual hash.Hash, src, sink types.ValueReadWriter) (bool, error) {
   383  	if expected != actual {
   384  		return false, nil
   385  	}
   386  
   387  	expectedVal, err := src.ReadValue(ctx, expected)
   388  	if err != nil {
   389  		return false, err
   390  	}
   391  	actualVal, err := sink.ReadValue(ctx, actual)
   392  	if err != nil {
   393  		return false, err
   394  	}
   395  
   396  	return expectedVal.Equals(actualVal), nil
   397  }
   398  
   399  func writeValAndGetRef(ctx context.Context, vrw types.ValueReadWriter, val types.Value) (types.Ref, error) {
   400  	valRef, err := types.NewRef(val, vrw.Format())
   401  
   402  	if err != nil {
   403  		return types.Ref{}, err
   404  	}
   405  
   406  	targetVal, err := valRef.TargetValue(ctx, vrw)
   407  
   408  	if err != nil {
   409  		return types.Ref{}, err
   410  	}
   411  
   412  	if targetVal == nil {
   413  		_, err = vrw.WriteValue(ctx, val)
   414  
   415  		if err != nil {
   416  			return types.Ref{}, err
   417  		}
   418  	}
   419  
   420  	return valRef, err
   421  }
   422  
// mustTuple adapts a (types.Tuple, error) result, such as the return of
// types.NewTuple, into a single value so it can be used inline in composite
// literals. The error is handed to d.PanicIfError (presumably panicking when
// err is non-nil — see package d); otherwise val is returned unchanged.
func mustTuple(val types.Tuple, err error) types.Tuple {
	d.PanicIfError(err)
	return val
}