github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/diff/async_differ_test.go (about)

     1  // Copyright 2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package diff
    16  
    17  import (
    18  	"context"
    19  	"testing"
    20  	"time"
    21  
    22  	"github.com/stretchr/testify/assert"
    23  	"github.com/stretchr/testify/require"
    24  
    25  	dtu "github.com/dolthub/dolt/go/libraries/doltcore/dtestutils"
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    27  	"github.com/dolthub/dolt/go/store/chunks"
    28  	"github.com/dolthub/dolt/go/store/constants"
    29  	"github.com/dolthub/dolt/go/store/types"
    30  )
    31  
    32  func TestAsyncDiffer(t *testing.T) {
    33  	ctx := context.Background()
    34  	storage := &chunks.MemoryStorage{}
    35  	vrw := types.NewValueStore(storage.NewView())
    36  
    37  	vals := []types.Value{
    38  		types.Uint(0), types.String("a"),
    39  		types.Uint(1), types.String("b"),
    40  		types.Uint(3), types.String("d"),
    41  		types.Uint(4), types.String("e"),
    42  		types.Uint(6), types.String("g"),
    43  		types.Uint(7), types.String("h"),
    44  		types.Uint(9), types.String("j"),
    45  		types.Uint(10), types.String("k"),
    46  		types.Uint(12), types.String("m"),
    47  		types.Uint(13), types.String("n"),
    48  		types.Uint(15), types.String("p"),
    49  		types.Uint(16), types.String("q"),
    50  		types.Uint(18), types.String("s"),
    51  		types.Uint(19), types.String("t"),
    52  		types.Uint(21), types.String("v"),
    53  		types.Uint(22), types.String("w"),
    54  		types.Uint(24), types.String("y"),
    55  		types.Uint(25), types.String("z"),
    56  	}
    57  
    58  	m1, err := types.NewMap(ctx, vrw, vals...)
    59  	require.NoError(t, err)
    60  
    61  	vals = []types.Value{
    62  		types.Uint(0), types.String("a"), // unchanged
    63  		//types.Uint(1), types.String("b"),		// deleted
    64  		types.Uint(2), types.String("c"), // added
    65  		types.Uint(3), types.String("d"), // unchanged
    66  		//types.Uint(4), types.String("e"),		// deleted
    67  		types.Uint(5), types.String("f"), // added
    68  		types.Uint(6), types.String("g"), // unchanged
    69  		//types.Uint(7), types.String("h"),		// deleted
    70  		types.Uint(8), types.String("i"), // added
    71  		types.Uint(9), types.String("j"), // unchanged
    72  		//types.Uint(10), types.String("k"),	// deleted
    73  		types.Uint(11), types.String("l"), // added
    74  		types.Uint(12), types.String("m2"), // changed
    75  		//types.Uint(13), types.String("n"),	// deleted
    76  		types.Uint(14), types.String("o"), // added
    77  		types.Uint(15), types.String("p2"), // changed
    78  		//types.Uint(16), types.String("q"),	// deleted
    79  		types.Uint(17), types.String("r"), // added
    80  		types.Uint(18), types.String("s2"), // changed
    81  		//types.Uint(19), types.String("t"),	// deleted
    82  		types.Uint(20), types.String("u"), // added
    83  		types.Uint(21), types.String("v2"), // changed
    84  		//types.Uint(22), types.String("w"),	// deleted
    85  		types.Uint(23), types.String("x"), // added
    86  		types.Uint(24), types.String("y2"), // changed
    87  		//types.Uint(25), types.String("z"),	// deleted
    88  	}
    89  	m2, err := types.NewMap(ctx, vrw, vals...)
    90  	require.NoError(t, err)
    91  
    92  	tests := []struct {
    93  		name           string
    94  		createdStarted func(ctx context.Context, m1, m2 types.Map) *AsyncDiffer
    95  		expectedStats  map[types.DiffChangeType]uint64
    96  	}{
    97  		{
    98  			name: "iter all",
    99  			createdStarted: func(ctx context.Context, m1, m2 types.Map) *AsyncDiffer {
   100  				ad := NewAsyncDiffer(4)
   101  				ad.Start(ctx, m1, m2)
   102  				return ad
   103  			},
   104  			expectedStats: map[types.DiffChangeType]uint64{
   105  				types.DiffChangeModified: 5,
   106  				types.DiffChangeAdded:    8,
   107  				types.DiffChangeRemoved:  9,
   108  			},
   109  		},
   110  
   111  		{
   112  			name: "iter range starting with nil",
   113  			createdStarted: func(ctx context.Context, m1, m2 types.Map) *AsyncDiffer {
   114  				ad := NewAsyncDiffer(4)
   115  				ad.StartWithRange(ctx, m1, m2, nil, func(ctx context.Context, value types.Value) (bool, bool, error) {
   116  					return true, false, nil
   117  				})
   118  				return ad
   119  			},
   120  			expectedStats: map[types.DiffChangeType]uint64{
   121  				types.DiffChangeModified: 5,
   122  				types.DiffChangeAdded:    8,
   123  				types.DiffChangeRemoved:  9,
   124  			},
   125  		},
   126  
   127  		{
   128  			name: "iter range staring with Null Value",
   129  			createdStarted: func(ctx context.Context, m1, m2 types.Map) *AsyncDiffer {
   130  				ad := NewAsyncDiffer(4)
   131  				ad.StartWithRange(ctx, m1, m2, types.NullValue, func(ctx context.Context, value types.Value) (bool, bool, error) {
   132  					return true, false, nil
   133  				})
   134  				return ad
   135  			},
   136  			expectedStats: map[types.DiffChangeType]uint64{
   137  				types.DiffChangeModified: 5,
   138  				types.DiffChangeAdded:    8,
   139  				types.DiffChangeRemoved:  9,
   140  			},
   141  		},
   142  
   143  		{
   144  			name: "iter range less than 17",
   145  			createdStarted: func(ctx context.Context, m1, m2 types.Map) *AsyncDiffer {
   146  				ad := NewAsyncDiffer(4)
   147  				end := types.Uint(27)
   148  				ad.StartWithRange(ctx, m1, m2, types.NullValue, func(ctx context.Context, value types.Value) (bool, bool, error) {
   149  					valid, err := value.Less(ctx, vrw.Format(), end)
   150  					return valid, false, err
   151  				})
   152  				return ad
   153  			},
   154  			expectedStats: map[types.DiffChangeType]uint64{
   155  				types.DiffChangeModified: 5,
   156  				types.DiffChangeAdded:    8,
   157  				types.DiffChangeRemoved:  9,
   158  			},
   159  		},
   160  
   161  		{
   162  			name: "iter range less than 15",
   163  			createdStarted: func(ctx context.Context, m1, m2 types.Map) *AsyncDiffer {
   164  				ad := NewAsyncDiffer(4)
   165  				end := types.Uint(15)
   166  				ad.StartWithRange(ctx, m1, m2, types.NullValue, func(ctx context.Context, value types.Value) (bool, bool, error) {
   167  					valid, err := value.Less(ctx, vrw.Format(), end)
   168  					return valid, false, err
   169  				})
   170  				return ad
   171  			},
   172  			expectedStats: map[types.DiffChangeType]uint64{
   173  				types.DiffChangeModified: 1,
   174  				types.DiffChangeAdded:    5,
   175  				types.DiffChangeRemoved:  5,
   176  			},
   177  		},
   178  
   179  		{
   180  			name: "iter range 10 < 15",
   181  			createdStarted: func(ctx context.Context, m1, m2 types.Map) *AsyncDiffer {
   182  				ad := NewAsyncDiffer(4)
   183  				start := types.Uint(10)
   184  				end := types.Uint(15)
   185  				ad.StartWithRange(ctx, m1, m2, start, func(ctx context.Context, value types.Value) (bool, bool, error) {
   186  					valid, err := value.Less(ctx, vrw.Format(), end)
   187  					return valid, false, err
   188  				})
   189  				return ad
   190  			},
   191  			expectedStats: map[types.DiffChangeType]uint64{
   192  				types.DiffChangeModified: 1,
   193  				types.DiffChangeAdded:    2,
   194  				types.DiffChangeRemoved:  2,
   195  			},
   196  		},
   197  	}
   198  
   199  	for _, test := range tests {
   200  		t.Run(test.name, func(t *testing.T) {
   201  			ctx := context.Background()
   202  			ad := test.createdStarted(ctx, m1, m2)
   203  			err := readAll(ad)
   204  			require.NoError(t, err)
   205  			require.Equal(t, test.expectedStats, ad.diffStats)
   206  		})
   207  	}
   208  
   209  	t.Run("can close without reading all", func(t *testing.T) {
   210  		ad := NewAsyncDiffer(1)
   211  		ad.Start(ctx, m1, m2)
   212  		res, more, err := ad.GetDiffs(1, -1)
   213  		require.NoError(t, err)
   214  		assert.True(t, more)
   215  		assert.Len(t, res, 1)
   216  		err = ad.Close()
   217  		assert.NoError(t, err)
   218  	})
   219  
   220  	t.Run("can filter based on change type", func(t *testing.T) {
   221  		ad := NewAsyncDiffer(20)
   222  		ad.Start(ctx, m1, m2)
   223  		res, more, err := ad.GetDiffs(10, -1)
   224  		require.NoError(t, err)
   225  		assert.True(t, more)
   226  		assert.Len(t, res, 10)
   227  		err = ad.Close()
   228  		assert.NoError(t, err)
   229  
   230  		ad = NewAsyncDiffer(20)
   231  		ad.Start(ctx, m1, m2)
   232  		res, more, err = ad.GetDiffsWithFilter(10, 20*time.Second, types.DiffChangeModified)
   233  		require.NoError(t, err)
   234  		assert.False(t, more)
   235  		assert.Len(t, res, 5)
   236  		err = ad.Close()
   237  		assert.NoError(t, err)
   238  
   239  		ad = NewAsyncDiffer(20)
   240  		ad.Start(ctx, m1, m2)
   241  		res, more, err = ad.GetDiffsWithFilter(6, -1, types.DiffChangeAdded)
   242  		require.NoError(t, err)
   243  		assert.True(t, more)
   244  		assert.Len(t, res, 6)
   245  		err = ad.Close()
   246  		assert.NoError(t, err)
   247  	})
   248  
   249  	k1Row1Vals := []types.Value{c1Tag, types.Uint(3), c2Tag, types.String("d")}
   250  	k1Vals, err := getKeylessRow(ctx, k1Row1Vals)
   251  	assert.NoError(t, err)
   252  	k1, err := types.NewMap(ctx, vrw, k1Vals...)
   253  	assert.NoError(t, err)
   254  
   255  	// Delete one row, add two rows
   256  	k2Row1Vals := []types.Value{c1Tag, types.Uint(4), c2Tag, types.String("d")}
   257  	k2Vals1, err := getKeylessRow(ctx, k2Row1Vals)
   258  	assert.NoError(t, err)
   259  	k2Row2Vals := []types.Value{c1Tag, types.Uint(1), c2Tag, types.String("e")}
   260  	k2Vals2, err := getKeylessRow(ctx, k2Row2Vals)
   261  	assert.NoError(t, err)
   262  	k2Vals := append(k2Vals1, k2Vals2...)
   263  	k2, err := types.NewMap(ctx, vrw, k2Vals...)
   264  	require.NoError(t, err)
   265  
   266  	t.Run("can diff and filter keyless tables", func(t *testing.T) {
   267  		kd := &keylessDiffer{AsyncDiffer: NewAsyncDiffer(20)}
   268  		kd.Start(ctx, k1, k2)
   269  		res, more, err := kd.GetDiffs(10, 20*time.Second)
   270  		require.NoError(t, err)
   271  		assert.False(t, more)
   272  		assert.Len(t, res, 3)
   273  		err = kd.Close()
   274  		assert.NoError(t, err)
   275  
   276  		kd = &keylessDiffer{AsyncDiffer: NewAsyncDiffer(20)}
   277  		kd.Start(ctx, k1, k2)
   278  		res, more, err = kd.GetDiffsWithFilter(10, 20*time.Second, types.DiffChangeModified)
   279  		require.NoError(t, err)
   280  		assert.False(t, more)
   281  		assert.Len(t, res, 0)
   282  		err = kd.Close()
   283  		assert.NoError(t, err)
   284  
   285  		kd = &keylessDiffer{AsyncDiffer: NewAsyncDiffer(20)}
   286  		kd.Start(ctx, k1, k2)
   287  		res, more, err = kd.GetDiffsWithFilter(6, -1, types.DiffChangeAdded)
   288  		require.NoError(t, err)
   289  		assert.False(t, more)
   290  		assert.Len(t, res, 2)
   291  		err = kd.Close()
   292  		assert.NoError(t, err)
   293  	})
   294  }
   295  
   296  func readAll(ad *AsyncDiffer) error {
   297  	for {
   298  		_, more, err := ad.GetDiffs(10, -1)
   299  
   300  		if err != nil {
   301  			return err
   302  		}
   303  
   304  		if !more {
   305  			break
   306  		}
   307  	}
   308  
   309  	return nil
   310  }
   311  
   312  var c1Tag = types.Uint(1)
   313  var c2Tag = types.Uint(2)
   314  var cardTag = types.Uint(schema.KeylessRowCardinalityTag)
   315  var rowIdTag = types.Uint(schema.KeylessRowIdTag)
   316  
   317  func getKeylessRow(ctx context.Context, vals []types.Value) ([]types.Value, error) {
   318  	nbf, err := types.GetFormatForVersionString(constants.FormatDefaultString)
   319  	if err != nil {
   320  		return []types.Value{}, err
   321  	}
   322  
   323  	id1, err := types.UUIDHashedFromValues(nbf, vals...)
   324  	if err != nil {
   325  		return []types.Value{}, err
   326  	}
   327  
   328  	prefix := []types.Value{
   329  		cardTag,
   330  		types.Uint(1),
   331  	}
   332  	vals = append(prefix, vals...)
   333  
   334  	return []types.Value{
   335  		dtu.MustTuple(rowIdTag, id1),
   336  		dtu.MustTuple(vals...),
   337  	}, nil
   338  }