github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/merge/three_way.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package merge
    23  
    24  import (
    25  	"context"
    26  	"errors"
    27  	"fmt"
    28  
    29  	"github.com/dolthub/dolt/go/store/d"
    30  	"github.com/dolthub/dolt/go/store/types"
    31  )
    32  
// Policy functors are used to merge two values (a and b) against a common
// ancestor. All three Values must be wholly readable from vrw. Whenever a
// change is merged, implementations should send a struct{} over progress.
// A Policy returns the merged value, or an error if the merge could not be
// completed.
type Policy func(ctx context.Context, a, b, ancestor types.Value, vrw types.ValueReadWriter, progress chan struct{}) (merged types.Value, err error)
    38  
// ResolveFunc is the type for custom merge-conflict resolution callbacks.
// When the merge algorithm encounters two non-mergeable changes (aChange and
// bChange) at the same path, it calls the ResolveFunc passed into ThreeWay().
// The callback gets the types of the two incompatible changes (added, changed
// or removed) and the two Values that could not be merged (if any). If the
// ResolveFunc cannot devise a resolution, ok should be false upon return and
// the other return values are undefined. If the conflict can be resolved, the
// function should return the appropriate type of change to apply, the new value
// to be used (if any), and true.
type ResolveFunc func(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool)
    49  
    50  // None is the no-op ResolveFunc. Any conflict results in a merge failure.
    51  func None(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool) {
    52  	return change, merged, false
    53  }
    54  
    55  // Ours resolves conflicts by preferring changes from the Value currently being committed.
    56  func Ours(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool) {
    57  	return aChange, a, true
    58  }
    59  
    60  // Theirs resolves conflicts by preferring changes in the current HEAD.
    61  func Theirs(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool) {
    62  	return bChange, b, true
    63  }
    64  
    65  // ErrMergeConflict indicates that a merge attempt failed and must be resolved
    66  // manually for the provided reason.
    67  type ErrMergeConflict struct {
    68  	msg string
    69  }
    70  
    71  func (e *ErrMergeConflict) Error() string {
    72  	return e.msg
    73  }
    74  
    75  func newMergeConflict(format string, args ...interface{}) *ErrMergeConflict {
    76  	return &ErrMergeConflict{fmt.Sprintf(format, args...)}
    77  }
    78  
    79  // NewThreeWay creates a new Policy based on ThreeWay using the provided
    80  // ResolveFunc.
    81  func NewThreeWay(resolve ResolveFunc) Policy {
    82  	return func(ctx context.Context, a, b, parent types.Value, vrw types.ValueReadWriter, progress chan struct{}) (merged types.Value, err error) {
    83  		return ThreeWay(ctx, a, b, parent, vrw, resolve, progress)
    84  	}
    85  }
    86  
    87  // ThreeWay attempts a three-way merge between two _candidate_ values that
    88  // have both changed with respect to a common _parent_ value. The result of
    89  // the algorithm is a _merged_ value or an error if merging could not be done.
    90  //
    91  // The algorithm works recursively, applying the following rules for each value:
    92  //
    93  // - If any of the three values have a different [kind](link): conflict
    94  // - If the two candidates are identical: the result is that value
    95  // - If the values are primitives or Blob: conflict
    96  // - If the values are maps:
    97  //   - if the same key was inserted or updated in both candidates:
    98  //   - first run this same algorithm on those two values to attempt to merge them
    99  //   - if the two merged values are still different: conflict
   100  //   - if a key was inserted in one candidate and removed in the other: conflict
   101  //
   102  // - If the values are structs:
   103  //   - Same as map, except using field names instead of map keys
   104  //
   105  // - If the values are sets:
   106  //   - Apply the changes from both candidates to the parent to get the result. No conflicts are possible.
   107  //
   108  // - If the values are list:
   109  //   - Apply list-merge (see below)
   110  //
   111  // Merge rules for List are a bit more complex than Map, Struct, and Set due
   112  // to a wider array of potential use patterns. A List might be a de-facto Map
   113  // with sequential numeric keys, or it might be a sequence of objects where
   114  // order matters but the caller is unlikely to go back and update the value at
   115  // a given index. List modifications are expressed in terms of 'splices' (see
   116  // types/edit_distance.go). Roughly, a splice indicates that some number of
   117  // elements were added and/or removed at some index in |parent|. In the
   118  // following example:
   119  //
   120  // parent: [a, b, c, d]
   121  // a:      [b, c, d]
   122  // b:      [a, b, c, d, e]
   123  // merged: [b, c, d, e]
   124  //
   125  // The difference from parent -> a is described by the splice {0, 1}, indicating
   126  // that 1 element was removed from parent at index 0. The difference from
   127  // parent -> b is described as {4, 0, e}, indicating that 0 elements were
   128  // removed at parent's index 4, and the element 'e' was added. Our merge
   129  // algorithm will successfully merge a and b, because these splices do not
   130  // overlap; that is, neither one removes the index at which the other
   131  // operates. As a general rule, the merge algorithm will refuse to merge
   132  // splices that overlap, as in the following examples:
   133  //
   134  // parent: [a, b, c]
   135  // a:      [a, d, b, c]
   136  // b:      [a, c]
   137  // merged: conflict
   138  //
   139  // parent: [a, b, c]
   140  // a:      [a, e, b, c]
   141  // b:      [a, d, b, c]
   142  // merged: conflict
   143  //
   144  // The splices in the first example are {1, 0, d} (remove 0 elements at index
   145  // 1 and add 'd') and {1, 1} (remove 1 element at index 1). Since the latter
   146  // removes the element at which the former adds an element, these splices
   147  // overlap. Similarly, in the second example, both splices operate at index 1
   148  // but add different elements. Thus, they also overlap.
   149  //
   150  // There is one special case for overlapping splices. If they perform the
   151  // exact same operation, the algorithm considers them not to be in conflict.
   152  // E.g.
   153  //
   154  // parent: [a, b, c]
   155  // a:      [a, d, e]
   156  // b:      [a, d, e]
   157  // merged: [a, d, e]
   158  func ThreeWay(ctx context.Context, a, b, parent types.Value, vrw types.ValueReadWriter, resolve ResolveFunc, progress chan struct{}) (merged types.Value, err error) {
   159  	describe := func(v types.Value) (string, error) {
   160  		if v != nil {
   161  			t, err := types.TypeOf(v)
   162  
   163  			if err != nil {
   164  				return "", err
   165  			}
   166  
   167  			str, err := t.Describe(ctx)
   168  
   169  			if err != nil {
   170  				return "", err
   171  			}
   172  
   173  			return str, nil
   174  		}
   175  
   176  		return "nil Value", nil
   177  	}
   178  
   179  	if a == nil && b == nil {
   180  		return parent, nil
   181  	} else if unmergeable(a, b) {
   182  		aDesc, err := describe(a)
   183  
   184  		if err != nil {
   185  			return nil, err
   186  		}
   187  
   188  		bDesc, err := describe(b)
   189  
   190  		if err != nil {
   191  			return nil, err
   192  		}
   193  
   194  		return parent, newMergeConflict("Cannot merge %s with %s.", aDesc, bDesc)
   195  	}
   196  
   197  	if resolve == nil {
   198  		resolve = None
   199  	}
   200  	m := &merger{vrw, resolve, progress}
   201  	return m.threeWay(ctx, a, b, parent, types.Path{})
   202  }
   203  
   204  // a and b cannot be merged if they are of different NomsKind, or if at least one of the two is nil, or if either is a Noms primitive.
   205  func unmergeable(a, b types.Value) bool {
   206  	if a != nil && b != nil {
   207  		aKind, bKind := a.Kind(), b.Kind()
   208  		return aKind != bKind || types.IsPrimitiveKind(aKind) || types.IsPrimitiveKind(bKind)
   209  	}
   210  	return true
   211  }
   212  
// merger bundles the state shared by the recursive merge routines.
type merger struct {
	// vrw is handed to the *Assert helpers and used to write merged Ref
	// targets and to obtain the format for rebuilt structs.
	vrw      types.ValueReadWriter
	// resolve is the conflict-resolution callback; ThreeWay substitutes
	// None when the caller passes nil.
	resolve  ResolveFunc
	// progress is a send-only channel that receives one struct{} per
	// update; it may be nil, in which case updateProgress sends nothing.
	progress chan<- struct{}
}
   218  
   219  func updateProgress(progress chan<- struct{}) {
   220  	// TODO: Eventually we'll want more information than a single bit :).
   221  	if progress != nil {
   222  		progress <- struct{}{}
   223  	}
   224  }
   225  
   226  func (m *merger) threeWay(ctx context.Context, a, b, parent types.Value, path types.Path) (merged types.Value, err error) {
   227  	defer updateProgress(m.progress)
   228  
   229  	if a == nil || b == nil {
   230  		d.Panic("Merge candidates cannont be nil: a = %v, b = %v", a, b)
   231  	}
   232  
   233  	switch a.Kind() {
   234  	case types.ListKind:
   235  		if aList, bList, pList, ok, err := listAssert(ctx, m.vrw, a, b, parent); err != nil {
   236  			return nil, err
   237  		} else if ok {
   238  			return threeWayListMerge(ctx, aList, bList, pList)
   239  		}
   240  
   241  	case types.MapKind:
   242  		if aMap, bMap, pMap, ok, err := mapAssert(ctx, m.vrw, a, b, parent); err != nil {
   243  			return nil, err
   244  		} else if ok {
   245  			return m.threeWayMapMerge(ctx, aMap, bMap, pMap, path)
   246  		}
   247  
   248  	case types.RefKind:
   249  		if aValue, bValue, pValue, ok, err := refAssert(ctx, a, b, parent, m.vrw); err != nil {
   250  			return nil, err
   251  		} else if ok {
   252  			merged, err := m.threeWay(ctx, aValue, bValue, pValue, path)
   253  			if err != nil {
   254  				return parent, err
   255  			}
   256  			return m.vrw.WriteValue(ctx, merged)
   257  		}
   258  
   259  	case types.SetKind:
   260  		if aSet, bSet, pSet, ok, err := setAssert(ctx, m.vrw, a, b, parent); err != nil {
   261  			return nil, err
   262  
   263  		} else if ok {
   264  			return m.threeWaySetMerge(ctx, aSet, bSet, pSet, path)
   265  		}
   266  
   267  	case types.StructKind:
   268  		if aStruct, bStruct, pStruct, ok, err := structAssert(a, b, parent); err != nil {
   269  			return nil, err
   270  		} else if ok {
   271  			return m.threeWayStructMerge(ctx, aStruct, bStruct, pStruct, path)
   272  		}
   273  	}
   274  
   275  	pDescription := "<nil>"
   276  	if parent != nil {
   277  		t, err := types.TypeOf(parent)
   278  
   279  		if err != nil {
   280  			return nil, err
   281  		}
   282  
   283  		pDescription, err = t.Describe(ctx)
   284  
   285  		if err != nil {
   286  			return nil, err
   287  		}
   288  	}
   289  
   290  	aType, err := types.TypeOf(a)
   291  	bType, err := types.TypeOf(b)
   292  	aDesc, err := aType.Describe(ctx)
   293  	bDesc, err := bType.Describe(ctx)
   294  
   295  	return parent, newMergeConflict("Cannot merge %s and %s on top of %s.", aDesc, bDesc, pDescription)
   296  }
   297  
   298  func (m *merger) threeWayMapMerge(ctx context.Context, a, b, parent types.Map, path types.Path) (merged types.Value, err error) {
   299  	apply := func(target candidate, change types.ValueChanged, newVal types.Value) (candidate, error) {
   300  		defer updateProgress(m.progress)
   301  		switch change.ChangeType {
   302  		case types.DiffChangeAdded, types.DiffChangeModified:
   303  			res, err := target.getValue().(types.Map).Edit().Set(change.Key, newVal).Map(ctx)
   304  
   305  			if err != nil {
   306  				return nil, err
   307  			}
   308  
   309  			return mapCandidate{res}, nil
   310  		case types.DiffChangeRemoved:
   311  			res, err := target.getValue().(types.Map).Edit().Remove(change.Key).Map(ctx)
   312  
   313  			if err != nil {
   314  				return nil, err
   315  			}
   316  
   317  			return mapCandidate{res}, nil
   318  		default:
   319  			panic("Not Reached")
   320  		}
   321  	}
   322  	return m.threeWayOrderedSequenceMerge(ctx, mapCandidate{a}, mapCandidate{b}, mapCandidate{parent}, apply, path)
   323  }
   324  
   325  func (m *merger) threeWaySetMerge(ctx context.Context, a, b, parent types.Set, path types.Path) (merged types.Value, err error) {
   326  	apply := func(target candidate, change types.ValueChanged, newVal types.Value) (candidate, error) {
   327  		defer updateProgress(m.progress)
   328  		switch change.ChangeType {
   329  		case types.DiffChangeAdded, types.DiffChangeModified:
   330  			se, err := target.getValue().(types.Set).Edit().Insert(ctx, newVal)
   331  
   332  			if err != nil {
   333  				return nil, err
   334  			}
   335  
   336  			s, err := se.Set(ctx)
   337  
   338  			if err != nil {
   339  				return nil, err
   340  			}
   341  
   342  			return setCandidate{s}, nil
   343  		case types.DiffChangeRemoved:
   344  			se, err := target.getValue().(types.Set).Edit().Remove(ctx, newVal)
   345  
   346  			if err != nil {
   347  				return nil, err
   348  			}
   349  
   350  			s, err := se.Set(ctx)
   351  
   352  			if err != nil {
   353  				return nil, err
   354  			}
   355  
   356  			return setCandidate{s}, nil
   357  		default:
   358  			panic("Not Reached")
   359  		}
   360  	}
   361  	return m.threeWayOrderedSequenceMerge(ctx, setCandidate{a}, setCandidate{b}, setCandidate{parent}, apply, path)
   362  }
   363  
   364  func (m *merger) threeWayStructMerge(ctx context.Context, a, b, parent types.Struct, path types.Path) (merged types.Value, err error) {
   365  	apply := func(target candidate, change types.ValueChanged, newVal types.Value) (candidate, error) {
   366  		defer updateProgress(m.progress)
   367  		// Right now, this always iterates over all fields to create a new Struct, because there's no API for adding/removing a field from an existing struct type.
   368  		targetVal := target.getValue().(types.Struct)
   369  		if f, ok := change.Key.(types.String); ok {
   370  			field := string(f)
   371  			data := types.StructData{}
   372  			_ = targetVal.IterFields(func(name string, v types.Value) error {
   373  				if name != field {
   374  					data[name] = v
   375  				}
   376  
   377  				return nil
   378  			})
   379  			if change.ChangeType == types.DiffChangeAdded || change.ChangeType == types.DiffChangeModified {
   380  				data[field] = newVal
   381  			}
   382  
   383  			st, err := types.NewStruct(m.vrw.Format(), targetVal.Name(), data)
   384  
   385  			if err != nil {
   386  				return nil, err
   387  			}
   388  
   389  			return structCandidate{st}, nil
   390  		}
   391  
   392  		return nil, errors.New("bad key type in diff")
   393  	}
   394  	return m.threeWayOrderedSequenceMerge(ctx, structCandidate{a}, structCandidate{b}, structCandidate{parent}, apply, path)
   395  }
   396  
   397  func listAssert(ctx context.Context, vrw types.ValueReadWriter, a, b, parent types.Value) (aList, bList, pList types.List, ok bool, err error) {
   398  	var aOk, bOk, pOk bool
   399  	aList, aOk = a.(types.List)
   400  	bList, bOk = b.(types.List)
   401  	if parent != nil {
   402  		pList, pOk = parent.(types.List)
   403  	} else {
   404  		pList, err = types.NewList(ctx, vrw)
   405  
   406  		if err != nil {
   407  			return types.EmptyList, types.EmptyList, types.EmptyList, false, err
   408  		}
   409  
   410  		pOk = true
   411  	}
   412  
   413  	return aList, bList, pList, aOk && bOk && pOk, nil
   414  }
   415  
   416  func mapAssert(ctx context.Context, vrw types.ValueReadWriter, a, b, parent types.Value) (aMap, bMap, pMap types.Map, ok bool, err error) {
   417  	var aOk, bOk, pOk bool
   418  	aMap, aOk = a.(types.Map)
   419  	bMap, bOk = b.(types.Map)
   420  	if parent != nil {
   421  		pMap, pOk = parent.(types.Map)
   422  	} else {
   423  		pMap, err = types.NewMap(ctx, vrw)
   424  
   425  		if err != nil {
   426  			return types.EmptyMap, types.EmptyMap, types.EmptyMap, false, err
   427  		}
   428  
   429  		pOk = true
   430  	}
   431  
   432  	return aMap, bMap, pMap, aOk && bOk && pOk, nil
   433  }
   434  
   435  func refAssert(ctx context.Context, a, b, parent types.Value, vrw types.ValueReadWriter) (aValue, bValue, pValue types.Value, ok bool, err error) {
   436  	var aOk, bOk, pOk bool
   437  	var aRef, bRef, pRef types.Ref
   438  	aRef, aOk = a.(types.Ref)
   439  	bRef, bOk = b.(types.Ref)
   440  	if !aOk || !bOk {
   441  		return
   442  	}
   443  
   444  	aValue, err = aRef.TargetValue(ctx, vrw)
   445  
   446  	if err != nil {
   447  		return nil, nil, nil, false, err
   448  	}
   449  
   450  	bValue, err = bRef.TargetValue(ctx, vrw)
   451  
   452  	if err != nil {
   453  		return nil, nil, nil, false, err
   454  	}
   455  
   456  	if parent != nil {
   457  		if pRef, pOk = parent.(types.Ref); pOk {
   458  			pValue, err = pRef.TargetValue(ctx, vrw)
   459  
   460  			if err != nil {
   461  				return nil, nil, nil, false, err
   462  			}
   463  
   464  		}
   465  	} else {
   466  		pOk = true // parent == nil is still OK. It just leaves pValue as nil.
   467  	}
   468  	return aValue, bValue, pValue, aOk && bOk && pOk, nil
   469  }
   470  
   471  func setAssert(ctx context.Context, vrw types.ValueReadWriter, a, b, parent types.Value) (aSet, bSet, pSet types.Set, ok bool, err error) {
   472  	var aOk, bOk, pOk bool
   473  	aSet, aOk = a.(types.Set)
   474  	bSet, bOk = b.(types.Set)
   475  	if parent != nil {
   476  		pSet, pOk = parent.(types.Set)
   477  	} else {
   478  		pSet, err = types.NewSet(ctx, vrw)
   479  
   480  		if err != nil {
   481  			return types.EmptySet, types.EmptySet, types.EmptySet, false, err
   482  		}
   483  
   484  		pOk = true
   485  	}
   486  
   487  	return aSet, bSet, pSet, aOk && bOk && pOk, nil
   488  }
   489  
   490  func structAssert(a, b, parent types.Value) (aStruct, bStruct, pStruct types.Struct, ok bool, err error) {
   491  	var aOk, bOk, pOk bool
   492  	aStruct, aOk = a.(types.Struct)
   493  	bStruct, bOk = b.(types.Struct)
   494  	if aOk && bOk {
   495  		if aStruct.Name() == bStruct.Name() {
   496  			if parent != nil {
   497  				pStruct, pOk = parent.(types.Struct)
   498  			} else {
   499  				pStruct, err = types.NewStruct(aStruct.Format(), aStruct.Name(), nil)
   500  
   501  				if err != nil {
   502  					es := types.EmptyStruct(aStruct.Format())
   503  					return es, es, es, false, err
   504  				}
   505  
   506  				pOk = true
   507  			}
   508  
   509  			return aStruct, bStruct, pStruct, pOk, err
   510  		}
   511  	}
   512  
   513  	return
   514  }