github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/merge/three_way.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package merge
    23  
    24  import (
    25  	"context"
    26  	"errors"
    27  	"fmt"
    28  
    29  	"github.com/dolthub/dolt/go/store/d"
    30  	"github.com/dolthub/dolt/go/store/types"
    31  )
    32  
// Policy is the signature of a merge strategy: a function that merges two
// values (a and b) against a common ancestor and returns the merged value or
// an error. All three Values, and presumably everything they reference, must
// be wholly readable from vrw. Whenever a change is merged, implementations
// should send a struct{} over progress.
type Policy func(ctx context.Context, a, b, ancestor types.Value, vrw types.ValueReadWriter, progress chan struct{}) (merged types.Value, err error)
    38  
// ResolveFunc is the type for custom merge-conflict resolution callbacks.
// When the merge algorithm encounters two non-mergeable changes (aChange and
// bChange) at the same path, it calls the ResolveFunc passed into ThreeWay().
//
// The callback receives the kinds of the two incompatible changes (added,
// changed or removed), the two Values that could not be merged (if any), and
// the path at which the conflict occurred.
//
// If the ResolveFunc cannot devise a resolution, ok should be false upon
// return and the other return values are undefined. If the conflict can be
// resolved, the function should return the appropriate type of change to
// apply, the new value to be used (if any), and true.
type ResolveFunc func(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool)
    49  
    50  // None is the no-op ResolveFunc. Any conflict results in a merge failure.
    51  func None(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool) {
    52  	return change, merged, false
    53  }
    54  
    55  // Ours resolves conflicts by preferring changes from the Value currently being committed.
    56  func Ours(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool) {
    57  	return aChange, a, true
    58  }
    59  
    60  // Theirs resolves conflicts by preferring changes in the current HEAD.
    61  func Theirs(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool) {
    62  	return bChange, b, true
    63  }
    64  
    65  // ErrMergeConflict indicates that a merge attempt failed and must be resolved
    66  // manually for the provided reason.
    67  type ErrMergeConflict struct {
    68  	msg string
    69  }
    70  
    71  func (e *ErrMergeConflict) Error() string {
    72  	return e.msg
    73  }
    74  
    75  func newMergeConflict(format string, args ...interface{}) *ErrMergeConflict {
    76  	return &ErrMergeConflict{fmt.Sprintf(format, args...)}
    77  }
    78  
    79  // NewThreeWay creates a new Policy based on ThreeWay using the provided
    80  // ResolveFunc.
    81  func NewThreeWay(resolve ResolveFunc) Policy {
    82  	return func(ctx context.Context, a, b, parent types.Value, vrw types.ValueReadWriter, progress chan struct{}) (merged types.Value, err error) {
    83  		return ThreeWay(ctx, a, b, parent, vrw, resolve, progress)
    84  	}
    85  }
    86  
    87  // ThreeWay attempts a three-way merge between two _candidate_ values that
    88  // have both changed with respect to a common _parent_ value. The result of
    89  // the algorithm is a _merged_ value or an error if merging could not be done.
    90  //
    91  // The algorithm works recursively, applying the following rules for each value:
    92  //
    93  // - If any of the three values have a different [kind](link): conflict
    94  // - If the two candidates are identical: the result is that value
    95  // - If the values are primitives or Blob: conflict
    96  // - If the values are maps:
    97  //   - if the same key was inserted or updated in both candidates:
    98  //     - first run this same algorithm on those two values to attempt to merge them
    99  //     - if the two merged values are still different: conflict
   100  //   - if a key was inserted in one candidate and removed in the other: conflict
   101  // - If the values are structs:
   102  //   - Same as map, except using field names instead of map keys
   103  // - If the values are sets:
   104  //   - Apply the changes from both candidates to the parent to get the result. No conflicts are possible.
   105  // - If the values are list:
   106  //   - Apply list-merge (see below)
   107  //
   108  // Merge rules for List are a bit more complex than Map, Struct, and Set due
   109  // to a wider away of potential use patterns. A List might be a de-facto Map
   110  // with sequential numeric keys, or it might be a sequence of objects where
   111  // order matters but the caller is unlikely to go back and update the value at
   112  // a given index. List modifications are expressed in terms of 'splices' (see
   113  // types/edit_distance.go). Roughly, a splice indicates that some number of
   114  // elements were added and/or removed at some index in |parent|. In the
   115  // following example:
   116  //
   117  // parent: [a, b, c, d]
   118  // a:      [b, c, d]
   119  // b:      [a, b, c, d, e]
   120  // merged: [b, c, d, e]
   121  //
   122  // The difference from parent -> is described by the splice {0, 1}, indicating
   123  // that 1 element was removed from parent at index 0. The difference from
   124  // parent -> b is described as {4, 0, e}, indicating that 0 elements were
   125  // removed at parent's index 4, and the element 'e' was added. Our merge
   126  // algorithm will successfully merge a and b, because these splices do not
   127  // overlap; that is, neither one removes the index at which the other
   128  // operates. As a general rule, the merge algorithm will refuse to merge
   129  // splices that overlap, as in the following examples:
   130  //
   131  // parent: [a, b, c]
   132  // a:      [a, d, b, c]
   133  // b:      [a, c]
   134  // merged: conflict
   135  //
   136  // parent: [a, b, c]
   137  // a:      [a, e, b, c]
   138  // b:      [a, d, b, c]
   139  // merged: conflict
   140  //
   141  // The splices in the first example are {1, 0, d} (remove 0 elements at index
   142  // 1 and add 'd') and {1, 1} (remove 1 element at index 1). Since the latter
   143  // removes the element at which the former adds an element, these splices
   144  // overlap. Similarly, in the second example, both splices operate at index 1
   145  // but add different elements. Thus, they also overlap.
   146  //
   147  // There is one special case for overlapping splices. If they perform the
   148  // exact same operation, the algorithm considers them not to be in conflict.
   149  // E.g.
   150  //
   151  // parent: [a, b, c]
   152  // a:      [a, d, e]
   153  // b:      [a, d, e]
   154  // merged: [a, d, e]
   155  func ThreeWay(ctx context.Context, a, b, parent types.Value, vrw types.ValueReadWriter, resolve ResolveFunc, progress chan struct{}) (merged types.Value, err error) {
   156  	describe := func(v types.Value) (string, error) {
   157  		if v != nil {
   158  			t, err := types.TypeOf(v)
   159  
   160  			if err != nil {
   161  				return "", err
   162  			}
   163  
   164  			str, err := t.Describe(ctx)
   165  
   166  			if err != nil {
   167  				return "", err
   168  			}
   169  
   170  			return str, nil
   171  		}
   172  
   173  		return "nil Value", nil
   174  	}
   175  
   176  	if a == nil && b == nil {
   177  		return parent, nil
   178  	} else if unmergeable(a, b) {
   179  		aDesc, err := describe(a)
   180  
   181  		if err != nil {
   182  			return nil, err
   183  		}
   184  
   185  		bDesc, err := describe(b)
   186  
   187  		if err != nil {
   188  			return nil, err
   189  		}
   190  
   191  		return parent, newMergeConflict("Cannot merge %s with %s.", aDesc, bDesc)
   192  	}
   193  
   194  	if resolve == nil {
   195  		resolve = None
   196  	}
   197  	m := &merger{vrw, resolve, progress}
   198  	return m.threeWay(ctx, a, b, parent, types.Path{})
   199  }
   200  
   201  // a and b cannot be merged if they are of different NomsKind, or if at least one of the two is nil, or if either is a Noms primitive.
   202  func unmergeable(a, b types.Value) bool {
   203  	if a != nil && b != nil {
   204  		aKind, bKind := a.Kind(), b.Kind()
   205  		return aKind != bKind || types.IsPrimitiveKind(aKind) || types.IsPrimitiveKind(bKind)
   206  	}
   207  	return true
   208  }
   209  
// merger bundles the state shared by one ThreeWay invocation and its
// recursive sub-merges. ThreeWay constructs it with a positional literal, so
// the field order below must not change.
type merger struct {
	// vrw is used to read values referenced by the candidates and to write
	// merged results back.
	vrw      types.ValueReadWriter
	// resolve is the conflict-resolution callback supplied to ThreeWay (None
	// when the caller passed nil).
	resolve  ResolveFunc
	// progress receives one struct{} per unit of merge work; it may be nil,
	// in which case no progress is reported.
	progress chan<- struct{}
}
   215  
   216  func updateProgress(progress chan<- struct{}) {
   217  	// TODO: Eventually we'll want more information than a single bit :).
   218  	if progress != nil {
   219  		progress <- struct{}{}
   220  	}
   221  }
   222  
   223  func (m *merger) threeWay(ctx context.Context, a, b, parent types.Value, path types.Path) (merged types.Value, err error) {
   224  	defer updateProgress(m.progress)
   225  
   226  	if a == nil || b == nil {
   227  		d.Panic("Merge candidates cannont be nil: a = %v, b = %v", a, b)
   228  	}
   229  
   230  	switch a.Kind() {
   231  	case types.ListKind:
   232  		if aList, bList, pList, ok, err := listAssert(ctx, m.vrw, a, b, parent); err != nil {
   233  			return nil, err
   234  		} else if ok {
   235  			return threeWayListMerge(ctx, aList, bList, pList)
   236  		}
   237  
   238  	case types.MapKind:
   239  		if aMap, bMap, pMap, ok, err := mapAssert(ctx, m.vrw, a, b, parent); err != nil {
   240  			return nil, err
   241  		} else if ok {
   242  			return m.threeWayMapMerge(ctx, aMap, bMap, pMap, path)
   243  		}
   244  
   245  	case types.RefKind:
   246  		if aValue, bValue, pValue, ok, err := refAssert(ctx, a, b, parent, m.vrw); err != nil {
   247  			return nil, err
   248  		} else if ok {
   249  			merged, err := m.threeWay(ctx, aValue, bValue, pValue, path)
   250  			if err != nil {
   251  				return parent, err
   252  			}
   253  			return m.vrw.WriteValue(ctx, merged)
   254  		}
   255  
   256  	case types.SetKind:
   257  		if aSet, bSet, pSet, ok, err := setAssert(ctx, m.vrw, a, b, parent); err != nil {
   258  			return nil, err
   259  
   260  		} else if ok {
   261  			return m.threeWaySetMerge(ctx, aSet, bSet, pSet, path)
   262  		}
   263  
   264  	case types.StructKind:
   265  		if aStruct, bStruct, pStruct, ok, err := structAssert(a, b, parent); err != nil {
   266  			return nil, err
   267  		} else if ok {
   268  			return m.threeWayStructMerge(ctx, aStruct, bStruct, pStruct, path)
   269  		}
   270  	}
   271  
   272  	pDescription := "<nil>"
   273  	if parent != nil {
   274  		t, err := types.TypeOf(parent)
   275  
   276  		if err != nil {
   277  			return nil, err
   278  		}
   279  
   280  		pDescription, err = t.Describe(ctx)
   281  
   282  		if err != nil {
   283  			return nil, err
   284  		}
   285  	}
   286  
   287  	aType, err := types.TypeOf(a)
   288  	bType, err := types.TypeOf(b)
   289  	aDesc, err := aType.Describe(ctx)
   290  	bDesc, err := bType.Describe(ctx)
   291  
   292  	return parent, newMergeConflict("Cannot merge %s and %s on top of %s.", aDesc, bDesc, pDescription)
   293  }
   294  
   295  func (m *merger) threeWayMapMerge(ctx context.Context, a, b, parent types.Map, path types.Path) (merged types.Value, err error) {
   296  	apply := func(target candidate, change types.ValueChanged, newVal types.Value) (candidate, error) {
   297  		defer updateProgress(m.progress)
   298  		switch change.ChangeType {
   299  		case types.DiffChangeAdded, types.DiffChangeModified:
   300  			res, err := target.getValue().(types.Map).Edit().Set(change.Key, newVal).Map(ctx)
   301  
   302  			if err != nil {
   303  				return nil, err
   304  			}
   305  
   306  			return mapCandidate{res}, nil
   307  		case types.DiffChangeRemoved:
   308  			res, err := target.getValue().(types.Map).Edit().Remove(change.Key).Map(ctx)
   309  
   310  			if err != nil {
   311  				return nil, err
   312  			}
   313  
   314  			return mapCandidate{res}, nil
   315  		default:
   316  			panic("Not Reached")
   317  		}
   318  	}
   319  	return m.threeWayOrderedSequenceMerge(ctx, mapCandidate{a}, mapCandidate{b}, mapCandidate{parent}, apply, path)
   320  }
   321  
   322  func (m *merger) threeWaySetMerge(ctx context.Context, a, b, parent types.Set, path types.Path) (merged types.Value, err error) {
   323  	apply := func(target candidate, change types.ValueChanged, newVal types.Value) (candidate, error) {
   324  		defer updateProgress(m.progress)
   325  		switch change.ChangeType {
   326  		case types.DiffChangeAdded, types.DiffChangeModified:
   327  			se, err := target.getValue().(types.Set).Edit().Insert(newVal)
   328  
   329  			if err != nil {
   330  				return nil, err
   331  			}
   332  
   333  			s, err := se.Set(ctx)
   334  
   335  			if err != nil {
   336  				return nil, err
   337  			}
   338  
   339  			return setCandidate{s}, nil
   340  		case types.DiffChangeRemoved:
   341  			se, err := target.getValue().(types.Set).Edit().Remove(newVal)
   342  
   343  			if err != nil {
   344  				return nil, err
   345  			}
   346  
   347  			s, err := se.Set(ctx)
   348  
   349  			if err != nil {
   350  				return nil, err
   351  			}
   352  
   353  			return setCandidate{s}, nil
   354  		default:
   355  			panic("Not Reached")
   356  		}
   357  	}
   358  	return m.threeWayOrderedSequenceMerge(ctx, setCandidate{a}, setCandidate{b}, setCandidate{parent}, apply, path)
   359  }
   360  
   361  func (m *merger) threeWayStructMerge(ctx context.Context, a, b, parent types.Struct, path types.Path) (merged types.Value, err error) {
   362  	apply := func(target candidate, change types.ValueChanged, newVal types.Value) (candidate, error) {
   363  		defer updateProgress(m.progress)
   364  		// Right now, this always iterates over all fields to create a new Struct, because there's no API for adding/removing a field from an existing struct type.
   365  		targetVal := target.getValue().(types.Struct)
   366  		if f, ok := change.Key.(types.String); ok {
   367  			field := string(f)
   368  			data := types.StructData{}
   369  			_ = targetVal.IterFields(func(name string, v types.Value) error {
   370  				if name != field {
   371  					data[name] = v
   372  				}
   373  
   374  				return nil
   375  			})
   376  			if change.ChangeType == types.DiffChangeAdded || change.ChangeType == types.DiffChangeModified {
   377  				data[field] = newVal
   378  			}
   379  
   380  			st, err := types.NewStruct(m.vrw.Format(), targetVal.Name(), data)
   381  
   382  			if err != nil {
   383  				return nil, err
   384  			}
   385  
   386  			return structCandidate{st}, nil
   387  		}
   388  
   389  		return nil, errors.New("bad key type in diff")
   390  	}
   391  	return m.threeWayOrderedSequenceMerge(ctx, structCandidate{a}, structCandidate{b}, structCandidate{parent}, apply, path)
   392  }
   393  
   394  func listAssert(ctx context.Context, vrw types.ValueReadWriter, a, b, parent types.Value) (aList, bList, pList types.List, ok bool, err error) {
   395  	var aOk, bOk, pOk bool
   396  	aList, aOk = a.(types.List)
   397  	bList, bOk = b.(types.List)
   398  	if parent != nil {
   399  		pList, pOk = parent.(types.List)
   400  	} else {
   401  		pList, err = types.NewList(ctx, vrw)
   402  
   403  		if err != nil {
   404  			return types.EmptyList, types.EmptyList, types.EmptyList, false, err
   405  		}
   406  
   407  		pOk = true
   408  	}
   409  
   410  	return aList, bList, pList, aOk && bOk && pOk, nil
   411  }
   412  
   413  func mapAssert(ctx context.Context, vrw types.ValueReadWriter, a, b, parent types.Value) (aMap, bMap, pMap types.Map, ok bool, err error) {
   414  	var aOk, bOk, pOk bool
   415  	aMap, aOk = a.(types.Map)
   416  	bMap, bOk = b.(types.Map)
   417  	if parent != nil {
   418  		pMap, pOk = parent.(types.Map)
   419  	} else {
   420  		pMap, err = types.NewMap(ctx, vrw)
   421  
   422  		if err != nil {
   423  			return types.EmptyMap, types.EmptyMap, types.EmptyMap, false, err
   424  		}
   425  
   426  		pOk = true
   427  	}
   428  
   429  	return aMap, bMap, pMap, aOk && bOk && pOk, nil
   430  }
   431  
   432  func refAssert(ctx context.Context, a, b, parent types.Value, vrw types.ValueReadWriter) (aValue, bValue, pValue types.Value, ok bool, err error) {
   433  	var aOk, bOk, pOk bool
   434  	var aRef, bRef, pRef types.Ref
   435  	aRef, aOk = a.(types.Ref)
   436  	bRef, bOk = b.(types.Ref)
   437  	if !aOk || !bOk {
   438  		return
   439  	}
   440  
   441  	aValue, err = aRef.TargetValue(ctx, vrw)
   442  
   443  	if err != nil {
   444  		return nil, nil, nil, false, err
   445  	}
   446  
   447  	bValue, err = bRef.TargetValue(ctx, vrw)
   448  
   449  	if err != nil {
   450  		return nil, nil, nil, false, err
   451  	}
   452  
   453  	if parent != nil {
   454  		if pRef, pOk = parent.(types.Ref); pOk {
   455  			pValue, err = pRef.TargetValue(ctx, vrw)
   456  
   457  			if err != nil {
   458  				return nil, nil, nil, false, err
   459  			}
   460  
   461  		}
   462  	} else {
   463  		pOk = true // parent == nil is still OK. It just leaves pValue as nil.
   464  	}
   465  	return aValue, bValue, pValue, aOk && bOk && pOk, nil
   466  }
   467  
   468  func setAssert(ctx context.Context, vrw types.ValueReadWriter, a, b, parent types.Value) (aSet, bSet, pSet types.Set, ok bool, err error) {
   469  	var aOk, bOk, pOk bool
   470  	aSet, aOk = a.(types.Set)
   471  	bSet, bOk = b.(types.Set)
   472  	if parent != nil {
   473  		pSet, pOk = parent.(types.Set)
   474  	} else {
   475  		pSet, err = types.NewSet(ctx, vrw)
   476  
   477  		if err != nil {
   478  			return types.EmptySet, types.EmptySet, types.EmptySet, false, err
   479  		}
   480  
   481  		pOk = true
   482  	}
   483  
   484  	return aSet, bSet, pSet, aOk && bOk && pOk, nil
   485  }
   486  
   487  func structAssert(a, b, parent types.Value) (aStruct, bStruct, pStruct types.Struct, ok bool, err error) {
   488  	var aOk, bOk, pOk bool
   489  	aStruct, aOk = a.(types.Struct)
   490  	bStruct, bOk = b.(types.Struct)
   491  	if aOk && bOk {
   492  		if aStruct.Name() == bStruct.Name() {
   493  			if parent != nil {
   494  				pStruct, pOk = parent.(types.Struct)
   495  			} else {
   496  				pStruct, err = types.NewStruct(aStruct.Format(), aStruct.Name(), nil)
   497  
   498  				if err != nil {
   499  					es := types.EmptyStruct(aStruct.Format())
   500  					return es, es, es, false, err
   501  				}
   502  
   503  				pOk = true
   504  			}
   505  
   506  			return aStruct, bStruct, pStruct, pOk, err
   507  		}
   508  	}
   509  
   510  	return
   511  }