github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/merge/three_way.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // This file incorporates work covered by the following copyright and 16 // permission notice: 17 // 18 // Copyright 2016 Attic Labs, Inc. All rights reserved. 19 // Licensed under the Apache License, version 2.0: 20 // http://www.apache.org/licenses/LICENSE-2.0 21 22 package merge 23 24 import ( 25 "context" 26 "errors" 27 "fmt" 28 29 "github.com/dolthub/dolt/go/store/d" 30 "github.com/dolthub/dolt/go/store/types" 31 ) 32 33 // Policy functors are used to merge two values (a and b) against a common 34 // ancestor. All three Values and their must by wholly readable from vrw. 35 // Whenever a change is merged, implementations should send a struct{} over 36 // progress. 37 type Policy func(ctx context.Context, a, b, ancestor types.Value, vrw types.ValueReadWriter, progress chan struct{}) (merged types.Value, err error) 38 39 // ResolveFunc is the type for custom merge-conflict resolution callbacks. 40 // When the merge algorithm encounters two non-mergeable changes (aChange and 41 // bChange) at the same path, it calls the ResolveFunc passed into ThreeWay(). 42 // The callback gets the types of the two incompatible changes (added, changed 43 // or removed) and the two Values that could not be merged (if any). If the 44 // ResolveFunc cannot devise a resolution, ok should be false upon return and 45 // the other return values are undefined. If the conflict can be resolved, the 46 // function should return the appropriate type of change to apply, the new value 47 // to be used (if any), and true. 48 type ResolveFunc func(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool) 49 50 // None is the no-op ResolveFunc. Any conflict results in a merge failure. 51 func None(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool) { 52 return change, merged, false 53 } 54 55 // Ours resolves conflicts by preferring changes from the Value currently being committed. 56 func Ours(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool) { 57 return aChange, a, true 58 } 59 60 // Theirs resolves conflicts by preferring changes in the current HEAD. 61 func Theirs(aChange, bChange types.DiffChangeType, a, b types.Value, path types.Path) (change types.DiffChangeType, merged types.Value, ok bool) { 62 return bChange, b, true 63 } 64 65 // ErrMergeConflict indicates that a merge attempt failed and must be resolved 66 // manually for the provided reason. 67 type ErrMergeConflict struct { 68 msg string 69 } 70 71 func (e *ErrMergeConflict) Error() string { 72 return e.msg 73 } 74 75 func newMergeConflict(format string, args ...interface{}) *ErrMergeConflict { 76 return &ErrMergeConflict{fmt.Sprintf(format, args...)} 77 } 78 79 // NewThreeWay creates a new Policy based on ThreeWay using the provided 80 // ResolveFunc. 81 func NewThreeWay(resolve ResolveFunc) Policy { 82 return func(ctx context.Context, a, b, parent types.Value, vrw types.ValueReadWriter, progress chan struct{}) (merged types.Value, err error) { 83 return ThreeWay(ctx, a, b, parent, vrw, resolve, progress) 84 } 85 } 86 87 // ThreeWay attempts a three-way merge between two _candidate_ values that 88 // have both changed with respect to a common _parent_ value. The result of 89 // the algorithm is a _merged_ value or an error if merging could not be done. 90 // 91 // The algorithm works recursively, applying the following rules for each value: 92 // 93 // - If any of the three values have a different [kind](link): conflict 94 // - If the two candidates are identical: the result is that value 95 // - If the values are primitives or Blob: conflict 96 // - If the values are maps: 97 // - if the same key was inserted or updated in both candidates: 98 // - first run this same algorithm on those two values to attempt to merge them 99 // - if the two merged values are still different: conflict 100 // - if a key was inserted in one candidate and removed in the other: conflict 101 // 102 // - If the values are structs: 103 // - Same as map, except using field names instead of map keys 104 // 105 // - If the values are sets: 106 // - Apply the changes from both candidates to the parent to get the result. No conflicts are possible. 107 // 108 // - If the values are list: 109 // - Apply list-merge (see below) 110 // 111 // Merge rules for List are a bit more complex than Map, Struct, and Set due 112 // to a wider away of potential use patterns. A List might be a de-facto Map 113 // with sequential numeric keys, or it might be a sequence of objects where 114 // order matters but the caller is unlikely to go back and update the value at 115 // a given index. List modifications are expressed in terms of 'splices' (see 116 // types/edit_distance.go). Roughly, a splice indicates that some number of 117 // elements were added and/or removed at some index in |parent|. In the 118 // following example: 119 // 120 // parent: [a, b, c, d] 121 // a: [b, c, d] 122 // b: [a, b, c, d, e] 123 // merged: [b, c, d, e] 124 // 125 // The difference from parent -> is described by the splice {0, 1}, indicating 126 // that 1 element was removed from parent at index 0. The difference from 127 // parent -> b is described as {4, 0, e}, indicating that 0 elements were 128 // removed at parent's index 4, and the element 'e' was added. Our merge 129 // algorithm will successfully merge a and b, because these splices do not 130 // overlap; that is, neither one removes the index at which the other 131 // operates. As a general rule, the merge algorithm will refuse to merge 132 // splices that overlap, as in the following examples: 133 // 134 // parent: [a, b, c] 135 // a: [a, d, b, c] 136 // b: [a, c] 137 // merged: conflict 138 // 139 // parent: [a, b, c] 140 // a: [a, e, b, c] 141 // b: [a, d, b, c] 142 // merged: conflict 143 // 144 // The splices in the first example are {1, 0, d} (remove 0 elements at index 145 // 1 and add 'd') and {1, 1} (remove 1 element at index 1). Since the latter 146 // removes the element at which the former adds an element, these splices 147 // overlap. Similarly, in the second example, both splices operate at index 1 148 // but add different elements. Thus, they also overlap. 149 // 150 // There is one special case for overlapping splices. If they perform the 151 // exact same operation, the algorithm considers them not to be in conflict. 152 // E.g. 153 // 154 // parent: [a, b, c] 155 // a: [a, d, e] 156 // b: [a, d, e] 157 // merged: [a, d, e] 158 func ThreeWay(ctx context.Context, a, b, parent types.Value, vrw types.ValueReadWriter, resolve ResolveFunc, progress chan struct{}) (merged types.Value, err error) { 159 describe := func(v types.Value) (string, error) { 160 if v != nil { 161 t, err := types.TypeOf(v) 162 163 if err != nil { 164 return "", err 165 } 166 167 str, err := t.Describe(ctx) 168 169 if err != nil { 170 return "", err 171 } 172 173 return str, nil 174 } 175 176 return "nil Value", nil 177 } 178 179 if a == nil && b == nil { 180 return parent, nil 181 } else if unmergeable(a, b) { 182 aDesc, err := describe(a) 183 184 if err != nil { 185 return nil, err 186 } 187 188 bDesc, err := describe(b) 189 190 if err != nil { 191 return nil, err 192 } 193 194 return parent, newMergeConflict("Cannot merge %s with %s.", aDesc, bDesc) 195 } 196 197 if resolve == nil { 198 resolve = None 199 } 200 m := &merger{vrw, resolve, progress} 201 return m.threeWay(ctx, a, b, parent, types.Path{}) 202 } 203 204 // a and b cannot be merged if they are of different NomsKind, or if at least one of the two is nil, or if either is a Noms primitive. 205 func unmergeable(a, b types.Value) bool { 206 if a != nil && b != nil { 207 aKind, bKind := a.Kind(), b.Kind() 208 return aKind != bKind || types.IsPrimitiveKind(aKind) || types.IsPrimitiveKind(bKind) 209 } 210 return true 211 } 212 213 type merger struct { 214 vrw types.ValueReadWriter 215 resolve ResolveFunc 216 progress chan<- struct{} 217 } 218 219 func updateProgress(progress chan<- struct{}) { 220 // TODO: Eventually we'll want more information than a single bit :). 221 if progress != nil { 222 progress <- struct{}{} 223 } 224 } 225 226 func (m *merger) threeWay(ctx context.Context, a, b, parent types.Value, path types.Path) (merged types.Value, err error) { 227 defer updateProgress(m.progress) 228 229 if a == nil || b == nil { 230 d.Panic("Merge candidates cannont be nil: a = %v, b = %v", a, b) 231 } 232 233 switch a.Kind() { 234 case types.ListKind: 235 if aList, bList, pList, ok, err := listAssert(ctx, m.vrw, a, b, parent); err != nil { 236 return nil, err 237 } else if ok { 238 return threeWayListMerge(ctx, aList, bList, pList) 239 } 240 241 case types.MapKind: 242 if aMap, bMap, pMap, ok, err := mapAssert(ctx, m.vrw, a, b, parent); err != nil { 243 return nil, err 244 } else if ok { 245 return m.threeWayMapMerge(ctx, aMap, bMap, pMap, path) 246 } 247 248 case types.RefKind: 249 if aValue, bValue, pValue, ok, err := refAssert(ctx, a, b, parent, m.vrw); err != nil { 250 return nil, err 251 } else if ok { 252 merged, err := m.threeWay(ctx, aValue, bValue, pValue, path) 253 if err != nil { 254 return parent, err 255 } 256 return m.vrw.WriteValue(ctx, merged) 257 } 258 259 case types.SetKind: 260 if aSet, bSet, pSet, ok, err := setAssert(ctx, m.vrw, a, b, parent); err != nil { 261 return nil, err 262 263 } else if ok { 264 return m.threeWaySetMerge(ctx, aSet, bSet, pSet, path) 265 } 266 267 case types.StructKind: 268 if aStruct, bStruct, pStruct, ok, err := structAssert(a, b, parent); err != nil { 269 return nil, err 270 } else if ok { 271 return m.threeWayStructMerge(ctx, aStruct, bStruct, pStruct, path) 272 } 273 } 274 275 pDescription := "<nil>" 276 if parent != nil { 277 t, err := types.TypeOf(parent) 278 279 if err != nil { 280 return nil, err 281 } 282 283 pDescription, err = t.Describe(ctx) 284 285 if err != nil { 286 return nil, err 287 } 288 } 289 290 aType, err := types.TypeOf(a) 291 bType, err := types.TypeOf(b) 292 aDesc, err := aType.Describe(ctx) 293 bDesc, err := bType.Describe(ctx) 294 295 return parent, newMergeConflict("Cannot merge %s and %s on top of %s.", aDesc, bDesc, pDescription) 296 } 297 298 func (m *merger) threeWayMapMerge(ctx context.Context, a, b, parent types.Map, path types.Path) (merged types.Value, err error) { 299 apply := func(target candidate, change types.ValueChanged, newVal types.Value) (candidate, error) { 300 defer updateProgress(m.progress) 301 switch change.ChangeType { 302 case types.DiffChangeAdded, types.DiffChangeModified: 303 res, err := target.getValue().(types.Map).Edit().Set(change.Key, newVal).Map(ctx) 304 305 if err != nil { 306 return nil, err 307 } 308 309 return mapCandidate{res}, nil 310 case types.DiffChangeRemoved: 311 res, err := target.getValue().(types.Map).Edit().Remove(change.Key).Map(ctx) 312 313 if err != nil { 314 return nil, err 315 } 316 317 return mapCandidate{res}, nil 318 default: 319 panic("Not Reached") 320 } 321 } 322 return m.threeWayOrderedSequenceMerge(ctx, mapCandidate{a}, mapCandidate{b}, mapCandidate{parent}, apply, path) 323 } 324 325 func (m *merger) threeWaySetMerge(ctx context.Context, a, b, parent types.Set, path types.Path) (merged types.Value, err error) { 326 apply := func(target candidate, change types.ValueChanged, newVal types.Value) (candidate, error) { 327 defer updateProgress(m.progress) 328 switch change.ChangeType { 329 case types.DiffChangeAdded, types.DiffChangeModified: 330 se, err := target.getValue().(types.Set).Edit().Insert(ctx, newVal) 331 332 if err != nil { 333 return nil, err 334 } 335 336 s, err := se.Set(ctx) 337 338 if err != nil { 339 return nil, err 340 } 341 342 return setCandidate{s}, nil 343 case types.DiffChangeRemoved: 344 se, err := target.getValue().(types.Set).Edit().Remove(ctx, newVal) 345 346 if err != nil { 347 return nil, err 348 } 349 350 s, err := se.Set(ctx) 351 352 if err != nil { 353 return nil, err 354 } 355 356 return setCandidate{s}, nil 357 default: 358 panic("Not Reached") 359 } 360 } 361 return m.threeWayOrderedSequenceMerge(ctx, setCandidate{a}, setCandidate{b}, setCandidate{parent}, apply, path) 362 } 363 364 func (m *merger) threeWayStructMerge(ctx context.Context, a, b, parent types.Struct, path types.Path) (merged types.Value, err error) { 365 apply := func(target candidate, change types.ValueChanged, newVal types.Value) (candidate, error) { 366 defer updateProgress(m.progress) 367 // Right now, this always iterates over all fields to create a new Struct, because there's no API for adding/removing a field from an existing struct type. 368 targetVal := target.getValue().(types.Struct) 369 if f, ok := change.Key.(types.String); ok { 370 field := string(f) 371 data := types.StructData{} 372 _ = targetVal.IterFields(func(name string, v types.Value) error { 373 if name != field { 374 data[name] = v 375 } 376 377 return nil 378 }) 379 if change.ChangeType == types.DiffChangeAdded || change.ChangeType == types.DiffChangeModified { 380 data[field] = newVal 381 } 382 383 st, err := types.NewStruct(m.vrw.Format(), targetVal.Name(), data) 384 385 if err != nil { 386 return nil, err 387 } 388 389 return structCandidate{st}, nil 390 } 391 392 return nil, errors.New("bad key type in diff") 393 } 394 return m.threeWayOrderedSequenceMerge(ctx, structCandidate{a}, structCandidate{b}, structCandidate{parent}, apply, path) 395 } 396 397 func listAssert(ctx context.Context, vrw types.ValueReadWriter, a, b, parent types.Value) (aList, bList, pList types.List, ok bool, err error) { 398 var aOk, bOk, pOk bool 399 aList, aOk = a.(types.List) 400 bList, bOk = b.(types.List) 401 if parent != nil { 402 pList, pOk = parent.(types.List) 403 } else { 404 pList, err = types.NewList(ctx, vrw) 405 406 if err != nil { 407 return types.EmptyList, types.EmptyList, types.EmptyList, false, err 408 } 409 410 pOk = true 411 } 412 413 return aList, bList, pList, aOk && bOk && pOk, nil 414 } 415 416 func mapAssert(ctx context.Context, vrw types.ValueReadWriter, a, b, parent types.Value) (aMap, bMap, pMap types.Map, ok bool, err error) { 417 var aOk, bOk, pOk bool 418 aMap, aOk = a.(types.Map) 419 bMap, bOk = b.(types.Map) 420 if parent != nil { 421 pMap, pOk = parent.(types.Map) 422 } else { 423 pMap, err = types.NewMap(ctx, vrw) 424 425 if err != nil { 426 return types.EmptyMap, types.EmptyMap, types.EmptyMap, false, err 427 } 428 429 pOk = true 430 } 431 432 return aMap, bMap, pMap, aOk && bOk && pOk, nil 433 } 434 435 func refAssert(ctx context.Context, a, b, parent types.Value, vrw types.ValueReadWriter) (aValue, bValue, pValue types.Value, ok bool, err error) { 436 var aOk, bOk, pOk bool 437 var aRef, bRef, pRef types.Ref 438 aRef, aOk = a.(types.Ref) 439 bRef, bOk = b.(types.Ref) 440 if !aOk || !bOk { 441 return 442 } 443 444 aValue, err = aRef.TargetValue(ctx, vrw) 445 446 if err != nil { 447 return nil, nil, nil, false, err 448 } 449 450 bValue, err = bRef.TargetValue(ctx, vrw) 451 452 if err != nil { 453 return nil, nil, nil, false, err 454 } 455 456 if parent != nil { 457 if pRef, pOk = parent.(types.Ref); pOk { 458 pValue, err = pRef.TargetValue(ctx, vrw) 459 460 if err != nil { 461 return nil, nil, nil, false, err 462 } 463 464 } 465 } else { 466 pOk = true // parent == nil is still OK. It just leaves pValue as nil. 467 } 468 return aValue, bValue, pValue, aOk && bOk && pOk, nil 469 } 470 471 func setAssert(ctx context.Context, vrw types.ValueReadWriter, a, b, parent types.Value) (aSet, bSet, pSet types.Set, ok bool, err error) { 472 var aOk, bOk, pOk bool 473 aSet, aOk = a.(types.Set) 474 bSet, bOk = b.(types.Set) 475 if parent != nil { 476 pSet, pOk = parent.(types.Set) 477 } else { 478 pSet, err = types.NewSet(ctx, vrw) 479 480 if err != nil { 481 return types.EmptySet, types.EmptySet, types.EmptySet, false, err 482 } 483 484 pOk = true 485 } 486 487 return aSet, bSet, pSet, aOk && bOk && pOk, nil 488 } 489 490 func structAssert(a, b, parent types.Value) (aStruct, bStruct, pStruct types.Struct, ok bool, err error) { 491 var aOk, bOk, pOk bool 492 aStruct, aOk = a.(types.Struct) 493 bStruct, bOk = b.(types.Struct) 494 if aOk && bOk { 495 if aStruct.Name() == bStruct.Name() { 496 if parent != nil { 497 pStruct, pOk = parent.(types.Struct) 498 } else { 499 pStruct, err = types.NewStruct(aStruct.Format(), aStruct.Name(), nil) 500 501 if err != nil { 502 es := types.EmptyStruct(aStruct.Format()) 503 return es, es, es, false, err 504 } 505 506 pOk = true 507 } 508 509 return aStruct, bStruct, pStruct, pOk, err 510 } 511 } 512 513 return 514 }