github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/merge/merge.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package merge 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 22 "github.com/dolthub/go-mysql-server/sql" 23 goerrors "gopkg.in/src-d/go-errors.v1" 24 25 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 26 "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" 27 "github.com/dolthub/dolt/go/libraries/utils/set" 28 "github.com/dolthub/dolt/go/store/hash" 29 "github.com/dolthub/dolt/go/store/types" 30 ) 31 32 var ErrFastForward = errors.New("fast forward") 33 var ErrTableDeletedAndModified = errors.New("conflict: table with same name deleted and modified ") 34 var ErrTableDeletedAndSchemaModified = errors.New("conflict: table with same name deleted and its schema modified ") 35 var ErrSchemaConflict = goerrors.NewKind("schema conflict found, merge aborted. Please alter schema to prevent schema conflicts before merging: %s") 36 37 // ErrCantOverwriteConflicts is returned when there are unresolved conflicts 38 // and the merge produces new conflicts. Because we currently don't have a model 39 // to merge sets of conflicts together, we need to abort the merge at this 40 // point. 41 var ErrCantOverwriteConflicts = errors.New("existing unresolved conflicts would be" + 42 " overridden by new conflicts produced by merge. Please resolve them and try again") 43 44 var ErrConflictsIncompatible = errors.New("the existing conflicts are of a different schema" + 45 " than the conflicts generated by this merge. Please resolve them and try again") 46 47 var ErrMultipleViolationsForRow = errors.New("multiple violations for row not supported") 48 49 var ErrSameTblAddedTwice = goerrors.NewKind("table with same name '%s' added in 2 commits can't be merged") 50 51 func MergeCommits(ctx *sql.Context, commit, mergeCommit *doltdb.Commit, opts editor.Options) (*Result, error) { 52 optCmt, err := doltdb.GetCommitAncestor(ctx, commit, mergeCommit) 53 if err != nil { 54 return nil, err 55 } 56 ancCommit, ok := optCmt.ToCommit() 57 if !ok { 58 // Ancestor commit should have been resolved before getting this far. 59 return nil, doltdb.ErrGhostCommitRuntimeFailure 60 } 61 62 ourRoot, err := commit.GetRootValue(ctx) 63 if err != nil { 64 return nil, err 65 } 66 67 theirRoot, err := mergeCommit.GetRootValue(ctx) 68 if err != nil { 69 return nil, err 70 } 71 72 ancRoot, err := ancCommit.GetRootValue(ctx) 73 if err != nil { 74 return nil, err 75 } 76 77 mo := MergeOpts{ 78 IsCherryPick: false, 79 KeepSchemaConflicts: true, 80 } 81 return MergeRoots(ctx, ourRoot, theirRoot, ancRoot, mergeCommit, ancCommit, opts, mo) 82 } 83 84 type Result struct { 85 Root doltdb.RootValue 86 SchemaConflicts []SchemaConflict 87 Stats map[string]*MergeStats 88 } 89 90 func (r Result) HasSchemaConflicts() bool { 91 return len(r.SchemaConflicts) > 0 92 } 93 94 func (r Result) HasMergeArtifacts() bool { 95 if r.HasSchemaConflicts() { 96 return true 97 } 98 for _, stats := range r.Stats { 99 if stats.HasArtifacts() { 100 return true 101 } 102 } 103 return false 104 } 105 106 // CountOfTablesWithDataConflicts returns the number of tables in this merge result that have 107 // a data conflict. 108 func (r Result) CountOfTablesWithDataConflicts() int { 109 count := 0 110 for _, mergeStats := range r.Stats { 111 if mergeStats.HasDataConflicts() { 112 count++ 113 } 114 } 115 return count 116 } 117 118 // CountOfTablesWithSchemaConflicts returns the number of tables in this merge result that have 119 // a schema conflict. 120 func (r Result) CountOfTablesWithSchemaConflicts() int { 121 count := 0 122 for _, mergeStats := range r.Stats { 123 if mergeStats.HasSchemaConflicts() { 124 count++ 125 } 126 } 127 return count 128 } 129 130 // CountOfTablesWithConstraintViolations returns the number of tables in this merge result that have 131 // a constraint violation. 132 func (r Result) CountOfTablesWithConstraintViolations() int { 133 count := 0 134 for _, mergeStats := range r.Stats { 135 if mergeStats.HasConstraintViolations() { 136 count++ 137 } 138 } 139 return count 140 } 141 142 func SchemaConflictTableNames(sc []SchemaConflict) (tables []string) { 143 tables = make([]string, len(sc)) 144 for i := range sc { 145 tables[i] = sc[i].TableName 146 } 147 return 148 } 149 150 // MergeRoots three-way merges |ourRoot|, |theirRoot|, and |ancRoot| and returns 151 // the merged root. If any conflicts or constraint violations are produced they 152 // are stored in the merged root. If |ourRoot| already contains conflicts they 153 // are stashed before the merge is performed. We abort the merge if the stash 154 // contains conflicts and we produce new conflicts. We currently don't have a 155 // model to merge conflicts together. 156 // 157 // Constraint violations that exist in ancestor are stashed and merged with the 158 // violations we detect when we diff the ancestor and the newly merged root. 159 // 160 // |theirRootIsh| is the hash of their's working set or commit. It is used to 161 // key any artifacts generated by this merge. |ancRootIsh| is similar and is 162 // used to retrieve the base value for a conflict. 163 func MergeRoots( 164 ctx *sql.Context, 165 ourRoot, theirRoot, ancRoot doltdb.RootValue, 166 theirs, ancestor doltdb.Rootish, 167 opts editor.Options, 168 mergeOpts MergeOpts, 169 ) (*Result, error) { 170 var ( 171 conflictStash *conflictStash 172 violationStash *violationStash 173 nbf *types.NomsBinFormat 174 err error 175 ) 176 177 nbf = ourRoot.VRW().Format() 178 if !types.IsFormat_DOLT(nbf) { 179 ourRoot, conflictStash, err = stashConflicts(ctx, ourRoot) 180 if err != nil { 181 return nil, err 182 } 183 ancRoot, violationStash, err = stashViolations(ctx, ancRoot) 184 if err != nil { 185 return nil, err 186 } 187 } 188 189 // merge collations 190 oColl, err := ourRoot.GetCollation(ctx) 191 if err != nil { 192 return nil, err 193 } 194 tColl, err := theirRoot.GetCollation(ctx) 195 if err != nil { 196 return nil, err 197 } 198 aColl, err := ancRoot.GetCollation(ctx) 199 if err != nil { 200 return nil, err 201 } 202 mergedRoot := ourRoot 203 204 // there is a collation change 205 if oColl != tColl { 206 // both sides changed, and not the same, conflict 207 if oColl != aColl && tColl != aColl { 208 oCollName := sql.CollationID(oColl).Collation().Name 209 tCollName := sql.CollationID(tColl).Collation().Name 210 return nil, fmt.Errorf("database collation conflict, please resolve manually. ours: %s, theirs: %s", oCollName, tCollName) 211 } 212 // only their side changed, take their side 213 if oColl == aColl { 214 mergedRoot, err = mergedRoot.SetCollation(ctx, tColl) 215 if err != nil { 216 return nil, err 217 } 218 } 219 // only our side changed, keep our side 220 } 221 222 // Make sure to pass in ourRoot as the first RootValue so that ourRoot's table names will be merged first. 223 // This helps to avoid non-deterministic error result for table rename cases. Renaming a table creates two changes: 224 // 1. dropping the old name table 225 // 2. adding the new name table 226 // Dropping the old name table will trigger delete/modify conflict, which is the preferred error case over 227 // same column tag used error returned from creating the new name table. 228 tblNames, err := doltdb.UnionTableNames(ctx, ourRoot, theirRoot) 229 230 if err != nil { 231 return nil, err 232 } 233 234 tblToStats := make(map[string]*MergeStats) 235 236 // Merge tables one at a time. This is done based on name. With table names from ourRoot being merged first, 237 // renaming a table will return delete/modify conflict error consistently. 238 // TODO: merge based on a more durable table identity that persists across renames 239 merger, err := NewMerger(ourRoot, theirRoot, ancRoot, theirs, ancestor, ourRoot.VRW(), ourRoot.NodeStore()) 240 if err != nil { 241 return nil, err 242 } 243 244 // visitedTables holds all tables that were added, removed, or modified (basically not "unmodified") 245 visitedTables := make(map[string]struct{}) 246 var schConflicts []SchemaConflict 247 for _, tblName := range tblNames { 248 mergedTable, stats, err := merger.MergeTable(ctx, tblName, opts, mergeOpts) 249 if errors.Is(ErrTableDeletedAndModified, err) && doltdb.IsFullTextTable(tblName) { 250 // If a Full-Text table was both modified and deleted, then we want to ignore the deletion. 251 // If there's a true conflict, then the parent table will catch the conflict. 252 stats = &MergeStats{Operation: TableModified} 253 } else if errors.Is(ErrTableDeletedAndSchemaModified, err) { 254 tblToStats[tblName] = &MergeStats{ 255 Operation: TableModified, 256 SchemaConflicts: 1, 257 } 258 conflict := SchemaConflict{ 259 TableName: tblName, 260 ModifyDeleteConflict: true, 261 } 262 if !mergeOpts.KeepSchemaConflicts { 263 return nil, conflict 264 } 265 schConflicts = append(schConflicts, conflict) 266 continue 267 } else if err != nil { 268 return nil, err 269 } 270 // If this table was visited during the merge, then we'll add it to the set 271 if stats.Operation != TableUnmodified { 272 visitedTables[tblName] = struct{}{} 273 } 274 if doltdb.IsFullTextTable(tblName) && (stats.Operation == TableModified || stats.Operation == TableRemoved) { 275 // We handle removal and modification later in the rebuilding process, so we'll skip those. 276 // We do not handle adding new tables, so we allow that to proceed. 277 continue 278 } 279 if mergedTable.conflict.Count() > 0 { 280 if types.IsFormat_DOLT(nbf) { 281 schConflicts = append(schConflicts, mergedTable.conflict) 282 } else { 283 // return schema conflict as error 284 return nil, mergedTable.conflict 285 } 286 } 287 288 if mergedTable.table != nil { 289 tblToStats[tblName] = stats 290 291 mergedRoot, err = mergedRoot.PutTable(ctx, doltdb.TableName{Name: tblName}, mergedTable.table) 292 if err != nil { 293 return nil, err 294 } 295 continue 296 } 297 298 newRootHasTable, err := mergedRoot.HasTable(ctx, tblName) 299 if err != nil { 300 return nil, err 301 } 302 303 if newRootHasTable { 304 // Merge root deleted this table 305 tblToStats[tblName] = &MergeStats{Operation: TableRemoved} 306 307 mergedRoot, err = mergedRoot.RemoveTables(ctx, false, false, tblName) 308 if err != nil { 309 return nil, err 310 } 311 } else { 312 // This is a deleted table that the merge root still has 313 if stats.Operation != TableRemoved { 314 panic(fmt.Sprintf("Invalid merge state for table %s. This is a bug.", tblName)) 315 } 316 // Nothing to update, our root already has the table deleted 317 } 318 } 319 320 mergedRoot, err = rebuildFullTextIndexes(ctx, mergedRoot, ourRoot, theirRoot, visitedTables) 321 if err != nil { 322 return nil, err 323 } 324 325 mergedFKColl, conflicts, err := ForeignKeysMerge(ctx, mergedRoot, ourRoot, theirRoot, ancRoot) 326 if err != nil { 327 return nil, err 328 } 329 if len(conflicts) > 0 { 330 return nil, fmt.Errorf("foreign key conflicts") 331 } 332 333 mergedRoot, err = mergedRoot.PutForeignKeyCollection(ctx, mergedFKColl) 334 if err != nil { 335 return nil, err 336 } 337 338 mergedRoot, err = mergedRoot.HandlePostMerge(ctx, ourRoot, theirRoot, ancRoot) 339 if err != nil { 340 return nil, err 341 } 342 343 h, err := merger.rightSrc.HashOf() 344 if err != nil { 345 return nil, err 346 } 347 348 var tableSet *set.StrSet = nil 349 if mergeOpts.RecordViolationsForTables != nil { 350 tableSet = set.NewCaseInsensitiveStrSet(nil) 351 for tableName, _ := range mergeOpts.RecordViolationsForTables { 352 tableSet.Add(tableName) 353 } 354 } 355 356 mergedRoot, _, err = AddForeignKeyViolations(ctx, mergedRoot, ancRoot, tableSet, h) 357 if err != nil { 358 return nil, err 359 } 360 361 if types.IsFormat_DOLT(ourRoot.VRW().Format()) { 362 err = getConstraintViolationStats(ctx, mergedRoot, tblToStats) 363 if err != nil { 364 return nil, err 365 } 366 367 return &Result{ 368 Root: mergedRoot, 369 SchemaConflicts: schConflicts, 370 Stats: tblToStats, 371 }, nil 372 } 373 374 mergedRoot, err = mergeCVsWithStash(ctx, mergedRoot, violationStash) 375 if err != nil { 376 return nil, err 377 } 378 379 err = getConstraintViolationStats(ctx, mergedRoot, tblToStats) 380 if err != nil { 381 return nil, err 382 } 383 384 mergedHasConflicts := checkForConflicts(tblToStats) 385 if !conflictStash.Empty() && mergedHasConflicts { 386 return nil, ErrCantOverwriteConflicts 387 } else if !conflictStash.Empty() { 388 mergedRoot, err = applyConflictStash(ctx, conflictStash.Stash, mergedRoot) 389 if err != nil { 390 return nil, err 391 } 392 } 393 394 return &Result{ 395 Root: mergedRoot, 396 SchemaConflicts: schConflicts, 397 Stats: tblToStats, 398 }, nil 399 } 400 401 // mergeCVsWithStash merges the table constraint violations in |stash| with |root|. 402 // Returns an updated root with all the merged CVs. 403 func mergeCVsWithStash(ctx context.Context, root doltdb.RootValue, stash *violationStash) (doltdb.RootValue, error) { 404 updatedRoot := root 405 for name, stashed := range stash.Stash { 406 tbl, ok, err := root.GetTable(ctx, doltdb.TableName{Name: name}) 407 if err != nil { 408 return nil, err 409 } 410 if !ok { 411 // the table with the CVs was deleted 412 continue 413 } 414 curr, err := tbl.GetConstraintViolations(ctx) 415 if err != nil { 416 return nil, err 417 } 418 unioned, err := types.UnionMaps(ctx, curr, stashed, func(key types.Value, currV types.Value, stashV types.Value) (types.Value, error) { 419 if !currV.Equals(stashV) { 420 panic(fmt.Sprintf("encountered conflict when merging constraint violations, conflicted key: %v\ncurrent value: %v\nstashed value: %v\n", key, currV, stashV)) 421 } 422 return currV, nil 423 }) 424 if err != nil { 425 return nil, err 426 } 427 tbl, err = tbl.SetConstraintViolations(ctx, unioned) 428 if err != nil { 429 return nil, err 430 } 431 updatedRoot, err = root.PutTable(ctx, doltdb.TableName{Name: name}, tbl) 432 if err != nil { 433 return nil, err 434 } 435 } 436 return updatedRoot, nil 437 } 438 439 // checks if a conflict occurred during the merge 440 func checkForConflicts(tblToStats map[string]*MergeStats) bool { 441 for _, stat := range tblToStats { 442 if stat.HasConflicts() { 443 return true 444 } 445 } 446 return false 447 } 448 449 // populates tblToStats with violation statistics 450 func getConstraintViolationStats(ctx context.Context, root doltdb.RootValue, tblToStats map[string]*MergeStats) error { 451 for tblName, stats := range tblToStats { 452 tbl, ok, err := root.GetTable(ctx, doltdb.TableName{Name: tblName}) 453 if err != nil { 454 return err 455 } 456 if ok { 457 n, err := tbl.NumConstraintViolations(ctx) 458 if err != nil { 459 return err 460 } 461 stats.ConstraintViolations = int(n) 462 } 463 } 464 return nil 465 } 466 467 // MayHaveConstraintViolations returns whether the given roots may have constraint violations. For example, a fast 468 // forward merge that does not involve any tables with foreign key constraints or check constraints will not be able 469 // to generate constraint violations. Unique key constraint violations would be caught during the generation of the 470 // merged root, therefore it is not a factor for this function. 471 func MayHaveConstraintViolations(ctx context.Context, ancestor, merged doltdb.RootValue) (bool, error) { 472 ancTables, err := doltdb.MapTableHashes(ctx, ancestor) 473 if err != nil { 474 return false, err 475 } 476 mergedTables, err := doltdb.MapTableHashes(ctx, merged) 477 if err != nil { 478 return false, err 479 } 480 fkColl, err := merged.GetForeignKeyCollection(ctx) 481 if err != nil { 482 return false, err 483 } 484 tablesInFks := fkColl.Tables() 485 for tblName := range tablesInFks { 486 if ancHash, ok := ancTables[tblName]; !ok { 487 // If a table used in a foreign key is new then it's treated as a change 488 return true, nil 489 } else if mergedHash, ok := mergedTables[tblName]; !ok { 490 return false, fmt.Errorf("foreign key uses table '%s' but no hash can be found for this table", tblName) 491 } else if !ancHash.Equal(mergedHash) { 492 return true, nil 493 } 494 } 495 return false, nil 496 } 497 498 type ArtifactStatus struct { 499 SchemaConflictsTables []string 500 DataConflictTables []string 501 ConstraintViolationsTables []string 502 } 503 504 func (as ArtifactStatus) HasConflicts() bool { 505 return len(as.DataConflictTables) > 0 || len(as.SchemaConflictsTables) > 0 506 } 507 508 func (as ArtifactStatus) HasConstraintViolations() bool { 509 return len(as.ConstraintViolationsTables) > 0 510 } 511 512 func GetMergeArtifactStatus(ctx context.Context, working *doltdb.WorkingSet) (as ArtifactStatus, err error) { 513 if working.MergeActive() { 514 as.SchemaConflictsTables = working.MergeState().TablesWithSchemaConflicts() 515 } 516 517 as.DataConflictTables, err = doltdb.TablesWithDataConflicts(ctx, working.WorkingRoot()) 518 if err != nil { 519 return as, err 520 } 521 522 as.ConstraintViolationsTables, err = doltdb.TablesWithConstraintViolations(ctx, working.WorkingRoot()) 523 if err != nil { 524 return as, err 525 } 526 return 527 } 528 529 // MergeWouldStompChanges returns list of table names that are stomped and the diffs map between head and working set. 530 func MergeWouldStompChanges(ctx context.Context, roots doltdb.Roots, mergeCommit *doltdb.Commit) ([]string, map[string]hash.Hash, error) { 531 mergeRoot, err := mergeCommit.GetRootValue(ctx) 532 if err != nil { 533 return nil, nil, err 534 } 535 536 headTableHashes, err := doltdb.MapTableHashes(ctx, roots.Head) 537 if err != nil { 538 return nil, nil, err 539 } 540 541 workingTableHashes, err := doltdb.MapTableHashes(ctx, roots.Working) 542 if err != nil { 543 return nil, nil, err 544 } 545 546 mergeTableHashes, err := doltdb.MapTableHashes(ctx, mergeRoot) 547 if err != nil { 548 return nil, nil, err 549 } 550 551 headWorkingDiffs := diffTableHashes(headTableHashes, workingTableHashes) 552 mergedHeadDiffs := diffTableHashes(headTableHashes, mergeTableHashes) 553 554 stompedTables := make([]string, 0, len(headWorkingDiffs)) 555 for tName, _ := range headWorkingDiffs { 556 if _, ok := mergedHeadDiffs[tName]; ok { 557 // even if the working changes match the merge changes, don't allow (matches git behavior). 558 stompedTables = append(stompedTables, tName) 559 } 560 } 561 562 return stompedTables, headWorkingDiffs, nil 563 } 564 565 func diffTableHashes(headTableHashes, otherTableHashes map[string]hash.Hash) map[string]hash.Hash { 566 diffs := make(map[string]hash.Hash) 567 for tName, hh := range headTableHashes { 568 if h, ok := otherTableHashes[tName]; ok { 569 if h != hh { 570 // modification 571 diffs[tName] = h 572 } 573 } else { 574 // deletion 575 diffs[tName] = hash.Hash{} 576 } 577 } 578 579 for tName, h := range otherTableHashes { 580 if _, ok := headTableHashes[tName]; !ok { 581 // addition 582 diffs[tName] = h 583 } 584 } 585 586 return diffs 587 }