github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/merge/merge.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package merge
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  
    22  	"github.com/dolthub/go-mysql-server/sql"
    23  	goerrors "gopkg.in/src-d/go-errors.v1"
    24  
    25  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
    27  	"github.com/dolthub/dolt/go/libraries/utils/set"
    28  	"github.com/dolthub/dolt/go/store/hash"
    29  	"github.com/dolthub/dolt/go/store/types"
    30  )
    31  
    32  var ErrFastForward = errors.New("fast forward")
    33  var ErrTableDeletedAndModified = errors.New("conflict: table with same name deleted and modified ")
    34  var ErrTableDeletedAndSchemaModified = errors.New("conflict: table with same name deleted and its schema modified ")
    35  var ErrSchemaConflict = goerrors.NewKind("schema conflict found, merge aborted. Please alter schema to prevent schema conflicts before merging: %s")
    36  
    37  // ErrCantOverwriteConflicts is returned when there are unresolved conflicts
    38  // and the merge produces new conflicts. Because we currently don't have a model
    39  // to merge sets of conflicts together, we need to abort the merge at this
    40  // point.
    41  var ErrCantOverwriteConflicts = errors.New("existing unresolved conflicts would be" +
    42  	" overridden by new conflicts produced by merge. Please resolve them and try again")
    43  
    44  var ErrConflictsIncompatible = errors.New("the existing conflicts are of a different schema" +
    45  	" than the conflicts generated by this merge. Please resolve them and try again")
    46  
    47  var ErrMultipleViolationsForRow = errors.New("multiple violations for row not supported")
    48  
    49  var ErrSameTblAddedTwice = goerrors.NewKind("table with same name '%s' added in 2 commits can't be merged")
    50  
    51  func MergeCommits(ctx *sql.Context, commit, mergeCommit *doltdb.Commit, opts editor.Options) (*Result, error) {
    52  	optCmt, err := doltdb.GetCommitAncestor(ctx, commit, mergeCommit)
    53  	if err != nil {
    54  		return nil, err
    55  	}
    56  	ancCommit, ok := optCmt.ToCommit()
    57  	if !ok {
    58  		// Ancestor commit should have been resolved before getting this far.
    59  		return nil, doltdb.ErrGhostCommitRuntimeFailure
    60  	}
    61  
    62  	ourRoot, err := commit.GetRootValue(ctx)
    63  	if err != nil {
    64  		return nil, err
    65  	}
    66  
    67  	theirRoot, err := mergeCommit.GetRootValue(ctx)
    68  	if err != nil {
    69  		return nil, err
    70  	}
    71  
    72  	ancRoot, err := ancCommit.GetRootValue(ctx)
    73  	if err != nil {
    74  		return nil, err
    75  	}
    76  
    77  	mo := MergeOpts{
    78  		IsCherryPick:        false,
    79  		KeepSchemaConflicts: true,
    80  	}
    81  	return MergeRoots(ctx, ourRoot, theirRoot, ancRoot, mergeCommit, ancCommit, opts, mo)
    82  }
    83  
    84  type Result struct {
    85  	Root            doltdb.RootValue
    86  	SchemaConflicts []SchemaConflict
    87  	Stats           map[string]*MergeStats
    88  }
    89  
    90  func (r Result) HasSchemaConflicts() bool {
    91  	return len(r.SchemaConflicts) > 0
    92  }
    93  
    94  func (r Result) HasMergeArtifacts() bool {
    95  	if r.HasSchemaConflicts() {
    96  		return true
    97  	}
    98  	for _, stats := range r.Stats {
    99  		if stats.HasArtifacts() {
   100  			return true
   101  		}
   102  	}
   103  	return false
   104  }
   105  
   106  // CountOfTablesWithDataConflicts returns the number of tables in this merge result that have
   107  // a data conflict.
   108  func (r Result) CountOfTablesWithDataConflicts() int {
   109  	count := 0
   110  	for _, mergeStats := range r.Stats {
   111  		if mergeStats.HasDataConflicts() {
   112  			count++
   113  		}
   114  	}
   115  	return count
   116  }
   117  
   118  // CountOfTablesWithSchemaConflicts returns the number of tables in this merge result that have
   119  // a schema conflict.
   120  func (r Result) CountOfTablesWithSchemaConflicts() int {
   121  	count := 0
   122  	for _, mergeStats := range r.Stats {
   123  		if mergeStats.HasSchemaConflicts() {
   124  			count++
   125  		}
   126  	}
   127  	return count
   128  }
   129  
   130  // CountOfTablesWithConstraintViolations returns the number of tables in this merge result that have
   131  // a constraint violation.
   132  func (r Result) CountOfTablesWithConstraintViolations() int {
   133  	count := 0
   134  	for _, mergeStats := range r.Stats {
   135  		if mergeStats.HasConstraintViolations() {
   136  			count++
   137  		}
   138  	}
   139  	return count
   140  }
   141  
   142  func SchemaConflictTableNames(sc []SchemaConflict) (tables []string) {
   143  	tables = make([]string, len(sc))
   144  	for i := range sc {
   145  		tables[i] = sc[i].TableName
   146  	}
   147  	return
   148  }
   149  
   150  // MergeRoots three-way merges |ourRoot|, |theirRoot|, and |ancRoot| and returns
   151  // the merged root. If any conflicts or constraint violations are produced they
   152  // are stored in the merged root. If |ourRoot| already contains conflicts they
   153  // are stashed before the merge is performed. We abort the merge if the stash
   154  // contains conflicts and we produce new conflicts. We currently don't have a
   155  // model to merge conflicts together.
   156  //
   157  // Constraint violations that exist in ancestor are stashed and merged with the
   158  // violations we detect when we diff the ancestor and the newly merged root.
   159  //
   160  // |theirRootIsh| is the hash of their's working set or commit. It is used to
   161  // key any artifacts generated by this merge. |ancRootIsh| is similar and is
   162  // used to retrieve the base value for a conflict.
   163  func MergeRoots(
   164  	ctx *sql.Context,
   165  	ourRoot, theirRoot, ancRoot doltdb.RootValue,
   166  	theirs, ancestor doltdb.Rootish,
   167  	opts editor.Options,
   168  	mergeOpts MergeOpts,
   169  ) (*Result, error) {
   170  	var (
   171  		conflictStash  *conflictStash
   172  		violationStash *violationStash
   173  		nbf            *types.NomsBinFormat
   174  		err            error
   175  	)
   176  
   177  	nbf = ourRoot.VRW().Format()
   178  	if !types.IsFormat_DOLT(nbf) {
   179  		ourRoot, conflictStash, err = stashConflicts(ctx, ourRoot)
   180  		if err != nil {
   181  			return nil, err
   182  		}
   183  		ancRoot, violationStash, err = stashViolations(ctx, ancRoot)
   184  		if err != nil {
   185  			return nil, err
   186  		}
   187  	}
   188  
   189  	// merge collations
   190  	oColl, err := ourRoot.GetCollation(ctx)
   191  	if err != nil {
   192  		return nil, err
   193  	}
   194  	tColl, err := theirRoot.GetCollation(ctx)
   195  	if err != nil {
   196  		return nil, err
   197  	}
   198  	aColl, err := ancRoot.GetCollation(ctx)
   199  	if err != nil {
   200  		return nil, err
   201  	}
   202  	mergedRoot := ourRoot
   203  
   204  	// there is a collation change
   205  	if oColl != tColl {
   206  		// both sides changed, and not the same, conflict
   207  		if oColl != aColl && tColl != aColl {
   208  			oCollName := sql.CollationID(oColl).Collation().Name
   209  			tCollName := sql.CollationID(tColl).Collation().Name
   210  			return nil, fmt.Errorf("database collation conflict, please resolve manually. ours: %s, theirs: %s", oCollName, tCollName)
   211  		}
   212  		// only their side changed, take their side
   213  		if oColl == aColl {
   214  			mergedRoot, err = mergedRoot.SetCollation(ctx, tColl)
   215  			if err != nil {
   216  				return nil, err
   217  			}
   218  		}
   219  		// only our side changed, keep our side
   220  	}
   221  
   222  	// Make sure to pass in ourRoot as the first RootValue so that ourRoot's table names will be merged first.
   223  	// This helps to avoid non-deterministic error result for table rename cases. Renaming a table creates two changes:
   224  	// 1. dropping the old name table
   225  	// 2. adding the new name table
   226  	// Dropping the old name table will trigger delete/modify conflict, which is the preferred error case over
   227  	// same column tag used error returned from creating the new name table.
   228  	tblNames, err := doltdb.UnionTableNames(ctx, ourRoot, theirRoot)
   229  
   230  	if err != nil {
   231  		return nil, err
   232  	}
   233  
   234  	tblToStats := make(map[string]*MergeStats)
   235  
   236  	// Merge tables one at a time. This is done based on name. With table names from ourRoot being merged first,
   237  	// renaming a table will return delete/modify conflict error consistently.
   238  	// TODO: merge based on a more durable table identity that persists across renames
   239  	merger, err := NewMerger(ourRoot, theirRoot, ancRoot, theirs, ancestor, ourRoot.VRW(), ourRoot.NodeStore())
   240  	if err != nil {
   241  		return nil, err
   242  	}
   243  
   244  	// visitedTables holds all tables that were added, removed, or modified (basically not "unmodified")
   245  	visitedTables := make(map[string]struct{})
   246  	var schConflicts []SchemaConflict
   247  	for _, tblName := range tblNames {
   248  		mergedTable, stats, err := merger.MergeTable(ctx, tblName, opts, mergeOpts)
   249  		if errors.Is(ErrTableDeletedAndModified, err) && doltdb.IsFullTextTable(tblName) {
   250  			// If a Full-Text table was both modified and deleted, then we want to ignore the deletion.
   251  			// If there's a true conflict, then the parent table will catch the conflict.
   252  			stats = &MergeStats{Operation: TableModified}
   253  		} else if errors.Is(ErrTableDeletedAndSchemaModified, err) {
   254  			tblToStats[tblName] = &MergeStats{
   255  				Operation:       TableModified,
   256  				SchemaConflicts: 1,
   257  			}
   258  			conflict := SchemaConflict{
   259  				TableName:            tblName,
   260  				ModifyDeleteConflict: true,
   261  			}
   262  			if !mergeOpts.KeepSchemaConflicts {
   263  				return nil, conflict
   264  			}
   265  			schConflicts = append(schConflicts, conflict)
   266  			continue
   267  		} else if err != nil {
   268  			return nil, err
   269  		}
   270  		// If this table was visited during the merge, then we'll add it to the set
   271  		if stats.Operation != TableUnmodified {
   272  			visitedTables[tblName] = struct{}{}
   273  		}
   274  		if doltdb.IsFullTextTable(tblName) && (stats.Operation == TableModified || stats.Operation == TableRemoved) {
   275  			// We handle removal and modification later in the rebuilding process, so we'll skip those.
   276  			// We do not handle adding new tables, so we allow that to proceed.
   277  			continue
   278  		}
   279  		if mergedTable.conflict.Count() > 0 {
   280  			if types.IsFormat_DOLT(nbf) {
   281  				schConflicts = append(schConflicts, mergedTable.conflict)
   282  			} else {
   283  				// return schema conflict as error
   284  				return nil, mergedTable.conflict
   285  			}
   286  		}
   287  
   288  		if mergedTable.table != nil {
   289  			tblToStats[tblName] = stats
   290  
   291  			mergedRoot, err = mergedRoot.PutTable(ctx, doltdb.TableName{Name: tblName}, mergedTable.table)
   292  			if err != nil {
   293  				return nil, err
   294  			}
   295  			continue
   296  		}
   297  
   298  		newRootHasTable, err := mergedRoot.HasTable(ctx, tblName)
   299  		if err != nil {
   300  			return nil, err
   301  		}
   302  
   303  		if newRootHasTable {
   304  			// Merge root deleted this table
   305  			tblToStats[tblName] = &MergeStats{Operation: TableRemoved}
   306  
   307  			mergedRoot, err = mergedRoot.RemoveTables(ctx, false, false, tblName)
   308  			if err != nil {
   309  				return nil, err
   310  			}
   311  		} else {
   312  			// This is a deleted table that the merge root still has
   313  			if stats.Operation != TableRemoved {
   314  				panic(fmt.Sprintf("Invalid merge state for table %s. This is a bug.", tblName))
   315  			}
   316  			// Nothing to update, our root already has the table deleted
   317  		}
   318  	}
   319  
   320  	mergedRoot, err = rebuildFullTextIndexes(ctx, mergedRoot, ourRoot, theirRoot, visitedTables)
   321  	if err != nil {
   322  		return nil, err
   323  	}
   324  
   325  	mergedFKColl, conflicts, err := ForeignKeysMerge(ctx, mergedRoot, ourRoot, theirRoot, ancRoot)
   326  	if err != nil {
   327  		return nil, err
   328  	}
   329  	if len(conflicts) > 0 {
   330  		return nil, fmt.Errorf("foreign key conflicts")
   331  	}
   332  
   333  	mergedRoot, err = mergedRoot.PutForeignKeyCollection(ctx, mergedFKColl)
   334  	if err != nil {
   335  		return nil, err
   336  	}
   337  
   338  	mergedRoot, err = mergedRoot.HandlePostMerge(ctx, ourRoot, theirRoot, ancRoot)
   339  	if err != nil {
   340  		return nil, err
   341  	}
   342  
   343  	h, err := merger.rightSrc.HashOf()
   344  	if err != nil {
   345  		return nil, err
   346  	}
   347  
   348  	var tableSet *set.StrSet = nil
   349  	if mergeOpts.RecordViolationsForTables != nil {
   350  		tableSet = set.NewCaseInsensitiveStrSet(nil)
   351  		for tableName, _ := range mergeOpts.RecordViolationsForTables {
   352  			tableSet.Add(tableName)
   353  		}
   354  	}
   355  
   356  	mergedRoot, _, err = AddForeignKeyViolations(ctx, mergedRoot, ancRoot, tableSet, h)
   357  	if err != nil {
   358  		return nil, err
   359  	}
   360  
   361  	if types.IsFormat_DOLT(ourRoot.VRW().Format()) {
   362  		err = getConstraintViolationStats(ctx, mergedRoot, tblToStats)
   363  		if err != nil {
   364  			return nil, err
   365  		}
   366  
   367  		return &Result{
   368  			Root:            mergedRoot,
   369  			SchemaConflicts: schConflicts,
   370  			Stats:           tblToStats,
   371  		}, nil
   372  	}
   373  
   374  	mergedRoot, err = mergeCVsWithStash(ctx, mergedRoot, violationStash)
   375  	if err != nil {
   376  		return nil, err
   377  	}
   378  
   379  	err = getConstraintViolationStats(ctx, mergedRoot, tblToStats)
   380  	if err != nil {
   381  		return nil, err
   382  	}
   383  
   384  	mergedHasConflicts := checkForConflicts(tblToStats)
   385  	if !conflictStash.Empty() && mergedHasConflicts {
   386  		return nil, ErrCantOverwriteConflicts
   387  	} else if !conflictStash.Empty() {
   388  		mergedRoot, err = applyConflictStash(ctx, conflictStash.Stash, mergedRoot)
   389  		if err != nil {
   390  			return nil, err
   391  		}
   392  	}
   393  
   394  	return &Result{
   395  		Root:            mergedRoot,
   396  		SchemaConflicts: schConflicts,
   397  		Stats:           tblToStats,
   398  	}, nil
   399  }
   400  
   401  // mergeCVsWithStash merges the table constraint violations in |stash| with |root|.
   402  // Returns an updated root with all the merged CVs.
   403  func mergeCVsWithStash(ctx context.Context, root doltdb.RootValue, stash *violationStash) (doltdb.RootValue, error) {
   404  	updatedRoot := root
   405  	for name, stashed := range stash.Stash {
   406  		tbl, ok, err := root.GetTable(ctx, doltdb.TableName{Name: name})
   407  		if err != nil {
   408  			return nil, err
   409  		}
   410  		if !ok {
   411  			// the table with the CVs was deleted
   412  			continue
   413  		}
   414  		curr, err := tbl.GetConstraintViolations(ctx)
   415  		if err != nil {
   416  			return nil, err
   417  		}
   418  		unioned, err := types.UnionMaps(ctx, curr, stashed, func(key types.Value, currV types.Value, stashV types.Value) (types.Value, error) {
   419  			if !currV.Equals(stashV) {
   420  				panic(fmt.Sprintf("encountered conflict when merging constraint violations, conflicted key: %v\ncurrent value: %v\nstashed value: %v\n", key, currV, stashV))
   421  			}
   422  			return currV, nil
   423  		})
   424  		if err != nil {
   425  			return nil, err
   426  		}
   427  		tbl, err = tbl.SetConstraintViolations(ctx, unioned)
   428  		if err != nil {
   429  			return nil, err
   430  		}
   431  		updatedRoot, err = root.PutTable(ctx, doltdb.TableName{Name: name}, tbl)
   432  		if err != nil {
   433  			return nil, err
   434  		}
   435  	}
   436  	return updatedRoot, nil
   437  }
   438  
   439  // checks if a conflict occurred during the merge
   440  func checkForConflicts(tblToStats map[string]*MergeStats) bool {
   441  	for _, stat := range tblToStats {
   442  		if stat.HasConflicts() {
   443  			return true
   444  		}
   445  	}
   446  	return false
   447  }
   448  
   449  // populates tblToStats with violation statistics
   450  func getConstraintViolationStats(ctx context.Context, root doltdb.RootValue, tblToStats map[string]*MergeStats) error {
   451  	for tblName, stats := range tblToStats {
   452  		tbl, ok, err := root.GetTable(ctx, doltdb.TableName{Name: tblName})
   453  		if err != nil {
   454  			return err
   455  		}
   456  		if ok {
   457  			n, err := tbl.NumConstraintViolations(ctx)
   458  			if err != nil {
   459  				return err
   460  			}
   461  			stats.ConstraintViolations = int(n)
   462  		}
   463  	}
   464  	return nil
   465  }
   466  
   467  // MayHaveConstraintViolations returns whether the given roots may have constraint violations. For example, a fast
   468  // forward merge that does not involve any tables with foreign key constraints or check constraints will not be able
   469  // to generate constraint violations. Unique key constraint violations would be caught during the generation of the
   470  // merged root, therefore it is not a factor for this function.
   471  func MayHaveConstraintViolations(ctx context.Context, ancestor, merged doltdb.RootValue) (bool, error) {
   472  	ancTables, err := doltdb.MapTableHashes(ctx, ancestor)
   473  	if err != nil {
   474  		return false, err
   475  	}
   476  	mergedTables, err := doltdb.MapTableHashes(ctx, merged)
   477  	if err != nil {
   478  		return false, err
   479  	}
   480  	fkColl, err := merged.GetForeignKeyCollection(ctx)
   481  	if err != nil {
   482  		return false, err
   483  	}
   484  	tablesInFks := fkColl.Tables()
   485  	for tblName := range tablesInFks {
   486  		if ancHash, ok := ancTables[tblName]; !ok {
   487  			// If a table used in a foreign key is new then it's treated as a change
   488  			return true, nil
   489  		} else if mergedHash, ok := mergedTables[tblName]; !ok {
   490  			return false, fmt.Errorf("foreign key uses table '%s' but no hash can be found for this table", tblName)
   491  		} else if !ancHash.Equal(mergedHash) {
   492  			return true, nil
   493  		}
   494  	}
   495  	return false, nil
   496  }
   497  
   498  type ArtifactStatus struct {
   499  	SchemaConflictsTables      []string
   500  	DataConflictTables         []string
   501  	ConstraintViolationsTables []string
   502  }
   503  
   504  func (as ArtifactStatus) HasConflicts() bool {
   505  	return len(as.DataConflictTables) > 0 || len(as.SchemaConflictsTables) > 0
   506  }
   507  
   508  func (as ArtifactStatus) HasConstraintViolations() bool {
   509  	return len(as.ConstraintViolationsTables) > 0
   510  }
   511  
   512  func GetMergeArtifactStatus(ctx context.Context, working *doltdb.WorkingSet) (as ArtifactStatus, err error) {
   513  	if working.MergeActive() {
   514  		as.SchemaConflictsTables = working.MergeState().TablesWithSchemaConflicts()
   515  	}
   516  
   517  	as.DataConflictTables, err = doltdb.TablesWithDataConflicts(ctx, working.WorkingRoot())
   518  	if err != nil {
   519  		return as, err
   520  	}
   521  
   522  	as.ConstraintViolationsTables, err = doltdb.TablesWithConstraintViolations(ctx, working.WorkingRoot())
   523  	if err != nil {
   524  		return as, err
   525  	}
   526  	return
   527  }
   528  
   529  // MergeWouldStompChanges returns list of table names that are stomped and the diffs map between head and working set.
   530  func MergeWouldStompChanges(ctx context.Context, roots doltdb.Roots, mergeCommit *doltdb.Commit) ([]string, map[string]hash.Hash, error) {
   531  	mergeRoot, err := mergeCommit.GetRootValue(ctx)
   532  	if err != nil {
   533  		return nil, nil, err
   534  	}
   535  
   536  	headTableHashes, err := doltdb.MapTableHashes(ctx, roots.Head)
   537  	if err != nil {
   538  		return nil, nil, err
   539  	}
   540  
   541  	workingTableHashes, err := doltdb.MapTableHashes(ctx, roots.Working)
   542  	if err != nil {
   543  		return nil, nil, err
   544  	}
   545  
   546  	mergeTableHashes, err := doltdb.MapTableHashes(ctx, mergeRoot)
   547  	if err != nil {
   548  		return nil, nil, err
   549  	}
   550  
   551  	headWorkingDiffs := diffTableHashes(headTableHashes, workingTableHashes)
   552  	mergedHeadDiffs := diffTableHashes(headTableHashes, mergeTableHashes)
   553  
   554  	stompedTables := make([]string, 0, len(headWorkingDiffs))
   555  	for tName, _ := range headWorkingDiffs {
   556  		if _, ok := mergedHeadDiffs[tName]; ok {
   557  			// even if the working changes match the merge changes, don't allow (matches git behavior).
   558  			stompedTables = append(stompedTables, tName)
   559  		}
   560  	}
   561  
   562  	return stompedTables, headWorkingDiffs, nil
   563  }
   564  
   565  func diffTableHashes(headTableHashes, otherTableHashes map[string]hash.Hash) map[string]hash.Hash {
   566  	diffs := make(map[string]hash.Hash)
   567  	for tName, hh := range headTableHashes {
   568  		if h, ok := otherTableHashes[tName]; ok {
   569  			if h != hh {
   570  				// modification
   571  				diffs[tName] = h
   572  			}
   573  		} else {
   574  			// deletion
   575  			diffs[tName] = hash.Hash{}
   576  		}
   577  	}
   578  
   579  	for tName, h := range otherTableHashes {
   580  		if _, ok := headTableHashes[tName]; !ok {
   581  			// addition
   582  			diffs[tName] = h
   583  		}
   584  	}
   585  
   586  	return diffs
   587  }