github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/merge/merge_rows.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package merge
    16  
    17  import (
    18  	"context"
    19  	"strings"
    20  
    21  	"github.com/dolthub/go-mysql-server/sql"
    22  
    23  	"github.com/dolthub/dolt/go/libraries/doltcore/conflict"
    24  	"github.com/dolthub/dolt/go/libraries/doltcore/diff"
    25  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
    29  	"github.com/dolthub/dolt/go/store/atomicerr"
    30  	"github.com/dolthub/dolt/go/store/hash"
    31  	"github.com/dolthub/dolt/go/store/prolly/tree"
    32  	"github.com/dolthub/dolt/go/store/types"
    33  )
    34  
// MergeOpts carries the caller-supplied options that adjust how a merge is performed.
type MergeOpts struct {
	// IsCherryPick is set for cherry-pick operations.
	IsCherryPick bool
	// KeepSchemaConflicts is set when schema conflicts should be stored,
	// otherwise the merge errors out when schema conflicts are detected.
	KeepSchemaConflicts bool
	// ReverifyAllConstraints is set to indicate that a merge should not rely on existing
	// constraint violation artifacts and should instead ensure that all constraints are
	// verified. When this option is not set, merge will use optimizations to short circuit
	// some calculations that aren't needed for merge correctness, but are still needed to
	// correctly verify all constraints.
	ReverifyAllConstraints bool
	// RecordViolationsForTables is an optional map that allows the caller to control which
	// tables will have constraint violations recorded as artifacts in the merged tables. When
	// this field is nil or an empty map, constraint violations will be recorded for all tables,
	// but if the map is populated with any (case-insensitive) table names, then only those tables
	// will have constraint violations recorded. This functionality is primarily used by the
	// dolt_verify_constraints() stored procedure to allow callers to verify constraints for a
	// subset of tables.
	RecordViolationsForTables map[string]struct{}
}
    56  
    57  type TableMerger struct {
    58  	name string
    59  
    60  	leftTbl  *doltdb.Table
    61  	rightTbl *doltdb.Table
    62  	ancTbl   *doltdb.Table
    63  
    64  	leftSch  schema.Schema
    65  	rightSch schema.Schema
    66  	ancSch   schema.Schema
    67  
    68  	rightSrc    doltdb.Rootish
    69  	ancestorSrc doltdb.Rootish
    70  
    71  	vrw types.ValueReadWriter
    72  	ns  tree.NodeStore
    73  
    74  	// recordViolations controls whether constraint violations should be recorded as table
    75  	// artifacts when merging this table. In almost all cases, this should be set to true. The
    76  	// exception is for the dolt_verify_constraints() stored procedure, which allows callers to
    77  	// only record constraint violations for a specified subset of tables.
    78  	recordViolations bool
    79  }
    80  
    81  func (tm TableMerger) tableHashes() (left, right, anc hash.Hash, err error) {
    82  	if tm.leftTbl != nil {
    83  		if left, err = tm.leftTbl.HashOf(); err != nil {
    84  			return
    85  		}
    86  	}
    87  	if tm.rightTbl != nil {
    88  		if right, err = tm.rightTbl.HashOf(); err != nil {
    89  			return
    90  		}
    91  	}
    92  	if tm.ancTbl != nil {
    93  		if anc, err = tm.ancTbl.HashOf(); err != nil {
    94  			return
    95  		}
    96  	}
    97  	return
    98  }
    99  
   100  type RootMerger struct {
   101  	left  doltdb.RootValue
   102  	right doltdb.RootValue
   103  	anc   doltdb.RootValue
   104  
   105  	rightSrc doltdb.Rootish
   106  	ancSrc   doltdb.Rootish
   107  
   108  	vrw types.ValueReadWriter
   109  	ns  tree.NodeStore
   110  }
   111  
   112  // NewMerger creates a new merger utility object.
   113  func NewMerger(
   114  	left, right, anc doltdb.RootValue,
   115  	rightSrc, ancestorSrc doltdb.Rootish,
   116  	vrw types.ValueReadWriter,
   117  	ns tree.NodeStore,
   118  ) (*RootMerger, error) {
   119  	return &RootMerger{
   120  		left:     left,
   121  		right:    right,
   122  		anc:      anc,
   123  		rightSrc: rightSrc,
   124  		ancSrc:   ancestorSrc,
   125  		vrw:      vrw,
   126  		ns:       ns,
   127  	}, nil
   128  }
   129  
// MergedTable is the per-table result returned by RootMerger.MergeTable.
type MergedTable struct {
	// table is the resulting table. MergeTable leaves it nil when the short-circuit
	// logic reports no table (for example, when the table was removed).
	table *doltdb.Table
	// conflict holds the schema conflicts that MergeTable stores when schema conflicts
	// are detected and MergeOpts.KeepSchemaConflicts is set.
	conflict SchemaConflict
}
   134  
   135  // MergeTable merges schema and table data for the table tblName.
   136  // TODO: this code will loop infinitely when merging certain schema changes
   137  func (rm *RootMerger) MergeTable(ctx *sql.Context, tblName string, opts editor.Options, mergeOpts MergeOpts) (*MergedTable, *MergeStats, error) {
   138  	tm, err := rm.makeTableMerger(ctx, tblName, mergeOpts)
   139  	if err != nil {
   140  		return nil, nil, err
   141  	}
   142  
   143  	// short-circuit here if we can
   144  	finished, stats, err := rm.maybeShortCircuit(ctx, tm, mergeOpts)
   145  	if finished != nil || stats != nil || err != nil {
   146  		return &MergedTable{table: finished}, stats, err
   147  	}
   148  
   149  	// Calculate a merge of the schemas, but don't apply it yet
   150  	mergeSch, schConflicts, mergeInfo, diffInfo, err := SchemaMerge(ctx, tm.vrw.Format(), tm.leftSch, tm.rightSch, tm.ancSch, tblName)
   151  	if err != nil {
   152  		return nil, nil, err
   153  	}
   154  	if schConflicts.Count() > 0 {
   155  		if !mergeOpts.KeepSchemaConflicts {
   156  			return nil, nil, schConflicts
   157  		}
   158  		// handle schema conflicts above
   159  		mt := &MergedTable{
   160  			table:    tm.leftTbl,
   161  			conflict: schConflicts,
   162  		}
   163  		stats = &MergeStats{
   164  			Operation:       TableModified,
   165  			SchemaConflicts: schConflicts.Count(),
   166  		}
   167  		return mt, stats, nil
   168  	}
   169  
   170  	var tbl *doltdb.Table
   171  	if types.IsFormat_DOLT(tm.vrw.Format()) {
   172  		tbl, stats, err = mergeProllyTable(ctx, tm, mergeSch, mergeInfo, diffInfo)
   173  	} else {
   174  		tbl, stats, err = mergeNomsTable(ctx, tm, mergeSch, rm.vrw, opts)
   175  	}
   176  	if err != nil {
   177  		return nil, nil, err
   178  	}
   179  	return &MergedTable{table: tbl}, stats, nil
   180  }
   181  
   182  func (rm *RootMerger) makeTableMerger(ctx context.Context, tblName string, mergeOpts MergeOpts) (*TableMerger, error) {
   183  	recordViolations := true
   184  	if mergeOpts.RecordViolationsForTables != nil {
   185  		if _, ok := mergeOpts.RecordViolationsForTables[strings.ToLower(tblName)]; !ok {
   186  			recordViolations = false
   187  		}
   188  	}
   189  
   190  	tm := TableMerger{
   191  		name:             tblName,
   192  		rightSrc:         rm.rightSrc,
   193  		ancestorSrc:      rm.ancSrc,
   194  		vrw:              rm.vrw,
   195  		ns:               rm.ns,
   196  		recordViolations: recordViolations,
   197  	}
   198  
   199  	var err error
   200  	var leftSideTableExists, rightSideTableExists, ancTableExists bool
   201  
   202  	tm.leftTbl, leftSideTableExists, err = rm.left.GetTable(ctx, doltdb.TableName{Name: tblName})
   203  	if err != nil {
   204  		return nil, err
   205  	}
   206  	if leftSideTableExists {
   207  		if tm.leftSch, err = tm.leftTbl.GetSchema(ctx); err != nil {
   208  			return nil, err
   209  		}
   210  	}
   211  
   212  	tm.rightTbl, rightSideTableExists, err = rm.right.GetTable(ctx, doltdb.TableName{Name: tblName})
   213  	if err != nil {
   214  		return nil, err
   215  	}
   216  	if rightSideTableExists {
   217  		if tm.rightSch, err = tm.rightTbl.GetSchema(ctx); err != nil {
   218  			return nil, err
   219  		}
   220  	}
   221  
   222  	// If we need to re-verify all constraints, then we need to stub out tables
   223  	// that don't exist, so that the diff logic can compare an empty table to
   224  	// the table containing the real data. This is required by dolt_verify_constraints()
   225  	// so that we can run the merge logic on all rows in all tables.
   226  	if mergeOpts.ReverifyAllConstraints {
   227  		if !leftSideTableExists && rightSideTableExists {
   228  			// if left side doesn't have the table... stub it out with an empty table from the right side...
   229  			tm.leftSch = tm.rightSch
   230  			tm.leftTbl, err = doltdb.NewEmptyTable(ctx, rm.vrw, rm.ns, tm.leftSch)
   231  			if err != nil {
   232  				return nil, err
   233  			}
   234  		} else if !rightSideTableExists && leftSideTableExists {
   235  			// if left side doesn't have the table... stub it out with an empty table from the right side...
   236  			tm.rightSch = tm.leftSch
   237  			tm.rightTbl, err = doltdb.NewEmptyTable(ctx, rm.vrw, rm.ns, tm.rightSch)
   238  			if err != nil {
   239  				return nil, err
   240  			}
   241  		}
   242  	}
   243  
   244  	tm.ancTbl, ancTableExists, err = rm.anc.GetTable(ctx, doltdb.TableName{Name: tblName})
   245  	if err != nil {
   246  		return nil, err
   247  	}
   248  	if ancTableExists {
   249  		if tm.ancSch, err = tm.ancTbl.GetSchema(ctx); err != nil {
   250  			return nil, err
   251  		}
   252  	} else if schema.SchemasAreEqual(tm.leftSch, tm.rightSch) && tm.leftTbl != nil {
   253  		// If left & right added the same table, fill tm.anc with an empty table
   254  		tm.ancSch = tm.leftSch
   255  		tm.ancTbl, err = doltdb.NewEmptyTable(ctx, rm.vrw, rm.ns, tm.ancSch)
   256  		if err != nil {
   257  			return nil, err
   258  		}
   259  	}
   260  
   261  	return &tm, nil
   262  }
   263  
// maybeShortCircuit decides whether the merge of tm's table can be resolved without a
// row-level merge, based on which of the left, right, and ancestor tables exist and on
// their hashes.
//
// It returns (nil, nil, nil) when no short circuit applies, which is always the case when
// opts.ReverifyAllConstraints is set. Otherwise it returns the resulting table (nil when
// the table ends up removed) together with stats describing the outcome, or one of
// ErrSameTblAddedTwice, ErrTableDeletedAndModified, ErrTableDeletedAndSchemaModified, or
// an error from hashing / comparing the tables.
//
// The checks below overlap, so their order matters.
func (rm *RootMerger) maybeShortCircuit(ctx context.Context, tm *TableMerger, opts MergeOpts) (*doltdb.Table, *MergeStats, error) {
	// If we need to re-verify all constraints as part of this merge, then we can't short
	// circuit considering any tables, so return immediately
	if opts.ReverifyAllConstraints {
		return nil, nil, nil
	}

	// rootHash is the left table's hash, mergeHash the right table's, ancHash the ancestor's.
	// A hash is left at its zero value when the corresponding table is nil.
	rootHash, mergeHash, ancHash, err := tm.tableHashes()
	if err != nil {
		return nil, nil, err
	}

	leftExists := tm.leftTbl != nil
	rightExists := tm.rightTbl != nil
	ancExists := tm.ancTbl != nil

	// Nothing changed
	if leftExists && rightExists && ancExists && rootHash == mergeHash && rootHash == ancHash {
		return tm.leftTbl, &MergeStats{Operation: TableUnmodified}, nil
	}

	// Both made identical changes
	// For keyless tables, this counts as a conflict
	if leftExists && rightExists && rootHash == mergeHash && !schema.IsKeyless(tm.leftSch) {
		return tm.leftTbl, &MergeStats{Operation: TableUnmodified}, nil
	}

	// One or both added this table
	if !ancExists {
		if rightExists && leftExists {
			// Both sides added the table; differing schemas are an error, while equal
			// schemas fall through to the remaining checks.
			if !schema.SchemasAreEqual(tm.leftSch, tm.rightSch) {
				return nil, nil, ErrSameTblAddedTwice.New(tm.name)
			}
		} else if leftExists {
			// fast-forward
			return tm.leftTbl, &MergeStats{Operation: TableUnmodified}, nil
		} else {
			// fast-forward
			return tm.rightTbl, &MergeStats{Operation: TableAdded}, nil
		}
	}

	// Deleted in both, fast-forward
	if ancExists && !leftExists && !rightExists {
		return nil, &MergeStats{Operation: TableRemoved}, nil
	}

	// Deleted in root or in merge, either a conflict (if any changes in other root) or else a fast-forward
	if ancExists && (!leftExists || !rightExists) {
		// childTable is whichever side still has the table.
		var childTable *doltdb.Table
		var childHash hash.Hash
		if rightExists {
			childTable = tm.rightTbl
			childHash = mergeHash
		} else {
			childTable = tm.leftTbl
			childHash = rootHash
		}
		if childHash != ancHash {
			// The surviving side differs from the ancestor; the error returned depends on
			// whether its schema hash still matches the ancestor's.
			schemasEqual, err := doltdb.SchemaHashesEqual(ctx, childTable, tm.ancTbl)
			if err != nil {
				return nil, nil, err
			}
			if schemasEqual {
				return nil, nil, ErrTableDeletedAndModified
			} else {
				return nil, nil, ErrTableDeletedAndSchemaModified
			}
		}
		// fast-forward
		return nil, &MergeStats{Operation: TableRemoved}, nil
	}

	// Changes only in root, table unmodified
	if mergeHash == ancHash {
		return tm.leftTbl, &MergeStats{Operation: TableUnmodified}, nil
	}

	// Changes only in merge root, fast-forward
	// TODO : no fast-forward when cherry-picking for now
	if !opts.IsCherryPick && rootHash == ancHash {
		ms := MergeStats{Operation: TableModified}
		// NOTE(review): mergeHash != ancHash was established just above and rootHash == ancHash
		// here, so this condition looks like it always holds — confirm before simplifying.
		if rootHash != mergeHash {
			ms, err = calcTableMergeStats(ctx, tm.leftTbl, tm.rightTbl)
			if err != nil {
				return nil, nil, err
			}
		}
		return tm.rightTbl, &ms, nil
	}

	// no short-circuit
	return nil, nil, nil
}
   358  
   359  func setConflicts(ctx context.Context, cons durable.ConflictIndex, tbl, mergeTbl, ancTbl, tableToUpdate *doltdb.Table) (*doltdb.Table, error) {
   360  	ancSch, err := ancTbl.GetSchema(ctx)
   361  	if err != nil {
   362  		return nil, err
   363  	}
   364  
   365  	sch, err := tbl.GetSchema(ctx)
   366  	if err != nil {
   367  		return nil, err
   368  	}
   369  
   370  	mergeSch, err := mergeTbl.GetSchema(ctx)
   371  	if err != nil {
   372  		return nil, err
   373  	}
   374  
   375  	cs := conflict.NewConflictSchema(ancSch, sch, mergeSch)
   376  
   377  	tableToUpdate, err = tableToUpdate.SetConflicts(ctx, cs, cons)
   378  	if err != nil {
   379  		return nil, err
   380  	}
   381  
   382  	return tableToUpdate, nil
   383  }
   384  
   385  func calcTableMergeStats(ctx context.Context, tbl *doltdb.Table, mergeTbl *doltdb.Table) (MergeStats, error) {
   386  	ms := MergeStats{Operation: TableModified}
   387  
   388  	rows, err := tbl.GetRowData(ctx)
   389  	if err != nil {
   390  		return MergeStats{}, err
   391  	}
   392  
   393  	mergeRows, err := mergeTbl.GetRowData(ctx)
   394  	if err != nil {
   395  		return MergeStats{}, err
   396  	}
   397  
   398  	sch, err := tbl.GetSchema(ctx)
   399  	if err != nil {
   400  		return MergeStats{}, err
   401  	}
   402  
   403  	mergeSch, err := mergeTbl.GetSchema(ctx)
   404  	if err != nil {
   405  		return MergeStats{}, err
   406  	}
   407  
   408  	ae := atomicerr.New()
   409  	ch := make(chan diff.DiffStatProgress)
   410  	go func() {
   411  		defer close(ch)
   412  		err := diff.Stat(ctx, ch, rows, mergeRows, sch, mergeSch)
   413  
   414  		ae.SetIfError(err)
   415  	}()
   416  
   417  	for p := range ch {
   418  		if ae.IsSet() {
   419  			break
   420  		}
   421  
   422  		ms.Adds += int(p.Adds)
   423  		ms.Deletes += int(p.Removes)
   424  		ms.Modifications += int(p.Changes)
   425  	}
   426  
   427  	if err := ae.Get(); err != nil {
   428  		return MergeStats{}, err
   429  	}
   430  
   431  	return ms, nil
   432  }