github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/merge/merge_rows.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package merge 16 17 import ( 18 "context" 19 "strings" 20 21 "github.com/dolthub/go-mysql-server/sql" 22 23 "github.com/dolthub/dolt/go/libraries/doltcore/conflict" 24 "github.com/dolthub/dolt/go/libraries/doltcore/diff" 25 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 26 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" 27 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 28 "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" 29 "github.com/dolthub/dolt/go/store/atomicerr" 30 "github.com/dolthub/dolt/go/store/hash" 31 "github.com/dolthub/dolt/go/store/prolly/tree" 32 "github.com/dolthub/dolt/go/store/types" 33 ) 34 35 type MergeOpts struct { 36 // IsCherryPick is set for cherry-pick operations. 37 IsCherryPick bool 38 // KeepSchemaConflicts is set when schema conflicts should be stored, 39 // otherwise the merge errors out when schema conflicts are detected. 40 KeepSchemaConflicts bool 41 // ReverifyAllConstraints is set to indicate that a merge should not rely on existing 42 // constraint violation artifacts and should instead ensure that all constraints are 43 // verified. When this option is not set, merge will use optimizations to short circuit 44 // some calculations that aren't needed for merge correctness, but are still needed to 45 // correctly verify all constraints. 46 ReverifyAllConstraints bool 47 // RecordViolationsForTables is an optional map that allows the caller to control which 48 // tables will have constraint violations recorded as artifacts in the merged tables. When 49 // this field is nil or an empty map, constraint violations will be recorded for all tables, 50 // but if the map is populated with any (case-insensitive) table names, then only those tables 51 // will have constraint violations recorded. This functionality is primarily used by the 52 // dolt_verify_constraints() stored procedure to allow callers to verify constraints for a 53 // subset of tables. 54 RecordViolationsForTables map[string]struct{} 55 } 56 57 type TableMerger struct { 58 name string 59 60 leftTbl *doltdb.Table 61 rightTbl *doltdb.Table 62 ancTbl *doltdb.Table 63 64 leftSch schema.Schema 65 rightSch schema.Schema 66 ancSch schema.Schema 67 68 rightSrc doltdb.Rootish 69 ancestorSrc doltdb.Rootish 70 71 vrw types.ValueReadWriter 72 ns tree.NodeStore 73 74 // recordViolations controls whether constraint violations should be recorded as table 75 // artifacts when merging this table. In almost all cases, this should be set to true. The 76 // exception is for the dolt_verify_constraints() stored procedure, which allows callers to 77 // only record constraint violations for a specified subset of tables. 78 recordViolations bool 79 } 80 81 func (tm TableMerger) tableHashes() (left, right, anc hash.Hash, err error) { 82 if tm.leftTbl != nil { 83 if left, err = tm.leftTbl.HashOf(); err != nil { 84 return 85 } 86 } 87 if tm.rightTbl != nil { 88 if right, err = tm.rightTbl.HashOf(); err != nil { 89 return 90 } 91 } 92 if tm.ancTbl != nil { 93 if anc, err = tm.ancTbl.HashOf(); err != nil { 94 return 95 } 96 } 97 return 98 } 99 100 type RootMerger struct { 101 left doltdb.RootValue 102 right doltdb.RootValue 103 anc doltdb.RootValue 104 105 rightSrc doltdb.Rootish 106 ancSrc doltdb.Rootish 107 108 vrw types.ValueReadWriter 109 ns tree.NodeStore 110 } 111 112 // NewMerger creates a new merger utility object. 113 func NewMerger( 114 left, right, anc doltdb.RootValue, 115 rightSrc, ancestorSrc doltdb.Rootish, 116 vrw types.ValueReadWriter, 117 ns tree.NodeStore, 118 ) (*RootMerger, error) { 119 return &RootMerger{ 120 left: left, 121 right: right, 122 anc: anc, 123 rightSrc: rightSrc, 124 ancSrc: ancestorSrc, 125 vrw: vrw, 126 ns: ns, 127 }, nil 128 } 129 130 type MergedTable struct { 131 table *doltdb.Table 132 conflict SchemaConflict 133 } 134 135 // MergeTable merges schema and table data for the table tblName. 136 // TODO: this code will loop infinitely when merging certain schema changes 137 func (rm *RootMerger) MergeTable(ctx *sql.Context, tblName string, opts editor.Options, mergeOpts MergeOpts) (*MergedTable, *MergeStats, error) { 138 tm, err := rm.makeTableMerger(ctx, tblName, mergeOpts) 139 if err != nil { 140 return nil, nil, err 141 } 142 143 // short-circuit here if we can 144 finished, stats, err := rm.maybeShortCircuit(ctx, tm, mergeOpts) 145 if finished != nil || stats != nil || err != nil { 146 return &MergedTable{table: finished}, stats, err 147 } 148 149 // Calculate a merge of the schemas, but don't apply it yet 150 mergeSch, schConflicts, mergeInfo, diffInfo, err := SchemaMerge(ctx, tm.vrw.Format(), tm.leftSch, tm.rightSch, tm.ancSch, tblName) 151 if err != nil { 152 return nil, nil, err 153 } 154 if schConflicts.Count() > 0 { 155 if !mergeOpts.KeepSchemaConflicts { 156 return nil, nil, schConflicts 157 } 158 // handle schema conflicts above 159 mt := &MergedTable{ 160 table: tm.leftTbl, 161 conflict: schConflicts, 162 } 163 stats = &MergeStats{ 164 Operation: TableModified, 165 SchemaConflicts: schConflicts.Count(), 166 } 167 return mt, stats, nil 168 } 169 170 var tbl *doltdb.Table 171 if types.IsFormat_DOLT(tm.vrw.Format()) { 172 tbl, stats, err = mergeProllyTable(ctx, tm, mergeSch, mergeInfo, diffInfo) 173 } else { 174 tbl, stats, err = mergeNomsTable(ctx, tm, mergeSch, rm.vrw, opts) 175 } 176 if err != nil { 177 return nil, nil, err 178 } 179 return &MergedTable{table: tbl}, stats, nil 180 } 181 182 func (rm *RootMerger) makeTableMerger(ctx context.Context, tblName string, mergeOpts MergeOpts) (*TableMerger, error) { 183 recordViolations := true 184 if mergeOpts.RecordViolationsForTables != nil { 185 if _, ok := mergeOpts.RecordViolationsForTables[strings.ToLower(tblName)]; !ok { 186 recordViolations = false 187 } 188 } 189 190 tm := TableMerger{ 191 name: tblName, 192 rightSrc: rm.rightSrc, 193 ancestorSrc: rm.ancSrc, 194 vrw: rm.vrw, 195 ns: rm.ns, 196 recordViolations: recordViolations, 197 } 198 199 var err error 200 var leftSideTableExists, rightSideTableExists, ancTableExists bool 201 202 tm.leftTbl, leftSideTableExists, err = rm.left.GetTable(ctx, doltdb.TableName{Name: tblName}) 203 if err != nil { 204 return nil, err 205 } 206 if leftSideTableExists { 207 if tm.leftSch, err = tm.leftTbl.GetSchema(ctx); err != nil { 208 return nil, err 209 } 210 } 211 212 tm.rightTbl, rightSideTableExists, err = rm.right.GetTable(ctx, doltdb.TableName{Name: tblName}) 213 if err != nil { 214 return nil, err 215 } 216 if rightSideTableExists { 217 if tm.rightSch, err = tm.rightTbl.GetSchema(ctx); err != nil { 218 return nil, err 219 } 220 } 221 222 // If we need to re-verify all constraints, then we need to stub out tables 223 // that don't exist, so that the diff logic can compare an empty table to 224 // the table containing the real data. This is required by dolt_verify_constraints() 225 // so that we can run the merge logic on all rows in all tables. 226 if mergeOpts.ReverifyAllConstraints { 227 if !leftSideTableExists && rightSideTableExists { 228 // if left side doesn't have the table... stub it out with an empty table from the right side... 229 tm.leftSch = tm.rightSch 230 tm.leftTbl, err = doltdb.NewEmptyTable(ctx, rm.vrw, rm.ns, tm.leftSch) 231 if err != nil { 232 return nil, err 233 } 234 } else if !rightSideTableExists && leftSideTableExists { 235 // if left side doesn't have the table... stub it out with an empty table from the right side... 236 tm.rightSch = tm.leftSch 237 tm.rightTbl, err = doltdb.NewEmptyTable(ctx, rm.vrw, rm.ns, tm.rightSch) 238 if err != nil { 239 return nil, err 240 } 241 } 242 } 243 244 tm.ancTbl, ancTableExists, err = rm.anc.GetTable(ctx, doltdb.TableName{Name: tblName}) 245 if err != nil { 246 return nil, err 247 } 248 if ancTableExists { 249 if tm.ancSch, err = tm.ancTbl.GetSchema(ctx); err != nil { 250 return nil, err 251 } 252 } else if schema.SchemasAreEqual(tm.leftSch, tm.rightSch) && tm.leftTbl != nil { 253 // If left & right added the same table, fill tm.anc with an empty table 254 tm.ancSch = tm.leftSch 255 tm.ancTbl, err = doltdb.NewEmptyTable(ctx, rm.vrw, rm.ns, tm.ancSch) 256 if err != nil { 257 return nil, err 258 } 259 } 260 261 return &tm, nil 262 } 263 264 func (rm *RootMerger) maybeShortCircuit(ctx context.Context, tm *TableMerger, opts MergeOpts) (*doltdb.Table, *MergeStats, error) { 265 // If we need to re-verify all constraints as part of this merge, then we can't short 266 // circuit considering any tables, so return immediately 267 if opts.ReverifyAllConstraints { 268 return nil, nil, nil 269 } 270 271 rootHash, mergeHash, ancHash, err := tm.tableHashes() 272 if err != nil { 273 return nil, nil, err 274 } 275 276 leftExists := tm.leftTbl != nil 277 rightExists := tm.rightTbl != nil 278 ancExists := tm.ancTbl != nil 279 280 // Nothing changed 281 if leftExists && rightExists && ancExists && rootHash == mergeHash && rootHash == ancHash { 282 return tm.leftTbl, &MergeStats{Operation: TableUnmodified}, nil 283 } 284 285 // Both made identical changes 286 // For keyless tables, this counts as a conflict 287 if leftExists && rightExists && rootHash == mergeHash && !schema.IsKeyless(tm.leftSch) { 288 return tm.leftTbl, &MergeStats{Operation: TableUnmodified}, nil 289 } 290 291 // One or both added this table 292 if !ancExists { 293 if rightExists && leftExists { 294 if !schema.SchemasAreEqual(tm.leftSch, tm.rightSch) { 295 return nil, nil, ErrSameTblAddedTwice.New(tm.name) 296 } 297 } else if leftExists { 298 // fast-forward 299 return tm.leftTbl, &MergeStats{Operation: TableUnmodified}, nil 300 } else { 301 // fast-forward 302 return tm.rightTbl, &MergeStats{Operation: TableAdded}, nil 303 } 304 } 305 306 // Deleted in both, fast-forward 307 if ancExists && !leftExists && !rightExists { 308 return nil, &MergeStats{Operation: TableRemoved}, nil 309 } 310 311 // Deleted in root or in merge, either a conflict (if any changes in other root) or else a fast-forward 312 if ancExists && (!leftExists || !rightExists) { 313 var childTable *doltdb.Table 314 var childHash hash.Hash 315 if rightExists { 316 childTable = tm.rightTbl 317 childHash = mergeHash 318 } else { 319 childTable = tm.leftTbl 320 childHash = rootHash 321 } 322 if childHash != ancHash { 323 schemasEqual, err := doltdb.SchemaHashesEqual(ctx, childTable, tm.ancTbl) 324 if err != nil { 325 return nil, nil, err 326 } 327 if schemasEqual { 328 return nil, nil, ErrTableDeletedAndModified 329 } else { 330 return nil, nil, ErrTableDeletedAndSchemaModified 331 } 332 } 333 // fast-forward 334 return nil, &MergeStats{Operation: TableRemoved}, nil 335 } 336 337 // Changes only in root, table unmodified 338 if mergeHash == ancHash { 339 return tm.leftTbl, &MergeStats{Operation: TableUnmodified}, nil 340 } 341 342 // Changes only in merge root, fast-forward 343 // TODO : no fast-forward when cherry-picking for now 344 if !opts.IsCherryPick && rootHash == ancHash { 345 ms := MergeStats{Operation: TableModified} 346 if rootHash != mergeHash { 347 ms, err = calcTableMergeStats(ctx, tm.leftTbl, tm.rightTbl) 348 if err != nil { 349 return nil, nil, err 350 } 351 } 352 return tm.rightTbl, &ms, nil 353 } 354 355 // no short-circuit 356 return nil, nil, nil 357 } 358 359 func setConflicts(ctx context.Context, cons durable.ConflictIndex, tbl, mergeTbl, ancTbl, tableToUpdate *doltdb.Table) (*doltdb.Table, error) { 360 ancSch, err := ancTbl.GetSchema(ctx) 361 if err != nil { 362 return nil, err 363 } 364 365 sch, err := tbl.GetSchema(ctx) 366 if err != nil { 367 return nil, err 368 } 369 370 mergeSch, err := mergeTbl.GetSchema(ctx) 371 if err != nil { 372 return nil, err 373 } 374 375 cs := conflict.NewConflictSchema(ancSch, sch, mergeSch) 376 377 tableToUpdate, err = tableToUpdate.SetConflicts(ctx, cs, cons) 378 if err != nil { 379 return nil, err 380 } 381 382 return tableToUpdate, nil 383 } 384 385 func calcTableMergeStats(ctx context.Context, tbl *doltdb.Table, mergeTbl *doltdb.Table) (MergeStats, error) { 386 ms := MergeStats{Operation: TableModified} 387 388 rows, err := tbl.GetRowData(ctx) 389 if err != nil { 390 return MergeStats{}, err 391 } 392 393 mergeRows, err := mergeTbl.GetRowData(ctx) 394 if err != nil { 395 return MergeStats{}, err 396 } 397 398 sch, err := tbl.GetSchema(ctx) 399 if err != nil { 400 return MergeStats{}, err 401 } 402 403 mergeSch, err := mergeTbl.GetSchema(ctx) 404 if err != nil { 405 return MergeStats{}, err 406 } 407 408 ae := atomicerr.New() 409 ch := make(chan diff.DiffStatProgress) 410 go func() { 411 defer close(ch) 412 err := diff.Stat(ctx, ch, rows, mergeRows, sch, mergeSch) 413 414 ae.SetIfError(err) 415 }() 416 417 for p := range ch { 418 if ae.IsSet() { 419 break 420 } 421 422 ms.Adds += int(p.Adds) 423 ms.Deletes += int(p.Removes) 424 ms.Modifications += int(p.Changes) 425 } 426 427 if err := ae.Get(); err != nil { 428 return MergeStats{}, err 429 } 430 431 return ms, nil 432 }