github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/merge/merge_prolly_rows.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package merge 16 17 import ( 18 "bytes" 19 "context" 20 "encoding/json" 21 "errors" 22 "fmt" 23 "io" 24 25 "github.com/dolthub/go-mysql-server/sql" 26 "github.com/dolthub/go-mysql-server/sql/expression" 27 "github.com/dolthub/go-mysql-server/sql/transform" 28 "github.com/dolthub/go-mysql-server/sql/types" 29 "golang.org/x/exp/maps" 30 errorkinds "gopkg.in/src-d/go-errors.v1" 31 32 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 33 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" 34 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 35 "github.com/dolthub/dolt/go/libraries/doltcore/schema/typeinfo" 36 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/index" 37 "github.com/dolthub/dolt/go/store/hash" 38 "github.com/dolthub/dolt/go/store/pool" 39 "github.com/dolthub/dolt/go/store/prolly" 40 "github.com/dolthub/dolt/go/store/prolly/tree" 41 "github.com/dolthub/dolt/go/store/val" 42 ) 43 44 // ErrUnableToMergeColumnDefaultValue is returned when a column's default value cannot be Eval'ed and we are unable to 45 // correctly fill in a new column's value for existing table rows. This can happen when a column default value uses 46 // references that need to be resolved by the analyzer (e.g. column references, function references). 47 var ErrUnableToMergeColumnDefaultValue = errorkinds.NewKind("unable to automatically apply column default value " + 48 "in merge: %s for table '%s'; to continue merging, first manually apply the column alteration on this branch") 49 50 // mergeProllyTable merges the table specified by |tm| using the specified |mergedSch| and returns the new table 51 // instance, along with merge stats and any error. If |diffInfo.RewriteRows| is true, then any existing rows in the 52 // table's primary index will also be rewritten. This function merges the table's artifacts (e.g. recorded 53 // conflicts), migrates any existing table data to the specified |mergedSch|, and merges table data from both 54 // sides of the merge together. 55 func mergeProllyTable(ctx context.Context, tm *TableMerger, mergedSch schema.Schema, mergeInfo MergeInfo, diffInfo tree.ThreeWayDiffInfo) (*doltdb.Table, *MergeStats, error) { 56 mergeTbl, err := mergeTableArtifacts(ctx, tm, tm.leftTbl) 57 if err != nil { 58 return nil, nil, err 59 } 60 tm.leftTbl = mergeTbl 61 62 // Before we merge the table data we need to fix up the primary index on the left-side of the merge for 63 // any ordinal mapping changes (i.e. moving/dropping/adding columns). 64 // NOTE: This won't ALWAYS be the left side... eventually we will need to optimize which side we pick 65 // (i.e. the side that needs the least work to modify) and make this logic work for either side. 66 lr, err := tm.leftTbl.GetRowData(ctx) 67 if err != nil { 68 return nil, nil, err 69 } 70 leftRows := durable.ProllyMapFromIndex(lr) 71 valueMerger := newValueMerger(mergedSch, tm.leftSch, tm.rightSch, tm.ancSch, leftRows.Pool(), tm.ns) 72 73 if !valueMerger.leftMapping.IsIdentityMapping() { 74 mergeInfo.LeftNeedsRewrite = true 75 } 76 77 if !valueMerger.rightMapping.IsIdentityMapping() { 78 mergeInfo.RightNeedsRewrite = true 79 } 80 81 // We need a sql.Context to apply column default values in merges; if we don't have one already, 82 // create one, since this code also gets called from the CLI merge code path. 83 sqlCtx, ok := ctx.(*sql.Context) 84 if !ok { 85 sqlCtx = sql.NewContext(ctx) 86 } 87 88 var stats *MergeStats 89 mergeTbl, stats, err = mergeProllyTableData(sqlCtx, tm, mergedSch, mergeTbl, valueMerger, mergeInfo, diffInfo) 90 if err != nil { 91 return nil, nil, err 92 } 93 94 n, err := mergeTbl.NumRowsInConflict(sqlCtx) 95 if err != nil { 96 return nil, nil, err 97 } 98 stats.DataConflicts = int(n) 99 100 mergeTbl, err = mergeAutoIncrementValues(sqlCtx, tm.leftTbl, tm.rightTbl, mergeTbl) 101 if err != nil { 102 return nil, nil, err 103 } 104 return mergeTbl, stats, nil 105 } 106 107 // mergeProllyTableData three-way merges the data for a given table. We currently take the left 108 // side of the merge and use that data as the starting point to merge in changes from the right 109 // side. Eventually, we will need to optimize this to pick the side that needs the least work. 110 // We iterate over the calculated diffs using a ThreeWayDiffer instance, and for every change 111 // to the right-side, we apply it to the left-side by merging it into the left-side's primary index 112 // as well as any secondary indexes, and also checking for unique constraints incrementally. When 113 // conflicts are detected, this function attempts to resolve them automatically if possible, and 114 // if not, they are recorded as conflicts in the table's artifacts. 115 func mergeProllyTableData(ctx *sql.Context, tm *TableMerger, finalSch schema.Schema, mergeTbl *doltdb.Table, valueMerger *valueMerger, mergeInfo MergeInfo, diffInfo tree.ThreeWayDiffInfo) (*doltdb.Table, *MergeStats, error) { 116 iter, err := threeWayDiffer(ctx, tm, valueMerger, diffInfo) 117 if err != nil { 118 return nil, nil, err 119 } 120 121 lr, err := tm.leftTbl.GetRowData(ctx) 122 if err != nil { 123 return nil, nil, err 124 } 125 leftEditor := durable.ProllyMapFromIndex(lr).Rewriter(finalSch.GetKeyDescriptor(), finalSch.GetValueDescriptor()) 126 127 ai, err := mergeTbl.GetArtifacts(ctx) 128 if err != nil { 129 return nil, nil, err 130 } 131 artEditor := durable.ProllyMapFromArtifactIndex(ai).Editor() 132 133 keyless := schema.IsKeyless(tm.leftSch) 134 135 defaults, err := resolveDefaults(ctx, tm.name, finalSch, tm.leftSch) 136 if err != nil { 137 return nil, nil, err 138 } 139 140 pri, err := newPrimaryMerger(leftEditor, tm, valueMerger, finalSch, mergeInfo, defaults) 141 if err != nil { 142 return nil, nil, err 143 } 144 sec, err := newSecondaryMerger(ctx, tm, valueMerger, finalSch, mergeInfo) 145 if err != nil { 146 return nil, nil, err 147 } 148 conflicts, err := newConflictMerger(ctx, tm, artEditor) 149 if err != nil { 150 return nil, nil, err 151 } 152 153 checkValidator, err := newCheckValidator(ctx, tm, valueMerger, finalSch, artEditor) 154 if err != nil { 155 return nil, nil, err 156 } 157 158 // validator shares an artifact editor with conflict merge 159 uniq, err := newUniqValidator(ctx, finalSch, tm, valueMerger, artEditor) 160 if err != nil { 161 return nil, nil, err 162 } 163 164 nullChk, err := newNullValidator(ctx, finalSch, tm, valueMerger, artEditor, leftEditor, sec.leftIdxes) 165 if err != nil { 166 return nil, nil, err 167 } 168 169 s := &MergeStats{ 170 Operation: TableModified, 171 } 172 for { 173 diff, err := iter.Next(ctx) 174 if errors.Is(err, io.EOF) { 175 break 176 } else if err != nil { 177 return nil, nil, err 178 } 179 cnt, err := uniq.validateDiff(ctx, diff) 180 if err != nil { 181 return nil, nil, err 182 } 183 s.ConstraintViolations += cnt 184 185 cnt, err = nullChk.validateDiff(ctx, diff) 186 if err != nil { 187 return nil, nil, err 188 } 189 s.ConstraintViolations += cnt 190 if cnt > 0 { 191 continue 192 } 193 194 cnt, err = checkValidator.validateDiff(ctx, diff) 195 if err != nil { 196 return nil, nil, err 197 } 198 s.ConstraintViolations += cnt 199 200 switch diff.Op { 201 case tree.DiffOpLeftAdd, tree.DiffOpLeftModify: 202 // In the event that the right side introduced a schema change, account for it here. 203 // We still have to migrate when the diff is `tree.DiffOpLeftModify` because of the corner case where 204 // the right side contains a schema change but the changed column is null, so row bytes don't change. 205 err = pri.merge(ctx, diff, tm.leftSch) 206 if err != nil { 207 return nil, nil, err 208 } 209 210 case tree.DiffOpDivergentModifyConflict, tree.DiffOpDivergentDeleteConflict: 211 // In this case, a modification or delete was made to one side, and a conflicting delete or modification 212 // was made to the other side, so these cannot be automatically resolved. 213 s.DataConflicts++ 214 err = conflicts.merge(ctx, diff, nil) 215 if err != nil { 216 return nil, nil, err 217 } 218 err = pri.merge(ctx, diff, tm.leftSch) 219 if err != nil { 220 return nil, nil, err 221 } 222 case tree.DiffOpRightAdd: 223 s.Adds++ 224 err = pri.merge(ctx, diff, tm.rightSch) 225 if err != nil { 226 return nil, nil, err 227 } 228 err = sec.merge(ctx, diff, tm.leftSch, tm.rightSch, tm, finalSch) 229 if err != nil { 230 return nil, nil, err 231 } 232 case tree.DiffOpRightModify: 233 s.Modifications++ 234 err = pri.merge(ctx, diff, tm.rightSch) 235 if err != nil { 236 return nil, nil, err 237 } 238 err = sec.merge(ctx, diff, tm.leftSch, tm.rightSch, tm, finalSch) 239 if err != nil { 240 return nil, nil, err 241 } 242 case tree.DiffOpRightDelete, tree.DiffOpDivergentDeleteResolved: 243 s.Deletes++ 244 err = pri.merge(ctx, diff, tm.rightSch) 245 if err != nil { 246 return nil, nil, err 247 } 248 err = sec.merge(ctx, diff, tm.leftSch, tm.rightSch, tm, finalSch) 249 if err != nil { 250 return nil, nil, err 251 } 252 case tree.DiffOpDivergentModifyResolved: 253 // In this case, both sides of the merge have made different changes to a row, but we were able to 254 // resolve them automatically. 255 s.Modifications++ 256 err = pri.merge(ctx, diff, nil) 257 if err != nil { 258 return nil, nil, err 259 } 260 err = sec.merge(ctx, diff, tm.leftSch, tm.rightSch, tm, finalSch) 261 if err != nil { 262 return nil, nil, err 263 } 264 case tree.DiffOpConvergentAdd, tree.DiffOpConvergentModify, tree.DiffOpConvergentDelete: 265 // In this case, both sides of the merge have made the same change, so no additional changes are needed. 266 if keyless { 267 s.DataConflicts++ 268 err = conflicts.merge(ctx, diff, nil) 269 if err != nil { 270 return nil, nil, err 271 } 272 } 273 default: 274 // Currently, all changes are applied to the left-side of the merge, so for any left-side diff ops, 275 // we can simply ignore them since that data is already in the destination (the left-side). 276 } 277 } 278 279 // After we've resolved all the diffs, it's safe for us to update the schema on the table 280 mergeTbl, err = tm.leftTbl.UpdateSchema(ctx, finalSch) 281 if err != nil { 282 return nil, nil, err 283 } 284 285 finalRows, err := pri.finalize(ctx) 286 if err != nil { 287 return nil, nil, err 288 } 289 290 leftIdxs, rightIdxs, err := sec.finalize(ctx) 291 if err != nil { 292 return nil, nil, err 293 } 294 295 finalIdxs, err := mergeProllySecondaryIndexes(ctx, tm, leftIdxs, rightIdxs, finalSch, finalRows, conflicts.ae, mergeInfo.InvalidateSecondaryIndexes) 296 if err != nil { 297 return nil, nil, err 298 } 299 300 finalArtifacts, err := conflicts.finalize(ctx) 301 302 // collect merged data in |finalTbl| 303 finalTbl, err := mergeTbl.UpdateRows(ctx, finalRows) 304 if err != nil { 305 return nil, nil, err 306 } 307 308 finalTbl, err = finalTbl.SetIndexSet(ctx, finalIdxs) 309 if err != nil { 310 return nil, nil, err 311 } 312 313 finalTbl, err = finalTbl.SetArtifacts(ctx, finalArtifacts) 314 if err != nil { 315 return nil, nil, err 316 } 317 318 return finalTbl, s, nil 319 } 320 321 func threeWayDiffer(ctx context.Context, tm *TableMerger, valueMerger *valueMerger, diffInfo tree.ThreeWayDiffInfo) (*tree.ThreeWayDiffer[val.Tuple, val.TupleDesc], error) { 322 lr, err := tm.leftTbl.GetRowData(ctx) 323 if err != nil { 324 return nil, err 325 } 326 leftRows := durable.ProllyMapFromIndex(lr) 327 328 rr, err := tm.rightTbl.GetRowData(ctx) 329 if err != nil { 330 return nil, err 331 } 332 rightRows := durable.ProllyMapFromIndex(rr) 333 334 ar, err := tm.ancTbl.GetRowData(ctx) 335 if err != nil { 336 return nil, err 337 } 338 ancRows := durable.ProllyMapFromIndex(ar) 339 340 return tree.NewThreeWayDiffer( 341 ctx, 342 leftRows.NodeStore(), 343 leftRows.Tuples(), 344 rightRows.Tuples(), 345 ancRows.Tuples(), 346 valueMerger.tryMerge, 347 valueMerger.keyless, 348 diffInfo, 349 leftRows.Tuples().Order, 350 ) 351 } 352 353 // checkValidator is responsible for inspecting three-way diff events, running any check constraint expressions 354 // that need to be reevaluated, and reporting any check constraint violations. 355 type checkValidator struct { 356 checkExpressions map[string]sql.Expression 357 valueMerger *valueMerger 358 tableMerger *TableMerger 359 sch schema.Schema 360 edits *prolly.ArtifactsEditor 361 srcHash hash.Hash 362 } 363 364 // newCheckValidator creates a new checkValidator, ready to validate diff events. |tm| provides the overall information 365 // about the table being merged, |vm| provides the details on how the value tuples are being merged between the ancestor, 366 // right and left sides of the merge, |sch| provides the final schema of the merge, and |edits| is used to write 367 // constraint validation artifacts. 368 func newCheckValidator(ctx *sql.Context, tm *TableMerger, vm *valueMerger, sch schema.Schema, edits *prolly.ArtifactsEditor) (checkValidator, error) { 369 checkExpressions := make(map[string]sql.Expression) 370 371 checks := sch.Checks() 372 for _, check := range checks.AllChecks() { 373 if !check.Enforced() { 374 continue 375 } 376 377 expr, err := index.ResolveCheckExpression(ctx, tm.name, sch, check.Expression()) 378 if err != nil { 379 return checkValidator{}, err 380 } 381 checkExpressions[check.Name()] = expr 382 } 383 384 srcHash, err := tm.rightSrc.HashOf() 385 if err != nil { 386 return checkValidator{}, err 387 } 388 389 return checkValidator{ 390 checkExpressions: checkExpressions, 391 valueMerger: vm, 392 tableMerger: tm, 393 sch: sch, 394 edits: edits, 395 srcHash: srcHash, 396 }, nil 397 } 398 399 // validateDiff inspects the three-way diff event |diff| and evaluates any check constraint expressions that need to 400 // be rechecked after the merge. If any check constraint violations are detected, the violation count is returned as 401 // the first return parameter and the violations are also written to the artifact editor passed in on creation. 402 func (cv checkValidator) validateDiff(ctx *sql.Context, diff tree.ThreeWayDiff) (int, error) { 403 conflictCount := 0 404 405 var valueTuple val.Tuple 406 var valueDesc val.TupleDesc 407 switch diff.Op { 408 case tree.DiffOpLeftDelete, tree.DiffOpRightDelete, tree.DiffOpConvergentDelete, tree.DiffOpDivergentDeleteResolved: 409 // no need to validate check constraints for deletes 410 return 0, nil 411 case tree.DiffOpDivergentDeleteConflict, tree.DiffOpDivergentModifyConflict: 412 // Don't bother validating divergent conflicts, just let them get reported as conflicts 413 return 0, nil 414 case tree.DiffOpLeftAdd, tree.DiffOpLeftModify: 415 valueTuple = diff.Left 416 valueDesc = cv.tableMerger.leftSch.GetValueDescriptor() 417 case tree.DiffOpRightAdd, tree.DiffOpRightModify: 418 valueTuple = diff.Right 419 valueDesc = cv.tableMerger.rightSch.GetValueDescriptor() 420 case tree.DiffOpConvergentAdd, tree.DiffOpConvergentModify: 421 // both sides made the same change, just take the left 422 valueTuple = diff.Left 423 valueDesc = cv.tableMerger.leftSch.GetValueDescriptor() 424 case tree.DiffOpDivergentModifyResolved: 425 valueTuple = diff.Merged 426 valueDesc = cv.tableMerger.leftSch.GetValueDescriptor() 427 } 428 429 for checkName, checkExpression := range cv.checkExpressions { 430 // Remap the value to the final schema before checking. 431 // We skip keyless tables, since their value tuples require different mapping 432 // logic and we don't currently support merges to keyless tables that contain schema changes anyway. 433 newTuple := valueTuple 434 if !cv.valueMerger.keyless { 435 if diff.Op == tree.DiffOpRightAdd || diff.Op == tree.DiffOpRightModify { 436 newTupleBytes := remapTuple(valueTuple, valueDesc, cv.valueMerger.rightMapping) 437 newTuple = val.NewTuple(cv.valueMerger.syncPool, newTupleBytes...) 438 } else if diff.Op == tree.DiffOpLeftAdd || diff.Op == tree.DiffOpLeftModify { 439 newTupleBytes := remapTuple(valueTuple, valueDesc, cv.valueMerger.leftMapping) 440 newTuple = val.NewTuple(cv.valueMerger.syncPool, newTupleBytes...) 441 } 442 } 443 444 row, err := index.BuildRow(ctx, diff.Key, newTuple, cv.sch, cv.valueMerger.ns) 445 if err != nil { 446 return 0, err 447 } 448 449 result, err := checkExpression.Eval(ctx, row) 450 if err != nil { 451 return 0, err 452 } 453 454 // MySQL treats NULL as TRUE for a check constraint 455 if result == nil { 456 result = true 457 } 458 459 // Coerce into a boolean; technically, this shouldn't be 460 // necessary, since check constraint expressions should always 461 // be of a boolean type, but Dolt has allowed this previously. 462 // https://github.com/dolthub/dolt/issues/6411 463 booleanResult, err := sql.ConvertToBool(ctx, result) 464 if err != nil { 465 return 0, fmt.Errorf("unable to convert check constraint expression (%s) into boolean value: %v", checkName, err.Error()) 466 } 467 468 if booleanResult { 469 // If a check constraint returns TRUE (or NULL), then the check constraint is fulfilled 470 // https://dev.mysql.com/doc/refman/8.0/en/create-table-check-constraints.html 471 continue 472 } else { 473 if cv.tableMerger.recordViolations { 474 conflictCount++ 475 meta, err := newCheckCVMeta(cv.sch, checkName) 476 if err != nil { 477 return 0, err 478 } 479 if err = cv.insertArtifact(ctx, diff.Key, newTuple, meta); err != nil { 480 return conflictCount, err 481 } 482 } 483 } 484 } 485 486 return conflictCount, nil 487 } 488 489 // insertArtifact records a check constraint violation, as described by |meta|, for the row with the specified 490 // |key| and |value|. 491 func (cv checkValidator) insertArtifact(ctx context.Context, key, value val.Tuple, meta CheckCVMeta) error { 492 vinfo, err := json.Marshal(meta) 493 if err != nil { 494 return err 495 } 496 cvm := prolly.ConstraintViolationMeta{VInfo: vinfo, Value: value} 497 return cv.edits.ReplaceConstraintViolation(ctx, key, cv.srcHash, prolly.ArtifactTypeChkConsViol, cvm) 498 } 499 500 // uniqValidator checks whether new additions from the merge-right 501 // duplicate secondary index entries. 502 type uniqValidator struct { 503 src doltdb.Rootish 504 srcHash hash.Hash 505 edits *prolly.ArtifactsEditor 506 indexes []uniqIndex 507 valueMerger *valueMerger 508 tm *TableMerger 509 } 510 511 func newUniqValidator(ctx *sql.Context, sch schema.Schema, tm *TableMerger, vm *valueMerger, edits *prolly.ArtifactsEditor) (uniqValidator, error) { 512 srcHash, err := tm.rightSrc.HashOf() 513 if err != nil { 514 return uniqValidator{}, err 515 } 516 517 uv := uniqValidator{ 518 src: tm.rightSrc, 519 srcHash: srcHash, 520 edits: edits, 521 valueMerger: vm, 522 tm: tm, 523 } 524 525 rows, err := tm.leftTbl.GetRowData(ctx) 526 if err != nil { 527 return uniqValidator{}, err 528 } 529 clustered := durable.ProllyMapFromIndex(rows) 530 531 indexes, err := tm.leftTbl.GetIndexSet(ctx) 532 if err != nil { 533 return uniqValidator{}, err 534 } 535 536 for _, def := range sch.Indexes().AllIndexes() { 537 if !def.IsUnique() { 538 continue 539 } else if !tm.leftSch.Indexes().Contains(def.Name()) { 540 continue // todo: how do we validate in this case? 541 } 542 543 idx, err := indexes.GetIndex(ctx, sch, def.Name()) 544 if err != nil { 545 return uniqValidator{}, err 546 } 547 secondary := durable.ProllyMapFromIndex(idx) 548 549 u, err := newUniqIndex(ctx, sch, tm.name, def, clustered, secondary) 550 if err != nil { 551 return uniqValidator{}, err 552 } 553 uv.indexes = append(uv.indexes, u) 554 } 555 return uv, nil 556 } 557 558 // validateDiff processes |diff| and checks for any unique constraint violations that need to be updated. The number 559 // of violations recorded along with any error encountered is returned. Processing |diff| may resolve existing unique 560 // constraint violations, in which case the violations returned may be a negative number. 561 func (uv uniqValidator) validateDiff(ctx *sql.Context, diff tree.ThreeWayDiff) (violations int, err error) { 562 var value val.Tuple 563 switch diff.Op { 564 case tree.DiffOpRightAdd, tree.DiffOpRightModify: 565 value = diff.Right 566 // Don't remap the value to the merged schema if the table is keyless or if the mapping is an identity mapping. 567 if !uv.valueMerger.keyless && !uv.valueMerger.rightMapping.IsIdentityMapping() { 568 modifiedValue := remapTuple(value, uv.tm.rightSch.GetValueDescriptor(), uv.valueMerger.rightMapping) 569 value = val.NewTuple(uv.valueMerger.syncPool, modifiedValue...) 570 } 571 case tree.DiffOpLeftAdd, tree.DiffOpLeftModify: 572 value = diff.Left 573 // Don't remap the value to the merged schema if the table is keyless or if the mapping is an identity mapping. 574 if !uv.valueMerger.keyless && !uv.valueMerger.leftMapping.IsIdentityMapping() { 575 modifiedValue := remapTuple(value, uv.tm.leftSch.GetValueDescriptor(), uv.valueMerger.leftMapping) 576 value = val.NewTuple(uv.valueMerger.syncPool, modifiedValue...) 577 } 578 case tree.DiffOpRightDelete: 579 // If we see a row deletion event from the right side, we grab the original/base value so that we can update our 580 // local copy of the secondary index. 581 value = diff.Base 582 case tree.DiffOpDivergentModifyResolved: 583 value = diff.Merged 584 default: 585 return 586 } 587 588 // For a row deletion... we need to remove any unique constraint violations that were previously recorded for 589 // this row. 590 if diff.Op == tree.DiffOpRightDelete { 591 // First update the unique indexes to remove this row. 592 for _, idx := range uv.indexes { 593 err := idx.removeRow(ctx, diff.Key, value) 594 if err != nil { 595 return violations, err 596 } 597 } 598 599 // Then clear any unique constraint violation artifacts for this row. If there is only one unique constraint 600 // violation artifact left, it will also be cleared by this function (since unique constraint violations 601 // must always occur with at least two rows reported). 602 return uv.clearArtifact(ctx, diff.Key, diff.Base) 603 } 604 605 if uv.tm.recordViolations { 606 for _, idx := range uv.indexes { 607 err = idx.findCollisions(ctx, diff.Key, value, func(k, v val.Tuple) error { 608 violations++ 609 return uv.insertArtifact(ctx, k, v, idx.meta) 610 }) 611 if err != nil { 612 break 613 } 614 } 615 } 616 617 // After detecting any unique constraint violations, we need to update our indexes with the updated row 618 if diff.Op != tree.DiffOpRightDelete { 619 for _, idx := range uv.indexes { 620 err := idx.insertRow(ctx, diff.Key, value) 621 if err != nil { 622 return violations, err 623 } 624 625 err = idx.clustered.Put(ctx, diff.Key, value) 626 if err != nil { 627 return violations, err 628 } 629 } 630 } 631 632 return violations, err 633 } 634 635 // deleteArtifact deletes the unique constraint violation artifact for the row identified by |key| and returns a 636 // boolean that indicates if an artifact was deleted, as well as an error that indicates if there were any 637 // unexpected errors encountered. 638 func (uv uniqValidator) deleteArtifact(ctx context.Context, key val.Tuple) (bool, error) { 639 artifactKey := uv.edits.BuildArtifactKey(ctx, key, uv.srcHash, prolly.ArtifactTypeUniqueKeyViol) 640 641 has, err := uv.edits.Has(ctx, artifactKey) 642 if err != nil || !has { 643 return false, err 644 } 645 646 err = uv.edits.Delete(ctx, artifactKey) 647 if err != nil { 648 return false, err 649 } 650 651 return true, nil 652 } 653 654 // clearArtifactsForValue deletes the unique constraint violation artifact for the row identified by |key| and |value| 655 // and then checks to see if only one unique constraint violation artifact remains, and if so, deletes it as well, 656 // since only a single row remaining for a unique constraint violation means that the violation has been fully 657 // resolved and no other rows conflict with that unique value. 658 func (uv uniqValidator) clearArtifact(ctx context.Context, key val.Tuple, prevValue val.Tuple) (int, error) { 659 deleted, err := uv.deleteArtifact(ctx, key) 660 if err != nil || !deleted { 661 return 0, err 662 } 663 664 // Start the violation count at -1 to represent the artifact above that we just removed 665 violationCount := -1 666 667 for _, idx := range uv.indexes { 668 // TODO: Test with multiple unique indexes and constraint violations on different values 669 // Multiple unique indexes won't work yet: https://github.com/dolthub/dolt/issues/6329 670 err := idx.findCollisions(ctx, key, prevValue, func(k, v val.Tuple) error { 671 deleted, err := uv.deleteArtifact(ctx, k) 672 if err != nil || !deleted { 673 return err 674 } 675 violationCount = violationCount - 1 676 return nil 677 }) 678 if err != nil { 679 break 680 } 681 } 682 683 return violationCount, nil 684 } 685 686 func (uv uniqValidator) insertArtifact(ctx context.Context, key, value val.Tuple, meta UniqCVMeta) error { 687 vinfo, err := json.Marshal(meta) 688 if err != nil { 689 return err 690 } 691 cvm := prolly.ConstraintViolationMeta{VInfo: vinfo, Value: value} 692 return uv.edits.ReplaceConstraintViolation(ctx, key, uv.srcHash, prolly.ArtifactTypeUniqueKeyViol, cvm) 693 } 694 695 type uniqIndex struct { 696 def schema.Index 697 secondary *prolly.MutableMap 698 clustered *prolly.MutableMap 699 meta UniqCVMeta 700 prefixDesc val.TupleDesc 701 secondaryBld index.SecondaryKeyBuilder 702 clusteredBld index.ClusteredKeyBuilder 703 clusteredKeyDesc val.TupleDesc 704 } 705 706 func newUniqIndex(ctx *sql.Context, sch schema.Schema, tableName string, def schema.Index, clustered, secondary prolly.Map) (uniqIndex, error) { 707 meta, err := makeUniqViolMeta(sch, def) 708 if err != nil { 709 return uniqIndex{}, err 710 } 711 712 if schema.IsKeyless(sch) { // todo(andy): sad panda 713 secondary = prolly.ConvertToSecondaryKeylessIndex(secondary) 714 } 715 p := clustered.Pool() 716 717 prefixDesc := secondary.KeyDesc().PrefixDesc(def.Count()) 718 secondaryBld, err := index.NewSecondaryKeyBuilder(ctx, tableName, sch, def, secondary.KeyDesc(), p, secondary.NodeStore()) 719 if err != nil { 720 return uniqIndex{}, err 721 } 722 723 clusteredBld := index.NewClusteredKeyBuilder(def, sch, clustered.KeyDesc(), p) 724 725 return uniqIndex{ 726 def: def, 727 secondary: secondary.Mutate(), 728 clustered: clustered.Mutate(), 729 clusteredKeyDesc: clustered.KeyDesc(), 730 meta: meta, 731 prefixDesc: prefixDesc, 732 secondaryBld: secondaryBld, 733 clusteredBld: clusteredBld, 734 }, nil 735 } 736 737 type collisionFn func(key, value val.Tuple) error 738 739 func (idx uniqIndex) insertRow(ctx context.Context, key, value val.Tuple) error { 740 secondaryIndexKey, err := idx.secondaryBld.SecondaryKeyFromRow(ctx, key, value) 741 if err != nil { 742 return err 743 } 744 745 // secondary indexes only use their key tuple 746 return idx.secondary.Put(ctx, secondaryIndexKey, val.EmptyTuple) 747 } 748 749 func (idx uniqIndex) removeRow(ctx context.Context, key, value val.Tuple) error { 750 secondaryIndexKey, err := idx.secondaryBld.SecondaryKeyFromRow(ctx, key, value) 751 if err != nil { 752 return err 753 } 754 755 err = idx.secondary.Delete(ctx, secondaryIndexKey) 756 if err != nil { 757 return err 758 } 759 760 clusteredIndexKey := idx.clusteredBld.ClusteredKeyFromIndexKey(secondaryIndexKey) 761 return idx.clustered.Delete(ctx, clusteredIndexKey) 762 } 763 764 // findCollisions searches this unique index to find any rows that have the same values as |value| for the columns 765 // included in the unique constraint. For any matching row, the specified callback, |cb|, is invoked with the key 766 // and value for the primary index, representing the conflicting row identified from the unique index. 767 func (idx uniqIndex) findCollisions(ctx context.Context, key, value val.Tuple, cb collisionFn) error { 768 indexKey, err := idx.secondaryBld.SecondaryKeyFromRow(ctx, key, value) 769 if err != nil { 770 return err 771 } 772 773 if idx.prefixDesc.HasNulls(indexKey) { 774 return nil // NULLs cannot cause unique violations 775 } 776 777 // This code uses the secondary index to iterate over all rows (key/value pairs) that have the same prefix. 778 // The prefix here is all the value columns this index is set up to track 779 collisions := make([]val.Tuple, 0) 780 err = idx.secondary.GetPrefix(ctx, indexKey, idx.prefixDesc, func(k, _ val.Tuple) (err error) { 781 if k != nil { 782 collisions = append(collisions, k) 783 } 784 return 785 }) 786 if err != nil || len(collisions) == 0 { 787 return err 788 } 789 790 collisionDetected := false 791 for _, collision := range collisions { 792 // Next find the key in the primary (aka clustered) index 793 clusteredKey := idx.clusteredBld.ClusteredKeyFromIndexKey(collision) 794 if bytes.Equal(key, clusteredKey) { 795 continue // collided with ourselves 796 } 797 798 // |prefix| was non-unique, find the clustered index row that 799 // collided with row(|key|, |value|) and pass both to |cb| 800 err = idx.clustered.Get(ctx, clusteredKey, func(k val.Tuple, v val.Tuple) error { 801 if k == nil { 802 s := idx.clusteredKeyDesc.Format(clusteredKey) 803 return errors.New("failed to find key: " + s) 804 } 805 collisionDetected = true 806 return cb(k, v) 807 }) 808 if err != nil { 809 return err 810 } 811 } 812 if collisionDetected { 813 return cb(key, value) 814 } else { 815 return nil 816 } 817 } 818 819 // nullValidator enforces NOT NULL constraints on merge 820 type nullValidator struct { 821 table string 822 // final is the merge result schema 823 final schema.Schema 824 // leftMap and rightMap map value tuples to |final| 825 leftMap, rightMap val.OrdinalMapping 826 // edits is the artifacts maps editor 827 artEditor *prolly.ArtifactsEditor 828 // leftEdits if the left-side row editor 829 leftEditor *prolly.MutableMap 830 // secEditors are the secondary index editors 831 secEditors []MutableSecondaryIdx 832 // theirRootish is the hash.Hash of the right-side revision 833 theirRootish hash.Hash 834 // ourRootish is the hash.Hash of the left-side revision 835 ourRootish hash.Hash 836 } 837 838 func newNullValidator( 839 ctx context.Context, 840 final schema.Schema, 841 tm *TableMerger, 842 vm *valueMerger, 843 artEditor *prolly.ArtifactsEditor, 844 leftEditor *prolly.MutableMap, 845 secEditors []MutableSecondaryIdx, 846 ) (nullValidator, error) { 847 theirRootish, err := tm.rightSrc.HashOf() 848 if err != nil { 849 return nullValidator{}, err 850 } 851 ourRootish, err := tm.rightSrc.HashOf() 852 if err != nil { 853 return nullValidator{}, err 854 } 855 return nullValidator{ 856 table: tm.name, 857 final: final, 858 leftMap: vm.leftMapping, 859 rightMap: vm.rightMapping, 860 artEditor: artEditor, 861 leftEditor: leftEditor, 862 secEditors: secEditors, 863 theirRootish: theirRootish, 864 ourRootish: ourRootish, 865 }, nil 866 } 867 868 func (nv nullValidator) validateDiff(ctx context.Context, diff tree.ThreeWayDiff) (count int, err error) { 869 switch diff.Op { 870 case tree.DiffOpRightAdd, tree.DiffOpRightModify: 871 var violations []string 872 for to, from := range nv.rightMap { 873 col := nv.final.GetNonPKCols().GetByIndex(to) 874 if col.IsNullable() { 875 continue 876 } 877 if from < 0 { 878 // non-nullable column in |nv.final| does not exist 879 // on the right side of the merge, check if it will 880 // be populated with a default value 881 if col.Default == "" { 882 violations = append(violations, col.Name) 883 } 884 } else { 885 if diff.Right.FieldIsNull(from) { 886 violations = append(violations, col.Name) 887 } 888 } 889 } 890 // for right-side NULL violations, we insert a constraint violation and 891 // set |count| > 0 to signal to the caller that |diff| should not be applied 892 if len(violations) > 0 { 893 var meta prolly.ConstraintViolationMeta 894 if meta, err = newNotNullViolationMeta(violations, diff.Right); err != nil { 895 return 0, err 896 } 897 err = nv.artEditor.ReplaceConstraintViolation(ctx, diff.Key, nv.theirRootish, prolly.ArtifactTypeNullViol, meta) 898 if err != nil { 899 return 0, err 900 } 901 } 902 count = len(violations) 903 return 904 905 case tree.DiffOpLeftAdd, tree.DiffOpLeftModify: 906 var violations []string 907 for to, from := range nv.leftMap { 908 col := nv.final.GetNonPKCols().GetByIndex(to) 909 if col.IsNullable() { 910 continue 911 } 912 if from < 0 { 913 // non-nullable column in |nv.final| does not exist 914 // on the left side of the merge, check if it will 915 // be populated with a default value 916 if col.Default == "" { 917 violations = append(violations, col.Name) 918 } 919 } else { 920 if diff.Left.FieldIsNull(from) { 921 violations = append(violations, col.Name) 922 } 923 } 924 } 925 // for left-side NULL violations, we insert a constraint violation and 926 // then must explicitly remove this row from all left-side indexes 927 if len(violations) > 0 { 928 var meta prolly.ConstraintViolationMeta 929 if meta, err = newNotNullViolationMeta(violations, diff.Left); err != nil { 930 return 0, err 931 } 932 err = nv.artEditor.ReplaceConstraintViolation(ctx, diff.Key, nv.ourRootish, prolly.ArtifactTypeNullViol, meta) 933 if err != nil { 934 return 0, err 935 } 936 if err = nv.leftEditor.Delete(ctx, diff.Key); err != nil { 937 return 0, err 938 } 939 for _, editor := range nv.secEditors { 940 if err = editor.DeleteEntry(ctx, diff.Key, diff.Left); err != nil { 941 return 0, err 942 } 943 } 944 } 945 count = len(violations) 946 return 947 case tree.DiffOpDivergentModifyResolved: 948 var violations []string 949 for to, _ := range nv.leftMap { 950 col := nv.final.GetNonPKCols().GetByIndex(to) 951 if !col.IsNullable() && diff.Merged.FieldIsNull(to) { 952 violations = append(violations, col.Name) 953 } 954 } 955 // for merged NULL violations, we insert a constraint violation and 956 // then must explicitly remove this row from all left-side indexes 957 if len(violations) > 0 { 958 var meta prolly.ConstraintViolationMeta 959 if meta, err = newNotNullViolationMeta(violations, diff.Merged); err != nil { 960 return 0, err 961 } 962 err = nv.artEditor.ReplaceConstraintViolation(ctx, diff.Key, nv.ourRootish, prolly.ArtifactTypeNullViol, meta) 963 if err != nil { 964 return 0, err 965 } 966 if err = nv.leftEditor.Delete(ctx, diff.Key); err != nil { 967 return 0, err 968 } 969 for _, editor := range nv.secEditors { 970 if err = editor.DeleteEntry(ctx, diff.Key, diff.Left); err != nil { 971 return 0, err 972 } 973 } 974 } 975 count = len(violations) 976 return 977 } 978 return 979 } 980 981 // conflictMerger processing primary key diffs 982 // with conflict types into artifact table writes. 983 type conflictMerger struct { 984 ae *prolly.ArtifactsEditor 985 rightRootish hash.Hash 986 meta []byte 987 } 988 989 func newConflictMerger(ctx context.Context, tm *TableMerger, ae *prolly.ArtifactsEditor) (*conflictMerger, error) { 990 has, err := tm.leftTbl.HasConflicts(ctx) 991 if err != nil { 992 return nil, err 993 } 994 if has { 995 a, l, r, err := tm.leftTbl.GetConflictSchemas(ctx, tm.name) 996 if err != nil { 997 return nil, err 998 } 999 1000 equal := schema.ColCollsAreEqual(a.GetAllCols(), tm.ancSch.GetAllCols()) && 1001 schema.ColCollsAreEqual(l.GetAllCols(), tm.leftSch.GetAllCols()) && 1002 schema.ColCollsAreEqual(r.GetAllCols(), tm.rightSch.GetAllCols()) 1003 if !equal { 1004 return nil, ErrConflictsIncompatible 1005 } 1006 } 1007 1008 rightHash, err := tm.rightSrc.HashOf() 1009 if err != nil { 1010 return nil, err 1011 } 1012 1013 baseHash, err := tm.ancestorSrc.HashOf() 1014 if err != nil { 1015 return nil, err 1016 } 1017 1018 m := prolly.ConflictMetadata{ 1019 BaseRootIsh: baseHash, 1020 } 1021 meta, err := json.Marshal(m) 1022 if err != nil { 1023 return nil, err 1024 } 1025 1026 return &conflictMerger{ 1027 meta: meta, 1028 rightRootish: rightHash, 1029 ae: ae, 1030 }, nil 1031 } 1032 1033 func (m *conflictMerger) merge(ctx context.Context, diff tree.ThreeWayDiff, _ schema.Schema) error { 1034 switch diff.Op { 1035 case tree.DiffOpDivergentModifyConflict, tree.DiffOpDivergentDeleteConflict, 1036 tree.DiffOpConvergentAdd, tree.DiffOpConvergentModify, tree.DiffOpConvergentDelete: 1037 default: 1038 return fmt.Errorf("invalid conflict type: %s", diff.Op) 1039 } 1040 return m.ae.Add(ctx, diff.Key, m.rightRootish, prolly.ArtifactTypeConflict, m.meta) 1041 } 1042 1043 func (m *conflictMerger) finalize(ctx context.Context) (durable.ArtifactIndex, error) { 1044 am, err := m.ae.Flush(ctx) 1045 if err != nil { 1046 return nil, err 1047 } 1048 return durable.ArtifactIndexFromProllyMap(am), nil 1049 } 1050 1051 // primaryMerger translates three-way diffs 1052 // on the primary index into merge-left updates. 1053 type primaryMerger struct { 1054 mut *prolly.MutableMap 1055 valueMerger *valueMerger 1056 tableMerger *TableMerger 1057 finalSch schema.Schema 1058 mergeInfo MergeInfo 1059 defaults []sql.Expression 1060 } 1061 1062 func newPrimaryMerger(leftEditor *prolly.MutableMap, tableMerger *TableMerger, valueMerger *valueMerger, finalSch schema.Schema, mergeInfo MergeInfo, defaults []sql.Expression) (*primaryMerger, error) { 1063 return &primaryMerger{ 1064 mut: leftEditor, 1065 valueMerger: valueMerger, 1066 tableMerger: tableMerger, 1067 finalSch: finalSch, 1068 mergeInfo: mergeInfo, 1069 defaults: defaults, 1070 }, nil 1071 } 1072 1073 // merge applies the specified |diff| to the primary index of this primaryMerger. The given |sourceSch| 1074 // specifies the schema of the source of the diff, which is used to map the diff to the post-merge 1075 // schema. |sourceSch| may be nil when no mapping from the source schema is needed (i.e. DiffOpRightDelete, 1076 // and DiffOpDivergentModifyResolved). 1077 func (m *primaryMerger) merge(ctx *sql.Context, diff tree.ThreeWayDiff, sourceSch schema.Schema) error { 1078 switch diff.Op { 1079 case tree.DiffOpRightAdd, tree.DiffOpRightModify: 1080 if sourceSch == nil { 1081 return fmt.Errorf("no source schema specified to map right-side changes to merged schema") 1082 } 1083 1084 newTupleValue := diff.Right 1085 if schema.IsKeyless(sourceSch) { 1086 if m.valueMerger.rightMapping.IsIdentityMapping() == false { 1087 return fmt.Errorf("cannot merge keyless tables with reordered columns") 1088 } 1089 } else { 1090 // Remapping when there's no schema change is harmless, but slow. 1091 if m.mergeInfo.RightNeedsRewrite { 1092 defaults, err := resolveDefaults(ctx, m.tableMerger.name, m.finalSch, m.tableMerger.rightSch) 1093 if err != nil { 1094 return err 1095 } 1096 1097 tempTupleValue, err := remapTupleWithColumnDefaults( 1098 ctx, 1099 diff.Key, 1100 diff.Right, 1101 sourceSch.GetValueDescriptor(), 1102 m.valueMerger.rightMapping, 1103 m.tableMerger, 1104 m.tableMerger.rightSch, 1105 m.finalSch, 1106 defaults, 1107 m.valueMerger.syncPool, 1108 true, 1109 ) 1110 if err != nil { 1111 return err 1112 } 1113 newTupleValue = tempTupleValue 1114 } 1115 } 1116 return m.mut.Put(ctx, diff.Key, newTupleValue) 1117 case tree.DiffOpRightDelete: 1118 return m.mut.Put(ctx, diff.Key, diff.Right) 1119 case tree.DiffOpDivergentDeleteResolved: 1120 // WARNING: In theory, we should only have to call MutableMap::Delete if the key is actually being deleted 1121 // from the left branch. However, because of https://github.com/dolthub/dolt/issues/7192, 1122 // if the left side of the merge is an empty table and we don't attempt to modify the map, 1123 // the table will have an unexpected root hash. 1124 return m.mut.Delete(ctx, diff.Key) 1125 case tree.DiffOpDivergentModifyResolved: 1126 // any generated columns need to be re-resolved because their computed values may have changed as a result of 1127 // the merge 1128 merged := diff.Merged 1129 if hasStoredGeneratedColumns(m.finalSch) { 1130 defaults, err := resolveDefaults(ctx, m.tableMerger.name, m.finalSch, m.tableMerger.rightSch) 1131 if err != nil { 1132 return err 1133 } 1134 1135 tempTupleValue, err := remapTupleWithColumnDefaults( 1136 ctx, 1137 diff.Key, 1138 merged, 1139 m.finalSch.GetValueDescriptor(), 1140 m.valueMerger.rightMapping, 1141 m.tableMerger, 1142 m.tableMerger.rightSch, 1143 m.finalSch, 1144 defaults, 1145 m.valueMerger.syncPool, 1146 true) 1147 if err != nil { 1148 return err 1149 } 1150 merged = tempTupleValue 1151 } 1152 1153 return m.mut.Put(ctx, diff.Key, merged) 1154 case tree.DiffOpLeftAdd, tree.DiffOpLeftModify, tree.DiffOpDivergentModifyConflict, tree.DiffOpDivergentDeleteConflict: 1155 // Remapping when there's no schema change is harmless, but slow. 1156 if !m.mergeInfo.LeftNeedsRewrite { 1157 return nil 1158 } 1159 // If the right side has a schema change, then newly added rows from the left must be migrated to the new schema. 1160 // Rows with unresolvable conflicts must also be migrated to the new schema so that they can resolved manually. 1161 if diff.Left == nil { 1162 return m.mut.Put(ctx, diff.Key, nil) 1163 } 1164 newTupleValue := diff.Left 1165 if schema.IsKeyless(sourceSch) { 1166 if m.valueMerger.leftMapping.IsIdentityMapping() == false { 1167 return fmt.Errorf("cannot merge keyless tables with reordered columns") 1168 } 1169 } else { 1170 tempTupleValue, err := remapTupleWithColumnDefaults(ctx, diff.Key, newTupleValue, sourceSch.GetValueDescriptor(), 1171 m.valueMerger.leftMapping, m.tableMerger, m.tableMerger.leftSch, m.finalSch, m.defaults, m.valueMerger.syncPool, false) 1172 if err != nil { 1173 return err 1174 } 1175 newTupleValue = tempTupleValue 1176 } 1177 return m.mut.Put(ctx, diff.Key, newTupleValue) 1178 default: 1179 return fmt.Errorf("unexpected diffOp for editing primary index: %s", diff.Op) 1180 } 1181 } 1182 1183 func resolveDefaults(ctx *sql.Context, tableName string, mergedSchema schema.Schema, sourceSchema schema.Schema) ([]sql.Expression, error) { 1184 var exprs []sql.Expression 1185 i := 0 1186 1187 // We want a slice of expressions in the order of the merged schema, but with column indexes from the source schema, 1188 // against which they will be evaluated 1189 err := mergedSchema.GetNonPKCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) { 1190 if col.Virtual { 1191 return false, nil 1192 } 1193 1194 if col.Default != "" || col.Generated != "" || col.OnUpdate != "" { 1195 expr, err := index.ResolveDefaultExpression(ctx, tableName, mergedSchema, col) 1196 if err != nil { 1197 return true, err 1198 } 1199 if len(exprs) == 0 { 1200 exprs = make([]sql.Expression, mergedSchema.GetNonPKCols().StoredSize()) 1201 } 1202 exprs[i] = expr 1203 } 1204 1205 i++ 1206 return false, nil 1207 }) 1208 if err != nil { 1209 return nil, err 1210 } 1211 1212 // The default expresions always come in the order of the merged schema, but the fields we need to apply them to 1213 // might have different column indexes in the case of a schema change 1214 if len(exprs) > 0 { 1215 for i := range exprs { 1216 if exprs[i] == nil { 1217 continue 1218 } 1219 exprs[i], _, _ = transform.Expr(exprs[i], func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) { 1220 if gf, ok := e.(*expression.GetField); ok { 1221 newIdx := indexOf(gf.Name(), sourceSchema.GetAllCols().GetColumnNames()) 1222 if newIdx >= 0 { 1223 return gf.WithIndex(newIdx), transform.NewTree, nil 1224 } 1225 } 1226 return e, transform.SameTree, nil 1227 }) 1228 } 1229 } 1230 1231 return exprs, nil 1232 } 1233 1234 func indexOf(col string, cols []string) int { 1235 for i, column := range cols { 1236 if column == col { 1237 return i 1238 } 1239 } 1240 return -1 1241 } 1242 1243 func hasStoredGeneratedColumns(sch schema.Schema) bool { 1244 hasGenerated := false 1245 sch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) { 1246 if col.Generated != "" && !col.Virtual { 1247 hasGenerated = true 1248 return true, nil 1249 } 1250 return false, nil 1251 }) 1252 return hasGenerated 1253 } 1254 1255 func (m *primaryMerger) finalize(ctx context.Context) (durable.Index, error) { 1256 mergedMap, err := m.mut.Map(ctx) 1257 if err != nil { 1258 return nil, err 1259 } 1260 return durable.IndexFromProllyMap(mergedMap), nil 1261 } 1262 1263 // secondaryMerger translates diffs on the primary index 1264 // into secondary index updates. 1265 type secondaryMerger struct { 1266 leftSet durable.IndexSet 1267 rightSet durable.IndexSet 1268 leftIdxes []MutableSecondaryIdx 1269 valueMerger *valueMerger 1270 mergedSchema schema.Schema 1271 tableMerger *TableMerger 1272 mergeInfo MergeInfo 1273 } 1274 1275 const secondaryMergerPendingSize = 650_000 1276 1277 func newSecondaryMerger(ctx *sql.Context, tm *TableMerger, valueMerger *valueMerger, mergedSchema schema.Schema, mergeInfo MergeInfo) (*secondaryMerger, error) { 1278 ls, err := tm.leftTbl.GetIndexSet(ctx) 1279 if err != nil { 1280 return nil, err 1281 } 1282 // Use the mergedSchema to work with the secondary indexes, to pull out row data using the right 1283 // pri_index -> sec_index mapping. 1284 lm, err := GetMutableSecondaryIdxsWithPending(ctx, mergedSchema, tm.name, ls, secondaryMergerPendingSize) 1285 if err != nil { 1286 return nil, err 1287 } 1288 1289 rs, err := tm.rightTbl.GetIndexSet(ctx) 1290 if err != nil { 1291 return nil, err 1292 } 1293 1294 return &secondaryMerger{ 1295 leftSet: ls, 1296 rightSet: rs, 1297 leftIdxes: lm, 1298 valueMerger: valueMerger, 1299 mergedSchema: mergedSchema, 1300 tableMerger: tm, 1301 mergeInfo: mergeInfo, 1302 }, nil 1303 } 1304 1305 func (m *secondaryMerger) merge(ctx *sql.Context, diff tree.ThreeWayDiff, leftSchema, rightSchema schema.Schema, tm *TableMerger, finalSchema schema.Schema) error { 1306 var err error 1307 if m.mergeInfo.InvalidateSecondaryIndexes { 1308 return nil 1309 } 1310 for _, idx := range m.leftIdxes { 1311 switch diff.Op { 1312 case tree.DiffOpDivergentModifyResolved: 1313 // TODO: we need to re-resolve values from generated columns here as well 1314 err = applyEdit(ctx, idx, diff.Key, diff.Left, diff.Merged) 1315 case tree.DiffOpRightAdd, tree.DiffOpRightModify: 1316 // Just as with the primary index, we need to map right-side changes to the final, merged schema. 1317 if rightSchema == nil { 1318 return fmt.Errorf("no source schema specified to map right-side changes to merged schema") 1319 } 1320 1321 newTupleValue := diff.Right 1322 baseTupleValue := diff.Base 1323 if m.mergeInfo.RightNeedsRewrite { 1324 if schema.IsKeyless(rightSchema) { 1325 if m.valueMerger.rightMapping.IsIdentityMapping() == false { 1326 return fmt.Errorf("cannot merge keyless tables with reordered columns") 1327 } 1328 } else { 1329 defaults, err := resolveDefaults(ctx, m.tableMerger.name, m.mergedSchema, m.tableMerger.rightSch) 1330 if err != nil { 1331 return err 1332 } 1333 1334 // Convert right value to result schema 1335 tempTupleValue, err := remapTupleWithColumnDefaults( 1336 ctx, 1337 diff.Key, 1338 diff.Right, 1339 m.valueMerger.rightSchema.GetValueDescriptor(), 1340 m.valueMerger.rightMapping, 1341 m.tableMerger, 1342 m.tableMerger.rightSch, 1343 m.mergedSchema, 1344 defaults, 1345 m.valueMerger.syncPool, 1346 true, 1347 ) 1348 if err != nil { 1349 return err 1350 } 1351 newTupleValue = tempTupleValue 1352 if diff.Base != nil { 1353 defaults, err := resolveDefaults(ctx, m.tableMerger.name, m.mergedSchema, m.tableMerger.ancSch) 1354 if err != nil { 1355 return err 1356 } 1357 1358 // Convert base value to result schema 1359 baseTupleValue, err = remapTupleWithColumnDefaults( 1360 ctx, 1361 diff.Key, 1362 diff.Base, 1363 // Only the right side was modified, so the base schema must be the same as the left schema 1364 leftSchema.GetValueDescriptor(), 1365 m.valueMerger.baseMapping, 1366 tm, 1367 m.tableMerger.ancSch, 1368 finalSchema, 1369 defaults, 1370 m.valueMerger.syncPool, 1371 false) 1372 if err != nil { 1373 return err 1374 } 1375 } 1376 } 1377 } 1378 1379 err = applyEdit(ctx, idx, diff.Key, baseTupleValue, newTupleValue) 1380 case tree.DiffOpRightDelete: 1381 err = applyEdit(ctx, idx, diff.Key, diff.Base, diff.Right) 1382 case tree.DiffOpDivergentDeleteResolved: 1383 // If the left-side has the delete, the index is already correct and no work needs to be done. 1384 // If the right-side has the delete, remove the key from the index. 1385 if diff.Right == nil { 1386 err = applyEdit(ctx, idx, diff.Key, diff.Base, nil) 1387 } 1388 default: 1389 // Any changes to the left-side of the merge are not needed, since we currently 1390 // always default to using the left side of the merge as the final result, so all 1391 // left-side changes are already there. This won't always be the case though! We'll 1392 // eventually want to optimize the merge side we choose for applying changes and 1393 // will need to update this code. 1394 } 1395 if err != nil { 1396 return err 1397 } 1398 } 1399 return nil 1400 } 1401 1402 // finalize reifies edits into output index sets 1403 func (m *secondaryMerger) finalize(ctx context.Context) (durable.IndexSet, durable.IndexSet, error) { 1404 for _, idx := range m.leftIdxes { 1405 idxMap, err := idx.Map(ctx) 1406 if err != nil { 1407 return nil, nil, err 1408 } 1409 m.leftSet, err = m.leftSet.PutIndex(ctx, idx.Name, durable.IndexFromProllyMap(idxMap)) 1410 if err != nil { 1411 return nil, nil, err 1412 } 1413 } 1414 return m.leftSet, m.rightSet, nil 1415 } 1416 1417 // remapTuple takes the given |tuple| and the |desc| that describes its data, and uses |mapping| to map the tuple's 1418 // data into a new [][]byte, as indicated by the specified ordinal mapping. 1419 func remapTuple(tuple val.Tuple, desc val.TupleDesc, mapping val.OrdinalMapping) [][]byte { 1420 result := make([][]byte, len(mapping)) 1421 for to, from := range mapping { 1422 if from == -1 { 1423 continue 1424 } 1425 result[to] = desc.GetField(from, tuple) 1426 } 1427 1428 return result 1429 } 1430 1431 // remapTupleWithColumnDefaults takes the given |tuple| (and the |tupleDesc| that describes how to access its fields) 1432 // and uses |mapping| to map the tuple's data and return a new tuple. 1433 // |tm| provides high access to the name of the table currently being merged and associated node store. 1434 // |mergedSch| is the new schema of the table and is used to look up column default values to apply to any existing 1435 // rows when a new column is added as part of a merge. 1436 // |pool| is used to allocate memory for the new tuple. 1437 // |defaultExprs| is a slice of expressions that represent the default or generated values for all columns, with 1438 // indexes in the same order as the tuple provided. 1439 // |rightSide| indicates if the tuple came from the right side of the merge; this is needed to determine if the tuple 1440 // data needs to be converted from the old schema type to a changed schema type. 1441 func remapTupleWithColumnDefaults( 1442 ctx *sql.Context, 1443 keyTuple, valueTuple val.Tuple, 1444 valDesc val.TupleDesc, 1445 mapping val.OrdinalMapping, 1446 tm *TableMerger, 1447 rowSch schema.Schema, 1448 mergedSch schema.Schema, 1449 defaultExprs []sql.Expression, 1450 pool pool.BuffPool, 1451 rightSide bool, 1452 ) (val.Tuple, error) { 1453 tb := val.NewTupleBuilder(mergedSch.GetValueDescriptor()) 1454 1455 var secondPass []int 1456 for to, from := range mapping { 1457 col := mergedSch.GetNonPKCols().GetByStoredIndex(to) 1458 if from == -1 { 1459 // If the column is a new column, then look up any default or generated value in a second pass, after the 1460 // non-default and non-generated fields have been established. Virtual columns have been excluded, so any 1461 // generated column is stored. 1462 if col.Default != "" || col.Generated != "" || col.OnUpdate != "" { 1463 secondPass = append(secondPass, to) 1464 } 1465 } else { 1466 var value any 1467 var err error 1468 // Generated column values need to be regenerated after the merge 1469 if col.Generated != "" { 1470 secondPass = append(secondPass, to) 1471 } 1472 1473 value, err = tree.GetField(ctx, valDesc, from, valueTuple, tm.ns) 1474 if err != nil { 1475 return nil, err 1476 } 1477 1478 // If the type has changed, then call convert to convert the value to the new type 1479 value, err = convertValueToNewType(value, col.TypeInfo, tm, from, rightSide) 1480 if err != nil { 1481 return nil, err 1482 } 1483 1484 err = tree.PutField(ctx, tm.ns, tb, to, value) 1485 if err != nil { 1486 return nil, err 1487 } 1488 } 1489 } 1490 1491 for _, to := range secondPass { 1492 col := mergedSch.GetNonPKCols().GetByStoredIndex(to) 1493 err := writeTupleExpression(ctx, keyTuple, valueTuple, defaultExprs[to], col, rowSch, tm, tb, to) 1494 if err != nil { 1495 return nil, err 1496 } 1497 } 1498 1499 return tb.Build(pool), nil 1500 } 1501 1502 // writeTupleExpression attempts to evaluate the expression string |exprString| against the row provided and write it 1503 // to the provided index in the tuple builder. This is necessary for column default values and generated columns. 1504 func writeTupleExpression( 1505 ctx *sql.Context, 1506 keyTuple val.Tuple, 1507 valueTuple val.Tuple, 1508 expr sql.Expression, 1509 col schema.Column, 1510 sch schema.Schema, 1511 tm *TableMerger, 1512 tb *val.TupleBuilder, 1513 colIdx int, 1514 ) error { 1515 if !expr.Resolved() { 1516 return ErrUnableToMergeColumnDefaultValue.New(expr.String(), tm.name) 1517 } 1518 1519 row, err := index.BuildRow(ctx, keyTuple, valueTuple, sch, tm.ns) 1520 if err != nil { 1521 return err 1522 } 1523 1524 value, err := expr.Eval(ctx, row) 1525 if err != nil { 1526 return err 1527 } 1528 1529 value, _, err = col.TypeInfo.ToSqlType().Convert(value) 1530 if err != nil { 1531 return err 1532 } 1533 1534 return tree.PutField(ctx, tm.ns, tb, colIdx, value) 1535 } 1536 1537 // convertValueToNewType handles converting a value from a previous type into a new type. |value| is the value from 1538 // the previous schema, |newTypeInfo| is the type info for the value in the new schema, |tm| is the TableMerger 1539 // instance that describes how the table is being merged, |from| is the field position in the value tuple from the 1540 // previous schema, and |rightSide| indicates whether the previous type info can be found on the right side of the merge 1541 // or the left side. If the previous type info is the same as the current type info for the merged schema, then this 1542 // function is a no-op and simply returns |value|. The converted value along with any unexpected error encountered is 1543 // returned. 1544 func convertValueToNewType(value interface{}, newTypeInfo typeinfo.TypeInfo, tm *TableMerger, from int, rightSide bool) (interface{}, error) { 1545 var previousTypeInfo typeinfo.TypeInfo 1546 if rightSide { 1547 previousTypeInfo = tm.rightSch.GetNonPKCols().GetByIndex(from).TypeInfo 1548 } else { 1549 previousTypeInfo = tm.leftSch.GetNonPKCols().GetByIndex(from).TypeInfo 1550 } 1551 1552 if newTypeInfo.Equals(previousTypeInfo) { 1553 return value, nil 1554 } 1555 1556 // If the type has changed, then call convert to convert the value to the new type 1557 newValue, inRange, err := newTypeInfo.ToSqlType().Convert(value) 1558 if err != nil { 1559 return nil, err 1560 } 1561 if !inRange { 1562 return nil, fmt.Errorf("out of range conversion for value %v to type %s", value, newTypeInfo.String()) 1563 } 1564 return newValue, nil 1565 } 1566 1567 func mergeTableArtifacts(ctx context.Context, tm *TableMerger, mergeTbl *doltdb.Table) (*doltdb.Table, error) { 1568 la, err := tm.leftTbl.GetArtifacts(ctx) 1569 if err != nil { 1570 return nil, err 1571 } 1572 left := durable.ProllyMapFromArtifactIndex(la) 1573 1574 ra, err := tm.rightTbl.GetArtifacts(ctx) 1575 if err != nil { 1576 return nil, err 1577 } 1578 right := durable.ProllyMapFromArtifactIndex(ra) 1579 1580 aa, err := tm.ancTbl.GetArtifacts(ctx) 1581 if err != nil { 1582 return nil, err 1583 } 1584 anc := durable.ProllyMapFromArtifactIndex(aa) 1585 1586 var keyCollision bool 1587 collide := func(l, r tree.Diff) (tree.Diff, bool) { 1588 if l.Type == r.Type && bytes.Equal(l.To, r.To) { 1589 return l, true // convergent edit 1590 } 1591 keyCollision = true 1592 return tree.Diff{}, false 1593 } 1594 1595 ma, err := prolly.MergeArtifactMaps(ctx, left, right, anc, collide) 1596 if err != nil { 1597 return nil, err 1598 } 1599 idx := durable.ArtifactIndexFromProllyMap(ma) 1600 1601 if keyCollision { 1602 return nil, fmt.Errorf("encountered a key collision when merging the artifacts for table %s", tm.name) 1603 } 1604 1605 return mergeTbl.SetArtifacts(ctx, idx) 1606 } 1607 1608 // valueMerger attempts to resolve three-ways diffs on the same 1609 // key but with conflicting values. A successful resolve produces 1610 // a three-way cell edit (tree.DiffOpDivergentModifyResolved). 1611 type valueMerger struct { 1612 numCols int 1613 baseVD, leftVD, rightVD, resultVD val.TupleDesc 1614 leftSchema, rightSchema, resultSchema schema.Schema 1615 leftMapping, rightMapping, baseMapping val.OrdinalMapping 1616 baseToLeftMapping val.OrdinalMapping 1617 baseToRightMapping val.OrdinalMapping 1618 baseToResultMapping val.OrdinalMapping 1619 syncPool pool.BuffPool 1620 keyless bool 1621 ns tree.NodeStore 1622 } 1623 1624 func newValueMerger(merged, leftSch, rightSch, baseSch schema.Schema, syncPool pool.BuffPool, ns tree.NodeStore) *valueMerger { 1625 leftMapping, rightMapping, baseMapping := generateSchemaMappings(merged, leftSch, rightSch, baseSch) 1626 1627 baseToLeftMapping, baseToRightMapping, baseToResultMapping := generateSchemaMappings(baseSch, leftSch, rightSch, merged) 1628 1629 return &valueMerger{ 1630 numCols: merged.GetNonPKCols().StoredSize(), 1631 baseVD: baseSch.GetValueDescriptor(), 1632 rightVD: rightSch.GetValueDescriptor(), 1633 resultVD: merged.GetValueDescriptor(), 1634 leftVD: leftSch.GetValueDescriptor(), 1635 resultSchema: merged, 1636 leftMapping: leftMapping, 1637 rightMapping: rightMapping, 1638 baseMapping: baseMapping, 1639 baseToLeftMapping: baseToLeftMapping, 1640 baseToRightMapping: baseToRightMapping, 1641 baseToResultMapping: baseToResultMapping, 1642 leftSchema: leftSch, 1643 rightSchema: rightSch, 1644 syncPool: syncPool, 1645 keyless: schema.IsKeyless(merged), 1646 ns: ns, 1647 } 1648 } 1649 1650 // generateSchemaMappings returns three schema mappings: 1) mapping the |leftSch| to |mergedSch|, 1651 // 2) mapping |rightSch| to |mergedSch|, and 3) mapping |baseSch| to |mergedSch|. Columns are 1652 // mapped from the source schema to destination schema by finding an identical tag, or if no 1653 // identical tag is found, then falling back to a match on column name and type. 1654 func generateSchemaMappings(mergedSch, leftSch, rightSch, baseSch schema.Schema) (leftMapping, rightMapping, baseMapping val.OrdinalMapping) { 1655 n := mergedSch.GetNonPKCols().StoredSize() 1656 leftMapping = make(val.OrdinalMapping, n) 1657 rightMapping = make(val.OrdinalMapping, n) 1658 baseMapping = make(val.OrdinalMapping, n) 1659 1660 i := 0 1661 for _, col := range mergedSch.GetNonPKCols().GetColumns() { 1662 if col.Virtual { 1663 continue 1664 } 1665 leftMapping[i] = findNonPKColumnMappingByTagOrName(leftSch, col) 1666 rightMapping[i] = findNonPKColumnMappingByTagOrName(rightSch, col) 1667 baseMapping[i] = findNonPKColumnMappingByTagOrName(baseSch, col) 1668 i++ 1669 } 1670 1671 return leftMapping, rightMapping, baseMapping 1672 } 1673 1674 // findNonPKColumnMappingByName returns the index of the column with the given name in the given schema, or -1 if it 1675 // doesn't exist. 1676 func findNonPKColumnMappingByName(sch schema.Schema, name string) int { 1677 leftNonPKCols := sch.GetNonPKCols() 1678 if leftNonPKCols.Contains(name) { 1679 return leftNonPKCols.IndexOf(name) 1680 } else { 1681 return -1 1682 } 1683 } 1684 1685 // findNonPKColumnMappingByTagOrName returns the index of the column with the given tag in the given schema. If a 1686 // matching tag is not found, then this function falls back to looking for a matching column by name. If no 1687 // matching column is found, then this function returns -1. 1688 func findNonPKColumnMappingByTagOrName(sch schema.Schema, col schema.Column) int { 1689 if idx, ok := sch.GetNonPKCols().StoredIndexByTag(col.Tag); ok { 1690 return idx 1691 } else { 1692 return findNonPKColumnMappingByName(sch, col.Name) 1693 } 1694 } 1695 1696 // tryMerge performs a cell-wise merge given left, right, and base cell value 1697 // tuples. It returns the merged cell value tuple and a bool indicating if a 1698 // conflict occurred. tryMerge should only be called if left and right produce 1699 // non-identical diffs against base. 1700 func (m *valueMerger) tryMerge(ctx *sql.Context, left, right, base val.Tuple) (val.Tuple, bool, error) { 1701 // If we're merging a keyless table and the keys match, but the values are different, 1702 // that means that the row data is the same, but the cardinality has changed, and if the 1703 // cardinality has changed in different ways on each merge side, we can't auto resolve. 1704 if m.keyless { 1705 return nil, false, nil 1706 } 1707 1708 for i := 0; i < len(m.baseToRightMapping); i++ { 1709 isConflict, err := m.processBaseColumn(ctx, i, left, right, base) 1710 if err != nil { 1711 return nil, false, err 1712 } 1713 if isConflict { 1714 return nil, false, nil 1715 } 1716 } 1717 1718 if base != nil && (left == nil) != (right == nil) { 1719 // One row deleted, the other modified 1720 // We just validated that this is not a conflict. 1721 return nil, true, nil 1722 } 1723 1724 mergedValues := make([][]byte, m.numCols) 1725 for i := 0; i < m.numCols; i++ { 1726 v, isConflict, err := m.processColumn(ctx, i, left, right, base) 1727 if err != nil { 1728 return nil, false, err 1729 } 1730 if isConflict { 1731 return nil, false, nil 1732 } 1733 mergedValues[i] = v 1734 } 1735 1736 return val.NewTuple(m.syncPool, mergedValues...), true, nil 1737 } 1738 1739 // processBaseColumn returns whether column |i| of the base schema, 1740 // if removed on one side, causes a conflict when merged with the other side. 1741 func (m *valueMerger) processBaseColumn(ctx context.Context, i int, left, right, base val.Tuple) (conflict bool, err error) { 1742 if base == nil { 1743 // We're resolving an insertion. This can be done entirely in `processColumn`. 1744 return false, nil 1745 } 1746 baseCol := base.GetField(i) 1747 1748 if left == nil { 1749 // Left side deleted the row. Thus, right side must have modified the row in order for there to be a conflict to resolve. 1750 rightCol, rightColIdx, rightColExists := getColumn(&right, &m.baseToRightMapping, i) 1751 1752 if !rightColExists { 1753 // Right side deleted the column while left side deleted the row. This is not a conflict. 1754 return false, nil 1755 } 1756 // This is a conflict if the value on the right changed. 1757 // But if the right side only changed its representation (from ALTER COLUMN) and still has the same value, 1758 // then this can be resolved. 1759 baseCol, err = convert(ctx, m.baseVD, m.rightVD, m.rightSchema, i, rightColIdx, base, baseCol, m.ns) 1760 if err != nil { 1761 return false, err 1762 } 1763 if isEqual(i, baseCol, rightCol, m.rightVD.Types[rightColIdx]) { 1764 // right column did not change, so there is no conflict. 1765 return false, nil 1766 } 1767 // conflicting modifications 1768 return true, nil 1769 } 1770 1771 if right == nil { 1772 // Right side deleted the row. Thus, left side must have modified the row in order for there to be a conflict to resolve. 1773 leftCol, leftColIdx, leftColExists := getColumn(&left, &m.baseToLeftMapping, i) 1774 1775 if !leftColExists { 1776 // Left side deleted the column while right side deleted the row. This is not a conflict. 1777 return false, nil 1778 } 1779 // This is a conflict if the value on the left changed. 1780 // But if the left side only changed its representation (from ALTER COLUMN) and still has the same value, 1781 // then this can be resolved. 1782 baseCol, err = convert(ctx, m.baseVD, m.leftVD, m.leftSchema, i, leftColIdx, base, baseCol, m.ns) 1783 if err != nil { 1784 return false, err 1785 } 1786 if isEqual(i, baseCol, leftCol, m.leftVD.Types[leftColIdx]) { 1787 // left column did not change, so there is no conflict. 1788 return false, nil 1789 } 1790 // conflicting modifications 1791 return true, nil 1792 } 1793 1794 rightCol, rightColIdx, rightColExists := getColumn(&right, &m.baseToRightMapping, i) 1795 1796 leftCol, leftColIdx, leftColExists := getColumn(&left, &m.baseToLeftMapping, i) 1797 1798 if leftColExists && rightColExists { 1799 // This column also exists in the merged schema, and will be processed there. 1800 return false, nil 1801 } 1802 1803 if !leftColExists && !rightColExists { 1804 // This column is a convergent deletion. There is no conflict. 1805 return false, nil 1806 } 1807 1808 var modifiedCol []byte 1809 var modifiedColIdx int 1810 var modifiedSchema schema.Schema 1811 var modifiedVD val.TupleDesc 1812 if !leftColExists { 1813 modifiedCol, modifiedColIdx = rightCol, rightColIdx 1814 modifiedSchema = m.rightSchema 1815 modifiedVD = m.rightVD 1816 } else { 1817 modifiedCol, modifiedColIdx = leftCol, leftColIdx 1818 modifiedSchema = m.leftSchema 1819 modifiedVD = m.leftVD 1820 } 1821 1822 baseCol, err = convert(ctx, m.baseVD, modifiedVD, modifiedSchema, i, modifiedColIdx, base, baseCol, m.ns) 1823 if err != nil { 1824 return false, err 1825 } 1826 if modifiedVD.Comparator().CompareValues(i, baseCol, modifiedCol, modifiedVD.Types[modifiedColIdx]) == 0 { 1827 return false, nil 1828 } 1829 return true, nil 1830 } 1831 1832 // processColumn returns the merged value of column |i| of the merged schema, 1833 // based on the |left|, |right|, and |base| schema. 1834 func (m *valueMerger) processColumn(ctx *sql.Context, i int, left, right, base val.Tuple) (result []byte, conflict bool, err error) { 1835 // missing columns are coerced into NULL column values 1836 1837 var baseCol []byte 1838 var baseColIdx = -1 1839 var baseColExists = false 1840 if base != nil { 1841 baseCol, baseColIdx, baseColExists = getColumn(&base, &m.baseMapping, i) 1842 } 1843 leftCol, leftColIdx, leftColExists := getColumn(&left, &m.leftMapping, i) 1844 rightCol, rightColIdx, rightColExists := getColumn(&right, &m.rightMapping, i) 1845 resultType := m.resultVD.Types[i] 1846 resultColumn := m.resultSchema.GetNonPKCols().GetByIndex(i) 1847 generatedColumn := resultColumn.Generated != "" 1848 1849 sqlType := m.resultSchema.GetNonPKCols().GetByIndex(i).TypeInfo.ToSqlType() 1850 1851 // We previously asserted that left and right are not nil. 1852 // But base can be nil in the event of convergent inserts. 1853 if base == nil || !baseColExists { 1854 // There are two possible cases: 1855 // - The base row doesn't exist, or 1856 // - The column doesn't exist in the base row 1857 // Regardless, both left and right are inserts, or one is an insert and the other doesn't exist. 1858 1859 if !rightColExists { 1860 return leftCol, false, nil 1861 } 1862 1863 rightCol, err = convert(ctx, m.rightVD, m.resultVD, m.resultSchema, rightColIdx, i, right, rightCol, m.ns) 1864 if err != nil { 1865 return nil, false, err 1866 } 1867 1868 if !leftColExists { 1869 return rightCol, false, nil 1870 } 1871 1872 leftCol, err = convert(ctx, m.leftVD, m.resultVD, m.resultSchema, leftColIdx, i, left, leftCol, m.ns) 1873 if err != nil { 1874 return nil, false, err 1875 } 1876 1877 if isEqual(i, leftCol, rightCol, resultType) { 1878 // Columns are equal, returning either would be correct. 1879 // However, for certain types the two columns may have different bytes. 1880 // We need to ensure that merges are deterministic regardless of the merge direction. 1881 // To achieve this, we sort the two values and return the higher one. 1882 if bytes.Compare(leftCol, rightCol) > 0 { 1883 return leftCol, false, nil 1884 } 1885 return rightCol, false, nil 1886 } 1887 1888 // generated columns will be updated as part of the merge later on, so choose either value for now 1889 if generatedColumn { 1890 return leftCol, false, nil 1891 } 1892 1893 // conflicting inserts 1894 return nil, true, nil 1895 } 1896 1897 // We can now assume that both left are right contain byte-level changes to an existing column. 1898 // But we need to know if those byte-level changes represent a modification to the underlying value, 1899 // and whether those changes represent the *same* modification, otherwise there's a conflict. 1900 1901 // We can't just look at the bytes to determine this, because if a cell's byte representation changed, 1902 // but only because of a schema change, we shouldn't consider that a conflict. 1903 // Conversely, if there was a schema change on only one side, we shouldn't consider the cells equal 1904 // even if they have the same bytes. 1905 1906 // Thus, we must convert all cells to the type in the result schema before comparing them. 1907 1908 if baseCol != nil { 1909 baseCol, err = convert(ctx, m.baseVD, m.resultVD, m.resultSchema, baseColIdx, i, base, baseCol, m.ns) 1910 if err != nil { 1911 return nil, false, err 1912 } 1913 } 1914 1915 var leftModified, rightModified bool 1916 1917 if leftColIdx == -1 && rightColIdx == -1 { 1918 // Both branches are implicitly NULL 1919 return nil, false, err 1920 } 1921 1922 if rightColIdx == -1 { 1923 // The right branch is implicitly NULL 1924 rightModified = baseCol != nil 1925 } else { 1926 // Attempt to convert the right column to match the result schema, then compare it to the base. 1927 rightCol, err = convert(ctx, m.rightVD, m.resultVD, m.resultSchema, rightColIdx, i, right, rightCol, m.ns) 1928 if err != nil { 1929 return nil, true, nil 1930 } 1931 rightModified = !isEqual(i, rightCol, baseCol, resultType) 1932 } 1933 1934 leftCol, err = convert(ctx, m.leftVD, m.resultVD, m.resultSchema, leftColIdx, i, left, leftCol, m.ns) 1935 if err != nil { 1936 return nil, true, nil 1937 } 1938 if isEqual(i, leftCol, rightCol, resultType) { 1939 // Columns are equal, returning either would be correct. 1940 // However, for certain types the two columns may have different bytes. 1941 // We need to ensure that merges are deterministic regardless of the merge direction. 1942 // To achieve this, we sort the two values and return the higher one. 1943 if bytes.Compare(leftCol, rightCol) > 0 { 1944 return leftCol, false, nil 1945 } 1946 return rightCol, false, nil 1947 } 1948 1949 leftModified = !isEqual(i, leftCol, baseCol, resultType) 1950 1951 switch { 1952 case leftModified && rightModified: 1953 // generated columns will be updated as part of the merge later on, so choose either value for now 1954 if generatedColumn { 1955 return leftCol, false, nil 1956 } 1957 // concurrent modification 1958 // if the result type is JSON, we can attempt to merge the JSON changes. 1959 dontMergeJsonVar, err := ctx.Session.GetSessionVariable(ctx, "dolt_dont_merge_json") 1960 if err != nil { 1961 return nil, true, err 1962 } 1963 disallowJsonMerge, err := sql.ConvertToBool(ctx, dontMergeJsonVar) 1964 if err != nil { 1965 return nil, true, err 1966 } 1967 if _, ok := sqlType.(types.JsonType); ok && !disallowJsonMerge { 1968 return m.mergeJSONAddr(ctx, baseCol, leftCol, rightCol) 1969 } 1970 // otherwise, this is a conflict. 1971 return nil, true, nil 1972 case leftModified: 1973 return leftCol, false, nil 1974 default: 1975 return rightCol, false, nil 1976 } 1977 } 1978 1979 func (m *valueMerger) mergeJSONAddr(ctx context.Context, baseAddr []byte, leftAddr []byte, rightAddr []byte) (resultAddr []byte, conflict bool, err error) { 1980 baseDoc, err := tree.NewJSONDoc(hash.New(baseAddr), m.ns).ToJSONDocument(ctx) 1981 if err != nil { 1982 return nil, true, err 1983 } 1984 leftDoc, err := tree.NewJSONDoc(hash.New(leftAddr), m.ns).ToJSONDocument(ctx) 1985 if err != nil { 1986 return nil, true, err 1987 } 1988 rightDoc, err := tree.NewJSONDoc(hash.New(rightAddr), m.ns).ToJSONDocument(ctx) 1989 if err != nil { 1990 return nil, true, err 1991 } 1992 1993 mergedDoc, conflict, err := mergeJSON(baseDoc, leftDoc, rightDoc) 1994 if err != nil { 1995 return nil, true, err 1996 } 1997 if conflict { 1998 return nil, true, nil 1999 } 2000 2001 mergedVal, err := mergedDoc.ToInterface() 2002 if err != nil { 2003 return nil, true, err 2004 } 2005 mergedBytes, err := json.Marshal(mergedVal) 2006 if err != nil { 2007 return nil, true, err 2008 } 2009 mergedAddr, err := tree.SerializeBytesToAddr(ctx, m.ns, bytes.NewReader(mergedBytes), len(mergedBytes)) 2010 if err != nil { 2011 return nil, true, err 2012 } 2013 return mergedAddr[:], false, nil 2014 2015 } 2016 2017 func mergeJSON(base types.JSONDocument, left types.JSONDocument, right types.JSONDocument) (resultDoc types.JSONDocument, conflict bool, err error) { 2018 // First, deserialize each value into JSON. 2019 // We can only merge if the value at all three commits is a JSON object. 2020 2021 baseObject, baseIsObject := base.Val.(types.JsonObject) 2022 leftObject, leftIsObject := left.Val.(types.JsonObject) 2023 rightObject, rightIsObject := right.Val.(types.JsonObject) 2024 2025 if !baseIsObject || !leftIsObject || !rightIsObject { 2026 // At least one of the commits does not have a JSON object. 2027 // If both left and right have the same value, use that value. 2028 // But if they differ, this is an unresolvable merge conflict. 2029 cmp, err := left.Compare(right) 2030 if err != nil { 2031 return types.JSONDocument{}, true, err 2032 } 2033 if cmp == 0 { 2034 //convergent operation. 2035 return left, false, nil 2036 } else { 2037 return types.JSONDocument{}, true, nil 2038 } 2039 } 2040 2041 mergedObject := maps.Clone(leftObject) 2042 merged := types.JSONDocument{Val: mergedObject} 2043 2044 threeWayDiffer := NewThreeWayJsonDiffer(baseObject, leftObject, rightObject) 2045 2046 // Compute the merged object by applying diffs to the left object as needed. 2047 for { 2048 threeWayDiff, err := threeWayDiffer.Next() 2049 if err == io.EOF { 2050 return merged, false, nil 2051 } 2052 2053 switch threeWayDiff.Op { 2054 case tree.DiffOpRightAdd, tree.DiffOpConvergentAdd, tree.DiffOpRightModify, tree.DiffOpConvergentModify: 2055 _, _, err := merged.Set(threeWayDiff.Key, threeWayDiff.Right) 2056 if err != nil { 2057 return types.JSONDocument{}, true, err 2058 } 2059 case tree.DiffOpRightDelete, tree.DiffOpConvergentDelete: 2060 _, _, err := merged.Remove(threeWayDiff.Key) 2061 if err != nil { 2062 return types.JSONDocument{}, true, err 2063 } 2064 case tree.DiffOpLeftAdd, tree.DiffOpLeftModify, tree.DiffOpLeftDelete: 2065 // these changes already exist on the left, so do nothing. 2066 case tree.DiffOpDivergentModifyConflict, tree.DiffOpDivergentDeleteConflict: 2067 return types.JSONDocument{}, true, nil 2068 default: 2069 panic("unreachable") 2070 } 2071 } 2072 } 2073 2074 func isEqual(i int, left []byte, right []byte, resultType val.Type) bool { 2075 // We use a default comparator instead of the comparator in the schema. 2076 // This is necessary to force a binary collation for string comparisons. 2077 return val.DefaultTupleComparator{}.CompareValues(i, left, right, resultType) == 0 2078 } 2079 2080 func getColumn(tuple *val.Tuple, mapping *val.OrdinalMapping, idx int) (col []byte, colIndex int, exists bool) { 2081 colIdx := (*mapping)[idx] 2082 if colIdx == -1 { 2083 return nil, -1, false 2084 } 2085 return tuple.GetField(colIdx), colIdx, true 2086 } 2087 2088 // convert takes the `i`th column in the provided tuple and converts it to the type specified in the provided schema. 2089 // returns the new representation, and a bool indicating success. 2090 func convert(ctx context.Context, fromDesc, toDesc val.TupleDesc, toSchema schema.Schema, fromIndex, toIndex int, tuple val.Tuple, originalValue []byte, ns tree.NodeStore) ([]byte, error) { 2091 if fromDesc.Types[fromIndex] == toDesc.Types[toIndex] { 2092 // No conversion is necessary here. 2093 return originalValue, nil 2094 } 2095 parsedCell, err := tree.GetField(ctx, fromDesc, fromIndex, tuple, ns) 2096 if err != nil { 2097 return nil, err 2098 } 2099 sqlType := toSchema.GetNonPKCols().GetByIndex(toIndex).TypeInfo.ToSqlType() 2100 convertedCell, _, err := sqlType.Convert(parsedCell) 2101 if err != nil { 2102 return nil, err 2103 } 2104 typ := toDesc.Types[toIndex] 2105 // If a merge results in assigning NULL to a non-null column, don't panic. 2106 // Instead we validate the merged tuple before merging it into the table. 2107 typ.Nullable = true 2108 return tree.Serialize(ctx, ns, typ, convertedCell) 2109 }