github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/merge/merge_noms_rows.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package merge 16 17 import ( 18 "context" 19 "encoding/json" 20 "fmt" 21 "strings" 22 23 "github.com/dolthub/go-mysql-server/sql" 24 "golang.org/x/sync/errgroup" 25 26 "github.com/dolthub/dolt/go/libraries/doltcore/conflict" 27 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 28 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" 29 "github.com/dolthub/dolt/go/libraries/doltcore/row" 30 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 31 "github.com/dolthub/dolt/go/libraries/doltcore/table/editor" 32 "github.com/dolthub/dolt/go/libraries/utils/valutil" 33 "github.com/dolthub/dolt/go/store/types" 34 ) 35 36 type rowMergeResult struct { 37 mergedRow types.Value 38 didCellMerge bool 39 isConflict bool 40 } 41 42 type rowMerger func(ctx context.Context, nbf *types.NomsBinFormat, sch schema.Schema, r, mergeRow, baseRow types.Value) (rowMergeResult, error) 43 44 type applicator func(ctx context.Context, sch schema.Schema, tableEditor editor.TableEditor, rowData types.Map, stats *MergeStats, change types.ValueChanged) error 45 46 // mergeNomsTable merges a Noms table, which includes updating row data, secondary index data, and applying the specified |mergedSch|. 47 func mergeNomsTable(ctx *sql.Context, tm *TableMerger, mergedSch schema.Schema, vrw types.ValueReadWriter, opts editor.Options) (*doltdb.Table, *MergeStats, error) { 48 // For schema changes on Nom data, we don't need to migrate the existing data, because each column is mapped by 49 // a hash key, and isn't a direct []byte lookup, like with Prolly storage, so it's safe to immediately update 50 // the table to the merged schema. 51 mergeTbl, err := tm.leftTbl.UpdateSchema(ctx, mergedSch) 52 if err != nil { 53 return nil, nil, err 54 } 55 56 // If any indexes were added during the merge, then we need to generate their row data to add to our updated table. 57 addedIndexesSet := make(map[string]string) 58 for _, index := range mergedSch.Indexes().AllIndexes() { 59 addedIndexesSet[strings.ToLower(index.Name())] = index.Name() 60 } 61 for _, index := range tm.leftSch.Indexes().AllIndexes() { 62 delete(addedIndexesSet, strings.ToLower(index.Name())) 63 } 64 for _, addedIndex := range addedIndexesSet { 65 newIndexData, err := editor.RebuildIndex(ctx, mergeTbl, addedIndex, opts) 66 if err != nil { 67 return nil, nil, err 68 } 69 mergeTbl, err = mergeTbl.SetNomsIndexRows(ctx, addedIndex, newIndexData) 70 if err != nil { 71 return nil, nil, err 72 } 73 } 74 75 updatedTblEditor, err := editor.NewTableEditor(ctx, mergeTbl, mergedSch, tm.name, opts) 76 if err != nil { 77 return nil, nil, err 78 } 79 80 rows, err := tm.leftTbl.GetNomsRowData(ctx) 81 if err != nil { 82 return nil, nil, err 83 } 84 85 mergeRows, err := tm.rightTbl.GetNomsRowData(ctx) 86 if err != nil { 87 return nil, nil, err 88 } 89 90 ancRows, err := tm.ancTbl.GetRowData(ctx) 91 if err != nil { 92 return nil, nil, err 93 } 94 95 resultTbl, cons, stats, err := mergeNomsTableData(ctx, vrw, tm.name, mergedSch, rows, mergeRows, durable.NomsMapFromIndex(ancRows), updatedTblEditor) 96 if err != nil { 97 return nil, nil, err 98 } 99 100 if cons.Len() > 0 { 101 resultTbl, err = setConflicts(ctx, durable.ConflictIndexFromNomsMap(cons, vrw), tm.leftTbl, tm.rightTbl, tm.ancTbl, resultTbl) 102 if err != nil { 103 return nil, nil, err 104 } 105 stats.DataConflicts = int(cons.Len()) 106 } 107 108 resultTbl, err = mergeAutoIncrementValues(ctx, tm.leftTbl, tm.rightTbl, resultTbl) 109 if err != nil { 110 return nil, nil, err 111 } 112 113 return resultTbl, stats, nil 114 } 115 116 func mergeNomsTableData( 117 ctx *sql.Context, 118 vrw types.ValueReadWriter, 119 tblName string, 120 sch schema.Schema, 121 rows, mergeRows, ancRows types.Map, 122 tblEdit editor.TableEditor, 123 ) (*doltdb.Table, types.Map, *MergeStats, error) { 124 var rowMerge rowMerger 125 var applyChange applicator 126 if schema.IsKeyless(sch) { 127 rowMerge = keylessRowMerge 128 applyChange = applyKeylessChange 129 } else { 130 rowMerge = nomsPkRowMerge 131 applyChange = applyNomsPkChange 132 } 133 134 changeChan, mergeChangeChan := make(chan types.ValueChanged, 32), make(chan types.ValueChanged, 32) 135 136 originalCtx := ctx 137 eg, errGrCtx := errgroup.WithContext(ctx) 138 ctx = originalCtx.WithContext(errGrCtx) 139 140 eg.Go(func() error { 141 defer close(changeChan) 142 return rows.Diff(ctx, ancRows, changeChan) 143 }) 144 eg.Go(func() error { 145 defer close(mergeChangeChan) 146 return mergeRows.Diff(ctx, ancRows, mergeChangeChan) 147 }) 148 149 conflictValChan := make(chan types.Value) 150 sm := types.NewStreamingMap(ctx, vrw, conflictValChan) 151 stats := &MergeStats{Operation: TableModified} 152 153 eg.Go(func() error { 154 defer close(conflictValChan) 155 156 var change, mergeChange types.ValueChanged 157 for { 158 // Get the next change from both a and b. If either diff(a, parent) or diff(b, parent) is 159 // complete, aChange or bChange will get an empty types.ValueChanged containing a nil Value. 160 // Generally, though, this allows us to proceed through both diffs in (key) order, considering 161 // the "current" change from both diffs at the same time. 162 if change.Key == nil { 163 select { 164 case change = <-changeChan: 165 break 166 case <-ctx.Done(): 167 return ctx.Err() 168 } 169 } 170 if mergeChange.Key == nil { 171 select { 172 case mergeChange = <-mergeChangeChan: 173 break 174 case <-ctx.Done(): 175 return ctx.Err() 176 } 177 } 178 179 key, mergeKey := change.Key, mergeChange.Key 180 181 // Both channels are producing zero values, so we're done. 182 if key == nil && mergeKey == nil { 183 break 184 } 185 186 var err error 187 var processed bool 188 if key != nil { 189 mkNilOrKeyLess := mergeKey == nil 190 if !mkNilOrKeyLess { 191 mkNilOrKeyLess, err = key.Less(ctx, vrw.Format(), mergeKey) 192 if err != nil { 193 return err 194 } 195 } 196 197 if mkNilOrKeyLess { 198 // change will already be in the map 199 // we apply changes directly to "ours" 200 // instead of to ancestor 201 change = types.ValueChanged{} 202 processed = true 203 } 204 } 205 206 if !processed && mergeKey != nil { 207 keyNilOrMKLess := key == nil 208 if !keyNilOrMKLess { 209 keyNilOrMKLess, err = mergeKey.Less(ctx, vrw.Format(), key) 210 if err != nil { 211 return err 212 } 213 } 214 215 if keyNilOrMKLess { 216 err = applyChange(ctx, sch, tblEdit, rows, stats, mergeChange) 217 if err != nil { 218 return err 219 } 220 mergeChange = types.ValueChanged{} 221 processed = true 222 } 223 } 224 225 if !processed { 226 r, mergeRow, ancRow := change.NewValue, mergeChange.NewValue, change.OldValue 227 rowMergeResult, err := rowMerge(ctx, vrw.Format(), sch, r, mergeRow, ancRow) 228 if err != nil { 229 return err 230 } 231 if rowMergeResult.isConflict { 232 conflictTuple, err := conflict.NewConflict(ancRow, r, mergeRow).ToNomsList(vrw) 233 if err != nil { 234 return err 235 } 236 237 err = addConflict(conflictValChan, sm.Done(), key, conflictTuple) 238 if err != nil { 239 return err 240 } 241 } else { 242 vc := types.ValueChanged{ChangeType: change.ChangeType, Key: key, NewValue: rowMergeResult.mergedRow} 243 if rowMergeResult.didCellMerge { 244 vc.OldValue = r 245 } else { 246 vc.OldValue = ancRow 247 } 248 err = applyChange(ctx, sch, tblEdit, rows, stats, vc) 249 if err != nil { 250 return err 251 } 252 } 253 254 change = types.ValueChanged{} 255 mergeChange = types.ValueChanged{} 256 } 257 } 258 259 return nil 260 }) 261 262 var conflicts types.Map 263 eg.Go(func() error { 264 var err error 265 // |sm|'s errgroup is a child of |eg| 266 // so we must wait here, before |eg| finishes 267 conflicts, err = sm.Wait() 268 return err 269 }) 270 271 if err := eg.Wait(); err != nil { 272 return nil, types.EmptyMap, nil, err 273 } 274 275 mergedTable, err := tblEdit.Table(originalCtx) 276 if err != nil { 277 return nil, types.EmptyMap, nil, err 278 } 279 280 return mergedTable, conflicts, stats, nil 281 } 282 283 func addConflict(conflictChan chan types.Value, done <-chan struct{}, key types.Value, value types.Tuple) error { 284 select { 285 case conflictChan <- key: 286 case <-done: 287 return context.Canceled 288 } 289 select { 290 case conflictChan <- value: 291 case <-done: 292 return context.Canceled 293 } 294 return nil 295 } 296 297 func applyNomsPkChange(ctx context.Context, sch schema.Schema, tableEditor editor.TableEditor, rowData types.Map, stats *MergeStats, change types.ValueChanged) error { 298 switch change.ChangeType { 299 case types.DiffChangeAdded: 300 newRow, err := row.FromNoms(sch, change.Key.(types.Tuple), change.NewValue.(types.Tuple)) 301 if err != nil { 302 return err 303 } 304 // TODO(andy): because we apply changes to "ours" instead of ancestor 305 // we have to check for duplicate primary key errors here. 306 val, ok, err := rowData.MaybeGet(ctx, change.Key) 307 if err != nil { 308 return err 309 } else if ok { 310 oldRow, err := row.FromNoms(sch, change.Key.(types.Tuple), val.(types.Tuple)) 311 if err != nil { 312 return err 313 } 314 err = tableEditor.UpdateRow(ctx, oldRow, newRow, makeDupHandler(ctx, tableEditor, change.Key.(types.Tuple), change.NewValue.(types.Tuple))) 315 if err != nil { 316 if err != nil { 317 return err 318 } 319 } 320 } else { 321 err = tableEditor.InsertRow(ctx, newRow, makeDupHandler(ctx, tableEditor, change.Key.(types.Tuple), change.NewValue.(types.Tuple))) 322 if err != nil { 323 if err != nil { 324 return err 325 } 326 } 327 } 328 stats.Adds++ 329 case types.DiffChangeModified: 330 key, oldVal, newVal := change.Key.(types.Tuple), change.OldValue.(types.Tuple), change.NewValue.(types.Tuple) 331 oldRow, err := row.FromNoms(sch, key, oldVal) 332 if err != nil { 333 return err 334 } 335 newRow, err := row.FromNoms(sch, key, newVal) 336 if err != nil { 337 return err 338 } 339 err = tableEditor.UpdateRow(ctx, oldRow, newRow, makeDupHandler(ctx, tableEditor, key, newVal)) 340 if err != nil { 341 if err != nil { 342 return err 343 } 344 } 345 stats.Modifications++ 346 case types.DiffChangeRemoved: 347 key := change.Key.(types.Tuple) 348 value := change.OldValue.(types.Tuple) 349 tv, err := row.TaggedValuesFromTupleKeyAndValue(key, value) 350 if err != nil { 351 return err 352 } 353 354 err = tableEditor.DeleteByKey(ctx, key, tv) 355 if err != nil { 356 return err 357 } 358 359 stats.Deletes++ 360 } 361 362 return nil 363 } 364 365 func makeDupHandler(ctx context.Context, tableEditor editor.TableEditor, newKey, newValue types.Tuple) editor.PKDuplicateCb { 366 return func(keyString, indexName string, existingKey, existingValue types.Tuple, isPk bool) error { 367 if isPk { 368 return fmt.Errorf("duplicate key '%s'", keyString) 369 } 370 371 sch := tableEditor.Schema() 372 idx := sch.Indexes().GetByName(indexName) 373 m, err := makeUniqViolMeta(sch, idx) 374 if err != nil { 375 return err 376 } 377 d, err := json.Marshal(m) 378 if err != nil { 379 return err 380 } 381 382 err = addUniqueViolation(ctx, tableEditor, existingKey, existingValue, newKey, newValue, d) 383 if err != nil { 384 return err 385 } 386 387 return nil 388 } 389 } 390 391 func addUniqueViolation(ctx context.Context, tableEditor editor.TableEditor, existingKey, existingVal, newKey, newVal types.Tuple, jsonData []byte) error { 392 nomsJson, err := jsonDataToNomsValue(ctx, tableEditor.ValueReadWriter(), jsonData) 393 if err != nil { 394 return err 395 } 396 cvKey, cvVal, err := toConstraintViolationRow(ctx, CvType_UniqueIndex, nomsJson, newKey, newVal) 397 if err != nil { 398 return err 399 } 400 err = tableEditor.SetConstraintViolation(ctx, cvKey, cvVal) 401 if err != nil { 402 return err 403 } 404 405 cvKey, cvVal, err = toConstraintViolationRow(ctx, CvType_UniqueIndex, nomsJson, existingKey, existingVal) 406 if err != nil { 407 return err 408 } 409 err = tableEditor.SetConstraintViolation(ctx, cvKey, cvVal) 410 if err != nil { 411 return err 412 } 413 414 return nil 415 } 416 417 func applyKeylessChange(ctx context.Context, sch schema.Schema, tableEditor editor.TableEditor, _ types.Map, stats *MergeStats, change types.ValueChanged) (err error) { 418 apply := func(ch types.ValueChanged) error { 419 switch ch.ChangeType { 420 case types.DiffChangeAdded: 421 newRow, err := row.FromNoms(sch, ch.Key.(types.Tuple), ch.NewValue.(types.Tuple)) 422 if err != nil { 423 return err 424 } 425 err = tableEditor.InsertRow(ctx, newRow, nil) 426 if err != nil { 427 return err 428 } 429 stats.Adds++ 430 case types.DiffChangeModified: 431 oldRow, err := row.FromNoms(sch, ch.Key.(types.Tuple), ch.OldValue.(types.Tuple)) 432 if err != nil { 433 return err 434 } 435 newRow, err := row.FromNoms(sch, ch.Key.(types.Tuple), ch.NewValue.(types.Tuple)) 436 if err != nil { 437 return err 438 } 439 err = tableEditor.UpdateRow(ctx, oldRow, newRow, nil) 440 if err != nil { 441 return err 442 } 443 stats.Modifications++ 444 case types.DiffChangeRemoved: 445 key := change.Key.(types.Tuple) 446 value := change.OldValue.(types.Tuple) 447 tv, err := row.TaggedValuesFromTupleKeyAndValue(key, value) 448 if err != nil { 449 return err 450 } 451 452 err = tableEditor.DeleteByKey(ctx, key, tv) 453 if err != nil { 454 return err 455 } 456 457 stats.Deletes++ 458 } 459 return nil 460 } 461 462 var card uint64 463 change, card, err = convertValueChanged(change) 464 if err != nil { 465 return err 466 } 467 468 for card > 0 { 469 if err = apply(change); err != nil { 470 return err 471 } 472 card-- 473 } 474 return nil 475 } 476 477 func convertValueChanged(vc types.ValueChanged) (types.ValueChanged, uint64, error) { 478 var oldCard uint64 479 if vc.OldValue != nil { 480 v, err := vc.OldValue.(types.Tuple).Get(row.KeylessCardinalityValIdx) 481 if err != nil { 482 return vc, 0, err 483 } 484 oldCard = uint64(v.(types.Uint)) 485 } 486 487 var newCard uint64 488 if vc.NewValue != nil { 489 v, err := vc.NewValue.(types.Tuple).Get(row.KeylessCardinalityValIdx) 490 if err != nil { 491 return vc, 0, err 492 } 493 newCard = uint64(v.(types.Uint)) 494 } 495 496 switch vc.ChangeType { 497 case types.DiffChangeRemoved: 498 return vc, oldCard, nil 499 500 case types.DiffChangeAdded: 501 return vc, newCard, nil 502 503 case types.DiffChangeModified: 504 delta := int64(newCard) - int64(oldCard) 505 if delta > 0 { 506 vc.ChangeType = types.DiffChangeAdded 507 vc.OldValue = nil 508 return vc, uint64(delta), nil 509 } else if delta < 0 { 510 vc.ChangeType = types.DiffChangeRemoved 511 vc.NewValue = nil 512 return vc, uint64(-delta), nil 513 } else { 514 panic(fmt.Sprintf("diff with delta = 0 for key: %s", vc.Key.HumanReadableString())) 515 } 516 default: 517 return vc, 0, fmt.Errorf("unexpected DiffChange type %d", vc.ChangeType) 518 } 519 } 520 521 // pkRowMerge returns the merged value, if a cell-wise merge was performed, and whether a conflict occurred 522 func nomsPkRowMerge(ctx context.Context, nbf *types.NomsBinFormat, sch schema.Schema, r, mergeRow, baseRow types.Value) (rowMergeResult, error) { 523 var baseVals row.TaggedValues 524 if baseRow == nil { 525 if r.Equals(mergeRow) { 526 // same row added to both 527 return rowMergeResult{r, false, false}, nil 528 } 529 } else if r == nil && mergeRow == nil { 530 // same row removed from both 531 return rowMergeResult{nil, false, false}, nil 532 } else if r == nil || mergeRow == nil { 533 // removed from one and modified in another 534 return rowMergeResult{nil, false, true}, nil 535 } else { 536 var err error 537 baseVals, err = row.ParseTaggedValues(baseRow.(types.Tuple)) 538 539 if err != nil { 540 return rowMergeResult{}, err 541 } 542 } 543 544 rowVals, err := row.ParseTaggedValues(r.(types.Tuple)) 545 if err != nil { 546 return rowMergeResult{}, err 547 } 548 549 mergeVals, err := row.ParseTaggedValues(mergeRow.(types.Tuple)) 550 if err != nil { 551 return rowMergeResult{}, err 552 } 553 554 var didMerge bool 555 processTagFunc := func(tag uint64) (resultVal types.Value, isConflict bool) { 556 baseVal, _ := baseVals.Get(tag) 557 val, _ := rowVals.Get(tag) 558 mergeVal, _ := mergeVals.Get(tag) 559 560 if valutil.NilSafeEqCheck(val, mergeVal) { 561 return val, false 562 } else { 563 modified := !valutil.NilSafeEqCheck(val, baseVal) 564 mergeModified := !valutil.NilSafeEqCheck(mergeVal, baseVal) 565 switch { 566 case modified && mergeModified: 567 return nil, true 568 case modified: 569 didMerge = true 570 return val, false 571 default: 572 didMerge = true 573 return mergeVal, false 574 } 575 } 576 577 } 578 579 resultVals := make(row.TaggedValues) 580 581 var isConflict bool 582 err = sch.GetNonPKCols().Iter(func(tag uint64, _ schema.Column) (stop bool, err error) { 583 var val types.Value 584 val, isConflict = processTagFunc(tag) 585 resultVals[tag] = val 586 587 return isConflict, nil 588 }) 589 590 if err != nil { 591 return rowMergeResult{}, err 592 } 593 594 if isConflict { 595 return rowMergeResult{nil, false, true}, nil 596 } 597 598 tpl := resultVals.NomsTupleForNonPKCols(nbf, sch.GetNonPKCols()) 599 v, err := tpl.Value(ctx) 600 601 if err != nil { 602 return rowMergeResult{}, err 603 } 604 605 return rowMergeResult{v, didMerge, false}, nil 606 } 607 608 func keylessRowMerge(ctx context.Context, nbf *types.NomsBinFormat, sch schema.Schema, val, mergeVal, ancVal types.Value) (rowMergeResult, error) { 609 // both sides of the merge produced a diff for this key, 610 // so we always throw a conflict 611 return rowMergeResult{nil, false, true}, nil 612 } 613 614 func mergeAutoIncrementValues(ctx context.Context, tbl, otherTbl, resultTbl *doltdb.Table) (*doltdb.Table, error) { 615 // only need to check one table, no PK changes yet 616 sch, err := tbl.GetSchema(ctx) 617 if err != nil { 618 return nil, err 619 } 620 if !schema.HasAutoIncrement(sch) { 621 return resultTbl, nil 622 } 623 624 autoVal, err := tbl.GetAutoIncrementValue(ctx) 625 if err != nil { 626 return nil, err 627 } 628 mergeAutoVal, err := otherTbl.GetAutoIncrementValue(ctx) 629 if err != nil { 630 return nil, err 631 } 632 if autoVal < mergeAutoVal { 633 autoVal = mergeAutoVal 634 } 635 return resultTbl.SetAutoIncrementValue(ctx, autoVal) 636 }