github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/diff/table_deltas.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package diff 16 17 import ( 18 "context" 19 "fmt" 20 "sort" 21 22 "github.com/dolthub/go-mysql-server/sql" 23 24 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 25 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" 26 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 27 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlfmt" 28 "github.com/dolthub/dolt/go/libraries/utils/set" 29 "github.com/dolthub/dolt/go/store/prolly/tree" 30 "github.com/dolthub/dolt/go/store/types" 31 ) 32 33 type TableDiffType int 34 35 const ( 36 AddedTable TableDiffType = iota 37 ModifiedTable 38 RenamedTable 39 RemovedTable 40 ) 41 42 const DBPrefix = "__DATABASE__" 43 44 type TableInfo struct { 45 Name string 46 Sch schema.Schema 47 CreateStmt string 48 } 49 50 // TableDelta represents the change of a single table between two roots. 51 // FromFKs and ToFKs contain Foreign Keys that constrain columns in this table, 52 // they do not contain Foreign Keys that reference this table. 53 type TableDelta struct { 54 FromName string 55 ToName string 56 FromTable *doltdb.Table 57 ToTable *doltdb.Table 58 FromNodeStore tree.NodeStore 59 ToNodeStore tree.NodeStore 60 FromVRW types.ValueReadWriter 61 ToVRW types.ValueReadWriter 62 FromSch schema.Schema 63 ToSch schema.Schema 64 FromFks []doltdb.ForeignKey 65 ToFks []doltdb.ForeignKey 66 ToFksParentSch map[string]schema.Schema 67 FromFksParentSch map[string]schema.Schema 68 } 69 70 type TableDeltaSummary struct { 71 DiffType string 72 DataChange bool 73 SchemaChange bool 74 TableName string 75 FromTableName string 76 ToTableName string 77 AlterStmts []string 78 } 79 80 // IsAdd returns true if the table was added between the fromRoot and toRoot. 81 func (tds TableDeltaSummary) IsAdd() bool { 82 return tds.FromTableName == "" && tds.ToTableName != "" 83 } 84 85 // IsDrop returns true if the table was dropped between the fromRoot and toRoot. 86 func (tds TableDeltaSummary) IsDrop() bool { 87 return tds.FromTableName != "" && tds.ToTableName == "" 88 } 89 90 // IsRename return true if the table was renamed between the fromRoot and toRoot. 91 func (tds TableDeltaSummary) IsRename() bool { 92 if tds.IsAdd() || tds.IsDrop() { 93 return false 94 } 95 return tds.FromTableName != tds.ToTableName 96 } 97 98 // GetStagedUnstagedTableDeltas represents staged and unstaged changes as TableDelta slices. 99 func GetStagedUnstagedTableDeltas(ctx context.Context, roots doltdb.Roots) (staged, unstaged []TableDelta, err error) { 100 staged, err = GetTableDeltas(ctx, roots.Head, roots.Staged) 101 if err != nil { 102 return nil, nil, err 103 } 104 105 unstaged, err = GetTableDeltas(ctx, roots.Staged, roots.Working) 106 if err != nil { 107 return nil, nil, err 108 } 109 110 return staged, unstaged, nil 111 } 112 113 // GetTableDeltas returns a slice of TableDelta objects for each table that changed between fromRoot and toRoot. 114 // It matches tables across roots by finding Schemas with Column tags in common. 115 func GetTableDeltas(ctx context.Context, fromRoot, toRoot doltdb.RootValue) (deltas []TableDelta, err error) { 116 fromVRW := fromRoot.VRW() 117 fromNS := fromRoot.NodeStore() 118 toVRW := toRoot.VRW() 119 toNS := toRoot.NodeStore() 120 121 fromDeltas := make([]TableDelta, 0) 122 err = fromRoot.IterTables(ctx, func(name string, tbl *doltdb.Table, sch schema.Schema) (stop bool, err error) { 123 c, err := fromRoot.GetForeignKeyCollection(ctx) 124 if err != nil { 125 return true, err 126 } 127 fks, _ := c.KeysForTable(name) 128 parentSchs, err := getFkParentSchs(ctx, fromRoot, fks...) 129 if err != nil { 130 return false, err 131 } 132 133 fromDeltas = append(fromDeltas, TableDelta{ 134 FromName: name, 135 FromTable: tbl, 136 FromSch: sch, 137 FromFks: fks, 138 FromFksParentSch: parentSchs, 139 FromVRW: fromVRW, 140 FromNodeStore: fromNS, 141 ToVRW: toVRW, 142 ToNodeStore: toNS, 143 }) 144 return 145 }) 146 if err != nil { 147 return nil, err 148 } 149 150 toDeltas := make([]TableDelta, 0) 151 152 err = toRoot.IterTables(ctx, func(name string, tbl *doltdb.Table, sch schema.Schema) (stop bool, err error) { 153 c, err := toRoot.GetForeignKeyCollection(ctx) 154 if err != nil { 155 return true, err 156 } 157 158 fks, _ := c.KeysForTable(name) 159 parentSchs, err := getFkParentSchs(ctx, toRoot, fks...) 160 if err != nil { 161 return false, err 162 } 163 164 toDeltas = append(toDeltas, TableDelta{ 165 ToName: name, 166 ToTable: tbl, 167 ToSch: sch, 168 ToFks: fks, 169 ToFksParentSch: parentSchs, 170 FromVRW: fromVRW, 171 FromNodeStore: fromNS, 172 ToVRW: toVRW, 173 ToNodeStore: toNS, 174 }) 175 return 176 }) 177 if err != nil { 178 return nil, err 179 } 180 181 deltas = matchTableDeltas(fromDeltas, toDeltas) 182 deltas, err = filterUnmodifiedTableDeltas(deltas) 183 if err != nil { 184 return nil, err 185 } 186 187 fromColl, err := fromRoot.GetCollation(ctx) 188 if err != nil { 189 return nil, err 190 } 191 toColl, err := toRoot.GetCollation(ctx) 192 if err != nil { 193 return nil, err 194 } 195 if fromColl != toColl { 196 sqlCtx, ok := ctx.(*sql.Context) 197 if ok { 198 dbName := DBPrefix + sqlCtx.GetCurrentDatabase() 199 deltas = append(deltas, TableDelta{ 200 FromName: dbName, 201 ToName: dbName, 202 }) 203 } 204 } 205 206 // Make sure we always return the same order of deltas 207 sort.Slice(deltas, func(i, j int) bool { 208 if deltas[i].FromName == deltas[j].FromName { 209 return deltas[i].ToName < deltas[j].ToName 210 } 211 return deltas[i].FromName < deltas[j].FromName 212 }) 213 214 return deltas, nil 215 } 216 217 func getFkParentSchs(ctx context.Context, root doltdb.RootValue, fks ...doltdb.ForeignKey) (map[string]schema.Schema, error) { 218 schs := make(map[string]schema.Schema) 219 for _, toFk := range fks { 220 toRefTable, _, ok, err := doltdb.GetTableInsensitive(ctx, root, toFk.ReferencedTableName) 221 if err != nil { 222 return nil, err 223 } 224 if !ok { 225 continue // as the schemas are for display-only, we can skip on any missing parents (they were deleted, etc.) 226 } 227 toRefSch, err := toRefTable.GetSchema(ctx) 228 if err != nil { 229 return nil, err 230 } 231 schs[toFk.ReferencedTableName] = toRefSch 232 } 233 return schs, nil 234 } 235 236 func filterUnmodifiedTableDeltas(deltas []TableDelta) ([]TableDelta, error) { 237 var filtered []TableDelta 238 for _, d := range deltas { 239 if d.ToTable == nil || d.FromTable == nil { 240 // Table was added or dropped 241 filtered = append(filtered, d) 242 continue 243 } 244 245 hasChanges, err := d.HasChanges() 246 if err != nil { 247 return nil, err 248 } 249 250 if hasChanges { 251 // Take only modified tables 252 filtered = append(filtered, d) 253 } 254 } 255 256 return filtered, nil 257 } 258 259 func matchTableDeltas(fromDeltas, toDeltas []TableDelta) (deltas []TableDelta) { 260 var matchedNames []string 261 from := make(map[string]TableDelta, len(fromDeltas)) 262 for _, f := range fromDeltas { 263 from[f.FromName] = f 264 } 265 266 to := make(map[string]TableDelta, len(toDeltas)) 267 for _, t := range toDeltas { 268 to[t.ToName] = t 269 if _, ok := from[t.ToName]; ok { 270 matchedNames = append(matchedNames, t.ToName) 271 } 272 } 273 274 match := func(t, f TableDelta) TableDelta { 275 return TableDelta{ 276 FromName: f.FromName, 277 ToName: t.ToName, 278 FromTable: f.FromTable, 279 ToTable: t.ToTable, 280 FromSch: f.FromSch, 281 ToSch: t.ToSch, 282 FromFks: f.FromFks, 283 ToFks: t.ToFks, 284 FromFksParentSch: f.FromFksParentSch, 285 ToFksParentSch: t.ToFksParentSch, 286 } 287 } 288 289 deltas = make([]TableDelta, 0) 290 291 for _, name := range matchedNames { 292 t := to[name] 293 f := from[name] 294 matched := match(t, f) 295 deltas = append(deltas, matched) 296 delete(from, f.FromName) 297 delete(to, t.ToName) 298 } 299 300 for _, f := range from { 301 for _, t := range to { 302 // check for overlapping schemas to try and match tables when names don't match 303 if schemasOverlap(f.FromSch, t.ToSch) { 304 matched := match(t, f) 305 deltas = append(deltas, matched) 306 delete(from, f.FromName) 307 delete(to, t.ToName) 308 } 309 } 310 } 311 312 // append unmatched TableDeltas 313 for _, f := range from { 314 deltas = append(deltas, f) 315 } 316 for _, t := range to { 317 deltas = append(deltas, t) 318 } 319 320 return deltas 321 } 322 323 func schemasOverlap(from, to schema.Schema) bool { 324 f := set.NewUint64Set(from.GetAllCols().Tags) 325 t := set.NewUint64Set(to.GetAllCols().Tags) 326 return f.Intersection(t).Size() > 0 327 } 328 329 // IsAdd returns true if the table was added between the fromRoot and toRoot. 330 func (td TableDelta) IsAdd() bool { 331 return td.FromTable == nil && td.ToTable != nil 332 } 333 334 // IsDrop returns true if the table was dropped between the fromRoot and toRoot. 335 func (td TableDelta) IsDrop() bool { 336 return td.FromTable != nil && td.ToTable == nil 337 } 338 339 // IsRename return true if the table was renamed between the fromRoot and toRoot. 340 func (td TableDelta) IsRename() bool { 341 if td.IsAdd() || td.IsDrop() { 342 return false 343 } 344 return td.FromName != td.ToName 345 } 346 347 // HasHashChanged returns true if the hash of the table content has changed between 348 // the fromRoot and toRoot. 349 func (td TableDelta) HasHashChanged() (bool, error) { 350 if td.IsAdd() || td.IsDrop() { 351 return true, nil 352 } 353 354 toHash, err := td.ToTable.HashOf() 355 if err != nil { 356 return false, err 357 } 358 359 fromHash, err := td.FromTable.HashOf() 360 if err != nil { 361 return false, err 362 } 363 364 return !toHash.Equal(fromHash), nil 365 } 366 367 // HasSchemaChanged returns true if the table schema has changed between the 368 // fromRoot and toRoot. 369 func (td TableDelta) HasSchemaChanged(ctx context.Context) (bool, error) { 370 // Database collation change is a schema change 371 if td.FromTable == nil && td.ToTable == nil { 372 return true, nil 373 } 374 375 if td.IsAdd() || td.IsDrop() { 376 return true, nil 377 } 378 379 if td.HasFKChanges() { 380 return true, nil 381 } 382 383 fromSchemaHash, err := td.FromTable.GetSchemaHash(ctx) 384 if err != nil { 385 return false, err 386 } 387 388 toSchemaHash, err := td.ToTable.GetSchemaHash(ctx) 389 if err != nil { 390 return false, err 391 } 392 393 return !fromSchemaHash.Equal(toSchemaHash), nil 394 } 395 396 func (td TableDelta) HasDataChanged(ctx context.Context) (bool, error) { 397 // Database collation change is not a data change 398 if td.FromTable == nil && td.ToTable == nil { 399 return false, nil 400 } 401 402 if td.IsAdd() { 403 isEmpty, err := isTableDataEmpty(ctx, td.ToTable) 404 if err != nil { 405 return false, err 406 } 407 408 return !isEmpty, nil 409 } 410 411 if td.IsDrop() { 412 isEmpty, err := isTableDataEmpty(ctx, td.FromTable) 413 if err != nil { 414 return false, err 415 } 416 return !isEmpty, nil 417 } 418 419 fromRowDataHash, err := td.FromTable.GetRowDataHash(ctx) 420 if err != nil { 421 return false, err 422 } 423 424 toRowDataHash, err := td.ToTable.GetRowDataHash(ctx) 425 if err != nil { 426 return false, err 427 } 428 429 return !fromRowDataHash.Equal(toRowDataHash), nil 430 } 431 432 func (td TableDelta) HasPrimaryKeySetChanged() bool { 433 return !schema.ArePrimaryKeySetsDiffable(td.Format(), td.FromSch, td.ToSch) 434 } 435 436 func (td TableDelta) HasChanges() (bool, error) { 437 hashChanged, err := td.HasHashChanged() 438 if err != nil { 439 return false, err 440 } 441 442 return td.HasFKChanges() || td.IsRename() || td.HasPrimaryKeySetChanged() || hashChanged, nil 443 } 444 445 // CurName returns the most recent name of the table. 446 func (td TableDelta) CurName() string { 447 if td.ToName != "" { 448 return td.ToName 449 } 450 return td.FromName 451 } 452 453 func (td TableDelta) HasFKChanges() bool { 454 if len(td.FromFks) != len(td.ToFks) { 455 return true 456 } 457 458 if td.FromFks == nil && td.ToFks == nil { 459 return false 460 } 461 if td.FromFksParentSch == nil && td.ToFksParentSch == nil { 462 return false 463 } 464 465 sort.Slice(td.FromFks, func(i, j int) bool { 466 return td.FromFks[i].Name < td.FromFks[j].Name 467 }) 468 sort.Slice(td.ToFks, func(i, j int) bool { 469 return td.ToFks[i].Name < td.ToFks[j].Name 470 }) 471 472 fromSchemaMap := td.FromFksParentSch 473 fromSchemaMap[td.FromName] = td.FromSch 474 toSchemaMap := td.ToFksParentSch 475 toSchemaMap[td.ToName] = td.ToSch 476 477 for i := range td.FromFks { 478 if !td.FromFks[i].Equals(td.ToFks[i], fromSchemaMap, toSchemaMap) { 479 return true 480 } 481 } 482 483 return false 484 } 485 486 // GetSchemas returns the table's schema at the fromRoot and toRoot, or schema.Empty if the table did not exist. 487 func (td TableDelta) GetSchemas(ctx context.Context) (from, to schema.Schema, err error) { 488 if td.FromSch == nil { 489 td.FromSch = schema.EmptySchema 490 } 491 if td.ToSch == nil { 492 td.ToSch = schema.EmptySchema 493 } 494 return td.FromSch, td.ToSch, nil 495 } 496 497 // Format returns the format of the tables in this delta. 498 func (td TableDelta) Format() *types.NomsBinFormat { 499 if td.FromTable != nil { 500 return td.FromTable.Format() 501 } 502 return td.ToTable.Format() 503 } 504 505 func (td TableDelta) IsKeyless(ctx context.Context) (bool, error) { 506 f, t, err := td.GetSchemas(ctx) 507 if err != nil { 508 return false, err 509 } 510 511 // nil table is neither keyless nor keyed 512 from, to := schema.IsKeyless(f), schema.IsKeyless(t) 513 if td.FromTable == nil { 514 return to, nil 515 } else if td.ToTable == nil { 516 return from, nil 517 } else { 518 if from && to { 519 return true, nil 520 } else if !from && !to { 521 return false, nil 522 } else { 523 return false, fmt.Errorf("mismatched keyless and keyed schemas for table %s", td.CurName()) 524 } 525 } 526 } 527 528 // isTableDataEmpty return true if the table does not contain any data 529 func isTableDataEmpty(ctx context.Context, table *doltdb.Table) (bool, error) { 530 rowData, err := table.GetRowData(ctx) 531 if err != nil { 532 return false, err 533 } 534 535 return rowData.Empty() 536 } 537 538 // GetSummary returns a summary of the table delta. 539 func (td TableDelta) GetSummary(ctx context.Context) (*TableDeltaSummary, error) { 540 dataChange, err := td.HasDataChanged(ctx) 541 if err != nil { 542 return nil, err 543 } 544 545 // Dropping a table is always a schema change, and also a data change if the table contained data 546 if td.IsDrop() { 547 return &TableDeltaSummary{ 548 TableName: td.FromName, 549 FromTableName: td.FromName, 550 DataChange: dataChange, 551 SchemaChange: true, 552 DiffType: "dropped", 553 }, nil 554 } 555 556 // Creating a table is always a schema change, and also a data change if data was inserted 557 if td.IsAdd() { 558 return &TableDeltaSummary{ 559 TableName: td.ToName, 560 ToTableName: td.ToName, 561 DataChange: dataChange, 562 SchemaChange: true, 563 DiffType: "added", 564 }, nil 565 } 566 567 // Renaming a table is always a schema change, and also a data change if the table data differs 568 if td.IsRename() { 569 return &TableDeltaSummary{ 570 TableName: td.ToName, 571 FromTableName: td.FromName, 572 ToTableName: td.ToName, 573 DataChange: dataChange, 574 SchemaChange: true, 575 DiffType: "renamed", 576 }, nil 577 } 578 579 schemaChange, err := td.HasSchemaChanged(ctx) 580 if err != nil { 581 return nil, err 582 } 583 584 return &TableDeltaSummary{ 585 TableName: td.FromName, 586 FromTableName: td.FromName, 587 ToTableName: td.ToName, 588 DataChange: dataChange, 589 SchemaChange: schemaChange, 590 DiffType: "modified", 591 }, nil 592 } 593 594 // GetRowData returns the table's row data at the fromRoot and toRoot, or an empty map if the table did not exist. 595 func (td TableDelta) GetRowData(ctx context.Context) (from, to durable.Index, err error) { 596 if td.FromTable == nil && td.ToTable == nil { 597 return nil, nil, fmt.Errorf("both from and to tables are missing from table delta") 598 } 599 600 if td.FromTable != nil { 601 from, err = td.FromTable.GetRowData(ctx) 602 if err != nil { 603 return nil, nil, err 604 } 605 } 606 607 if td.ToTable != nil { 608 to, err = td.ToTable.GetRowData(ctx) 609 if err != nil { 610 return nil, nil, err 611 } 612 } 613 614 return from, to, nil 615 } 616 617 // GetDataDiffStatement returns any data diff in SQL statements for given table including INSERT, UPDATE and DELETE row statements. 618 func GetDataDiffStatement(tableName string, sch schema.Schema, row sql.Row, rowDiffType ChangeType, colDiffTypes []ChangeType) (string, error) { 619 if len(row) != len(colDiffTypes) { 620 return "", fmt.Errorf("expected the same size for columns and diff types, got %d and %d", len(row), len(colDiffTypes)) 621 } 622 623 switch rowDiffType { 624 case Added: 625 return sqlfmt.SqlRowAsInsertStmt(row, tableName, sch) 626 case Removed: 627 return sqlfmt.SqlRowAsDeleteStmt(row, tableName, sch, 0) 628 case ModifiedNew: 629 updatedCols := set.NewEmptyStrSet() 630 for i, diffType := range colDiffTypes { 631 if diffType != None { 632 updatedCols.Add(sch.GetAllCols().GetByIndex(i).Name) 633 } 634 } 635 if updatedCols.Size() == 0 { 636 return "", nil 637 } 638 return sqlfmt.SqlRowAsUpdateStmt(row, tableName, sch, updatedCols) 639 case ModifiedOld: 640 // do nothing, we only issue UPDATE for ModifiedNew 641 return "", nil 642 default: 643 return "", fmt.Errorf("unexpected row diff type: %v", rowDiffType) 644 } 645 } 646 647 // WorkingSetContainsOnlyIgnoredTables returns true if all changes in working set are ignored tables. 648 // Otherwise, if there are any non-ignored changes, returns false. 649 // Note that only unstaged tables are subject to dolt_ignore (this is consistent with what git does.) 650 func WorkingSetContainsOnlyIgnoredTables(ctx context.Context, roots doltdb.Roots) (bool, error) { 651 staged, unstaged, err := GetStagedUnstagedTableDeltas(ctx, roots) 652 if err != nil { 653 return false, err 654 } 655 656 if len(staged) > 0 { 657 return false, nil 658 } 659 660 ignorePatterns, err := doltdb.GetIgnoredTablePatterns(ctx, roots) 661 if err != nil { 662 return false, err 663 } 664 665 for _, tableDelta := range unstaged { 666 if !(tableDelta.IsAdd()) { 667 return false, nil 668 } 669 isIgnored, err := ignorePatterns.IsTableNameIgnored(tableDelta.ToName) 670 if err != nil { 671 return false, err 672 } 673 if isIgnored != doltdb.Ignore { 674 return false, nil 675 } 676 } 677 678 return true, nil 679 }