github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/dtables/diff_table.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package dtables 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 "io" 22 23 "github.com/dolthub/go-mysql-server/sql" 24 25 "github.com/dolthub/dolt/go/libraries/doltcore/diff" 26 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 27 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable" 28 "github.com/dolthub/dolt/go/libraries/doltcore/row" 29 "github.com/dolthub/dolt/go/libraries/doltcore/rowconv" 30 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 31 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/expreval" 32 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/index" 33 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlutil" 34 "github.com/dolthub/dolt/go/libraries/utils/set" 35 "github.com/dolthub/dolt/go/store/datas" 36 "github.com/dolthub/dolt/go/store/hash" 37 "github.com/dolthub/dolt/go/store/prolly" 38 "github.com/dolthub/dolt/go/store/types" 39 ) 40 41 const diffTableDefaultRowCount = 10 42 43 const ( 44 toCommit = "to_commit" 45 fromCommit = "from_commit" 46 toCommitDate = "to_commit_date" 47 fromCommitDate = "from_commit_date" 48 49 diffTypeColName = "diff_type" 50 diffTypeAdded = "added" 51 diffTypeModified = "modified" 52 diffTypeRemoved = "removed" 53 ) 54 55 var _ sql.Table = (*DiffTable)(nil) 56 var _ sql.IndexedTable = (*DiffTable)(nil) 57 var _ sql.IndexAddressable = (*DiffTable)(nil) 58 var _ sql.StatisticsTable = (*DiffTable)(nil) 59 60 type DiffTable struct { 61 name string 62 ddb *doltdb.DoltDB 63 workingRoot doltdb.RootValue 64 head *doltdb.Commit 65 66 headHash hash.Hash 67 headCommitClosure *prolly.CommitClosure 68 69 // from and to need to be mapped to this schema 70 targetSch schema.Schema 71 72 // the schema for the diff table itself. Once from and to are converted to 73 // targetSch, the commit names and dates are inserted. 74 diffTableSch schema.Schema 75 76 sqlSch sql.PrimaryKeySchema 77 partitionFilters []sql.Expression 78 79 table *doltdb.Table 80 lookup sql.IndexLookup 81 82 // noms only 83 joiner *rowconv.Joiner 84 } 85 86 var PrimaryKeyChangeWarning = "cannot render full diff between commits %s and %s due to primary key set change" 87 88 const PrimaryKeyChangeWarningCode int = 1105 // Since this is our own custom warning we'll use 1105, the code for an unknown error 89 90 func NewDiffTable(ctx *sql.Context, dbName, tblName string, ddb *doltdb.DoltDB, root doltdb.RootValue, head *doltdb.Commit) (sql.Table, error) { 91 diffTblName := doltdb.DoltDiffTablePrefix + tblName 92 93 table, tblName, ok, err := doltdb.GetTableInsensitive(ctx, root, tblName) 94 if err != nil { 95 return nil, err 96 } 97 if !ok { 98 return nil, sql.ErrTableNotFound.New(diffTblName) 99 } 100 sch, err := table.GetSchema(ctx) 101 if err != nil { 102 return nil, err 103 } 104 105 diffTableSchema, j, err := GetDiffTableSchemaAndJoiner(ddb.Format(), sch, sch) 106 if err != nil { 107 return nil, err 108 } 109 110 sqlSch, err := sqlutil.FromDoltSchema(dbName, diffTblName, diffTableSchema) 111 if err != nil { 112 return nil, err 113 } 114 115 return &DiffTable{ 116 name: tblName, 117 ddb: ddb, 118 workingRoot: root, 119 head: head, 120 targetSch: sch, 121 diffTableSch: diffTableSchema, 122 sqlSch: sqlSch, 123 partitionFilters: nil, 124 table: table, 125 joiner: j, 126 }, nil 127 } 128 129 func (dt *DiffTable) DataLength(ctx *sql.Context) (uint64, error) { 130 numBytesPerRow := schema.SchemaAvgLength(dt.Schema()) 131 numRows, _, err := dt.RowCount(ctx) 132 if err != nil { 133 return 0, err 134 } 135 return numBytesPerRow * numRows, nil 136 } 137 138 func (dt *DiffTable) RowCount(_ *sql.Context) (uint64, bool, error) { 139 return diffTableDefaultRowCount, false, nil 140 } 141 142 func (dt *DiffTable) Name() string { 143 return doltdb.DoltDiffTablePrefix + dt.name 144 } 145 146 func (dt *DiffTable) String() string { 147 return doltdb.DoltDiffTablePrefix + dt.name 148 } 149 150 func (dt *DiffTable) Schema() sql.Schema { 151 return dt.sqlSch.Schema 152 } 153 154 func (dt *DiffTable) Collation() sql.CollationID { 155 return sql.Collation_Default 156 } 157 158 func (dt *DiffTable) Partitions(ctx *sql.Context) (sql.PartitionIter, error) { 159 cmItr := doltdb.CommitItrForRoots(dt.ddb, dt.head) 160 161 sf, err := SelectFuncForFilters(dt.ddb.ValueReadWriter(), dt.partitionFilters) 162 if err != nil { 163 return nil, err 164 } 165 166 t, exactName, ok, err := doltdb.GetTableInsensitive(ctx, dt.workingRoot, dt.name) 167 if err != nil { 168 return nil, err 169 } 170 171 if !ok { 172 return nil, fmt.Errorf("table: %s does not exist", dt.name) 173 } 174 175 wrTblHash, _, err := dt.workingRoot.GetTableHash(ctx, exactName) 176 if err != nil { 177 return nil, err 178 } 179 180 cmHash, _, err := cmItr.Next(ctx) 181 if err != nil { 182 return nil, err 183 } 184 185 cmHashToTblInfo := make(map[hash.Hash]TblInfoAtCommit) 186 cmHashToTblInfo[cmHash] = TblInfoAtCommit{"WORKING", nil, t, wrTblHash} 187 188 err = cmItr.Reset(ctx) 189 if err != nil { 190 return nil, err 191 } 192 193 return &DiffPartitions{ 194 tblName: exactName, 195 cmItr: cmItr, 196 cmHashToTblInfo: cmHashToTblInfo, 197 selectFunc: sf, 198 toSch: dt.targetSch, 199 fromSch: dt.targetSch, 200 }, nil 201 } 202 203 var commitMetaColumns = set.NewStrSet([]string{toCommit, fromCommit, toCommitDate, fromCommitDate}) 204 205 // CommitIsInScope returns true if a given commit hash is head or is 206 // visible from the current head's ancestry graph. 207 func (dt *DiffTable) CommitIsInScope(ctx context.Context, height uint64, h hash.Hash) (bool, error) { 208 cc, err := dt.HeadCommitClosure(ctx) 209 if err != nil { 210 return false, err 211 } 212 headHash, err := dt.HeadHash() 213 if err != nil { 214 return false, err 215 } 216 if headHash == h { 217 return true, nil 218 } 219 return cc.ContainsKey(ctx, h, height) 220 } 221 222 func (dt *DiffTable) HeadCommitClosure(ctx context.Context) (*prolly.CommitClosure, error) { 223 if dt.headCommitClosure == nil { 224 cc, err := dt.head.GetCommitClosure(ctx) 225 dt.headCommitClosure = &cc 226 if err != nil { 227 return nil, err 228 } 229 } 230 return dt.headCommitClosure, nil 231 } 232 233 func (dt *DiffTable) HeadHash() (hash.Hash, error) { 234 if dt.headHash.IsEmpty() { 235 var err error 236 dt.headHash, err = dt.head.HashOf() 237 if err != nil { 238 return hash.Hash{}, err 239 } 240 } 241 return dt.headHash, nil 242 } 243 244 func (dt *DiffTable) PartitionRows(ctx *sql.Context, part sql.Partition) (sql.RowIter, error) { 245 dp := part.(DiffPartition) 246 return dp.GetRowIter(ctx, dt.ddb, dt.joiner, dt.lookup) 247 } 248 249 func (dt *DiffTable) LookupPartitions(ctx *sql.Context, lookup sql.IndexLookup) (sql.PartitionIter, error) { 250 switch lookup.Index.ID() { 251 case index.ToCommitIndexId: 252 hs, ok := index.LookupToPointSelectStr(lookup) 253 if !ok { 254 return nil, fmt.Errorf("failed to parse commit lookup ranges: %s", sql.DebugString(lookup.Ranges)) 255 } 256 hashes, commits, metas := index.HashesToCommits(ctx, dt.ddb, hs, dt.head, false) 257 if len(hashes) == 0 { 258 return sql.PartitionsToPartitionIter(), nil 259 } 260 return dt.toCommitLookupPartitions(ctx, hashes, commits, metas) 261 case index.FromCommitIndexId: 262 hs, ok := index.LookupToPointSelectStr(lookup) 263 if !ok { 264 return nil, fmt.Errorf("failed to parse commit lookup ranges: %s", sql.DebugString(lookup.Ranges)) 265 } 266 hashes, commits, metas := index.HashesToCommits(ctx, dt.ddb, hs, nil, false) 267 if len(hashes) == 0 { 268 return sql.PartitionsToPartitionIter(), nil 269 } 270 return dt.fromCommitLookupPartitions(ctx, hashes, commits, metas) 271 default: 272 return dt.Partitions(ctx) 273 } 274 } 275 276 // fromCommitLookupPartitions creates a diff partition iterator for a set 277 // of commits. The structure of the iter requires we pre-populate the 278 // children of from_commit for diffing. We walk the commit graph looking 279 // for commits that reference |from_commit| as a parent, and forward populate 280 // for the |from_commit| diff partitions we will iterate. 281 // TODO the structure of the diff iterator doesn't appear to accommodate 282 // several children for a parent hash. 283 func (dt *DiffTable) fromCommitLookupPartitions(ctx *sql.Context, hashes []hash.Hash, commits []*doltdb.Commit, metas []*datas.CommitMeta) (sql.PartitionIter, error) { 284 _, exactName, ok, err := doltdb.GetTableInsensitive(ctx, dt.workingRoot, dt.name) 285 if err != nil { 286 return nil, err 287 } else if !ok { 288 return nil, fmt.Errorf("table: %s does not exist", dt.name) 289 } 290 291 var parentHashes []hash.Hash 292 cmHashToTblInfo := make(map[hash.Hash]TblInfoAtCommit) 293 var pCommits []*doltdb.Commit 294 for i, hs := range hashes { 295 cm := commits[i] 296 297 // scope check 298 height, err := cm.Height() 299 if err != nil { 300 return nil, err 301 } 302 303 childCm, childHs, err := dt.scanHeightForChild(ctx, hs, height+1) 304 if err != nil { 305 return nil, err 306 } 307 if childCm == nil { 308 // non-linear commit graph, fallback to top-down scan 309 childCm, childHs, err = dt.reverseIterForChild(ctx, hs) 310 if err != nil { 311 return nil, err 312 } 313 } 314 315 if childCm != nil { 316 ti, err := tableInfoForCommit(ctx, dt.name, childCm, childHs) 317 if err != nil { 318 return nil, err 319 } 320 cmHashToTblInfo[hs] = ti 321 parentHashes = append(parentHashes, hs) 322 pCommits = append(pCommits, cm) 323 } 324 } 325 326 if len(parentHashes) == 0 { 327 return sql.PartitionsToPartitionIter(), nil 328 } 329 330 sf, err := SelectFuncForFilters(dt.ddb.ValueReadWriter(), dt.partitionFilters) 331 if err != nil { 332 return nil, err 333 } 334 335 cmItr := doltdb.NewCommitSliceIter(pCommits, parentHashes) 336 if err != nil { 337 return nil, err 338 } 339 340 return &DiffPartitions{ 341 tblName: exactName, 342 cmItr: cmItr, 343 cmHashToTblInfo: cmHashToTblInfo, 344 selectFunc: sf, 345 toSch: dt.targetSch, 346 fromSch: dt.targetSch, 347 }, nil 348 } 349 350 // scanHeightForChild searches for a child commit that references a target parent hash 351 // at a specific height. This is an optimization for the common case where a parent and 352 // its child are one level apart, and there is no branching that creates the potential 353 // for a child higher in the graph. 354 func (dt *DiffTable) scanHeightForChild(ctx *sql.Context, parent hash.Hash, height uint64) (*doltdb.Commit, hash.Hash, error) { 355 cc, err := dt.HeadCommitClosure(ctx) 356 if err != nil { 357 return nil, hash.Hash{}, err 358 } 359 iter, err := cc.IterHeight(ctx, height) 360 if err != nil { 361 return nil, hash.Hash{}, err 362 } 363 var childHs hash.Hash 364 var childCm *doltdb.Commit 365 var cnt int 366 for { 367 k, _, err := iter.Next(ctx) 368 if errors.Is(err, io.EOF) { 369 break 370 } 371 if err != nil { 372 return nil, hash.Hash{}, err 373 } 374 cnt++ 375 if cnt > 1 { 376 return nil, hash.Hash{}, nil 377 } 378 379 c, err := doltdb.HashToCommit(ctx, dt.ddb.ValueReadWriter(), dt.ddb.NodeStore(), k.Addr()) 380 phs, err := c.ParentHashes(ctx) 381 if err != nil { 382 return nil, hash.Hash{}, err 383 } 384 for _, ph := range phs { 385 if ph == parent { 386 childCm = c 387 childHs = k.Addr() 388 break 389 } 390 } 391 } 392 return childCm, childHs, nil 393 } 394 395 // reverseIterForChild finds the commit with the largest height that 396 // is a child of the |parent| hash, or nil if no commit is found. 397 func (dt *DiffTable) reverseIterForChild(ctx *sql.Context, parent hash.Hash) (*doltdb.Commit, hash.Hash, error) { 398 iter := doltdb.CommitItrForRoots(dt.ddb, dt.head) 399 for { 400 childHs, optCmt, err := iter.Next(ctx) 401 if errors.Is(err, io.EOF) { 402 return nil, hash.Hash{}, nil 403 } else if err != nil { 404 return nil, hash.Hash{}, err 405 } 406 407 childCm, ok := optCmt.ToCommit() 408 if !ok { 409 // Should have been caught above from the Next() call on the iter. This is a runtime error. 410 return nil, hash.Hash{}, doltdb.ErrGhostCommitRuntimeFailure 411 } 412 413 phs, err := childCm.ParentHashes(ctx) 414 if err != nil { 415 return nil, hash.Hash{}, err 416 } 417 for _, ph := range phs { 418 if ph == parent { 419 return childCm, childHs, nil 420 } 421 } 422 } 423 } 424 425 func tableInfoForCommit(ctx context.Context, table string, cm *doltdb.Commit, hs hash.Hash) (TblInfoAtCommit, error) { 426 r, err := cm.GetRootValue(ctx) 427 if err != nil { 428 return TblInfoAtCommit{}, err 429 } 430 431 tbl, exactName, ok, err := doltdb.GetTableInsensitive(ctx, r, table) 432 if err != nil { 433 return TblInfoAtCommit{}, err 434 } 435 if !ok { 436 return TblInfoAtCommit{}, nil 437 } 438 439 tblHash, _, err := r.GetTableHash(ctx, exactName) 440 if err != nil { 441 return TblInfoAtCommit{}, err 442 } 443 444 meta, err := cm.GetCommitMeta(ctx) 445 if err != nil { 446 return TblInfoAtCommit{}, err 447 } 448 449 ts := types.Timestamp(meta.Time()) 450 return NewTblInfoAtCommit(hs.String(), &ts, tbl, tblHash), nil 451 } 452 453 // toCommitLookupPartitions creates a diff partition iterator for a set of 454 // commits. The structure of the iter requires we pre-populate the parents 455 // of to_commit for diffing. 456 func (dt *DiffTable) toCommitLookupPartitions(ctx *sql.Context, hashes []hash.Hash, commits []*doltdb.Commit, metas []*datas.CommitMeta) (sql.PartitionIter, error) { 457 t, exactName, ok, err := doltdb.GetTableInsensitive(ctx, dt.workingRoot, dt.name) 458 if err != nil { 459 return nil, err 460 } else if !ok { 461 return nil, fmt.Errorf("table: %s does not exist", dt.name) 462 } 463 464 working, err := dt.head.HashOf() 465 if err != nil { 466 return nil, err 467 } 468 469 var parentHashes []hash.Hash 470 cmHashToTblInfo := make(map[hash.Hash]TblInfoAtCommit) 471 var pCommits []*doltdb.Commit 472 for i, hs := range hashes { 473 cm := commits[i] 474 475 var toCmInfo TblInfoAtCommit 476 if hs == working && cm == nil { 477 wrTblHash, _, err := dt.workingRoot.GetTableHash(ctx, exactName) 478 if err != nil { 479 return nil, err 480 } 481 482 toCmInfo = TblInfoAtCommit{"WORKING", nil, t, wrTblHash} 483 cmHashToTblInfo[hs] = toCmInfo 484 parentHashes = append(parentHashes, hs) 485 pCommits = append(pCommits, dt.head) 486 continue 487 } 488 489 // scope check 490 height, err := cm.Height() 491 if err != nil { 492 return nil, err 493 } 494 ok, err = dt.CommitIsInScope(ctx, height, hs) 495 if err != nil { 496 return nil, err 497 } 498 if !ok { 499 continue 500 } 501 502 ti, err := tableInfoForCommit(ctx, dt.name, cm, hs) 503 if err != nil { 504 return nil, err 505 } 506 if ti.IsEmpty() { 507 continue 508 } 509 510 ph, err := cm.ParentHashes(ctx) 511 if err != nil { 512 return nil, err 513 } 514 515 for i, pj := range ph { 516 optCmt, err := cm.GetParent(ctx, i) 517 if err != nil { 518 return nil, err 519 } 520 pc, ok := optCmt.ToCommit() 521 if !ok { 522 return nil, doltdb.ErrGhostCommitEncountered 523 } 524 525 cmHashToTblInfo[pj] = toCmInfo 526 cmHashToTblInfo[pj] = ti 527 pCommits = append(pCommits, pc) 528 } 529 parentHashes = append(parentHashes, ph...) 530 } 531 532 if len(parentHashes) == 0 { 533 return sql.PartitionsToPartitionIter(), nil 534 } 535 536 sf, err := SelectFuncForFilters(dt.ddb.ValueReadWriter(), dt.partitionFilters) 537 if err != nil { 538 return nil, err 539 } 540 541 cmItr := doltdb.NewCommitSliceIter(pCommits, parentHashes) 542 if err != nil { 543 return nil, err 544 } 545 546 return &DiffPartitions{ 547 tblName: exactName, 548 cmItr: cmItr, 549 cmHashToTblInfo: cmHashToTblInfo, 550 selectFunc: sf, 551 toSch: dt.targetSch, 552 fromSch: dt.targetSch, 553 }, nil 554 } 555 556 // GetIndexes implements sql.IndexAddressable 557 func (dt *DiffTable) GetIndexes(ctx *sql.Context) ([]sql.Index, error) { 558 return index.DoltDiffIndexesFromTable(ctx, "", dt.name, dt.table) 559 } 560 561 // IndexedAccess implements sql.IndexAddressable 562 func (dt *DiffTable) IndexedAccess(lookup sql.IndexLookup) sql.IndexedTable { 563 nt := *dt 564 return &nt 565 } 566 567 // PreciseMatch implements sql.IndexAddressable 568 func (dt *DiffTable) PreciseMatch() bool { 569 return false 570 } 571 572 // tableData returns the map of primary key to values for the specified table (or an empty map if the tbl is null) 573 // and the schema of the table (or EmptySchema if tbl is null). 574 func tableData(ctx *sql.Context, tbl *doltdb.Table, ddb *doltdb.DoltDB) (durable.Index, schema.Schema, error) { 575 var data durable.Index 576 var err error 577 578 if tbl == nil { 579 data, err = durable.NewEmptyIndex(ctx, ddb.ValueReadWriter(), ddb.NodeStore(), schema.EmptySchema) 580 if err != nil { 581 return nil, nil, err 582 } 583 } else { 584 data, err = tbl.GetRowData(ctx) 585 if err != nil { 586 return nil, nil, err 587 } 588 } 589 590 var sch schema.Schema 591 if tbl == nil { 592 sch = schema.EmptySchema 593 } else { 594 sch, err = tbl.GetSchema(ctx) 595 596 if err != nil { 597 return nil, nil, err 598 } 599 } 600 601 return data, sch, nil 602 } 603 604 type TblInfoAtCommit struct { 605 name string 606 date *types.Timestamp 607 tbl *doltdb.Table 608 tblHash hash.Hash 609 } 610 611 func NewTblInfoAtCommit(name string, date *types.Timestamp, tbl *doltdb.Table, tblHash hash.Hash) TblInfoAtCommit { 612 return TblInfoAtCommit{ 613 name, date, tbl, tblHash, 614 } 615 } 616 617 func (ti TblInfoAtCommit) IsEmpty() bool { 618 return ti.name == "" 619 } 620 621 var _ sql.Partition = (*DiffPartition)(nil) 622 623 // DiffPartition data partitioned into pairs of table states which get compared 624 type DiffPartition struct { 625 to *doltdb.Table 626 from *doltdb.Table 627 toName string 628 fromName string 629 toDate *types.Timestamp 630 fromDate *types.Timestamp 631 // fromSch and toSch are usually identical. It is the schema of the table at head. 632 toSch schema.Schema 633 fromSch schema.Schema 634 } 635 636 func NewDiffPartition(to, from *doltdb.Table, toName, fromName string, toDate, fromDate *types.Timestamp, toSch, fromSch schema.Schema) *DiffPartition { 637 return &DiffPartition{ 638 to: to, 639 from: from, 640 toName: toName, 641 fromName: fromName, 642 toDate: toDate, 643 fromDate: fromDate, 644 toSch: toSch, 645 fromSch: fromSch, 646 } 647 } 648 649 func (dp DiffPartition) Key() []byte { 650 return []byte(dp.toName + dp.fromName) 651 } 652 653 func (dp DiffPartition) GetRowIter(ctx *sql.Context, ddb *doltdb.DoltDB, joiner *rowconv.Joiner, lookup sql.IndexLookup) (sql.RowIter, error) { 654 if types.IsFormat_DOLT(ddb.Format()) { 655 return newProllyDiffIter(ctx, dp, dp.fromSch, dp.toSch) 656 } else { 657 return newNomsDiffIter(ctx, ddb, joiner, dp, lookup) 658 } 659 } 660 661 // isDiffablePartition checks if the commit pair for this partition is "diffable". 662 // If the primary key sets changed between the two commits, it may not be 663 // possible to diff them. 664 func (dp *DiffPartition) isDiffablePartition(ctx *sql.Context) (bool, error) { 665 // dp.to is nil when a table has been deleted previously. In this case, we return 666 // false, to stop processing diffs, since that previously deleted table is considered 667 // a logically different table and we don't want to mix the diffs together. 668 if dp.to == nil { 669 return false, nil 670 } 671 672 // dp.from is nil when the to commit created a new table 673 if dp.from == nil { 674 return true, nil 675 } 676 677 fromSch, err := dp.from.GetSchema(ctx) 678 if err != nil { 679 return false, err 680 } 681 682 toSch, err := dp.to.GetSchema(ctx) 683 if err != nil { 684 return false, err 685 } 686 687 return schema.ArePrimaryKeySetsDiffable(dp.from.Format(), fromSch, toSch), nil 688 } 689 690 type partitionSelectFunc func(*sql.Context, DiffPartition) (bool, error) 691 692 func SelectFuncForFilters(vr types.ValueReader, filters []sql.Expression) (partitionSelectFunc, error) { 693 const ( 694 toCommitTag uint64 = iota 695 fromCommitTag 696 toCommitDateTag 697 fromCommitDateTag 698 ) 699 700 colColl := schema.NewColCollection( 701 schema.NewColumn(toCommit, toCommitTag, types.StringKind, false), 702 schema.NewColumn(fromCommit, fromCommitTag, types.StringKind, false), 703 schema.NewColumn(toCommitDate, toCommitDateTag, types.TimestampKind, false), 704 schema.NewColumn(fromCommitDate, fromCommitDateTag, types.TimestampKind, false), 705 ) 706 707 expFunc, err := expreval.ExpressionFuncFromSQLExpressions(vr, schema.UnkeyedSchemaFromCols(colColl), filters) 708 709 if err != nil { 710 return nil, err 711 } 712 713 return func(ctx *sql.Context, partition DiffPartition) (bool, error) { 714 vals := row.TaggedValues{ 715 toCommitTag: types.String(partition.toName), 716 fromCommitTag: types.String(partition.fromName), 717 } 718 719 if partition.toDate != nil { 720 vals[toCommitDateTag] = *partition.toDate 721 } 722 723 if partition.fromDate != nil { 724 vals[fromCommitDateTag] = *partition.fromDate 725 } 726 727 return expFunc(ctx, vals) 728 }, nil 729 } 730 731 var _ sql.PartitionIter = &DiffPartitions{} 732 733 // DiffPartitions a collection of partitions. Implements PartitionItr 734 type DiffPartitions struct { 735 tblName string 736 cmItr doltdb.CommitItr 737 cmHashToTblInfo map[hash.Hash]TblInfoAtCommit 738 selectFunc partitionSelectFunc 739 toSch schema.Schema 740 fromSch schema.Schema 741 } 742 743 // processCommit is called in a commit iteration loop. Adds partitions when it finds a commit and its parent that have 744 // different values for the hash of the table being looked at. 745 func (dps *DiffPartitions) processCommit(ctx *sql.Context, cmHash hash.Hash, cm *doltdb.Commit, root doltdb.RootValue, tbl *doltdb.Table) (*DiffPartition, error) { 746 tblHash, _, err := root.GetTableHash(ctx, dps.tblName) 747 748 if err != nil { 749 return nil, err 750 } 751 752 toInfoForCommit := dps.cmHashToTblInfo[cmHash] 753 cmHashStr := cmHash.String() 754 meta, err := cm.GetCommitMeta(ctx) 755 756 if err != nil { 757 return nil, err 758 } 759 760 ts := types.Timestamp(meta.Time()) 761 762 var nextPartition *DiffPartition 763 if tblHash != toInfoForCommit.tblHash { 764 partition := DiffPartition{ 765 to: toInfoForCommit.tbl, 766 from: tbl, 767 toName: toInfoForCommit.name, 768 fromName: cmHashStr, 769 toDate: toInfoForCommit.date, 770 fromDate: &ts, 771 fromSch: dps.fromSch, 772 toSch: dps.toSch, 773 } 774 selected, err := dps.selectFunc(ctx, partition) 775 776 if err != nil { 777 return nil, err 778 } 779 780 if selected { 781 nextPartition = &partition 782 } 783 } 784 785 newInfo := TblInfoAtCommit{cmHashStr, &ts, tbl, tblHash} 786 parentHashes, err := cm.ParentHashes(ctx) 787 788 if err != nil { 789 return nil, err 790 } 791 792 for _, h := range parentHashes { 793 dps.cmHashToTblInfo[h] = newInfo 794 } 795 796 return nextPartition, nil 797 } 798 799 func (dps *DiffPartitions) Next(ctx *sql.Context) (sql.Partition, error) { 800 for { 801 cmHash, optCmt, err := dps.cmItr.Next(ctx) 802 if err != nil { 803 return nil, err 804 } 805 cm, ok := optCmt.ToCommit() 806 if !ok { 807 // Should have been caught above from the Next() call on the iter. This is a runtime error. 808 return nil, doltdb.ErrGhostCommitRuntimeFailure 809 } 810 811 root, err := cm.GetRootValue(ctx) 812 813 if err != nil { 814 return nil, err 815 } 816 817 tbl, _, _, err := doltdb.GetTableInsensitive(ctx, root, dps.tblName) 818 819 if err != nil { 820 return nil, err 821 } 822 823 next, err := dps.processCommit(ctx, cmHash, cm, root, tbl) 824 825 if err != nil { 826 return nil, err 827 } 828 829 if next != nil { 830 // If we can't diff this commit with its parent, don't traverse any lower 831 canDiff, err := next.isDiffablePartition(ctx) 832 if err != nil { 833 return nil, err 834 } 835 836 if !canDiff { 837 ctx.Warn(PrimaryKeyChangeWarningCode, fmt.Sprintf(PrimaryKeyChangeWarning, next.fromName, next.toName)) 838 return nil, io.EOF 839 } 840 841 return *next, nil 842 } 843 } 844 } 845 846 func (dps *DiffPartitions) Close(*sql.Context) error { 847 return nil 848 } 849 850 // rowConvForSchema creates a RowConverter for transforming rows with the given schema a target schema. 851 func (dp DiffPartition) rowConvForSchema(ctx context.Context, vrw types.ValueReadWriter, targetSch, srcSch schema.Schema) (*rowconv.RowConverter, error) { 852 if schema.SchemasAreEqual(srcSch, schema.EmptySchema) { 853 return rowconv.IdentityConverter, nil 854 } 855 856 fm, err := rowconv.TagMappingByTagAndName(srcSch, targetSch) 857 if err != nil { 858 return nil, err 859 } 860 861 return rowconv.NewRowConverter(ctx, vrw, fm) 862 } 863 864 // GetDiffTableSchemaAndJoiner returns the schema for the diff table given a 865 // target schema for a row |sch|. In the old storage format, it also returns the 866 // associated joiner. 867 func GetDiffTableSchemaAndJoiner(format *types.NomsBinFormat, fromSch, toSch schema.Schema) (diffTableSchema schema.Schema, j *rowconv.Joiner, err error) { 868 if format == types.Format_DOLT { 869 diffTableSchema, err = CalculateDiffSchema(fromSch, toSch) 870 if err != nil { 871 return nil, nil, err 872 } 873 } else { 874 fromSch, toSch, err = expandFromToSchemas(fromSch, toSch) 875 if err != nil { 876 return nil, nil, err 877 } 878 879 j, err = rowconv.NewJoiner( 880 []rowconv.NamedSchema{{Name: diff.To, Sch: toSch}, {Name: diff.From, Sch: fromSch}}, 881 map[string]rowconv.ColNamingFunc{ 882 diff.To: diff.ToColNamer, 883 diff.From: diff.FromColNamer, 884 }) 885 if err != nil { 886 return nil, nil, err 887 } 888 889 diffTableSchema = j.GetSchema() 890 fullDiffCols := diffTableSchema.GetAllCols() 891 fullDiffCols = fullDiffCols.Append( 892 schema.NewColumn(diffTypeColName, schema.DiffTypeTag, types.StringKind, false), 893 ) 894 diffTableSchema = schema.MustSchemaFromCols(fullDiffCols) 895 } 896 897 return 898 } 899 900 // expandFromToSchemas converts input schemas to schemas appropriate for diffs. One argument must be 901 // non-nil. If one is null, the result will be the columns of the non-nil argument. 902 func expandFromToSchemas(fromSch, toSch schema.Schema) (newFromSch, newToSch schema.Schema, err error) { 903 var fromClmCol, toClmCol *schema.ColCollection 904 if fromSch == nil && toSch == nil { 905 return nil, nil, errors.New("non-nil argument required to CalculateDiffSchema") 906 } else if fromSch == nil { 907 fromClmCol = toSch.GetAllCols() 908 toClmCol = toSch.GetAllCols() 909 } else if toSch == nil { 910 toClmCol = fromSch.GetAllCols() 911 fromClmCol = fromSch.GetAllCols() 912 } else { 913 fromClmCol = fromSch.GetAllCols() 914 toClmCol = toSch.GetAllCols() 915 } 916 917 fromClmCol = fromClmCol.Append( 918 schema.NewColumn("commit", schema.DiffCommitTag, types.StringKind, false), 919 schema.NewColumn("commit_date", schema.DiffCommitDateTag, types.TimestampKind, false)) 920 newFromSch = schema.MustSchemaFromCols(fromClmCol) 921 922 toClmCol = toClmCol.Append( 923 schema.NewColumn("commit", schema.DiffCommitTag, types.StringKind, false), 924 schema.NewColumn("commit_date", schema.DiffCommitDateTag, types.TimestampKind, false)) 925 newToSch = schema.MustSchemaFromCols(toClmCol) 926 927 return 928 } 929 930 // CalculateDiffSchema returns the schema for the dolt_diff table based on the schemas from the from and to tables. 931 // Either may be nil, in which case the nil argument will use the schema of the non-nil argument 932 func CalculateDiffSchema(fromSch, toSch schema.Schema) (schema.Schema, error) { 933 fromSch, toSch, err := expandFromToSchemas(fromSch, toSch) 934 if err != nil { 935 return nil, err 936 } 937 938 cols := make([]schema.Column, toSch.GetAllCols().Size()+fromSch.GetAllCols().Size()+1) 939 940 i := 0 941 err = toSch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) { 942 toCol, err := schema.NewColumnWithTypeInfo(diff.ToColNamer(col.Name), uint64(i), col.TypeInfo, false, col.Default, false, col.Comment) 943 if err != nil { 944 return true, err 945 } 946 cols[i] = toCol 947 i++ 948 return false, nil 949 }) 950 if err != nil { 951 return nil, err 952 } 953 954 j := toSch.GetAllCols().Size() 955 err = fromSch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) { 956 fromCol, err := schema.NewColumnWithTypeInfo(diff.FromColNamer(col.Name), uint64(j), col.TypeInfo, false, col.Default, false, col.Comment) 957 if err != nil { 958 return true, err 959 } 960 cols[j] = fromCol 961 962 j++ 963 return false, nil 964 }) 965 if err != nil { 966 return nil, err 967 } 968 969 cols[len(cols)-1] = schema.NewColumn(diffTypeColName, schema.DiffTypeTag, types.StringKind, false) 970 971 return schema.UnkeyedSchemaFromCols(schema.NewColCollection(cols...)), nil 972 }