github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/dtables/unscoped_diff_table.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package dtables 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "io" 22 23 "github.com/dolthub/go-mysql-server/sql" 24 "github.com/dolthub/go-mysql-server/sql/expression" 25 "github.com/dolthub/go-mysql-server/sql/plan" 26 "github.com/dolthub/go-mysql-server/sql/transform" 27 "github.com/dolthub/go-mysql-server/sql/types" 28 29 "github.com/dolthub/dolt/go/libraries/doltcore/diff" 30 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 31 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 32 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess" 33 "github.com/dolthub/dolt/go/libraries/doltcore/sqle/index" 34 "github.com/dolthub/dolt/go/libraries/utils/set" 35 "github.com/dolthub/dolt/go/store/datas" 36 "github.com/dolthub/dolt/go/store/hash" 37 ) 38 39 const unscopedDiffDefaultRowCount = 1000 40 41 var workingSetPartitionKey = []byte("workingset") 42 var commitHistoryPartitionKey = []byte("commithistory") 43 var commitHashCol = "commit_hash" 44 var filterColumnNameSet = set.NewStrSet([]string{commitHashCol}) 45 46 // UnscopedDiffTable is a sql.Table implementation of a system table that shows which tables have 47 // changed in each commit, across all branches. 48 type UnscopedDiffTable struct { 49 dbName string 50 ddb *doltdb.DoltDB 51 head *doltdb.Commit 52 partitionFilters []sql.Expression 53 commitCheck doltdb.CommitFilter 54 } 55 56 var _ sql.Table = (*UnscopedDiffTable)(nil) 57 var _ sql.StatisticsTable = (*UnscopedDiffTable)(nil) 58 var _ sql.IndexAddressable = (*UnscopedDiffTable)(nil) 59 60 // NewUnscopedDiffTable creates an UnscopedDiffTable 61 func NewUnscopedDiffTable(_ *sql.Context, dbName string, ddb *doltdb.DoltDB, head *doltdb.Commit) sql.Table { 62 return &UnscopedDiffTable{dbName: dbName, ddb: ddb, head: head} 63 } 64 65 func (dt *UnscopedDiffTable) DataLength(ctx *sql.Context) (uint64, error) { 66 numBytesPerRow := schema.SchemaAvgLength(dt.Schema()) 67 numRows, _, err := dt.RowCount(ctx) 68 if err != nil { 69 return 0, err 70 } 71 return numBytesPerRow * numRows, nil 72 } 73 74 func (dt *UnscopedDiffTable) RowCount(_ *sql.Context) (uint64, bool, error) { 75 return unscopedDiffDefaultRowCount, false, nil 76 } 77 78 // Name is a sql.Table interface function which returns the name of the table which is defined by the constant 79 // DiffTableName 80 func (dt *UnscopedDiffTable) Name() string { 81 return doltdb.DiffTableName 82 } 83 84 // String is a sql.Table interface function which returns the name of the table which is defined by the constant 85 // DiffTableName 86 func (dt *UnscopedDiffTable) String() string { 87 return doltdb.DiffTableName 88 } 89 90 // Schema is a sql.Table interface function that returns the sql.Schema for this system table. 91 func (dt *UnscopedDiffTable) Schema() sql.Schema { 92 return []*sql.Column{ 93 {Name: "commit_hash", Type: types.Text, Source: doltdb.DiffTableName, PrimaryKey: true, DatabaseSource: dt.dbName}, 94 {Name: "table_name", Type: types.Text, Source: doltdb.DiffTableName, PrimaryKey: true, DatabaseSource: dt.dbName}, 95 {Name: "committer", Type: types.Text, Source: doltdb.DiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName}, 96 {Name: "email", Type: types.Text, Source: doltdb.DiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName}, 97 {Name: "date", Type: types.Datetime, Source: doltdb.DiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName}, 98 {Name: "message", Type: types.Text, Source: doltdb.DiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName}, 99 {Name: "data_change", Type: types.Boolean, Source: doltdb.DiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName}, 100 {Name: "schema_change", Type: types.Boolean, Source: doltdb.DiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName}, 101 } 102 } 103 104 // Collation implements the sql.Table interface. 105 func (dt *UnscopedDiffTable) Collation() sql.CollationID { 106 return sql.Collation_Default 107 } 108 109 // Partitions is a sql.Table interface function that returns a partition of the data. Returns one 110 // partition for working set changes and one partition for all commit history. 111 func (dt *UnscopedDiffTable) Partitions(ctx *sql.Context) (sql.PartitionIter, error) { 112 return NewSliceOfPartitionsItr([]sql.Partition{ 113 newDoltDiffPartition(workingSetPartitionKey), 114 newDoltDiffPartition(commitHistoryPartitionKey), 115 }), nil 116 } 117 118 // PartitionRows is a sql.Table interface function that gets a row iterator for a partition. 119 func (dt *UnscopedDiffTable) PartitionRows(ctx *sql.Context, partition sql.Partition) (sql.RowIter, error) { 120 switch p := partition.(type) { 121 case *doltdb.CommitPart: 122 return dt.newCommitHistoryRowItrFromCommits(ctx, []*doltdb.Commit{p.Commit()}) 123 default: 124 if bytes.Equal(partition.Key(), workingSetPartitionKey) { 125 return dt.newWorkingSetRowItr(ctx) 126 } else if bytes.Equal(partition.Key(), commitHistoryPartitionKey) { 127 cms, hasCommitHashEquality := getCommitsFromCommitHashEquality(ctx, dt.ddb, dt.partitionFilters) 128 if hasCommitHashEquality { 129 return dt.newCommitHistoryRowItrFromCommits(ctx, cms) 130 } 131 iter := doltdb.CommitItrForRoots(dt.ddb, dt.head) 132 if dt.commitCheck != nil { 133 iter = doltdb.NewFilteringCommitItr(iter, dt.commitCheck) 134 } 135 return dt.newCommitHistoryRowItrFromItr(ctx, iter) 136 } else { 137 return nil, fmt.Errorf("unexpected partition: %v", partition) 138 } 139 } 140 } 141 142 // GetIndexes implements sql.IndexAddressable 143 func (dt *UnscopedDiffTable) GetIndexes(ctx *sql.Context) ([]sql.Index, error) { 144 return index.DoltCommitIndexes(dt.dbName, dt.Name(), dt.ddb, true) 145 } 146 147 // IndexedAccess implements sql.IndexAddressable 148 func (dt *UnscopedDiffTable) IndexedAccess(lookup sql.IndexLookup) sql.IndexedTable { 149 nt := *dt 150 return &nt 151 } 152 153 // PreciseMatch implements sql.IndexAddressable 154 func (dt *UnscopedDiffTable) PreciseMatch() bool { 155 return true 156 } 157 158 func (dt *UnscopedDiffTable) LookupPartitions(ctx *sql.Context, lookup sql.IndexLookup) (sql.PartitionIter, error) { 159 if lookup.Index.ID() == index.CommitHashIndexId { 160 hs, ok := index.LookupToPointSelectStr(lookup) 161 if !ok { 162 return nil, fmt.Errorf("failed to parse commit lookup ranges: %s", sql.DebugString(lookup.Ranges)) 163 } 164 hashes, commits, metas := index.HashesToCommits(ctx, dt.ddb, hs, dt.head, false) 165 if len(hashes) == 0 { 166 return sql.PartitionsToPartitionIter(), nil 167 } 168 169 headHash, err := dt.head.HashOf() 170 if err != nil { 171 return nil, err 172 } 173 var partitions []sql.Partition 174 for i, h := range hashes { 175 if h == headHash && commits[i] == nil { 176 partitions = append(partitions, newDoltDiffPartition(workingSetPartitionKey)) 177 } else { 178 partitions = append(partitions, doltdb.NewCommitPart(h, commits[i], metas[i])) 179 } 180 } 181 return sql.PartitionsToPartitionIter(partitions...), nil 182 } 183 184 return dt.Partitions(ctx) 185 } 186 187 func (dt *UnscopedDiffTable) newWorkingSetRowItr(ctx *sql.Context) (sql.RowIter, error) { 188 sess := dsess.DSessFromSess(ctx.Session) 189 roots, ok := sess.GetRoots(ctx, dt.dbName) 190 if !ok { 191 return nil, fmt.Errorf("unable to lookup roots for database %s", dt.dbName) 192 } 193 194 staged, unstaged, err := diff.GetStagedUnstagedTableDeltas(ctx, roots) 195 if err != nil { 196 return nil, err 197 } 198 199 var ri sql.RowIter 200 ri = &doltDiffWorkingSetRowItr{ 201 stagedTableDeltas: staged, 202 unstagedTableDeltas: unstaged, 203 } 204 205 for _, filter := range dt.partitionFilters { 206 ri = plan.NewFilterIter(filter, ri) 207 } 208 209 return ri, nil 210 } 211 212 var _ sql.RowIter = &doltDiffWorkingSetRowItr{} 213 214 type doltDiffWorkingSetRowItr struct { 215 stagedIndex int 216 unstagedIndex int 217 stagedTableDeltas []diff.TableDelta 218 unstagedTableDeltas []diff.TableDelta 219 } 220 221 func (d *doltDiffWorkingSetRowItr) Next(ctx *sql.Context) (sql.Row, error) { 222 var changeSet string 223 var tableDelta diff.TableDelta 224 if d.stagedIndex < len(d.stagedTableDeltas) { 225 changeSet = "STAGED" 226 tableDelta = d.stagedTableDeltas[d.stagedIndex] 227 d.stagedIndex++ 228 } else if d.unstagedIndex < len(d.unstagedTableDeltas) { 229 changeSet = "WORKING" 230 tableDelta = d.unstagedTableDeltas[d.unstagedIndex] 231 d.unstagedIndex++ 232 } else { 233 return nil, io.EOF 234 } 235 236 change, err := tableDelta.GetSummary(ctx) 237 if err != nil { 238 return nil, err 239 } 240 241 sqlRow := sql.NewRow( 242 changeSet, 243 change.TableName, 244 nil, // committer 245 nil, // email 246 nil, // date 247 nil, // message 248 change.DataChange, 249 change.SchemaChange, 250 ) 251 252 return sqlRow, nil 253 } 254 255 func (d *doltDiffWorkingSetRowItr) Close(c *sql.Context) error { 256 return nil 257 } 258 259 var _ sql.Partition = &doltDiffPartition{} 260 261 type doltDiffPartition struct { 262 key []byte 263 } 264 265 func newDoltDiffPartition(key []byte) *doltDiffPartition { 266 return &doltDiffPartition{ 267 key: key, 268 } 269 } 270 271 func (d doltDiffPartition) Key() []byte { 272 return d.key 273 } 274 275 // doltDiffCommitHistoryRowItr is a sql.RowItr implementation which iterates over each commit as if it's a row in the table. 276 type doltDiffCommitHistoryRowItr struct { 277 ctx *sql.Context 278 ddb *doltdb.DoltDB 279 child doltdb.CommitItr 280 commits []*doltdb.Commit 281 meta *datas.CommitMeta 282 hash hash.Hash 283 tableChanges []diff.TableDeltaSummary 284 tableChangesIdx int 285 } 286 287 // newCommitHistoryRowItr creates a doltDiffCommitHistoryRowItr from a CommitItr. 288 func (dt *UnscopedDiffTable) newCommitHistoryRowItrFromItr(ctx *sql.Context, iter doltdb.CommitItr) (*doltDiffCommitHistoryRowItr, error) { 289 dchItr := &doltDiffCommitHistoryRowItr{ 290 ctx: ctx, 291 ddb: dt.ddb, 292 tableChangesIdx: -1, 293 child: iter, 294 } 295 return dchItr, nil 296 } 297 298 // newCommitHistoryRowItr creates a doltDiffCommitHistoryRowItr from a list of commits. 299 func (dt *UnscopedDiffTable) newCommitHistoryRowItrFromCommits(ctx *sql.Context, commits []*doltdb.Commit) (*doltDiffCommitHistoryRowItr, error) { 300 dchItr := &doltDiffCommitHistoryRowItr{ 301 ctx: ctx, 302 ddb: dt.ddb, 303 tableChangesIdx: -1, 304 commits: commits, 305 } 306 return dchItr, nil 307 } 308 309 // incrementIndexes increments the table changes index, and if it's the end of the table changes array, moves 310 // to the next commit, and resets the table changes index so that it can be populated when Next() is called. 311 func (itr *doltDiffCommitHistoryRowItr) incrementIndexes() { 312 itr.tableChangesIdx++ 313 if itr.tableChangesIdx >= len(itr.tableChanges) { 314 itr.tableChangesIdx = -1 315 itr.tableChanges = nil 316 } 317 } 318 319 // Next retrieves the next row. It will return io.EOF if it's the last row. 320 // After retrieving the last row, Close will be automatically closed. 321 func (itr *doltDiffCommitHistoryRowItr) Next(ctx *sql.Context) (sql.Row, error) { 322 defer itr.incrementIndexes() 323 324 for itr.tableChanges == nil { 325 if itr.commits != nil { 326 for _, commit := range itr.commits { 327 err := itr.loadTableChanges(ctx, commit) 328 if err != nil { 329 return nil, err 330 } 331 } 332 itr.commits = nil 333 } else if itr.child != nil { 334 _, optCmt, err := itr.child.Next(ctx) 335 if err != nil { 336 return nil, err 337 } 338 commit, ok := optCmt.ToCommit() 339 if !ok { 340 return nil, io.EOF 341 } 342 343 err = itr.loadTableChanges(ctx, commit) 344 if err == doltdb.ErrGhostCommitEncountered { 345 // When showing the diff table in a shallow clone, we show as much of the dolt_history_{table} as we can, 346 // and don't consider it an error when we hit a ghost commit. 347 return nil, io.EOF 348 } 349 if err != nil { 350 return nil, err 351 } 352 353 } else { 354 return nil, io.EOF 355 } 356 } 357 358 tableChange := itr.tableChanges[itr.tableChangesIdx] 359 meta := itr.meta 360 h := itr.hash 361 362 return sql.NewRow( 363 h.String(), 364 tableChange.TableName, 365 meta.Name, 366 meta.Email, 367 meta.Time(), 368 meta.Description, 369 tableChange.DataChange, 370 tableChange.SchemaChange, 371 ), nil 372 } 373 374 // loadTableChanges loads the current commit's table changes and metadata 375 // into the iterator. 376 func (itr *doltDiffCommitHistoryRowItr) loadTableChanges(ctx context.Context, commit *doltdb.Commit) error { 377 tableChanges, err := itr.calculateTableChanges(ctx, commit) 378 if err != nil { 379 return err 380 } 381 382 itr.tableChanges = tableChanges 383 itr.tableChangesIdx = 0 384 if len(tableChanges) == 0 { 385 return nil 386 } 387 388 meta, err := commit.GetCommitMeta(ctx) 389 if err != nil { 390 return err 391 } 392 itr.meta = meta 393 394 cmHash, err := commit.HashOf() 395 if err != nil { 396 return err 397 } 398 itr.hash = cmHash 399 400 return nil 401 } 402 403 // calculateTableChanges calculates the tables that changed in the specified commit, by comparing that 404 // commit with its immediate ancestor commit. 405 func (itr *doltDiffCommitHistoryRowItr) calculateTableChanges(ctx context.Context, commit *doltdb.Commit) ([]diff.TableDeltaSummary, error) { 406 if len(commit.DatasParents()) == 0 { 407 return nil, nil 408 } 409 410 toRootValue, err := commit.GetRootValue(ctx) 411 if err != nil { 412 return nil, err 413 } 414 415 optCmt, err := itr.ddb.ResolveParent(ctx, commit, 0) 416 if err != nil { 417 return nil, err 418 } 419 parent, ok := optCmt.ToCommit() 420 if !ok { 421 return nil, doltdb.ErrGhostCommitEncountered 422 } 423 424 fromRootValue, err := parent.GetRootValue(ctx) 425 if err != nil { 426 return nil, err 427 } 428 429 deltas, err := diff.GetTableDeltas(ctx, fromRootValue, toRootValue) 430 if err != nil { 431 return nil, err 432 } 433 434 tableChanges := make([]diff.TableDeltaSummary, len(deltas)) 435 for i := 0; i < len(deltas); i++ { 436 change, err := deltas[i].GetSummary(itr.ctx) 437 if err != nil { 438 return nil, err 439 } 440 441 tableChanges[i] = *change 442 } 443 444 // Not all commits mutate tables (e.g. empty commits) 445 if len(tableChanges) == 0 { 446 return nil, nil 447 } 448 449 return tableChanges, nil 450 } 451 452 // Close closes the iterator. 453 func (itr *doltDiffCommitHistoryRowItr) Close(*sql.Context) error { 454 return nil 455 } 456 457 // isTableDataEmpty return true if the table does not contain any data 458 func isTableDataEmpty(ctx *sql.Context, table *doltdb.Table) (bool, error) { 459 rowData, err := table.GetRowData(ctx) 460 if err != nil { 461 return false, err 462 } 463 464 return rowData.Empty() 465 } 466 467 // commitFilterForDiffTableFilterExprs returns CommitFilter used for CommitItr. 468 func commitFilterForDiffTableFilterExprs(filters []sql.Expression) (doltdb.CommitFilter, error) { 469 filters = transformFilters(filters...) 470 471 return func(ctx context.Context, h hash.Hash, optCmt *doltdb.OptionalCommit) (filterOut bool, err error) { 472 sc := sql.NewContext(ctx) 473 474 cm, ok := optCmt.ToCommit() 475 if !ok { 476 return false, doltdb.ErrGhostCommitEncountered 477 } 478 479 meta, err := cm.GetCommitMeta(ctx) 480 if err != nil { 481 return false, err 482 } 483 for _, filter := range filters { 484 res, err := filter.Eval(sc, sql.Row{h.String(), meta.Name, meta.Time()}) 485 if err != nil { 486 return false, err 487 } 488 b, ok := res.(bool) 489 if ok && !b { 490 return true, nil 491 } 492 } 493 494 return false, err 495 }, nil 496 } 497 498 // transformFilters return filter expressions with index specified for rows used in CommitFilter. 499 func transformFilters(filters ...sql.Expression) []sql.Expression { 500 for i := range filters { 501 filters[i], _, _ = transform.Expr(filters[i], func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) { 502 gf, ok := e.(*expression.GetField) 503 if !ok { 504 return e, transform.SameTree, nil 505 } 506 switch gf.Name() { 507 case commitHashCol: 508 return gf.WithIndex(0), transform.NewTree, nil 509 default: 510 return gf, transform.SameTree, nil 511 } 512 }) 513 } 514 return filters 515 } 516 517 func getCommitsFromCommitHashEquality(ctx *sql.Context, ddb *doltdb.DoltDB, filters []sql.Expression) ([]*doltdb.Commit, bool) { 518 var commits []*doltdb.Commit 519 var isCommitHashEquality bool 520 for i := range filters { 521 switch f := filters[i].(type) { 522 case *expression.Equals: 523 v, err := f.Right().Eval(ctx, nil) 524 if err == nil { 525 isCommitHashEquality = true 526 cm := getCommitFromHash(ctx, ddb, v.(string)) 527 if cm != nil { 528 commits = append(commits, cm) 529 } 530 } 531 case *expression.InTuple: 532 switch r := f.Right().(type) { 533 case expression.Tuple: 534 right, err := r.Eval(ctx, nil) 535 if err == nil && right != nil { 536 isCommitHashEquality = true 537 if len(r) == 1 { 538 cm := getCommitFromHash(ctx, ddb, right.(string)) 539 if cm != nil { 540 commits = append(commits, cm) 541 } 542 } else { 543 for _, el := range right.([]interface{}) { 544 cm := getCommitFromHash(ctx, ddb, el.(string)) 545 if cm != nil { 546 commits = append(commits, cm) 547 } 548 } 549 } 550 } 551 } 552 } 553 } 554 return commits, isCommitHashEquality 555 } 556 557 func getCommitFromHash(ctx *sql.Context, ddb *doltdb.DoltDB, val string) *doltdb.Commit { 558 cmSpec, err := doltdb.NewCommitSpec(val) 559 if err != nil { 560 return nil 561 } 562 headRef, err := dsess.DSessFromSess(ctx.Session).CWBHeadRef(ctx, ctx.GetCurrentDatabase()) 563 if err != nil { 564 return nil 565 } 566 optCmt, err := ddb.Resolve(ctx, cmSpec, headRef) 567 if err != nil { 568 return nil 569 } 570 cm, ok := optCmt.ToCommit() 571 if !ok { 572 return nil 573 } 574 575 return cm 576 }