github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/dtables/unscoped_diff_table.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package dtables
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"io"
    22  
    23  	"github.com/dolthub/go-mysql-server/sql"
    24  	"github.com/dolthub/go-mysql-server/sql/expression"
    25  	"github.com/dolthub/go-mysql-server/sql/plan"
    26  	"github.com/dolthub/go-mysql-server/sql/transform"
    27  	"github.com/dolthub/go-mysql-server/sql/types"
    28  
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/diff"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    31  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    32  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
    33  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
    34  	"github.com/dolthub/dolt/go/libraries/utils/set"
    35  	"github.com/dolthub/dolt/go/store/datas"
    36  	"github.com/dolthub/dolt/go/store/hash"
    37  )
    38  
    39  const unscopedDiffDefaultRowCount = 1000
    40  
    41  var workingSetPartitionKey = []byte("workingset")
    42  var commitHistoryPartitionKey = []byte("commithistory")
    43  var commitHashCol = "commit_hash"
    44  var filterColumnNameSet = set.NewStrSet([]string{commitHashCol})
    45  
// UnscopedDiffTable is a sql.Table implementation of a system table that shows which tables have
// changed in each commit, across all branches.
type UnscopedDiffTable struct {
	// dbName is reported as the DatabaseSource of every column and is used to look up the
	// session roots when computing working set changes.
	dbName           string
	// ddb is the database used to resolve commits and their parents.
	ddb              *doltdb.DoltDB
	// head is the commit from which the commit history is walked; its hash is also compared
	// against looked-up hashes in LookupPartitions.
	head             *doltdb.Commit
	// partitionFilters are applied to working set rows and inspected for commit hash
	// equality / IN expressions when reading the commit history partition.
	partitionFilters []sql.Expression
	// commitCheck, when non-nil, wraps the commit history iterator in a filtering iterator.
	commitCheck      doltdb.CommitFilter
}
    55  
    56  var _ sql.Table = (*UnscopedDiffTable)(nil)
    57  var _ sql.StatisticsTable = (*UnscopedDiffTable)(nil)
    58  var _ sql.IndexAddressable = (*UnscopedDiffTable)(nil)
    59  
    60  // NewUnscopedDiffTable creates an UnscopedDiffTable
    61  func NewUnscopedDiffTable(_ *sql.Context, dbName string, ddb *doltdb.DoltDB, head *doltdb.Commit) sql.Table {
    62  	return &UnscopedDiffTable{dbName: dbName, ddb: ddb, head: head}
    63  }
    64  
    65  func (dt *UnscopedDiffTable) DataLength(ctx *sql.Context) (uint64, error) {
    66  	numBytesPerRow := schema.SchemaAvgLength(dt.Schema())
    67  	numRows, _, err := dt.RowCount(ctx)
    68  	if err != nil {
    69  		return 0, err
    70  	}
    71  	return numBytesPerRow * numRows, nil
    72  }
    73  
// RowCount returns a fixed estimate (unscopedDiffDefaultRowCount) rather than counting rows.
// The second return value is always false — presumably signalling that the count is not exact;
// confirm against the sql.StatisticsTable contract.
func (dt *UnscopedDiffTable) RowCount(_ *sql.Context) (uint64, bool, error) {
	return unscopedDiffDefaultRowCount, false, nil
}
    77  
// Name is a sql.Table interface function which returns the name of the table, which is defined by
// the constant doltdb.DiffTableName.
func (dt *UnscopedDiffTable) Name() string {
	return doltdb.DiffTableName
}
    83  
// String is a sql.Table interface function which returns the string form of the table. It is the
// same value as Name: the constant doltdb.DiffTableName.
func (dt *UnscopedDiffTable) String() string {
	return doltdb.DiffTableName
}
    89  
    90  // Schema is a sql.Table interface function that returns the sql.Schema for this system table.
    91  func (dt *UnscopedDiffTable) Schema() sql.Schema {
    92  	return []*sql.Column{
    93  		{Name: "commit_hash", Type: types.Text, Source: doltdb.DiffTableName, PrimaryKey: true, DatabaseSource: dt.dbName},
    94  		{Name: "table_name", Type: types.Text, Source: doltdb.DiffTableName, PrimaryKey: true, DatabaseSource: dt.dbName},
    95  		{Name: "committer", Type: types.Text, Source: doltdb.DiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName},
    96  		{Name: "email", Type: types.Text, Source: doltdb.DiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName},
    97  		{Name: "date", Type: types.Datetime, Source: doltdb.DiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName},
    98  		{Name: "message", Type: types.Text, Source: doltdb.DiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName},
    99  		{Name: "data_change", Type: types.Boolean, Source: doltdb.DiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName},
   100  		{Name: "schema_change", Type: types.Boolean, Source: doltdb.DiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName},
   101  	}
   102  }
   103  
// Collation implements the sql.Table interface. It always returns sql.Collation_Default.
func (dt *UnscopedDiffTable) Collation() sql.CollationID {
	return sql.Collation_Default
}
   108  
   109  // Partitions is a sql.Table interface function that returns a partition of the data. Returns one
   110  // partition for working set changes and one partition for all commit history.
   111  func (dt *UnscopedDiffTable) Partitions(ctx *sql.Context) (sql.PartitionIter, error) {
   112  	return NewSliceOfPartitionsItr([]sql.Partition{
   113  		newDoltDiffPartition(workingSetPartitionKey),
   114  		newDoltDiffPartition(commitHistoryPartitionKey),
   115  	}), nil
   116  }
   117  
   118  // PartitionRows is a sql.Table interface function that gets a row iterator for a partition.
   119  func (dt *UnscopedDiffTable) PartitionRows(ctx *sql.Context, partition sql.Partition) (sql.RowIter, error) {
   120  	switch p := partition.(type) {
   121  	case *doltdb.CommitPart:
   122  		return dt.newCommitHistoryRowItrFromCommits(ctx, []*doltdb.Commit{p.Commit()})
   123  	default:
   124  		if bytes.Equal(partition.Key(), workingSetPartitionKey) {
   125  			return dt.newWorkingSetRowItr(ctx)
   126  		} else if bytes.Equal(partition.Key(), commitHistoryPartitionKey) {
   127  			cms, hasCommitHashEquality := getCommitsFromCommitHashEquality(ctx, dt.ddb, dt.partitionFilters)
   128  			if hasCommitHashEquality {
   129  				return dt.newCommitHistoryRowItrFromCommits(ctx, cms)
   130  			}
   131  			iter := doltdb.CommitItrForRoots(dt.ddb, dt.head)
   132  			if dt.commitCheck != nil {
   133  				iter = doltdb.NewFilteringCommitItr(iter, dt.commitCheck)
   134  			}
   135  			return dt.newCommitHistoryRowItrFromItr(ctx, iter)
   136  		} else {
   137  			return nil, fmt.Errorf("unexpected partition: %v", partition)
   138  		}
   139  	}
   140  }
   141  
// GetIndexes implements sql.IndexAddressable. It returns the commit indexes built by
// index.DoltCommitIndexes for this table's database, name and underlying DoltDB.
func (dt *UnscopedDiffTable) GetIndexes(ctx *sql.Context) ([]sql.Index, error) {
	return index.DoltCommitIndexes(dt.dbName, dt.Name(), dt.ddb, true)
}
   146  
   147  // IndexedAccess implements sql.IndexAddressable
   148  func (dt *UnscopedDiffTable) IndexedAccess(lookup sql.IndexLookup) sql.IndexedTable {
   149  	nt := *dt
   150  	return &nt
   151  }
   152  
// PreciseMatch implements sql.IndexAddressable. It always reports true.
func (dt *UnscopedDiffTable) PreciseMatch() bool {
	return true
}
   157  
   158  func (dt *UnscopedDiffTable) LookupPartitions(ctx *sql.Context, lookup sql.IndexLookup) (sql.PartitionIter, error) {
   159  	if lookup.Index.ID() == index.CommitHashIndexId {
   160  		hs, ok := index.LookupToPointSelectStr(lookup)
   161  		if !ok {
   162  			return nil, fmt.Errorf("failed to parse commit lookup ranges: %s", sql.DebugString(lookup.Ranges))
   163  		}
   164  		hashes, commits, metas := index.HashesToCommits(ctx, dt.ddb, hs, dt.head, false)
   165  		if len(hashes) == 0 {
   166  			return sql.PartitionsToPartitionIter(), nil
   167  		}
   168  
   169  		headHash, err := dt.head.HashOf()
   170  		if err != nil {
   171  			return nil, err
   172  		}
   173  		var partitions []sql.Partition
   174  		for i, h := range hashes {
   175  			if h == headHash && commits[i] == nil {
   176  				partitions = append(partitions, newDoltDiffPartition(workingSetPartitionKey))
   177  			} else {
   178  				partitions = append(partitions, doltdb.NewCommitPart(h, commits[i], metas[i]))
   179  			}
   180  		}
   181  		return sql.PartitionsToPartitionIter(partitions...), nil
   182  	}
   183  
   184  	return dt.Partitions(ctx)
   185  }
   186  
   187  func (dt *UnscopedDiffTable) newWorkingSetRowItr(ctx *sql.Context) (sql.RowIter, error) {
   188  	sess := dsess.DSessFromSess(ctx.Session)
   189  	roots, ok := sess.GetRoots(ctx, dt.dbName)
   190  	if !ok {
   191  		return nil, fmt.Errorf("unable to lookup roots for database %s", dt.dbName)
   192  	}
   193  
   194  	staged, unstaged, err := diff.GetStagedUnstagedTableDeltas(ctx, roots)
   195  	if err != nil {
   196  		return nil, err
   197  	}
   198  
   199  	var ri sql.RowIter
   200  	ri = &doltDiffWorkingSetRowItr{
   201  		stagedTableDeltas:   staged,
   202  		unstagedTableDeltas: unstaged,
   203  	}
   204  
   205  	for _, filter := range dt.partitionFilters {
   206  		ri = plan.NewFilterIter(filter, ri)
   207  	}
   208  
   209  	return ri, nil
   210  }
   211  
// Compile-time check that *doltDiffWorkingSetRowItr implements sql.RowIter.
var _ sql.RowIter = &doltDiffWorkingSetRowItr{}

// doltDiffWorkingSetRowItr iterates over working set changes, producing one row per table delta:
// all staged deltas first, then all unstaged deltas.
type doltDiffWorkingSetRowItr struct {
	// stagedIndex is the position of the next staged delta to emit.
	stagedIndex         int
	// unstagedIndex is the position of the next unstaged delta to emit.
	unstagedIndex       int
	// stagedTableDeltas are emitted with "STAGED" in the commit hash column.
	stagedTableDeltas   []diff.TableDelta
	// unstagedTableDeltas are emitted with "WORKING" in the commit hash column.
	unstagedTableDeltas []diff.TableDelta
}
   220  
   221  func (d *doltDiffWorkingSetRowItr) Next(ctx *sql.Context) (sql.Row, error) {
   222  	var changeSet string
   223  	var tableDelta diff.TableDelta
   224  	if d.stagedIndex < len(d.stagedTableDeltas) {
   225  		changeSet = "STAGED"
   226  		tableDelta = d.stagedTableDeltas[d.stagedIndex]
   227  		d.stagedIndex++
   228  	} else if d.unstagedIndex < len(d.unstagedTableDeltas) {
   229  		changeSet = "WORKING"
   230  		tableDelta = d.unstagedTableDeltas[d.unstagedIndex]
   231  		d.unstagedIndex++
   232  	} else {
   233  		return nil, io.EOF
   234  	}
   235  
   236  	change, err := tableDelta.GetSummary(ctx)
   237  	if err != nil {
   238  		return nil, err
   239  	}
   240  
   241  	sqlRow := sql.NewRow(
   242  		changeSet,
   243  		change.TableName,
   244  		nil, // committer
   245  		nil, // email
   246  		nil, // date
   247  		nil, // message
   248  		change.DataChange,
   249  		change.SchemaChange,
   250  	)
   251  
   252  	return sqlRow, nil
   253  }
   254  
// Close is a no-op; the iterator holds nothing that needs releasing.
func (d *doltDiffWorkingSetRowItr) Close(c *sql.Context) error {
	return nil
}
   258  
// Compile-time check that *doltDiffPartition implements sql.Partition.
var _ sql.Partition = &doltDiffPartition{}
   260  
   261  type doltDiffPartition struct {
   262  	key []byte
   263  }
   264  
   265  func newDoltDiffPartition(key []byte) *doltDiffPartition {
   266  	return &doltDiffPartition{
   267  		key: key,
   268  	}
   269  }
   270  
   271  func (d doltDiffPartition) Key() []byte {
   272  	return d.key
   273  }
   274  
// doltDiffCommitHistoryRowItr is a sql.RowIter implementation which iterates over commits and
// emits one row per table changed in each commit. Commits come either from an explicit list
// (commits) or from a commit iterator (child).
type doltDiffCommitHistoryRowItr struct {
	// ctx is the context captured at construction time; calculateTableChanges uses it when
	// summarizing table deltas.
	ctx             *sql.Context
	// ddb is used to resolve the parent of each commit.
	ddb             *doltdb.DoltDB
	// child supplies commits when the iterator was built from a CommitItr; nil otherwise.
	child           doltdb.CommitItr
	// commits is the explicit list of commits to report on; Next consumes it.
	commits         []*doltdb.Commit
	// meta is the metadata of the commit whose table changes are currently loaded.
	meta            *datas.CommitMeta
	// hash is the hash of the commit whose table changes are currently loaded.
	hash            hash.Hash
	// tableChanges are the table summaries of the current commit; nil means another commit
	// must be loaded before a row can be produced.
	tableChanges    []diff.TableDeltaSummary
	// tableChangesIdx is the index into tableChanges of the next row; -1 when nothing is loaded.
	tableChangesIdx int
}
   286  
   287  // newCommitHistoryRowItr creates a doltDiffCommitHistoryRowItr from a CommitItr.
   288  func (dt *UnscopedDiffTable) newCommitHistoryRowItrFromItr(ctx *sql.Context, iter doltdb.CommitItr) (*doltDiffCommitHistoryRowItr, error) {
   289  	dchItr := &doltDiffCommitHistoryRowItr{
   290  		ctx:             ctx,
   291  		ddb:             dt.ddb,
   292  		tableChangesIdx: -1,
   293  		child:           iter,
   294  	}
   295  	return dchItr, nil
   296  }
   297  
   298  // newCommitHistoryRowItr creates a doltDiffCommitHistoryRowItr from a list of commits.
   299  func (dt *UnscopedDiffTable) newCommitHistoryRowItrFromCommits(ctx *sql.Context, commits []*doltdb.Commit) (*doltDiffCommitHistoryRowItr, error) {
   300  	dchItr := &doltDiffCommitHistoryRowItr{
   301  		ctx:             ctx,
   302  		ddb:             dt.ddb,
   303  		tableChangesIdx: -1,
   304  		commits:         commits,
   305  	}
   306  	return dchItr, nil
   307  }
   308  
   309  // incrementIndexes increments the table changes index, and if it's the end of the table changes array, moves
   310  // to the next commit, and resets the table changes index so that it can be populated when Next() is called.
   311  func (itr *doltDiffCommitHistoryRowItr) incrementIndexes() {
   312  	itr.tableChangesIdx++
   313  	if itr.tableChangesIdx >= len(itr.tableChanges) {
   314  		itr.tableChangesIdx = -1
   315  		itr.tableChanges = nil
   316  	}
   317  }
   318  
   319  // Next retrieves the next row. It will return io.EOF if it's the last row.
   320  // After retrieving the last row, Close will be automatically closed.
   321  func (itr *doltDiffCommitHistoryRowItr) Next(ctx *sql.Context) (sql.Row, error) {
   322  	defer itr.incrementIndexes()
   323  
   324  	for itr.tableChanges == nil {
   325  		if itr.commits != nil {
   326  			for _, commit := range itr.commits {
   327  				err := itr.loadTableChanges(ctx, commit)
   328  				if err != nil {
   329  					return nil, err
   330  				}
   331  			}
   332  			itr.commits = nil
   333  		} else if itr.child != nil {
   334  			_, optCmt, err := itr.child.Next(ctx)
   335  			if err != nil {
   336  				return nil, err
   337  			}
   338  			commit, ok := optCmt.ToCommit()
   339  			if !ok {
   340  				return nil, io.EOF
   341  			}
   342  
   343  			err = itr.loadTableChanges(ctx, commit)
   344  			if err == doltdb.ErrGhostCommitEncountered {
   345  				// When showing the diff table in a shallow clone, we show as much of the dolt_history_{table} as we can,
   346  				// and don't consider it an error when we hit a ghost commit.
   347  				return nil, io.EOF
   348  			}
   349  			if err != nil {
   350  				return nil, err
   351  			}
   352  
   353  		} else {
   354  			return nil, io.EOF
   355  		}
   356  	}
   357  
   358  	tableChange := itr.tableChanges[itr.tableChangesIdx]
   359  	meta := itr.meta
   360  	h := itr.hash
   361  
   362  	return sql.NewRow(
   363  		h.String(),
   364  		tableChange.TableName,
   365  		meta.Name,
   366  		meta.Email,
   367  		meta.Time(),
   368  		meta.Description,
   369  		tableChange.DataChange,
   370  		tableChange.SchemaChange,
   371  	), nil
   372  }
   373  
   374  // loadTableChanges loads the current commit's table changes and metadata
   375  // into the iterator.
   376  func (itr *doltDiffCommitHistoryRowItr) loadTableChanges(ctx context.Context, commit *doltdb.Commit) error {
   377  	tableChanges, err := itr.calculateTableChanges(ctx, commit)
   378  	if err != nil {
   379  		return err
   380  	}
   381  
   382  	itr.tableChanges = tableChanges
   383  	itr.tableChangesIdx = 0
   384  	if len(tableChanges) == 0 {
   385  		return nil
   386  	}
   387  
   388  	meta, err := commit.GetCommitMeta(ctx)
   389  	if err != nil {
   390  		return err
   391  	}
   392  	itr.meta = meta
   393  
   394  	cmHash, err := commit.HashOf()
   395  	if err != nil {
   396  		return err
   397  	}
   398  	itr.hash = cmHash
   399  
   400  	return nil
   401  }
   402  
   403  // calculateTableChanges calculates the tables that changed in the specified commit, by comparing that
   404  // commit with its immediate ancestor commit.
   405  func (itr *doltDiffCommitHistoryRowItr) calculateTableChanges(ctx context.Context, commit *doltdb.Commit) ([]diff.TableDeltaSummary, error) {
   406  	if len(commit.DatasParents()) == 0 {
   407  		return nil, nil
   408  	}
   409  
   410  	toRootValue, err := commit.GetRootValue(ctx)
   411  	if err != nil {
   412  		return nil, err
   413  	}
   414  
   415  	optCmt, err := itr.ddb.ResolveParent(ctx, commit, 0)
   416  	if err != nil {
   417  		return nil, err
   418  	}
   419  	parent, ok := optCmt.ToCommit()
   420  	if !ok {
   421  		return nil, doltdb.ErrGhostCommitEncountered
   422  	}
   423  
   424  	fromRootValue, err := parent.GetRootValue(ctx)
   425  	if err != nil {
   426  		return nil, err
   427  	}
   428  
   429  	deltas, err := diff.GetTableDeltas(ctx, fromRootValue, toRootValue)
   430  	if err != nil {
   431  		return nil, err
   432  	}
   433  
   434  	tableChanges := make([]diff.TableDeltaSummary, len(deltas))
   435  	for i := 0; i < len(deltas); i++ {
   436  		change, err := deltas[i].GetSummary(itr.ctx)
   437  		if err != nil {
   438  			return nil, err
   439  		}
   440  
   441  		tableChanges[i] = *change
   442  	}
   443  
   444  	// Not all commits mutate tables (e.g. empty commits)
   445  	if len(tableChanges) == 0 {
   446  		return nil, nil
   447  	}
   448  
   449  	return tableChanges, nil
   450  }
   451  
// Close closes the iterator. It is a no-op and always returns nil; the child commit iterator,
// if any, is not touched.
func (itr *doltDiffCommitHistoryRowItr) Close(*sql.Context) error {
	return nil
}
   456  
   457  // isTableDataEmpty return true if the table does not contain any data
   458  func isTableDataEmpty(ctx *sql.Context, table *doltdb.Table) (bool, error) {
   459  	rowData, err := table.GetRowData(ctx)
   460  	if err != nil {
   461  		return false, err
   462  	}
   463  
   464  	return rowData.Empty()
   465  }
   466  
   467  // commitFilterForDiffTableFilterExprs returns CommitFilter used for CommitItr.
   468  func commitFilterForDiffTableFilterExprs(filters []sql.Expression) (doltdb.CommitFilter, error) {
   469  	filters = transformFilters(filters...)
   470  
   471  	return func(ctx context.Context, h hash.Hash, optCmt *doltdb.OptionalCommit) (filterOut bool, err error) {
   472  		sc := sql.NewContext(ctx)
   473  
   474  		cm, ok := optCmt.ToCommit()
   475  		if !ok {
   476  			return false, doltdb.ErrGhostCommitEncountered
   477  		}
   478  
   479  		meta, err := cm.GetCommitMeta(ctx)
   480  		if err != nil {
   481  			return false, err
   482  		}
   483  		for _, filter := range filters {
   484  			res, err := filter.Eval(sc, sql.Row{h.String(), meta.Name, meta.Time()})
   485  			if err != nil {
   486  				return false, err
   487  			}
   488  			b, ok := res.(bool)
   489  			if ok && !b {
   490  				return true, nil
   491  			}
   492  		}
   493  
   494  		return false, err
   495  	}, nil
   496  }
   497  
   498  // transformFilters return filter expressions with index specified for rows used in CommitFilter.
   499  func transformFilters(filters ...sql.Expression) []sql.Expression {
   500  	for i := range filters {
   501  		filters[i], _, _ = transform.Expr(filters[i], func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) {
   502  			gf, ok := e.(*expression.GetField)
   503  			if !ok {
   504  				return e, transform.SameTree, nil
   505  			}
   506  			switch gf.Name() {
   507  			case commitHashCol:
   508  				return gf.WithIndex(0), transform.NewTree, nil
   509  			default:
   510  				return gf, transform.SameTree, nil
   511  			}
   512  		})
   513  	}
   514  	return filters
   515  }
   516  
   517  func getCommitsFromCommitHashEquality(ctx *sql.Context, ddb *doltdb.DoltDB, filters []sql.Expression) ([]*doltdb.Commit, bool) {
   518  	var commits []*doltdb.Commit
   519  	var isCommitHashEquality bool
   520  	for i := range filters {
   521  		switch f := filters[i].(type) {
   522  		case *expression.Equals:
   523  			v, err := f.Right().Eval(ctx, nil)
   524  			if err == nil {
   525  				isCommitHashEquality = true
   526  				cm := getCommitFromHash(ctx, ddb, v.(string))
   527  				if cm != nil {
   528  					commits = append(commits, cm)
   529  				}
   530  			}
   531  		case *expression.InTuple:
   532  			switch r := f.Right().(type) {
   533  			case expression.Tuple:
   534  				right, err := r.Eval(ctx, nil)
   535  				if err == nil && right != nil {
   536  					isCommitHashEquality = true
   537  					if len(r) == 1 {
   538  						cm := getCommitFromHash(ctx, ddb, right.(string))
   539  						if cm != nil {
   540  							commits = append(commits, cm)
   541  						}
   542  					} else {
   543  						for _, el := range right.([]interface{}) {
   544  							cm := getCommitFromHash(ctx, ddb, el.(string))
   545  							if cm != nil {
   546  								commits = append(commits, cm)
   547  							}
   548  						}
   549  					}
   550  				}
   551  			}
   552  		}
   553  	}
   554  	return commits, isCommitHashEquality
   555  }
   556  
   557  func getCommitFromHash(ctx *sql.Context, ddb *doltdb.DoltDB, val string) *doltdb.Commit {
   558  	cmSpec, err := doltdb.NewCommitSpec(val)
   559  	if err != nil {
   560  		return nil
   561  	}
   562  	headRef, err := dsess.DSessFromSess(ctx.Session).CWBHeadRef(ctx, ctx.GetCurrentDatabase())
   563  	if err != nil {
   564  		return nil
   565  	}
   566  	optCmt, err := ddb.Resolve(ctx, cmSpec, headRef)
   567  	if err != nil {
   568  		return nil
   569  	}
   570  	cm, ok := optCmt.ToCommit()
   571  	if !ok {
   572  		return nil
   573  	}
   574  
   575  	return cm
   576  }