github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/dtables/column_diff_table.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package dtables
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"io"
    22  	"time"
    23  
    24  	"github.com/dolthub/go-mysql-server/sql"
    25  	"github.com/dolthub/go-mysql-server/sql/plan"
    26  	"github.com/dolthub/go-mysql-server/sql/types"
    27  
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/diff"
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    31  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dsess"
    32  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
    33  	"github.com/dolthub/dolt/go/store/datas"
    34  	"github.com/dolthub/dolt/go/store/hash"
    35  	dtypes "github.com/dolthub/dolt/go/store/types"
    36  )
    37  
// columnDiffDefaultRowCount is the fixed row-count estimate that ColumnDiffTable.RowCount reports.
const columnDiffDefaultRowCount = 100
    39  
// ColumnDiffTable is a sql.Table implementation of a system table that shows which tables and columns have
// changed in each commit, across all branches.
type ColumnDiffTable struct {
	// dbName is reported as the DatabaseSource of every schema column and is used to look up the session roots
	// when computing working set changes.
	dbName           string
	// ddb is the database used to resolve commits and their parents.
	ddb              *doltdb.DoltDB
	// head is the commit that commit-history iteration is rooted at.
	head             *doltdb.Commit
	// partitionFilters are applied to working set rows and inspected for commit hash equality when reading
	// commit history. Nothing in this file assigns them.
	partitionFilters []sql.Expression
	// commitCheck, when non-nil, filters the commits visited while walking commit history.
	commitCheck      doltdb.CommitFilter
}
    49  
// Compile-time assertions that *ColumnDiffTable satisfies the interfaces it is exposed through.
var _ sql.Table = (*ColumnDiffTable)(nil)
var _ sql.StatisticsTable = (*ColumnDiffTable)(nil)

// NOTE(review): the IndexAddressable assertion is disabled even though LookupPartitions is implemented in this
// file; confirm whether index lookups are intentionally turned off.
// var _ sql.IndexAddressable = (*ColumnDiffTable)(nil)
    54  
    55  // NewColumnDiffTable creates an ColumnDiffTable
    56  func NewColumnDiffTable(_ *sql.Context, dbName string, ddb *doltdb.DoltDB, head *doltdb.Commit) sql.Table {
    57  	return &ColumnDiffTable{dbName: dbName, ddb: ddb, head: head}
    58  }
    59  
// Name is a sql.Table interface function which returns the name of the table, which is defined by the constant
// doltdb.ColumnDiffTableName.
func (dt *ColumnDiffTable) Name() string {
	return doltdb.ColumnDiffTableName
}
    65  
    66  func (dt *ColumnDiffTable) DataLength(ctx *sql.Context) (uint64, error) {
    67  	numBytesPerRow := schema.SchemaAvgLength(dt.Schema())
    68  	numRows, _, err := dt.RowCount(ctx)
    69  	if err != nil {
    70  		return 0, err
    71  	}
    72  	return numBytesPerRow * numRows, nil
    73  }
    74  
// RowCount returns the constant estimate columnDiffDefaultRowCount rather than counting rows; it never fails.
// NOTE(review): the false second result presumably marks the count as inexact — confirm against the
// sql.StatisticsTable contract.
func (dt *ColumnDiffTable) RowCount(_ *sql.Context) (uint64, bool, error) {
	return columnDiffDefaultRowCount, false, nil
}
    78  
// String is a sql.Table interface function which returns the name of the table, which is defined by the constant
// doltdb.ColumnDiffTableName. It returns the same value as Name.
func (dt *ColumnDiffTable) String() string {
	return doltdb.ColumnDiffTableName
}
    84  
    85  // Schema is a sql.Table interface function that returns the sql.Schema for this system table.
    86  func (dt *ColumnDiffTable) Schema() sql.Schema {
    87  	return []*sql.Column{
    88  		{Name: "commit_hash", Type: types.Text, Source: doltdb.ColumnDiffTableName, PrimaryKey: true, DatabaseSource: dt.dbName},
    89  		{Name: "table_name", Type: types.Text, Source: doltdb.ColumnDiffTableName, PrimaryKey: true, DatabaseSource: dt.dbName},
    90  		{Name: "column_name", Type: types.Text, Source: doltdb.ColumnDiffTableName, PrimaryKey: true, DatabaseSource: dt.dbName},
    91  		{Name: "committer", Type: types.Text, Source: doltdb.ColumnDiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName},
    92  		{Name: "email", Type: types.Text, Source: doltdb.ColumnDiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName},
    93  		{Name: "date", Type: types.Datetime, Source: doltdb.ColumnDiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName},
    94  		{Name: "message", Type: types.Text, Source: doltdb.ColumnDiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName},
    95  		{Name: "diff_type", Type: types.Text, Source: doltdb.ColumnDiffTableName, PrimaryKey: false, DatabaseSource: dt.dbName},
    96  	}
    97  }
    98  
// Partitions is a sql.Table interface function that returns the partitions of this table. It always yields
// exactly two partitions: one keyed by workingSetPartitionKey for working set changes and one keyed by
// commitHistoryPartitionKey for all commit history. ctx is not used and the returned error is always nil.
func (dt *ColumnDiffTable) Partitions(ctx *sql.Context) (sql.PartitionIter, error) {
	return NewSliceOfPartitionsItr([]sql.Partition{
		newDoltDiffPartition(workingSetPartitionKey),
		newDoltDiffPartition(commitHistoryPartitionKey),
	}), nil
}
   107  
   108  // PartitionRows is a sql.Table interface function that gets a row iterator for a partition.
   109  func (dt *ColumnDiffTable) PartitionRows(ctx *sql.Context, partition sql.Partition) (sql.RowIter, error) {
   110  	switch p := partition.(type) {
   111  	case *doltdb.CommitPart:
   112  		return dt.newCommitHistoryRowItrFromCommits(ctx, []*doltdb.Commit{p.Commit()})
   113  	default:
   114  		if bytes.Equal(partition.Key(), workingSetPartitionKey) {
   115  			return dt.newWorkingSetRowItr(ctx)
   116  		} else if bytes.Equal(partition.Key(), commitHistoryPartitionKey) {
   117  			cms, hasCommitHashEquality := getCommitsFromCommitHashEquality(ctx, dt.ddb, dt.partitionFilters)
   118  			if hasCommitHashEquality {
   119  				return dt.newCommitHistoryRowItrFromCommits(ctx, cms)
   120  			}
   121  			iter := doltdb.CommitItrForRoots(dt.ddb, dt.head)
   122  			if dt.commitCheck != nil {
   123  				iter = doltdb.NewFilteringCommitItr(iter, dt.commitCheck)
   124  			}
   125  			return dt.newCommitHistoryRowItrFromItr(ctx, iter)
   126  		} else {
   127  			return nil, fmt.Errorf("unexpected partition: %v", partition)
   128  		}
   129  	}
   130  }
   131  
// Collation implements the sql.Table interface. It always returns sql.Collation_Default.
func (dt *ColumnDiffTable) Collation() sql.CollationID {
	return sql.Collation_Default
}
   136  
   137  func (dt *ColumnDiffTable) LookupPartitions(ctx *sql.Context, lookup sql.IndexLookup) (sql.PartitionIter, error) {
   138  	if lookup.Index.ID() == index.CommitHashIndexId {
   139  		hs, ok := index.LookupToPointSelectStr(lookup)
   140  		if !ok {
   141  			return nil, fmt.Errorf("failed to parse commit lookup ranges: %s", sql.DebugString(lookup.Ranges))
   142  		}
   143  		hashes, commits, metas := index.HashesToCommits(ctx, dt.ddb, hs, dt.head, false)
   144  		if len(hashes) == 0 {
   145  			return sql.PartitionsToPartitionIter(), nil
   146  		}
   147  
   148  		headHash, err := dt.head.HashOf()
   149  		if err != nil {
   150  			return nil, err
   151  		}
   152  		var partitions []sql.Partition
   153  		for i, h := range hashes {
   154  			if h == headHash && commits[i] == nil {
   155  				partitions = append(partitions, newDoltDiffPartition(workingSetPartitionKey))
   156  			} else {
   157  				partitions = append(partitions, doltdb.NewCommitPart(h, commits[i], metas[i]))
   158  			}
   159  		}
   160  		return sql.PartitionsToPartitionIter(partitions...), nil
   161  	}
   162  
   163  	return dt.Partitions(ctx)
   164  }
   165  
// doltColDiffWorkingSetRowItr is a row iterator over the column changes in the staged and unstaged table
// deltas. It emits one row per changed column, walking all staged deltas before the unstaged ones.
type doltColDiffWorkingSetRowItr struct {
	// ddb is passed to processTableColDelta when a delta is processed.
	ddb                 *doltdb.DoltDB
	// stagedIndex is the position of the next delta to read from stagedTableDeltas.
	stagedIndex         int
	// unstagedIndex is the position of the next delta to read from unstagedTableDeltas.
	unstagedIndex       int
	// colIndex is the position within colNames/diffTypes of the next row to emit.
	colIndex            int
	// changeSet is "STAGED" or "WORKING" depending on which delta list is being read; it is emitted as the
	// first column of each row.
	changeSet           string
	stagedTableDeltas   []diff.TableDelta
	unstagedTableDeltas []diff.TableDelta
	// currentTableDelta is the delta whose columns are being emitted; nil means the next delta must be loaded.
	currentTableDelta   *diff.TableDelta
	// tableName, colNames and diffTypes hold the processed result for currentTableDelta. colNames and
	// diffTypes are parallel slices.
	tableName           string
	colNames            []string
	diffTypes           []string
}
   179  
   180  func (dt *ColumnDiffTable) newWorkingSetRowItr(ctx *sql.Context) (sql.RowIter, error) {
   181  	sess := dsess.DSessFromSess(ctx.Session)
   182  	roots, ok := sess.GetRoots(ctx, dt.dbName)
   183  	if !ok {
   184  		return nil, fmt.Errorf("unable to lookup roots for database %s", dt.dbName)
   185  	}
   186  
   187  	staged, unstaged, err := diff.GetStagedUnstagedTableDeltas(ctx, roots)
   188  	if err != nil {
   189  		return nil, err
   190  	}
   191  
   192  	var ri sql.RowIter
   193  	ri = &doltColDiffWorkingSetRowItr{
   194  		ddb:                 dt.ddb,
   195  		stagedTableDeltas:   staged,
   196  		unstagedTableDeltas: unstaged,
   197  	}
   198  
   199  	for _, filter := range dt.partitionFilters {
   200  		ri = plan.NewFilterIter(filter, ri)
   201  	}
   202  
   203  	return ri, nil
   204  }
   205  
   206  // incrementColIndex increments the column index and table changes index.  When the end of the column names array is
   207  // reached, moves to the next table changes delta.
   208  func (d *doltColDiffWorkingSetRowItr) incrementColIndex() {
   209  	d.colIndex++
   210  
   211  	// move to next table once all modified columns are iterated through
   212  	if d.colIndex >= len(d.colNames) {
   213  		d.colIndex = 0
   214  		d.currentTableDelta = nil
   215  		if d.changeSet == "STAGED" {
   216  			d.stagedIndex++
   217  		} else {
   218  			d.unstagedIndex++
   219  		}
   220  	}
   221  }
   222  
   223  func (d *doltColDiffWorkingSetRowItr) Next(ctx *sql.Context) (sql.Row, error) {
   224  	defer d.incrementColIndex()
   225  
   226  	// only need to load new changes when we're finished iterating through the previous tableDelta
   227  	for d.currentTableDelta == nil {
   228  		if d.stagedIndex < len(d.stagedTableDeltas) {
   229  			d.changeSet = "STAGED"
   230  			d.currentTableDelta = &d.stagedTableDeltas[d.stagedIndex]
   231  		} else if d.unstagedIndex < len(d.unstagedTableDeltas) {
   232  			d.changeSet = "WORKING"
   233  			d.currentTableDelta = &d.unstagedTableDeltas[d.unstagedIndex]
   234  		} else {
   235  			return nil, io.EOF
   236  		}
   237  
   238  		change, err := processTableColDelta(ctx, d.ddb, *d.currentTableDelta)
   239  		if err != nil {
   240  			return nil, err
   241  		}
   242  
   243  		// ignore changes with no modified columns
   244  		if len(change.colNames) != 0 {
   245  			d.colNames = change.colNames
   246  			d.diffTypes = change.diffTypes
   247  			d.tableName = change.tableName
   248  		} else {
   249  			if d.changeSet == "STAGED" {
   250  				d.stagedIndex++
   251  			} else {
   252  				d.unstagedIndex++
   253  			}
   254  			d.currentTableDelta = nil
   255  		}
   256  	}
   257  
   258  	sqlRow := sql.NewRow(
   259  		d.changeSet,
   260  		d.tableName,
   261  		d.colNames[d.colIndex],
   262  		nil, // committer
   263  		nil, // email
   264  		nil, // date
   265  		nil, // message
   266  		d.diffTypes[d.colIndex],
   267  	)
   268  
   269  	return sqlRow, nil
   270  }
   271  
// Close implements the row iterator's Close method. It does nothing and always returns nil.
func (d *doltColDiffWorkingSetRowItr) Close(c *sql.Context) error {
	return nil
}
   275  
// doltColDiffCommitHistoryRowItr is a sql.RowIter implementation which iterates over commits and emits one row
// for each changed column of each changed table in a commit. Commits come either from the child commit
// iterator or from an explicit list of commits, depending on which constructor was used.
type doltColDiffCommitHistoryRowItr struct {
	// ctx is the context captured at construction; calculateTableChanges passes it to processTableColDelta.
	ctx             *sql.Context
	ddb             *doltdb.DoltDB
	// child supplies commits when iterating history; it is nil when the iterator was built from a commit list.
	child           doltdb.CommitItr
	// commits is the explicit list of commits to report on; it is nil when iterating via child.
	commits         []*doltdb.Commit
	// meta and hash describe the commit whose changes are currently loaded in tableChanges.
	meta            *datas.CommitMeta
	hash            hash.Hash
	// tableChanges holds the changed tables of the current commit; nil means the next commit must be loaded.
	tableChanges    []tableColChange
	// tableChangesIdx is the index into tableChanges of the table being emitted; it is -1 when nothing is loaded.
	tableChangesIdx int
	// colIdx is the index of the next column to emit within the current table change.
	colIdx          int
}
   288  
   289  // newCommitHistoryRowItr creates a doltDiffCommitHistoryRowItr from a CommitItr.
   290  func (dt *ColumnDiffTable) newCommitHistoryRowItrFromItr(ctx *sql.Context, iter doltdb.CommitItr) (*doltColDiffCommitHistoryRowItr, error) {
   291  	dchItr := &doltColDiffCommitHistoryRowItr{
   292  		ctx:             ctx,
   293  		ddb:             dt.ddb,
   294  		tableChangesIdx: -1,
   295  		child:           iter,
   296  	}
   297  	return dchItr, nil
   298  }
   299  
   300  // newCommitHistoryRowItr creates a doltDiffCommitHistoryRowItr from a list of commits.
   301  func (dt *ColumnDiffTable) newCommitHistoryRowItrFromCommits(ctx *sql.Context, commits []*doltdb.Commit) (*doltColDiffCommitHistoryRowItr, error) {
   302  	dchItr := &doltColDiffCommitHistoryRowItr{
   303  		ctx:             ctx,
   304  		ddb:             dt.ddb,
   305  		tableChangesIdx: -1,
   306  		commits:         commits,
   307  	}
   308  	return dchItr, nil
   309  }
   310  
   311  // incrementIndexes increments the column index and table changes index. When the end of the column names array is
   312  // reached, moves to the next table. When the end of the table changes array is reached, moves to the next commit,
   313  // and resets the table changes index so that it can be populated when Next() is called.
   314  func (itr *doltColDiffCommitHistoryRowItr) incrementIndexes(tableChange tableColChange) {
   315  	itr.colIdx++
   316  	if itr.colIdx >= len(tableChange.colNames) {
   317  		itr.tableChangesIdx++
   318  		itr.colIdx = 0
   319  		if itr.tableChangesIdx >= len(itr.tableChanges) {
   320  			itr.tableChangesIdx = -1
   321  			itr.tableChanges = nil
   322  		}
   323  	}
   324  }
   325  
   326  // Next retrieves the next row. It will return io.EOF if it's the last row.
   327  // After retrieving the last row, Close will be automatically closed.
   328  func (itr *doltColDiffCommitHistoryRowItr) Next(ctx *sql.Context) (sql.Row, error) {
   329  	for itr.tableChanges == nil {
   330  		if itr.commits != nil {
   331  			for _, commit := range itr.commits {
   332  				err := itr.loadTableChanges(ctx, commit)
   333  				if err != nil {
   334  					return nil, err
   335  				}
   336  			}
   337  			itr.commits = nil
   338  		} else if itr.child != nil {
   339  			_, optCmt, err := itr.child.Next(ctx)
   340  			if err != nil {
   341  				return nil, err
   342  			}
   343  			commit, ok := optCmt.ToCommit()
   344  			if !ok {
   345  				return nil, doltdb.ErrGhostCommitEncountered
   346  			}
   347  
   348  			err = itr.loadTableChanges(ctx, commit)
   349  			if err != nil {
   350  				return nil, err
   351  			}
   352  		} else {
   353  			return nil, io.EOF
   354  		}
   355  	}
   356  
   357  	tableChange := itr.tableChanges[itr.tableChangesIdx]
   358  	defer itr.incrementIndexes(tableChange)
   359  
   360  	meta := itr.meta
   361  	h := itr.hash
   362  	col := tableChange.colNames[itr.colIdx]
   363  	diffType := tableChange.diffTypes[itr.colIdx]
   364  
   365  	return sql.NewRow(
   366  		h.String(),
   367  		tableChange.tableName,
   368  		col,
   369  		meta.Name,
   370  		meta.Email,
   371  		meta.Time(),
   372  		meta.Description,
   373  		diffType,
   374  	), nil
   375  }
   376  
   377  // loadTableChanges loads the current commit's table changes and metadata into the iterator.
   378  func (itr *doltColDiffCommitHistoryRowItr) loadTableChanges(ctx context.Context, commit *doltdb.Commit) error {
   379  	tableChanges, err := itr.calculateTableChanges(ctx, commit)
   380  	if err != nil {
   381  		return err
   382  	}
   383  
   384  	itr.tableChanges = tableChanges
   385  	itr.tableChangesIdx = 0
   386  	if len(tableChanges) == 0 {
   387  		return nil
   388  	}
   389  
   390  	meta, err := commit.GetCommitMeta(ctx)
   391  	if err != nil {
   392  		return err
   393  	}
   394  	itr.meta = meta
   395  
   396  	cmHash, err := commit.HashOf()
   397  	if err != nil {
   398  		return err
   399  	}
   400  	itr.hash = cmHash
   401  
   402  	return nil
   403  }
   404  
   405  // calculateTableChanges calculates the tables that changed in the specified commit, by comparing that
   406  // commit with its immediate ancestor commit.
   407  func (itr *doltColDiffCommitHistoryRowItr) calculateTableChanges(ctx context.Context, commit *doltdb.Commit) ([]tableColChange, error) {
   408  	if len(commit.DatasParents()) == 0 {
   409  		return nil, nil
   410  	}
   411  
   412  	toRootValue, err := commit.GetRootValue(ctx)
   413  	if err != nil {
   414  		return nil, err
   415  	}
   416  
   417  	optCmt, err := itr.ddb.ResolveParent(ctx, commit, 0)
   418  	if err != nil {
   419  		return nil, err
   420  	}
   421  	parent, ok := optCmt.ToCommit()
   422  	if !ok {
   423  		return nil, doltdb.ErrGhostCommitEncountered
   424  	}
   425  
   426  	fromRootValue, err := parent.GetRootValue(ctx)
   427  	if err != nil {
   428  		return nil, err
   429  	}
   430  
   431  	deltas, err := diff.GetTableDeltas(ctx, fromRootValue, toRootValue)
   432  	if err != nil {
   433  		return nil, err
   434  	}
   435  
   436  	tableChanges := make([]tableColChange, 0)
   437  	for i := 0; i < len(deltas); i++ {
   438  		change, err := processTableColDelta(itr.ctx, itr.ddb, deltas[i])
   439  		if err != nil {
   440  			return nil, err
   441  		}
   442  
   443  		// only add changes that have modified columns
   444  		if len(change.colNames) != 0 {
   445  			tableChanges = append(tableChanges, *change)
   446  		}
   447  	}
   448  
   449  	// Not all commits mutate tables (e.g. empty commits)
   450  	if len(tableChanges) == 0 {
   451  		return nil, nil
   452  	}
   453  
   454  	return tableChanges, nil
   455  }
   456  
// Close closes the iterator. It does nothing and always returns nil.
func (itr *doltColDiffCommitHistoryRowItr) Close(*sql.Context) error {
	return nil
}
   461  
// tableColChange is an internal data structure used to hold the results of processing
// a diff.TableDelta structure into the output data for this system table.
type tableColChange struct {
	// tableName is the name of the changed table (the "from" name for dropped tables, otherwise the "to" name).
	tableName string
	// colNames lists the changed columns of the table.
	colNames  []string
	// diffTypes is parallel to colNames: diffTypes[i] is the kind of change made to colNames[i].
	diffTypes []string
}
   469  
   470  // processTableColDelta processes the specified TableDelta to determine what kind of change it was (i.e. table drop,
   471  // table rename, table create, or data update) and returns a tableChange struct representing the change.
   472  func processTableColDelta(ctx *sql.Context, ddb *doltdb.DoltDB, delta diff.TableDelta) (*tableColChange, error) {
   473  	// Dropping a table is always a schema change, and also a data change if the table contained data
   474  	if delta.IsDrop() {
   475  		diffTypes := make([]string, delta.FromSch.GetAllCols().Size())
   476  		for i := range diffTypes {
   477  			diffTypes[i] = diffTypeRemoved
   478  		}
   479  
   480  		return &tableColChange{
   481  			tableName: delta.FromName,
   482  			colNames:  delta.FromSch.GetAllCols().GetColumnNames(),
   483  			diffTypes: diffTypes,
   484  		}, nil
   485  	}
   486  
   487  	// Creating a table is always a schema change, and also a data change if data was inserted
   488  	if delta.IsAdd() {
   489  		diffTypes := make([]string, delta.ToSch.GetAllCols().Size())
   490  		for i := range diffTypes {
   491  			diffTypes[i] = diffTypeAdded
   492  		}
   493  
   494  		return &tableColChange{
   495  			tableName: delta.ToName,
   496  			colNames:  delta.ToSch.GetAllCols().GetColumnNames(),
   497  			diffTypes: diffTypes,
   498  		}, nil
   499  	}
   500  
   501  	// NOTE: Renaming a table does not affect columns necessarily, if table data was changed it will be checked below
   502  
   503  	// calculate which columns have been modified
   504  	colSchDiff := calculateColSchemaDiff(delta.ToSch.GetAllCols(), delta.FromSch.GetAllCols())
   505  	colNames, diffTypes, err := calculateColDelta(ctx, ddb, &delta, colSchDiff)
   506  	if err != nil {
   507  		return nil, err
   508  	}
   509  
   510  	return &tableColChange{
   511  		tableName: delta.ToName,
   512  		colNames:  colNames,
   513  		diffTypes: diffTypes,
   514  	}, nil
   515  }
   516  
   517  // calculateColDelta iterates through the rows of the given table delta and compares each cell in the to_ and from_
   518  // cells to compile a list of modified columns
   519  func calculateColDelta(ctx *sql.Context, ddb *doltdb.DoltDB, delta *diff.TableDelta, colSchDiff *colSchemaDiff) ([]string, []string, error) {
   520  	// initialize row iterator
   521  	diffTableSchema, j, err := GetDiffTableSchemaAndJoiner(delta.ToTable.Format(), delta.FromSch, delta.ToSch)
   522  	if err != nil {
   523  		return nil, nil, err
   524  	}
   525  	diffTableCols := diffTableSchema.GetAllCols()
   526  
   527  	now := time.Now() // accurate commit time returned elsewhere
   528  	dp := NewDiffPartition(delta.ToTable, delta.FromTable, delta.ToName, delta.FromName, (*dtypes.Timestamp)(&now), (*dtypes.Timestamp)(&now), delta.ToSch, delta.FromSch)
   529  	ri := NewDiffPartitionRowIter(*dp, ddb, j)
   530  
   531  	var resultColNames []string
   532  	var resultDiffTypes []string
   533  	// add all added/dropped columns to result
   534  	for _, col := range colSchDiff.addedCols {
   535  		resultColNames = append(resultColNames, col)
   536  		resultDiffTypes = append(resultDiffTypes, diffTypeAdded)
   537  	}
   538  	for _, col := range colSchDiff.droppedCols {
   539  		resultColNames = append(resultColNames, col)
   540  		resultDiffTypes = append(resultDiffTypes, diffTypeRemoved)
   541  	}
   542  
   543  	colNamesSet := make(map[string]struct{})
   544  	// check each row for diffs in modified columns
   545  	for {
   546  		r, err := ri.Next(ctx)
   547  		if err == io.EOF {
   548  			for col := range colNamesSet {
   549  				// append modified columns to result
   550  				resultColNames = append(resultColNames, col)
   551  				resultDiffTypes = append(resultDiffTypes, diffTypeModified)
   552  			}
   553  			return resultColNames, resultDiffTypes, nil
   554  		} else if err != nil {
   555  			return nil, nil, err
   556  		}
   557  
   558  		// only need to check modified columns
   559  		for _, col := range colSchDiff.modifiedCols {
   560  			toColTag := diffTableCols.NameToCol["to_"+col].Tag
   561  			fromColTag := diffTableCols.NameToCol["from_"+col].Tag
   562  			toIdx := diffTableCols.TagToIdx[toColTag]
   563  			fromIdx := diffTableCols.TagToIdx[fromColTag]
   564  
   565  			toCol := delta.ToSch.GetAllCols().GetByIndex(toIdx)
   566  			cmp, err := toCol.TypeInfo.ToSqlType().Compare(r[toIdx], r[fromIdx])
   567  			if err != nil {
   568  				return nil, nil, err
   569  			}
   570  			if cmp != 0 {
   571  				colNamesSet[col] = struct{}{}
   572  			}
   573  		}
   574  
   575  		// can stop checking rows when we already have all modified columns in the result set
   576  		if len(colNamesSet) == len(colSchDiff.modifiedCols) {
   577  			for col := range colNamesSet {
   578  				// append modified columns to result
   579  				resultColNames = append(resultColNames, col)
   580  				resultDiffTypes = append(resultDiffTypes, diffTypeModified)
   581  			}
   582  			return resultColNames, resultDiffTypes, nil
   583  		}
   584  	}
   585  }
   586  
// colSchemaDiff is a collection of column names that hold the results of doing a schema diff between to/from schemas,
// i.e. a list of column names for each type of change, the total list of column names, and a corresponding list of
// diff_types for each column
type colSchemaDiff struct {
	// modifiedCols are the columns whose tag exists in both schemas. They are only candidates for modification:
	// calculateColDelta compares row data to decide which of them actually changed.
	modifiedCols []string
	// addedCols are the columns whose tag exists only in the "to" schema.
	addedCols    []string
	// droppedCols are the columns whose tag exists only in the "from" schema.
	droppedCols  []string
	// allCols lists every column named above; diffTypes is parallel to it and holds each column's diff type.
	allCols      []string
	diffTypes    []string
}
   597  
   598  // calculateColSchemaDiff calculates which columns were modified, added, or dropped between to and from schemas and
   599  // returns a colSchemaDiff to hold the results of the diff
   600  func calculateColSchemaDiff(toCols *schema.ColCollection, fromCols *schema.ColCollection) *colSchemaDiff {
   601  	// put to/from columns into a set
   602  	toColTags := make(map[uint64]struct{})
   603  	fromColTags := make(map[uint64]struct{})
   604  	if toCols != nil {
   605  		for _, tag := range toCols.Tags {
   606  			toColTags[tag] = struct{}{}
   607  		}
   608  	}
   609  	if fromCols != nil {
   610  		for _, tag := range fromCols.Tags {
   611  			fromColTags[tag] = struct{}{}
   612  		}
   613  	}
   614  
   615  	var modifiedCols []string
   616  	var addedCols []string
   617  	var droppedCols []string
   618  	var allCols []string
   619  	var diffTypes []string
   620  
   621  	if toCols != nil {
   622  		for _, tag := range toCols.Tags {
   623  			if _, ok := fromColTags[tag]; ok {
   624  				// if the tag is also in fromColumnTags, this column was modified
   625  				modifiedCols = append(modifiedCols, toCols.TagToCol[tag].Name)
   626  				allCols = append(allCols, toCols.TagToCol[tag].Name)
   627  				diffTypes = append(diffTypes, diffTypeModified)
   628  				delete(fromColTags, tag)
   629  			} else {
   630  				// else if it isn't in fromColumnTags, this column was added
   631  				addedCols = append(addedCols, toCols.TagToCol[tag].Name)
   632  				allCols = append(allCols, toCols.TagToCol[tag].Name)
   633  				diffTypes = append(diffTypes, diffTypeAdded)
   634  			}
   635  		}
   636  	}
   637  
   638  	if fromCols != nil {
   639  		for tag, _ := range fromColTags {
   640  			// all remaining tags are columns not in toColumnTags, i.e. dropped columns
   641  			droppedCols = append(droppedCols, fromCols.TagToCol[tag].Name)
   642  			allCols = append(allCols, fromCols.TagToCol[tag].Name)
   643  			diffTypes = append(diffTypes, diffTypeRemoved)
   644  		}
   645  	}
   646  
   647  	return &colSchemaDiff{
   648  		modifiedCols: modifiedCols,
   649  		addedCols:    addedCols,
   650  		droppedCols:  droppedCols,
   651  		allCols:      allCols,
   652  		diffTypes:    diffTypes,
   653  	}
   654  }