github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/dtables/diff_table.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package dtables
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"io"
    22  
    23  	"github.com/dolthub/go-mysql-server/sql"
    24  
    25  	"github.com/dolthub/dolt/go/libraries/doltcore/diff"
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/row"
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/rowconv"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    31  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/expreval"
    32  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
    33  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlutil"
    34  	"github.com/dolthub/dolt/go/libraries/utils/set"
    35  	"github.com/dolthub/dolt/go/store/datas"
    36  	"github.com/dolthub/dolt/go/store/hash"
    37  	"github.com/dolthub/dolt/go/store/prolly"
    38  	"github.com/dolthub/dolt/go/store/types"
    39  )
    40  
// diffTableDefaultRowCount is the fixed row-count estimate reported by
// DiffTable.RowCount.
const diffTableDefaultRowCount = 10
    42  
const (
	// Names of the commit metadata columns of the diff table.
	toCommit       = "to_commit"
	fromCommit     = "from_commit"
	toCommitDate   = "to_commit_date"
	fromCommitDate = "from_commit_date"

	// diffTypeColName is the name of the diff type column appended to the diff
	// table schema. The diffType* constants are presumably the values stored in
	// that column — confirm against the row iterators.
	diffTypeColName  = "diff_type"
	diffTypeAdded    = "added"
	diffTypeModified = "modified"
	diffTypeRemoved  = "removed"
)
    54  
// Compile-time checks that *DiffTable satisfies the go-mysql-server table
// interfaces it is used through.
var _ sql.Table = (*DiffTable)(nil)
var _ sql.IndexedTable = (*DiffTable)(nil)
var _ sql.IndexAddressable = (*DiffTable)(nil)
var _ sql.StatisticsTable = (*DiffTable)(nil)
    59  
// DiffTable is the sql.Table implementation backing the diff system table of a
// single user table. Its partitions are pairs of table states taken from the
// working root and from commits reachable from |head|.
type DiffTable struct {
	// name is the name of the underlying table, without the diff table prefix.
	name        string
	ddb         *doltdb.DoltDB
	workingRoot doltdb.RootValue
	head        *doltdb.Commit

	// headHash and headCommitClosure are lazily populated caches; see HeadHash
	// and HeadCommitClosure.
	headHash          hash.Hash
	headCommitClosure *prolly.CommitClosure

	// from and to need to be mapped to this schema
	targetSch schema.Schema

	// the schema for the diff table itself. Once from and to are converted to
	// targetSch, the commit names and dates are inserted.
	diffTableSch schema.Schema

	sqlSch           sql.PrimaryKeySchema
	partitionFilters []sql.Expression

	// table is the underlying table as resolved from the root handed to
	// NewDiffTable; lookup is forwarded to the partition row iterator.
	table  *doltdb.Table
	lookup sql.IndexLookup

	// noms only
	joiner *rowconv.Joiner
}
    85  
// PrimaryKeyChangeWarning is the format string of the warning raised when a
// partition cannot be diffed. Its two verbs receive the from and to commit
// names, in that order.
var PrimaryKeyChangeWarning = "cannot render full diff between commits %s and %s due to primary key set change"

// PrimaryKeyChangeWarningCode is the warning code paired with PrimaryKeyChangeWarning.
const PrimaryKeyChangeWarningCode int = 1105 // Since this is our own custom warning we'll use 1105, the code for an unknown error
    89  
    90  func NewDiffTable(ctx *sql.Context, dbName, tblName string, ddb *doltdb.DoltDB, root doltdb.RootValue, head *doltdb.Commit) (sql.Table, error) {
    91  	diffTblName := doltdb.DoltDiffTablePrefix + tblName
    92  
    93  	table, tblName, ok, err := doltdb.GetTableInsensitive(ctx, root, tblName)
    94  	if err != nil {
    95  		return nil, err
    96  	}
    97  	if !ok {
    98  		return nil, sql.ErrTableNotFound.New(diffTblName)
    99  	}
   100  	sch, err := table.GetSchema(ctx)
   101  	if err != nil {
   102  		return nil, err
   103  	}
   104  
   105  	diffTableSchema, j, err := GetDiffTableSchemaAndJoiner(ddb.Format(), sch, sch)
   106  	if err != nil {
   107  		return nil, err
   108  	}
   109  
   110  	sqlSch, err := sqlutil.FromDoltSchema(dbName, diffTblName, diffTableSchema)
   111  	if err != nil {
   112  		return nil, err
   113  	}
   114  
   115  	return &DiffTable{
   116  		name:             tblName,
   117  		ddb:              ddb,
   118  		workingRoot:      root,
   119  		head:             head,
   120  		targetSch:        sch,
   121  		diffTableSch:     diffTableSchema,
   122  		sqlSch:           sqlSch,
   123  		partitionFilters: nil,
   124  		table:            table,
   125  		joiner:           j,
   126  	}, nil
   127  }
   128  
   129  func (dt *DiffTable) DataLength(ctx *sql.Context) (uint64, error) {
   130  	numBytesPerRow := schema.SchemaAvgLength(dt.Schema())
   131  	numRows, _, err := dt.RowCount(ctx)
   132  	if err != nil {
   133  		return 0, err
   134  	}
   135  	return numBytesPerRow * numRows, nil
   136  }
   137  
// RowCount returns the constant diffTableDefaultRowCount rather than a real
// count; the boolean result is always false and the error is always nil.
func (dt *DiffTable) RowCount(_ *sql.Context) (uint64, bool, error) {
	return diffTableDefaultRowCount, false, nil
}
   141  
// Name returns the diff table's name: the diff table prefix followed by the
// name of the underlying table.
func (dt *DiffTable) Name() string {
	return doltdb.DoltDiffTablePrefix + dt.name
}
   145  
   146  func (dt *DiffTable) String() string {
   147  	return doltdb.DoltDiffTablePrefix + dt.name
   148  }
   149  
// Schema returns the SQL schema of the diff table as computed in NewDiffTable.
func (dt *DiffTable) Schema() sql.Schema {
	return dt.sqlSch.Schema
}
   153  
// Collation always reports the default collation.
func (dt *DiffTable) Collation() sql.CollationID {
	return sql.Collation_Default
}
   157  
   158  func (dt *DiffTable) Partitions(ctx *sql.Context) (sql.PartitionIter, error) {
   159  	cmItr := doltdb.CommitItrForRoots(dt.ddb, dt.head)
   160  
   161  	sf, err := SelectFuncForFilters(dt.ddb.ValueReadWriter(), dt.partitionFilters)
   162  	if err != nil {
   163  		return nil, err
   164  	}
   165  
   166  	t, exactName, ok, err := doltdb.GetTableInsensitive(ctx, dt.workingRoot, dt.name)
   167  	if err != nil {
   168  		return nil, err
   169  	}
   170  
   171  	if !ok {
   172  		return nil, fmt.Errorf("table: %s does not exist", dt.name)
   173  	}
   174  
   175  	wrTblHash, _, err := dt.workingRoot.GetTableHash(ctx, exactName)
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  
   180  	cmHash, _, err := cmItr.Next(ctx)
   181  	if err != nil {
   182  		return nil, err
   183  	}
   184  
   185  	cmHashToTblInfo := make(map[hash.Hash]TblInfoAtCommit)
   186  	cmHashToTblInfo[cmHash] = TblInfoAtCommit{"WORKING", nil, t, wrTblHash}
   187  
   188  	err = cmItr.Reset(ctx)
   189  	if err != nil {
   190  		return nil, err
   191  	}
   192  
   193  	return &DiffPartitions{
   194  		tblName:         exactName,
   195  		cmItr:           cmItr,
   196  		cmHashToTblInfo: cmHashToTblInfo,
   197  		selectFunc:      sf,
   198  		toSch:           dt.targetSch,
   199  		fromSch:         dt.targetSch,
   200  	}, nil
   201  }
   202  
// commitMetaColumns is the set of the four commit metadata column names.
var commitMetaColumns = set.NewStrSet([]string{toCommit, fromCommit, toCommitDate, fromCommitDate})
   204  
   205  // CommitIsInScope returns true if a given commit hash is head or is
   206  // visible from the current head's ancestry graph.
   207  func (dt *DiffTable) CommitIsInScope(ctx context.Context, height uint64, h hash.Hash) (bool, error) {
   208  	cc, err := dt.HeadCommitClosure(ctx)
   209  	if err != nil {
   210  		return false, err
   211  	}
   212  	headHash, err := dt.HeadHash()
   213  	if err != nil {
   214  		return false, err
   215  	}
   216  	if headHash == h {
   217  		return true, nil
   218  	}
   219  	return cc.ContainsKey(ctx, h, height)
   220  }
   221  
   222  func (dt *DiffTable) HeadCommitClosure(ctx context.Context) (*prolly.CommitClosure, error) {
   223  	if dt.headCommitClosure == nil {
   224  		cc, err := dt.head.GetCommitClosure(ctx)
   225  		dt.headCommitClosure = &cc
   226  		if err != nil {
   227  			return nil, err
   228  		}
   229  	}
   230  	return dt.headCommitClosure, nil
   231  }
   232  
   233  func (dt *DiffTable) HeadHash() (hash.Hash, error) {
   234  	if dt.headHash.IsEmpty() {
   235  		var err error
   236  		dt.headHash, err = dt.head.HashOf()
   237  		if err != nil {
   238  			return hash.Hash{}, err
   239  		}
   240  	}
   241  	return dt.headHash, nil
   242  }
   243  
   244  func (dt *DiffTable) PartitionRows(ctx *sql.Context, part sql.Partition) (sql.RowIter, error) {
   245  	dp := part.(DiffPartition)
   246  	return dp.GetRowIter(ctx, dt.ddb, dt.joiner, dt.lookup)
   247  }
   248  
   249  func (dt *DiffTable) LookupPartitions(ctx *sql.Context, lookup sql.IndexLookup) (sql.PartitionIter, error) {
   250  	switch lookup.Index.ID() {
   251  	case index.ToCommitIndexId:
   252  		hs, ok := index.LookupToPointSelectStr(lookup)
   253  		if !ok {
   254  			return nil, fmt.Errorf("failed to parse commit lookup ranges: %s", sql.DebugString(lookup.Ranges))
   255  		}
   256  		hashes, commits, metas := index.HashesToCommits(ctx, dt.ddb, hs, dt.head, false)
   257  		if len(hashes) == 0 {
   258  			return sql.PartitionsToPartitionIter(), nil
   259  		}
   260  		return dt.toCommitLookupPartitions(ctx, hashes, commits, metas)
   261  	case index.FromCommitIndexId:
   262  		hs, ok := index.LookupToPointSelectStr(lookup)
   263  		if !ok {
   264  			return nil, fmt.Errorf("failed to parse commit lookup ranges: %s", sql.DebugString(lookup.Ranges))
   265  		}
   266  		hashes, commits, metas := index.HashesToCommits(ctx, dt.ddb, hs, nil, false)
   267  		if len(hashes) == 0 {
   268  			return sql.PartitionsToPartitionIter(), nil
   269  		}
   270  		return dt.fromCommitLookupPartitions(ctx, hashes, commits, metas)
   271  	default:
   272  		return dt.Partitions(ctx)
   273  	}
   274  }
   275  
   276  // fromCommitLookupPartitions creates a diff partition iterator for a set
   277  // of commits. The structure of the iter requires we pre-populate the
   278  // children of from_commit for diffing. We walk the commit graph looking
   279  // for commits that reference |from_commit| as a parent, and forward populate
   280  // for the |from_commit| diff partitions we will iterate.
   281  // TODO the structure of the diff iterator doesn't appear to accommodate
   282  // several children for a parent hash.
   283  func (dt *DiffTable) fromCommitLookupPartitions(ctx *sql.Context, hashes []hash.Hash, commits []*doltdb.Commit, metas []*datas.CommitMeta) (sql.PartitionIter, error) {
   284  	_, exactName, ok, err := doltdb.GetTableInsensitive(ctx, dt.workingRoot, dt.name)
   285  	if err != nil {
   286  		return nil, err
   287  	} else if !ok {
   288  		return nil, fmt.Errorf("table: %s does not exist", dt.name)
   289  	}
   290  
   291  	var parentHashes []hash.Hash
   292  	cmHashToTblInfo := make(map[hash.Hash]TblInfoAtCommit)
   293  	var pCommits []*doltdb.Commit
   294  	for i, hs := range hashes {
   295  		cm := commits[i]
   296  
   297  		// scope check
   298  		height, err := cm.Height()
   299  		if err != nil {
   300  			return nil, err
   301  		}
   302  
   303  		childCm, childHs, err := dt.scanHeightForChild(ctx, hs, height+1)
   304  		if err != nil {
   305  			return nil, err
   306  		}
   307  		if childCm == nil {
   308  			// non-linear commit graph, fallback to top-down scan
   309  			childCm, childHs, err = dt.reverseIterForChild(ctx, hs)
   310  			if err != nil {
   311  				return nil, err
   312  			}
   313  		}
   314  
   315  		if childCm != nil {
   316  			ti, err := tableInfoForCommit(ctx, dt.name, childCm, childHs)
   317  			if err != nil {
   318  				return nil, err
   319  			}
   320  			cmHashToTblInfo[hs] = ti
   321  			parentHashes = append(parentHashes, hs)
   322  			pCommits = append(pCommits, cm)
   323  		}
   324  	}
   325  
   326  	if len(parentHashes) == 0 {
   327  		return sql.PartitionsToPartitionIter(), nil
   328  	}
   329  
   330  	sf, err := SelectFuncForFilters(dt.ddb.ValueReadWriter(), dt.partitionFilters)
   331  	if err != nil {
   332  		return nil, err
   333  	}
   334  
   335  	cmItr := doltdb.NewCommitSliceIter(pCommits, parentHashes)
   336  	if err != nil {
   337  		return nil, err
   338  	}
   339  
   340  	return &DiffPartitions{
   341  		tblName:         exactName,
   342  		cmItr:           cmItr,
   343  		cmHashToTblInfo: cmHashToTblInfo,
   344  		selectFunc:      sf,
   345  		toSch:           dt.targetSch,
   346  		fromSch:         dt.targetSch,
   347  	}, nil
   348  }
   349  
   350  // scanHeightForChild searches for a child commit that references a target parent hash
   351  // at a specific height. This is an optimization for the common case where a parent and
   352  // its child are one level apart, and there is no branching that creates the potential
   353  // for a child higher in the graph.
   354  func (dt *DiffTable) scanHeightForChild(ctx *sql.Context, parent hash.Hash, height uint64) (*doltdb.Commit, hash.Hash, error) {
   355  	cc, err := dt.HeadCommitClosure(ctx)
   356  	if err != nil {
   357  		return nil, hash.Hash{}, err
   358  	}
   359  	iter, err := cc.IterHeight(ctx, height)
   360  	if err != nil {
   361  		return nil, hash.Hash{}, err
   362  	}
   363  	var childHs hash.Hash
   364  	var childCm *doltdb.Commit
   365  	var cnt int
   366  	for {
   367  		k, _, err := iter.Next(ctx)
   368  		if errors.Is(err, io.EOF) {
   369  			break
   370  		}
   371  		if err != nil {
   372  			return nil, hash.Hash{}, err
   373  		}
   374  		cnt++
   375  		if cnt > 1 {
   376  			return nil, hash.Hash{}, nil
   377  		}
   378  
   379  		c, err := doltdb.HashToCommit(ctx, dt.ddb.ValueReadWriter(), dt.ddb.NodeStore(), k.Addr())
   380  		phs, err := c.ParentHashes(ctx)
   381  		if err != nil {
   382  			return nil, hash.Hash{}, err
   383  		}
   384  		for _, ph := range phs {
   385  			if ph == parent {
   386  				childCm = c
   387  				childHs = k.Addr()
   388  				break
   389  			}
   390  		}
   391  	}
   392  	return childCm, childHs, nil
   393  }
   394  
   395  // reverseIterForChild finds the commit with the largest height that
   396  // is a child of the |parent| hash, or nil if no commit is found.
   397  func (dt *DiffTable) reverseIterForChild(ctx *sql.Context, parent hash.Hash) (*doltdb.Commit, hash.Hash, error) {
   398  	iter := doltdb.CommitItrForRoots(dt.ddb, dt.head)
   399  	for {
   400  		childHs, optCmt, err := iter.Next(ctx)
   401  		if errors.Is(err, io.EOF) {
   402  			return nil, hash.Hash{}, nil
   403  		} else if err != nil {
   404  			return nil, hash.Hash{}, err
   405  		}
   406  
   407  		childCm, ok := optCmt.ToCommit()
   408  		if !ok {
   409  			// Should have been caught above from the Next() call on the iter. This is a runtime error.
   410  			return nil, hash.Hash{}, doltdb.ErrGhostCommitRuntimeFailure
   411  		}
   412  
   413  		phs, err := childCm.ParentHashes(ctx)
   414  		if err != nil {
   415  			return nil, hash.Hash{}, err
   416  		}
   417  		for _, ph := range phs {
   418  			if ph == parent {
   419  				return childCm, childHs, nil
   420  			}
   421  		}
   422  	}
   423  }
   424  
   425  func tableInfoForCommit(ctx context.Context, table string, cm *doltdb.Commit, hs hash.Hash) (TblInfoAtCommit, error) {
   426  	r, err := cm.GetRootValue(ctx)
   427  	if err != nil {
   428  		return TblInfoAtCommit{}, err
   429  	}
   430  
   431  	tbl, exactName, ok, err := doltdb.GetTableInsensitive(ctx, r, table)
   432  	if err != nil {
   433  		return TblInfoAtCommit{}, err
   434  	}
   435  	if !ok {
   436  		return TblInfoAtCommit{}, nil
   437  	}
   438  
   439  	tblHash, _, err := r.GetTableHash(ctx, exactName)
   440  	if err != nil {
   441  		return TblInfoAtCommit{}, err
   442  	}
   443  
   444  	meta, err := cm.GetCommitMeta(ctx)
   445  	if err != nil {
   446  		return TblInfoAtCommit{}, err
   447  	}
   448  
   449  	ts := types.Timestamp(meta.Time())
   450  	return NewTblInfoAtCommit(hs.String(), &ts, tbl, tblHash), nil
   451  }
   452  
   453  // toCommitLookupPartitions creates a diff partition iterator for a set of
   454  // commits. The structure of the iter requires we pre-populate the parents
   455  // of to_commit for diffing.
   456  func (dt *DiffTable) toCommitLookupPartitions(ctx *sql.Context, hashes []hash.Hash, commits []*doltdb.Commit, metas []*datas.CommitMeta) (sql.PartitionIter, error) {
   457  	t, exactName, ok, err := doltdb.GetTableInsensitive(ctx, dt.workingRoot, dt.name)
   458  	if err != nil {
   459  		return nil, err
   460  	} else if !ok {
   461  		return nil, fmt.Errorf("table: %s does not exist", dt.name)
   462  	}
   463  
   464  	working, err := dt.head.HashOf()
   465  	if err != nil {
   466  		return nil, err
   467  	}
   468  
   469  	var parentHashes []hash.Hash
   470  	cmHashToTblInfo := make(map[hash.Hash]TblInfoAtCommit)
   471  	var pCommits []*doltdb.Commit
   472  	for i, hs := range hashes {
   473  		cm := commits[i]
   474  
   475  		var toCmInfo TblInfoAtCommit
   476  		if hs == working && cm == nil {
   477  			wrTblHash, _, err := dt.workingRoot.GetTableHash(ctx, exactName)
   478  			if err != nil {
   479  				return nil, err
   480  			}
   481  
   482  			toCmInfo = TblInfoAtCommit{"WORKING", nil, t, wrTblHash}
   483  			cmHashToTblInfo[hs] = toCmInfo
   484  			parentHashes = append(parentHashes, hs)
   485  			pCommits = append(pCommits, dt.head)
   486  			continue
   487  		}
   488  
   489  		// scope check
   490  		height, err := cm.Height()
   491  		if err != nil {
   492  			return nil, err
   493  		}
   494  		ok, err = dt.CommitIsInScope(ctx, height, hs)
   495  		if err != nil {
   496  			return nil, err
   497  		}
   498  		if !ok {
   499  			continue
   500  		}
   501  
   502  		ti, err := tableInfoForCommit(ctx, dt.name, cm, hs)
   503  		if err != nil {
   504  			return nil, err
   505  		}
   506  		if ti.IsEmpty() {
   507  			continue
   508  		}
   509  
   510  		ph, err := cm.ParentHashes(ctx)
   511  		if err != nil {
   512  			return nil, err
   513  		}
   514  
   515  		for i, pj := range ph {
   516  			optCmt, err := cm.GetParent(ctx, i)
   517  			if err != nil {
   518  				return nil, err
   519  			}
   520  			pc, ok := optCmt.ToCommit()
   521  			if !ok {
   522  				return nil, doltdb.ErrGhostCommitEncountered
   523  			}
   524  
   525  			cmHashToTblInfo[pj] = toCmInfo
   526  			cmHashToTblInfo[pj] = ti
   527  			pCommits = append(pCommits, pc)
   528  		}
   529  		parentHashes = append(parentHashes, ph...)
   530  	}
   531  
   532  	if len(parentHashes) == 0 {
   533  		return sql.PartitionsToPartitionIter(), nil
   534  	}
   535  
   536  	sf, err := SelectFuncForFilters(dt.ddb.ValueReadWriter(), dt.partitionFilters)
   537  	if err != nil {
   538  		return nil, err
   539  	}
   540  
   541  	cmItr := doltdb.NewCommitSliceIter(pCommits, parentHashes)
   542  	if err != nil {
   543  		return nil, err
   544  	}
   545  
   546  	return &DiffPartitions{
   547  		tblName:         exactName,
   548  		cmItr:           cmItr,
   549  		cmHashToTblInfo: cmHashToTblInfo,
   550  		selectFunc:      sf,
   551  		toSch:           dt.targetSch,
   552  		fromSch:         dt.targetSch,
   553  	}, nil
   554  }
   555  
// GetIndexes implements sql.IndexAddressable. The indexes are derived from the
// underlying table by index.DoltDiffIndexesFromTable, with an empty string
// passed as its second argument.
func (dt *DiffTable) GetIndexes(ctx *sql.Context) ([]sql.Index, error) {
	return index.DoltDiffIndexesFromTable(ctx, "", dt.name, dt.table)
}
   560  
// IndexedAccess implements sql.IndexAddressable. It returns a shallow copy of
// the receiver.
// NOTE(review): |lookup| is not stored on the copy, so its lookup field keeps
// the receiver's value — confirm this is intended.
func (dt *DiffTable) IndexedAccess(lookup sql.IndexLookup) sql.IndexedTable {
	nt := *dt
	return &nt
}
   566  
// PreciseMatch implements sql.IndexAddressable. It always returns false.
func (dt *DiffTable) PreciseMatch() bool {
	return false
}
   571  
   572  // tableData returns the map of primary key to values for the specified table (or an empty map if the tbl is null)
   573  // and the schema of the table (or EmptySchema if tbl is null).
   574  func tableData(ctx *sql.Context, tbl *doltdb.Table, ddb *doltdb.DoltDB) (durable.Index, schema.Schema, error) {
   575  	var data durable.Index
   576  	var err error
   577  
   578  	if tbl == nil {
   579  		data, err = durable.NewEmptyIndex(ctx, ddb.ValueReadWriter(), ddb.NodeStore(), schema.EmptySchema)
   580  		if err != nil {
   581  			return nil, nil, err
   582  		}
   583  	} else {
   584  		data, err = tbl.GetRowData(ctx)
   585  		if err != nil {
   586  			return nil, nil, err
   587  		}
   588  	}
   589  
   590  	var sch schema.Schema
   591  	if tbl == nil {
   592  		sch = schema.EmptySchema
   593  	} else {
   594  		sch, err = tbl.GetSchema(ctx)
   595  
   596  		if err != nil {
   597  			return nil, nil, err
   598  		}
   599  	}
   600  
   601  	return data, sch, nil
   602  }
   603  
// TblInfoAtCommit describes the state of a table at one point in history.
type TblInfoAtCommit struct {
	// name is the commit hash string, or "WORKING" for the working root.
	name string
	// date is the commit timestamp; it is nil for the working root.
	date *types.Timestamp
	// tbl is the table at that point; tblHash is its hash.
	tbl     *doltdb.Table
	tblHash hash.Hash
}
   610  
   611  func NewTblInfoAtCommit(name string, date *types.Timestamp, tbl *doltdb.Table, tblHash hash.Hash) TblInfoAtCommit {
   612  	return TblInfoAtCommit{
   613  		name, date, tbl, tblHash,
   614  	}
   615  }
   616  
   617  func (ti TblInfoAtCommit) IsEmpty() bool {
   618  	return ti.name == ""
   619  }
   620  
// Compile-time check that *DiffPartition satisfies sql.Partition.
var _ sql.Partition = (*DiffPartition)(nil)

// DiffPartition data partitioned into pairs of table states which get compared
type DiffPartition struct {
	// to and from are the two table states being compared; either may be nil
	// (see isDiffablePartition).
	to   *doltdb.Table
	from *doltdb.Table
	// toName and fromName identify the two sides (a commit hash string or "WORKING").
	toName   string
	fromName string
	// toDate and fromDate are the timestamps of the two sides; nil when unknown.
	toDate   *types.Timestamp
	fromDate *types.Timestamp
	// fromSch and toSch are usually identical. It is the schema of the table at head.
	toSch   schema.Schema
	fromSch schema.Schema
}
   635  
   636  func NewDiffPartition(to, from *doltdb.Table, toName, fromName string, toDate, fromDate *types.Timestamp, toSch, fromSch schema.Schema) *DiffPartition {
   637  	return &DiffPartition{
   638  		to:       to,
   639  		from:     from,
   640  		toName:   toName,
   641  		fromName: fromName,
   642  		toDate:   toDate,
   643  		fromDate: fromDate,
   644  		toSch:    toSch,
   645  		fromSch:  fromSch,
   646  	}
   647  }
   648  
   649  func (dp DiffPartition) Key() []byte {
   650  	return []byte(dp.toName + dp.fromName)
   651  }
   652  
   653  func (dp DiffPartition) GetRowIter(ctx *sql.Context, ddb *doltdb.DoltDB, joiner *rowconv.Joiner, lookup sql.IndexLookup) (sql.RowIter, error) {
   654  	if types.IsFormat_DOLT(ddb.Format()) {
   655  		return newProllyDiffIter(ctx, dp, dp.fromSch, dp.toSch)
   656  	} else {
   657  		return newNomsDiffIter(ctx, ddb, joiner, dp, lookup)
   658  	}
   659  }
   660  
   661  // isDiffablePartition checks if the commit pair for this partition is "diffable".
   662  // If the primary key sets changed between the two commits, it may not be
   663  // possible to diff them.
   664  func (dp *DiffPartition) isDiffablePartition(ctx *sql.Context) (bool, error) {
   665  	// dp.to is nil when a table has been deleted previously. In this case, we return
   666  	// false, to stop processing diffs, since that previously deleted table is considered
   667  	// a logically different table and we don't want to mix the diffs together.
   668  	if dp.to == nil {
   669  		return false, nil
   670  	}
   671  
   672  	// dp.from is nil when the to commit created a new table
   673  	if dp.from == nil {
   674  		return true, nil
   675  	}
   676  
   677  	fromSch, err := dp.from.GetSchema(ctx)
   678  	if err != nil {
   679  		return false, err
   680  	}
   681  
   682  	toSch, err := dp.to.GetSchema(ctx)
   683  	if err != nil {
   684  		return false, err
   685  	}
   686  
   687  	return schema.ArePrimaryKeySetsDiffable(dp.from.Format(), fromSch, toSch), nil
   688  }
   689  
// partitionSelectFunc reports whether a DiffPartition should be included in the
// partitions returned to the engine.
type partitionSelectFunc func(*sql.Context, DiffPartition) (bool, error)
   691  
   692  func SelectFuncForFilters(vr types.ValueReader, filters []sql.Expression) (partitionSelectFunc, error) {
   693  	const (
   694  		toCommitTag uint64 = iota
   695  		fromCommitTag
   696  		toCommitDateTag
   697  		fromCommitDateTag
   698  	)
   699  
   700  	colColl := schema.NewColCollection(
   701  		schema.NewColumn(toCommit, toCommitTag, types.StringKind, false),
   702  		schema.NewColumn(fromCommit, fromCommitTag, types.StringKind, false),
   703  		schema.NewColumn(toCommitDate, toCommitDateTag, types.TimestampKind, false),
   704  		schema.NewColumn(fromCommitDate, fromCommitDateTag, types.TimestampKind, false),
   705  	)
   706  
   707  	expFunc, err := expreval.ExpressionFuncFromSQLExpressions(vr, schema.UnkeyedSchemaFromCols(colColl), filters)
   708  
   709  	if err != nil {
   710  		return nil, err
   711  	}
   712  
   713  	return func(ctx *sql.Context, partition DiffPartition) (bool, error) {
   714  		vals := row.TaggedValues{
   715  			toCommitTag:   types.String(partition.toName),
   716  			fromCommitTag: types.String(partition.fromName),
   717  		}
   718  
   719  		if partition.toDate != nil {
   720  			vals[toCommitDateTag] = *partition.toDate
   721  		}
   722  
   723  		if partition.fromDate != nil {
   724  			vals[fromCommitDateTag] = *partition.fromDate
   725  		}
   726  
   727  		return expFunc(ctx, vals)
   728  	}, nil
   729  }
   730  
// Compile-time check that *DiffPartitions satisfies sql.PartitionIter.
var _ sql.PartitionIter = &DiffPartitions{}

// DiffPartitions a collection of partitions. Implements PartitionItr
type DiffPartitions struct {
	// tblName is the exact name of the table being diffed.
	tblName string
	// cmItr supplies the commits that form the "from" side of each partition.
	cmItr doltdb.CommitItr
	// cmHashToTblInfo maps a commit hash to the table state used as the "to"
	// side when that commit is processed.
	cmHashToTblInfo map[hash.Hash]TblInfoAtCommit
	// selectFunc filters the candidate partitions.
	selectFunc partitionSelectFunc
	toSch      schema.Schema
	fromSch    schema.Schema
}
   742  
   743  // processCommit is called in a commit iteration loop. Adds partitions when it finds a commit and its parent that have
   744  // different values for the hash of the table being looked at.
   745  func (dps *DiffPartitions) processCommit(ctx *sql.Context, cmHash hash.Hash, cm *doltdb.Commit, root doltdb.RootValue, tbl *doltdb.Table) (*DiffPartition, error) {
   746  	tblHash, _, err := root.GetTableHash(ctx, dps.tblName)
   747  
   748  	if err != nil {
   749  		return nil, err
   750  	}
   751  
   752  	toInfoForCommit := dps.cmHashToTblInfo[cmHash]
   753  	cmHashStr := cmHash.String()
   754  	meta, err := cm.GetCommitMeta(ctx)
   755  
   756  	if err != nil {
   757  		return nil, err
   758  	}
   759  
   760  	ts := types.Timestamp(meta.Time())
   761  
   762  	var nextPartition *DiffPartition
   763  	if tblHash != toInfoForCommit.tblHash {
   764  		partition := DiffPartition{
   765  			to:       toInfoForCommit.tbl,
   766  			from:     tbl,
   767  			toName:   toInfoForCommit.name,
   768  			fromName: cmHashStr,
   769  			toDate:   toInfoForCommit.date,
   770  			fromDate: &ts,
   771  			fromSch:  dps.fromSch,
   772  			toSch:    dps.toSch,
   773  		}
   774  		selected, err := dps.selectFunc(ctx, partition)
   775  
   776  		if err != nil {
   777  			return nil, err
   778  		}
   779  
   780  		if selected {
   781  			nextPartition = &partition
   782  		}
   783  	}
   784  
   785  	newInfo := TblInfoAtCommit{cmHashStr, &ts, tbl, tblHash}
   786  	parentHashes, err := cm.ParentHashes(ctx)
   787  
   788  	if err != nil {
   789  		return nil, err
   790  	}
   791  
   792  	for _, h := range parentHashes {
   793  		dps.cmHashToTblInfo[h] = newInfo
   794  	}
   795  
   796  	return nextPartition, nil
   797  }
   798  
   799  func (dps *DiffPartitions) Next(ctx *sql.Context) (sql.Partition, error) {
   800  	for {
   801  		cmHash, optCmt, err := dps.cmItr.Next(ctx)
   802  		if err != nil {
   803  			return nil, err
   804  		}
   805  		cm, ok := optCmt.ToCommit()
   806  		if !ok {
   807  			// Should have been caught above from the Next() call on the iter. This is a runtime error.
   808  			return nil, doltdb.ErrGhostCommitRuntimeFailure
   809  		}
   810  
   811  		root, err := cm.GetRootValue(ctx)
   812  
   813  		if err != nil {
   814  			return nil, err
   815  		}
   816  
   817  		tbl, _, _, err := doltdb.GetTableInsensitive(ctx, root, dps.tblName)
   818  
   819  		if err != nil {
   820  			return nil, err
   821  		}
   822  
   823  		next, err := dps.processCommit(ctx, cmHash, cm, root, tbl)
   824  
   825  		if err != nil {
   826  			return nil, err
   827  		}
   828  
   829  		if next != nil {
   830  			// If we can't diff this commit with its parent, don't traverse any lower
   831  			canDiff, err := next.isDiffablePartition(ctx)
   832  			if err != nil {
   833  				return nil, err
   834  			}
   835  
   836  			if !canDiff {
   837  				ctx.Warn(PrimaryKeyChangeWarningCode, fmt.Sprintf(PrimaryKeyChangeWarning, next.fromName, next.toName))
   838  				return nil, io.EOF
   839  			}
   840  
   841  			return *next, nil
   842  		}
   843  	}
   844  }
   845  
// Close is a no-op and always returns nil.
func (dps *DiffPartitions) Close(*sql.Context) error {
	return nil
}
   849  
   850  // rowConvForSchema creates a RowConverter for transforming rows with the given schema a target schema.
   851  func (dp DiffPartition) rowConvForSchema(ctx context.Context, vrw types.ValueReadWriter, targetSch, srcSch schema.Schema) (*rowconv.RowConverter, error) {
   852  	if schema.SchemasAreEqual(srcSch, schema.EmptySchema) {
   853  		return rowconv.IdentityConverter, nil
   854  	}
   855  
   856  	fm, err := rowconv.TagMappingByTagAndName(srcSch, targetSch)
   857  	if err != nil {
   858  		return nil, err
   859  	}
   860  
   861  	return rowconv.NewRowConverter(ctx, vrw, fm)
   862  }
   863  
   864  // GetDiffTableSchemaAndJoiner returns the schema for the diff table given a
   865  // target schema for a row |sch|. In the old storage format, it also returns the
   866  // associated joiner.
   867  func GetDiffTableSchemaAndJoiner(format *types.NomsBinFormat, fromSch, toSch schema.Schema) (diffTableSchema schema.Schema, j *rowconv.Joiner, err error) {
   868  	if format == types.Format_DOLT {
   869  		diffTableSchema, err = CalculateDiffSchema(fromSch, toSch)
   870  		if err != nil {
   871  			return nil, nil, err
   872  		}
   873  	} else {
   874  		fromSch, toSch, err = expandFromToSchemas(fromSch, toSch)
   875  		if err != nil {
   876  			return nil, nil, err
   877  		}
   878  
   879  		j, err = rowconv.NewJoiner(
   880  			[]rowconv.NamedSchema{{Name: diff.To, Sch: toSch}, {Name: diff.From, Sch: fromSch}},
   881  			map[string]rowconv.ColNamingFunc{
   882  				diff.To:   diff.ToColNamer,
   883  				diff.From: diff.FromColNamer,
   884  			})
   885  		if err != nil {
   886  			return nil, nil, err
   887  		}
   888  
   889  		diffTableSchema = j.GetSchema()
   890  		fullDiffCols := diffTableSchema.GetAllCols()
   891  		fullDiffCols = fullDiffCols.Append(
   892  			schema.NewColumn(diffTypeColName, schema.DiffTypeTag, types.StringKind, false),
   893  		)
   894  		diffTableSchema = schema.MustSchemaFromCols(fullDiffCols)
   895  	}
   896  
   897  	return
   898  }
   899  
   900  // expandFromToSchemas converts input schemas to schemas appropriate for diffs. One argument must be
   901  // non-nil. If one is null, the result will be the columns of the non-nil argument.
   902  func expandFromToSchemas(fromSch, toSch schema.Schema) (newFromSch, newToSch schema.Schema, err error) {
   903  	var fromClmCol, toClmCol *schema.ColCollection
   904  	if fromSch == nil && toSch == nil {
   905  		return nil, nil, errors.New("non-nil argument required to CalculateDiffSchema")
   906  	} else if fromSch == nil {
   907  		fromClmCol = toSch.GetAllCols()
   908  		toClmCol = toSch.GetAllCols()
   909  	} else if toSch == nil {
   910  		toClmCol = fromSch.GetAllCols()
   911  		fromClmCol = fromSch.GetAllCols()
   912  	} else {
   913  		fromClmCol = fromSch.GetAllCols()
   914  		toClmCol = toSch.GetAllCols()
   915  	}
   916  
   917  	fromClmCol = fromClmCol.Append(
   918  		schema.NewColumn("commit", schema.DiffCommitTag, types.StringKind, false),
   919  		schema.NewColumn("commit_date", schema.DiffCommitDateTag, types.TimestampKind, false))
   920  	newFromSch = schema.MustSchemaFromCols(fromClmCol)
   921  
   922  	toClmCol = toClmCol.Append(
   923  		schema.NewColumn("commit", schema.DiffCommitTag, types.StringKind, false),
   924  		schema.NewColumn("commit_date", schema.DiffCommitDateTag, types.TimestampKind, false))
   925  	newToSch = schema.MustSchemaFromCols(toClmCol)
   926  
   927  	return
   928  }
   929  
   930  // CalculateDiffSchema returns the schema for the dolt_diff table based on the schemas from the from and to tables.
   931  // Either may be nil, in which case the nil argument will use the schema of the non-nil argument
   932  func CalculateDiffSchema(fromSch, toSch schema.Schema) (schema.Schema, error) {
   933  	fromSch, toSch, err := expandFromToSchemas(fromSch, toSch)
   934  	if err != nil {
   935  		return nil, err
   936  	}
   937  
   938  	cols := make([]schema.Column, toSch.GetAllCols().Size()+fromSch.GetAllCols().Size()+1)
   939  
   940  	i := 0
   941  	err = toSch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
   942  		toCol, err := schema.NewColumnWithTypeInfo(diff.ToColNamer(col.Name), uint64(i), col.TypeInfo, false, col.Default, false, col.Comment)
   943  		if err != nil {
   944  			return true, err
   945  		}
   946  		cols[i] = toCol
   947  		i++
   948  		return false, nil
   949  	})
   950  	if err != nil {
   951  		return nil, err
   952  	}
   953  
   954  	j := toSch.GetAllCols().Size()
   955  	err = fromSch.GetAllCols().Iter(func(tag uint64, col schema.Column) (stop bool, err error) {
   956  		fromCol, err := schema.NewColumnWithTypeInfo(diff.FromColNamer(col.Name), uint64(j), col.TypeInfo, false, col.Default, false, col.Comment)
   957  		if err != nil {
   958  			return true, err
   959  		}
   960  		cols[j] = fromCol
   961  
   962  		j++
   963  		return false, nil
   964  	})
   965  	if err != nil {
   966  		return nil, err
   967  	}
   968  
   969  	cols[len(cols)-1] = schema.NewColumn(diffTypeColName, schema.DiffTypeTag, types.StringKind, false)
   970  
   971  	return schema.UnkeyedSchemaFromCols(schema.NewColCollection(cols...)), nil
   972  }