github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/sqle/dtables/diff_table.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package dtables
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  
    22  	"github.com/dolthub/go-mysql-server/sql"
    23  	"github.com/dolthub/go-mysql-server/sql/parse"
    24  
    25  	"github.com/dolthub/dolt/go/libraries/doltcore/diff"
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/row"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/rowconv"
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/expreval"
    31  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlutil"
    32  	"github.com/dolthub/dolt/go/libraries/utils/set"
    33  	"github.com/dolthub/dolt/go/store/hash"
    34  	"github.com/dolthub/dolt/go/store/types"
    35  )
    36  
// Column names and values that the diff table adds on top of the columns of
// the table being diffed.
const (
	// Names of the commit metadata columns. These are also the only columns
	// that partition filters may reference (see partitionFilterCols).
	toCommit       = "to_commit"
	fromCommit     = "from_commit"
	toCommitDate   = "to_commit_date"
	fromCommitDate = "from_commit_date"

	// diffTypeColName is the column appended to the end of every diff row;
	// the diffType* constants are the values stored in it.
	diffTypeColName  = "diff_type"
	diffTypeAdded    = "added"
	diffTypeModified = "modified"
	diffTypeRemoved  = "removed"
)
    48  
    49  func toNamer(name string) string {
    50  	return diff.To + "_" + name
    51  }
    52  
    53  func fromNamer(name string) string {
    54  	return diff.From + "_" + name
    55  }
    56  
// Compile-time checks that *DiffTable implements the SQL engine interfaces.
var _ sql.Table = (*DiffTable)(nil)
var _ sql.FilteredTable = (*DiffTable)(nil)

// DiffTable is the sql.Table behind the diff system table of a single user
// table. Its partitions are pairs of table states found by walking the commit
// history from head, and its rows are the row-level differences of each pair.
type DiffTable struct {
	// name is the name of the table being diffed, without the diff prefix.
	name        string
	// ddb is the database the commit history is read from.
	ddb         *doltdb.DoltDB
	// workingRoot supplies the table state reported under the name "WORKING".
	workingRoot *doltdb.RootValue
	// head is the commit the history walk starts from.
	head        *doltdb.Commit

	// ss is the super schema of the table plus the commit and commit_date columns.
	ss               *schema.SuperSchema
	// joiner combines the "to" and "from" versions of a row into one diff row.
	joiner           *rowconv.Joiner
	// sqlSch is the SQL schema returned by Schema: the joined columns followed
	// by the diff_type column.
	sqlSch           sql.Schema
	// partitionFilters are the filters used to select which partitions to produce.
	partitionFilters []sql.Expression
	// rowFilters are the remaining filters; nothing in this file reads them.
	rowFilters       []sql.Expression
}
    72  
    73  func NewDiffTable(ctx *sql.Context, tblName string, ddb *doltdb.DoltDB, root *doltdb.RootValue, head *doltdb.Commit) (sql.Table, error) {
    74  	diffTblName := doltdb.DoltDiffTablePrefix + tblName
    75  
    76  	ss, err := calcSuperSchema(ctx, root, tblName)
    77  	if err != nil {
    78  		return nil, err
    79  	}
    80  
    81  	_ = ss.AddColumn(schema.NewColumn("commit", schema.DiffCommitTag, types.StringKind, false))
    82  	_ = ss.AddColumn(schema.NewColumn("commit_date", schema.DiffCommitDateTag, types.TimestampKind, false))
    83  
    84  	sch, err := ss.GenerateSchema()
    85  
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  
    90  	if sch.GetAllCols().Size() <= 1 {
    91  		return nil, sql.ErrTableNotFound.New(diffTblName)
    92  	}
    93  
    94  	j, err := rowconv.NewJoiner(
    95  		[]rowconv.NamedSchema{{Name: diff.To, Sch: sch}, {Name: diff.From, Sch: sch}},
    96  		map[string]rowconv.ColNamingFunc{
    97  			diff.To:   toNamer,
    98  			diff.From: fromNamer,
    99  		})
   100  
   101  	if err != nil {
   102  		return nil, err
   103  	}
   104  
   105  	sqlSch, err := sqlutil.FromDoltSchema(diffTblName, j.GetSchema())
   106  
   107  	if err != nil {
   108  		return nil, err
   109  	}
   110  
   111  	// parses to literal, no need to pass through analyzer
   112  	defaultVal, err := parse.StringToColumnDefaultValue(ctx, fmt.Sprintf(`"%s"`, diffTypeModified))
   113  	if err != nil {
   114  		return nil, err
   115  	}
   116  
   117  	sqlSch = append(sqlSch, &sql.Column{
   118  		Name:     diffTypeColName,
   119  		Type:     sql.Text,
   120  		Default:  defaultVal,
   121  		Nullable: false,
   122  		Source:   diffTblName,
   123  	})
   124  
   125  	return &DiffTable{
   126  		name:             tblName,
   127  		ddb:              ddb,
   128  		workingRoot:      root,
   129  		head:             head,
   130  		ss:               ss,
   131  		joiner:           j,
   132  		sqlSch:           sqlSch,
   133  		partitionFilters: nil,
   134  		rowFilters:       nil,
   135  	}, nil
   136  }
   137  
   138  func (dt *DiffTable) Name() string {
   139  	return doltdb.DoltDiffTablePrefix + dt.name
   140  }
   141  
   142  func (dt *DiffTable) String() string {
   143  	return doltdb.DoltDiffTablePrefix + dt.name
   144  }
   145  
// Schema returns the SQL schema built in NewDiffTable: the joined to/from
// columns followed by the diff_type column.
func (dt *DiffTable) Schema() sql.Schema {
	return dt.sqlSch
}
   149  
   150  func (dt *DiffTable) Partitions(ctx *sql.Context) (sql.PartitionIter, error) {
   151  	cmItr := doltdb.CommitItrForRoots(dt.ddb, dt.head)
   152  
   153  	sf, err := selectFuncForFilters(dt.ddb.Format(), dt.partitionFilters)
   154  
   155  	if err != nil {
   156  		return nil, err
   157  	}
   158  
   159  	return newDiffPartitions(ctx, cmItr, dt.workingRoot, dt.name, sf)
   160  }
   161  
// partitionFilterCols is the set of column names used to build the check that
// separates partition (commit) filters from the remaining filters.
var partitionFilterCols = set.NewStrSet([]string{toCommit, fromCommit, toCommitDate, fromCommitDate})
   163  
   164  func splitPartitionFilters(filters []sql.Expression) (commitFilters, rowFilters []sql.Expression) {
   165  	return splitFilters(filters, getColumnFilterCheck(partitionFilterCols))
   166  }
   167  
   168  // HandledFilters returns the list of filters that will be handled by the table itself
   169  func (dt *DiffTable) HandledFilters(filters []sql.Expression) []sql.Expression {
   170  	dt.partitionFilters, dt.rowFilters = splitPartitionFilters(filters)
   171  	return dt.partitionFilters
   172  }
   173  
// Filters returns the partition filters currently stored on the table. Row
// filters are not included.
func (dt *DiffTable) Filters() []sql.Expression {
	return dt.partitionFilters
}
   178  
   179  // WithFilters returns a new sql.Table instance with the filters applied
   180  func (dt *DiffTable) WithFilters(ctx *sql.Context, filters []sql.Expression) sql.Table {
   181  	if dt.partitionFilters == nil {
   182  		dt.partitionFilters, dt.rowFilters = splitPartitionFilters(filters)
   183  	}
   184  
   185  	return dt
   186  }
   187  
   188  func (dt *DiffTable) PartitionRows(ctx *sql.Context, part sql.Partition) (sql.RowIter, error) {
   189  	dp := part.(diffPartition)
   190  	return dp.getRowIter(ctx, dt.ddb, dt.ss, dt.joiner)
   191  }
   192  
   193  func tableData(ctx *sql.Context, tbl *doltdb.Table, ddb *doltdb.DoltDB) (types.Map, schema.Schema, error) {
   194  	var data types.Map
   195  	var err error
   196  	if tbl == nil {
   197  		data, err = types.NewMap(ctx, ddb.ValueReadWriter())
   198  		if err != nil {
   199  			return types.EmptyMap, nil, err
   200  		}
   201  	} else {
   202  		data, err = tbl.GetRowData(ctx)
   203  		if err != nil {
   204  			return types.EmptyMap, nil, err
   205  		}
   206  	}
   207  
   208  	var sch schema.Schema
   209  	if tbl == nil {
   210  		sch = schema.EmptySchema
   211  	} else {
   212  		sch, err = tbl.GetSchema(ctx)
   213  
   214  		if err != nil {
   215  			return types.EmptyMap, nil, err
   216  		}
   217  	}
   218  
   219  	return data, sch, nil
   220  }
   221  
// Compile-time check that *diffRowItr implements sql.RowIter.
var _ sql.RowIter = (*diffRowItr)(nil)

// diffRowItr iterates over the row differences between the two table states of
// a single diffPartition.
type diffRowItr struct {
	// ad is the differ feeding diffSrc; it is closed by Close.
	ad             diff.RowDiffer
	// diffSrc yields the joined to/from diff rows.
	diffSrc        *diff.RowDiffSource
	// joiner splits a joined row back into its "to" and "from" parts.
	joiner         *rowconv.Joiner
	// sch is the joined schema, used to set the commit columns and to convert rows to SQL rows.
	sch            schema.Schema
	// fromCommitInfo and toCommitInfo hold the commit values written into every row.
	fromCommitInfo commitInfo
	toCommitInfo   commitInfo
}
   232  
// commitInfo holds the commit values written into one side of every diff row,
// together with the tags of the columns they are written to.
type commitInfo struct {
	// name is the value stored in the commit name column.
	name    types.String
	// date is the value stored in the commit date column; when nil the column is not set.
	date    *types.Timestamp
	// nameTag and dateTag are the tags of the commit name and commit date columns.
	nameTag uint64
	dateTag uint64
}
   239  
   240  // Next returns the next row
   241  func (itr *diffRowItr) Next() (sql.Row, error) {
   242  	r, _, err := itr.diffSrc.NextDiff()
   243  
   244  	if err != nil {
   245  		return nil, err
   246  	}
   247  
   248  	toAndFromRows, err := itr.joiner.Split(r)
   249  	if err != nil {
   250  		return nil, err
   251  	}
   252  	_, hasTo := toAndFromRows[diff.To]
   253  	_, hasFrom := toAndFromRows[diff.From]
   254  
   255  	r, err = r.SetColVal(itr.toCommitInfo.nameTag, types.String(itr.toCommitInfo.name), itr.sch)
   256  	if err != nil {
   257  		return nil, err
   258  	}
   259  
   260  	r, err = r.SetColVal(itr.fromCommitInfo.nameTag, types.String(itr.fromCommitInfo.name), itr.sch)
   261  
   262  	if err != nil {
   263  		return nil, err
   264  	}
   265  
   266  	if itr.toCommitInfo.date != nil {
   267  		r, err = r.SetColVal(itr.toCommitInfo.dateTag, *itr.toCommitInfo.date, itr.sch)
   268  
   269  		if err != nil {
   270  			return nil, err
   271  		}
   272  	}
   273  
   274  	if itr.fromCommitInfo.date != nil {
   275  		r, err = r.SetColVal(itr.fromCommitInfo.dateTag, *itr.fromCommitInfo.date, itr.sch)
   276  
   277  		if err != nil {
   278  			return nil, err
   279  		}
   280  	}
   281  
   282  	sqlRow, err := sqlutil.DoltRowToSqlRow(r, itr.sch)
   283  
   284  	if err != nil {
   285  		return nil, err
   286  	}
   287  
   288  	if hasTo && hasFrom {
   289  		sqlRow = append(sqlRow, diffTypeModified)
   290  	} else if hasTo && !hasFrom {
   291  		sqlRow = append(sqlRow, diffTypeAdded)
   292  	} else {
   293  		sqlRow = append(sqlRow, diffTypeRemoved)
   294  	}
   295  
   296  	return sqlRow, nil
   297  }
   298  
   299  // Close closes the iterator
   300  func (itr *diffRowItr) Close(*sql.Context) (err error) {
   301  	defer itr.ad.Close()
   302  	defer func() {
   303  		closeErr := itr.diffSrc.Close()
   304  
   305  		if err == nil {
   306  			err = closeErr
   307  		}
   308  	}()
   309  
   310  	return nil
   311  }
   312  
// tblInfoAtCommit records the state of the table at one point in history. It
// is stored per commit hash so that a commit can later be compared against it.
type tblInfoAtCommit struct {
	// name is the commit hash string, or "WORKING" for the working root.
	name    string
	// date is the commit timestamp; it is nil for the working root.
	date    *types.Timestamp
	// tbl is the table at that point.
	tbl     *doltdb.Table
	// tblHash is the hash of the table, used to detect whether it changed.
	tblHash hash.Hash
}
   319  
// diffPartition is one partition of the diff table: a pair of table states
// (to and from) that get compared, along with the name and date reported for
// each side.
type diffPartition struct {
	// to and from are the two table states being compared; tableData treats a nil table as empty.
	to       *doltdb.Table
	from     *doltdb.Table
	// toName and fromName are the values reported in the to_commit and from_commit columns.
	toName   string
	fromName string
	// toDate and fromDate are the values reported in the commit date columns; nil means not set.
	toDate   *types.Timestamp
	fromDate *types.Timestamp
}
   329  
   330  func (dp diffPartition) Key() []byte {
   331  	return []byte(dp.toName + dp.fromName)
   332  }
   333  
   334  func (dp diffPartition) getRowIter(ctx *sql.Context, ddb *doltdb.DoltDB, ss *schema.SuperSchema, joiner *rowconv.Joiner) (sql.RowIter, error) {
   335  	fromData, fromSch, err := tableData(ctx, dp.from, ddb)
   336  
   337  	if err != nil {
   338  		return nil, err
   339  	}
   340  
   341  	toData, toSch, err := tableData(ctx, dp.to, ddb)
   342  
   343  	if err != nil {
   344  		return nil, err
   345  	}
   346  
   347  	vrw := types.NewMemoryValueStore() // We're displaying here, so all values that require a VRW will use an internal one
   348  
   349  	fromConv, err := rowConvForSchema(ctx, vrw, ss, fromSch)
   350  
   351  	if err != nil {
   352  		return nil, err
   353  	}
   354  
   355  	toConv, err := rowConvForSchema(ctx, vrw, ss, toSch)
   356  
   357  	if err != nil {
   358  		return nil, err
   359  	}
   360  
   361  	sch := joiner.GetSchema()
   362  	toCol, _ := sch.GetAllCols().GetByName(toCommit)
   363  	fromCol, _ := sch.GetAllCols().GetByName(fromCommit)
   364  	toDateCol, _ := sch.GetAllCols().GetByName(toCommitDate)
   365  	fromDateCol, _ := sch.GetAllCols().GetByName(fromCommitDate)
   366  
   367  	fromCmInfo := commitInfo{types.String(dp.fromName), dp.fromDate, fromCol.Tag, fromDateCol.Tag}
   368  	toCmInfo := commitInfo{types.String(dp.toName), dp.toDate, toCol.Tag, toDateCol.Tag}
   369  
   370  	rd := diff.NewRowDiffer(ctx, fromSch, toSch, 1024)
   371  	rd.Start(ctx, fromData, toData)
   372  
   373  	src := diff.NewRowDiffSource(rd, joiner)
   374  	src.AddInputRowConversion(fromConv, toConv)
   375  
   376  	return &diffRowItr{
   377  		ad:             rd,
   378  		diffSrc:        src,
   379  		joiner:         joiner,
   380  		sch:            joiner.GetSchema(),
   381  		fromCommitInfo: fromCmInfo,
   382  		toCommitInfo:   toCmInfo,
   383  	}, nil
   384  }
   385  
// partitionSelectFunc reports whether a partition should be produced by the
// partition iterator. A partition is produced only when it returns true.
type partitionSelectFunc func(*sql.Context, diffPartition) (bool, error)
   387  
   388  func selectFuncForFilters(nbf *types.NomsBinFormat, filters []sql.Expression) (partitionSelectFunc, error) {
   389  	const (
   390  		toCommitTag uint64 = iota
   391  		fromCommitTag
   392  		toCommitDateTag
   393  		fromCommitDateTag
   394  	)
   395  
   396  	colColl := schema.NewColCollection(
   397  		schema.NewColumn(toCommit, toCommitTag, types.StringKind, false),
   398  		schema.NewColumn(fromCommit, fromCommitTag, types.StringKind, false),
   399  		schema.NewColumn(toCommitDate, toCommitDateTag, types.TimestampKind, false),
   400  		schema.NewColumn(fromCommitDate, fromCommitDateTag, types.TimestampKind, false),
   401  	)
   402  
   403  	expFunc, err := expreval.ExpressionFuncFromSQLExpressions(nbf, schema.UnkeyedSchemaFromCols(colColl), filters)
   404  
   405  	if err != nil {
   406  		return nil, err
   407  	}
   408  
   409  	return func(ctx *sql.Context, partition diffPartition) (bool, error) {
   410  		vals := row.TaggedValues{
   411  			toCommitTag:   types.String(partition.toName),
   412  			fromCommitTag: types.String(partition.fromName),
   413  		}
   414  
   415  		if partition.toDate != nil {
   416  			vals[toCommitDateTag] = *partition.toDate
   417  		}
   418  
   419  		if partition.fromDate != nil {
   420  			vals[fromCommitDateTag] = *partition.fromDate
   421  		}
   422  
   423  		return expFunc(ctx, vals)
   424  	}, nil
   425  }
   426  
// Compile-time check that *diffPartitions implements sql.PartitionIter.
var _ sql.PartitionIter = &diffPartitions{}

// diffPartitions is a collection of partitions. It implements
// sql.PartitionIter by walking the commit iterator and producing a partition
// for each selected pair of differing table states.
type diffPartitions struct {
	// TODO change the sql.PartitionIterator interface so that Next receives the context rather than caching it.
	ctx             *sql.Context
	// tblName is the name of the table being diffed.
	tblName         string
	// cmItr supplies the commits to process.
	cmItr           doltdb.CommitItr
	// cmHashToTblInfo maps a commit hash to the table state that commit is compared against.
	cmHashToTblInfo map[hash.Hash]tblInfoAtCommit
	// selectFunc decides whether a candidate partition is produced.
	selectFunc      partitionSelectFunc
}
   438  
   439  func newDiffPartitions(ctx *sql.Context, cmItr doltdb.CommitItr, wr *doltdb.RootValue, tblName string, selectFunc partitionSelectFunc) (*diffPartitions, error) {
   440  	t, exactName, ok, err := wr.GetTableInsensitive(ctx, tblName)
   441  
   442  	if err != nil {
   443  		return nil, err
   444  	}
   445  
   446  	if !ok {
   447  		return nil, errors.New(fmt.Sprintf("table: %s does not exist", tblName))
   448  	}
   449  
   450  	wrTblHash, _, err := wr.GetTableHash(ctx, exactName)
   451  
   452  	if err != nil {
   453  		return nil, err
   454  	}
   455  
   456  	cmHash, _, err := cmItr.Next(ctx)
   457  
   458  	if err != nil {
   459  		return nil, err
   460  	}
   461  
   462  	cmHashToTblInfo := make(map[hash.Hash]tblInfoAtCommit)
   463  	cmHashToTblInfo[cmHash] = tblInfoAtCommit{"WORKING", nil, t, wrTblHash}
   464  
   465  	err = cmItr.Reset(ctx)
   466  
   467  	if err != nil {
   468  		return nil, err
   469  	}
   470  
   471  	return &diffPartitions{
   472  		ctx:             ctx,
   473  		tblName:         tblName,
   474  		cmItr:           cmItr,
   475  		cmHashToTblInfo: cmHashToTblInfo,
   476  		selectFunc:      selectFunc,
   477  	}, nil
   478  }
   479  
   480  // called in a commit iteration loop. Adds partitions when it finds a commit and it's parent that have different values
   481  // for the hash of the table being looked at.
   482  func (dp *diffPartitions) processCommit(ctx *sql.Context, cmHash hash.Hash, cm *doltdb.Commit, root *doltdb.RootValue, tbl *doltdb.Table) (*diffPartition, error) {
   483  	tblHash, _, err := root.GetTableHash(ctx, dp.tblName)
   484  
   485  	if err != nil {
   486  		return nil, err
   487  	}
   488  
   489  	toInfoForCommit := dp.cmHashToTblInfo[cmHash]
   490  	cmHashStr := cmHash.String()
   491  	meta, err := cm.GetCommitMeta()
   492  
   493  	if err != nil {
   494  		return nil, err
   495  	}
   496  
   497  	ts := types.Timestamp(meta.Time())
   498  
   499  	var nextPartition *diffPartition
   500  	if tblHash != toInfoForCommit.tblHash {
   501  		partition := diffPartition{toInfoForCommit.tbl, tbl, toInfoForCommit.name, cmHashStr, toInfoForCommit.date, &ts}
   502  		selected, err := dp.selectFunc(ctx, partition)
   503  
   504  		if err != nil {
   505  			return nil, err
   506  		}
   507  
   508  		if selected {
   509  			nextPartition = &partition
   510  		}
   511  	}
   512  
   513  	newInfo := tblInfoAtCommit{cmHashStr, &ts, tbl, tblHash}
   514  	parentHashes, err := cm.ParentHashes(ctx)
   515  
   516  	if err != nil {
   517  		return nil, err
   518  	}
   519  
   520  	for _, h := range parentHashes {
   521  		dp.cmHashToTblInfo[h] = newInfo
   522  	}
   523  
   524  	return nextPartition, nil
   525  }
   526  
   527  func (dp *diffPartitions) Next() (sql.Partition, error) {
   528  	for {
   529  		cmHash, cm, err := dp.cmItr.Next(dp.ctx)
   530  
   531  		if err != nil {
   532  			return nil, err
   533  		}
   534  
   535  		root, err := cm.GetRootValue()
   536  
   537  		if err != nil {
   538  			return nil, err
   539  		}
   540  
   541  		tbl, _, _, err := root.GetTableInsensitive(dp.ctx, dp.tblName)
   542  
   543  		if err != nil {
   544  			return nil, err
   545  		}
   546  
   547  		next, err := dp.processCommit(dp.ctx, cmHash, cm, root, tbl)
   548  
   549  		if err != nil {
   550  			return nil, err
   551  		}
   552  
   553  		if next != nil {
   554  			return *next, nil
   555  		}
   556  	}
   557  }
   558  
// Close does nothing and always returns nil.
func (dp *diffPartitions) Close(*sql.Context) error {
	return nil
}
   562  
   563  // creates a RowConverter for transforming rows with the the given schema to this super schema.
   564  func rowConvForSchema(ctx context.Context, vrw types.ValueReadWriter, ss *schema.SuperSchema, sch schema.Schema) (*rowconv.RowConverter, error) {
   565  	if schema.SchemasAreEqual(sch, schema.EmptySchema) {
   566  		return rowconv.IdentityConverter, nil
   567  	}
   568  
   569  	inNameToOutName, err := ss.NameMapForSchema(sch)
   570  
   571  	if err != nil {
   572  		return nil, err
   573  	}
   574  
   575  	ssch, err := ss.GenerateSchema()
   576  
   577  	if err != nil {
   578  		return nil, err
   579  	}
   580  
   581  	fm, err := rowconv.NameMapping(sch, ssch, inNameToOutName)
   582  
   583  	if err != nil {
   584  		return nil, err
   585  	}
   586  
   587  	return rowconv.NewRowConverter(ctx, vrw, fm)
   588  }