github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/dtables/diff_iter.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package dtables
    16  
    17  import (
    18  	"context"
    19  	"io"
    20  	"time"
    21  
    22  	"github.com/dolthub/go-mysql-server/sql"
    23  
    24  	"github.com/dolthub/dolt/go/libraries/doltcore/diff"
    25  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/rowconv"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlutil"
    31  	"github.com/dolthub/dolt/go/store/prolly"
    32  	"github.com/dolthub/dolt/go/store/prolly/tree"
    33  	"github.com/dolthub/dolt/go/store/types"
    34  	"github.com/dolthub/dolt/go/store/val"
    35  )
    36  
    37  type diffRowItr struct {
    38  	ad             diff.RowDiffer
    39  	diffSrc        *diff.RowDiffSource
    40  	joiner         *rowconv.Joiner
    41  	sch            schema.Schema
    42  	fromCommitInfo commitInfo
    43  	toCommitInfo   commitInfo
    44  }
    45  
    46  var _ sql.RowIter = &diffRowItr{}
    47  
    48  type commitInfo struct {
    49  	name    types.String
    50  	date    *types.Timestamp
    51  	nameTag uint64
    52  	dateTag uint64
    53  }
    54  
    55  func newNomsDiffIter(ctx *sql.Context, ddb *doltdb.DoltDB, joiner *rowconv.Joiner, dp DiffPartition, lookup sql.IndexLookup) (*diffRowItr, error) {
    56  	fromData, fromSch, err := tableData(ctx, dp.from, ddb)
    57  
    58  	if err != nil {
    59  		return nil, err
    60  	}
    61  
    62  	toData, toSch, err := tableData(ctx, dp.to, ddb)
    63  
    64  	if err != nil {
    65  		return nil, err
    66  	}
    67  
    68  	fromConv, err := dp.rowConvForSchema(ctx, ddb.ValueReadWriter(), dp.fromSch, fromSch)
    69  
    70  	if err != nil {
    71  		return nil, err
    72  	}
    73  
    74  	toConv, err := dp.rowConvForSchema(ctx, ddb.ValueReadWriter(), dp.toSch, toSch)
    75  
    76  	if err != nil {
    77  		return nil, err
    78  	}
    79  
    80  	sch := joiner.GetSchema()
    81  	toCol, _ := sch.GetAllCols().GetByName(toCommit)
    82  	fromCol, _ := sch.GetAllCols().GetByName(fromCommit)
    83  	toDateCol, _ := sch.GetAllCols().GetByName(toCommitDate)
    84  	fromDateCol, _ := sch.GetAllCols().GetByName(fromCommitDate)
    85  
    86  	fromCmInfo := commitInfo{types.String(dp.fromName), dp.fromDate, fromCol.Tag, fromDateCol.Tag}
    87  	toCmInfo := commitInfo{types.String(dp.toName), dp.toDate, toCol.Tag, toDateCol.Tag}
    88  
    89  	rd := diff.NewRowDiffer(ctx, ddb.Format(), fromSch, toSch, 1024)
    90  	// TODO (dhruv) don't cast to noms map
    91  	// Use index lookup if it exists
    92  	if lookup.IsEmpty() {
    93  		rd.Start(ctx, durable.NomsMapFromIndex(fromData), durable.NomsMapFromIndex(toData))
    94  	} else {
    95  		ranges, err := index.NomsRangesFromIndexLookup(ctx, lookup) // TODO: this is a testing method
    96  		if err != nil {
    97  			return nil, err
    98  		}
    99  		// TODO: maybe just use Check
   100  		rangeFunc := func(ctx context.Context, val types.Value) (bool, bool, error) {
   101  			v, ok := val.(types.Tuple)
   102  			if !ok {
   103  				return false, false, nil
   104  			}
   105  			return ranges[0].Check.Check(ctx, ddb.ValueReadWriter(), v)
   106  		}
   107  		rd.StartWithRange(ctx, durable.NomsMapFromIndex(fromData), durable.NomsMapFromIndex(toData), ranges[0].Start, rangeFunc)
   108  	}
   109  
   110  	src := diff.NewRowDiffSource(rd, joiner, ctx.Warn)
   111  	src.AddInputRowConversion(fromConv, toConv)
   112  
   113  	return &diffRowItr{
   114  		ad:             rd,
   115  		diffSrc:        src,
   116  		joiner:         joiner,
   117  		sch:            joiner.GetSchema(),
   118  		fromCommitInfo: fromCmInfo,
   119  		toCommitInfo:   toCmInfo,
   120  	}, nil
   121  }
   122  
   123  // Next returns the next row
   124  func (itr *diffRowItr) Next(ctx *sql.Context) (sql.Row, error) {
   125  	r, err := itr.diffSrc.NextDiff()
   126  
   127  	if err != nil {
   128  		return nil, err
   129  	}
   130  
   131  	toAndFromRows, err := itr.joiner.Split(r)
   132  	if err != nil {
   133  		return nil, err
   134  	}
   135  	_, hasTo := toAndFromRows[diff.To]
   136  	_, hasFrom := toAndFromRows[diff.From]
   137  
   138  	r, err = r.SetColVal(itr.toCommitInfo.nameTag, types.String(itr.toCommitInfo.name), itr.sch)
   139  	if err != nil {
   140  		return nil, err
   141  	}
   142  
   143  	r, err = r.SetColVal(itr.fromCommitInfo.nameTag, types.String(itr.fromCommitInfo.name), itr.sch)
   144  
   145  	if err != nil {
   146  		return nil, err
   147  	}
   148  
   149  	if itr.toCommitInfo.date != nil {
   150  		r, err = r.SetColVal(itr.toCommitInfo.dateTag, *itr.toCommitInfo.date, itr.sch)
   151  
   152  		if err != nil {
   153  			return nil, err
   154  		}
   155  	}
   156  
   157  	if itr.fromCommitInfo.date != nil {
   158  		r, err = r.SetColVal(itr.fromCommitInfo.dateTag, *itr.fromCommitInfo.date, itr.sch)
   159  
   160  		if err != nil {
   161  			return nil, err
   162  		}
   163  	}
   164  
   165  	sqlRow, err := sqlutil.DoltRowToSqlRow(r, itr.sch)
   166  
   167  	if err != nil {
   168  		return nil, err
   169  	}
   170  
   171  	if hasTo && hasFrom {
   172  		sqlRow = append(sqlRow, diffTypeModified)
   173  	} else if hasTo && !hasFrom {
   174  		sqlRow = append(sqlRow, diffTypeAdded)
   175  	} else {
   176  		sqlRow = append(sqlRow, diffTypeRemoved)
   177  	}
   178  
   179  	return sqlRow, nil
   180  }
   181  
   182  // Close closes the iterator
   183  func (itr *diffRowItr) Close(*sql.Context) (err error) {
   184  	defer itr.ad.Close()
   185  	defer func() {
   186  		closeErr := itr.diffSrc.Close()
   187  
   188  		if err == nil {
   189  			err = closeErr
   190  		}
   191  	}()
   192  
   193  	return nil
   194  }
   195  
   196  type commitInfo2 struct {
   197  	name string
   198  	ts   *time.Time
   199  }
   200  
   201  type prollyDiffIter struct {
   202  	from, to                   prolly.Map
   203  	fromSch, toSch             schema.Schema
   204  	targetFromSch, targetToSch schema.Schema
   205  	fromConverter, toConverter ProllyRowConverter
   206  	fromVD, toVD               val.TupleDesc
   207  	keyless                    bool
   208  
   209  	fromCm commitInfo2
   210  	toCm   commitInfo2
   211  
   212  	rows    chan sql.Row
   213  	errChan chan error
   214  	cancel  context.CancelFunc
   215  }
   216  
   217  var _ sql.RowIter = prollyDiffIter{}
   218  
   219  // newProllyDiffIter produces dolt_diff system table and dolt_diff table
   220  // function rows. The rows first have the "to" columns on the left and the
   221  // "from" columns on the right. After the "to" and "from" columns, a commit
   222  // name, and commit date is also present. The final column is the diff_type
   223  // column.
   224  //
   225  // An example: to_pk, to_col1, to_commit, to_commit_date, from_pk, from_col1, from_commit, from_commit_date, diff_type
   226  //
   227  // |targetFromSchema| and |targetToSchema| defines what the schema should be for
   228  // the row data on the "from" or "to" side. In the above example, both schemas are
   229  // identical with two columns "pk" and "col1". The dolt diff table function for
   230  // example can provide two different schemas.
   231  //
   232  // The |from| and |to| tables in the DiffPartition may have different schemas
   233  // than |targetFromSchema| or |targetToSchema|. We convert the rows from the
   234  // schema of |from| to |targetFromSchema| and the schema of |to| to
   235  // |targetToSchema|. See the tablediff_prolly package.
   236  func newProllyDiffIter(ctx *sql.Context, dp DiffPartition, targetFromSchema, targetToSchema schema.Schema) (prollyDiffIter, error) {
   237  	fromCm := commitInfo2{
   238  		name: dp.fromName,
   239  		ts:   (*time.Time)(dp.fromDate),
   240  	}
   241  	toCm := commitInfo2{
   242  		name: dp.toName,
   243  		ts:   (*time.Time)(dp.toDate),
   244  	}
   245  	var from, to prolly.Map
   246  
   247  	var fsch schema.Schema = schema.EmptySchema
   248  	if dp.from != nil {
   249  		idx, err := dp.from.GetRowData(ctx)
   250  		if err != nil {
   251  			return prollyDiffIter{}, err
   252  		}
   253  		from = durable.ProllyMapFromIndex(idx)
   254  		if fsch, err = dp.from.GetSchema(ctx); err != nil {
   255  			return prollyDiffIter{}, err
   256  		}
   257  	}
   258  
   259  	var tsch schema.Schema = schema.EmptySchema
   260  	if dp.to != nil {
   261  		idx, err := dp.to.GetRowData(ctx)
   262  		if err != nil {
   263  			return prollyDiffIter{}, err
   264  		}
   265  		to = durable.ProllyMapFromIndex(idx)
   266  		if tsch, err = dp.to.GetSchema(ctx); err != nil {
   267  			return prollyDiffIter{}, err
   268  		}
   269  	}
   270  
   271  	var nodeStore tree.NodeStore
   272  	if dp.to != nil {
   273  		nodeStore = dp.to.NodeStore()
   274  	} else {
   275  		nodeStore = dp.from.NodeStore()
   276  	}
   277  
   278  	fromConverter, err := NewProllyRowConverter(fsch, targetFromSchema, ctx.Warn, nodeStore)
   279  	if err != nil {
   280  		return prollyDiffIter{}, err
   281  	}
   282  
   283  	toConverter, err := NewProllyRowConverter(tsch, targetToSchema, ctx.Warn, nodeStore)
   284  	if err != nil {
   285  		return prollyDiffIter{}, err
   286  	}
   287  
   288  	fromVD := fsch.GetValueDescriptor()
   289  	toVD := tsch.GetValueDescriptor()
   290  	keyless := schema.IsKeyless(targetFromSchema) && schema.IsKeyless(targetToSchema)
   291  	child, cancel := context.WithCancel(ctx)
   292  	iter := prollyDiffIter{
   293  		from:          from,
   294  		to:            to,
   295  		fromSch:       fsch,
   296  		toSch:         tsch,
   297  		targetFromSch: targetFromSchema,
   298  		targetToSch:   targetToSchema,
   299  		fromConverter: fromConverter,
   300  		toConverter:   toConverter,
   301  		fromVD:        fromVD,
   302  		toVD:          toVD,
   303  		keyless:       keyless,
   304  		fromCm:        fromCm,
   305  		toCm:          toCm,
   306  		rows:          make(chan sql.Row, 64),
   307  		errChan:       make(chan error),
   308  		cancel:        cancel,
   309  	}
   310  
   311  	go func() {
   312  		iter.queueRows(child)
   313  	}()
   314  
   315  	return iter, nil
   316  }
   317  
   318  func (itr prollyDiffIter) Next(ctx *sql.Context) (sql.Row, error) {
   319  	select {
   320  	case <-ctx.Done():
   321  		return nil, ctx.Err()
   322  	case err := <-itr.errChan:
   323  		return nil, err
   324  	case row, ok := <-itr.rows:
   325  		if !ok {
   326  			return nil, io.EOF
   327  		}
   328  		return row, nil
   329  	}
   330  }
   331  
   332  func (itr prollyDiffIter) Close(ctx *sql.Context) error {
   333  	itr.cancel()
   334  	return nil
   335  }
   336  
   337  func (itr prollyDiffIter) queueRows(ctx context.Context) {
   338  	// TODO: Determine whether or not the schema has changed. If it has, then all rows should count as modifications in the diff.
   339  	considerAllRowsModified := false
   340  	err := prolly.DiffMaps(ctx, itr.from, itr.to, considerAllRowsModified, func(ctx context.Context, d tree.Diff) error {
   341  		dItr, err := itr.makeDiffRowItr(ctx, d)
   342  		if err != nil {
   343  			return err
   344  		}
   345  		for {
   346  			r, err := dItr.Next(ctx)
   347  			if err == io.EOF {
   348  				return nil
   349  			}
   350  			if err != nil {
   351  				return err
   352  			}
   353  			select {
   354  			case <-ctx.Done():
   355  				return ctx.Err()
   356  			case itr.rows <- r:
   357  				continue
   358  			}
   359  		}
   360  	})
   361  	if err != nil && err != io.EOF {
   362  		select {
   363  		case <-ctx.Done():
   364  		case itr.errChan <- err:
   365  		}
   366  		return
   367  	}
   368  	// we need to drain itr.rows before returning io.EOF
   369  	close(itr.rows)
   370  }
   371  
   372  // todo(andy): copy string fields
   373  func (itr prollyDiffIter) makeDiffRowItr(ctx context.Context, d tree.Diff) (*repeatingRowIter, error) {
   374  	if !itr.keyless {
   375  		r, err := itr.getDiffRow(ctx, d)
   376  		if err != nil {
   377  			return nil, err
   378  		}
   379  		return &repeatingRowIter{row: r, n: 1}, nil
   380  	}
   381  
   382  	r, n, err := itr.getDiffRowAndCardinality(ctx, d)
   383  	if err != nil {
   384  		return nil, err
   385  	}
   386  	return &repeatingRowIter{row: r, n: n}, nil
   387  }
   388  
   389  func (itr prollyDiffIter) getDiffRowAndCardinality(ctx context.Context, d tree.Diff) (r sql.Row, n uint64, err error) {
   390  	switch d.Type {
   391  	case tree.AddedDiff:
   392  		n = val.ReadKeylessCardinality(val.Tuple(d.To))
   393  	case tree.RemovedDiff:
   394  		n = val.ReadKeylessCardinality(val.Tuple(d.From))
   395  	case tree.ModifiedDiff:
   396  		fN := val.ReadKeylessCardinality(val.Tuple(d.From))
   397  		tN := val.ReadKeylessCardinality(val.Tuple(d.To))
   398  		if fN < tN {
   399  			n = tN - fN
   400  			d.Type = tree.AddedDiff
   401  		} else {
   402  			n = fN - tN
   403  			d.Type = tree.RemovedDiff
   404  		}
   405  	}
   406  
   407  	r, err = itr.getDiffRow(ctx, d)
   408  	if err != nil {
   409  		return nil, 0, err
   410  	}
   411  
   412  	return r, n, nil
   413  }
   414  
   415  func (itr prollyDiffIter) getDiffRow(ctx context.Context, dif tree.Diff) (row sql.Row, err error) {
   416  	tLen := schemaSize(itr.targetToSch)
   417  	fLen := schemaSize(itr.targetFromSch)
   418  
   419  	if fLen == 0 && dif.Type == tree.AddedDiff {
   420  		fLen = tLen
   421  	} else if tLen == 0 && dif.Type == tree.RemovedDiff {
   422  		tLen = fLen
   423  	}
   424  	// 2 commit names, 2 commit dates, 1 diff_type
   425  	row = make(sql.Row, fLen+tLen+5)
   426  
   427  	// todo (dhruv): implement warnings for row column value coercions.
   428  
   429  	if dif.Type != tree.RemovedDiff {
   430  		err = itr.toConverter.PutConverted(ctx, val.Tuple(dif.Key), val.Tuple(dif.To), row[0:tLen])
   431  		if err != nil {
   432  			return nil, err
   433  		}
   434  	}
   435  
   436  	idx := tLen
   437  	row[idx] = itr.toCm.name
   438  	row[idx+1] = maybeTime(itr.toCm.ts)
   439  
   440  	if dif.Type != tree.AddedDiff {
   441  		err = itr.fromConverter.PutConverted(ctx, val.Tuple(dif.Key), val.Tuple(dif.From), row[tLen+2:tLen+2+fLen])
   442  		if err != nil {
   443  			return nil, err
   444  		}
   445  	}
   446  
   447  	idx = fLen + 2 + tLen
   448  	row[idx] = itr.fromCm.name
   449  	row[idx+1] = maybeTime(itr.fromCm.ts)
   450  	row[idx+2] = diffTypeString(dif)
   451  
   452  	return row, nil
   453  }
   454  
   455  type repeatingRowIter struct {
   456  	row sql.Row
   457  	n   uint64
   458  }
   459  
   460  func (r *repeatingRowIter) Next(ctx context.Context) (sql.Row, error) {
   461  	if r.n == 0 {
   462  		return nil, io.EOF
   463  	}
   464  	r.n--
   465  	c := make(sql.Row, len(r.row))
   466  	copy(c, r.row)
   467  	return c, nil
   468  }
   469  
   470  func schemaSize(sch schema.Schema) int {
   471  	if sch == nil {
   472  		return 0
   473  	}
   474  	return sch.GetAllCols().Size()
   475  }
   476  
   477  func diffTypeString(d tree.Diff) (s string) {
   478  	switch d.Type {
   479  	case tree.AddedDiff:
   480  		s = diffTypeAdded
   481  	case tree.ModifiedDiff:
   482  		s = diffTypeModified
   483  	case tree.RemovedDiff:
   484  		s = diffTypeRemoved
   485  	}
   486  	return
   487  }
   488  
   489  func maybeTime(t *time.Time) interface{} {
   490  	if t != nil {
   491  		return *t
   492  	}
   493  	return nil
   494  }
   495  
   496  //------------------------------------
   497  // diffPartitionRowIter
   498  //------------------------------------
   499  
   500  var _ sql.RowIter = (*diffPartitionRowIter)(nil)
   501  
   502  type diffPartitionRowIter struct {
   503  	diffPartitions   *DiffPartitions
   504  	ddb              *doltdb.DoltDB
   505  	joiner           *rowconv.Joiner
   506  	currentPartition *sql.Partition
   507  	currentRowIter   *sql.RowIter
   508  }
   509  
   510  func NewDiffPartitionRowIter(partition sql.Partition, ddb *doltdb.DoltDB, joiner *rowconv.Joiner) *diffPartitionRowIter {
   511  	return &diffPartitionRowIter{
   512  		currentPartition: &partition,
   513  		ddb:              ddb,
   514  		joiner:           joiner,
   515  	}
   516  }
   517  
   518  func (itr *diffPartitionRowIter) Next(ctx *sql.Context) (sql.Row, error) {
   519  	for {
   520  		if itr.currentPartition == nil {
   521  			nextPartition, err := itr.diffPartitions.Next(ctx)
   522  			if err != nil {
   523  				return nil, err
   524  			}
   525  			itr.currentPartition = &nextPartition
   526  		}
   527  
   528  		if itr.currentRowIter == nil {
   529  			dp := (*itr.currentPartition).(DiffPartition)
   530  			rowIter, err := dp.GetRowIter(ctx, itr.ddb, itr.joiner, sql.IndexLookup{})
   531  			if err != nil {
   532  				return nil, err
   533  			}
   534  			itr.currentRowIter = &rowIter
   535  		}
   536  
   537  		row, err := (*itr.currentRowIter).Next(ctx)
   538  		if err == io.EOF {
   539  			itr.currentPartition = nil
   540  			itr.currentRowIter = nil
   541  
   542  			if itr.diffPartitions == nil {
   543  				return nil, err
   544  			}
   545  
   546  			continue
   547  		} else if err != nil {
   548  			return nil, err
   549  		} else {
   550  			return row, nil
   551  		}
   552  	}
   553  }
   554  
   555  func (itr *diffPartitionRowIter) Close(_ *sql.Context) error {
   556  	return nil
   557  }