github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/scrub.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sql
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"strings"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/sql/catalog/resolver"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/rowcontainer"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    23  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    24  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    25  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    26  	"github.com/cockroachdb/errors"
    27  )
    28  
    29  type scrubNode struct {
    30  	optColumnsSlot
    31  
    32  	n *tree.Scrub
    33  
    34  	run scrubRun
    35  }
    36  
    37  // checkOperation is an interface for scrub check execution. The
    38  // different types of checks implement the interface. The checks are
    39  // then bundled together and iterated through to pull results.
    40  //
    41  // NB: Other changes that need to be made to implement a new check are:
    42  //  1) Add the option parsing in startScrubTable
    43  //  2) Queue the checkOperation structs into scrubNode.checkQueue.
    44  //
    45  // TODO(joey): Eventually we will add the ability to repair check
    46  // failures. In that case, we can add a AttemptRepair function that is
    47  // called after each call to Next.
    48  type checkOperation interface {
    49  	// Started indicates if a checkOperation has already been initialized
    50  	// by Start during the lifetime of the operation.
    51  	Started() bool
    52  
    53  	// Start initializes the check. In many cases, this does the bulk of
    54  	// the work behind a check.
    55  	Start(params runParams) error
    56  
    57  	// Next will return the next check result. The datums returned have
    58  	// the column types specified by scrubTypes, which are the valeus
    59  	// returned to the user.
    60  	//
    61  	// Next is not called if Done() is false.
    62  	Next(params runParams) (tree.Datums, error)
    63  
    64  	// Done indicates when there are no more results to iterate through.
    65  	Done(context.Context) bool
    66  
    67  	// Close will clean up any in progress resources.
    68  	Close(context.Context)
    69  }
    70  
    71  // Scrub checks the database.
    72  // Privileges: superuser.
    73  func (p *planner) Scrub(ctx context.Context, n *tree.Scrub) (planNode, error) {
    74  	if err := p.RequireAdminRole(ctx, "SCRUB"); err != nil {
    75  		return nil, err
    76  	}
    77  	return &scrubNode{n: n}, nil
    78  }
    79  
    80  // scrubRun contains the run-time state of scrubNode during local execution.
    81  type scrubRun struct {
    82  	checkQueue []checkOperation
    83  	row        tree.Datums
    84  }
    85  
    86  func (n *scrubNode) startExec(params runParams) error {
    87  	switch n.n.Typ {
    88  	case tree.ScrubTable:
    89  		// If the tableName provided refers to a view and error will be
    90  		// returned here.
    91  		tableDesc, err := params.p.ResolveExistingObjectEx(
    92  			params.ctx, n.n.Table, true /*required*/, resolver.ResolveRequireTableDesc)
    93  		if err != nil {
    94  			return err
    95  		}
    96  		tn, ok := params.p.ResolvedName(n.n.Table).(*tree.TableName)
    97  		if !ok {
    98  			return errors.AssertionFailedf("%q was not resolved as a table", n.n.Table)
    99  		}
   100  		if err := n.startScrubTable(params.ctx, params.p, tableDesc, tn); err != nil {
   101  			return err
   102  		}
   103  	case tree.ScrubDatabase:
   104  		if err := n.startScrubDatabase(params.ctx, params.p, &n.n.Database); err != nil {
   105  			return err
   106  		}
   107  	default:
   108  		return errors.AssertionFailedf("unexpected SCRUB type received, got: %v", n.n.Typ)
   109  	}
   110  	return nil
   111  }
   112  
   113  func (n *scrubNode) Next(params runParams) (bool, error) {
   114  	for len(n.run.checkQueue) > 0 {
   115  		nextCheck := n.run.checkQueue[0]
   116  		if !nextCheck.Started() {
   117  			if err := nextCheck.Start(params); err != nil {
   118  				return false, err
   119  			}
   120  		}
   121  
   122  		// Check if the iterator is finished before calling Next. This
   123  		// happens if there are no more results to report.
   124  		if !nextCheck.Done(params.ctx) {
   125  			var err error
   126  			n.run.row, err = nextCheck.Next(params)
   127  			if err != nil {
   128  				return false, err
   129  			}
   130  			return true, nil
   131  		}
   132  
   133  		nextCheck.Close(params.ctx)
   134  		// Prepare the next iterator. If we happen to finish this iterator,
   135  		// we want to begin the next one so we still return a result.
   136  		n.run.checkQueue = n.run.checkQueue[1:]
   137  	}
   138  	return false, nil
   139  }
   140  
   141  func (n *scrubNode) Values() tree.Datums {
   142  	return n.run.row
   143  }
   144  
   145  func (n *scrubNode) Close(ctx context.Context) {
   146  	// Close any iterators which have not been completed.
   147  	for len(n.run.checkQueue) > 0 {
   148  		n.run.checkQueue[0].Close(ctx)
   149  		n.run.checkQueue = n.run.checkQueue[1:]
   150  	}
   151  }
   152  
   153  // startScrubDatabase prepares a scrub check for each of the tables in
   154  // the database. Views are skipped without errors.
   155  func (n *scrubNode) startScrubDatabase(ctx context.Context, p *planner, name *tree.Name) error {
   156  	// Check that the database exists.
   157  	database := string(*name)
   158  	dbDesc, err := p.ResolveUncachedDatabaseByName(ctx, database, true /*required*/)
   159  	if err != nil {
   160  		return err
   161  	}
   162  
   163  	schemas, err := p.Tables().GetSchemasForDatabase(ctx, p.txn, dbDesc.ID)
   164  	if err != nil {
   165  		return err
   166  	}
   167  
   168  	var tbNames TableNames
   169  	for _, schema := range schemas {
   170  		toAppend, err := resolver.GetObjectNames(ctx, p.txn, p, p.ExecCfg().Codec, dbDesc, schema, true /*explicitPrefix*/)
   171  		if err != nil {
   172  			return err
   173  		}
   174  		tbNames = append(tbNames, toAppend...)
   175  	}
   176  
   177  	for i := range tbNames {
   178  		tableName := &tbNames[i]
   179  		objDesc, err := p.LogicalSchemaAccessor().GetObjectDesc(
   180  			ctx,
   181  			p.txn,
   182  			p.ExecCfg().Settings,
   183  			p.ExecCfg().Codec,
   184  			tableName.Catalog(),
   185  			tableName.Schema(),
   186  			tableName.Table(),
   187  			p.ObjectLookupFlags(true /*required*/, false /*requireMutable*/),
   188  		)
   189  		if err != nil {
   190  			return err
   191  		}
   192  		tableDesc := objDesc.(*sqlbase.ImmutableTableDescriptor)
   193  		// Skip non-tables and don't throw an error if we encounter one.
   194  		if !tableDesc.IsTable() {
   195  			continue
   196  		}
   197  		if err := n.startScrubTable(ctx, p, tableDesc, tableName); err != nil {
   198  			return err
   199  		}
   200  	}
   201  	return nil
   202  }
   203  
   204  func (n *scrubNode) startScrubTable(
   205  	ctx context.Context,
   206  	p *planner,
   207  	tableDesc *sqlbase.ImmutableTableDescriptor,
   208  	tableName *tree.TableName,
   209  ) error {
   210  	ts, hasTS, err := p.getTimestamp(ctx, n.n.AsOf)
   211  	if err != nil {
   212  		return err
   213  	}
   214  	// Process SCRUB options. These are only present during a SCRUB TABLE
   215  	// statement.
   216  	var indexesSet bool
   217  	var physicalCheckSet bool
   218  	var constraintsSet bool
   219  	for _, option := range n.n.Options {
   220  		switch v := option.(type) {
   221  		case *tree.ScrubOptionIndex:
   222  			if indexesSet {
   223  				return pgerror.Newf(pgcode.Syntax,
   224  					"cannot specify INDEX option more than once")
   225  			}
   226  			indexesSet = true
   227  			checks, err := createIndexCheckOperations(v.IndexNames, tableDesc, tableName, ts)
   228  			if err != nil {
   229  				return err
   230  			}
   231  			n.run.checkQueue = append(n.run.checkQueue, checks...)
   232  		case *tree.ScrubOptionPhysical:
   233  			if physicalCheckSet {
   234  				return pgerror.Newf(pgcode.Syntax,
   235  					"cannot specify PHYSICAL option more than once")
   236  			}
   237  			if hasTS {
   238  				return pgerror.Newf(pgcode.Syntax,
   239  					"cannot use AS OF SYSTEM TIME with PHYSICAL option")
   240  			}
   241  			physicalCheckSet = true
   242  			physicalChecks := createPhysicalCheckOperations(tableDesc, tableName)
   243  			n.run.checkQueue = append(n.run.checkQueue, physicalChecks...)
   244  		case *tree.ScrubOptionConstraint:
   245  			if constraintsSet {
   246  				return pgerror.Newf(pgcode.Syntax,
   247  					"cannot specify CONSTRAINT option more than once")
   248  			}
   249  			constraintsSet = true
   250  			constraintsToCheck, err := createConstraintCheckOperations(
   251  				ctx, p, v.ConstraintNames, tableDesc, tableName, ts)
   252  			if err != nil {
   253  				return err
   254  			}
   255  			n.run.checkQueue = append(n.run.checkQueue, constraintsToCheck...)
   256  		default:
   257  			panic(fmt.Sprintf("Unhandled SCRUB option received: %+v", v))
   258  		}
   259  	}
   260  
   261  	// When no options are provided the default behavior is to run
   262  	// exhaustive checks.
   263  	if len(n.n.Options) == 0 {
   264  		indexesToCheck, err := createIndexCheckOperations(nil /* indexNames */, tableDesc, tableName,
   265  			ts)
   266  		if err != nil {
   267  			return err
   268  		}
   269  		n.run.checkQueue = append(n.run.checkQueue, indexesToCheck...)
   270  		constraintsToCheck, err := createConstraintCheckOperations(
   271  			ctx, p, nil /* constraintNames */, tableDesc, tableName, ts)
   272  		if err != nil {
   273  			return err
   274  		}
   275  		n.run.checkQueue = append(n.run.checkQueue, constraintsToCheck...)
   276  
   277  		physicalChecks := createPhysicalCheckOperations(tableDesc, tableName)
   278  		n.run.checkQueue = append(n.run.checkQueue, physicalChecks...)
   279  	}
   280  	return nil
   281  }
   282  
   283  // getPrimaryColIdxs returns a list of the primary index columns and
   284  // their corresponding index in the columns list.
   285  func getPrimaryColIdxs(
   286  	tableDesc *sqlbase.ImmutableTableDescriptor, columns []*sqlbase.ColumnDescriptor,
   287  ) (primaryColIdxs []int, err error) {
   288  	for i, colID := range tableDesc.PrimaryIndex.ColumnIDs {
   289  		rowIdx := -1
   290  		for idx, col := range columns {
   291  			if col.ID == colID {
   292  				rowIdx = idx
   293  				break
   294  			}
   295  		}
   296  		if rowIdx == -1 {
   297  			return nil, errors.Errorf(
   298  				"could not find primary index column in projection: columnID=%d columnName=%s",
   299  				colID,
   300  				tableDesc.PrimaryIndex.ColumnNames[i])
   301  		}
   302  		primaryColIdxs = append(primaryColIdxs, rowIdx)
   303  	}
   304  	return primaryColIdxs, nil
   305  }
   306  
   307  // col returns the string for referencing a column, with a specific alias,
   308  // e.g. "table.col".
   309  func colRef(tableAlias string, columnName string) string {
   310  	u := tree.UnrestrictedName(columnName)
   311  	if tableAlias == "" {
   312  		return u.String()
   313  	}
   314  	return fmt.Sprintf("%s.%s", tableAlias, &u)
   315  }
   316  
   317  // colRefs returns the strings for referencing a list of columns (as a list).
   318  func colRefs(tableAlias string, columnNames []string) []string {
   319  	res := make([]string, len(columnNames))
   320  	for i := range res {
   321  		res[i] = colRef(tableAlias, columnNames[i])
   322  	}
   323  	return res
   324  }
   325  
   326  // pairwiseOp joins each string on the left with the string on the right, with a
   327  // given operator in-between. For example
   328  //   pairwiseOp([]string{"a","b"}, []string{"x", "y"}, "=")
   329  // returns
   330  //   []string{"a = x", "b = y"}.
   331  func pairwiseOp(left []string, right []string, op string) []string {
   332  	if len(left) != len(right) {
   333  		panic(errors.AssertionFailedf("slice length mismatch (%d vs %d)", len(left), len(right)))
   334  	}
   335  	res := make([]string, len(left))
   336  	for i := range res {
   337  		res[i] = fmt.Sprintf("%s %s %s", left[i], op, right[i])
   338  	}
   339  	return res
   340  }
   341  
   342  // createPhysicalCheckOperations will return the physicalCheckOperation
   343  // for all indexes on a table.
   344  func createPhysicalCheckOperations(
   345  	tableDesc *sqlbase.ImmutableTableDescriptor, tableName *tree.TableName,
   346  ) (checks []checkOperation) {
   347  	checks = append(checks, newPhysicalCheckOperation(tableName, tableDesc, &tableDesc.PrimaryIndex))
   348  	for i := range tableDesc.Indexes {
   349  		checks = append(checks, newPhysicalCheckOperation(tableName, tableDesc, &tableDesc.Indexes[i]))
   350  	}
   351  	return checks
   352  }
   353  
   354  // createIndexCheckOperations will return the checkOperations for the
   355  // provided indexes. If indexNames is nil, then all indexes are
   356  // returned.
   357  // TODO(joey): This can be simplified with
   358  // TableDescriptor.FindIndexByName(), but this will only report the
   359  // first invalid index.
   360  func createIndexCheckOperations(
   361  	indexNames tree.NameList,
   362  	tableDesc *sqlbase.ImmutableTableDescriptor,
   363  	tableName *tree.TableName,
   364  	asOf hlc.Timestamp,
   365  ) (results []checkOperation, err error) {
   366  	if indexNames == nil {
   367  		// Populate results with all secondary indexes of the
   368  		// table.
   369  		for i := range tableDesc.Indexes {
   370  			results = append(results, newIndexCheckOperation(
   371  				tableName,
   372  				tableDesc,
   373  				&tableDesc.Indexes[i],
   374  				asOf,
   375  			))
   376  		}
   377  		return results, nil
   378  	}
   379  
   380  	// Find the indexes corresponding to the user input index names.
   381  	names := make(map[string]struct{})
   382  	for _, idxName := range indexNames {
   383  		names[idxName.String()] = struct{}{}
   384  	}
   385  	for i := range tableDesc.Indexes {
   386  		if _, ok := names[tableDesc.Indexes[i].Name]; ok {
   387  			results = append(results, newIndexCheckOperation(
   388  				tableName,
   389  				tableDesc,
   390  				&tableDesc.Indexes[i],
   391  				asOf,
   392  			))
   393  			delete(names, tableDesc.Indexes[i].Name)
   394  		}
   395  	}
   396  	if len(names) > 0 {
   397  		// Get a list of all the indexes that could not be found.
   398  		missingIndexNames := []string(nil)
   399  		for _, idxName := range indexNames {
   400  			if _, ok := names[idxName.String()]; ok {
   401  				missingIndexNames = append(missingIndexNames, idxName.String())
   402  			}
   403  		}
   404  		return nil, pgerror.Newf(pgcode.UndefinedObject,
   405  			"specified indexes to check that do not exist on table %q: %v",
   406  			tableDesc.Name, strings.Join(missingIndexNames, ", "))
   407  	}
   408  	return results, nil
   409  }
   410  
   411  // createConstraintCheckOperations will return all of the constraints
   412  // that are being checked. If constraintNames is nil, then all
   413  // constraints are returned.
   414  // TODO(joey): Only SQL CHECK and FOREIGN KEY constraints are
   415  // implemented.
   416  func createConstraintCheckOperations(
   417  	ctx context.Context,
   418  	p *planner,
   419  	constraintNames tree.NameList,
   420  	tableDesc *sqlbase.ImmutableTableDescriptor,
   421  	tableName *tree.TableName,
   422  	asOf hlc.Timestamp,
   423  ) (results []checkOperation, err error) {
   424  	constraints, err := tableDesc.GetConstraintInfo(ctx, p.txn, p.ExecCfg().Codec)
   425  	if err != nil {
   426  		return nil, err
   427  	}
   428  
   429  	// Keep only the constraints specified by the constraints in
   430  	// constraintNames.
   431  	if constraintNames != nil {
   432  		wantedConstraints := make(map[string]sqlbase.ConstraintDetail)
   433  		for _, constraintName := range constraintNames {
   434  			if v, ok := constraints[string(constraintName)]; ok {
   435  				wantedConstraints[string(constraintName)] = v
   436  			} else {
   437  				return nil, pgerror.Newf(pgcode.UndefinedObject,
   438  					"constraint %q of relation %q does not exist", constraintName, tableDesc.Name)
   439  			}
   440  		}
   441  		constraints = wantedConstraints
   442  	}
   443  
   444  	// Populate results with all constraints on the table.
   445  	for _, constraint := range constraints {
   446  		switch constraint.Kind {
   447  		case sqlbase.ConstraintTypeCheck:
   448  			results = append(results, newSQLCheckConstraintCheckOperation(
   449  				tableName,
   450  				tableDesc,
   451  				constraint.CheckConstraint,
   452  				asOf,
   453  			))
   454  		case sqlbase.ConstraintTypeFK:
   455  			results = append(results, newSQLForeignKeyCheckOperation(
   456  				tableName,
   457  				tableDesc,
   458  				constraint,
   459  				asOf,
   460  			))
   461  		}
   462  	}
   463  	return results, nil
   464  }
   465  
   466  // scrubRunDistSQL run a distSQLPhysicalPlan plan in distSQL. If
   467  // RowContainer is returned, the caller must close it.
   468  func scrubRunDistSQL(
   469  	ctx context.Context, planCtx *PlanningCtx, p *planner, plan *PhysicalPlan, columnTypes []*types.T,
   470  ) (*rowcontainer.RowContainer, error) {
   471  	ci := sqlbase.ColTypeInfoFromColTypes(columnTypes)
   472  	acc := p.extendedEvalCtx.Mon.MakeBoundAccount()
   473  	rows := rowcontainer.NewRowContainer(acc, ci, 0 /* rowCapacity */)
   474  	rowResultWriter := NewRowResultWriter(rows)
   475  	recv := MakeDistSQLReceiver(
   476  		ctx,
   477  		rowResultWriter,
   478  		tree.Rows,
   479  		p.ExecCfg().RangeDescriptorCache,
   480  		p.ExecCfg().LeaseHolderCache,
   481  		p.txn,
   482  		func(ts hlc.Timestamp) {
   483  			p.ExecCfg().Clock.Update(ts)
   484  		},
   485  		p.extendedEvalCtx.Tracing,
   486  	)
   487  	defer recv.Release()
   488  
   489  	// Copy the evalCtx, as dsp.Run() might change it.
   490  	evalCtxCopy := p.extendedEvalCtx
   491  	p.extendedEvalCtx.DistSQLPlanner.Run(
   492  		planCtx, p.txn, plan, recv, &evalCtxCopy, nil, /* finishedSetupFn */
   493  	)()
   494  	if rowResultWriter.Err() != nil {
   495  		return rows, rowResultWriter.Err()
   496  	} else if rows.Len() == 0 {
   497  		rows.Close(ctx)
   498  		return nil, nil
   499  	}
   500  
   501  	return rows, nil
   502  }