github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/scrub_index.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sql
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"strings"
    17  	"time"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/sql/scrub"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    22  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    23  )
    24  
    25  // indexCheckOperation implements the checkOperation interface. It is a
    26  // scrub check for a secondary index's integrity. This operation will
    27  // detect:
    28  // 1) Missing index entries. When there is a secondary index entry
    29  //    expected, but is not found.
    30  // 2) Dangling index references. When there is a secondary index entry
    31  //    that refers to a primary index key that cannot be found.
    32  type indexCheckOperation struct {
    33  	tableName *tree.TableName
    34  	tableDesc *sqlbase.ImmutableTableDescriptor
    35  	indexDesc *sqlbase.IndexDescriptor
    36  	asOf      hlc.Timestamp
    37  
    38  	// columns is a list of the columns returned by one side of the
    39  	// queries join. The actual resulting rows from the RowContainer is
    40  	// twice this.
    41  	columns []*sqlbase.ColumnDescriptor
    42  	// primaryColIdxs maps PrimaryIndex.Columns to the row
    43  	// indexes in the query result tree.Datums.
    44  	primaryColIdxs []int
    45  
    46  	run indexCheckRun
    47  }
    48  
    49  // indexCheckRun contains the run-time state for indexCheckOperation
    50  // during local execution.
    51  type indexCheckRun struct {
    52  	started  bool
    53  	rows     []tree.Datums
    54  	rowIndex int
    55  }
    56  
    57  func newIndexCheckOperation(
    58  	tableName *tree.TableName,
    59  	tableDesc *sqlbase.ImmutableTableDescriptor,
    60  	indexDesc *sqlbase.IndexDescriptor,
    61  	asOf hlc.Timestamp,
    62  ) *indexCheckOperation {
    63  	return &indexCheckOperation{
    64  		tableName: tableName,
    65  		tableDesc: tableDesc,
    66  		indexDesc: indexDesc,
    67  		asOf:      asOf,
    68  	}
    69  }
    70  
    71  // Start will plan and run an index check using the distSQL execution
    72  // engine.
    73  func (o *indexCheckOperation) Start(params runParams) error {
    74  	ctx := params.ctx
    75  
    76  	colToIdx := make(map[sqlbase.ColumnID]int)
    77  	for i := range o.tableDesc.Columns {
    78  		id := o.tableDesc.Columns[i].ID
    79  		colToIdx[id] = i
    80  	}
    81  
    82  	var pkColumns, otherColumns []*sqlbase.ColumnDescriptor
    83  
    84  	for _, colID := range o.tableDesc.PrimaryIndex.ColumnIDs {
    85  		col := &o.tableDesc.Columns[colToIdx[colID]]
    86  		pkColumns = append(pkColumns, col)
    87  		colToIdx[colID] = -1
    88  	}
    89  
    90  	maybeAddOtherCol := func(colID sqlbase.ColumnID) {
    91  		pos := colToIdx[colID]
    92  		if pos == -1 {
    93  			// Skip PK column.
    94  			return
    95  		}
    96  		col := &o.tableDesc.Columns[pos]
    97  		otherColumns = append(otherColumns, col)
    98  	}
    99  
   100  	// Collect all of the columns we are fetching from the index. This
   101  	// includes the columns involved in the index: columns, extra columns,
   102  	// and store columns.
   103  	for _, colID := range o.indexDesc.ColumnIDs {
   104  		maybeAddOtherCol(colID)
   105  	}
   106  	for _, colID := range o.indexDesc.ExtraColumnIDs {
   107  		maybeAddOtherCol(colID)
   108  	}
   109  	for _, colID := range o.indexDesc.StoreColumnIDs {
   110  		maybeAddOtherCol(colID)
   111  	}
   112  
   113  	colNames := func(cols []*sqlbase.ColumnDescriptor) []string {
   114  		res := make([]string, len(cols))
   115  		for i := range cols {
   116  			res[i] = cols[i].Name
   117  		}
   118  		return res
   119  	}
   120  
   121  	checkQuery := createIndexCheckQuery(
   122  		colNames(pkColumns), colNames(otherColumns), o.tableDesc.ID, o.indexDesc.ID,
   123  	)
   124  
   125  	rows, err := params.extendedEvalCtx.ExecCfg.InternalExecutor.Query(
   126  		ctx, "scrub-index", params.p.txn, checkQuery,
   127  	)
   128  	if err != nil {
   129  		return err
   130  	}
   131  
   132  	o.run.started = true
   133  	o.run.rows = rows
   134  	o.primaryColIdxs = make([]int, len(pkColumns))
   135  	for i := range o.primaryColIdxs {
   136  		o.primaryColIdxs[i] = i
   137  	}
   138  	o.columns = append(pkColumns, otherColumns...)
   139  	return nil
   140  }
   141  
   142  // Next implements the checkOperation interface.
   143  func (o *indexCheckOperation) Next(params runParams) (tree.Datums, error) {
   144  	row := o.run.rows[o.run.rowIndex]
   145  	o.run.rowIndex++
   146  
   147  	// Check if this row has results from the left. See the comment above
   148  	// createIndexCheckQuery indicating why this is true.
   149  	var isMissingIndexReferenceError bool
   150  	if row[o.primaryColIdxs[0]] != tree.DNull {
   151  		isMissingIndexReferenceError = true
   152  	}
   153  
   154  	colLen := len(o.columns)
   155  	var errorType tree.Datum
   156  	var primaryKeyDatums tree.Datums
   157  	if isMissingIndexReferenceError {
   158  		errorType = tree.NewDString(scrub.MissingIndexEntryError)
   159  		// Fetch the primary index values from the primary index row data.
   160  		for _, rowIdx := range o.primaryColIdxs {
   161  			primaryKeyDatums = append(primaryKeyDatums, row[rowIdx])
   162  		}
   163  	} else {
   164  		errorType = tree.NewDString(scrub.DanglingIndexReferenceError)
   165  		// Fetch the primary index values from the secondary index row
   166  		// data, because no primary index was found. The secondary index columns
   167  		// are offset by the length of the distinct columns, as the first
   168  		// set of columns is for the primary index.
   169  		for _, rowIdx := range o.primaryColIdxs {
   170  			primaryKeyDatums = append(primaryKeyDatums, row[rowIdx+colLen])
   171  		}
   172  	}
   173  	primaryKey := tree.NewDString(primaryKeyDatums.String())
   174  	timestamp, err := tree.MakeDTimestamp(
   175  		params.extendedEvalCtx.GetStmtTimestamp(), time.Nanosecond)
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  
   180  	details := make(map[string]interface{})
   181  	rowDetails := make(map[string]interface{})
   182  	details["row_data"] = rowDetails
   183  	details["index_name"] = o.indexDesc.Name
   184  	if isMissingIndexReferenceError {
   185  		// Fetch the primary index values from the primary index row data.
   186  		for rowIdx, col := range o.columns {
   187  			// TODO(joey): We should maybe try to get the underlying type.
   188  			rowDetails[col.Name] = row[rowIdx].String()
   189  		}
   190  	} else {
   191  		// Fetch the primary index values from the secondary index row data,
   192  		// because no primary index was found. The secondary index columns
   193  		// are offset by the length of the distinct columns, as the first
   194  		// set of columns is for the primary index.
   195  		for rowIdx, col := range o.columns {
   196  			// TODO(joey): We should maybe try to get the underlying type.
   197  			rowDetails[col.Name] = row[rowIdx+colLen].String()
   198  		}
   199  	}
   200  
   201  	detailsJSON, err := tree.MakeDJSON(details)
   202  	if err != nil {
   203  		return nil, err
   204  	}
   205  
   206  	return tree.Datums{
   207  		// TODO(joey): Add the job UUID once the SCRUB command uses jobs.
   208  		tree.DNull, /* job_uuid */
   209  		errorType,
   210  		tree.NewDString(o.tableName.Catalog()),
   211  		tree.NewDString(o.tableName.Table()),
   212  		primaryKey,
   213  		timestamp,
   214  		tree.DBoolFalse,
   215  		detailsJSON,
   216  	}, nil
   217  }
   218  
   219  // Started implements the checkOperation interface.
   220  func (o *indexCheckOperation) Started() bool {
   221  	return o.run.started
   222  }
   223  
   224  // Done implements the checkOperation interface.
   225  func (o *indexCheckOperation) Done(ctx context.Context) bool {
   226  	return o.run.rows == nil || o.run.rowIndex >= len(o.run.rows)
   227  }
   228  
   229  // Close4 implements the checkOperation interface.
   230  func (o *indexCheckOperation) Close(ctx context.Context) {
   231  	o.run.rows = nil
   232  }
   233  
   234  // createIndexCheckQuery will make the index check query for a given
   235  // table and secondary index.
   236  //
   237  // The primary column names and the rest of the index
   238  // columnsIt will also take into account an AS OF
   239  // SYSTEM TIME clause.
   240  //
   241  // For example, given the following table schema:
   242  //
   243  //   CREATE TABLE table (
   244  //     k INT, l INT, a INT, b INT, c INT,
   245  //     PRIMARY KEY (k, l),
   246  //     INDEX idx (a,b),
   247  //   )
   248  //
   249  // The generated query to check the `v_idx` will be:
   250  //
   251  //   SELECT pri.k  pri.l, pri.a, pri.b,
   252  //          sec.k, sec.l, sec.a, sec.b
   253  //   FROM
   254  //     (SELECT k, l, a, b FROM [tbl_id AS table_pri]@{FORCE_INDEX=[1]}) AS pri
   255  //   FULL OUTER JOIN
   256  //     (SELECT k, l, a, b FROM [tbl_id AS table_sec]@{FORCE_INDEX=[idx_id]} AS sec
   257  //   ON
   258  //     pri.k = sec.k AND
   259  //     pri.l = sec.l AND
   260  //     pri.a IS NOT DISTINCT FROM sec.a AND
   261  //     pri.b IS NOT DISTINCT FROM sec.b
   262  //   WHERE
   263  //     pri.k IS NULL OR sec.k IS NULL
   264  //
   265  // Explanation:
   266  //   1) We scan both the primary index and the secondary index.
   267  //
   268  //   2) We join them on equality on the PK columns and "IS NOT DISTINCT FROM" on
   269  //      the other index columns. "IS NOT DISTINCT FROM" is like equality except
   270  //      that NULL equals NULL; it is not needed for the PK columns because those
   271  //      can't be NULL.
   272  //
   273  //      Note: currently, only the PK columns will be used as join equality
   274  //      columns, but that is sufficient.
   275  //
   276  //   3) We select the "outer" rows (those that had no match), effectively
   277  //      achieving a "double" anti-join. We use the PK columns which cannot be
   278  //      NULL except on these rows.
   279  //
   280  //   4) The results are as follows:
   281  //       - if a PK column on the left is NULL, that means that the right-hand
   282  //         side row from the secondary index had no match in the primary index.
   283  //       - if a PK column on the right is NULL, that means that the left-hand
   284  //         side row from the primary key had no match in the secondary index.
   285  //
   286  func createIndexCheckQuery(
   287  	pkColumns []string, otherColumns []string, tableID sqlbase.ID, indexID sqlbase.IndexID,
   288  ) string {
   289  	allColumns := append(pkColumns, otherColumns...)
   290  	// We need to make sure we can handle the non-public column `rowid`
   291  	// that is created for implicit primary keys. In order to do so, the
   292  	// rendered columns need to explicit in the inner selects.
   293  	const checkIndexQuery = `
   294      SELECT %[1]s, %[2]s
   295      FROM
   296        (SELECT %[8]s FROM [%[3]d AS table_pri]@{FORCE_INDEX=[1]}) AS pri
   297      FULL OUTER JOIN
   298        (SELECT %[8]s FROM [%[3]d AS table_sec]@{FORCE_INDEX=[%[4]d]}) AS sec
   299      ON %[5]s
   300      WHERE %[6]s IS NULL OR %[7]s IS NULL`
   301  	return fmt.Sprintf(
   302  		checkIndexQuery,
   303  
   304  		// 1: pri.k, pri.l, pri.a, pri.b
   305  		strings.Join(colRefs("pri", allColumns), ", "),
   306  
   307  		// 2: sec.k, sec.l, sec.a, sec.b
   308  		strings.Join(colRefs("sec", allColumns), ", "),
   309  
   310  		// 3
   311  		tableID,
   312  
   313  		// 4
   314  		indexID,
   315  
   316  		// 5: pri.k = sec.k AND pri.l = sec.l AND
   317  		//    pri.a IS NOT DISTINCT FROM sec.a AND pri.b IS NOT DISTINCT FROM sec.b
   318  		// Note: otherColumns can be empty.
   319  		strings.Join(
   320  			append(
   321  				pairwiseOp(colRefs("pri", pkColumns), colRefs("sec", pkColumns), "="),
   322  				pairwiseOp(colRefs("pri", otherColumns), colRefs("sec", otherColumns), "IS NOT DISTINCT FROM")...,
   323  			),
   324  			" AND ",
   325  		),
   326  
   327  		// 6: pri.k
   328  		colRef("pri", pkColumns[0]),
   329  
   330  		// 7: sec.k
   331  		colRef("sec", pkColumns[0]),
   332  
   333  		// 8: k, l, a, b
   334  		strings.Join(colRefs("", append(pkColumns, otherColumns...)), ", "),
   335  	)
   336  }