github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/scrub_tablereader.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package rowexec

import (
	"bytes"
	"context"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/row"
	"github.com/cockroachdb/cockroach/pkg/sql/scrub"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/errors"
)

// ScrubTypes is the schema for TableReaders that are doing a SCRUB
// check. This is the schema that TableReader output streams are
// overridden to for checks. The column types correspond to:
// - Error type.
// - Primary key as a string, if it was obtainable.
// - JSON of all decoded column values.
//
// TODO(joey): If we want a way to find the key for the error, we will
// need additional data such as the key bytes and the table descriptor
// ID. Repair won't be possible without this.
var ScrubTypes = []*types.T{
	types.String,
	types.String,
	types.Jsonb,
}

type scrubTableReader struct {
	tableReader
	tableDesc sqlbase.TableDescriptor
	// fetcherResultToColIdx maps Fetcher results to the column index in
	// the TableDescriptor. This is only initialized and used during scrub
	// physical checks.
	fetcherResultToColIdx []int
	// indexIdx refers to the index being scanned. This is only used
	// during scrub physical checks.
	indexIdx int
}

var _ execinfra.Processor = &scrubTableReader{}
var _ execinfra.RowSource = &scrubTableReader{}

var scrubTableReaderProcName = "scrub"

// newScrubTableReader creates a scrubTableReader.
func newScrubTableReader(
	flowCtx *execinfra.FlowCtx,
	processorID int32,
	spec *execinfrapb.TableReaderSpec,
	post *execinfrapb.PostProcessSpec,
	output execinfra.RowReceiver,
) (*scrubTableReader, error) {
	// NB: we hit this with a zero NodeID (but !ok) with multi-tenancy.
	if nodeID, ok := flowCtx.NodeID.OptionalNodeID(); nodeID == 0 && ok {
		return nil, errors.Errorf("attempting to create a tableReader with uninitialized NodeID")
	}
	tr := &scrubTableReader{
		indexIdx: int(spec.IndexIdx),
	}

	tr.tableDesc = spec.Table
	tr.limitHint = execinfra.LimitHint(spec.LimitHint, post)

	if err := tr.Init(
		tr,
		post,
		ScrubTypes,
		flowCtx,
		processorID,
		output,
		nil, /* memMonitor */
		execinfra.ProcStateOpts{
			// We don't pass tr.input as an inputToDrain; tr.input is just an adapter
			// on top of a Fetcher; draining doesn't apply to it. Moreover, Andrei
			// doesn't trust that the adapter will do the right thing on a Next() call
			// after it had previously returned an error.
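			// Any fetcher error instead surfaces through Next() below, where
			// scrub errors become result rows and all other errors move the
			// processor to draining.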
			InputsToDrain:        nil,
			TrailingMetaCallback: tr.generateTrailingMeta,
		},
	); err != nil {
		return nil, err
	}

	var neededColumns util.FastIntSet
	// If we are doing a scrub physical check, NeededColumns needs to be
	// changed to be all columns available in the index we are scanning.
	// This is because the emitted schema is ScrubTypes, so NeededColumns
	// does not correctly represent the data being scanned.
	if spec.IndexIdx == 0 {
		neededColumns.AddRange(0, len(spec.Table.Columns)-1)
		for i := range spec.Table.Columns {
			tr.fetcherResultToColIdx = append(tr.fetcherResultToColIdx, i)
		}
	} else {
		colIdxMap := spec.Table.ColumnIdxMap()
		err := spec.Table.Indexes[spec.IndexIdx-1].RunOverAllColumns(func(id sqlbase.ColumnID) error {
			neededColumns.Add(colIdxMap[id])
			return nil
		})
		if err != nil {
			return nil, err
		}
	}

	var fetcher row.Fetcher
	if _, _, err := initRowFetcher(
		flowCtx, &fetcher, &tr.tableDesc, int(spec.IndexIdx), tr.tableDesc.ColumnIdxMap(),
		spec.Reverse, neededColumns, true /* isCheck */, &tr.alloc,
		execinfra.ScanVisibilityPublic, spec.LockingStrength,
	); err != nil {
		return nil, err
	}
	tr.fetcher = &fetcher

	tr.spans = make(roachpb.Spans, len(spec.Spans))
	for i, s := range spec.Spans {
		tr.spans[i] = s.Span
	}

	return tr, nil
}

// generateScrubErrorRow will create an EncDatumRow describing a
// physical check error encountered when scanning table data. The schema
// of the EncDatumRow is the ScrubTypes constant.
func (tr *scrubTableReader) generateScrubErrorRow(
	row sqlbase.EncDatumRow, scrubErr *scrub.Error,
) (sqlbase.EncDatumRow, error) {
	details := make(map[string]interface{})
	var index *sqlbase.IndexDescriptor
	if tr.indexIdx == 0 {
		index = &tr.tableDesc.PrimaryIndex
	} else {
		index = &tr.tableDesc.Indexes[tr.indexIdx-1]
	}
	// Collect all the row values into JSON.
	rowDetails := make(map[string]interface{})
	for i, colIdx := range tr.fetcherResultToColIdx {
		col := tr.tableDesc.Columns[colIdx]
		// TODO(joey): We should maybe try to get the underlying type.
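		// Every value is rendered as a string keyed by column name, so the
		// row_data JSON object stays type-agnostic.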
		rowDetails[col.Name] = row[i].String(col.Type)
	}
	details["row_data"] = rowDetails
	details["index_name"] = index.Name
	details["error_message"] = scrub.UnwrapScrubError(error(scrubErr)).Error()

	detailsJSON, err := tree.MakeDJSON(details)
	if err != nil {
		return nil, err
	}

	primaryKeyValues := tr.prettyPrimaryKeyValues(row, &tr.tableDesc)
	return sqlbase.EncDatumRow{
		sqlbase.DatumToEncDatum(
			ScrubTypes[0],
			tree.NewDString(scrubErr.Code),
		),
		sqlbase.DatumToEncDatum(
			ScrubTypes[1],
			tree.NewDString(primaryKeyValues),
		),
		sqlbase.DatumToEncDatum(
			ScrubTypes[2],
			detailsJSON,
		),
	}, nil
}

// prettyPrimaryKeyValues renders the primary key of the given row as a
// parenthesized, comma-separated string, e.g. (1,'foo').
func (tr *scrubTableReader) prettyPrimaryKeyValues(
	row sqlbase.EncDatumRow, table *sqlbase.TableDescriptor,
) string {
	colIdxMap := make(map[sqlbase.ColumnID]int, len(table.Columns))
	for i := range table.Columns {
		id := table.Columns[i].ID
		colIdxMap[id] = i
	}
	colIDToRowIdxMap := make(map[sqlbase.ColumnID]int, len(table.Columns))
	for rowIdx, colIdx := range tr.fetcherResultToColIdx {
		colIDToRowIdxMap[tr.tableDesc.Columns[colIdx].ID] = rowIdx
	}
	var primaryKeyValues bytes.Buffer
	primaryKeyValues.WriteByte('(')
	for i, id := range table.PrimaryIndex.ColumnIDs {
		if i > 0 {
			primaryKeyValues.WriteByte(',')
		}
		primaryKeyValues.WriteString(
			row[colIDToRowIdxMap[id]].String(table.Columns[colIdxMap[id]].Type))
	}
	primaryKeyValues.WriteByte(')')
	return primaryKeyValues.String()
}

// Start is part of the RowSource interface.
func (tr *scrubTableReader) Start(ctx context.Context) context.Context {
	if tr.FlowCtx.Txn == nil {
		tr.MoveToDraining(errors.Errorf("scrubTableReader outside of txn"))
	}

	ctx = tr.StartInternal(ctx, scrubTableReaderProcName)

	log.VEventf(ctx, 1, "starting")

	if err := tr.fetcher.StartScan(
		ctx, tr.FlowCtx.Txn, tr.spans,
		true /* limitBatches */, tr.limitHint, tr.FlowCtx.TraceKV,
	); err != nil {
		tr.MoveToDraining(err)
	}

	return ctx
}

// Next is part of the RowSource interface.
func (tr *scrubTableReader) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) {
	for tr.State == execinfra.StateRunning {
		var row sqlbase.EncDatumRow
		var err error
		// If we are running a scrub physical check, we use a specialized
		// procedure that runs additional checks while fetching the row
		// data.
		row, err = tr.fetcher.NextRowWithErrors(tr.Ctx)
		// There are four cases that can happen after NextRowWithErrors:
		// 1) We encounter a ScrubError. We do not propagate the error up,
		//    but instead generate and emit a row for the final results.
		// 2) No errors were found. We simply continue scanning the data
		//    and discard the row values, as they are not needed for any
		//    results.
		// 3) A non-scrub error was encountered. This was not considered a
		//    physical data error, and so we propagate this to the user
		//    immediately.
		// 4) There was no error or row data. This signals that there is
		//    no more data to scan.
		//
		// NB: Cases 3 and 4 are handled further below, in the standard
		// table scanning code path.
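		// Cases 1 and 2 are handled here: a scrub error is converted into a
		// results row, while an error-free row is discarded and the scan
		// continues.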
		var v *scrub.Error
		if errors.As(err, &v) {
			row, err = tr.generateScrubErrorRow(row, v)
		} else if err == nil && row != nil {
			continue
		}
		if row == nil || err != nil {
			tr.MoveToDraining(scrub.UnwrapScrubError(err))
			break
		}

		if outRow := tr.ProcessRowHelper(row); outRow != nil {
			return outRow, nil
		}
	}
	return nil, tr.DrainHelper()
}
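
// A purely illustrative sketch of an error row emitted by this processor,
// following the ScrubTypes schema (the error code and values below are
// hypothetical, not taken from this file):
//
//	"index_key_decoding_error"            -- error type
//	"(1,'foo')"                           -- primary key, if obtainable
//	{"error_message": "...",
//	 "index_name": "primary",
//	 "row_data": {"k": "1", "s": "foo"}}  -- decoded column values as JSON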