github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/rowexec/scrub_tablereader.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package rowexec

import (
	"bytes"
	"context"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfra"
	"github.com/cockroachdb/cockroach/pkg/sql/execinfrapb"
	"github.com/cockroachdb/cockroach/pkg/sql/row"
	"github.com/cockroachdb/cockroach/pkg/sql/scrub"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/util"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/errors"
)

// ScrubTypes is the schema for TableReaders that are doing a SCRUB
// check. This is the schema that TableReader output streams are
// overridden to for checks. The column types correspond to:
// - Error type.
// - Primary key as a string, if it was obtainable.
// - JSON of all decoded column values.
//
// TODO(joey): If we want a way to find the key for the error, we will
// need additional data such as the key bytes and the table descriptor
// ID. Repair won't be possible without this.
var ScrubTypes = []*types.T{
	types.String,
	types.String,
	types.Jsonb,
}

type scrubTableReader struct {
	tableReader
	tableDesc sqlbase.TableDescriptor
	// fetcherResultToColIdx maps Fetcher results to the column index in
	// the TableDescriptor. This is only initialized and used during scrub
	// physical checks.
	fetcherResultToColIdx []int
	// indexIdx refers to the index being scanned. This is only used
	// during scrub physical checks.
	indexIdx int
}

var _ execinfra.Processor = &scrubTableReader{}
var _ execinfra.RowSource = &scrubTableReader{}

var scrubTableReaderProcName = "scrub"

// newScrubTableReader creates a scrubTableReader.
func newScrubTableReader(
	flowCtx *execinfra.FlowCtx,
	processorID int32,
	spec *execinfrapb.TableReaderSpec,
	post *execinfrapb.PostProcessSpec,
	output execinfra.RowReceiver,
) (*scrubTableReader, error) {
	// NB: we hit this with a zero NodeID (but !ok) with multi-tenancy.
	if nodeID, ok := flowCtx.NodeID.OptionalNodeID(); nodeID == 0 && ok {
		return nil, errors.Errorf("attempting to create a tableReader with uninitialized NodeID")
	}
	tr := &scrubTableReader{
		indexIdx: int(spec.IndexIdx),
	}

	tr.tableDesc = spec.Table
	tr.limitHint = execinfra.LimitHint(spec.LimitHint, post)

	if err := tr.Init(
		tr,
		post,
		ScrubTypes,
		flowCtx,
		processorID,
		output,
		nil, /* memMonitor */
		execinfra.ProcStateOpts{
			// We don't pass tr.input as an inputToDrain; tr.input is just an adapter
			// on top of a Fetcher; draining doesn't apply to it. Moreover, Andrei
			// doesn't trust that the adapter will do the right thing on a Next() call
			// after it had previously returned an error.
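			// Any fetcher error instead surfaces through Next() below, where
			// scrub errors become result rows and all other errors move the
			// processor to draining.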
			InputsToDrain:        nil,
			TrailingMetaCallback: tr.generateTrailingMeta,
		},
	); err != nil {
		return nil, err
	}

	var neededColumns util.FastIntSet
	// If we are doing a scrub physical check, NeededColumns needs to be
	// changed to be all columns available in the index we are scanning.
	// This is because the emitted schema is ScrubTypes, so NeededColumns
	// does not correctly represent the data being scanned.
	if spec.IndexIdx == 0 {
		neededColumns.AddRange(0, len(spec.Table.Columns)-1)
		for i := range spec.Table.Columns {
			tr.fetcherResultToColIdx = append(tr.fetcherResultToColIdx, i)
		}
	} else {
		colIdxMap := spec.Table.ColumnIdxMap()
		err := spec.Table.Indexes[spec.IndexIdx-1].RunOverAllColumns(func(id sqlbase.ColumnID) error {
			neededColumns.Add(colIdxMap[id])
			return nil
		})
		if err != nil {
			return nil, err
		}
	}

	var fetcher row.Fetcher
	if _, _, err := initRowFetcher(
		flowCtx, &fetcher, &tr.tableDesc, int(spec.IndexIdx), tr.tableDesc.ColumnIdxMap(),
		spec.Reverse, neededColumns, true /* isCheck */, &tr.alloc,
		execinfra.ScanVisibilityPublic, spec.LockingStrength,
	); err != nil {
		return nil, err
	}
	tr.fetcher = &fetcher

	tr.spans = make(roachpb.Spans, len(spec.Spans))
	for i, s := range spec.Spans {
		tr.spans[i] = s.Span
	}

	return tr, nil
}

// generateScrubErrorRow will create an EncDatumRow describing a
// physical check error encountered when scanning table data. The schema
// of the EncDatumRow is the ScrubTypes constant.
func (tr *scrubTableReader) generateScrubErrorRow(
	row sqlbase.EncDatumRow, scrubErr *scrub.Error,
) (sqlbase.EncDatumRow, error) {
	details := make(map[string]interface{})
	var index *sqlbase.IndexDescriptor
	if tr.indexIdx == 0 {
		index = &tr.tableDesc.PrimaryIndex
	} else {
		index = &tr.tableDesc.Indexes[tr.indexIdx-1]
	}
	// Collect all the row values into JSON.
	rowDetails := make(map[string]interface{})
	for i, colIdx := range tr.fetcherResultToColIdx {
		col := tr.tableDesc.Columns[colIdx]
		// TODO(joey): We should maybe try to get the underlying type.
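		// Every value is rendered as a string keyed by column name, so the
		// row_data JSON object stays type-agnostic.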
		rowDetails[col.Name] = row[i].String(col.Type)
	}
	details["row_data"] = rowDetails
	details["index_name"] = index.Name
	details["error_message"] = scrub.UnwrapScrubError(error(scrubErr)).Error()

	detailsJSON, err := tree.MakeDJSON(details)
	if err != nil {
		return nil, err
	}

	primaryKeyValues := tr.prettyPrimaryKeyValues(row, &tr.tableDesc)
	return sqlbase.EncDatumRow{
		sqlbase.DatumToEncDatum(
			ScrubTypes[0],
			tree.NewDString(scrubErr.Code),
		),
		sqlbase.DatumToEncDatum(
			ScrubTypes[1],
			tree.NewDString(primaryKeyValues),
		),
		sqlbase.DatumToEncDatum(
			ScrubTypes[2],
			detailsJSON,
		),
	}, nil
}

// prettyPrimaryKeyValues renders the primary key of the given row as a
// parenthesized, comma-separated string, e.g. (1,'foo').
func (tr *scrubTableReader) prettyPrimaryKeyValues(
	row sqlbase.EncDatumRow, table *sqlbase.TableDescriptor,
) string {
	colIdxMap := make(map[sqlbase.ColumnID]int, len(table.Columns))
	for i := range table.Columns {
		id := table.Columns[i].ID
		colIdxMap[id] = i
	}
	colIDToRowIdxMap := make(map[sqlbase.ColumnID]int, len(table.Columns))
	for rowIdx, colIdx := range tr.fetcherResultToColIdx {
		colIDToRowIdxMap[tr.tableDesc.Columns[colIdx].ID] = rowIdx
	}
	var primaryKeyValues bytes.Buffer
	primaryKeyValues.WriteByte('(')
	for i, id := range table.PrimaryIndex.ColumnIDs {
		if i > 0 {
			primaryKeyValues.WriteByte(',')
		}
		primaryKeyValues.WriteString(
			row[colIDToRowIdxMap[id]].String(table.Columns[colIdxMap[id]].Type))
	}
	primaryKeyValues.WriteByte(')')
	return primaryKeyValues.String()
}

// Start is part of the RowSource interface.
func (tr *scrubTableReader) Start(ctx context.Context) context.Context {
	if tr.FlowCtx.Txn == nil {
		tr.MoveToDraining(errors.Errorf("scrubTableReader outside of txn"))
	}

	ctx = tr.StartInternal(ctx, scrubTableReaderProcName)

	log.VEventf(ctx, 1, "starting")

	if err := tr.fetcher.StartScan(
		ctx, tr.FlowCtx.Txn, tr.spans,
		true /* limitBatches */, tr.limitHint, tr.FlowCtx.TraceKV,
	); err != nil {
		tr.MoveToDraining(err)
	}

	return ctx
}

// Next is part of the RowSource interface.
func (tr *scrubTableReader) Next() (sqlbase.EncDatumRow, *execinfrapb.ProducerMetadata) {
	for tr.State == execinfra.StateRunning {
		var row sqlbase.EncDatumRow
		var err error
		// If we are running a scrub physical check, we use a specialized
		// procedure that runs additional checks while fetching the row
		// data.
		row, err = tr.fetcher.NextRowWithErrors(tr.Ctx)
		// There are four cases that can happen after NextRowWithErrors:
		// 1) We encounter a ScrubError. We do not propagate the error up,
		//    but instead generate and emit a row for the final results.
		// 2) No errors were found. We simply continue scanning the data
		//    and discard the row values, as they are not needed for any
		//    results.
		// 3) A non-scrub error was encountered. This was not considered a
		//    physical data error, and so we propagate this to the user
		//    immediately.
		// 4) There was no error or row data. This signals that there is
		//    no more data to scan.
		//
		// NB: Cases 3 and 4 are handled further below, in the standard
		// table scanning code path.
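		// Cases 1 and 2 are handled here: a scrub error is converted into a
		// results row, while an error-free row is discarded and the scan
		// continues.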
		var v *scrub.Error
		if errors.As(err, &v) {
			row, err = tr.generateScrubErrorRow(row, v)
		} else if err == nil && row != nil {
			continue
		}
		if row == nil || err != nil {
			tr.MoveToDraining(scrub.UnwrapScrubError(err))
			break
		}

		if outRow := tr.ProcessRowHelper(row); outRow != nil {
			return outRow, nil
		}
	}
	return nil, tr.DrainHelper()
}
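
// A purely illustrative sketch of an error row emitted by this processor,
// following the ScrubTypes schema (the error code and values below are
// hypothetical, not taken from this file):
//
//	"index_key_decoding_error"            -- error type
//	"(1,'foo')"                           -- primary key, if obtainable
//	{"error_message": "...",
//	 "index_name": "primary",
//	 "row_data": {"k": "1", "s": "foo"}}  -- decoded column values as JSON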