github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/scrub_index.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sql 12 13 import ( 14 "context" 15 "fmt" 16 "strings" 17 "time" 18 19 "github.com/cockroachdb/cockroach/pkg/sql/scrub" 20 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 21 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 22 "github.com/cockroachdb/cockroach/pkg/util/hlc" 23 ) 24 25 // indexCheckOperation implements the checkOperation interface. It is a 26 // scrub check for a secondary index's integrity. This operation will 27 // detect: 28 // 1) Missing index entries. When there is a secondary index entry 29 // expected, but is not found. 30 // 2) Dangling index references. When there is a secondary index entry 31 // that refers to a primary index key that cannot be found. 32 type indexCheckOperation struct { 33 tableName *tree.TableName 34 tableDesc *sqlbase.ImmutableTableDescriptor 35 indexDesc *sqlbase.IndexDescriptor 36 asOf hlc.Timestamp 37 38 // columns is a list of the columns returned by one side of the 39 // queries join. The actual resulting rows from the RowContainer is 40 // twice this. 41 columns []*sqlbase.ColumnDescriptor 42 // primaryColIdxs maps PrimaryIndex.Columns to the row 43 // indexes in the query result tree.Datums. 44 primaryColIdxs []int 45 46 run indexCheckRun 47 } 48 49 // indexCheckRun contains the run-time state for indexCheckOperation 50 // during local execution. 51 type indexCheckRun struct { 52 started bool 53 rows []tree.Datums 54 rowIndex int 55 } 56 57 func newIndexCheckOperation( 58 tableName *tree.TableName, 59 tableDesc *sqlbase.ImmutableTableDescriptor, 60 indexDesc *sqlbase.IndexDescriptor, 61 asOf hlc.Timestamp, 62 ) *indexCheckOperation { 63 return &indexCheckOperation{ 64 tableName: tableName, 65 tableDesc: tableDesc, 66 indexDesc: indexDesc, 67 asOf: asOf, 68 } 69 } 70 71 // Start will plan and run an index check using the distSQL execution 72 // engine. 73 func (o *indexCheckOperation) Start(params runParams) error { 74 ctx := params.ctx 75 76 colToIdx := make(map[sqlbase.ColumnID]int) 77 for i := range o.tableDesc.Columns { 78 id := o.tableDesc.Columns[i].ID 79 colToIdx[id] = i 80 } 81 82 var pkColumns, otherColumns []*sqlbase.ColumnDescriptor 83 84 for _, colID := range o.tableDesc.PrimaryIndex.ColumnIDs { 85 col := &o.tableDesc.Columns[colToIdx[colID]] 86 pkColumns = append(pkColumns, col) 87 colToIdx[colID] = -1 88 } 89 90 maybeAddOtherCol := func(colID sqlbase.ColumnID) { 91 pos := colToIdx[colID] 92 if pos == -1 { 93 // Skip PK column. 94 return 95 } 96 col := &o.tableDesc.Columns[pos] 97 otherColumns = append(otherColumns, col) 98 } 99 100 // Collect all of the columns we are fetching from the index. This 101 // includes the columns involved in the index: columns, extra columns, 102 // and store columns. 103 for _, colID := range o.indexDesc.ColumnIDs { 104 maybeAddOtherCol(colID) 105 } 106 for _, colID := range o.indexDesc.ExtraColumnIDs { 107 maybeAddOtherCol(colID) 108 } 109 for _, colID := range o.indexDesc.StoreColumnIDs { 110 maybeAddOtherCol(colID) 111 } 112 113 colNames := func(cols []*sqlbase.ColumnDescriptor) []string { 114 res := make([]string, len(cols)) 115 for i := range cols { 116 res[i] = cols[i].Name 117 } 118 return res 119 } 120 121 checkQuery := createIndexCheckQuery( 122 colNames(pkColumns), colNames(otherColumns), o.tableDesc.ID, o.indexDesc.ID, 123 ) 124 125 rows, err := params.extendedEvalCtx.ExecCfg.InternalExecutor.Query( 126 ctx, "scrub-index", params.p.txn, checkQuery, 127 ) 128 if err != nil { 129 return err 130 } 131 132 o.run.started = true 133 o.run.rows = rows 134 o.primaryColIdxs = make([]int, len(pkColumns)) 135 for i := range o.primaryColIdxs { 136 o.primaryColIdxs[i] = i 137 } 138 o.columns = append(pkColumns, otherColumns...) 139 return nil 140 } 141 142 // Next implements the checkOperation interface. 143 func (o *indexCheckOperation) Next(params runParams) (tree.Datums, error) { 144 row := o.run.rows[o.run.rowIndex] 145 o.run.rowIndex++ 146 147 // Check if this row has results from the left. See the comment above 148 // createIndexCheckQuery indicating why this is true. 149 var isMissingIndexReferenceError bool 150 if row[o.primaryColIdxs[0]] != tree.DNull { 151 isMissingIndexReferenceError = true 152 } 153 154 colLen := len(o.columns) 155 var errorType tree.Datum 156 var primaryKeyDatums tree.Datums 157 if isMissingIndexReferenceError { 158 errorType = tree.NewDString(scrub.MissingIndexEntryError) 159 // Fetch the primary index values from the primary index row data. 160 for _, rowIdx := range o.primaryColIdxs { 161 primaryKeyDatums = append(primaryKeyDatums, row[rowIdx]) 162 } 163 } else { 164 errorType = tree.NewDString(scrub.DanglingIndexReferenceError) 165 // Fetch the primary index values from the secondary index row 166 // data, because no primary index was found. The secondary index columns 167 // are offset by the length of the distinct columns, as the first 168 // set of columns is for the primary index. 169 for _, rowIdx := range o.primaryColIdxs { 170 primaryKeyDatums = append(primaryKeyDatums, row[rowIdx+colLen]) 171 } 172 } 173 primaryKey := tree.NewDString(primaryKeyDatums.String()) 174 timestamp, err := tree.MakeDTimestamp( 175 params.extendedEvalCtx.GetStmtTimestamp(), time.Nanosecond) 176 if err != nil { 177 return nil, err 178 } 179 180 details := make(map[string]interface{}) 181 rowDetails := make(map[string]interface{}) 182 details["row_data"] = rowDetails 183 details["index_name"] = o.indexDesc.Name 184 if isMissingIndexReferenceError { 185 // Fetch the primary index values from the primary index row data. 186 for rowIdx, col := range o.columns { 187 // TODO(joey): We should maybe try to get the underlying type. 188 rowDetails[col.Name] = row[rowIdx].String() 189 } 190 } else { 191 // Fetch the primary index values from the secondary index row data, 192 // because no primary index was found. The secondary index columns 193 // are offset by the length of the distinct columns, as the first 194 // set of columns is for the primary index. 195 for rowIdx, col := range o.columns { 196 // TODO(joey): We should maybe try to get the underlying type. 197 rowDetails[col.Name] = row[rowIdx+colLen].String() 198 } 199 } 200 201 detailsJSON, err := tree.MakeDJSON(details) 202 if err != nil { 203 return nil, err 204 } 205 206 return tree.Datums{ 207 // TODO(joey): Add the job UUID once the SCRUB command uses jobs. 208 tree.DNull, /* job_uuid */ 209 errorType, 210 tree.NewDString(o.tableName.Catalog()), 211 tree.NewDString(o.tableName.Table()), 212 primaryKey, 213 timestamp, 214 tree.DBoolFalse, 215 detailsJSON, 216 }, nil 217 } 218 219 // Started implements the checkOperation interface. 220 func (o *indexCheckOperation) Started() bool { 221 return o.run.started 222 } 223 224 // Done implements the checkOperation interface. 225 func (o *indexCheckOperation) Done(ctx context.Context) bool { 226 return o.run.rows == nil || o.run.rowIndex >= len(o.run.rows) 227 } 228 229 // Close4 implements the checkOperation interface. 230 func (o *indexCheckOperation) Close(ctx context.Context) { 231 o.run.rows = nil 232 } 233 234 // createIndexCheckQuery will make the index check query for a given 235 // table and secondary index. 236 // 237 // The primary column names and the rest of the index 238 // columnsIt will also take into account an AS OF 239 // SYSTEM TIME clause. 240 // 241 // For example, given the following table schema: 242 // 243 // CREATE TABLE table ( 244 // k INT, l INT, a INT, b INT, c INT, 245 // PRIMARY KEY (k, l), 246 // INDEX idx (a,b), 247 // ) 248 // 249 // The generated query to check the `v_idx` will be: 250 // 251 // SELECT pri.k pri.l, pri.a, pri.b, 252 // sec.k, sec.l, sec.a, sec.b 253 // FROM 254 // (SELECT k, l, a, b FROM [tbl_id AS table_pri]@{FORCE_INDEX=[1]}) AS pri 255 // FULL OUTER JOIN 256 // (SELECT k, l, a, b FROM [tbl_id AS table_sec]@{FORCE_INDEX=[idx_id]} AS sec 257 // ON 258 // pri.k = sec.k AND 259 // pri.l = sec.l AND 260 // pri.a IS NOT DISTINCT FROM sec.a AND 261 // pri.b IS NOT DISTINCT FROM sec.b 262 // WHERE 263 // pri.k IS NULL OR sec.k IS NULL 264 // 265 // Explanation: 266 // 1) We scan both the primary index and the secondary index. 267 // 268 // 2) We join them on equality on the PK columns and "IS NOT DISTINCT FROM" on 269 // the other index columns. "IS NOT DISTINCT FROM" is like equality except 270 // that NULL equals NULL; it is not needed for the PK columns because those 271 // can't be NULL. 272 // 273 // Note: currently, only the PK columns will be used as join equality 274 // columns, but that is sufficient. 275 // 276 // 3) We select the "outer" rows (those that had no match), effectively 277 // achieving a "double" anti-join. We use the PK columns which cannot be 278 // NULL except on these rows. 279 // 280 // 4) The results are as follows: 281 // - if a PK column on the left is NULL, that means that the right-hand 282 // side row from the secondary index had no match in the primary index. 283 // - if a PK column on the right is NULL, that means that the left-hand 284 // side row from the primary key had no match in the secondary index. 285 // 286 func createIndexCheckQuery( 287 pkColumns []string, otherColumns []string, tableID sqlbase.ID, indexID sqlbase.IndexID, 288 ) string { 289 allColumns := append(pkColumns, otherColumns...) 290 // We need to make sure we can handle the non-public column `rowid` 291 // that is created for implicit primary keys. In order to do so, the 292 // rendered columns need to explicit in the inner selects. 293 const checkIndexQuery = ` 294 SELECT %[1]s, %[2]s 295 FROM 296 (SELECT %[8]s FROM [%[3]d AS table_pri]@{FORCE_INDEX=[1]}) AS pri 297 FULL OUTER JOIN 298 (SELECT %[8]s FROM [%[3]d AS table_sec]@{FORCE_INDEX=[%[4]d]}) AS sec 299 ON %[5]s 300 WHERE %[6]s IS NULL OR %[7]s IS NULL` 301 return fmt.Sprintf( 302 checkIndexQuery, 303 304 // 1: pri.k, pri.l, pri.a, pri.b 305 strings.Join(colRefs("pri", allColumns), ", "), 306 307 // 2: sec.k, sec.l, sec.a, sec.b 308 strings.Join(colRefs("sec", allColumns), ", "), 309 310 // 3 311 tableID, 312 313 // 4 314 indexID, 315 316 // 5: pri.k = sec.k AND pri.l = sec.l AND 317 // pri.a IS NOT DISTINCT FROM sec.a AND pri.b IS NOT DISTINCT FROM sec.b 318 // Note: otherColumns can be empty. 319 strings.Join( 320 append( 321 pairwiseOp(colRefs("pri", pkColumns), colRefs("sec", pkColumns), "="), 322 pairwiseOp(colRefs("pri", otherColumns), colRefs("sec", otherColumns), "IS NOT DISTINCT FROM")..., 323 ), 324 " AND ", 325 ), 326 327 // 6: pri.k 328 colRef("pri", pkColumns[0]), 329 330 // 7: sec.k 331 colRef("sec", pkColumns[0]), 332 333 // 8: k, l, a, b 334 strings.Join(colRefs("", append(pkColumns, otherColumns...)), ", "), 335 ) 336 }