github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/partitionccl/partition.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Licensed as a CockroachDB Enterprise file under the Cockroach Community 4 // License (the "License"); you may not use this file except in compliance with 5 // the License. You may obtain a copy of the License at 6 // 7 // https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt 8 9 package partitionccl 10 11 import ( 12 "context" 13 "strings" 14 15 "github.com/cockroachdb/cockroach/pkg/ccl/utilccl" 16 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 17 "github.com/cockroachdb/cockroach/pkg/sql" 18 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" 19 "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" 20 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 21 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 22 "github.com/cockroachdb/cockroach/pkg/sql/types" 23 "github.com/cockroachdb/cockroach/pkg/util/encoding" 24 "github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented" 25 "github.com/cockroachdb/errors" 26 ) 27 28 // valueEncodePartitionTuple typechecks the datums in maybeTuple. It returns the 29 // concatenation of these datums, each encoded using the table "value" encoding. 30 // The special values of DEFAULT (for list) and MAXVALUE (for range) are encoded 31 // as NOT NULL. 32 // 33 // TODO(dan): The typechecking here should be run during plan construction, so 34 // we can support placeholders. 35 func valueEncodePartitionTuple( 36 typ tree.PartitionByType, 37 evalCtx *tree.EvalContext, 38 maybeTuple tree.Expr, 39 cols []sqlbase.ColumnDescriptor, 40 ) ([]byte, error) { 41 // Replace any occurrences of the MINVALUE/MAXVALUE pseudo-names 42 // into MinVal and MaxVal, to be recognized below. 43 // We are operating in a context where the expressions cannot 44 // refer to table columns, so these two names are unambiguously 45 // referring to the desired partition boundaries. 46 maybeTuple, _ = tree.WalkExpr(replaceMinMaxValVisitor{}, maybeTuple) 47 48 tuple, ok := maybeTuple.(*tree.Tuple) 49 if !ok { 50 // If we don't already have a tuple, promote whatever we have to a 1-tuple. 51 tuple = &tree.Tuple{Exprs: []tree.Expr{maybeTuple}} 52 } 53 54 if len(tuple.Exprs) != len(cols) { 55 return nil, errors.Errorf("partition has %d columns but %d values were supplied", 56 len(cols), len(tuple.Exprs)) 57 } 58 59 var value, scratch []byte 60 for i, expr := range tuple.Exprs { 61 expr = tree.StripParens(expr) 62 switch expr.(type) { 63 case tree.DefaultVal: 64 if typ != tree.PartitionByList { 65 return nil, errors.Errorf("%s cannot be used with PARTITION BY %s", expr, typ) 66 } 67 // NOT NULL is used to signal that a PartitionSpecialValCode follows. 68 value = encoding.EncodeNotNullValue(value, encoding.NoColumnID) 69 value = encoding.EncodeNonsortingUvarint(value, uint64(sqlbase.PartitionDefaultVal)) 70 continue 71 case tree.PartitionMinVal: 72 if typ != tree.PartitionByRange { 73 return nil, errors.Errorf("%s cannot be used with PARTITION BY %s", expr, typ) 74 } 75 // NOT NULL is used to signal that a PartitionSpecialValCode follows. 76 value = encoding.EncodeNotNullValue(value, encoding.NoColumnID) 77 value = encoding.EncodeNonsortingUvarint(value, uint64(sqlbase.PartitionMinVal)) 78 continue 79 case tree.PartitionMaxVal: 80 if typ != tree.PartitionByRange { 81 return nil, errors.Errorf("%s cannot be used with PARTITION BY %s", expr, typ) 82 } 83 // NOT NULL is used to signal that a PartitionSpecialValCode follows. 84 value = encoding.EncodeNotNullValue(value, encoding.NoColumnID) 85 value = encoding.EncodeNonsortingUvarint(value, uint64(sqlbase.PartitionMaxVal)) 86 continue 87 case *tree.Placeholder: 88 return nil, unimplemented.NewWithIssuef( 89 19464, "placeholders are not supported in PARTITION BY") 90 default: 91 // Fall-through. 92 } 93 94 var semaCtx tree.SemaContext 95 typedExpr, err := sqlbase.SanitizeVarFreeExpr(evalCtx.Context, expr, cols[i].Type, "partition", 96 &semaCtx, false /* allowImpure */) 97 if err != nil { 98 return nil, err 99 } 100 if !tree.IsConst(evalCtx, typedExpr) { 101 return nil, pgerror.Newf(pgcode.Syntax, 102 "%s: partition values must be constant", typedExpr) 103 } 104 datum, err := typedExpr.Eval(evalCtx) 105 if err != nil { 106 return nil, errors.Wrapf(err, "evaluating %s", typedExpr) 107 } 108 if err := sqlbase.CheckDatumTypeFitsColumnType(&cols[i], datum.ResolvedType()); err != nil { 109 return nil, err 110 } 111 value, err = sqlbase.EncodeTableValue( 112 value, sqlbase.ColumnID(encoding.NoColumnID), datum, scratch, 113 ) 114 if err != nil { 115 return nil, err 116 } 117 } 118 return value, nil 119 } 120 121 // replaceMinMaxValVisitor replaces occurrences of the unqualified 122 // identifiers "minvalue" and "maxvalue" in the partitioning 123 // (sub-)exprs by the symbolic values tree.PartitionMinVal and 124 // tree.PartitionMaxVal. 125 type replaceMinMaxValVisitor struct{} 126 127 // VisitPre satisfies the tree.Visitor interface. 128 func (v replaceMinMaxValVisitor) VisitPre(expr tree.Expr) (recurse bool, newExpr tree.Expr) { 129 if t, ok := expr.(*tree.UnresolvedName); ok && t.NumParts == 1 { 130 switch t.Parts[0] { 131 case "minvalue": 132 return false, tree.PartitionMinVal{} 133 case "maxvalue": 134 return false, tree.PartitionMaxVal{} 135 } 136 } 137 return true, expr 138 } 139 140 // VisitPost satisfies the Visitor interface. 141 func (replaceMinMaxValVisitor) VisitPost(expr tree.Expr) tree.Expr { return expr } 142 143 func createPartitioningImpl( 144 ctx context.Context, 145 evalCtx *tree.EvalContext, 146 tableDesc *sqlbase.MutableTableDescriptor, 147 indexDesc *sqlbase.IndexDescriptor, 148 partBy *tree.PartitionBy, 149 colOffset int, 150 ) (sqlbase.PartitioningDescriptor, error) { 151 partDesc := sqlbase.PartitioningDescriptor{} 152 if partBy == nil { 153 return partDesc, nil 154 } 155 partDesc.NumColumns = uint32(len(partBy.Fields)) 156 157 partitioningString := func() string { 158 // We don't have the fields for our parent partitions handy, but we can use 159 // the names from the index we're partitioning. They must have matched or we 160 // would have already returned an error. 161 partCols := append([]string(nil), indexDesc.ColumnNames[:colOffset]...) 162 for _, p := range partBy.Fields { 163 partCols = append(partCols, string(p)) 164 } 165 return strings.Join(partCols, ", ") 166 } 167 168 var cols []sqlbase.ColumnDescriptor 169 for i := 0; i < len(partBy.Fields); i++ { 170 if colOffset+i >= len(indexDesc.ColumnNames) { 171 return partDesc, pgerror.Newf(pgcode.Syntax, 172 "declared partition columns (%s) exceed the number of columns in index being partitioned (%s)", 173 partitioningString(), strings.Join(indexDesc.ColumnNames, ", ")) 174 } 175 // Search by name because some callsites of this method have not 176 // allocated ids yet (so they are still all the 0 value). 177 col, err := tableDesc.FindActiveColumnByName(indexDesc.ColumnNames[colOffset+i]) 178 if err != nil { 179 return partDesc, err 180 } 181 cols = append(cols, *col) 182 if string(partBy.Fields[i]) != col.Name { 183 // This used to print the first `colOffset + len(partBy.Fields)` fields 184 // but there might not be this many columns in the index. See #37682. 185 n := colOffset + i + 1 186 return partDesc, pgerror.Newf(pgcode.Syntax, 187 "declared partition columns (%s) do not match first %d columns in index being partitioned (%s)", 188 partitioningString(), n, strings.Join(indexDesc.ColumnNames[:n], ", ")) 189 } 190 } 191 192 for _, l := range partBy.List { 193 p := sqlbase.PartitioningDescriptor_List{ 194 Name: string(l.Name), 195 } 196 for _, expr := range l.Exprs { 197 encodedTuple, err := valueEncodePartitionTuple( 198 tree.PartitionByList, evalCtx, expr, cols) 199 if err != nil { 200 return partDesc, errors.Wrapf(err, "PARTITION %s", p.Name) 201 } 202 p.Values = append(p.Values, encodedTuple) 203 } 204 if l.Subpartition != nil { 205 newColOffset := colOffset + int(partDesc.NumColumns) 206 subpartitioning, err := createPartitioningImpl( 207 ctx, evalCtx, tableDesc, indexDesc, l.Subpartition, newColOffset) 208 if err != nil { 209 return partDesc, err 210 } 211 p.Subpartitioning = subpartitioning 212 } 213 partDesc.List = append(partDesc.List, p) 214 } 215 216 for _, r := range partBy.Range { 217 p := sqlbase.PartitioningDescriptor_Range{ 218 Name: string(r.Name), 219 } 220 var err error 221 p.FromInclusive, err = valueEncodePartitionTuple( 222 tree.PartitionByRange, evalCtx, &tree.Tuple{Exprs: r.From}, cols) 223 if err != nil { 224 return partDesc, errors.Wrapf(err, "PARTITION %s", p.Name) 225 } 226 p.ToExclusive, err = valueEncodePartitionTuple( 227 tree.PartitionByRange, evalCtx, &tree.Tuple{Exprs: r.To}, cols) 228 if err != nil { 229 return partDesc, errors.Wrapf(err, "PARTITION %s", p.Name) 230 } 231 if r.Subpartition != nil { 232 return partDesc, errors.Newf("PARTITION %s: cannot subpartition a range partition", p.Name) 233 } 234 partDesc.Range = append(partDesc.Range, p) 235 } 236 237 return partDesc, nil 238 } 239 240 // createPartitioning constructs the partitioning descriptor for an index that 241 // is partitioned into ranges, each addressable by zone configs. 242 func createPartitioning( 243 ctx context.Context, 244 st *cluster.Settings, 245 evalCtx *tree.EvalContext, 246 tableDesc *sqlbase.MutableTableDescriptor, 247 indexDesc *sqlbase.IndexDescriptor, 248 partBy *tree.PartitionBy, 249 ) (sqlbase.PartitioningDescriptor, error) { 250 org := sql.ClusterOrganization.Get(&st.SV) 251 if err := utilccl.CheckEnterpriseEnabled(st, evalCtx.ClusterID, org, "partitions"); err != nil { 252 return sqlbase.PartitioningDescriptor{}, err 253 } 254 255 return createPartitioningImpl( 256 ctx, evalCtx, tableDesc, indexDesc, partBy, 0 /* colOffset */) 257 } 258 259 // selectPartitionExprs constructs an expression for selecting all rows in the 260 // given partitions. 261 func selectPartitionExprs( 262 evalCtx *tree.EvalContext, tableDesc *sqlbase.TableDescriptor, partNames tree.NameList, 263 ) (tree.Expr, error) { 264 exprsByPartName := make(map[string]tree.TypedExpr) 265 for _, partName := range partNames { 266 exprsByPartName[string(partName)] = nil 267 } 268 269 a := &sqlbase.DatumAlloc{} 270 var prefixDatums []tree.Datum 271 if err := tableDesc.ForeachNonDropIndex(func(idxDesc *sqlbase.IndexDescriptor) error { 272 genExpr := true 273 return selectPartitionExprsByName( 274 a, evalCtx, tableDesc, idxDesc, &idxDesc.Partitioning, prefixDatums, exprsByPartName, genExpr) 275 }); err != nil { 276 return nil, err 277 } 278 279 var expr tree.TypedExpr = tree.DBoolFalse 280 for _, partName := range partNames { 281 partExpr, ok := exprsByPartName[string(partName)] 282 if !ok || partExpr == nil { 283 return nil, errors.Errorf("unknown partition: %s", partName) 284 } 285 expr = tree.NewTypedOrExpr(expr, partExpr) 286 } 287 288 var err error 289 expr, err = evalCtx.NormalizeExpr(expr) 290 if err != nil { 291 return nil, err 292 } 293 // In order to typecheck during simplification and normalization, we used 294 // dummy IndexVars. Swap them out for actual column references. 295 finalExpr, err := tree.SimpleVisit(expr, func(e tree.Expr) (recurse bool, newExpr tree.Expr, _ error) { 296 if ivar, ok := e.(*tree.IndexedVar); ok { 297 col, err := tableDesc.FindColumnByID(sqlbase.ColumnID(ivar.Idx)) 298 if err != nil { 299 return false, nil, err 300 } 301 return false, &tree.ColumnItem{ColumnName: tree.Name(col.Name)}, nil 302 } 303 return true, e, nil 304 }) 305 return finalExpr, err 306 } 307 308 // selectPartitionExprsByName constructs an expression for selecting all rows in 309 // each partition and subpartition in the given index. To make it easy to 310 // simplify and normalize the exprs, references to table columns are represented 311 // as TypedOrdinalReferences with an ordinal of the column ID. 312 // 313 // NB Subpartitions do not affect the expression for their parent partitions. So 314 // if a partition foo (a=3) is then subpartitiond by (b=5) and no DEFAULT, the 315 // expression for foo is still `a=3`, not `a=3 AND b=5`. This means that if some 316 // partition is requested, we can omit all of the subpartitions, because they'll 317 // also necessarily select subsets of the rows it will. "requested" here is 318 // indicated by the caller by setting the corresponding name in the 319 // `exprsByPartName` map to nil. In this case, `genExpr` is then set to false 320 // for subpartitions of this call, which causes each subpartition to only 321 // register itself in the map with a placeholder entry (so we can still verify 322 // that the requested partitions are all valid). 323 func selectPartitionExprsByName( 324 a *sqlbase.DatumAlloc, 325 evalCtx *tree.EvalContext, 326 tableDesc *sqlbase.TableDescriptor, 327 idxDesc *sqlbase.IndexDescriptor, 328 partDesc *sqlbase.PartitioningDescriptor, 329 prefixDatums tree.Datums, 330 exprsByPartName map[string]tree.TypedExpr, 331 genExpr bool, 332 ) error { 333 if partDesc.NumColumns == 0 { 334 return nil 335 } 336 337 // Setting genExpr to false skips the expression generation and only 338 // registers each descendent partition in the map with a placeholder entry. 339 if !genExpr { 340 for _, l := range partDesc.List { 341 exprsByPartName[l.Name] = tree.DBoolFalse 342 var fakeDatums tree.Datums 343 if err := selectPartitionExprsByName( 344 a, evalCtx, tableDesc, idxDesc, &l.Subpartitioning, fakeDatums, exprsByPartName, genExpr, 345 ); err != nil { 346 return err 347 } 348 } 349 for _, r := range partDesc.Range { 350 exprsByPartName[r.Name] = tree.DBoolFalse 351 } 352 return nil 353 } 354 355 var colVars tree.Exprs 356 { 357 // The recursive calls of selectPartitionExprsByName don't pass though 358 // the column ordinal references, so reconstruct them here. 359 colVars = make(tree.Exprs, len(prefixDatums)+int(partDesc.NumColumns)) 360 for i := range colVars { 361 col, err := tableDesc.FindActiveColumnByID(idxDesc.ColumnIDs[i]) 362 if err != nil { 363 return err 364 } 365 colVars[i] = tree.NewTypedOrdinalReference(int(col.ID), col.Type) 366 } 367 } 368 369 if len(partDesc.List) > 0 { 370 type exprAndPartName struct { 371 expr tree.TypedExpr 372 name string 373 } 374 // Any partitions using DEFAULT must specifically exclude any relevant 375 // higher specificity partitions (e.g for partitions `(1, DEFAULT)`, 376 // `(1, 2)`, the expr for the former must exclude the latter. This is 377 // done by bucketing the expression for each partition value by the 378 // number of DEFAULTs it involves. 379 partValueExprs := make([][]exprAndPartName, int(partDesc.NumColumns)+1) 380 381 for _, l := range partDesc.List { 382 for _, valueEncBuf := range l.Values { 383 t, _, err := sqlbase.DecodePartitionTuple( 384 a, evalCtx.Codec, tableDesc, idxDesc, partDesc, valueEncBuf, prefixDatums) 385 if err != nil { 386 return err 387 } 388 allDatums := append(prefixDatums, t.Datums...) 389 390 // When len(allDatums) < len(colVars), the missing elements are DEFAULTs, so 391 // we can simply exclude them from the expr. 392 typContents := make([]*types.T, len(allDatums)) 393 for i, d := range allDatums { 394 typContents[i] = d.ResolvedType() 395 } 396 tupleTyp := types.MakeTuple(typContents) 397 partValueExpr := tree.NewTypedComparisonExpr(tree.EQ, 398 tree.NewTypedTuple(tupleTyp, colVars[:len(allDatums)]), 399 tree.NewDTuple(tupleTyp, allDatums...)) 400 partValueExprs[len(t.Datums)] = append(partValueExprs[len(t.Datums)], exprAndPartName{ 401 expr: partValueExpr, 402 name: l.Name, 403 }) 404 405 genExpr := true 406 if _, ok := exprsByPartName[l.Name]; ok { 407 // Presence of a partition name in the exprsByPartName map 408 // means the caller has expressed an interested in this 409 // partition, which means any subpartitions can be skipped 410 // (because they must by definition be a subset of this 411 // partition). This saves us a little work and also helps 412 // out the normalization & simplification of the resulting 413 // expression, since it doesn't have to account for which 414 // partitions overlap. 415 genExpr = false 416 } 417 if err := selectPartitionExprsByName( 418 a, evalCtx, tableDesc, idxDesc, &l.Subpartitioning, allDatums, exprsByPartName, genExpr, 419 ); err != nil { 420 return err 421 } 422 } 423 } 424 425 // Walk backward through partValueExprs, so partition values with fewest 426 // DEFAULTs to most. As we go, keep an expression to be AND NOT'd with 427 // each partition value's expression in `excludeExpr`. This handles the 428 // exclusion of `(1, 2)` from the expression for `(1, DEFAULT)` in the 429 // example above. 430 // 431 // TODO(dan): The result of the way this currently works is correct but 432 // too broad. In a two column partitioning with cases for `(a, b)` and 433 // `(c, DEFAULT)`, the expression generated for `(c, DEFAULT)` will 434 // needlessly exclude `(a, b)`. Concretely, we end up with expressions 435 // like `(a) IN (1) AND ... (a, b) != (2, 3)`, where the `!= (2, 3)` 436 // part is irrelevant. This only happens in fairly unrealistic 437 // partitionings, so it's unclear if anything really needs to be done 438 // here. 439 excludeExpr := tree.TypedExpr(tree.DBoolFalse) 440 for i := len(partValueExprs) - 1; i >= 0; i-- { 441 nextExcludeExpr := tree.TypedExpr(tree.DBoolFalse) 442 for _, v := range partValueExprs[i] { 443 nextExcludeExpr = tree.NewTypedOrExpr(nextExcludeExpr, v.expr) 444 partValueExpr := tree.NewTypedAndExpr(v.expr, tree.NewTypedNotExpr(excludeExpr)) 445 // We can get multiple expressions for the same partition in 446 // a single-col `PARTITION foo VALUES IN ((1), (2))`. 447 if e, ok := exprsByPartName[v.name]; !ok || e == nil { 448 exprsByPartName[v.name] = partValueExpr 449 } else { 450 exprsByPartName[v.name] = tree.NewTypedOrExpr(e, partValueExpr) 451 } 452 } 453 excludeExpr = tree.NewTypedOrExpr(excludeExpr, nextExcludeExpr) 454 } 455 } 456 457 for range partDesc.Range { 458 return errors.New("TODO(dan): unsupported for range partitionings") 459 } 460 461 return nil 462 } 463 464 func init() { 465 sql.CreatePartitioningCCL = createPartitioning 466 }