github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/partitionccl/partition.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  package partitionccl
    10  
    11  import (
    12  	"context"
    13  	"strings"
    14  
    15  	"github.com/cockroachdb/cockroach/pkg/ccl/utilccl"
    16  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    17  	"github.com/cockroachdb/cockroach/pkg/sql"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    23  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    24  	"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
    25  	"github.com/cockroachdb/errors"
    26  )
    27  
    28  // valueEncodePartitionTuple typechecks the datums in maybeTuple. It returns the
    29  // concatenation of these datums, each encoded using the table "value" encoding.
    30  // The special values of DEFAULT (for list) and MAXVALUE (for range) are encoded
    31  // as NOT NULL.
    32  //
    33  // TODO(dan): The typechecking here should be run during plan construction, so
    34  // we can support placeholders.
    35  func valueEncodePartitionTuple(
    36  	typ tree.PartitionByType,
    37  	evalCtx *tree.EvalContext,
    38  	maybeTuple tree.Expr,
    39  	cols []sqlbase.ColumnDescriptor,
    40  ) ([]byte, error) {
    41  	// Replace any occurrences of the MINVALUE/MAXVALUE pseudo-names
    42  	// into MinVal and MaxVal, to be recognized below.
    43  	// We are operating in a context where the expressions cannot
    44  	// refer to table columns, so these two names are unambiguously
    45  	// referring to the desired partition boundaries.
    46  	maybeTuple, _ = tree.WalkExpr(replaceMinMaxValVisitor{}, maybeTuple)
    47  
    48  	tuple, ok := maybeTuple.(*tree.Tuple)
    49  	if !ok {
    50  		// If we don't already have a tuple, promote whatever we have to a 1-tuple.
    51  		tuple = &tree.Tuple{Exprs: []tree.Expr{maybeTuple}}
    52  	}
    53  
    54  	if len(tuple.Exprs) != len(cols) {
    55  		return nil, errors.Errorf("partition has %d columns but %d values were supplied",
    56  			len(cols), len(tuple.Exprs))
    57  	}
    58  
    59  	var value, scratch []byte
    60  	for i, expr := range tuple.Exprs {
    61  		expr = tree.StripParens(expr)
    62  		switch expr.(type) {
    63  		case tree.DefaultVal:
    64  			if typ != tree.PartitionByList {
    65  				return nil, errors.Errorf("%s cannot be used with PARTITION BY %s", expr, typ)
    66  			}
    67  			// NOT NULL is used to signal that a PartitionSpecialValCode follows.
    68  			value = encoding.EncodeNotNullValue(value, encoding.NoColumnID)
    69  			value = encoding.EncodeNonsortingUvarint(value, uint64(sqlbase.PartitionDefaultVal))
    70  			continue
    71  		case tree.PartitionMinVal:
    72  			if typ != tree.PartitionByRange {
    73  				return nil, errors.Errorf("%s cannot be used with PARTITION BY %s", expr, typ)
    74  			}
    75  			// NOT NULL is used to signal that a PartitionSpecialValCode follows.
    76  			value = encoding.EncodeNotNullValue(value, encoding.NoColumnID)
    77  			value = encoding.EncodeNonsortingUvarint(value, uint64(sqlbase.PartitionMinVal))
    78  			continue
    79  		case tree.PartitionMaxVal:
    80  			if typ != tree.PartitionByRange {
    81  				return nil, errors.Errorf("%s cannot be used with PARTITION BY %s", expr, typ)
    82  			}
    83  			// NOT NULL is used to signal that a PartitionSpecialValCode follows.
    84  			value = encoding.EncodeNotNullValue(value, encoding.NoColumnID)
    85  			value = encoding.EncodeNonsortingUvarint(value, uint64(sqlbase.PartitionMaxVal))
    86  			continue
    87  		case *tree.Placeholder:
    88  			return nil, unimplemented.NewWithIssuef(
    89  				19464, "placeholders are not supported in PARTITION BY")
    90  		default:
    91  			// Fall-through.
    92  		}
    93  
    94  		var semaCtx tree.SemaContext
    95  		typedExpr, err := sqlbase.SanitizeVarFreeExpr(evalCtx.Context, expr, cols[i].Type, "partition",
    96  			&semaCtx, false /* allowImpure */)
    97  		if err != nil {
    98  			return nil, err
    99  		}
   100  		if !tree.IsConst(evalCtx, typedExpr) {
   101  			return nil, pgerror.Newf(pgcode.Syntax,
   102  				"%s: partition values must be constant", typedExpr)
   103  		}
   104  		datum, err := typedExpr.Eval(evalCtx)
   105  		if err != nil {
   106  			return nil, errors.Wrapf(err, "evaluating %s", typedExpr)
   107  		}
   108  		if err := sqlbase.CheckDatumTypeFitsColumnType(&cols[i], datum.ResolvedType()); err != nil {
   109  			return nil, err
   110  		}
   111  		value, err = sqlbase.EncodeTableValue(
   112  			value, sqlbase.ColumnID(encoding.NoColumnID), datum, scratch,
   113  		)
   114  		if err != nil {
   115  			return nil, err
   116  		}
   117  	}
   118  	return value, nil
   119  }
   120  
   121  // replaceMinMaxValVisitor replaces occurrences of the unqualified
   122  // identifiers "minvalue" and "maxvalue" in the partitioning
   123  // (sub-)exprs by the symbolic values tree.PartitionMinVal and
   124  // tree.PartitionMaxVal.
   125  type replaceMinMaxValVisitor struct{}
   126  
   127  // VisitPre satisfies the tree.Visitor interface.
   128  func (v replaceMinMaxValVisitor) VisitPre(expr tree.Expr) (recurse bool, newExpr tree.Expr) {
   129  	if t, ok := expr.(*tree.UnresolvedName); ok && t.NumParts == 1 {
   130  		switch t.Parts[0] {
   131  		case "minvalue":
   132  			return false, tree.PartitionMinVal{}
   133  		case "maxvalue":
   134  			return false, tree.PartitionMaxVal{}
   135  		}
   136  	}
   137  	return true, expr
   138  }
   139  
   140  // VisitPost satisfies the Visitor interface.
   141  func (replaceMinMaxValVisitor) VisitPost(expr tree.Expr) tree.Expr { return expr }
   142  
   143  func createPartitioningImpl(
   144  	ctx context.Context,
   145  	evalCtx *tree.EvalContext,
   146  	tableDesc *sqlbase.MutableTableDescriptor,
   147  	indexDesc *sqlbase.IndexDescriptor,
   148  	partBy *tree.PartitionBy,
   149  	colOffset int,
   150  ) (sqlbase.PartitioningDescriptor, error) {
   151  	partDesc := sqlbase.PartitioningDescriptor{}
   152  	if partBy == nil {
   153  		return partDesc, nil
   154  	}
   155  	partDesc.NumColumns = uint32(len(partBy.Fields))
   156  
   157  	partitioningString := func() string {
   158  		// We don't have the fields for our parent partitions handy, but we can use
   159  		// the names from the index we're partitioning. They must have matched or we
   160  		// would have already returned an error.
   161  		partCols := append([]string(nil), indexDesc.ColumnNames[:colOffset]...)
   162  		for _, p := range partBy.Fields {
   163  			partCols = append(partCols, string(p))
   164  		}
   165  		return strings.Join(partCols, ", ")
   166  	}
   167  
   168  	var cols []sqlbase.ColumnDescriptor
   169  	for i := 0; i < len(partBy.Fields); i++ {
   170  		if colOffset+i >= len(indexDesc.ColumnNames) {
   171  			return partDesc, pgerror.Newf(pgcode.Syntax,
   172  				"declared partition columns (%s) exceed the number of columns in index being partitioned (%s)",
   173  				partitioningString(), strings.Join(indexDesc.ColumnNames, ", "))
   174  		}
   175  		// Search by name because some callsites of this method have not
   176  		// allocated ids yet (so they are still all the 0 value).
   177  		col, err := tableDesc.FindActiveColumnByName(indexDesc.ColumnNames[colOffset+i])
   178  		if err != nil {
   179  			return partDesc, err
   180  		}
   181  		cols = append(cols, *col)
   182  		if string(partBy.Fields[i]) != col.Name {
   183  			// This used to print the first `colOffset + len(partBy.Fields)` fields
   184  			// but there might not be this many columns in the index. See #37682.
   185  			n := colOffset + i + 1
   186  			return partDesc, pgerror.Newf(pgcode.Syntax,
   187  				"declared partition columns (%s) do not match first %d columns in index being partitioned (%s)",
   188  				partitioningString(), n, strings.Join(indexDesc.ColumnNames[:n], ", "))
   189  		}
   190  	}
   191  
   192  	for _, l := range partBy.List {
   193  		p := sqlbase.PartitioningDescriptor_List{
   194  			Name: string(l.Name),
   195  		}
   196  		for _, expr := range l.Exprs {
   197  			encodedTuple, err := valueEncodePartitionTuple(
   198  				tree.PartitionByList, evalCtx, expr, cols)
   199  			if err != nil {
   200  				return partDesc, errors.Wrapf(err, "PARTITION %s", p.Name)
   201  			}
   202  			p.Values = append(p.Values, encodedTuple)
   203  		}
   204  		if l.Subpartition != nil {
   205  			newColOffset := colOffset + int(partDesc.NumColumns)
   206  			subpartitioning, err := createPartitioningImpl(
   207  				ctx, evalCtx, tableDesc, indexDesc, l.Subpartition, newColOffset)
   208  			if err != nil {
   209  				return partDesc, err
   210  			}
   211  			p.Subpartitioning = subpartitioning
   212  		}
   213  		partDesc.List = append(partDesc.List, p)
   214  	}
   215  
   216  	for _, r := range partBy.Range {
   217  		p := sqlbase.PartitioningDescriptor_Range{
   218  			Name: string(r.Name),
   219  		}
   220  		var err error
   221  		p.FromInclusive, err = valueEncodePartitionTuple(
   222  			tree.PartitionByRange, evalCtx, &tree.Tuple{Exprs: r.From}, cols)
   223  		if err != nil {
   224  			return partDesc, errors.Wrapf(err, "PARTITION %s", p.Name)
   225  		}
   226  		p.ToExclusive, err = valueEncodePartitionTuple(
   227  			tree.PartitionByRange, evalCtx, &tree.Tuple{Exprs: r.To}, cols)
   228  		if err != nil {
   229  			return partDesc, errors.Wrapf(err, "PARTITION %s", p.Name)
   230  		}
   231  		if r.Subpartition != nil {
   232  			return partDesc, errors.Newf("PARTITION %s: cannot subpartition a range partition", p.Name)
   233  		}
   234  		partDesc.Range = append(partDesc.Range, p)
   235  	}
   236  
   237  	return partDesc, nil
   238  }
   239  
   240  // createPartitioning constructs the partitioning descriptor for an index that
   241  // is partitioned into ranges, each addressable by zone configs.
   242  func createPartitioning(
   243  	ctx context.Context,
   244  	st *cluster.Settings,
   245  	evalCtx *tree.EvalContext,
   246  	tableDesc *sqlbase.MutableTableDescriptor,
   247  	indexDesc *sqlbase.IndexDescriptor,
   248  	partBy *tree.PartitionBy,
   249  ) (sqlbase.PartitioningDescriptor, error) {
   250  	org := sql.ClusterOrganization.Get(&st.SV)
   251  	if err := utilccl.CheckEnterpriseEnabled(st, evalCtx.ClusterID, org, "partitions"); err != nil {
   252  		return sqlbase.PartitioningDescriptor{}, err
   253  	}
   254  
   255  	return createPartitioningImpl(
   256  		ctx, evalCtx, tableDesc, indexDesc, partBy, 0 /* colOffset */)
   257  }
   258  
   259  // selectPartitionExprs constructs an expression for selecting all rows in the
   260  // given partitions.
   261  func selectPartitionExprs(
   262  	evalCtx *tree.EvalContext, tableDesc *sqlbase.TableDescriptor, partNames tree.NameList,
   263  ) (tree.Expr, error) {
   264  	exprsByPartName := make(map[string]tree.TypedExpr)
   265  	for _, partName := range partNames {
   266  		exprsByPartName[string(partName)] = nil
   267  	}
   268  
   269  	a := &sqlbase.DatumAlloc{}
   270  	var prefixDatums []tree.Datum
   271  	if err := tableDesc.ForeachNonDropIndex(func(idxDesc *sqlbase.IndexDescriptor) error {
   272  		genExpr := true
   273  		return selectPartitionExprsByName(
   274  			a, evalCtx, tableDesc, idxDesc, &idxDesc.Partitioning, prefixDatums, exprsByPartName, genExpr)
   275  	}); err != nil {
   276  		return nil, err
   277  	}
   278  
   279  	var expr tree.TypedExpr = tree.DBoolFalse
   280  	for _, partName := range partNames {
   281  		partExpr, ok := exprsByPartName[string(partName)]
   282  		if !ok || partExpr == nil {
   283  			return nil, errors.Errorf("unknown partition: %s", partName)
   284  		}
   285  		expr = tree.NewTypedOrExpr(expr, partExpr)
   286  	}
   287  
   288  	var err error
   289  	expr, err = evalCtx.NormalizeExpr(expr)
   290  	if err != nil {
   291  		return nil, err
   292  	}
   293  	// In order to typecheck during simplification and normalization, we used
   294  	// dummy IndexVars. Swap them out for actual column references.
   295  	finalExpr, err := tree.SimpleVisit(expr, func(e tree.Expr) (recurse bool, newExpr tree.Expr, _ error) {
   296  		if ivar, ok := e.(*tree.IndexedVar); ok {
   297  			col, err := tableDesc.FindColumnByID(sqlbase.ColumnID(ivar.Idx))
   298  			if err != nil {
   299  				return false, nil, err
   300  			}
   301  			return false, &tree.ColumnItem{ColumnName: tree.Name(col.Name)}, nil
   302  		}
   303  		return true, e, nil
   304  	})
   305  	return finalExpr, err
   306  }
   307  
   308  // selectPartitionExprsByName constructs an expression for selecting all rows in
   309  // each partition and subpartition in the given index. To make it easy to
   310  // simplify and normalize the exprs, references to table columns are represented
   311  // as TypedOrdinalReferences with an ordinal of the column ID.
   312  //
   313  // NB Subpartitions do not affect the expression for their parent partitions. So
   314  // if a partition foo (a=3) is then subpartitiond by (b=5) and no DEFAULT, the
   315  // expression for foo is still `a=3`, not `a=3 AND b=5`. This means that if some
   316  // partition is requested, we can omit all of the subpartitions, because they'll
   317  // also necessarily select subsets of the rows it will. "requested" here is
   318  // indicated by the caller by setting the corresponding name in the
   319  // `exprsByPartName` map to nil. In this case, `genExpr` is then set to false
   320  // for subpartitions of this call, which causes each subpartition to only
   321  // register itself in the map with a placeholder entry (so we can still verify
   322  // that the requested partitions are all valid).
   323  func selectPartitionExprsByName(
   324  	a *sqlbase.DatumAlloc,
   325  	evalCtx *tree.EvalContext,
   326  	tableDesc *sqlbase.TableDescriptor,
   327  	idxDesc *sqlbase.IndexDescriptor,
   328  	partDesc *sqlbase.PartitioningDescriptor,
   329  	prefixDatums tree.Datums,
   330  	exprsByPartName map[string]tree.TypedExpr,
   331  	genExpr bool,
   332  ) error {
   333  	if partDesc.NumColumns == 0 {
   334  		return nil
   335  	}
   336  
   337  	// Setting genExpr to false skips the expression generation and only
   338  	// registers each descendent partition in the map with a placeholder entry.
   339  	if !genExpr {
   340  		for _, l := range partDesc.List {
   341  			exprsByPartName[l.Name] = tree.DBoolFalse
   342  			var fakeDatums tree.Datums
   343  			if err := selectPartitionExprsByName(
   344  				a, evalCtx, tableDesc, idxDesc, &l.Subpartitioning, fakeDatums, exprsByPartName, genExpr,
   345  			); err != nil {
   346  				return err
   347  			}
   348  		}
   349  		for _, r := range partDesc.Range {
   350  			exprsByPartName[r.Name] = tree.DBoolFalse
   351  		}
   352  		return nil
   353  	}
   354  
   355  	var colVars tree.Exprs
   356  	{
   357  		// The recursive calls of selectPartitionExprsByName don't pass though
   358  		// the column ordinal references, so reconstruct them here.
   359  		colVars = make(tree.Exprs, len(prefixDatums)+int(partDesc.NumColumns))
   360  		for i := range colVars {
   361  			col, err := tableDesc.FindActiveColumnByID(idxDesc.ColumnIDs[i])
   362  			if err != nil {
   363  				return err
   364  			}
   365  			colVars[i] = tree.NewTypedOrdinalReference(int(col.ID), col.Type)
   366  		}
   367  	}
   368  
   369  	if len(partDesc.List) > 0 {
   370  		type exprAndPartName struct {
   371  			expr tree.TypedExpr
   372  			name string
   373  		}
   374  		// Any partitions using DEFAULT must specifically exclude any relevant
   375  		// higher specificity partitions (e.g for partitions `(1, DEFAULT)`,
   376  		// `(1, 2)`, the expr for the former must exclude the latter. This is
   377  		// done by bucketing the expression for each partition value by the
   378  		// number of DEFAULTs it involves.
   379  		partValueExprs := make([][]exprAndPartName, int(partDesc.NumColumns)+1)
   380  
   381  		for _, l := range partDesc.List {
   382  			for _, valueEncBuf := range l.Values {
   383  				t, _, err := sqlbase.DecodePartitionTuple(
   384  					a, evalCtx.Codec, tableDesc, idxDesc, partDesc, valueEncBuf, prefixDatums)
   385  				if err != nil {
   386  					return err
   387  				}
   388  				allDatums := append(prefixDatums, t.Datums...)
   389  
   390  				// When len(allDatums) < len(colVars), the missing elements are DEFAULTs, so
   391  				// we can simply exclude them from the expr.
   392  				typContents := make([]*types.T, len(allDatums))
   393  				for i, d := range allDatums {
   394  					typContents[i] = d.ResolvedType()
   395  				}
   396  				tupleTyp := types.MakeTuple(typContents)
   397  				partValueExpr := tree.NewTypedComparisonExpr(tree.EQ,
   398  					tree.NewTypedTuple(tupleTyp, colVars[:len(allDatums)]),
   399  					tree.NewDTuple(tupleTyp, allDatums...))
   400  				partValueExprs[len(t.Datums)] = append(partValueExprs[len(t.Datums)], exprAndPartName{
   401  					expr: partValueExpr,
   402  					name: l.Name,
   403  				})
   404  
   405  				genExpr := true
   406  				if _, ok := exprsByPartName[l.Name]; ok {
   407  					// Presence of a partition name in the exprsByPartName map
   408  					// means the caller has expressed an interested in this
   409  					// partition, which means any subpartitions can be skipped
   410  					// (because they must by definition be a subset of this
   411  					// partition). This saves us a little work and also helps
   412  					// out the normalization & simplification of the resulting
   413  					// expression, since it doesn't have to account for which
   414  					// partitions overlap.
   415  					genExpr = false
   416  				}
   417  				if err := selectPartitionExprsByName(
   418  					a, evalCtx, tableDesc, idxDesc, &l.Subpartitioning, allDatums, exprsByPartName, genExpr,
   419  				); err != nil {
   420  					return err
   421  				}
   422  			}
   423  		}
   424  
   425  		// Walk backward through partValueExprs, so partition values with fewest
   426  		// DEFAULTs to most. As we go, keep an expression to be AND NOT'd with
   427  		// each partition value's expression in `excludeExpr`. This handles the
   428  		// exclusion of `(1, 2)` from the expression for `(1, DEFAULT)` in the
   429  		// example above.
   430  		//
   431  		// TODO(dan): The result of the way this currently works is correct but
   432  		// too broad. In a two column partitioning with cases for `(a, b)` and
   433  		// `(c, DEFAULT)`, the expression generated for `(c, DEFAULT)` will
   434  		// needlessly exclude `(a, b)`. Concretely, we end up with expressions
   435  		// like `(a) IN (1) AND ... (a, b) != (2, 3)`, where the `!= (2, 3)`
   436  		// part is irrelevant. This only happens in fairly unrealistic
   437  		// partitionings, so it's unclear if anything really needs to be done
   438  		// here.
   439  		excludeExpr := tree.TypedExpr(tree.DBoolFalse)
   440  		for i := len(partValueExprs) - 1; i >= 0; i-- {
   441  			nextExcludeExpr := tree.TypedExpr(tree.DBoolFalse)
   442  			for _, v := range partValueExprs[i] {
   443  				nextExcludeExpr = tree.NewTypedOrExpr(nextExcludeExpr, v.expr)
   444  				partValueExpr := tree.NewTypedAndExpr(v.expr, tree.NewTypedNotExpr(excludeExpr))
   445  				// We can get multiple expressions for the same partition in
   446  				// a single-col `PARTITION foo VALUES IN ((1), (2))`.
   447  				if e, ok := exprsByPartName[v.name]; !ok || e == nil {
   448  					exprsByPartName[v.name] = partValueExpr
   449  				} else {
   450  					exprsByPartName[v.name] = tree.NewTypedOrExpr(e, partValueExpr)
   451  				}
   452  			}
   453  			excludeExpr = tree.NewTypedOrExpr(excludeExpr, nextExcludeExpr)
   454  		}
   455  	}
   456  
   457  	for range partDesc.Range {
   458  		return errors.New("TODO(dan): unsupported for range partitionings")
   459  	}
   460  
   461  	return nil
   462  }
   463  
// init assigns createPartitioning to the sql.CreatePartitioningCCL variable
// when this package is loaded.
// NOTE(review): presumably this is the hook through which package sql reaches
// the CCL partitioning implementation — confirm against package sql.
func init() {
	sql.CreatePartitioningCCL = createPartitioning
}