github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/sqlbase/partition.go (about)

     1  // Copyright 2017 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sqlbase
    12  
    13  import (
    14  	"fmt"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/keys"
    17  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    19  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    20  	"github.com/cockroachdb/cockroach/pkg/util/interval"
    21  	"github.com/cockroachdb/errors"
    22  )
    23  
// PartitionSpecialValCode identifies a special (non-datum) value that can
// appear in a partitioning tuple: DEFAULT, MAXVALUE or MINVALUE.
//
// DecodePartitionTuple reads these codes back out of encoded partition values
// as unsigned varints, so changing the number assigned to a constant below
// would change how already-encoded tuples are interpreted.
type PartitionSpecialValCode uint64

const (
	// PartitionDefaultVal represents the special DEFAULT value. It is also the
	// zero value of PartitionSpecialValCode.
	PartitionDefaultVal PartitionSpecialValCode = 0
	// PartitionMaxVal represents the special MAXVALUE value.
	PartitionMaxVal PartitionSpecialValCode = 1
	// PartitionMinVal represents the special MINVALUE value.
	PartitionMinVal PartitionSpecialValCode = 2
)
    35  
    36  func (c PartitionSpecialValCode) String() string {
    37  	switch c {
    38  	case PartitionDefaultVal:
    39  		return (tree.DefaultVal{}).String()
    40  	case PartitionMinVal:
    41  		return (tree.PartitionMinVal{}).String()
    42  	case PartitionMaxVal:
    43  		return (tree.PartitionMaxVal{}).String()
    44  	}
    45  	panic("unreachable")
    46  }
    47  
// PartitionTuple represents a tuple in a partitioning specification.
//
// It contains any number of true datums, stored in the Datums field, followed
// by any number of special partitioning values, represented by the Special and
// SpecialCount fields.
type PartitionTuple struct {
	// Datums holds the leading, ordinary values of the tuple in column order.
	Datums tree.Datums
	// Special is the single special value repeated at the tail of the tuple.
	// It is only meaningful when SpecialCount > 0; its zero value is
	// PartitionDefaultVal, so SpecialCount must be consulted first.
	Special PartitionSpecialValCode
	// SpecialCount is the number of trailing entries that are Special.
	SpecialCount int
}
    58  
    59  func (t *PartitionTuple) String() string {
    60  	f := tree.NewFmtCtx(tree.FmtSimple)
    61  	f.WriteByte('(')
    62  	for i := 0; i < len(t.Datums)+t.SpecialCount; i++ {
    63  		if i > 0 {
    64  			f.WriteString(", ")
    65  		}
    66  		if i < len(t.Datums) {
    67  			f.FormatNode(t.Datums[i])
    68  		} else {
    69  			f.WriteString(t.Special.String())
    70  		}
    71  	}
    72  	f.WriteByte(')')
    73  	return f.CloseAndGetString()
    74  }
    75  
    76  // DecodePartitionTuple parses columns (which are a prefix of the columns of
    77  // `idxDesc`) encoded with the "value" encoding and returns the parsed datums.
    78  // It also reencodes them into a key as they would be for `idxDesc` (accounting
    79  // for index dirs, interleaves, subpartitioning, etc).
    80  //
    81  // For a list partitioning, this returned key can be used as a prefix scan to
    82  // select all rows that have the given columns as a prefix (this is true even if
    83  // the list partitioning contains DEFAULT).
    84  //
    85  // Examples of the key returned for a list partitioning:
    86  //   - (1, 2) -> /table/index/1/2
    87  //   - (1, DEFAULT) -> /table/index/1
    88  //   - (DEFAULT, DEFAULT) -> /table/index
    89  //
    90  // For a range partitioning, this returned key can be used as a exclusive end
    91  // key to select all rows strictly less than ones with the given columns as a
    92  // prefix (this is true even if the range partitioning contains MINVALUE or
    93  // MAXVALUE).
    94  //
    95  // Examples of the key returned for a range partitioning:
    96  //   - (1, 2) -> /table/index/1/3
    97  //   - (1, MAXVALUE) -> /table/index/2
    98  //   - (MAXVALUE, MAXVALUE) -> (/table/index).PrefixEnd()
    99  //
   100  // NB: It is checked here that if an entry for a list partitioning contains
   101  // DEFAULT, everything in that entry "after" also has to be DEFAULT. So, (1, 2,
   102  // DEFAULT) is valid but (1, DEFAULT, 2) is not. Similarly for range
   103  // partitioning and MINVALUE/MAXVALUE.
   104  func DecodePartitionTuple(
   105  	a *DatumAlloc,
   106  	codec keys.SQLCodec,
   107  	tableDesc *TableDescriptor,
   108  	idxDesc *IndexDescriptor,
   109  	partDesc *PartitioningDescriptor,
   110  	valueEncBuf []byte,
   111  	prefixDatums tree.Datums,
   112  ) (*PartitionTuple, []byte, error) {
   113  	if len(prefixDatums)+int(partDesc.NumColumns) > len(idxDesc.ColumnIDs) {
   114  		return nil, nil, fmt.Errorf("not enough columns in index for this partitioning")
   115  	}
   116  
   117  	t := &PartitionTuple{
   118  		Datums: make(tree.Datums, 0, int(partDesc.NumColumns)),
   119  	}
   120  
   121  	colIDs := idxDesc.ColumnIDs[len(prefixDatums) : len(prefixDatums)+int(partDesc.NumColumns)]
   122  	for _, colID := range colIDs {
   123  		col, err := tableDesc.FindColumnByID(colID)
   124  		if err != nil {
   125  			return nil, nil, err
   126  		}
   127  		if _, dataOffset, _, typ, err := encoding.DecodeValueTag(valueEncBuf); err != nil {
   128  			return nil, nil, errors.Wrapf(err, "decoding")
   129  		} else if typ == encoding.NotNull {
   130  			// NOT NULL signals that a PartitionSpecialValCode follows
   131  			var valCode uint64
   132  			valueEncBuf, _, valCode, err = encoding.DecodeNonsortingUvarint(valueEncBuf[dataOffset:])
   133  			if err != nil {
   134  				return nil, nil, err
   135  			}
   136  			nextSpecial := PartitionSpecialValCode(valCode)
   137  			if t.SpecialCount > 0 && t.Special != nextSpecial {
   138  				return nil, nil, errors.Newf("non-%[1]s value (%[2]s) not allowed after %[1]s",
   139  					t.Special, nextSpecial)
   140  			}
   141  			t.Special = nextSpecial
   142  			t.SpecialCount++
   143  		} else {
   144  			var datum tree.Datum
   145  			datum, valueEncBuf, err = DecodeTableValue(a, col.Type, valueEncBuf)
   146  			if err != nil {
   147  				return nil, nil, errors.Wrapf(err, "decoding")
   148  			}
   149  			if t.SpecialCount > 0 {
   150  				return nil, nil, errors.Newf("non-%[1]s value (%[2]s) not allowed after %[1]s",
   151  					t.Special, datum)
   152  			}
   153  			t.Datums = append(t.Datums, datum)
   154  		}
   155  	}
   156  	if len(valueEncBuf) > 0 {
   157  		return nil, nil, errors.New("superfluous data in encoded value")
   158  	}
   159  
   160  	allDatums := append(prefixDatums, t.Datums...)
   161  	colMap := make(map[ColumnID]int, len(allDatums))
   162  	for i := range allDatums {
   163  		colMap[idxDesc.ColumnIDs[i]] = i
   164  	}
   165  
   166  	indexKeyPrefix := MakeIndexKeyPrefix(codec, tableDesc, idxDesc.ID)
   167  	key, _, err := EncodePartialIndexKey(
   168  		tableDesc, idxDesc, len(allDatums), colMap, allDatums, indexKeyPrefix)
   169  	if err != nil {
   170  		return nil, nil, err
   171  	}
   172  
   173  	// Currently, key looks something like `/table/index/1`. Given a range
   174  	// partitioning of (1), we're done. This can be used as the exclusive end
   175  	// key of a scan to fetch all rows strictly less than (1).
   176  	//
   177  	// If `specialIdx` is not the sentinel, then we're actually in a case like
   178  	// `(1, MAXVALUE, ..., MAXVALUE)`. Since this index could have a descending
   179  	// nullable column, we can't rely on `/table/index/1/0xff` to be _strictly_
   180  	// larger than everything it should match. Instead, we need `PrefixEnd()`.
   181  	// This also intuitively makes sense; we're essentially a key that is
   182  	// guaranteed to be less than `(2, MINVALUE, ..., MINVALUE)`.
   183  	if t.SpecialCount > 0 && t.Special == PartitionMaxVal {
   184  		key = roachpb.Key(key).PrefixEnd()
   185  	}
   186  
   187  	return t, key, nil
   188  }
   189  
// partitionInterval is a key span tagged with a name. It implements
// interval.Interface (see the assertion below) so that it can be used with the
// interval package.
type partitionInterval struct {
	// name labels the interval; presumably the partition's name — confirm
	// against the code that constructs these values.
	name string
	// start and end are the bounds reported by Range as Start and End.
	start roachpb.Key
	end   roachpb.Key
}

// Compile-time check that partitionInterval satisfies interval.Interface.
var _ interval.Interface = partitionInterval{}
   197  
   198  // ID is part of `interval.Interface` but unused in validatePartitioningDescriptor.
   199  func (ps partitionInterval) ID() uintptr { return 0 }
   200  
   201  // Range is part of `interval.Interface`.
   202  func (ps partitionInterval) Range() interval.Range {
   203  	return interval.Range{Start: []byte(ps.start), End: []byte(ps.end)}
   204  }