github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/sqlbase/partition.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sqlbase 12 13 import ( 14 "fmt" 15 16 "github.com/cockroachdb/cockroach/pkg/keys" 17 "github.com/cockroachdb/cockroach/pkg/roachpb" 18 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 19 "github.com/cockroachdb/cockroach/pkg/util/encoding" 20 "github.com/cockroachdb/cockroach/pkg/util/interval" 21 "github.com/cockroachdb/errors" 22 ) 23 24 // PartitionSpecialValCode identifies a special value. 25 type PartitionSpecialValCode uint64 26 27 const ( 28 // PartitionDefaultVal represents the special DEFAULT value. 29 PartitionDefaultVal PartitionSpecialValCode = 0 30 // PartitionMaxVal represents the special MAXVALUE value. 31 PartitionMaxVal PartitionSpecialValCode = 1 32 // PartitionMinVal represents the special MINVALUE value. 33 PartitionMinVal PartitionSpecialValCode = 2 34 ) 35 36 func (c PartitionSpecialValCode) String() string { 37 switch c { 38 case PartitionDefaultVal: 39 return (tree.DefaultVal{}).String() 40 case PartitionMinVal: 41 return (tree.PartitionMinVal{}).String() 42 case PartitionMaxVal: 43 return (tree.PartitionMaxVal{}).String() 44 } 45 panic("unreachable") 46 } 47 48 // PartitionTuple represents a tuple in a partitioning specification. 49 // 50 // It contains any number of true datums, stored in the Datums field, followed 51 // by any number of special partitioning values, represented by the Special and 52 // SpecialCount fields. 53 type PartitionTuple struct { 54 Datums tree.Datums 55 Special PartitionSpecialValCode 56 SpecialCount int 57 } 58 59 func (t *PartitionTuple) String() string { 60 f := tree.NewFmtCtx(tree.FmtSimple) 61 f.WriteByte('(') 62 for i := 0; i < len(t.Datums)+t.SpecialCount; i++ { 63 if i > 0 { 64 f.WriteString(", ") 65 } 66 if i < len(t.Datums) { 67 f.FormatNode(t.Datums[i]) 68 } else { 69 f.WriteString(t.Special.String()) 70 } 71 } 72 f.WriteByte(')') 73 return f.CloseAndGetString() 74 } 75 76 // DecodePartitionTuple parses columns (which are a prefix of the columns of 77 // `idxDesc`) encoded with the "value" encoding and returns the parsed datums. 78 // It also reencodes them into a key as they would be for `idxDesc` (accounting 79 // for index dirs, interleaves, subpartitioning, etc). 80 // 81 // For a list partitioning, this returned key can be used as a prefix scan to 82 // select all rows that have the given columns as a prefix (this is true even if 83 // the list partitioning contains DEFAULT). 84 // 85 // Examples of the key returned for a list partitioning: 86 // - (1, 2) -> /table/index/1/2 87 // - (1, DEFAULT) -> /table/index/1 88 // - (DEFAULT, DEFAULT) -> /table/index 89 // 90 // For a range partitioning, this returned key can be used as a exclusive end 91 // key to select all rows strictly less than ones with the given columns as a 92 // prefix (this is true even if the range partitioning contains MINVALUE or 93 // MAXVALUE). 94 // 95 // Examples of the key returned for a range partitioning: 96 // - (1, 2) -> /table/index/1/3 97 // - (1, MAXVALUE) -> /table/index/2 98 // - (MAXVALUE, MAXVALUE) -> (/table/index).PrefixEnd() 99 // 100 // NB: It is checked here that if an entry for a list partitioning contains 101 // DEFAULT, everything in that entry "after" also has to be DEFAULT. So, (1, 2, 102 // DEFAULT) is valid but (1, DEFAULT, 2) is not. Similarly for range 103 // partitioning and MINVALUE/MAXVALUE. 104 func DecodePartitionTuple( 105 a *DatumAlloc, 106 codec keys.SQLCodec, 107 tableDesc *TableDescriptor, 108 idxDesc *IndexDescriptor, 109 partDesc *PartitioningDescriptor, 110 valueEncBuf []byte, 111 prefixDatums tree.Datums, 112 ) (*PartitionTuple, []byte, error) { 113 if len(prefixDatums)+int(partDesc.NumColumns) > len(idxDesc.ColumnIDs) { 114 return nil, nil, fmt.Errorf("not enough columns in index for this partitioning") 115 } 116 117 t := &PartitionTuple{ 118 Datums: make(tree.Datums, 0, int(partDesc.NumColumns)), 119 } 120 121 colIDs := idxDesc.ColumnIDs[len(prefixDatums) : len(prefixDatums)+int(partDesc.NumColumns)] 122 for _, colID := range colIDs { 123 col, err := tableDesc.FindColumnByID(colID) 124 if err != nil { 125 return nil, nil, err 126 } 127 if _, dataOffset, _, typ, err := encoding.DecodeValueTag(valueEncBuf); err != nil { 128 return nil, nil, errors.Wrapf(err, "decoding") 129 } else if typ == encoding.NotNull { 130 // NOT NULL signals that a PartitionSpecialValCode follows 131 var valCode uint64 132 valueEncBuf, _, valCode, err = encoding.DecodeNonsortingUvarint(valueEncBuf[dataOffset:]) 133 if err != nil { 134 return nil, nil, err 135 } 136 nextSpecial := PartitionSpecialValCode(valCode) 137 if t.SpecialCount > 0 && t.Special != nextSpecial { 138 return nil, nil, errors.Newf("non-%[1]s value (%[2]s) not allowed after %[1]s", 139 t.Special, nextSpecial) 140 } 141 t.Special = nextSpecial 142 t.SpecialCount++ 143 } else { 144 var datum tree.Datum 145 datum, valueEncBuf, err = DecodeTableValue(a, col.Type, valueEncBuf) 146 if err != nil { 147 return nil, nil, errors.Wrapf(err, "decoding") 148 } 149 if t.SpecialCount > 0 { 150 return nil, nil, errors.Newf("non-%[1]s value (%[2]s) not allowed after %[1]s", 151 t.Special, datum) 152 } 153 t.Datums = append(t.Datums, datum) 154 } 155 } 156 if len(valueEncBuf) > 0 { 157 return nil, nil, errors.New("superfluous data in encoded value") 158 } 159 160 allDatums := append(prefixDatums, t.Datums...) 161 colMap := make(map[ColumnID]int, len(allDatums)) 162 for i := range allDatums { 163 colMap[idxDesc.ColumnIDs[i]] = i 164 } 165 166 indexKeyPrefix := MakeIndexKeyPrefix(codec, tableDesc, idxDesc.ID) 167 key, _, err := EncodePartialIndexKey( 168 tableDesc, idxDesc, len(allDatums), colMap, allDatums, indexKeyPrefix) 169 if err != nil { 170 return nil, nil, err 171 } 172 173 // Currently, key looks something like `/table/index/1`. Given a range 174 // partitioning of (1), we're done. This can be used as the exclusive end 175 // key of a scan to fetch all rows strictly less than (1). 176 // 177 // If `specialIdx` is not the sentinel, then we're actually in a case like 178 // `(1, MAXVALUE, ..., MAXVALUE)`. Since this index could have a descending 179 // nullable column, we can't rely on `/table/index/1/0xff` to be _strictly_ 180 // larger than everything it should match. Instead, we need `PrefixEnd()`. 181 // This also intuitively makes sense; we're essentially a key that is 182 // guaranteed to be less than `(2, MINVALUE, ..., MINVALUE)`. 183 if t.SpecialCount > 0 && t.Special == PartitionMaxVal { 184 key = roachpb.Key(key).PrefixEnd() 185 } 186 187 return t, key, nil 188 } 189 190 type partitionInterval struct { 191 name string 192 start roachpb.Key 193 end roachpb.Key 194 } 195 196 var _ interval.Interface = partitionInterval{} 197 198 // ID is part of `interval.Interface` but unused in validatePartitioningDescriptor. 199 func (ps partitionInterval) ID() uintptr { return 0 } 200 201 // Range is part of `interval.Interface`. 202 func (ps partitionInterval) Range() interval.Range { 203 return interval.Range{Start: []byte(ps.start), End: []byte(ps.end)} 204 }