github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/partition_utils.go (about) 1 // Copyright 2017 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sql 12 13 import ( 14 "bytes" 15 16 "github.com/cockroachdb/cockroach/pkg/base" 17 "github.com/cockroachdb/cockroach/pkg/config/zonepb" 18 "github.com/cockroachdb/cockroach/pkg/keys" 19 "github.com/cockroachdb/cockroach/pkg/roachpb" 20 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 21 "github.com/cockroachdb/cockroach/pkg/sql/covering" 22 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 23 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 24 "github.com/cockroachdb/cockroach/pkg/util/uuid" 25 ) 26 27 // GenerateSubzoneSpans constructs from a TableDescriptor the entries mapping 28 // zone config spans to subzones for use in the SubzoneSpans field of 29 // zonepb.ZoneConfig. SubzoneSpans controls which splits are created, so only 30 // the spans corresponding to entries in subzones are returned. 31 // 32 // Zone configs target indexes and partitions via `subzones`, which are attached 33 // to a table-scoped row in `system.zones`. Each subzone represents one index 34 // (primary or secondary) or one partition (or subpartition) and contains the 35 // usual zone config constraints. They are saved to `system.zones` sparsely 36 // (only when set by a user) and are the most specific entry in the normal 37 // cluster-default/database/table/subzone config hierarchy. 38 // 39 // Each non-interleaved index and partition can be mapped to spans in the 40 // keyspace. 
Indexes and range partitions each map to one span, while each list 41 // partition maps to one or more spans. Each partition span is contained by some 42 // index span and each subpartition span is contained by one of its parent 43 // partition's spans. The spans for a given level of a range partitioning 44 // (corresponding to one `PARTITION BY` in sql or one `PartitionDescriptor`) are 45 // disjoint, but the spans for a given level of a list partitioning may overlap 46 // if DEFAULT is used. A list partitioning which includes both (1, DEFAULT) and 47 // (1, 2) will overlap with the latter getting precedence in the zone config 48 // hierarchy. NB: In a valid PartitionDescriptor, no partitions with the same 49 // number of DEFAULTs will overlap (this property is used by 50 // `indexCoveringsForPartitioning`). 51 // 52 // These subzone spans are kept denormalized to the relevant `system.zones` row 53 // for performance. Given a TableDescriptor, the spans for every 54 // index/partition/subpartition are created, filtered out if they don't have a 55 // config set for them, and precedence applied (via `OverlapCoveringMerge`) to 56 // produce a set of non-overlapping spans, which each map to a subzone. There 57 // may be "holes" (uncovered spans) in this set. 58 // 59 // The spans are returned in exactly the format required by 60 // `system.zones`. They must be sorted and non-overlapping. Each contains a 61 // SubzoneIndex, which maps to one of the input `subzones` by indexing into the 62 // slice. As space optimizations, all `Key`s and `EndKey`s of `SubzoneSpan` omit 63 // the common prefix (the encoded table ID) and if `EndKey` is equal to 64 // `Key.PrefixEnd()` it is omitted. 65 // 66 // This function has tests in the partitionccl package. 67 // 68 // TODO(benesch): remove the hasNewSubzones parameter when a statement to clear 69 // all subzones at once is introduced. 
70 func GenerateSubzoneSpans( 71 st *cluster.Settings, 72 clusterID uuid.UUID, 73 codec keys.SQLCodec, 74 tableDesc *sqlbase.TableDescriptor, 75 subzones []zonepb.Subzone, 76 hasNewSubzones bool, 77 ) ([]zonepb.SubzoneSpan, error) { 78 // Removing zone configs does not require a valid license. 79 if hasNewSubzones { 80 org := ClusterOrganization.Get(&st.SV) 81 if err := base.CheckEnterpriseEnabled(st, clusterID, org, 82 "replication zones on indexes or partitions"); err != nil { 83 return nil, err 84 } 85 } 86 87 a := &sqlbase.DatumAlloc{} 88 89 subzoneIndexByIndexID := make(map[sqlbase.IndexID]int32) 90 subzoneIndexByPartition := make(map[string]int32) 91 for i, subzone := range subzones { 92 if len(subzone.PartitionName) > 0 { 93 subzoneIndexByPartition[subzone.PartitionName] = int32(i) 94 } else { 95 subzoneIndexByIndexID[sqlbase.IndexID(subzone.IndexID)] = int32(i) 96 } 97 } 98 99 var indexCovering covering.Covering 100 var partitionCoverings []covering.Covering 101 if err := tableDesc.ForeachNonDropIndex(func(idxDesc *sqlbase.IndexDescriptor) error { 102 _, indexSubzoneExists := subzoneIndexByIndexID[idxDesc.ID] 103 if indexSubzoneExists { 104 idxSpan := tableDesc.IndexSpan(codec, idxDesc.ID) 105 // Each index starts with a unique prefix, so (from a precedence 106 // perspective) it's safe to append them all together. 107 indexCovering = append(indexCovering, covering.Range{ 108 Start: idxSpan.Key, End: idxSpan.EndKey, 109 Payload: zonepb.Subzone{IndexID: uint32(idxDesc.ID)}, 110 }) 111 } 112 113 var emptyPrefix []tree.Datum 114 indexPartitionCoverings, err := indexCoveringsForPartitioning( 115 a, codec, tableDesc, idxDesc, &idxDesc.Partitioning, subzoneIndexByPartition, emptyPrefix) 116 if err != nil { 117 return err 118 } 119 // The returned indexPartitionCoverings are sorted with highest 120 // precedence first. 
They all start with the index prefix, so cannot 121 // overlap with the partition coverings for any other index, so (from a 122 // precedence perspective) it's safe to append them all together. 123 partitionCoverings = append(partitionCoverings, indexPartitionCoverings...) 124 125 return nil 126 }); err != nil { 127 return nil, err 128 } 129 130 // OverlapCoveringMerge returns the payloads for any coverings that overlap 131 // in the same order they were input. So, we require that they be ordered 132 // with highest precedence first, so the first payload of each range is the 133 // one we need. 134 ranges := covering.OverlapCoveringMerge(append(partitionCoverings, indexCovering)) 135 136 // NB: This assumes that none of the indexes are interleaved, which is 137 // checked in PartitionDescriptor validation. 138 sharedPrefix := codec.TablePrefix(uint32(tableDesc.ID)) 139 140 var subzoneSpans []zonepb.SubzoneSpan 141 for _, r := range ranges { 142 payloads := r.Payload.([]interface{}) 143 if len(payloads) == 0 { 144 continue 145 } 146 subzoneSpan := zonepb.SubzoneSpan{ 147 Key: bytes.TrimPrefix(r.Start, sharedPrefix), 148 EndKey: bytes.TrimPrefix(r.End, sharedPrefix), 149 } 150 var ok bool 151 if subzone := payloads[0].(zonepb.Subzone); len(subzone.PartitionName) > 0 { 152 subzoneSpan.SubzoneIndex, ok = subzoneIndexByPartition[subzone.PartitionName] 153 } else { 154 subzoneSpan.SubzoneIndex, ok = subzoneIndexByIndexID[sqlbase.IndexID(subzone.IndexID)] 155 } 156 if !ok { 157 continue 158 } 159 if bytes.Equal(subzoneSpan.Key.PrefixEnd(), subzoneSpan.EndKey) { 160 subzoneSpan.EndKey = nil 161 } 162 subzoneSpans = append(subzoneSpans, subzoneSpan) 163 } 164 return subzoneSpans, nil 165 } 166 167 // indexCoveringsForPartitioning returns span coverings representing the 168 // partitions in partDesc (including subpartitions). They are sorted with 169 // highest precedence first and the interval.Range payloads are each a 170 // `zonepb.Subzone` with the PartitionName set. 
171 func indexCoveringsForPartitioning( 172 a *sqlbase.DatumAlloc, 173 codec keys.SQLCodec, 174 tableDesc *sqlbase.TableDescriptor, 175 idxDesc *sqlbase.IndexDescriptor, 176 partDesc *sqlbase.PartitioningDescriptor, 177 relevantPartitions map[string]int32, 178 prefixDatums []tree.Datum, 179 ) ([]covering.Covering, error) { 180 if partDesc.NumColumns == 0 { 181 return nil, nil 182 } 183 184 var coverings []covering.Covering 185 var descendentCoverings []covering.Covering 186 187 if len(partDesc.List) > 0 { 188 // The returned spans are required to be ordered with highest precedence 189 // first. The span for (1, DEFAULT) overlaps with (1, 2) and needs to be 190 // returned at a lower precedence. Luckily, because of the partitioning 191 // validation, we're guaranteed that all entries in a list partitioning 192 // with the same number of DEFAULTs are non-overlapping. So, bucket the 193 // `interval.Range`s by the number of non-DEFAULT columns and return 194 // them ordered from least # of DEFAULTs to most. 195 listCoverings := make([]covering.Covering, int(partDesc.NumColumns)+1) 196 for _, p := range partDesc.List { 197 for _, valueEncBuf := range p.Values { 198 t, keyPrefix, err := sqlbase.DecodePartitionTuple( 199 a, codec, tableDesc, idxDesc, partDesc, valueEncBuf, prefixDatums) 200 if err != nil { 201 return nil, err 202 } 203 if _, ok := relevantPartitions[p.Name]; ok { 204 listCoverings[len(t.Datums)] = append(listCoverings[len(t.Datums)], covering.Range{ 205 Start: keyPrefix, End: roachpb.Key(keyPrefix).PrefixEnd(), 206 Payload: zonepb.Subzone{PartitionName: p.Name}, 207 }) 208 } 209 newPrefixDatums := append(prefixDatums, t.Datums...) 210 subpartitionCoverings, err := indexCoveringsForPartitioning( 211 a, codec, tableDesc, idxDesc, &p.Subpartitioning, relevantPartitions, newPrefixDatums) 212 if err != nil { 213 return nil, err 214 } 215 descendentCoverings = append(descendentCoverings, subpartitionCoverings...) 
216 } 217 } 218 for i := range listCoverings { 219 if covering := listCoverings[len(listCoverings)-i-1]; len(covering) > 0 { 220 coverings = append(coverings, covering) 221 } 222 } 223 } 224 225 if len(partDesc.Range) > 0 { 226 for _, p := range partDesc.Range { 227 if _, ok := relevantPartitions[p.Name]; !ok { 228 continue 229 } 230 _, fromKey, err := sqlbase.DecodePartitionTuple( 231 a, codec, tableDesc, idxDesc, partDesc, p.FromInclusive, prefixDatums) 232 if err != nil { 233 return nil, err 234 } 235 _, toKey, err := sqlbase.DecodePartitionTuple( 236 a, codec, tableDesc, idxDesc, partDesc, p.ToExclusive, prefixDatums) 237 if err != nil { 238 return nil, err 239 } 240 if _, ok := relevantPartitions[p.Name]; ok { 241 coverings = append(coverings, covering.Covering{{ 242 Start: fromKey, End: toKey, 243 Payload: zonepb.Subzone{PartitionName: p.Name}, 244 }}) 245 } 246 } 247 } 248 249 // descendentCoverings are from subpartitions and so get precedence; append 250 // them to the front. 251 return append(descendentCoverings, coverings...), nil 252 }