github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/config/system.go (about) 1 // Copyright 2015 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package config 12 13 import ( 14 "bytes" 15 "context" 16 "fmt" 17 "sort" 18 19 "github.com/cockroachdb/cockroach/pkg/config/zonepb" 20 "github.com/cockroachdb/cockroach/pkg/keys" 21 "github.com/cockroachdb/cockroach/pkg/roachpb" 22 "github.com/cockroachdb/cockroach/pkg/sql/sqlbase" 23 "github.com/cockroachdb/cockroach/pkg/util/log" 24 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 25 ) 26 27 type zoneConfigHook func( 28 sysCfg *SystemConfig, objectID uint32, 29 ) (zone *zonepb.ZoneConfig, placeholder *zonepb.ZoneConfig, cache bool, err error) 30 31 var ( 32 // ZoneConfigHook is a function used to lookup a zone config given a table 33 // or database ID. 34 // This is also used by testing to simplify fake configs. 35 ZoneConfigHook zoneConfigHook 36 37 // testingLargestIDHook is a function used to bypass GetLargestObjectID 38 // in tests. 39 testingLargestIDHook func(uint32) uint32 40 ) 41 42 type zoneEntry struct { 43 zone *zonepb.ZoneConfig 44 placeholder *zonepb.ZoneConfig 45 46 // combined merges the zone and placeholder configs into a combined config. 47 // If both have subzone information, the placeholder information is preferred. 48 // This may never happen, but while the existing code gives preference to the 49 // placeholder, there appear to be no guarantees that there can be no overlap. 50 // 51 // TODO(andyk): Use the combined value everywhere early in 19.2, so there's 52 // enough bake time to ensure this is OK to do. Until then, only use the 53 // combined value in GetZoneConfigForObject, which is only used by the 54 // optimizer. 55 combined *zonepb.ZoneConfig 56 } 57 58 // SystemConfig embeds a SystemConfigEntries message which contains an 59 // entry for every system descriptor (e.g. databases, tables, zone 60 // configs). It also has a map from object ID to unmarshaled zone 61 // config for caching. 62 // The shouldSplitCache caches information about the descriptor ID, 63 // saying whether or not it should be considered for splitting at all. 64 // A database descriptor or a table view descriptor are examples of IDs 65 // that should not be considered for splits. 66 type SystemConfig struct { 67 SystemConfigEntries 68 DefaultZoneConfig *zonepb.ZoneConfig 69 mu struct { 70 syncutil.RWMutex 71 zoneCache map[uint32]zoneEntry 72 shouldSplitCache map[uint32]bool 73 } 74 } 75 76 // NewSystemConfig returns an initialized instance of SystemConfig. 77 func NewSystemConfig(defaultZoneConfig *zonepb.ZoneConfig) *SystemConfig { 78 sc := &SystemConfig{} 79 sc.DefaultZoneConfig = defaultZoneConfig 80 sc.mu.zoneCache = map[uint32]zoneEntry{} 81 sc.mu.shouldSplitCache = map[uint32]bool{} 82 return sc 83 } 84 85 // Equal checks for equality. 86 // 87 // It assumes that s.Values and other.Values are sorted in key order. 88 func (s *SystemConfig) Equal(other *SystemConfigEntries) bool { 89 if len(s.Values) != len(other.Values) { 90 return false 91 } 92 for i := range s.Values { 93 leftKV, rightKV := s.Values[i], other.Values[i] 94 if !leftKV.Key.Equal(rightKV.Key) { 95 return false 96 } 97 leftVal, rightVal := leftKV.Value, rightKV.Value 98 if !leftVal.EqualData(rightVal) { 99 return false 100 } 101 if leftVal.Timestamp != rightVal.Timestamp { 102 return false 103 } 104 } 105 return true 106 } 107 108 // GetDesc looks for the descriptor value given a key, if a zone is created in 109 // a test without creating a Descriptor, a dummy descriptor is returned. 110 // If the key is invalid in decoding an ID, GetDesc panics. 111 func (s *SystemConfig) GetDesc(key roachpb.Key) *roachpb.Value { 112 if getVal := s.GetValue(key); getVal != nil { 113 return getVal 114 } 115 116 id, err := keys.TODOSQLCodec.DecodeDescMetadataID(key) 117 if err != nil { 118 // No ID found for key. No roachpb.Value corresponds to this key. 119 panic(err) 120 } 121 122 testingLock.Lock() 123 _, ok := testingZoneConfig[uint32(id)] 124 testingLock.Unlock() 125 126 if ok { 127 // A test installed a zone config for this ID, but no descriptor. 128 // Synthesize an empty descriptor to force split to occur, or else the 129 // zone config won't apply to any ranges. Most tests that use 130 // TestingSetZoneConfig are too low-level to create tables and zone 131 // configs through proper channels. 132 // 133 // Getting here outside tests is impossible. 134 var val roachpb.Value 135 if err := val.SetProto(sqlbase.WrapDescriptor(&sqlbase.TableDescriptor{})); err != nil { 136 panic(err) 137 } 138 return &val 139 } 140 return nil 141 } 142 143 // GetValue searches the kv list for 'key' and returns its 144 // roachpb.Value if found. 145 func (s *SystemConfig) GetValue(key roachpb.Key) *roachpb.Value { 146 if kv := s.get(key); kv != nil { 147 return &kv.Value 148 } 149 return nil 150 } 151 152 // get searches the kv list for 'key' and returns its roachpb.KeyValue 153 // if found. 154 func (s *SystemConfig) get(key roachpb.Key) *roachpb.KeyValue { 155 if index, found := s.GetIndex(key); found { 156 // TODO(marc): I'm pretty sure a Value returned by MVCCScan can 157 // never be nil. Should check. 158 return &s.Values[index] 159 } 160 return nil 161 } 162 163 // GetIndex searches the kv list for 'key' and returns its index if found. 164 func (s *SystemConfig) GetIndex(key roachpb.Key) (int, bool) { 165 l := len(s.Values) 166 index := sort.Search(l, func(i int) bool { 167 return bytes.Compare(s.Values[i].Key, key) >= 0 168 }) 169 if index == l || !key.Equal(s.Values[index].Key) { 170 return 0, false 171 } 172 return index, true 173 } 174 175 // GetLargestObjectID returns the largest object ID found in the config which is 176 // less than or equal to maxID. If maxID is 0, returns the largest ID in the 177 // config. 178 func (s *SystemConfig) GetLargestObjectID(maxID uint32) (uint32, error) { 179 testingLock.Lock() 180 hook := testingLargestIDHook 181 testingLock.Unlock() 182 if hook != nil { 183 return hook(maxID), nil 184 } 185 186 // Search for the descriptor table entries within the SystemConfig. 187 highBound := keys.TODOSQLCodec.TablePrefix(keys.DescriptorTableID + 1) 188 highIndex := sort.Search(len(s.Values), func(i int) bool { 189 return bytes.Compare(s.Values[i].Key, highBound) >= 0 190 }) 191 lowBound := keys.TODOSQLCodec.TablePrefix(keys.DescriptorTableID) 192 lowIndex := sort.Search(len(s.Values), func(i int) bool { 193 return bytes.Compare(s.Values[i].Key, lowBound) >= 0 194 }) 195 196 if highIndex == lowIndex { 197 return 0, fmt.Errorf("descriptor table not found in system config of %d values", len(s.Values)) 198 } 199 200 // No maximum specified; maximum ID is the last entry in the descriptor 201 // table. 202 if maxID == 0 { 203 id, err := keys.TODOSQLCodec.DecodeDescMetadataID(s.Values[highIndex-1].Key) 204 if err != nil { 205 return 0, err 206 } 207 return uint32(id), nil 208 } 209 210 // Maximum specified: need to search the descriptor table. Binary search 211 // through all descriptor table values to find the first descriptor with ID 212 // >= maxID. 213 searchSlice := s.Values[lowIndex:highIndex] 214 var err error 215 maxIdx := sort.Search(len(searchSlice), func(i int) bool { 216 var id uint64 217 id, err = keys.TODOSQLCodec.DecodeDescMetadataID(searchSlice[i].Key) 218 if err != nil { 219 return false 220 } 221 return uint32(id) >= maxID 222 }) 223 if err != nil { 224 return 0, err 225 } 226 227 // If we found an index within the list, maxIdx might point to a descriptor 228 // with exactly maxID. 229 if maxIdx < len(searchSlice) { 230 id, err := keys.TODOSQLCodec.DecodeDescMetadataID(searchSlice[maxIdx].Key) 231 if err != nil { 232 return 0, err 233 } 234 if uint32(id) == maxID { 235 return uint32(id), nil 236 } 237 } 238 239 if maxIdx == 0 { 240 return 0, fmt.Errorf("no descriptors present with ID < %d", maxID) 241 } 242 243 // Return ID of the immediately preceding descriptor. 244 id, err := keys.TODOSQLCodec.DecodeDescMetadataID(searchSlice[maxIdx-1].Key) 245 if err != nil { 246 return 0, err 247 } 248 return uint32(id), nil 249 } 250 251 // GetZoneConfigForKey looks up the zone config for the object (table 252 // or database, specified by key.id). It is the caller's 253 // responsibility to ensure that the range does not need to be split. 254 func (s *SystemConfig) GetZoneConfigForKey(key roachpb.RKey) (*zonepb.ZoneConfig, error) { 255 return s.getZoneConfigForKey(DecodeKeyIntoZoneIDAndSuffix(key)) 256 } 257 258 // DecodeKeyIntoZoneIDAndSuffix figures out the zone that the key belongs to. 259 func DecodeKeyIntoZoneIDAndSuffix(key roachpb.RKey) (id uint32, keySuffix []byte) { 260 objectID, keySuffix, ok := DecodeObjectID(key) 261 if !ok { 262 // Not in the structured data namespace. 263 objectID = keys.RootNamespaceID 264 } else if objectID <= keys.MaxSystemConfigDescID || isPseudoTableID(objectID) { 265 // For now, you cannot set the zone config on gossiped tables. The only 266 // way to set a zone config on these tables is to modify config for the 267 // system database as a whole. This is largely because all the 268 // "system config" tables are colocated in the same range by default and 269 // thus couldn't be managed separately. 270 // Furthermore pseudo-table ids should be considered to be a part of the 271 // system database as they aren't real tables. 272 objectID = keys.SystemDatabaseID 273 } 274 275 // Special-case known system ranges to their special zone configs. 276 if key.Equal(roachpb.RKeyMin) || bytes.HasPrefix(key, keys.Meta1Prefix) || bytes.HasPrefix(key, keys.Meta2Prefix) { 277 objectID = keys.MetaRangesID 278 } else if bytes.HasPrefix(key, keys.SystemPrefix) { 279 if bytes.HasPrefix(key, keys.NodeLivenessPrefix) { 280 objectID = keys.LivenessRangesID 281 } else if bytes.HasPrefix(key, keys.TimeseriesPrefix) { 282 objectID = keys.TimeseriesRangesID 283 } else { 284 objectID = keys.SystemRangesID 285 } 286 } 287 return objectID, keySuffix 288 } 289 290 // isPseudoTableID returns true if id is in keys.PseudoTableIDs. 291 func isPseudoTableID(id uint32) bool { 292 for _, pseudoTableID := range keys.PseudoTableIDs { 293 if id == pseudoTableID { 294 return true 295 } 296 } 297 return false 298 } 299 300 // GetZoneConfigForObject returns the combined zone config for the given object 301 // identifier. 302 // NOTE: any subzones from the zone placeholder will be automatically merged 303 // into the cached zone so the caller doesn't need special-case handling code. 304 func (s *SystemConfig) GetZoneConfigForObject(id uint32) (*zonepb.ZoneConfig, error) { 305 entry, err := s.getZoneEntry(id) 306 if err != nil { 307 return nil, err 308 } 309 return entry.combined, nil 310 } 311 312 // getZoneEntry returns the zone entry for the given object ID. In the fast 313 // path, the zone is already in the cache, and is directly returned. Otherwise, 314 // getZoneEntry will hydrate new zonepb.ZoneConfig(s) from the SystemConfig and install 315 // them as an entry in the cache. 316 func (s *SystemConfig) getZoneEntry(id uint32) (zoneEntry, error) { 317 s.mu.RLock() 318 entry, ok := s.mu.zoneCache[id] 319 s.mu.RUnlock() 320 if ok { 321 return entry, nil 322 } 323 testingLock.Lock() 324 hook := ZoneConfigHook 325 testingLock.Unlock() 326 zone, placeholder, cache, err := hook(s, id) 327 if err != nil { 328 return zoneEntry{}, err 329 } 330 if zone != nil { 331 entry := zoneEntry{zone: zone, placeholder: placeholder, combined: zone} 332 if placeholder != nil { 333 // Merge placeholder with zone by copying over subzone information. 334 // Placeholders should only define the Subzones and SubzoneSpans fields. 335 combined := *zone 336 combined.Subzones = placeholder.Subzones 337 combined.SubzoneSpans = placeholder.SubzoneSpans 338 entry.combined = &combined 339 } 340 341 if cache { 342 s.mu.Lock() 343 s.mu.zoneCache[id] = entry 344 s.mu.Unlock() 345 } 346 return entry, nil 347 } 348 return zoneEntry{}, nil 349 } 350 351 func (s *SystemConfig) getZoneConfigForKey( 352 id uint32, keySuffix []byte, 353 ) (*zonepb.ZoneConfig, error) { 354 entry, err := s.getZoneEntry(id) 355 if err != nil { 356 return nil, err 357 } 358 if entry.zone != nil { 359 if entry.placeholder != nil { 360 if subzone, _ := entry.placeholder.GetSubzoneForKeySuffix(keySuffix); subzone != nil { 361 if indexSubzone := entry.placeholder.GetSubzone(subzone.IndexID, ""); indexSubzone != nil { 362 subzone.Config.InheritFromParent(&indexSubzone.Config) 363 } 364 subzone.Config.InheritFromParent(entry.zone) 365 return &subzone.Config, nil 366 } 367 } else if subzone, _ := entry.zone.GetSubzoneForKeySuffix(keySuffix); subzone != nil { 368 if indexSubzone := entry.zone.GetSubzone(subzone.IndexID, ""); indexSubzone != nil { 369 subzone.Config.InheritFromParent(&indexSubzone.Config) 370 } 371 subzone.Config.InheritFromParent(entry.zone) 372 return &subzone.Config, nil 373 } 374 return entry.zone, nil 375 } 376 return s.DefaultZoneConfig, nil 377 } 378 379 var staticSplits = []roachpb.RKey{ 380 roachpb.RKey(keys.NodeLivenessPrefix), // end of meta records / start of node liveness span 381 roachpb.RKey(keys.NodeLivenessKeyMax), // end of node liveness span 382 roachpb.RKey(keys.TimeseriesPrefix), // start of timeseries span 383 roachpb.RKey(keys.TimeseriesPrefix.PrefixEnd()), // end of timeseries span 384 roachpb.RKey(keys.TableDataMin), // end of system ranges / start of system config tables 385 } 386 387 // StaticSplits are predefined split points in the system keyspace. 388 // Corresponding ranges are created at cluster bootstrap time. 389 // 390 // There are two reasons for a static split. First, spans that are critical to 391 // cluster stability, like the node liveness span, are split into their own 392 // ranges to ease debugging (see #17297). Second, spans in the system keyspace 393 // that can be targeted by zone configs, like the meta span and the timeseries 394 // span, are split off into their own ranges because zone configs cannot apply 395 // to fractions of a range. 396 // 397 // Note that these are not the only splits created at cluster bootstrap; splits 398 // between various system tables are also created. 399 func StaticSplits() []roachpb.RKey { 400 return staticSplits 401 } 402 403 // ComputeSplitKey takes a start and end key and returns the first key at which 404 // to split the span [start, end). Returns nil if no splits are required. 405 // 406 // Splits are required between user tables (i.e. /table/<id>), at the start 407 // of the system-config tables (i.e. /table/0), and at certain points within the 408 // system ranges that come before the system tables. The system-config range is 409 // somewhat special in that it can contain multiple SQL tables 410 // (/table/0-/table/<max-system-config-desc>) within a single range. 411 func (s *SystemConfig) ComputeSplitKey(startKey, endKey roachpb.RKey) (rr roachpb.RKey) { 412 // Before dealing with splits necessitated by SQL tables, handle all of the 413 // static splits earlier in the keyspace. Note that this list must be kept in 414 // the proper order (ascending in the keyspace) for the logic below to work. 415 // 416 // For new clusters, the static splits correspond to ranges created at 417 // bootstrap time. Older stores might be used with a version with more 418 // staticSplits though, in which case this code is useful. 419 for _, split := range staticSplits { 420 if startKey.Less(split) { 421 if split.Less(endKey) { 422 // The split point is contained within [startKey, endKey), so we need to 423 // create the split. 424 return split 425 } 426 // [startKey, endKey) is contained between the previous split point and 427 // this split point. 428 return nil 429 } 430 // [startKey, endKey) is somewhere greater than this split point. Continue. 431 } 432 433 // If the above iteration over the static split points didn't decide anything, 434 // the key range must be somewhere in the SQL table part of the keyspace. 435 startID, _, ok := DecodeObjectID(startKey) 436 if !ok || startID <= keys.MaxSystemConfigDescID { 437 // The start key is either: 438 // - not part of the structured data span 439 // - part of the system span 440 // In either case, start looking for splits at the first ID usable 441 // by the user data span. 442 startID = keys.MaxSystemConfigDescID + 1 443 } 444 445 // Build key prefixes for sequential table IDs until we reach endKey. Note 446 // that there are two disjoint sets of sequential keys: non-system reserved 447 // tables have sequential IDs, as do user tables, but the two ranges contain a 448 // gap. 449 450 // findSplitKey returns the first possible split key between the given range 451 // of IDs. 452 findSplitKey := func(startID, endID uint32) roachpb.RKey { 453 // endID could be smaller than startID if we don't have user tables. 454 for id := startID; id <= endID; id++ { 455 tableKey := roachpb.RKey(keys.TODOSQLCodec.TablePrefix(id)) 456 // This logic is analogous to the well-commented static split logic above. 457 if startKey.Less(tableKey) && s.shouldSplit(id) { 458 if tableKey.Less(endKey) { 459 return tableKey 460 } 461 return nil 462 } 463 464 zoneVal := s.GetValue(MakeZoneKey(id)) 465 if zoneVal == nil { 466 continue 467 } 468 var zone zonepb.ZoneConfig 469 if err := zoneVal.GetProto(&zone); err != nil { 470 // An error while decoding the zone proto is unfortunate, but logging a 471 // message here would be excessively spammy. Just move on, which 472 // effectively assumes there are no subzones for this table. 473 continue 474 } 475 // This logic is analogous to the well-commented static split logic above. 476 for _, s := range zone.SubzoneSplits() { 477 subzoneKey := append(tableKey, s...) 478 if startKey.Less(subzoneKey) { 479 if subzoneKey.Less(endKey) { 480 return subzoneKey 481 } 482 return nil 483 } 484 } 485 } 486 return nil 487 } 488 489 // If the startKey falls within the non-system reserved range, compute those 490 // keys first. 491 if startID <= keys.MaxReservedDescID { 492 endID, err := s.GetLargestObjectID(keys.MaxReservedDescID) 493 if err != nil { 494 log.Errorf(context.TODO(), "unable to determine largest reserved object ID from system config: %s", err) 495 return nil 496 } 497 if splitKey := findSplitKey(startID, endID); splitKey != nil { 498 return splitKey 499 } 500 startID = keys.MaxReservedDescID + 1 501 } 502 503 // Find the split key in the user space. 504 endID, err := s.GetLargestObjectID(0) 505 if err != nil { 506 log.Errorf(context.TODO(), "unable to determine largest object ID from system config: %s", err) 507 return nil 508 } 509 return findSplitKey(startID, endID) 510 } 511 512 // NeedsSplit returns whether the range [startKey, endKey) needs a split due 513 // to zone configs. 514 func (s *SystemConfig) NeedsSplit(startKey, endKey roachpb.RKey) bool { 515 return len(s.ComputeSplitKey(startKey, endKey)) > 0 516 } 517 518 // shouldSplit checks if the ID is eligible for a split at all. 519 // It uses the internal cache to find a value, and tries to find 520 // it using the hook if ID isn't found in the cache. 521 func (s *SystemConfig) shouldSplit(ID uint32) bool { 522 // Check the cache. 523 { 524 s.mu.RLock() 525 shouldSplit, ok := s.mu.shouldSplitCache[ID] 526 s.mu.RUnlock() 527 if ok { 528 return shouldSplit 529 } 530 } 531 532 var shouldSplit bool 533 if ID < keys.MinUserDescID { 534 // The ID might be one of the reserved IDs that refer to ranges but not any 535 // actual descriptors. 536 shouldSplit = true 537 } else { 538 desc := s.GetDesc(keys.TODOSQLCodec.DescMetadataKey(ID)) 539 shouldSplit = desc != nil && sqlbase.ShouldSplitAtDesc(desc) 540 } 541 // Populate the cache. 542 s.mu.Lock() 543 s.mu.shouldSplitCache[ID] = shouldSplit 544 s.mu.Unlock() 545 return shouldSplit 546 }