github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/config/system.go (about)

     1  // Copyright 2015 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package config
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"fmt"
    17  	"sort"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/config/zonepb"
    20  	"github.com/cockroachdb/cockroach/pkg/keys"
    21  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    23  	"github.com/cockroachdb/cockroach/pkg/util/log"
    24  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    25  )
    26  
// zoneConfigHook is the signature of the function installed as ZoneConfigHook.
// Given a SystemConfig and an object ID, it returns the zone config for that
// object, an optional placeholder config, whether the result may be stored in
// the SystemConfig's zone cache, and an error.
type zoneConfigHook func(
	sysCfg *SystemConfig, objectID uint32,
) (zone *zonepb.ZoneConfig, placeholder *zonepb.ZoneConfig, cache bool, err error)
    30  
var (
	// ZoneConfigHook is a function used to lookup a zone config given a table
	// or database ID.
	// This is also used by testing to simplify fake configs.
	// getZoneEntry reads it while holding testingLock and calls it without a
	// nil check, so it must be set before any zone config lookup is made.
	ZoneConfigHook zoneConfigHook

	// testingLargestIDHook is a function used to bypass GetLargestObjectID
	// in tests. When it is non-nil, GetLargestObjectID returns the hook's
	// result for the given maxID instead of consulting the system config.
	// It is read while holding testingLock.
	testingLargestIDHook func(uint32) uint32
)
    41  
// zoneEntry is the result of a zone config lookup for a single object ID, as
// stored in SystemConfig.mu.zoneCache. zone and placeholder hold the two
// configs returned by ZoneConfigHook; placeholder may be nil.
type zoneEntry struct {
	zone        *zonepb.ZoneConfig
	placeholder *zonepb.ZoneConfig

	// combined merges the zone and placeholder configs into a combined config.
	// When a placeholder is present, combined is a copy of zone whose Subzones
	// and SubzoneSpans are replaced by the placeholder's (see getZoneEntry), so
	// the placeholder's subzone information always wins. Without a placeholder,
	// combined is zone itself. Overlapping subzone information may never
	// happen, but there appear to be no guarantees that it cannot.
	//
	// TODO(andyk): Use the combined value everywhere early in 19.2, so there's
	// enough bake time to ensure this is OK to do. Until then, only use the
	// combined value in GetZoneConfigForObject, which is only used by the
	// optimizer.
	combined *zonepb.ZoneConfig
}
    57  
// SystemConfig embeds a SystemConfigEntries message which contains an
// entry for every system descriptor (e.g. databases, tables, zone
// configs). It also has a map from object ID to unmarshaled zone
// config for caching.
// The shouldSplitCache caches information about the descriptor ID,
// saying whether or not it should be considered for splitting at all.
// A database descriptor or a table view descriptor are examples of IDs
// that should not be considered for splits.
//
// DefaultZoneConfig is the config returned by zone lookups by key when no
// zone is found for the object. The mu mutex guards both caches: zoneCache is
// filled by getZoneEntry and shouldSplitCache by shouldSplit. Because the
// struct contains a mutex, it should be handled through a pointer (see
// NewSystemConfig) rather than copied.
type SystemConfig struct {
	SystemConfigEntries
	DefaultZoneConfig *zonepb.ZoneConfig
	mu                struct {
		syncutil.RWMutex
		zoneCache        map[uint32]zoneEntry
		shouldSplitCache map[uint32]bool
	}
}
    75  
    76  // NewSystemConfig returns an initialized instance of SystemConfig.
    77  func NewSystemConfig(defaultZoneConfig *zonepb.ZoneConfig) *SystemConfig {
    78  	sc := &SystemConfig{}
    79  	sc.DefaultZoneConfig = defaultZoneConfig
    80  	sc.mu.zoneCache = map[uint32]zoneEntry{}
    81  	sc.mu.shouldSplitCache = map[uint32]bool{}
    82  	return sc
    83  }
    84  
    85  // Equal checks for equality.
    86  //
    87  // It assumes that s.Values and other.Values are sorted in key order.
    88  func (s *SystemConfig) Equal(other *SystemConfigEntries) bool {
    89  	if len(s.Values) != len(other.Values) {
    90  		return false
    91  	}
    92  	for i := range s.Values {
    93  		leftKV, rightKV := s.Values[i], other.Values[i]
    94  		if !leftKV.Key.Equal(rightKV.Key) {
    95  			return false
    96  		}
    97  		leftVal, rightVal := leftKV.Value, rightKV.Value
    98  		if !leftVal.EqualData(rightVal) {
    99  			return false
   100  		}
   101  		if leftVal.Timestamp != rightVal.Timestamp {
   102  			return false
   103  		}
   104  	}
   105  	return true
   106  }
   107  
   108  // GetDesc looks for the descriptor value given a key, if a zone is created in
   109  // a test without creating a Descriptor, a dummy descriptor is returned.
   110  // If the key is invalid in decoding an ID, GetDesc panics.
   111  func (s *SystemConfig) GetDesc(key roachpb.Key) *roachpb.Value {
   112  	if getVal := s.GetValue(key); getVal != nil {
   113  		return getVal
   114  	}
   115  
   116  	id, err := keys.TODOSQLCodec.DecodeDescMetadataID(key)
   117  	if err != nil {
   118  		// No ID found for key. No roachpb.Value corresponds to this key.
   119  		panic(err)
   120  	}
   121  
   122  	testingLock.Lock()
   123  	_, ok := testingZoneConfig[uint32(id)]
   124  	testingLock.Unlock()
   125  
   126  	if ok {
   127  		// A test installed a zone config for this ID, but no descriptor.
   128  		// Synthesize an empty descriptor to force split to occur, or else the
   129  		// zone config won't apply to any ranges. Most tests that use
   130  		// TestingSetZoneConfig are too low-level to create tables and zone
   131  		// configs through proper channels.
   132  		//
   133  		// Getting here outside tests is impossible.
   134  		var val roachpb.Value
   135  		if err := val.SetProto(sqlbase.WrapDescriptor(&sqlbase.TableDescriptor{})); err != nil {
   136  			panic(err)
   137  		}
   138  		return &val
   139  	}
   140  	return nil
   141  }
   142  
   143  // GetValue searches the kv list for 'key' and returns its
   144  // roachpb.Value if found.
   145  func (s *SystemConfig) GetValue(key roachpb.Key) *roachpb.Value {
   146  	if kv := s.get(key); kv != nil {
   147  		return &kv.Value
   148  	}
   149  	return nil
   150  }
   151  
   152  // get searches the kv list for 'key' and returns its roachpb.KeyValue
   153  // if found.
   154  func (s *SystemConfig) get(key roachpb.Key) *roachpb.KeyValue {
   155  	if index, found := s.GetIndex(key); found {
   156  		// TODO(marc): I'm pretty sure a Value returned by MVCCScan can
   157  		// never be nil. Should check.
   158  		return &s.Values[index]
   159  	}
   160  	return nil
   161  }
   162  
   163  // GetIndex searches the kv list for 'key' and returns its index if found.
   164  func (s *SystemConfig) GetIndex(key roachpb.Key) (int, bool) {
   165  	l := len(s.Values)
   166  	index := sort.Search(l, func(i int) bool {
   167  		return bytes.Compare(s.Values[i].Key, key) >= 0
   168  	})
   169  	if index == l || !key.Equal(s.Values[index].Key) {
   170  		return 0, false
   171  	}
   172  	return index, true
   173  }
   174  
   175  // GetLargestObjectID returns the largest object ID found in the config which is
   176  // less than or equal to maxID. If maxID is 0, returns the largest ID in the
   177  // config.
   178  func (s *SystemConfig) GetLargestObjectID(maxID uint32) (uint32, error) {
   179  	testingLock.Lock()
   180  	hook := testingLargestIDHook
   181  	testingLock.Unlock()
   182  	if hook != nil {
   183  		return hook(maxID), nil
   184  	}
   185  
   186  	// Search for the descriptor table entries within the SystemConfig.
   187  	highBound := keys.TODOSQLCodec.TablePrefix(keys.DescriptorTableID + 1)
   188  	highIndex := sort.Search(len(s.Values), func(i int) bool {
   189  		return bytes.Compare(s.Values[i].Key, highBound) >= 0
   190  	})
   191  	lowBound := keys.TODOSQLCodec.TablePrefix(keys.DescriptorTableID)
   192  	lowIndex := sort.Search(len(s.Values), func(i int) bool {
   193  		return bytes.Compare(s.Values[i].Key, lowBound) >= 0
   194  	})
   195  
   196  	if highIndex == lowIndex {
   197  		return 0, fmt.Errorf("descriptor table not found in system config of %d values", len(s.Values))
   198  	}
   199  
   200  	// No maximum specified; maximum ID is the last entry in the descriptor
   201  	// table.
   202  	if maxID == 0 {
   203  		id, err := keys.TODOSQLCodec.DecodeDescMetadataID(s.Values[highIndex-1].Key)
   204  		if err != nil {
   205  			return 0, err
   206  		}
   207  		return uint32(id), nil
   208  	}
   209  
   210  	// Maximum specified: need to search the descriptor table.  Binary search
   211  	// through all descriptor table values to find the first descriptor with ID
   212  	// >= maxID.
   213  	searchSlice := s.Values[lowIndex:highIndex]
   214  	var err error
   215  	maxIdx := sort.Search(len(searchSlice), func(i int) bool {
   216  		var id uint64
   217  		id, err = keys.TODOSQLCodec.DecodeDescMetadataID(searchSlice[i].Key)
   218  		if err != nil {
   219  			return false
   220  		}
   221  		return uint32(id) >= maxID
   222  	})
   223  	if err != nil {
   224  		return 0, err
   225  	}
   226  
   227  	// If we found an index within the list, maxIdx might point to a descriptor
   228  	// with exactly maxID.
   229  	if maxIdx < len(searchSlice) {
   230  		id, err := keys.TODOSQLCodec.DecodeDescMetadataID(searchSlice[maxIdx].Key)
   231  		if err != nil {
   232  			return 0, err
   233  		}
   234  		if uint32(id) == maxID {
   235  			return uint32(id), nil
   236  		}
   237  	}
   238  
   239  	if maxIdx == 0 {
   240  		return 0, fmt.Errorf("no descriptors present with ID < %d", maxID)
   241  	}
   242  
   243  	// Return ID of the immediately preceding descriptor.
   244  	id, err := keys.TODOSQLCodec.DecodeDescMetadataID(searchSlice[maxIdx-1].Key)
   245  	if err != nil {
   246  		return 0, err
   247  	}
   248  	return uint32(id), nil
   249  }
   250  
   251  // GetZoneConfigForKey looks up the zone config for the object (table
   252  // or database, specified by key.id). It is the caller's
   253  // responsibility to ensure that the range does not need to be split.
   254  func (s *SystemConfig) GetZoneConfigForKey(key roachpb.RKey) (*zonepb.ZoneConfig, error) {
   255  	return s.getZoneConfigForKey(DecodeKeyIntoZoneIDAndSuffix(key))
   256  }
   257  
   258  // DecodeKeyIntoZoneIDAndSuffix figures out the zone that the key belongs to.
   259  func DecodeKeyIntoZoneIDAndSuffix(key roachpb.RKey) (id uint32, keySuffix []byte) {
   260  	objectID, keySuffix, ok := DecodeObjectID(key)
   261  	if !ok {
   262  		// Not in the structured data namespace.
   263  		objectID = keys.RootNamespaceID
   264  	} else if objectID <= keys.MaxSystemConfigDescID || isPseudoTableID(objectID) {
   265  		// For now, you cannot set the zone config on gossiped tables. The only
   266  		// way to set a zone config on these tables is to modify config for the
   267  		// system database as a whole. This is largely because all the
   268  		// "system config" tables are colocated in the same range by default and
   269  		// thus couldn't be managed separately.
   270  		// Furthermore pseudo-table ids should be considered to be a part of the
   271  		// system database as they aren't real tables.
   272  		objectID = keys.SystemDatabaseID
   273  	}
   274  
   275  	// Special-case known system ranges to their special zone configs.
   276  	if key.Equal(roachpb.RKeyMin) || bytes.HasPrefix(key, keys.Meta1Prefix) || bytes.HasPrefix(key, keys.Meta2Prefix) {
   277  		objectID = keys.MetaRangesID
   278  	} else if bytes.HasPrefix(key, keys.SystemPrefix) {
   279  		if bytes.HasPrefix(key, keys.NodeLivenessPrefix) {
   280  			objectID = keys.LivenessRangesID
   281  		} else if bytes.HasPrefix(key, keys.TimeseriesPrefix) {
   282  			objectID = keys.TimeseriesRangesID
   283  		} else {
   284  			objectID = keys.SystemRangesID
   285  		}
   286  	}
   287  	return objectID, keySuffix
   288  }
   289  
   290  // isPseudoTableID returns true if id is in keys.PseudoTableIDs.
   291  func isPseudoTableID(id uint32) bool {
   292  	for _, pseudoTableID := range keys.PseudoTableIDs {
   293  		if id == pseudoTableID {
   294  			return true
   295  		}
   296  	}
   297  	return false
   298  }
   299  
   300  // GetZoneConfigForObject returns the combined zone config for the given object
   301  // identifier.
   302  // NOTE: any subzones from the zone placeholder will be automatically merged
   303  // into the cached zone so the caller doesn't need special-case handling code.
   304  func (s *SystemConfig) GetZoneConfigForObject(id uint32) (*zonepb.ZoneConfig, error) {
   305  	entry, err := s.getZoneEntry(id)
   306  	if err != nil {
   307  		return nil, err
   308  	}
   309  	return entry.combined, nil
   310  }
   311  
   312  // getZoneEntry returns the zone entry for the given object ID. In the fast
   313  // path, the zone is already in the cache, and is directly returned. Otherwise,
   314  // getZoneEntry will hydrate new zonepb.ZoneConfig(s) from the SystemConfig and install
   315  // them as an entry in the cache.
   316  func (s *SystemConfig) getZoneEntry(id uint32) (zoneEntry, error) {
   317  	s.mu.RLock()
   318  	entry, ok := s.mu.zoneCache[id]
   319  	s.mu.RUnlock()
   320  	if ok {
   321  		return entry, nil
   322  	}
   323  	testingLock.Lock()
   324  	hook := ZoneConfigHook
   325  	testingLock.Unlock()
   326  	zone, placeholder, cache, err := hook(s, id)
   327  	if err != nil {
   328  		return zoneEntry{}, err
   329  	}
   330  	if zone != nil {
   331  		entry := zoneEntry{zone: zone, placeholder: placeholder, combined: zone}
   332  		if placeholder != nil {
   333  			// Merge placeholder with zone by copying over subzone information.
   334  			// Placeholders should only define the Subzones and SubzoneSpans fields.
   335  			combined := *zone
   336  			combined.Subzones = placeholder.Subzones
   337  			combined.SubzoneSpans = placeholder.SubzoneSpans
   338  			entry.combined = &combined
   339  		}
   340  
   341  		if cache {
   342  			s.mu.Lock()
   343  			s.mu.zoneCache[id] = entry
   344  			s.mu.Unlock()
   345  		}
   346  		return entry, nil
   347  	}
   348  	return zoneEntry{}, nil
   349  }
   350  
   351  func (s *SystemConfig) getZoneConfigForKey(
   352  	id uint32, keySuffix []byte,
   353  ) (*zonepb.ZoneConfig, error) {
   354  	entry, err := s.getZoneEntry(id)
   355  	if err != nil {
   356  		return nil, err
   357  	}
   358  	if entry.zone != nil {
   359  		if entry.placeholder != nil {
   360  			if subzone, _ := entry.placeholder.GetSubzoneForKeySuffix(keySuffix); subzone != nil {
   361  				if indexSubzone := entry.placeholder.GetSubzone(subzone.IndexID, ""); indexSubzone != nil {
   362  					subzone.Config.InheritFromParent(&indexSubzone.Config)
   363  				}
   364  				subzone.Config.InheritFromParent(entry.zone)
   365  				return &subzone.Config, nil
   366  			}
   367  		} else if subzone, _ := entry.zone.GetSubzoneForKeySuffix(keySuffix); subzone != nil {
   368  			if indexSubzone := entry.zone.GetSubzone(subzone.IndexID, ""); indexSubzone != nil {
   369  				subzone.Config.InheritFromParent(&indexSubzone.Config)
   370  			}
   371  			subzone.Config.InheritFromParent(entry.zone)
   372  			return &subzone.Config, nil
   373  		}
   374  		return entry.zone, nil
   375  	}
   376  	return s.DefaultZoneConfig, nil
   377  }
   378  
// staticSplits lists the fixed split points in the system keyspace. The
// entries must be kept in ascending key order because ComputeSplitKey walks
// them in sequence. StaticSplits returns this slice directly.
var staticSplits = []roachpb.RKey{
	roachpb.RKey(keys.NodeLivenessPrefix),           // end of meta records / start of node liveness span
	roachpb.RKey(keys.NodeLivenessKeyMax),           // end of node liveness span
	roachpb.RKey(keys.TimeseriesPrefix),             // start of timeseries span
	roachpb.RKey(keys.TimeseriesPrefix.PrefixEnd()), // end of timeseries span
	roachpb.RKey(keys.TableDataMin),                 // end of system ranges / start of system config tables
}
   386  
   387  // StaticSplits are predefined split points in the system keyspace.
   388  // Corresponding ranges are created at cluster bootstrap time.
   389  //
   390  // There are two reasons for a static split. First, spans that are critical to
   391  // cluster stability, like the node liveness span, are split into their own
   392  // ranges to ease debugging (see #17297). Second, spans in the system keyspace
   393  // that can be targeted by zone configs, like the meta span and the timeseries
   394  // span, are split off into their own ranges because zone configs cannot apply
   395  // to fractions of a range.
   396  //
   397  // Note that these are not the only splits created at cluster bootstrap; splits
   398  // between various system tables are also created.
   399  func StaticSplits() []roachpb.RKey {
   400  	return staticSplits
   401  }
   402  
   403  // ComputeSplitKey takes a start and end key and returns the first key at which
   404  // to split the span [start, end). Returns nil if no splits are required.
   405  //
   406  // Splits are required between user tables (i.e. /table/<id>), at the start
   407  // of the system-config tables (i.e. /table/0), and at certain points within the
   408  // system ranges that come before the system tables. The system-config range is
   409  // somewhat special in that it can contain multiple SQL tables
   410  // (/table/0-/table/<max-system-config-desc>) within a single range.
   411  func (s *SystemConfig) ComputeSplitKey(startKey, endKey roachpb.RKey) (rr roachpb.RKey) {
   412  	// Before dealing with splits necessitated by SQL tables, handle all of the
   413  	// static splits earlier in the keyspace. Note that this list must be kept in
   414  	// the proper order (ascending in the keyspace) for the logic below to work.
   415  	//
   416  	// For new clusters, the static splits correspond to ranges created at
   417  	// bootstrap time. Older stores might be used with a version with more
   418  	// staticSplits though, in which case this code is useful.
   419  	for _, split := range staticSplits {
   420  		if startKey.Less(split) {
   421  			if split.Less(endKey) {
   422  				// The split point is contained within [startKey, endKey), so we need to
   423  				// create the split.
   424  				return split
   425  			}
   426  			// [startKey, endKey) is contained between the previous split point and
   427  			// this split point.
   428  			return nil
   429  		}
   430  		// [startKey, endKey) is somewhere greater than this split point. Continue.
   431  	}
   432  
   433  	// If the above iteration over the static split points didn't decide anything,
   434  	// the key range must be somewhere in the SQL table part of the keyspace.
   435  	startID, _, ok := DecodeObjectID(startKey)
   436  	if !ok || startID <= keys.MaxSystemConfigDescID {
   437  		// The start key is either:
   438  		// - not part of the structured data span
   439  		// - part of the system span
   440  		// In either case, start looking for splits at the first ID usable
   441  		// by the user data span.
   442  		startID = keys.MaxSystemConfigDescID + 1
   443  	}
   444  
   445  	// Build key prefixes for sequential table IDs until we reach endKey. Note
   446  	// that there are two disjoint sets of sequential keys: non-system reserved
   447  	// tables have sequential IDs, as do user tables, but the two ranges contain a
   448  	// gap.
   449  
   450  	// findSplitKey returns the first possible split key between the given range
   451  	// of IDs.
   452  	findSplitKey := func(startID, endID uint32) roachpb.RKey {
   453  		// endID could be smaller than startID if we don't have user tables.
   454  		for id := startID; id <= endID; id++ {
   455  			tableKey := roachpb.RKey(keys.TODOSQLCodec.TablePrefix(id))
   456  			// This logic is analogous to the well-commented static split logic above.
   457  			if startKey.Less(tableKey) && s.shouldSplit(id) {
   458  				if tableKey.Less(endKey) {
   459  					return tableKey
   460  				}
   461  				return nil
   462  			}
   463  
   464  			zoneVal := s.GetValue(MakeZoneKey(id))
   465  			if zoneVal == nil {
   466  				continue
   467  			}
   468  			var zone zonepb.ZoneConfig
   469  			if err := zoneVal.GetProto(&zone); err != nil {
   470  				// An error while decoding the zone proto is unfortunate, but logging a
   471  				// message here would be excessively spammy. Just move on, which
   472  				// effectively assumes there are no subzones for this table.
   473  				continue
   474  			}
   475  			// This logic is analogous to the well-commented static split logic above.
   476  			for _, s := range zone.SubzoneSplits() {
   477  				subzoneKey := append(tableKey, s...)
   478  				if startKey.Less(subzoneKey) {
   479  					if subzoneKey.Less(endKey) {
   480  						return subzoneKey
   481  					}
   482  					return nil
   483  				}
   484  			}
   485  		}
   486  		return nil
   487  	}
   488  
   489  	// If the startKey falls within the non-system reserved range, compute those
   490  	// keys first.
   491  	if startID <= keys.MaxReservedDescID {
   492  		endID, err := s.GetLargestObjectID(keys.MaxReservedDescID)
   493  		if err != nil {
   494  			log.Errorf(context.TODO(), "unable to determine largest reserved object ID from system config: %s", err)
   495  			return nil
   496  		}
   497  		if splitKey := findSplitKey(startID, endID); splitKey != nil {
   498  			return splitKey
   499  		}
   500  		startID = keys.MaxReservedDescID + 1
   501  	}
   502  
   503  	// Find the split key in the user space.
   504  	endID, err := s.GetLargestObjectID(0)
   505  	if err != nil {
   506  		log.Errorf(context.TODO(), "unable to determine largest object ID from system config: %s", err)
   507  		return nil
   508  	}
   509  	return findSplitKey(startID, endID)
   510  }
   511  
   512  // NeedsSplit returns whether the range [startKey, endKey) needs a split due
   513  // to zone configs.
   514  func (s *SystemConfig) NeedsSplit(startKey, endKey roachpb.RKey) bool {
   515  	return len(s.ComputeSplitKey(startKey, endKey)) > 0
   516  }
   517  
   518  // shouldSplit checks if the ID is eligible for a split at all.
   519  // It uses the internal cache to find a value, and tries to find
   520  // it using the hook if ID isn't found in the cache.
   521  func (s *SystemConfig) shouldSplit(ID uint32) bool {
   522  	// Check the cache.
   523  	{
   524  		s.mu.RLock()
   525  		shouldSplit, ok := s.mu.shouldSplitCache[ID]
   526  		s.mu.RUnlock()
   527  		if ok {
   528  			return shouldSplit
   529  		}
   530  	}
   531  
   532  	var shouldSplit bool
   533  	if ID < keys.MinUserDescID {
   534  		// The ID might be one of the reserved IDs that refer to ranges but not any
   535  		// actual descriptors.
   536  		shouldSplit = true
   537  	} else {
   538  		desc := s.GetDesc(keys.TODOSQLCodec.DescMetadataKey(ID))
   539  		shouldSplit = desc != nil && sqlbase.ShouldSplitAtDesc(desc)
   540  	}
   541  	// Populate the cache.
   542  	s.mu.Lock()
   543  	s.mu.shouldSplitCache[ID] = shouldSplit
   544  	s.mu.Unlock()
   545  	return shouldSplit
   546  }