github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/stores.go

// Copyright 2014 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"context"
	"fmt"
	"unsafe"

	"github.com/cockroachdb/cockroach/pkg/clusterversion"
	"github.com/cockroachdb/cockroach/pkg/gossip"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/errors"
)

// Stores provides methods to access a collection of stores. There's
// a visitor pattern and also an implementation of the kv.Sender
// interface which directs a call to the appropriate store based on
// the call's key range. Stores also implements the gossip.Storage
// interface, which allows gossip bootstrap information to be
// persisted consistently to every store and the most recent bootstrap
// information to be read at node startup.
type Stores struct {
	log.AmbientContext
	clock    *hlc.Clock
	storeMap syncutil.IntMap // map[roachpb.StoreID]*Store

	mu struct {
		syncutil.Mutex
		biLatestTS hlc.Timestamp         // Timestamp of gossip bootstrap info
		latestBI   *gossip.BootstrapInfo // Latest cached bootstrap info
	}
}

var _ kv.Sender = &Stores{}      // Stores implements the kv.Sender interface
var _ gossip.Storage = &Stores{} // Stores implements the gossip.Storage interface

// NewStores returns a local-only sender which directly accesses
// a collection of stores.
func NewStores(ambient log.AmbientContext, clock *hlc.Clock) *Stores {
	return &Stores{
		AmbientContext: ambient,
		clock:          clock,
	}
}

// IsMeta1Leaseholder returns whether one of this node's stores owns
// the meta1 lease. Returns an error on any failure.
func (ls *Stores) IsMeta1Leaseholder(now hlc.Timestamp) (bool, error) {
	repl, _, err := ls.GetReplicaForRangeID(1)
	if roachpb.IsRangeNotFoundError(err) {
		return false, nil
	}
	if err != nil {
		return false, err
	}
	return repl.OwnsValidLease(now), nil
}

// GetStoreCount returns the number of stores this node is exporting.
func (ls *Stores) GetStoreCount() int {
	var count int
	ls.storeMap.Range(func(_ int64, _ unsafe.Pointer) bool {
		count++
		return true
	})
	return count
}

// HasStore returns true if the specified store is owned by this Stores.
func (ls *Stores) HasStore(storeID roachpb.StoreID) bool {
	_, ok := ls.storeMap.Load(int64(storeID))
	return ok
}

// GetStore looks up the store by store ID. Returns an error
// if not found.
func (ls *Stores) GetStore(storeID roachpb.StoreID) (*Store, error) {
	if value, ok := ls.storeMap.Load(int64(storeID)); ok {
		return (*Store)(value), nil
	}
	return nil, roachpb.NewStoreNotFoundError(storeID)
}
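
// Hypothetical illustration (not part of the original file) of the
// unsafe.Pointer round trip used by storeMap: values are stored as
// unsafe.Pointer and recovered with a direct conversion, which is safe
// here because only *Store values are ever inserted.
// exampleStoreMapRoundTrip is an illustrative name.
func exampleStoreMapRoundTrip(ls *Stores, s *Store) *Store {
	ls.storeMap.LoadOrStore(int64(s.Ident.StoreID), unsafe.Pointer(s))
	v, _ := ls.storeMap.Load(int64(s.Ident.StoreID))
	return (*Store)(v)
}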

// AddStore adds the specified store to the store map.
func (ls *Stores) AddStore(s *Store) {
	if _, loaded := ls.storeMap.LoadOrStore(int64(s.Ident.StoreID), unsafe.Pointer(s)); loaded {
		panic(fmt.Sprintf("cannot add store twice: %+v", s.Ident))
	}
	// If we've already read the gossip bootstrap info, ensure that
	// all stores have the most recent values.
	ls.mu.Lock()
	defer ls.mu.Unlock()
	if ls.mu.biLatestTS != (hlc.Timestamp{}) {
		if err := ls.updateBootstrapInfoLocked(ls.mu.latestBI); err != nil {
			ctx := ls.AnnotateCtx(context.TODO())
			log.Errorf(ctx, "failed to update bootstrap info on newly added store: %+v", err)
		}
	}
}

// RemoveStore removes the specified store from the store map.
func (ls *Stores) RemoveStore(s *Store) {
	ls.storeMap.Delete(int64(s.Ident.StoreID))
}

// VisitStores implements a visitor pattern over stores in the
// storeMap. The specified function is invoked with each store in
// turn. Care is taken to invoke the visitor func without the lock
// held to avoid inconsistent lock orderings, as some visitor
// functions may call back into the Stores object. Stores are visited
// in random order.
func (ls *Stores) VisitStores(visitor func(s *Store) error) error {
	var err error
	ls.storeMap.Range(func(k int64, v unsafe.Pointer) bool {
		err = visitor((*Store)(v))
		return err == nil
	})
	return err
}
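
// Hypothetical usage sketch (not part of the original file): collecting the
// IDs of all local stores via the visitor. Returning a non-nil error from
// the visitor stops the iteration early; here the visitor never fails.
// exampleCollectStoreIDs is an illustrative name.
func exampleCollectStoreIDs(ls *Stores) []roachpb.StoreID {
	var ids []roachpb.StoreID
	_ = ls.VisitStores(func(s *Store) error {
		ids = append(ids, s.Ident.StoreID)
		return nil
	})
	return ids
}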

// GetReplicaForRangeID returns the replica and store which contain the
// specified range. If the replica is not found on any store then a
// roachpb.RangeNotFoundError is returned.
func (ls *Stores) GetReplicaForRangeID(
	rangeID roachpb.RangeID,
) (replica *Replica, store *Store, err error) {
	err = ls.VisitStores(func(s *Store) error {
		r, err := s.GetReplica(rangeID)
		if err == nil {
			replica, store = r, s
			return nil
		}
		if errors.HasType(err, (*roachpb.RangeNotFoundError)(nil)) {
			return nil
		}
		return err
	})
	if err != nil {
		return nil, nil, err
	}
	if replica == nil {
		return nil, nil, roachpb.NewRangeNotFoundError(rangeID, 0)
	}
	return replica, store, nil
}
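
// Hypothetical usage sketch (not part of the original file): looking up a
// range's replica across this node's stores while treating "range not
// found" as an expected miss rather than a hard error, mirroring what
// IsMeta1Leaseholder does for range 1. exampleFindReplica is an
// illustrative name.
func exampleFindReplica(ls *Stores, rangeID roachpb.RangeID) (*Replica, bool, error) {
	repl, _, err := ls.GetReplicaForRangeID(rangeID)
	if roachpb.IsRangeNotFoundError(err) {
		// No store on this node holds a replica of the range.
		return nil, false, nil
	}
	if err != nil {
		return nil, false, err
	}
	return repl, true, nil
}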

// Send implements the kv.Sender interface. The store is looked up from the
// store map using the ID specified in the request.
func (ls *Stores) Send(
	ctx context.Context, ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
	if err := ba.ValidateForEvaluation(); err != nil {
		log.Fatalf(ctx, "invalid batch (%s): %s", ba, err)
	}

	store, err := ls.GetStore(ba.Replica.StoreID)
	if err != nil {
		return nil, roachpb.NewError(err)
	}

	br, pErr := store.Send(ctx, ba)
	if br != nil && br.Error != nil {
		panic(roachpb.ErrorUnexpectedlySet(store, br))
	}
	return br, pErr
}
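
// Hypothetical sketch (not part of the original file): addressing a batch
// at a specific local store. The batch must identify the target range and
// replica so it passes ValidateForEvaluation and routes to the right store.
// exampleSendGet and its parameters are illustrative.
func exampleSendGet(
	ctx context.Context,
	ls *Stores,
	rangeID roachpb.RangeID,
	replica roachpb.ReplicaDescriptor,
	key roachpb.Key,
) (*roachpb.BatchResponse, *roachpb.Error) {
	var ba roachpb.BatchRequest
	ba.RangeID = rangeID
	ba.Replica = replica
	ba.Add(&roachpb.GetRequest{
		RequestHeader: roachpb.RequestHeader{Key: key},
	})
	// Send looks up the store named by ba.Replica.StoreID and forwards
	// the batch to it.
	return ls.Send(ctx, ba)
}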

// RangeFeed registers a rangefeed over the specified span. It sends updates to
// the provided stream and returns with an optional error when the rangefeed is
// complete.
func (ls *Stores) RangeFeed(
	args *roachpb.RangeFeedRequest, stream roachpb.Internal_RangeFeedServer,
) *roachpb.Error {
	ctx := stream.Context()
	if args.RangeID == 0 {
		log.Fatal(ctx, "rangefeed request missing range ID")
	} else if args.Replica.StoreID == 0 {
		log.Fatal(ctx, "rangefeed request missing store ID")
	}

	store, err := ls.GetStore(args.Replica.StoreID)
	if err != nil {
		return roachpb.NewError(err)
	}

	return store.RangeFeed(args, stream)
}

// ReadBootstrapInfo implements the gossip.Storage interface. It
// attempts to read gossip bootstrap info from every known store and
// initializes the bootstrap info argument with the most recent found.
// Returns an error on any issue reading data from the stores (but
// excluding the case in which no data has been persisted yet).
func (ls *Stores) ReadBootstrapInfo(bi *gossip.BootstrapInfo) error {
	var latestTS hlc.Timestamp

	ctx := ls.AnnotateCtx(context.TODO())
	var err error

	// Find the most recent bootstrap info.
	ls.storeMap.Range(func(k int64, v unsafe.Pointer) bool {
		s := (*Store)(v)
		var storeBI gossip.BootstrapInfo
		var ok bool
		ok, err = storage.MVCCGetProto(ctx, s.engine, keys.StoreGossipKey(), hlc.Timestamp{}, &storeBI,
			storage.MVCCGetOptions{})
		if err != nil {
			return false
		}
		if ok && latestTS.Less(storeBI.Timestamp) {
			latestTS = storeBI.Timestamp
			*bi = storeBI
		}
		return true
	})
	if err != nil {
		return err
	}
	log.Infof(ctx, "read %d node addresses from persistent storage", len(bi.Addresses))

	ls.mu.Lock()
	defer ls.mu.Unlock()
	return ls.updateBootstrapInfoLocked(bi)
}

// WriteBootstrapInfo implements the gossip.Storage interface. It
// persists the supplied bootstrap info to every known store. Returns
// nil on success; otherwise returns the first error encountered
// writing to the stores.
func (ls *Stores) WriteBootstrapInfo(bi *gossip.BootstrapInfo) error {
	ls.mu.Lock()
	defer ls.mu.Unlock()
	bi.Timestamp = ls.clock.Now()
	if err := ls.updateBootstrapInfoLocked(bi); err != nil {
		return err
	}
	ctx := ls.AnnotateCtx(context.TODO())
	log.Infof(ctx, "wrote %d node addresses to persistent storage", len(bi.Addresses))
	return nil
}
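
// Hypothetical sketch (not part of the original file) of the gossip.Storage
// round trip: gossip recovers the freshest persisted bootstrap info at
// startup via ReadBootstrapInfo and persists updates to every store via
// WriteBootstrapInfo. exampleRefreshBootstrapInfo is an illustrative name.
func exampleRefreshBootstrapInfo(ls *Stores) error {
	var bi gossip.BootstrapInfo
	if err := ls.ReadBootstrapInfo(&bi); err != nil {
		return err
	}
	// Re-persisting stamps the info with the current clock time and fans it
	// out to all stores, including any added since the last write.
	return ls.WriteBootstrapInfo(&bi)
}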

func (ls *Stores) updateBootstrapInfoLocked(bi *gossip.BootstrapInfo) error {
	if bi.Timestamp.Less(ls.mu.biLatestTS) {
		return nil
	}
	ctx := ls.AnnotateCtx(context.TODO())
	// Update the latest timestamp and set cached version.
	ls.mu.biLatestTS = bi.Timestamp
	ls.mu.latestBI = protoutil.Clone(bi).(*gossip.BootstrapInfo)
	// Update all stores.
	var err error
	ls.storeMap.Range(func(k int64, v unsafe.Pointer) bool {
		s := (*Store)(v)
		err = storage.MVCCPutProto(ctx, s.engine, nil, keys.StoreGossipKey(), hlc.Timestamp{}, nil, bi)
		return err == nil
	})
	return err
}

// ReadVersionFromEngineOrZero reads the persisted cluster version from the
// engine, falling back to the zero value.
func ReadVersionFromEngineOrZero(
	ctx context.Context, reader storage.Reader,
) (clusterversion.ClusterVersion, error) {
	cv, err := ReadClusterVersion(ctx, reader)
	if err != nil {
		return clusterversion.ClusterVersion{}, err
	}
	return cv, nil
}

// WriteClusterVersionToEngines writes the given version to the given engines.
// Returns nil on success; otherwise returns the first error encountered
// writing to the stores.
//
// WriteClusterVersionToEngines makes no attempt to validate the supplied
// version.
func WriteClusterVersionToEngines(
	ctx context.Context, engines []storage.Engine, cv clusterversion.ClusterVersion,
) error {
	for _, eng := range engines {
		if err := WriteClusterVersion(ctx, eng, cv); err != nil {
			return errors.Wrapf(err, "error writing version to engine %s", eng)
		}
	}
	return nil
}

// SynthesizeClusterVersionFromEngines returns the cluster version that was read
// from the engines or, if none are initialized, binaryMinSupportedVersion.
// Typically all initialized engines will have the same version persisted,
// though ill-timed crashes can result in situations where this is not the
// case. Then, the smallest version seen is returned.
//
// binaryVersion is the version of this binary. An error is returned if
// any engine has a higher version, as this would indicate that this node
// has previously acked the higher cluster version but is now running an
// old binary, which is unsafe.
//
// binaryMinSupportedVersion is the minimum version supported by this binary. An
// error is returned if any engine has a version lower than this.
func SynthesizeClusterVersionFromEngines(
	ctx context.Context,
	engines []storage.Engine,
	binaryVersion, binaryMinSupportedVersion roachpb.Version,
) (clusterversion.ClusterVersion, error) {
	// Track the smallest version seen and the engine it came from.
	type originVersion struct {
		roachpb.Version
		origin string
	}

	maxPossibleVersion := roachpb.Version{Major: 999999} // Sort above any real version.
	minStoreVersion := originVersion{
		Version: maxPossibleVersion,
		origin:  "(no store)",
	}

	// Each engine is checked against binaryVersion as we go, so that the
	// error can name the offending engine. The check against
	// binaryMinSupportedVersion has to wait until after the loop, once the
	// smallest version (and its origin) is known.
	for _, eng := range engines {
		eng := eng.(storage.Reader) // we're read only
		cv, err := ReadVersionFromEngineOrZero(ctx, eng)
		if err != nil {
			return clusterversion.ClusterVersion{}, err
		}
		if cv.Version == (roachpb.Version{}) {
			// This is needed when a node first joins an existing cluster, in
			// which case it won't know what version to use until the first
			// Gossip update comes in.
			cv.Version = binaryMinSupportedVersion
		}

		// Avoid running a binary with a store that is too new. For example,
		// restarting into 1.1 after having upgraded to 1.2 doesn't work.
		if binaryVersion.Less(cv.Version) {
			return clusterversion.ClusterVersion{}, errors.Errorf(
				"cockroach version v%s is incompatible with data in store %s; use version v%s or later",
				binaryVersion, eng, cv.Version)
		}

		// Track the smallest version encountered.
		if cv.Version.Less(minStoreVersion.Version) {
			minStoreVersion.Version = cv.Version
			minStoreVersion.origin = fmt.Sprint(eng)
		}
	}

	// If no version was found, fall back to binaryMinSupportedVersion. This
	// is the case when a brand new node is joining an existing cluster (which
	// may be on any older version this binary supports).
	if minStoreVersion.Version == maxPossibleVersion {
		minStoreVersion.Version = binaryMinSupportedVersion
	}

	cv := clusterversion.ClusterVersion{
		Version: minStoreVersion.Version,
	}
	log.Eventf(ctx, "read ClusterVersion %+v", cv)

	// Avoid running a binary too new for this store. This is what you'd catch
	// if, say, you restarted directly from 1.0 into 1.2 (bumping the min
	// version) without going through 1.1 first. It would also be what you
	// catch if you start 1.1 for the first time (after 1.0) and it crashes
	// half-way through the startup sequence (so now some stores have 1.1, but
	// some 1.0), in which case you are expected to run 1.1 again (hopefully
	// without the crash this time), which would then rewrite all the stores.
	//
	// We only verify this now because as we iterate through the stores, we
	// may not yet have picked up the final versions we're actually planning
	// to use.
	if minStoreVersion.Version.Less(binaryMinSupportedVersion) {
		return clusterversion.ClusterVersion{}, errors.Errorf("store %s, last used with cockroach version v%s, "+
			"is too old for running version v%s (which requires data from v%s or later)",
			minStoreVersion.origin, minStoreVersion.Version, binaryVersion, binaryMinSupportedVersion)
	}
	return cv, nil
}
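
// Hypothetical startup sketch (not part of the original file): synthesize a
// single cluster version across all engines, then write it back so that
// every engine agrees even after an ill-timed crash left them divergent.
// exampleSyncClusterVersion is an illustrative name.
func exampleSyncClusterVersion(
	ctx context.Context,
	engines []storage.Engine,
	binaryVersion, binaryMinSupportedVersion roachpb.Version,
) (clusterversion.ClusterVersion, error) {
	cv, err := SynthesizeClusterVersionFromEngines(ctx, engines, binaryVersion, binaryMinSupportedVersion)
	if err != nil {
		return clusterversion.ClusterVersion{}, err
	}
	// Persist the synthesized version so all engines converge on it.
	if err := WriteClusterVersionToEngines(ctx, engines, cv); err != nil {
		return clusterversion.ClusterVersion{}, err
	}
	return cv, nil
}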

func (ls *Stores) engines() []storage.Engine {
	var engines []storage.Engine
	ls.storeMap.Range(func(_ int64, v unsafe.Pointer) bool {
		engines = append(engines, (*Store)(v).Engine())
		return true // want more
	})
	return engines
}