github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_gossip.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/config"
	"github.com/cockroachdb/cockroach/pkg/gossip"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/errors"
)

const configGossipTTL = 0 // does not expire

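// gossipFirstRange adds the sentinel (which carries the cluster ID) and the
// first range descriptor to gossip. See maybeGossipFirstRange, which calls
// this once a range lease has been obtained.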
func (r *Replica) gossipFirstRange(ctx context.Context) {
	r.mu.Lock()
	defer r.mu.Unlock()
	// Gossip is not provided for the bootstrap store and for some tests.
	if r.store.Gossip() == nil {
		return
	}
	log.Event(ctx, "gossiping sentinel and first range")
	if log.V(1) {
		log.Infof(ctx, "gossiping sentinel from store %d, r%d", r.store.StoreID(), r.RangeID)
	}
	if err := r.store.Gossip().AddInfo(
		gossip.KeySentinel, r.store.ClusterID().GetBytes(),
		r.store.cfg.SentinelGossipTTL()); err != nil {
		log.Errorf(ctx, "failed to gossip sentinel: %+v", err)
	}
	if log.V(1) {
		log.Infof(ctx, "gossiping first range from store %d, r%d: %s",
			r.store.StoreID(), r.RangeID, r.mu.state.Desc.Replicas())
	}
	if err := r.store.Gossip().AddInfoProto(
		gossip.KeyFirstRangeDescriptor, r.mu.state.Desc, configGossipTTL); err != nil {
		log.Errorf(ctx, "failed to gossip first range metadata: %+v", err)
	}
}

// shouldGossip returns true if this replica should be gossiping. Gossip is
// inherently inconsistent and asynchronous; we use the lease as a way to
// ensure that only one node gossips at a time.
func (r *Replica) shouldGossip() bool {
	return r.OwnsValidLease(r.store.Clock().Now())
}

// MaybeGossipSystemConfig scans the entire SystemConfig span and gossips it.
// Further calls come from the trigger on EndTxn or range lease acquisition.
//
// Note that MaybeGossipSystemConfig gossips information only when the
// lease is actually held. The method does not request a range lease
// here since RequestLease and applyRaftCommand call the method and we
// need to avoid deadlocking in redirectOnOrAcquireLease.
//
// MaybeGossipSystemConfig must only be called from Raft commands
// (which provide the necessary serialization to avoid data races).
//
// TODO(nvanbenschoten,bdarnell): even though this is best effort, we
// should log louder when we continually fail to gossip system config.
func (r *Replica) MaybeGossipSystemConfig(ctx context.Context) error {
	if r.store.Gossip() == nil {
		log.VEventf(ctx, 2, "not gossiping system config because gossip isn't initialized")
		return nil
	}
	if !r.IsInitialized() {
		log.VEventf(ctx, 2, "not gossiping system config because the replica isn't initialized")
		return nil
	}
	if !r.ContainsKey(keys.SystemConfigSpan.Key) {
		log.VEventf(ctx, 3,
			"not gossiping system config because the replica doesn't contain the system config's start key")
		return nil
	}
	if !r.shouldGossip() {
		log.VEventf(ctx, 2, "not gossiping system config because the replica doesn't hold the lease")
		return nil
	}

	// TODO(marc): check for bad split in the middle of the SystemConfig span.
	loadedCfg, err := r.loadSystemConfig(ctx)
	if err != nil {
		if errors.Is(err, errSystemConfigIntent) {
			log.VEventf(ctx, 2, "not gossiping system config because intents were found on SystemConfigSpan")
			r.markSystemConfigGossipFailed()
			return nil
		}
		return errors.Wrap(err, "could not load SystemConfig span")
	}

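	// Skip gossiping when the loaded config matches what is already in gossip
	// and this node was the one that gossiped it; if the existing info
	// originated on another node, re-gossip it from here.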
	if gossipedCfg := r.store.Gossip().GetSystemConfig(); gossipedCfg != nil && gossipedCfg.Equal(loadedCfg) &&
		r.store.Gossip().InfoOriginatedHere(gossip.KeySystemConfig) {
		log.VEventf(ctx, 2, "not gossiping unchanged system config")
		// Clear the failure bit if all intents have been resolved but there's
		// nothing new to gossip.
		r.markSystemConfigGossipSuccess()
		return nil
	}

	log.VEventf(ctx, 2, "gossiping system config")
	if err := r.store.Gossip().AddInfoProto(gossip.KeySystemConfig, loadedCfg, 0); err != nil {
		return errors.Wrap(err, "failed to gossip system config")
	}
	r.markSystemConfigGossipSuccess()
	return nil
}

// MaybeGossipSystemConfigIfHaveFailure is a trigger to gossip the system config
// due to an abort of a transaction keyed in the system config span. It will
// call MaybeGossipSystemConfig if failureToGossipSystemConfig is true.
func (r *Replica) MaybeGossipSystemConfigIfHaveFailure(ctx context.Context) error {
	r.mu.RLock()
	failed := r.mu.failureToGossipSystemConfig
	r.mu.RUnlock()
	if !failed {
		return nil
	}
	return r.MaybeGossipSystemConfig(ctx)
}

// MaybeGossipNodeLiveness gossips information for all node liveness
// records stored on this range. To scan and gossip, this replica
// must hold the lease for a range that contains some or all of the
// node liveness records. After scanning the records, it checks
// against what's already in gossip and only gossips records which
// are out of date.
func (r *Replica) MaybeGossipNodeLiveness(ctx context.Context, span roachpb.Span) error {
	if r.store.Gossip() == nil || !r.IsInitialized() {
		return nil
	}

	if !r.ContainsKeyRange(span.Key, span.EndKey) || !r.shouldGossip() {
		return nil
	}

	ba := roachpb.BatchRequest{}
	ba.Timestamp = r.store.Clock().Now()
	ba.Add(&roachpb.ScanRequest{RequestHeader: roachpb.RequestHeaderFromSpan(span)})
	// Call evaluateBatch instead of Send to avoid reacquiring latches.
	rec := NewReplicaEvalContext(r, todoSpanSet)
	rw := r.Engine().NewReadOnly()
	defer rw.Close()

	br, result, pErr :=
		evaluateBatch(ctx, kvserverbase.CmdIDKey(""), rw, rec, nil, &ba, true /* readOnly */)
	if pErr != nil {
		return errors.Wrapf(pErr.GoError(), "couldn't scan node liveness records in span %s", span)
	}
	if len(result.Local.EncounteredIntents) > 0 {
		return errors.Errorf("unexpected intents on node liveness span %s: %+v", span, result.Local.EncounteredIntents)
	}
	kvs := br.Responses[0].GetInner().(*roachpb.ScanResponse).Rows
	log.VEventf(ctx, 2, "gossiping %d node liveness record(s) from span %s", len(kvs), span)
	for _, kv := range kvs {
		var kvLiveness, gossipLiveness kvserverpb.Liveness
		if err := kv.Value.GetProto(&kvLiveness); err != nil {
			return errors.Wrapf(err, "failed to unmarshal liveness value %s", kv.Key)
		}
		key := gossip.MakeNodeLivenessKey(kvLiveness.NodeID)
		// Look up liveness from gossip; skip gossiping anew if unchanged.
		if err := r.store.Gossip().GetInfoProto(key, &gossipLiveness); err == nil {
			if gossipLiveness == kvLiveness && r.store.Gossip().InfoOriginatedHere(key) {
				continue
			}
		}
		if err := r.store.Gossip().AddInfoProto(key, &kvLiveness, 0); err != nil {
			return errors.Wrapf(err, "failed to gossip node liveness (%+v)", kvLiveness)
		}
	}
	return nil
}

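// errSystemConfigIntent is returned by loadSystemConfig when intents are found
// on the SystemConfig span; the gossip attempt is marked as failed and retried
// later, once the intents have been resolved.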
var errSystemConfigIntent = errors.New("must retry later due to intent on SystemConfigSpan")

// loadSystemConfig scans the system config span and returns the system
// config.
func (r *Replica) loadSystemConfig(ctx context.Context) (*config.SystemConfigEntries, error) {
	ba := roachpb.BatchRequest{}
	ba.ReadConsistency = roachpb.INCONSISTENT
	ba.Timestamp = r.store.Clock().Now()
	ba.Add(&roachpb.ScanRequest{RequestHeader: roachpb.RequestHeaderFromSpan(keys.SystemConfigSpan)})
	// Call evaluateBatch instead of Send to avoid reacquiring latches.
	rec := NewReplicaEvalContext(r, todoSpanSet)
	rw := r.Engine().NewReadOnly()
	defer rw.Close()

	br, result, pErr := evaluateBatch(
		ctx, kvserverbase.CmdIDKey(""), rw, rec, nil, &ba, true, /* readOnly */
	)
	if pErr != nil {
		return nil, pErr.GoError()
	}
	if intents := result.Local.DetachEncounteredIntents(); len(intents) > 0 {
		// There were intents, so what we read may not be consistent. Attempt
		// to nudge the intents in case they're expired; next time around we'll
		// hopefully have more luck.
		// This is called from handleReadWriteLocalEvalResult (with raftMu
		// locked), so disallow synchronous processing (which blocks that mutex
		// for too long and is a potential deadlock).
		if err := r.store.intentResolver.CleanupIntentsAsync(ctx, intents, false /* allowSync */); err != nil {
			log.Warningf(ctx, "%v", err)
		}
		return nil, errSystemConfigIntent
	}
	kvs := br.Responses[0].GetInner().(*roachpb.ScanResponse).Rows
	sysCfg := &config.SystemConfigEntries{}
	sysCfg.Values = kvs
	return sysCfg, nil
}

// getLeaseForGossip tries to obtain a range lease. Only one of the replicas
// should gossip; the bool returned indicates whether it's us.
func (r *Replica) getLeaseForGossip(ctx context.Context) (bool, *roachpb.Error) {
	// If no Gossip is available (some tests) or the range isn't initialized
	// yet, return an error.
	if r.store.Gossip() == nil || !r.IsInitialized() {
		return false, roachpb.NewErrorf("no gossip or range not initialized")
	}
	var hasLease bool
	var pErr *roachpb.Error
	if err := r.store.Stopper().RunTask(
		ctx, "storage.Replica: acquiring lease to gossip",
		func(ctx context.Context) {
			// Check for or obtain the lease, if none active.
			_, pErr = r.redirectOnOrAcquireLease(ctx)
			hasLease = pErr == nil
			if pErr != nil {
				switch e := pErr.GetDetail().(type) {
				case *roachpb.NotLeaseHolderError:
					// NotLeaseHolderError means there is an active lease, but only if
					// the lease holder is set; otherwise, it's likely a timeout.
					if e.LeaseHolder != nil {
						pErr = nil
					}
				default:
					// Any other error is worth logging visibly.
					log.Warningf(ctx, "could not acquire lease for range gossip: %s", e)
				}
			}
		}); err != nil {
		pErr = roachpb.NewError(err)
	}
	return hasLease, pErr
}

// maybeGossipFirstRange adds the sentinel and first range metadata to gossip
// if this is the first range and a range lease can be obtained. The Store
// calls this periodically on first range replicas.
func (r *Replica) maybeGossipFirstRange(ctx context.Context) *roachpb.Error {
	if !r.IsFirstRange() {
		return nil
	}

	// When multiple nodes are initialized with overlapping Gossip addresses, they all
	// will attempt to gossip their cluster ID. This is a fairly obvious misconfiguration,
	// so we fatal below.
	if gossipClusterID, err := r.store.Gossip().GetClusterID(); err == nil {
		if gossipClusterID != r.store.ClusterID() {
			log.Fatalf(
				ctx, "store %d belongs to cluster %s, but attempted to join cluster %s via gossip",
				r.store.StoreID(), r.store.ClusterID(), gossipClusterID)
		}
	}

	// Gossip the cluster ID from all replicas of the first range; there
	// is no expiration on the cluster ID.
	if log.V(1) {
		log.Infof(ctx, "gossiping cluster id %q from store %d, r%d", r.store.ClusterID(),
			r.store.StoreID(), r.RangeID)
	}
	if err := r.store.Gossip().AddClusterID(r.store.ClusterID()); err != nil {
		log.Errorf(ctx, "failed to gossip cluster ID: %+v", err)
	}

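	// Periodic gossiping of the first range can be disabled by tests; in that
	// case this function only gossips the cluster ID above.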
	if r.store.cfg.TestingKnobs.DisablePeriodicGossips {
		return nil
	}

	hasLease, pErr := r.getLeaseForGossip(ctx)
	if pErr != nil {
		return pErr
	} else if !hasLease {
		return nil
	}
	r.gossipFirstRange(ctx)
	return nil
}