github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/replica_gossip.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/config"
	"github.com/cockroachdb/cockroach/pkg/gossip"
	"github.com/cockroachdb/cockroach/pkg/keys"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/errors"
)

const configGossipTTL = 0 // does not expire

func (r *Replica) gossipFirstRange(ctx context.Context) {
	r.mu.Lock()
	defer r.mu.Unlock()
	// Gossip is not provided for the bootstrap store and for some tests.
	if r.store.Gossip() == nil {
		return
	}
	log.Event(ctx, "gossiping sentinel and first range")
	if log.V(1) {
		log.Infof(ctx, "gossiping sentinel from store %d, r%d", r.store.StoreID(), r.RangeID)
	}
	if err := r.store.Gossip().AddInfo(
		gossip.KeySentinel, r.store.ClusterID().GetBytes(),
		r.store.cfg.SentinelGossipTTL()); err != nil {
		log.Errorf(ctx, "failed to gossip sentinel: %+v", err)
	}
	if log.V(1) {
		log.Infof(ctx, "gossiping first range from store %d, r%d: %s",
			r.store.StoreID(), r.RangeID, r.mu.state.Desc.Replicas())
	}
	if err := r.store.Gossip().AddInfoProto(
		gossip.KeyFirstRangeDescriptor, r.mu.state.Desc, configGossipTTL); err != nil {
		log.Errorf(ctx, "failed to gossip first range metadata: %+v", err)
	}
}

// shouldGossip returns true if this replica should be gossiping. Gossip is
// inherently inconsistent and asynchronous, we're using the lease as a way to
// ensure that only one node gossips at a time.
func (r *Replica) shouldGossip() bool {
	return r.OwnsValidLease(r.store.Clock().Now())
}

// MaybeGossipSystemConfig scans the entire SystemConfig span and gossips it.
// Further calls come from the trigger on EndTxn or range lease acquisition.
//
// Note that MaybeGossipSystemConfig gossips information only when the
// lease is actually held. The method does not request a range lease
// here since RequestLease and applyRaftCommand call the method and we
// need to avoid deadlocking in redirectOnOrAcquireLease.
//
// MaybeGossipSystemConfig must only be called from Raft commands
// (which provide the necessary serialization to avoid data races).
//
// TODO(nvanbenschoten,bdarnell): even though this is best effort, we
// should log louder when we continually fail to gossip system config.
func (r *Replica) MaybeGossipSystemConfig(ctx context.Context) error {
	if r.store.Gossip() == nil {
		log.VEventf(ctx, 2, "not gossiping system config because gossip isn't initialized")
		return nil
	}
	if !r.IsInitialized() {
		log.VEventf(ctx, 2, "not gossiping system config because the replica isn't initialized")
		return nil
	}
	if !r.ContainsKey(keys.SystemConfigSpan.Key) {
		log.VEventf(ctx, 3,
			"not gossiping system config because the replica doesn't contain the system config's start key")
		return nil
	}
	if !r.shouldGossip() {
		log.VEventf(ctx, 2, "not gossiping system config because the replica doesn't hold the lease")
		return nil
	}

	// TODO(marc): check for bad split in the middle of the SystemConfig span.
	loadedCfg, err := r.loadSystemConfig(ctx)
	if err != nil {
		if errors.Is(err, errSystemConfigIntent) {
			log.VEventf(ctx, 2, "not gossiping system config because intents were found on SystemConfigSpan")
			r.markSystemConfigGossipFailed()
			return nil
		}
		return errors.Wrap(err, "could not load SystemConfig span")
	}

	if gossipedCfg := r.store.Gossip().GetSystemConfig(); gossipedCfg != nil && gossipedCfg.Equal(loadedCfg) &&
		r.store.Gossip().InfoOriginatedHere(gossip.KeySystemConfig) {
		log.VEventf(ctx, 2, "not gossiping unchanged system config")
		// Clear the failure bit if all intents have been resolved but there's
		// nothing new to gossip.
		r.markSystemConfigGossipSuccess()
		return nil
	}

	log.VEventf(ctx, 2, "gossiping system config")
	if err := r.store.Gossip().AddInfoProto(gossip.KeySystemConfig, loadedCfg, 0); err != nil {
		return errors.Wrap(err, "failed to gossip system config")
	}
	r.markSystemConfigGossipSuccess()
	return nil
}

// MaybeGossipSystemConfigIfHaveFailure is a trigger to gossip the system config
// due to an abort of a transaction keyed in the system config span. It will
// call MaybeGossipSystemConfig if failureToGossipSystemConfig is true.
func (r *Replica) MaybeGossipSystemConfigIfHaveFailure(ctx context.Context) error {
	r.mu.RLock()
	failed := r.mu.failureToGossipSystemConfig
	r.mu.RUnlock()
	if !failed {
		return nil
	}
	return r.MaybeGossipSystemConfig(ctx)
}

// MaybeGossipNodeLiveness gossips information for all node liveness
// records stored on this range. To scan and gossip, this replica
// must hold the lease to a range which contains some or all of the
// node liveness records. After scanning the records, it checks
// against what's already in gossip and only gossips records which
// are out of date.
func (r *Replica) MaybeGossipNodeLiveness(ctx context.Context, span roachpb.Span) error {
	if r.store.Gossip() == nil || !r.IsInitialized() {
		return nil
	}

	if !r.ContainsKeyRange(span.Key, span.EndKey) || !r.shouldGossip() {
		return nil
	}

	ba := roachpb.BatchRequest{}
	ba.Timestamp = r.store.Clock().Now()
	ba.Add(&roachpb.ScanRequest{RequestHeader: roachpb.RequestHeaderFromSpan(span)})
	// Call evaluateBatch instead of Send to avoid reacquiring latches.
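	// The scan is evaluated directly against a read-only view of the local
	// engine rather than being routed through the Store; any intents it
	// encounters surface in result.Local.EncounteredIntents and are treated
	// as an error below.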
	rec := NewReplicaEvalContext(r, todoSpanSet)
	rw := r.Engine().NewReadOnly()
	defer rw.Close()

	br, result, pErr :=
		evaluateBatch(ctx, kvserverbase.CmdIDKey(""), rw, rec, nil, &ba, true /* readOnly */)
	if pErr != nil {
		return errors.Wrapf(pErr.GoError(), "couldn't scan node liveness records in span %s", span)
	}
	if len(result.Local.EncounteredIntents) > 0 {
		return errors.Errorf("unexpected intents on node liveness span %s: %+v", span, result.Local.EncounteredIntents)
	}
	kvs := br.Responses[0].GetInner().(*roachpb.ScanResponse).Rows
	log.VEventf(ctx, 2, "gossiping %d node liveness record(s) from span %s", len(kvs), span)
	for _, kv := range kvs {
		var kvLiveness, gossipLiveness kvserverpb.Liveness
		if err := kv.Value.GetProto(&kvLiveness); err != nil {
			return errors.Wrapf(err, "failed to unmarshal liveness value %s", kv.Key)
		}
		key := gossip.MakeNodeLivenessKey(kvLiveness.NodeID)
		// Look up liveness from gossip; skip gossiping anew if unchanged.
		if err := r.store.Gossip().GetInfoProto(key, &gossipLiveness); err == nil {
			if gossipLiveness == kvLiveness && r.store.Gossip().InfoOriginatedHere(key) {
				continue
			}
		}
		if err := r.store.Gossip().AddInfoProto(key, &kvLiveness, 0); err != nil {
			return errors.Wrapf(err, "failed to gossip node liveness (%+v)", kvLiveness)
		}
	}
	return nil
}

var errSystemConfigIntent = errors.New("must retry later due to intent on SystemConfigSpan")

// loadSystemConfig scans the system config span and returns the system
// config.
func (r *Replica) loadSystemConfig(ctx context.Context) (*config.SystemConfigEntries, error) {
	ba := roachpb.BatchRequest{}
	ba.ReadConsistency = roachpb.INCONSISTENT
	ba.Timestamp = r.store.Clock().Now()
	ba.Add(&roachpb.ScanRequest{RequestHeader: roachpb.RequestHeaderFromSpan(keys.SystemConfigSpan)})
	// Call evaluateBatch instead of Send to avoid reacquiring latches.
	rec := NewReplicaEvalContext(r, todoSpanSet)
	rw := r.Engine().NewReadOnly()
	defer rw.Close()

	br, result, pErr := evaluateBatch(
		ctx, kvserverbase.CmdIDKey(""), rw, rec, nil, &ba, true, /* readOnly */
	)
	if pErr != nil {
		return nil, pErr.GoError()
	}
	if intents := result.Local.DetachEncounteredIntents(); len(intents) > 0 {
		// There were intents, so what we read may not be consistent. Attempt
		// to nudge the intents in case they're expired; next time around we'll
		// hopefully have more luck.
		// This is called from handleReadWriteLocalEvalResult (with raftMu
		// locked), so disallow synchronous processing (which blocks that mutex
		// for too long and is a potential deadlock).
		if err := r.store.intentResolver.CleanupIntentsAsync(ctx, intents, false /* allowSync */); err != nil {
			log.Warningf(ctx, "%v", err)
		}
		return nil, errSystemConfigIntent
	}
	kvs := br.Responses[0].GetInner().(*roachpb.ScanResponse).Rows
	sysCfg := &config.SystemConfigEntries{}
	sysCfg.Values = kvs
	return sysCfg, nil
}

// getLeaseForGossip tries to obtain a range lease. Only one of the replicas
// should gossip; the bool returned indicates whether it's us.
func (r *Replica) getLeaseForGossip(ctx context.Context) (bool, *roachpb.Error) {
	// If no Gossip available (some tests) or range too fresh, noop.
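	// Note: unlike the Maybe* helpers above, which silently return nil in
	// this situation, the condition is surfaced to the caller as an error.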
	if r.store.Gossip() == nil || !r.IsInitialized() {
		return false, roachpb.NewErrorf("no gossip or range not initialized")
	}
	var hasLease bool
	var pErr *roachpb.Error
	if err := r.store.Stopper().RunTask(
		ctx, "storage.Replica: acquiring lease to gossip",
		func(ctx context.Context) {
			// Check for or obtain the lease, if none active.
			_, pErr = r.redirectOnOrAcquireLease(ctx)
			hasLease = pErr == nil
			if pErr != nil {
				switch e := pErr.GetDetail().(type) {
				case *roachpb.NotLeaseHolderError:
					// NotLeaseHolderError means there is an active lease, but only if
					// the lease holder is set; otherwise, it's likely a timeout.
					if e.LeaseHolder != nil {
						pErr = nil
					}
				default:
					// Any other error is worth being logged visibly.
					log.Warningf(ctx, "could not acquire lease for range gossip: %s", e)
				}
			}
		}); err != nil {
		pErr = roachpb.NewError(err)
	}
	return hasLease, pErr
}

// maybeGossipFirstRange adds the sentinel and first range metadata to gossip
// if this is the first range and a range lease can be obtained. The Store
// calls this periodically on first range replicas.
func (r *Replica) maybeGossipFirstRange(ctx context.Context) *roachpb.Error {
	if !r.IsFirstRange() {
		return nil
	}

	// When multiple nodes are initialized with overlapping Gossip addresses, they all
	// will attempt to gossip their cluster ID. This is a fairly obvious misconfiguration,
	// so we error out below.
	if gossipClusterID, err := r.store.Gossip().GetClusterID(); err == nil {
		if gossipClusterID != r.store.ClusterID() {
			log.Fatalf(
				ctx, "store %d belongs to cluster %s, but attempted to join cluster %s via gossip",
				r.store.StoreID(), r.store.ClusterID(), gossipClusterID)
		}
	}

	// Gossip the cluster ID from all replicas of the first range; there
	// is no expiration on the cluster ID.
	if log.V(1) {
		log.Infof(ctx, "gossiping cluster id %q from store %d, r%d", r.store.ClusterID(),
			r.store.StoreID(), r.RangeID)
	}
	if err := r.store.Gossip().AddClusterID(r.store.ClusterID()); err != nil {
		log.Errorf(ctx, "failed to gossip cluster ID: %+v", err)
	}

	if r.store.cfg.TestingKnobs.DisablePeriodicGossips {
		return nil
	}

	hasLease, pErr := r.getLeaseForGossip(ctx)
	if pErr != nil {
		return pErr
	} else if !hasLease {
		return nil
	}
	r.gossipFirstRange(ctx)
	return nil
}
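// The sketch below is not part of the original file; it is an illustrative,
// hedged example of how a caller might drive the best-effort gossip helpers
// above. The function name gossipSystemConfigBestEffort and the idea of a
// separate caller-side trigger are assumptions for illustration only.
func gossipSystemConfigBestEffort(ctx context.Context, r *Replica) {
	// MaybeGossipSystemConfig already checks that gossip is connected, that
	// the replica is initialized, that it contains the start of the system
	// config span, and that it holds the lease. A caller therefore only needs
	// to invoke it and log failures; the next trigger (an EndTxn on the span
	// or a lease acquisition) retries the gossip.
	if err := r.MaybeGossipSystemConfig(ctx); err != nil {
		log.Warningf(ctx, "could not gossip system config: %v", err)
	}
}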