github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/gossip/infostore.go (about) 1 // Copyright 2014 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package gossip 12 13 import ( 14 "bytes" 15 "context" 16 "fmt" 17 "math" 18 "regexp" 19 "strings" 20 "time" 21 22 "github.com/cockroachdb/cockroach/pkg/base" 23 "github.com/cockroachdb/cockroach/pkg/roachpb" 24 "github.com/cockroachdb/cockroach/pkg/util" 25 "github.com/cockroachdb/cockroach/pkg/util/hlc" 26 "github.com/cockroachdb/cockroach/pkg/util/log" 27 "github.com/cockroachdb/cockroach/pkg/util/stop" 28 "github.com/cockroachdb/cockroach/pkg/util/syncutil" 29 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 30 "github.com/cockroachdb/errors" 31 ) 32 33 type stringMatcher interface { 34 MatchString(string) bool 35 } 36 37 type allMatcher struct{} 38 39 func (allMatcher) MatchString(string) bool { 40 return true 41 } 42 43 // callback holds regexp pattern match and GossipCallback method. 44 type callback struct { 45 matcher stringMatcher 46 method Callback 47 redundant bool 48 } 49 50 // infoStore objects manage maps of Info objects. They maintain a 51 // sequence number generator which they use to allocate new info 52 // objects. 53 // 54 // infoStores can be queried for incremental updates occurring since a 55 // specified map of peer node high water timestamps. 56 // 57 // infoStores can be combined using deltas from peer nodes. 58 // 59 // infoStores are not thread safe. 60 type infoStore struct { 61 log.AmbientContext 62 63 nodeID *base.NodeIDContainer 64 stopper *stop.Stopper 65 66 Infos infoMap `json:"infos,omitempty"` // Map from key to info 67 NodeAddr util.UnresolvedAddr `json:"-"` // Address of node owning this info store: "host:port" 68 highWaterStamps map[roachpb.NodeID]int64 // Per-node information for gossip peers 69 callbacks []*callback 70 71 callbackWorkMu syncutil.Mutex // Protects callbackWork 72 callbackWork []func() 73 callbackCh chan struct{} // Channel to signal the callback goroutine 74 } 75 76 var monoTime struct { 77 syncutil.Mutex 78 last int64 79 } 80 81 var errNotFresh = errors.New("info not fresh") 82 83 // monotonicUnixNano returns a monotonically increasing value for 84 // nanoseconds in Unix time. Since equal times are ignored with 85 // updates to infos, we're careful to avoid incorrectly ignoring a 86 // newly created value in the event one is created within the same 87 // nanosecond. Really unlikely except for the case of unittests, but 88 // better safe than sorry. 89 func monotonicUnixNano() int64 { 90 monoTime.Lock() 91 defer monoTime.Unlock() 92 93 now := timeutil.Now().UnixNano() 94 if now <= monoTime.last { 95 now = monoTime.last + 1 96 } 97 monoTime.last = now 98 return now 99 } 100 101 // ratchetMonotonic increases the monotonic clock to be at least v. Used to 102 // guarantee that clock values generated by the local node ID always increase 103 // even in the presence of local infos that were received from a remote with a 104 // timestamp in the future (which can happen in the presence of backward clock 105 // jumps and a crash). 106 func ratchetMonotonic(v int64) { 107 monoTime.Lock() 108 if monoTime.last < v { 109 monoTime.last = v 110 } 111 monoTime.Unlock() 112 } 113 114 // ratchetHighWaterStamp sets stamps[nodeID] to max(stamps[nodeID], newStamp). 115 func ratchetHighWaterStamp(stamps map[roachpb.NodeID]int64, nodeID roachpb.NodeID, newStamp int64) { 116 if nodeID != 0 && stamps[nodeID] < newStamp { 117 stamps[nodeID] = newStamp 118 } 119 } 120 121 // mergeHighWaterStamps merges the high water stamps in src into dest by 122 // performing a ratchet operation for each stamp in src. The existing stamps in 123 // dest will either remain the same (if they are smaller than the corresponding 124 // stamp in src) or be bumped to the higher value in src. 125 func mergeHighWaterStamps(dest *map[roachpb.NodeID]int64, src map[roachpb.NodeID]int64) { 126 if *dest == nil { 127 *dest = src 128 return 129 } 130 for nodeID, newStamp := range src { 131 ratchetHighWaterStamp(*dest, nodeID, newStamp) 132 } 133 } 134 135 // String returns a string representation of an infostore. 136 func (is *infoStore) String() string { 137 var buf strings.Builder 138 if infoCount := len(is.Infos); infoCount > 0 { 139 fmt.Fprintf(&buf, "infostore with %d info(s): ", infoCount) 140 } else { 141 return "infostore (empty)" 142 } 143 144 prepend := "" 145 146 if err := is.visitInfos(func(key string, i *Info) error { 147 fmt.Fprintf(&buf, "%sinfo %q: %+v", prepend, key, i.Value) 148 prepend = ", " 149 return nil 150 }, false /* deleteExpired */); err != nil { 151 // This should never happen because the func we pass above never errors out. 152 panic(err) 153 } 154 return buf.String() 155 } 156 157 // newInfoStore allocates and returns a new infoStore. 158 func newInfoStore( 159 ambient log.AmbientContext, 160 nodeID *base.NodeIDContainer, 161 nodeAddr util.UnresolvedAddr, 162 stopper *stop.Stopper, 163 ) *infoStore { 164 is := &infoStore{ 165 AmbientContext: ambient, 166 nodeID: nodeID, 167 stopper: stopper, 168 Infos: make(infoMap), 169 NodeAddr: nodeAddr, 170 highWaterStamps: map[roachpb.NodeID]int64{}, 171 callbackCh: make(chan struct{}, 1), 172 } 173 174 is.stopper.RunWorker(context.Background(), func(ctx context.Context) { 175 for { 176 for { 177 is.callbackWorkMu.Lock() 178 work := is.callbackWork 179 is.callbackWork = nil 180 is.callbackWorkMu.Unlock() 181 182 if len(work) == 0 { 183 break 184 } 185 for _, w := range work { 186 w() 187 } 188 } 189 190 select { 191 case <-is.callbackCh: 192 case <-is.stopper.ShouldQuiesce(): 193 return 194 } 195 } 196 }) 197 return is 198 } 199 200 // newInfo allocates and returns a new info object using the specified 201 // value and time-to-live. 202 func (is *infoStore) newInfo(val []byte, ttl time.Duration) *Info { 203 nodeID := is.nodeID.Get() 204 if nodeID == 0 { 205 panic("gossip infostore's NodeID is 0") 206 } 207 now := monotonicUnixNano() 208 ttlStamp := now + int64(ttl) 209 if ttl == 0 { 210 ttlStamp = math.MaxInt64 211 } 212 v := roachpb.MakeValueFromBytesAndTimestamp(val, hlc.Timestamp{WallTime: now}) 213 return &Info{ 214 Value: v, 215 TTLStamp: ttlStamp, 216 NodeID: nodeID, 217 } 218 } 219 220 // getInfo returns the Info at key. Returns nil when key is not present 221 // in the infoStore. Does not modify the infoStore. 222 func (is *infoStore) getInfo(key string) *Info { 223 if info, ok := is.Infos[key]; ok { 224 // Check TTL and ignore if too old. 225 if !info.expired(monotonicUnixNano()) { 226 return info 227 } 228 } 229 return nil 230 } 231 232 // addInfo adds or updates an info in the infos map. 233 // 234 // Returns nil if info was added; error otherwise. 235 func (is *infoStore) addInfo(key string, i *Info) error { 236 if i.NodeID == 0 { 237 panic("gossip info's NodeID is 0") 238 } 239 // Only replace an existing info if new timestamp is greater, or if 240 // timestamps are equal, but new hops is smaller. 241 existingInfo, ok := is.Infos[key] 242 if ok { 243 iNanos := i.Value.Timestamp.WallTime 244 existingNanos := existingInfo.Value.Timestamp.WallTime 245 if iNanos < existingNanos || (iNanos == existingNanos && i.Hops >= existingInfo.Hops) { 246 return errNotFresh 247 } 248 } 249 if i.OrigStamp == 0 { 250 i.Value.InitChecksum([]byte(key)) 251 i.OrigStamp = monotonicUnixNano() 252 if highWaterStamp, ok := is.highWaterStamps[i.NodeID]; ok && highWaterStamp >= i.OrigStamp { 253 // Report both timestamps in the crash. 254 log.Fatalf(context.Background(), 255 "high water stamp %d >= %d", log.Safe(highWaterStamp), log.Safe(i.OrigStamp)) 256 } 257 } 258 // Update info map. 259 is.Infos[key] = i 260 // Update the high water timestamp & min hops for the originating node. 261 ratchetHighWaterStamp(is.highWaterStamps, i.NodeID, i.OrigStamp) 262 changed := existingInfo == nil || 263 !bytes.Equal(existingInfo.Value.RawBytes, i.Value.RawBytes) 264 is.processCallbacks(key, i.Value, changed) 265 return nil 266 } 267 268 // getHighWaterStamps returns a copy of the high water stamps map of 269 // gossip peer info maintained by this infostore. Does not modify 270 // the infoStore. 271 func (is *infoStore) getHighWaterStamps() map[roachpb.NodeID]int64 { 272 copy := make(map[roachpb.NodeID]int64, len(is.highWaterStamps)) 273 for k, hws := range is.highWaterStamps { 274 copy[k] = hws 275 } 276 return copy 277 } 278 279 // registerCallback registers a callback for a key pattern to be 280 // invoked whenever new info for a gossip key matching pattern is 281 // received. The callback method is invoked with the info key which 282 // matched pattern. Returns a function to unregister the callback. 283 // Note: the callback may fire after being unregistered. 284 func (is *infoStore) registerCallback( 285 pattern string, method Callback, opts ...CallbackOption, 286 ) func() { 287 var matcher stringMatcher 288 if pattern == ".*" { 289 matcher = allMatcher{} 290 } else { 291 matcher = regexp.MustCompile(pattern) 292 } 293 cb := &callback{matcher: matcher, method: method} 294 for _, opt := range opts { 295 opt.apply(cb) 296 } 297 298 is.callbacks = append(is.callbacks, cb) 299 if err := is.visitInfos(func(key string, i *Info) error { 300 if matcher.MatchString(key) { 301 is.runCallbacks(key, i.Value, method) 302 } 303 return nil 304 }, true /* deleteExpired */); err != nil { 305 panic(err) 306 } 307 308 return func() { 309 for i, targetCB := range is.callbacks { 310 if targetCB == cb { 311 numCBs := len(is.callbacks) 312 is.callbacks[i] = is.callbacks[numCBs-1] 313 is.callbacks = is.callbacks[:numCBs-1] 314 break 315 } 316 } 317 } 318 } 319 320 // processCallbacks processes callbacks for the specified key by 321 // matching each callback's regular expression against the key and invoking 322 // the corresponding callback method on a match. 323 func (is *infoStore) processCallbacks(key string, content roachpb.Value, changed bool) { 324 var matches []Callback 325 for _, cb := range is.callbacks { 326 if (changed || cb.redundant) && cb.matcher.MatchString(key) { 327 matches = append(matches, cb.method) 328 } 329 } 330 is.runCallbacks(key, content, matches...) 331 } 332 333 func (is *infoStore) runCallbacks(key string, content roachpb.Value, callbacks ...Callback) { 334 // Add the callbacks to the callback work list. 335 f := func() { 336 for _, method := range callbacks { 337 method(key, content) 338 } 339 } 340 is.callbackWorkMu.Lock() 341 is.callbackWork = append(is.callbackWork, f) 342 is.callbackWorkMu.Unlock() 343 344 // Signal the callback goroutine. Callbacks run in a goroutine to avoid mutex 345 // reentry. We also guarantee callbacks are run in order such that if a key 346 // is updated twice in succession, the second callback will never be run 347 // before the first. 348 select { 349 case is.callbackCh <- struct{}{}: 350 default: 351 } 352 } 353 354 // visitInfos implements a visitor pattern to run the visitInfo function against 355 // each info in turn. If deleteExpired is specified as true then the method will 356 // delete any infos that it finds which are expired, so it may modify the 357 // infoStore. If it is specified as false, the method will ignore expired infos 358 // without deleting them or modifying the infoStore. 359 func (is *infoStore) visitInfos(visitInfo func(string, *Info) error, deleteExpired bool) error { 360 now := monotonicUnixNano() 361 362 if visitInfo != nil { 363 for k, i := range is.Infos { 364 if i.expired(now) { 365 if deleteExpired { 366 delete(is.Infos, k) 367 } 368 continue 369 } 370 if err := visitInfo(k, i); err != nil { 371 return err 372 } 373 } 374 } 375 376 return nil 377 } 378 379 // combine combines an incremental delta with the current infoStore. 380 // All hop distances on infos are incremented to indicate they've 381 // arrived from an external source. Returns the count of "fresh" 382 // infos in the provided delta. 383 func (is *infoStore) combine( 384 infos map[string]*Info, nodeID roachpb.NodeID, 385 ) (freshCount int, err error) { 386 localNodeID := is.nodeID.Get() 387 for key, i := range infos { 388 if i.NodeID == localNodeID { 389 ratchetMonotonic(i.OrigStamp) 390 } 391 392 infoCopy := *i 393 infoCopy.Hops++ 394 infoCopy.PeerID = nodeID 395 if infoCopy.OrigStamp == 0 { 396 panic(errors.Errorf("combining info from n%d with 0 original timestamp", nodeID)) 397 } 398 // errNotFresh errors from addInfo are ignored; they indicate that 399 // the data in *is is newer than in *delta. 400 if addErr := is.addInfo(key, &infoCopy); addErr == nil { 401 freshCount++ 402 } else if !errors.Is(addErr, errNotFresh) { 403 err = addErr 404 } 405 } 406 return 407 } 408 409 // delta returns a map of infos which have originating timestamps 410 // newer than the high water timestamps indicated by the supplied 411 // map (which is taken from the perspective of the peer node we're 412 // taking this delta for). 413 // 414 // May modify the infoStore. 415 func (is *infoStore) delta(highWaterTimestamps map[roachpb.NodeID]int64) map[string]*Info { 416 infos := make(map[string]*Info) 417 // Compute delta of infos. 418 if err := is.visitInfos(func(key string, i *Info) error { 419 if i.isFresh(highWaterTimestamps[i.NodeID]) { 420 infos[key] = i 421 } 422 return nil 423 }, true /* deleteExpired */); err != nil { 424 panic(err) 425 } 426 427 return infos 428 } 429 430 // populateMostDistantMarkers adds the node ID infos to the infos map. The node 431 // ID infos are used as markers in the mostDistant calculation and need to be 432 // propagated regardless of high water stamps. 433 func (is *infoStore) populateMostDistantMarkers(infos map[string]*Info) { 434 if err := is.visitInfos(func(key string, i *Info) error { 435 if IsNodeIDKey(key) { 436 infos[key] = i 437 } 438 return nil 439 }, true /* deleteExpired */); err != nil { 440 panic(err) 441 } 442 } 443 444 // mostDistant returns the most distant gossip node known to the store 445 // as well as the number of hops to reach it. 446 // 447 // Uses haveOutgoingConn to check for whether or not this node is already 448 // in the process of connecting to a given node (but haven't yet received 449 // Infos from it) for the purposes of excluding them from the result. 450 // This check is particularly useful if mostDistant is called multiple times 451 // in quick succession. 452 // 453 // May modify the infoStore. 454 func (is *infoStore) mostDistant( 455 hasOutgoingConn func(roachpb.NodeID) bool, 456 ) (roachpb.NodeID, uint32) { 457 localNodeID := is.nodeID.Get() 458 var nodeID roachpb.NodeID 459 var maxHops uint32 460 if err := is.visitInfos(func(key string, i *Info) error { 461 // Only consider NodeID keys here because they're re-gossiped every time a 462 // node restarts and periodically after that, so their Hops values are more 463 // likely to be accurate than keys which are rarely re-gossiped, which can 464 // acquire unreliably high Hops values in some pathological cases such as 465 // those described in #9819. 466 if i.NodeID != localNodeID && i.Hops > maxHops && 467 IsNodeIDKey(key) && !hasOutgoingConn(i.NodeID) { 468 maxHops = i.Hops 469 nodeID = i.NodeID 470 } 471 return nil 472 }, true /* deleteExpired */); err != nil { 473 panic(err) 474 } 475 return nodeID, maxHops 476 } 477 478 // leastUseful determines which node ID from amongst the set is 479 // currently contributing the least. Returns the node ID. If nodes is 480 // empty, returns 0. 481 // 482 // May modify the infoStore. 483 func (is *infoStore) leastUseful(nodes nodeSet) roachpb.NodeID { 484 contrib := make(map[roachpb.NodeID]map[roachpb.NodeID]struct{}, nodes.len()) 485 for node := range nodes.nodes { 486 contrib[node] = map[roachpb.NodeID]struct{}{} 487 } 488 if err := is.visitInfos(func(key string, i *Info) error { 489 if _, ok := contrib[i.PeerID]; !ok { 490 contrib[i.PeerID] = map[roachpb.NodeID]struct{}{} 491 } 492 contrib[i.PeerID][i.NodeID] = struct{}{} 493 return nil 494 }, true /* deleteExpired */); err != nil { 495 panic(err) 496 } 497 498 least := math.MaxInt32 499 var leastNode roachpb.NodeID 500 for id, m := range contrib { 501 count := len(m) 502 if nodes.hasNode(id) { 503 if count < least { 504 least = count 505 leastNode = id 506 } 507 } 508 } 509 return leastNode 510 }