github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/gossip/infostore.go (about)

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package gossip
    12  
    13  import (
    14  	"bytes"
    15  	"context"
    16  	"fmt"
    17  	"math"
    18  	"regexp"
    19  	"strings"
    20  	"time"
    21  
    22  	"github.com/cockroachdb/cockroach/pkg/base"
    23  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    24  	"github.com/cockroachdb/cockroach/pkg/util"
    25  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    26  	"github.com/cockroachdb/cockroach/pkg/util/log"
    27  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    28  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    29  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    30  	"github.com/cockroachdb/errors"
    31  )
    32  
    33  type stringMatcher interface {
    34  	MatchString(string) bool
    35  }
    36  
    37  type allMatcher struct{}
    38  
    39  func (allMatcher) MatchString(string) bool {
    40  	return true
    41  }
    42  
// callback pairs a key matcher with the Callback method to invoke for keys
// that match. Instances are created by registerCallback and consulted by
// processCallbacks.
type callback struct {
	matcher   stringMatcher // Tested against each updated info key
	method    Callback      // Invoked with (key, value) when matcher accepts the key
	redundant bool          // If true, method is also invoked for updates that did not change the value bytes
}
    49  
// infoStore objects manage maps of Info objects keyed by gossip key. Infos
// created locally through newInfo are stamped with a value from the
// process-wide monotonic clock (monotonicUnixNano).
//
// infoStores can be queried for incremental updates occurring since a
// specified map of peer node high water timestamps (see delta).
//
// infoStores can be combined using deltas from peer nodes (see combine).
//
// infoStores are not thread safe. The one exception visible in this file is
// the callback work queue, which has its own mutex because it is shared with
// the callback goroutine started by newInfoStore.
type infoStore struct {
	log.AmbientContext

	// nodeID holds the local node's ID; stopper runs the callback worker and
	// tells it when to exit.
	nodeID  *base.NodeIDContainer
	stopper *stop.Stopper

	Infos           infoMap                  `json:"infos,omitempty"` // Map from key to info
	NodeAddr        util.UnresolvedAddr      `json:"-"`               // Address of node owning this info store: "host:port"
	highWaterStamps map[roachpb.NodeID]int64 // Per-node information for gossip peers: highest OrigStamp added per originating node
	callbacks       []*callback // Registered callbacks; order is not stable across unregistration

	callbackWorkMu syncutil.Mutex // Protects callbackWork
	callbackWork   []func() // Queued callback invocations, run in order by the callback goroutine
	callbackCh     chan struct{} // Channel to signal the callback goroutine
}
    75  
    76  var monoTime struct {
    77  	syncutil.Mutex
    78  	last int64
    79  }
    80  
    81  var errNotFresh = errors.New("info not fresh")
    82  
    83  // monotonicUnixNano returns a monotonically increasing value for
    84  // nanoseconds in Unix time. Since equal times are ignored with
    85  // updates to infos, we're careful to avoid incorrectly ignoring a
    86  // newly created value in the event one is created within the same
    87  // nanosecond. Really unlikely except for the case of unittests, but
    88  // better safe than sorry.
    89  func monotonicUnixNano() int64 {
    90  	monoTime.Lock()
    91  	defer monoTime.Unlock()
    92  
    93  	now := timeutil.Now().UnixNano()
    94  	if now <= monoTime.last {
    95  		now = monoTime.last + 1
    96  	}
    97  	monoTime.last = now
    98  	return now
    99  }
   100  
   101  // ratchetMonotonic increases the monotonic clock to be at least v. Used to
   102  // guarantee that clock values generated by the local node ID always increase
   103  // even in the presence of local infos that were received from a remote with a
   104  // timestamp in the future (which can happen in the presence of backward clock
   105  // jumps and a crash).
   106  func ratchetMonotonic(v int64) {
   107  	monoTime.Lock()
   108  	if monoTime.last < v {
   109  		monoTime.last = v
   110  	}
   111  	monoTime.Unlock()
   112  }
   113  
   114  // ratchetHighWaterStamp sets stamps[nodeID] to max(stamps[nodeID], newStamp).
   115  func ratchetHighWaterStamp(stamps map[roachpb.NodeID]int64, nodeID roachpb.NodeID, newStamp int64) {
   116  	if nodeID != 0 && stamps[nodeID] < newStamp {
   117  		stamps[nodeID] = newStamp
   118  	}
   119  }
   120  
   121  // mergeHighWaterStamps merges the high water stamps in src into dest by
   122  // performing a ratchet operation for each stamp in src. The existing stamps in
   123  // dest will either remain the same (if they are smaller than the corresponding
   124  // stamp in src) or be bumped to the higher value in src.
   125  func mergeHighWaterStamps(dest *map[roachpb.NodeID]int64, src map[roachpb.NodeID]int64) {
   126  	if *dest == nil {
   127  		*dest = src
   128  		return
   129  	}
   130  	for nodeID, newStamp := range src {
   131  		ratchetHighWaterStamp(*dest, nodeID, newStamp)
   132  	}
   133  }
   134  
   135  // String returns a string representation of an infostore.
   136  func (is *infoStore) String() string {
   137  	var buf strings.Builder
   138  	if infoCount := len(is.Infos); infoCount > 0 {
   139  		fmt.Fprintf(&buf, "infostore with %d info(s): ", infoCount)
   140  	} else {
   141  		return "infostore (empty)"
   142  	}
   143  
   144  	prepend := ""
   145  
   146  	if err := is.visitInfos(func(key string, i *Info) error {
   147  		fmt.Fprintf(&buf, "%sinfo %q: %+v", prepend, key, i.Value)
   148  		prepend = ", "
   149  		return nil
   150  	}, false /* deleteExpired */); err != nil {
   151  		// This should never happen because the func we pass above never errors out.
   152  		panic(err)
   153  	}
   154  	return buf.String()
   155  }
   156  
   157  // newInfoStore allocates and returns a new infoStore.
   158  func newInfoStore(
   159  	ambient log.AmbientContext,
   160  	nodeID *base.NodeIDContainer,
   161  	nodeAddr util.UnresolvedAddr,
   162  	stopper *stop.Stopper,
   163  ) *infoStore {
   164  	is := &infoStore{
   165  		AmbientContext:  ambient,
   166  		nodeID:          nodeID,
   167  		stopper:         stopper,
   168  		Infos:           make(infoMap),
   169  		NodeAddr:        nodeAddr,
   170  		highWaterStamps: map[roachpb.NodeID]int64{},
   171  		callbackCh:      make(chan struct{}, 1),
   172  	}
   173  
   174  	is.stopper.RunWorker(context.Background(), func(ctx context.Context) {
   175  		for {
   176  			for {
   177  				is.callbackWorkMu.Lock()
   178  				work := is.callbackWork
   179  				is.callbackWork = nil
   180  				is.callbackWorkMu.Unlock()
   181  
   182  				if len(work) == 0 {
   183  					break
   184  				}
   185  				for _, w := range work {
   186  					w()
   187  				}
   188  			}
   189  
   190  			select {
   191  			case <-is.callbackCh:
   192  			case <-is.stopper.ShouldQuiesce():
   193  				return
   194  			}
   195  		}
   196  	})
   197  	return is
   198  }
   199  
   200  // newInfo allocates and returns a new info object using the specified
   201  // value and time-to-live.
   202  func (is *infoStore) newInfo(val []byte, ttl time.Duration) *Info {
   203  	nodeID := is.nodeID.Get()
   204  	if nodeID == 0 {
   205  		panic("gossip infostore's NodeID is 0")
   206  	}
   207  	now := monotonicUnixNano()
   208  	ttlStamp := now + int64(ttl)
   209  	if ttl == 0 {
   210  		ttlStamp = math.MaxInt64
   211  	}
   212  	v := roachpb.MakeValueFromBytesAndTimestamp(val, hlc.Timestamp{WallTime: now})
   213  	return &Info{
   214  		Value:    v,
   215  		TTLStamp: ttlStamp,
   216  		NodeID:   nodeID,
   217  	}
   218  }
   219  
   220  // getInfo returns the Info at key. Returns nil when key is not present
   221  // in the infoStore. Does not modify the infoStore.
   222  func (is *infoStore) getInfo(key string) *Info {
   223  	if info, ok := is.Infos[key]; ok {
   224  		// Check TTL and ignore if too old.
   225  		if !info.expired(monotonicUnixNano()) {
   226  			return info
   227  		}
   228  	}
   229  	return nil
   230  }
   231  
   232  // addInfo adds or updates an info in the infos map.
   233  //
   234  // Returns nil if info was added; error otherwise.
   235  func (is *infoStore) addInfo(key string, i *Info) error {
   236  	if i.NodeID == 0 {
   237  		panic("gossip info's NodeID is 0")
   238  	}
   239  	// Only replace an existing info if new timestamp is greater, or if
   240  	// timestamps are equal, but new hops is smaller.
   241  	existingInfo, ok := is.Infos[key]
   242  	if ok {
   243  		iNanos := i.Value.Timestamp.WallTime
   244  		existingNanos := existingInfo.Value.Timestamp.WallTime
   245  		if iNanos < existingNanos || (iNanos == existingNanos && i.Hops >= existingInfo.Hops) {
   246  			return errNotFresh
   247  		}
   248  	}
   249  	if i.OrigStamp == 0 {
   250  		i.Value.InitChecksum([]byte(key))
   251  		i.OrigStamp = monotonicUnixNano()
   252  		if highWaterStamp, ok := is.highWaterStamps[i.NodeID]; ok && highWaterStamp >= i.OrigStamp {
   253  			// Report both timestamps in the crash.
   254  			log.Fatalf(context.Background(),
   255  				"high water stamp %d >= %d", log.Safe(highWaterStamp), log.Safe(i.OrigStamp))
   256  		}
   257  	}
   258  	// Update info map.
   259  	is.Infos[key] = i
   260  	// Update the high water timestamp & min hops for the originating node.
   261  	ratchetHighWaterStamp(is.highWaterStamps, i.NodeID, i.OrigStamp)
   262  	changed := existingInfo == nil ||
   263  		!bytes.Equal(existingInfo.Value.RawBytes, i.Value.RawBytes)
   264  	is.processCallbacks(key, i.Value, changed)
   265  	return nil
   266  }
   267  
   268  // getHighWaterStamps returns a copy of the high water stamps map of
   269  // gossip peer info maintained by this infostore. Does not modify
   270  // the infoStore.
   271  func (is *infoStore) getHighWaterStamps() map[roachpb.NodeID]int64 {
   272  	copy := make(map[roachpb.NodeID]int64, len(is.highWaterStamps))
   273  	for k, hws := range is.highWaterStamps {
   274  		copy[k] = hws
   275  	}
   276  	return copy
   277  }
   278  
   279  // registerCallback registers a callback for a key pattern to be
   280  // invoked whenever new info for a gossip key matching pattern is
   281  // received. The callback method is invoked with the info key which
   282  // matched pattern. Returns a function to unregister the callback.
   283  // Note: the callback may fire after being unregistered.
   284  func (is *infoStore) registerCallback(
   285  	pattern string, method Callback, opts ...CallbackOption,
   286  ) func() {
   287  	var matcher stringMatcher
   288  	if pattern == ".*" {
   289  		matcher = allMatcher{}
   290  	} else {
   291  		matcher = regexp.MustCompile(pattern)
   292  	}
   293  	cb := &callback{matcher: matcher, method: method}
   294  	for _, opt := range opts {
   295  		opt.apply(cb)
   296  	}
   297  
   298  	is.callbacks = append(is.callbacks, cb)
   299  	if err := is.visitInfos(func(key string, i *Info) error {
   300  		if matcher.MatchString(key) {
   301  			is.runCallbacks(key, i.Value, method)
   302  		}
   303  		return nil
   304  	}, true /* deleteExpired */); err != nil {
   305  		panic(err)
   306  	}
   307  
   308  	return func() {
   309  		for i, targetCB := range is.callbacks {
   310  			if targetCB == cb {
   311  				numCBs := len(is.callbacks)
   312  				is.callbacks[i] = is.callbacks[numCBs-1]
   313  				is.callbacks = is.callbacks[:numCBs-1]
   314  				break
   315  			}
   316  		}
   317  	}
   318  }
   319  
   320  // processCallbacks processes callbacks for the specified key by
   321  // matching each callback's regular expression against the key and invoking
   322  // the corresponding callback method on a match.
   323  func (is *infoStore) processCallbacks(key string, content roachpb.Value, changed bool) {
   324  	var matches []Callback
   325  	for _, cb := range is.callbacks {
   326  		if (changed || cb.redundant) && cb.matcher.MatchString(key) {
   327  			matches = append(matches, cb.method)
   328  		}
   329  	}
   330  	is.runCallbacks(key, content, matches...)
   331  }
   332  
   333  func (is *infoStore) runCallbacks(key string, content roachpb.Value, callbacks ...Callback) {
   334  	// Add the callbacks to the callback work list.
   335  	f := func() {
   336  		for _, method := range callbacks {
   337  			method(key, content)
   338  		}
   339  	}
   340  	is.callbackWorkMu.Lock()
   341  	is.callbackWork = append(is.callbackWork, f)
   342  	is.callbackWorkMu.Unlock()
   343  
   344  	// Signal the callback goroutine. Callbacks run in a goroutine to avoid mutex
   345  	// reentry. We also guarantee callbacks are run in order such that if a key
   346  	// is updated twice in succession, the second callback will never be run
   347  	// before the first.
   348  	select {
   349  	case is.callbackCh <- struct{}{}:
   350  	default:
   351  	}
   352  }
   353  
   354  // visitInfos implements a visitor pattern to run the visitInfo function against
   355  // each info in turn. If deleteExpired is specified as true then the method will
   356  // delete any infos that it finds which are expired, so it may modify the
   357  // infoStore. If it is specified as false, the method will ignore expired infos
   358  // without deleting them or modifying the infoStore.
   359  func (is *infoStore) visitInfos(visitInfo func(string, *Info) error, deleteExpired bool) error {
   360  	now := monotonicUnixNano()
   361  
   362  	if visitInfo != nil {
   363  		for k, i := range is.Infos {
   364  			if i.expired(now) {
   365  				if deleteExpired {
   366  					delete(is.Infos, k)
   367  				}
   368  				continue
   369  			}
   370  			if err := visitInfo(k, i); err != nil {
   371  				return err
   372  			}
   373  		}
   374  	}
   375  
   376  	return nil
   377  }
   378  
   379  // combine combines an incremental delta with the current infoStore.
   380  // All hop distances on infos are incremented to indicate they've
   381  // arrived from an external source. Returns the count of "fresh"
   382  // infos in the provided delta.
   383  func (is *infoStore) combine(
   384  	infos map[string]*Info, nodeID roachpb.NodeID,
   385  ) (freshCount int, err error) {
   386  	localNodeID := is.nodeID.Get()
   387  	for key, i := range infos {
   388  		if i.NodeID == localNodeID {
   389  			ratchetMonotonic(i.OrigStamp)
   390  		}
   391  
   392  		infoCopy := *i
   393  		infoCopy.Hops++
   394  		infoCopy.PeerID = nodeID
   395  		if infoCopy.OrigStamp == 0 {
   396  			panic(errors.Errorf("combining info from n%d with 0 original timestamp", nodeID))
   397  		}
   398  		// errNotFresh errors from addInfo are ignored; they indicate that
   399  		// the data in *is is newer than in *delta.
   400  		if addErr := is.addInfo(key, &infoCopy); addErr == nil {
   401  			freshCount++
   402  		} else if !errors.Is(addErr, errNotFresh) {
   403  			err = addErr
   404  		}
   405  	}
   406  	return
   407  }
   408  
   409  // delta returns a map of infos which have originating timestamps
   410  // newer than the high water timestamps indicated by the supplied
   411  // map (which is taken from the perspective of the peer node we're
   412  // taking this delta for).
   413  //
   414  // May modify the infoStore.
   415  func (is *infoStore) delta(highWaterTimestamps map[roachpb.NodeID]int64) map[string]*Info {
   416  	infos := make(map[string]*Info)
   417  	// Compute delta of infos.
   418  	if err := is.visitInfos(func(key string, i *Info) error {
   419  		if i.isFresh(highWaterTimestamps[i.NodeID]) {
   420  			infos[key] = i
   421  		}
   422  		return nil
   423  	}, true /* deleteExpired */); err != nil {
   424  		panic(err)
   425  	}
   426  
   427  	return infos
   428  }
   429  
   430  // populateMostDistantMarkers adds the node ID infos to the infos map. The node
   431  // ID infos are used as markers in the mostDistant calculation and need to be
   432  // propagated regardless of high water stamps.
   433  func (is *infoStore) populateMostDistantMarkers(infos map[string]*Info) {
   434  	if err := is.visitInfos(func(key string, i *Info) error {
   435  		if IsNodeIDKey(key) {
   436  			infos[key] = i
   437  		}
   438  		return nil
   439  	}, true /* deleteExpired */); err != nil {
   440  		panic(err)
   441  	}
   442  }
   443  
   444  // mostDistant returns the most distant gossip node known to the store
   445  // as well as the number of hops to reach it.
   446  //
   447  // Uses haveOutgoingConn to check for whether or not this node is already
   448  // in the process of connecting to a given node (but haven't yet received
   449  // Infos from it) for the purposes of excluding them from the result.
   450  // This check is particularly useful if mostDistant is called multiple times
   451  // in quick succession.
   452  //
   453  // May modify the infoStore.
   454  func (is *infoStore) mostDistant(
   455  	hasOutgoingConn func(roachpb.NodeID) bool,
   456  ) (roachpb.NodeID, uint32) {
   457  	localNodeID := is.nodeID.Get()
   458  	var nodeID roachpb.NodeID
   459  	var maxHops uint32
   460  	if err := is.visitInfos(func(key string, i *Info) error {
   461  		// Only consider NodeID keys here because they're re-gossiped every time a
   462  		// node restarts and periodically after that, so their Hops values are more
   463  		// likely to be accurate than keys which are rarely re-gossiped, which can
   464  		// acquire unreliably high Hops values in some pathological cases such as
   465  		// those described in #9819.
   466  		if i.NodeID != localNodeID && i.Hops > maxHops &&
   467  			IsNodeIDKey(key) && !hasOutgoingConn(i.NodeID) {
   468  			maxHops = i.Hops
   469  			nodeID = i.NodeID
   470  		}
   471  		return nil
   472  	}, true /* deleteExpired */); err != nil {
   473  		panic(err)
   474  	}
   475  	return nodeID, maxHops
   476  }
   477  
   478  // leastUseful determines which node ID from amongst the set is
   479  // currently contributing the least. Returns the node ID. If nodes is
   480  // empty, returns 0.
   481  //
   482  // May modify the infoStore.
   483  func (is *infoStore) leastUseful(nodes nodeSet) roachpb.NodeID {
   484  	contrib := make(map[roachpb.NodeID]map[roachpb.NodeID]struct{}, nodes.len())
   485  	for node := range nodes.nodes {
   486  		contrib[node] = map[roachpb.NodeID]struct{}{}
   487  	}
   488  	if err := is.visitInfos(func(key string, i *Info) error {
   489  		if _, ok := contrib[i.PeerID]; !ok {
   490  			contrib[i.PeerID] = map[roachpb.NodeID]struct{}{}
   491  		}
   492  		contrib[i.PeerID][i.NodeID] = struct{}{}
   493  		return nil
   494  	}, true /* deleteExpired */); err != nil {
   495  		panic(err)
   496  	}
   497  
   498  	least := math.MaxInt32
   499  	var leastNode roachpb.NodeID
   500  	for id, m := range contrib {
   501  		count := len(m)
   502  		if nodes.hasNode(id) {
   503  			if count < least {
   504  				least = count
   505  				leastNode = id
   506  			}
   507  		}
   508  	}
   509  	return leastNode
   510  }