github.com/MetalBlockchain/metalgo@v1.11.9/snow/networking/benchlist/benchlist.go (about)

     1  // Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
     2  // See the file LICENSE for licensing terms.
     3  
     4  package benchlist
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"math/rand"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/prometheus/client_golang/prometheus"
    14  	"go.uber.org/zap"
    15  
    16  	"github.com/MetalBlockchain/metalgo/ids"
    17  	"github.com/MetalBlockchain/metalgo/snow"
    18  	"github.com/MetalBlockchain/metalgo/snow/validators"
    19  	"github.com/MetalBlockchain/metalgo/utils/heap"
    20  	"github.com/MetalBlockchain/metalgo/utils/set"
    21  	"github.com/MetalBlockchain/metalgo/utils/timer/mockable"
    22  
    23  	safemath "github.com/MetalBlockchain/metalgo/utils/math"
    24  )
    25  
// Benchlist tracks peers that repeatedly fail to respond to queries.
//
// If a peer consistently does not respond to queries, it will
// increase latencies on the network whenever that peer is polled.
// If we cannot terminate the poll early, then the poll will wait
// the full timeout before finalizing the poll and making progress.
// This can increase network latencies to an undesirable level.
//
// Therefore, nodes that consistently fail are "benched" such that
// queries to that node fail immediately to avoid waiting up to
// the full network timeout for a response.
type Benchlist interface {
	// RegisterResponse registers the response to a query message sent to
	// [nodeID].
	RegisterResponse(nodeID ids.NodeID)
	// RegisterFailure registers that we didn't receive a response from
	// [nodeID] within the timeout.
	RegisterFailure(nodeID ids.NodeID)
	// IsBenched returns true if messages to [nodeID]
	// should not be sent over the network and should immediately fail.
	IsBenched(nodeID ids.NodeID) bool
}
    44  
// failureStreak records the current run of consecutive query timeouts for a
// single node.
type failureStreak struct {
	// Time of first consecutive timeout. The zero value means that no timeout
	// has been recorded in the current streak yet.
	firstFailure time.Time
	// Number of consecutive message timeouts
	consecutive int
}
    51  
// benchlist is the Benchlist implementation returned by NewBenchlist.
type benchlist struct {
	// lock is held whenever [benchlistSet] or [benchedHeap] is read or
	// written. When both [lock] and [streaklock] are needed, [lock] is
	// acquired first.
	lock sync.RWMutex
	// Context of the chain this is the benchlist for
	ctx *snow.ConsensusContext

	// Gauges reporting the number of currently benched validators and their
	// combined weight.
	numBenched, weightBenched prometheus.Gauge

	// Used to notify the timer that it should recalculate when it should fire.
	// Created with a buffer of one and written with a non-blocking send, so a
	// single pending notification is enough.
	resetTimer chan struct{}

	// Tells the time. Can be faked for testing.
	clock mockable.Clock

	// notified when a node is benched or unbenched
	benchable Benchable

	// Validator set of the network
	vdrs validators.Manager

	// Validator ID --> Consecutive failure information
	// [streaklock] must be held when touching [failureStreaks]
	streaklock     sync.Mutex
	failureStreaks map[ids.NodeID]failureStreak

	// IDs of validators that are currently benched
	benchlistSet set.Set[ids.NodeID]

	// Min heap of benched validators ordered by when they can be unbenched
	benchedHeap heap.Map[ids.NodeID, time.Time]

	// A validator will be benched if [threshold] messages in a row
	// to them time out and the first of those messages was more than
	// [minimumFailingDuration] ago
	threshold              int
	minimumFailingDuration time.Duration

	// A benched validator will be benched for between [duration/2] and [duration]
	duration time.Duration

	// The maximum percentage of total network stake that may be benched
	// Must be in [0,1)
	maxPortion float64
}
    95  
    96  // NewBenchlist returns a new Benchlist
    97  func NewBenchlist(
    98  	ctx *snow.ConsensusContext,
    99  	benchable Benchable,
   100  	validators validators.Manager,
   101  	threshold int,
   102  	minimumFailingDuration,
   103  	duration time.Duration,
   104  	maxPortion float64,
   105  	reg prometheus.Registerer,
   106  ) (Benchlist, error) {
   107  	if maxPortion < 0 || maxPortion >= 1 {
   108  		return nil, fmt.Errorf("max portion of benched stake must be in [0,1) but got %f", maxPortion)
   109  	}
   110  
   111  	benchlist := &benchlist{
   112  		ctx: ctx,
   113  		numBenched: prometheus.NewGauge(prometheus.GaugeOpts{
   114  			Name: "benched_num",
   115  			Help: "Number of currently benched validators",
   116  		}),
   117  		weightBenched: prometheus.NewGauge(prometheus.GaugeOpts{
   118  			Name: "benched_weight",
   119  			Help: "Weight of currently benched validators",
   120  		}),
   121  		resetTimer:             make(chan struct{}, 1),
   122  		failureStreaks:         make(map[ids.NodeID]failureStreak),
   123  		benchlistSet:           set.Set[ids.NodeID]{},
   124  		benchable:              benchable,
   125  		benchedHeap:            heap.NewMap[ids.NodeID, time.Time](time.Time.Before),
   126  		vdrs:                   validators,
   127  		threshold:              threshold,
   128  		minimumFailingDuration: minimumFailingDuration,
   129  		duration:               duration,
   130  		maxPortion:             maxPortion,
   131  	}
   132  
   133  	err := errors.Join(
   134  		reg.Register(benchlist.numBenched),
   135  		reg.Register(benchlist.weightBenched),
   136  	)
   137  	if err != nil {
   138  		return nil, err
   139  	}
   140  
   141  	go benchlist.run()
   142  	return benchlist, nil
   143  }
   144  
   145  // TODO: Close this goroutine during node shutdown
   146  func (b *benchlist) run() {
   147  	timer := time.NewTimer(0)
   148  	defer timer.Stop()
   149  
   150  	for {
   151  		// Invariant: The [timer] is not stopped.
   152  		select {
   153  		case <-timer.C:
   154  		case <-b.resetTimer:
   155  			if !timer.Stop() {
   156  				<-timer.C
   157  			}
   158  		}
   159  
   160  		b.waitForBenchedNodes()
   161  
   162  		b.removedExpiredNodes()
   163  
   164  		// Note: If there are no nodes to remove, [duration] will be 0 and we
   165  		// will immediately wait until there are benched nodes.
   166  		duration := b.durationToSleep()
   167  		timer.Reset(duration)
   168  	}
   169  }
   170  
   171  func (b *benchlist) waitForBenchedNodes() {
   172  	for {
   173  		b.lock.RLock()
   174  		_, _, ok := b.benchedHeap.Peek()
   175  		b.lock.RUnlock()
   176  		if ok {
   177  			return
   178  		}
   179  
   180  		// Invariant: Whenever a new node is benched we ensure that resetTimer
   181  		// has a pending message while the write lock is held.
   182  		<-b.resetTimer
   183  	}
   184  }
   185  
   186  func (b *benchlist) removedExpiredNodes() {
   187  	b.lock.Lock()
   188  	defer b.lock.Unlock()
   189  
   190  	now := b.clock.Time()
   191  	for {
   192  		_, next, ok := b.benchedHeap.Peek()
   193  		if !ok {
   194  			break
   195  		}
   196  		if now.Before(next) {
   197  			break
   198  		}
   199  
   200  		nodeID, _, _ := b.benchedHeap.Pop()
   201  		b.ctx.Log.Debug("removing node from benchlist",
   202  			zap.Stringer("nodeID", nodeID),
   203  		)
   204  		b.benchlistSet.Remove(nodeID)
   205  		b.benchable.Unbenched(b.ctx.ChainID, nodeID)
   206  	}
   207  
   208  	b.numBenched.Set(float64(b.benchedHeap.Len()))
   209  	benchedStake, err := b.vdrs.SubsetWeight(b.ctx.SubnetID, b.benchlistSet)
   210  	if err != nil {
   211  		b.ctx.Log.Error("error calculating benched stake",
   212  			zap.Stringer("subnetID", b.ctx.SubnetID),
   213  			zap.Error(err),
   214  		)
   215  		return
   216  	}
   217  	b.weightBenched.Set(float64(benchedStake))
   218  }
   219  
   220  func (b *benchlist) durationToSleep() time.Duration {
   221  	b.lock.RLock()
   222  	defer b.lock.RUnlock()
   223  
   224  	_, next, ok := b.benchedHeap.Peek()
   225  	if !ok {
   226  		return 0
   227  	}
   228  
   229  	now := b.clock.Time()
   230  	return next.Sub(now)
   231  }
   232  
   233  // IsBenched returns true if messages to [nodeID] should not be sent over the
   234  // network and should immediately fail.
   235  func (b *benchlist) IsBenched(nodeID ids.NodeID) bool {
   236  	b.lock.RLock()
   237  	defer b.lock.RUnlock()
   238  
   239  	return b.benchlistSet.Contains(nodeID)
   240  }
   241  
   242  // RegisterResponse notes that we received a response from [nodeID]
   243  func (b *benchlist) RegisterResponse(nodeID ids.NodeID) {
   244  	b.streaklock.Lock()
   245  	defer b.streaklock.Unlock()
   246  
   247  	delete(b.failureStreaks, nodeID)
   248  }
   249  
   250  // RegisterFailure notes that a request to [nodeID] timed out
   251  func (b *benchlist) RegisterFailure(nodeID ids.NodeID) {
   252  	b.lock.Lock()
   253  	defer b.lock.Unlock()
   254  
   255  	if b.benchlistSet.Contains(nodeID) {
   256  		// This validator is benched. Ignore failures until they're not.
   257  		return
   258  	}
   259  
   260  	b.streaklock.Lock()
   261  	failureStreak := b.failureStreaks[nodeID]
   262  	// Increment consecutive failures
   263  	failureStreak.consecutive++
   264  	now := b.clock.Time()
   265  	// Update first failure time
   266  	if failureStreak.firstFailure.IsZero() {
   267  		// This is the first consecutive failure
   268  		failureStreak.firstFailure = now
   269  	}
   270  	b.failureStreaks[nodeID] = failureStreak
   271  	b.streaklock.Unlock()
   272  
   273  	if failureStreak.consecutive >= b.threshold && now.After(failureStreak.firstFailure.Add(b.minimumFailingDuration)) {
   274  		b.bench(nodeID)
   275  	}
   276  }
   277  
   278  // Assumes [b.lock] is held
   279  // Assumes [nodeID] is not already benched
   280  func (b *benchlist) bench(nodeID ids.NodeID) {
   281  	validatorStake := b.vdrs.GetWeight(b.ctx.SubnetID, nodeID)
   282  	if validatorStake == 0 {
   283  		// We might want to bench a non-validator because they don't respond to
   284  		// my Get requests, but we choose to only bench validators.
   285  		return
   286  	}
   287  
   288  	benchedStake, err := b.vdrs.SubsetWeight(b.ctx.SubnetID, b.benchlistSet)
   289  	if err != nil {
   290  		b.ctx.Log.Error("error calculating benched stake",
   291  			zap.Stringer("subnetID", b.ctx.SubnetID),
   292  			zap.Error(err),
   293  		)
   294  		return
   295  	}
   296  
   297  	newBenchedStake, err := safemath.Add64(benchedStake, validatorStake)
   298  	if err != nil {
   299  		// This should never happen
   300  		b.ctx.Log.Error("overflow calculating new benched stake",
   301  			zap.Stringer("nodeID", nodeID),
   302  		)
   303  		return
   304  	}
   305  
   306  	totalStake, err := b.vdrs.TotalWeight(b.ctx.SubnetID)
   307  	if err != nil {
   308  		b.ctx.Log.Error("error calculating total stake",
   309  			zap.Stringer("subnetID", b.ctx.SubnetID),
   310  			zap.Error(err),
   311  		)
   312  		return
   313  	}
   314  
   315  	maxBenchedStake := float64(totalStake) * b.maxPortion
   316  
   317  	if float64(newBenchedStake) > maxBenchedStake {
   318  		b.ctx.Log.Debug("not benching node",
   319  			zap.String("reason", "benched stake would exceed max"),
   320  			zap.Stringer("nodeID", nodeID),
   321  			zap.Float64("benchedStake", float64(newBenchedStake)),
   322  			zap.Float64("maxBenchedStake", maxBenchedStake),
   323  		)
   324  		return
   325  	}
   326  
   327  	// Validator is benched for between [b.duration]/2 and [b.duration]
   328  	now := b.clock.Time()
   329  	minBenchDuration := b.duration / 2
   330  	minBenchedUntil := now.Add(minBenchDuration)
   331  	maxBenchedUntil := now.Add(b.duration)
   332  	diff := maxBenchedUntil.Sub(minBenchedUntil)
   333  	benchedUntil := minBenchedUntil.Add(time.Duration(rand.Float64() * float64(diff))) // #nosec G404
   334  
   335  	b.ctx.Log.Debug("benching validator after consecutive failed queries",
   336  		zap.Stringer("nodeID", nodeID),
   337  		zap.Duration("benchDuration", benchedUntil.Sub(now)),
   338  		zap.Int("numFailedQueries", b.threshold),
   339  	)
   340  
   341  	// Add to benchlist times with randomized delay
   342  	b.benchlistSet.Add(nodeID)
   343  	b.benchable.Benched(b.ctx.ChainID, nodeID)
   344  
   345  	b.streaklock.Lock()
   346  	delete(b.failureStreaks, nodeID)
   347  	b.streaklock.Unlock()
   348  
   349  	b.benchedHeap.Push(nodeID, benchedUntil)
   350  
   351  	// Update the timer to account for the newly benched node.
   352  	select {
   353  	case b.resetTimer <- struct{}{}:
   354  	default:
   355  	}
   356  
   357  	// Update metrics
   358  	b.numBenched.Set(float64(b.benchedHeap.Len()))
   359  	b.weightBenched.Set(float64(newBenchedStake))
   360  }