github.com/aidoskuneen/adk-node@v0.0.0-20220315131952-2e32567cb7f4/p2p/tracker/tracker.go (about)

     1  // Copyright 2021 The adkgo Authors
     2  // This file is part of the adkgo library (adapted for adkgo from go--ethereum v1.10.8).
     3  //
     4  // the adkgo library is free software: you can redistribute it and/or modify
     5  // it under the terms of the GNU Lesser General Public License as published by
     6  // the Free Software Foundation, either version 3 of the License, or
     7  // (at your option) any later version.
     8  //
     9  // the adkgo library is distributed in the hope that it will be useful,
    10  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  // GNU Lesser General Public License for more details.
    13  //
    14  // You should have received a copy of the GNU Lesser General Public License
    15  // along with the adkgo library. If not, see <http://www.gnu.org/licenses/>.
    16  
    17  package tracker
    18  
    19  import (
    20  	"container/list"
    21  	"fmt"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/aidoskuneen/adk-node/log"
    26  	"github.com/aidoskuneen/adk-node/metrics"
    27  )
    28  
    29  const (
    30  	// trackedGaugeName is the prefix of the per-packet request tracking.
    31  	trackedGaugeName = "p2p/tracked"
    32  
    33  	// lostMeterName is the prefix of the per-packet request expirations.
    34  	lostMeterName = "p2p/lost"
    35  
    36  	// staleMeterName is the prefix of the per-packet stale responses.
    37  	staleMeterName = "p2p/stale"
    38  
    39  	// waitHistName is the prefix of the per-packet (req only) waiting time histograms.
    40  	waitHistName = "p2p/wait"
    41  
    42  	// maxTrackedPackets is a huge number to act as a failsafe on the number of
    43  	// pending requests the node will track. It should never be hit unless an
    44  	// attacker figures out a way to spin requests.
    45  	maxTrackedPackets = 100000
    46  )
    47  
    48  // request tracks sent network requests which have not yet received a response.
    49  type request struct {
    50  	peer    string
    51  	version uint // Protocol version
    52  
    53  	reqCode uint64 // Protocol message code of the request
    54  	resCode uint64 // Protocol message code of the expected response
    55  
    56  	time   time.Time     // Timestamp when the request was made
    57  	expire *list.Element // Expiration marker to untrack it
    58  }
    59  
    60  // Tracker is a pending network request tracker to measure how much time it takes
    61  // a remote peer to respond.
    62  type Tracker struct {
    63  	protocol string        // Protocol capability identifier for the metrics
    64  	timeout  time.Duration // Global timeout after which to drop a tracked packet
    65  
    66  	pending map[uint64]*request // Currently pending requests
    67  	expire  *list.List          // Linked list tracking the expiration order
    68  	wake    *time.Timer         // Timer tracking the expiration of the next item
    69  
    70  	lock sync.Mutex // Lock protecting from concurrent updates
    71  }
    72  
    73  // New creates a new network request tracker to monitor how much time it takes to
    74  // fill certain requests and how individual peers perform.
    75  func New(protocol string, timeout time.Duration) *Tracker {
    76  	return &Tracker{
    77  		protocol: protocol,
    78  		timeout:  timeout,
    79  		pending:  make(map[uint64]*request),
    80  		expire:   list.New(),
    81  	}
    82  }
    83  
    84  // Track adds a network request to the tracker to wait for a response to arrive
    85  // or until the request it cancelled or times out.
    86  func (t *Tracker) Track(peer string, version uint, reqCode uint64, resCode uint64, id uint64) {
    87  	if !metrics.Enabled {
    88  		return
    89  	}
    90  	t.lock.Lock()
    91  	defer t.lock.Unlock()
    92  
    93  	// If there's a duplicate request, we've just random-collided (or more probably,
    94  	// we have a bug), report it. We could also add a metric, but we're not really
    95  	// expecting ourselves to be buggy, so a noisy warning should be enough.
    96  	if _, ok := t.pending[id]; ok {
    97  		log.Error("Network request id collision", "protocol", t.protocol, "version", version, "code", reqCode, "id", id)
    98  		return
    99  	}
   100  	// If we have too many pending requests, bail out instead of leaking memory
   101  	if pending := len(t.pending); pending >= maxTrackedPackets {
   102  		log.Error("Request tracker exceeded allowance", "pending", pending, "peer", peer, "protocol", t.protocol, "version", version, "code", reqCode)
   103  		return
   104  	}
   105  	// Id doesn't exist yet, start tracking it
   106  	t.pending[id] = &request{
   107  		peer:    peer,
   108  		version: version,
   109  		reqCode: reqCode,
   110  		resCode: resCode,
   111  		time:    time.Now(),
   112  		expire:  t.expire.PushBack(id),
   113  	}
   114  	g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, version, reqCode)
   115  	metrics.GetOrRegisterGauge(g, nil).Inc(1)
   116  
   117  	// If we've just inserted the first item, start the expiration timer
   118  	if t.wake == nil {
   119  		t.wake = time.AfterFunc(t.timeout, t.clean)
   120  	}
   121  }
   122  
   123  // clean is called automatically when a preset time passes without a response
   124  // being dleivered for the first network request.
   125  func (t *Tracker) clean() {
   126  	t.lock.Lock()
   127  	defer t.lock.Unlock()
   128  
   129  	// Expire anything within a certain threshold (might be no items at all if
   130  	// we raced with the delivery)
   131  	for t.expire.Len() > 0 {
   132  		// Stop iterating if the next pending request is still alive
   133  		var (
   134  			head = t.expire.Front()
   135  			id   = head.Value.(uint64)
   136  			req  = t.pending[id]
   137  		)
   138  		if time.Since(req.time) < t.timeout+5*time.Millisecond {
   139  			break
   140  		}
   141  		// Nope, dead, drop it
   142  		t.expire.Remove(head)
   143  		delete(t.pending, id)
   144  
   145  		g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, req.version, req.reqCode)
   146  		metrics.GetOrRegisterGauge(g, nil).Dec(1)
   147  
   148  		m := fmt.Sprintf("%s/%s/%d/%#02x", lostMeterName, t.protocol, req.version, req.reqCode)
   149  		metrics.GetOrRegisterMeter(m, nil).Mark(1)
   150  	}
   151  	t.schedule()
   152  }
   153  
   154  // schedule starts a timer to trigger on the expiration of the first network
   155  // packet.
   156  func (t *Tracker) schedule() {
   157  	if t.expire.Len() == 0 {
   158  		t.wake = nil
   159  		return
   160  	}
   161  	t.wake = time.AfterFunc(time.Until(t.pending[t.expire.Front().Value.(uint64)].time.Add(t.timeout)), t.clean)
   162  }
   163  
   164  // Fulfil fills a pending request, if any is available, reporting on various metrics.
   165  func (t *Tracker) Fulfil(peer string, version uint, code uint64, id uint64) {
   166  	if !metrics.Enabled {
   167  		return
   168  	}
   169  	t.lock.Lock()
   170  	defer t.lock.Unlock()
   171  
   172  	// If it's a non existing request, track as stale response
   173  	req, ok := t.pending[id]
   174  	if !ok {
   175  		m := fmt.Sprintf("%s/%s/%d/%#02x", staleMeterName, t.protocol, version, code)
   176  		metrics.GetOrRegisterMeter(m, nil).Mark(1)
   177  		return
   178  	}
   179  	// If the response is funky, it might be some active attack
   180  	if req.peer != peer || req.version != version || req.resCode != code {
   181  		log.Warn("Network response id collision",
   182  			"have", fmt.Sprintf("%s:%s/%d:%d", peer, t.protocol, version, code),
   183  			"want", fmt.Sprintf("%s:%s/%d:%d", peer, t.protocol, req.version, req.resCode),
   184  		)
   185  		return
   186  	}
   187  	// Everything matches, mark the request serviced and meter it
   188  	t.expire.Remove(req.expire)
   189  	delete(t.pending, id)
   190  	if req.expire.Prev() == nil {
   191  		if t.wake.Stop() {
   192  			t.schedule()
   193  		}
   194  	}
   195  	g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, req.version, req.reqCode)
   196  	metrics.GetOrRegisterGauge(g, nil).Dec(1)
   197  
   198  	h := fmt.Sprintf("%s/%s/%d/%#02x", waitHistName, t.protocol, req.version, req.reqCode)
   199  	sampler := func() metrics.Sample {
   200  		return metrics.ResettingSample(
   201  			metrics.NewExpDecaySample(1028, 0.015),
   202  		)
   203  	}
   204  	metrics.GetOrRegisterHistogramLazy(h, nil, sampler).Update(time.Since(req.time).Microseconds())
   205  }