github.com/klaytn/klaytn@v1.12.1/networks/p2p/tracker/tracker.go (about)

     1  // Modifications Copyright 2022 The klaytn Authors
     2  // Copyright 2021 The go-ethereum Authors
     3  // This file is part of the go-ethereum library.
     4  //
     5  // The go-ethereum library is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Lesser General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // The go-ethereum library is distributed in the hope that it will be useful,
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  // GNU Lesser General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Lesser General Public License
    16  // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
    17  //
    18  // This file is derived from p2p/tracker/tracker.go (2022/06/29).
    19  // Modified and improved for the klaytn development.
    20  
    21  package tracker
    22  
    23  import (
    24  	"container/list"
    25  	"fmt"
    26  	"sync"
    27  	"time"
    28  
    29  	"github.com/klaytn/klaytn/log"
    30  	metricutils "github.com/klaytn/klaytn/metrics/utils"
    31  	"github.com/rcrowley/go-metrics"
    32  )
    33  
// logger is the package-level logger used by the tracker. It is created via
// log.NewModuleLogger for the log.SnapshotSync module.
var logger = log.NewModuleLogger(log.SnapshotSync)
    35  
// Metric name prefixes and limits used by the tracker. Every metric name built
// in this file has the form "<prefix>/<protocol>/<version>/<message code>",
// with the message code rendered in hexadecimal.
const (
	// trackedGaugeName is the prefix of the per-packet request tracking gauge.
	// The gauge is incremented in Track and decremented when a request is
	// fulfilled or expires.
	trackedGaugeName = "p2p/tracked"

	// lostMeterName is the prefix of the per-packet request expirations, marked
	// once for every request dropped by clean.
	lostMeterName = "p2p/lost"

	// staleMeterName is the prefix of the per-packet stale responses, marked by
	// Fulfil when a response arrives for an id that is not being tracked.
	staleMeterName = "p2p/stale"

	// waitHistName is the prefix of the per-packet (req only) waiting time histograms.
	// It is currently referenced only by the commented-out histogram code in Fulfil.
	waitHistName = "p2p/wait"

	// maxTrackedPackets is a huge number to act as a failsafe on the number of
	// pending requests the node will track. It should never be hit unless an
	// attacker figures out a way to spin requests. Once this many requests are
	// pending, Track logs an error and refuses to track further ones.
	maxTrackedPackets = 100000
)
    54  
// request tracks sent network requests which have not yet received a response.
// Instances are stored in Tracker.pending, keyed by request id, until they are
// either fulfilled or expired.
type request struct {
	// peer is the peer identifier passed to Track; Fulfil only accepts a
	// response that carries the same identifier.
	peer    string
	version uint // Protocol version

	reqCode uint64 // Protocol message code of the request
	resCode uint64 // Protocol message code of the expected response

	// time is compared against the tracker timeout to decide on expiration.
	// expire is the element of Tracker.expire whose Value is this request's id.
	time   time.Time     // Timestamp when the request was made
	expire *list.Element // Expiration marker to untrack it
}
    66  
// Tracker is a pending network request tracker to measure how much time it takes
// a remote peer to respond.
//
// The pending map and the expire list are kept in sync: every tracked id has
// exactly one entry in each. Requests are appended to the back of the list as
// they are tracked, so the front element is always the oldest one.
type Tracker struct {
	protocol string        // Protocol capability identifier for the metrics
	timeout  time.Duration // Global timeout after which to drop a tracked packet

	// pending is keyed by request id. expire holds the same ids (as uint64
	// values) in insertion order. wake is nil while nothing is being tracked.
	pending map[uint64]*request // Currently pending requests
	expire  *list.List          // Linked list tracking the expiration order
	wake    *time.Timer         // Timer tracking the expiration of the next item

	lock sync.Mutex // Lock protecting from concurrent updates
}
    79  
    80  // New creates a new network request tracker to monitor how much time it takes to
    81  // fill certain requests and how individual peers perform.
    82  func New(protocol string, timeout time.Duration) *Tracker {
    83  	return &Tracker{
    84  		protocol: protocol,
    85  		timeout:  timeout,
    86  		pending:  make(map[uint64]*request),
    87  		expire:   list.New(),
    88  	}
    89  }
    90  
    91  // Track adds a network request to the tracker to wait for a response to arrive
    92  // or until the request it cancelled or times out.
    93  func (t *Tracker) Track(peer string, version uint, reqCode uint64, resCode uint64, id uint64) {
    94  	if !metricutils.Enabled {
    95  		return
    96  	}
    97  	t.lock.Lock()
    98  	defer t.lock.Unlock()
    99  
   100  	// If there's a duplicate request, we've just random-collided (or more probably,
   101  	// we have a bug), report it. We could also add a metric, but we're not really
   102  	// expecting ourselves to be buggy, so a noisy warning should be enough.
   103  	if _, ok := t.pending[id]; ok {
   104  		logger.Error("Network request id collision", "protocol", t.protocol, "version", version, "code", reqCode, "id", id)
   105  		return
   106  	}
   107  	// If we have too many pending requests, bail out instead of leaking memory
   108  	if pending := len(t.pending); pending >= maxTrackedPackets {
   109  		logger.Error("Request tracker exceeded allowance", "pending", pending, "peer", peer, "protocol", t.protocol, "version", version, "code", reqCode)
   110  		return
   111  	}
   112  	// Id doesn't exist yet, start tracking it
   113  	t.pending[id] = &request{
   114  		peer:    peer,
   115  		version: version,
   116  		reqCode: reqCode,
   117  		resCode: resCode,
   118  		time:    time.Now(),
   119  		expire:  t.expire.PushBack(id),
   120  	}
   121  	g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, version, reqCode)
   122  	gauge := metrics.GetOrRegisterGauge(g, nil)
   123  	gauge.Update(gauge.Value() + 1)
   124  
   125  	// If we've just inserted the first item, start the expiration timer
   126  	if t.wake == nil {
   127  		t.wake = time.AfterFunc(t.timeout, t.clean)
   128  	}
   129  }
   130  
   131  // clean is called automatically when a preset time passes without a response
   132  // being dleivered for the first network request.
   133  func (t *Tracker) clean() {
   134  	t.lock.Lock()
   135  	defer t.lock.Unlock()
   136  
   137  	// Expire anything within a certain threshold (might be no items at all if
   138  	// we raced with the delivery)
   139  	for t.expire.Len() > 0 {
   140  		// Stop iterating if the next pending request is still alive
   141  		var (
   142  			head = t.expire.Front()
   143  			id   = head.Value.(uint64)
   144  			req  = t.pending[id]
   145  		)
   146  		if time.Since(req.time) < t.timeout+5*time.Millisecond {
   147  			break
   148  		}
   149  		// Nope, dead, drop it
   150  		t.expire.Remove(head)
   151  		delete(t.pending, id)
   152  
   153  		g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, req.version, req.reqCode)
   154  		gauge := metrics.GetOrRegisterGauge(g, nil)
   155  		gauge.Update(gauge.Value() - 1)
   156  
   157  		m := fmt.Sprintf("%s/%s/%d/%#02x", lostMeterName, t.protocol, req.version, req.reqCode)
   158  		metrics.GetOrRegisterMeter(m, nil).Mark(1)
   159  	}
   160  	t.schedule()
   161  }
   162  
   163  // schedule starts a timer to trigger on the expiration of the first network
   164  // packet.
   165  func (t *Tracker) schedule() {
   166  	if t.expire.Len() == 0 {
   167  		t.wake = nil
   168  		return
   169  	}
   170  	t.wake = time.AfterFunc(time.Until(t.pending[t.expire.Front().Value.(uint64)].time.Add(t.timeout)), t.clean)
   171  }
   172  
   173  // Fulfil fills a pending request, if any is available, reporting on various metrics.
   174  func (t *Tracker) Fulfil(peer string, version uint, code uint64, id uint64) {
   175  	if !metricutils.Enabled {
   176  		return
   177  	}
   178  	t.lock.Lock()
   179  	defer t.lock.Unlock()
   180  
   181  	// If it's a non existing request, track as stale response
   182  	req, ok := t.pending[id]
   183  	if !ok {
   184  		m := fmt.Sprintf("%s/%s/%d/%#02x", staleMeterName, t.protocol, version, code)
   185  		metrics.GetOrRegisterMeter(m, nil).Mark(1)
   186  		return
   187  	}
   188  	// If the response is funky, it might be some active attack
   189  	if req.peer != peer || req.version != version || req.resCode != code {
   190  		logger.Warn("Network response id collision",
   191  			"have", fmt.Sprintf("%s:%s/%d:%d", peer, t.protocol, version, code),
   192  			"want", fmt.Sprintf("%s:%s/%d:%d", peer, t.protocol, req.version, req.resCode),
   193  		)
   194  		return
   195  	}
   196  	// Everything matches, mark the request serviced and meter it
   197  	t.expire.Remove(req.expire)
   198  	delete(t.pending, id)
   199  	if req.expire.Prev() == nil {
   200  		if t.wake.Stop() {
   201  			t.schedule()
   202  		}
   203  	}
   204  	g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, req.version, req.reqCode)
   205  	gauge := metrics.GetOrRegisterGauge(g, nil)
   206  	gauge.Update(gauge.Value() - 1)
   207  
   208  	// TODO-Klaytn-SnapSync update the following metrics if necessary
   209  	//h := fmt.Sprintf("%s/%s/%d/%#02x", waitHistName, t.protocol, req.version, req.reqCode)
   210  	//sampler := func() metrics.Sample {
   211  	//	return metrics.ResettingSample(
   212  	//		metrics.NewExpDecaySample(1028, 0.015),
   213  	//	)
   214  	//}
   215  	//metrics.GetOrRegisterHistogramLazy(h, nil, sampler).Update(time.Since(req.time).Microseconds())
   216  }