github.com/klaytn/klaytn@v1.12.1/networks/p2p/tracker/tracker.go (about) 1 // Modifications Copyright 2022 The klaytn Authors 2 // Copyright 2021 The go-ethereum Authors 3 // This file is part of the go-ethereum library. 4 // 5 // The go-ethereum library is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Lesser General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // The go-ethereum library is distributed in the hope that it will be useful, 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Lesser General Public License for more details. 14 // 15 // You should have received a copy of the GNU Lesser General Public License 16 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 17 // 18 // This file is derived from p2p/tracker/tracker.go (2022/06/29). 19 // Modified and improved for the klaytn development. 20 21 package tracker 22 23 import ( 24 "container/list" 25 "fmt" 26 "sync" 27 "time" 28 29 "github.com/klaytn/klaytn/log" 30 metricutils "github.com/klaytn/klaytn/metrics/utils" 31 "github.com/rcrowley/go-metrics" 32 ) 33 34 var logger = log.NewModuleLogger(log.SnapshotSync) 35 36 const ( 37 // trackedGaugeName is the prefix of the per-packet request tracking. 38 trackedGaugeName = "p2p/tracked" 39 40 // lostMeterName is the prefix of the per-packet request expirations. 41 lostMeterName = "p2p/lost" 42 43 // staleMeterName is the prefix of the per-packet stale responses. 44 staleMeterName = "p2p/stale" 45 46 // waitHistName is the prefix of the per-packet (req only) waiting time histograms. 47 waitHistName = "p2p/wait" 48 49 // maxTrackedPackets is a huge number to act as a failsafe on the number of 50 // pending requests the node will track. It should never be hit unless an 51 // attacker figures out a way to spin requests. 52 maxTrackedPackets = 100000 53 ) 54 55 // request tracks sent network requests which have not yet received a response. 56 type request struct { 57 peer string 58 version uint // Protocol version 59 60 reqCode uint64 // Protocol message code of the request 61 resCode uint64 // Protocol message code of the expected response 62 63 time time.Time // Timestamp when the request was made 64 expire *list.Element // Expiration marker to untrack it 65 } 66 67 // Tracker is a pending network request tracker to measure how much time it takes 68 // a remote peer to respond. 69 type Tracker struct { 70 protocol string // Protocol capability identifier for the metrics 71 timeout time.Duration // Global timeout after which to drop a tracked packet 72 73 pending map[uint64]*request // Currently pending requests 74 expire *list.List // Linked list tracking the expiration order 75 wake *time.Timer // Timer tracking the expiration of the next item 76 77 lock sync.Mutex // Lock protecting from concurrent updates 78 } 79 80 // New creates a new network request tracker to monitor how much time it takes to 81 // fill certain requests and how individual peers perform. 82 func New(protocol string, timeout time.Duration) *Tracker { 83 return &Tracker{ 84 protocol: protocol, 85 timeout: timeout, 86 pending: make(map[uint64]*request), 87 expire: list.New(), 88 } 89 } 90 91 // Track adds a network request to the tracker to wait for a response to arrive 92 // or until the request it cancelled or times out. 93 func (t *Tracker) Track(peer string, version uint, reqCode uint64, resCode uint64, id uint64) { 94 if !metricutils.Enabled { 95 return 96 } 97 t.lock.Lock() 98 defer t.lock.Unlock() 99 100 // If there's a duplicate request, we've just random-collided (or more probably, 101 // we have a bug), report it. We could also add a metric, but we're not really 102 // expecting ourselves to be buggy, so a noisy warning should be enough. 103 if _, ok := t.pending[id]; ok { 104 logger.Error("Network request id collision", "protocol", t.protocol, "version", version, "code", reqCode, "id", id) 105 return 106 } 107 // If we have too many pending requests, bail out instead of leaking memory 108 if pending := len(t.pending); pending >= maxTrackedPackets { 109 logger.Error("Request tracker exceeded allowance", "pending", pending, "peer", peer, "protocol", t.protocol, "version", version, "code", reqCode) 110 return 111 } 112 // Id doesn't exist yet, start tracking it 113 t.pending[id] = &request{ 114 peer: peer, 115 version: version, 116 reqCode: reqCode, 117 resCode: resCode, 118 time: time.Now(), 119 expire: t.expire.PushBack(id), 120 } 121 g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, version, reqCode) 122 gauge := metrics.GetOrRegisterGauge(g, nil) 123 gauge.Update(gauge.Value() + 1) 124 125 // If we've just inserted the first item, start the expiration timer 126 if t.wake == nil { 127 t.wake = time.AfterFunc(t.timeout, t.clean) 128 } 129 } 130 131 // clean is called automatically when a preset time passes without a response 132 // being dleivered for the first network request. 133 func (t *Tracker) clean() { 134 t.lock.Lock() 135 defer t.lock.Unlock() 136 137 // Expire anything within a certain threshold (might be no items at all if 138 // we raced with the delivery) 139 for t.expire.Len() > 0 { 140 // Stop iterating if the next pending request is still alive 141 var ( 142 head = t.expire.Front() 143 id = head.Value.(uint64) 144 req = t.pending[id] 145 ) 146 if time.Since(req.time) < t.timeout+5*time.Millisecond { 147 break 148 } 149 // Nope, dead, drop it 150 t.expire.Remove(head) 151 delete(t.pending, id) 152 153 g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, req.version, req.reqCode) 154 gauge := metrics.GetOrRegisterGauge(g, nil) 155 gauge.Update(gauge.Value() - 1) 156 157 m := fmt.Sprintf("%s/%s/%d/%#02x", lostMeterName, t.protocol, req.version, req.reqCode) 158 metrics.GetOrRegisterMeter(m, nil).Mark(1) 159 } 160 t.schedule() 161 } 162 163 // schedule starts a timer to trigger on the expiration of the first network 164 // packet. 165 func (t *Tracker) schedule() { 166 if t.expire.Len() == 0 { 167 t.wake = nil 168 return 169 } 170 t.wake = time.AfterFunc(time.Until(t.pending[t.expire.Front().Value.(uint64)].time.Add(t.timeout)), t.clean) 171 } 172 173 // Fulfil fills a pending request, if any is available, reporting on various metrics. 174 func (t *Tracker) Fulfil(peer string, version uint, code uint64, id uint64) { 175 if !metricutils.Enabled { 176 return 177 } 178 t.lock.Lock() 179 defer t.lock.Unlock() 180 181 // If it's a non existing request, track as stale response 182 req, ok := t.pending[id] 183 if !ok { 184 m := fmt.Sprintf("%s/%s/%d/%#02x", staleMeterName, t.protocol, version, code) 185 metrics.GetOrRegisterMeter(m, nil).Mark(1) 186 return 187 } 188 // If the response is funky, it might be some active attack 189 if req.peer != peer || req.version != version || req.resCode != code { 190 logger.Warn("Network response id collision", 191 "have", fmt.Sprintf("%s:%s/%d:%d", peer, t.protocol, version, code), 192 "want", fmt.Sprintf("%s:%s/%d:%d", peer, t.protocol, req.version, req.resCode), 193 ) 194 return 195 } 196 // Everything matches, mark the request serviced and meter it 197 t.expire.Remove(req.expire) 198 delete(t.pending, id) 199 if req.expire.Prev() == nil { 200 if t.wake.Stop() { 201 t.schedule() 202 } 203 } 204 g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, req.version, req.reqCode) 205 gauge := metrics.GetOrRegisterGauge(g, nil) 206 gauge.Update(gauge.Value() - 1) 207 208 // TODO-Klaytn-SnapSync update the following metrics if necessary 209 //h := fmt.Sprintf("%s/%s/%d/%#02x", waitHistName, t.protocol, req.version, req.reqCode) 210 //sampler := func() metrics.Sample { 211 // return metrics.ResettingSample( 212 // metrics.NewExpDecaySample(1028, 0.015), 213 // ) 214 //} 215 //metrics.GetOrRegisterHistogramLazy(h, nil, sampler).Update(time.Since(req.time).Microseconds()) 216 }