github.com/Unheilbar/quorum@v1.0.0/p2p/tracker/tracker.go (about) 1 // Copyright 2021 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package tracker 18 19 import ( 20 "container/list" 21 "fmt" 22 "sync" 23 "time" 24 25 "github.com/ethereum/go-ethereum/log" 26 "github.com/ethereum/go-ethereum/metrics" 27 ) 28 29 const ( 30 // trackedGaugeName is the prefix of the per-packet request tracking. 31 trackedGaugeName = "p2p/tracked" 32 33 // lostMeterName is the prefix of the per-packet request expirations. 34 lostMeterName = "p2p/lost" 35 36 // staleMeterName is the prefix of the per-packet stale responses. 37 staleMeterName = "p2p/stale" 38 39 // waitHistName is the prefix of the per-packet (req only) waiting time histograms. 40 waitHistName = "p2p/wait" 41 42 // maxTrackedPackets is a huge number to act as a failsafe on the number of 43 // pending requests the node will track. It should never be hit unless an 44 // attacker figures out a way to spin requests. 45 maxTrackedPackets = 100000 46 ) 47 48 // request tracks sent network requests which have not yet received a response. 49 type request struct { 50 peer string 51 version uint // Protocol version 52 53 reqCode uint64 // Protocol message code of the request 54 resCode uint64 // Protocol message code of the expected response 55 56 time time.Time // Timestamp when the request was made 57 expire *list.Element // Expiration marker to untrack it 58 } 59 60 // Tracker is a pending network request tracker to measure how much time it takes 61 // a remote peer to respond. 62 type Tracker struct { 63 protocol string // Protocol capability identifier for the metrics 64 timeout time.Duration // Global timeout after which to drop a tracked packet 65 66 pending map[uint64]*request // Currently pending requests 67 expire *list.List // Linked list tracking the expiration order 68 wake *time.Timer // Timer tracking the expiration of the next item 69 70 lock sync.Mutex // Lock protecting from concurrent updates 71 } 72 73 // New creates a new network request tracker to monitor how much time it takes to 74 // fill certain requests and how individual peers perform. 75 func New(protocol string, timeout time.Duration) *Tracker { 76 return &Tracker{ 77 protocol: protocol, 78 timeout: timeout, 79 pending: make(map[uint64]*request), 80 expire: list.New(), 81 } 82 } 83 84 // Track adds a network request to the tracker to wait for a response to arrive 85 // or until the request it cancelled or times out. 86 func (t *Tracker) Track(peer string, version uint, reqCode uint64, resCode uint64, id uint64) { 87 if !metrics.Enabled { 88 return 89 } 90 t.lock.Lock() 91 defer t.lock.Unlock() 92 93 // If there's a duplicate request, we've just random-collided (or more probably, 94 // we have a bug), report it. We could also add a metric, but we're not really 95 // expecting ourselves to be buggy, so a noisy warning should be enough. 96 if _, ok := t.pending[id]; ok { 97 log.Error("Network request id collision", "protocol", t.protocol, "version", version, "code", reqCode, "id", id) 98 return 99 } 100 // If we have too many pending requests, bail out instead of leaking memory 101 if pending := len(t.pending); pending >= maxTrackedPackets { 102 log.Error("Request tracker exceeded allowance", "pending", pending, "peer", peer, "protocol", t.protocol, "version", version, "code", reqCode) 103 return 104 } 105 // Id doesn't exist yet, start tracking it 106 t.pending[id] = &request{ 107 peer: peer, 108 version: version, 109 reqCode: reqCode, 110 resCode: resCode, 111 time: time.Now(), 112 expire: t.expire.PushBack(id), 113 } 114 g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, version, reqCode) 115 metrics.GetOrRegisterGauge(g, nil).Inc(1) 116 117 // If we've just inserted the first item, start the expiration timer 118 if t.wake == nil { 119 t.wake = time.AfterFunc(t.timeout, t.clean) 120 } 121 } 122 123 // clean is called automatically when a preset time passes without a response 124 // being dleivered for the first network request. 125 func (t *Tracker) clean() { 126 t.lock.Lock() 127 defer t.lock.Unlock() 128 129 // Expire anything within a certain threshold (might be no items at all if 130 // we raced with the delivery) 131 for t.expire.Len() > 0 { 132 // Stop iterating if the next pending request is still alive 133 var ( 134 head = t.expire.Front() 135 id = head.Value.(uint64) 136 req = t.pending[id] 137 ) 138 if time.Since(req.time) < t.timeout+5*time.Millisecond { 139 break 140 } 141 // Nope, dead, drop it 142 t.expire.Remove(head) 143 delete(t.pending, id) 144 145 g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, req.version, req.reqCode) 146 metrics.GetOrRegisterGauge(g, nil).Dec(1) 147 148 m := fmt.Sprintf("%s/%s/%d/%#02x", lostMeterName, t.protocol, req.version, req.reqCode) 149 metrics.GetOrRegisterMeter(m, nil).Mark(1) 150 } 151 t.schedule() 152 } 153 154 // schedule starts a timer to trigger on the expiration of the first network 155 // packet. 156 func (t *Tracker) schedule() { 157 if t.expire.Len() == 0 { 158 t.wake = nil 159 return 160 } 161 t.wake = time.AfterFunc(time.Until(t.pending[t.expire.Front().Value.(uint64)].time.Add(t.timeout)), t.clean) 162 } 163 164 // Fulfil fills a pending request, if any is available, reporting on various metrics. 165 func (t *Tracker) Fulfil(peer string, version uint, code uint64, id uint64) { 166 if !metrics.Enabled { 167 return 168 } 169 t.lock.Lock() 170 defer t.lock.Unlock() 171 172 // If it's a non existing request, track as stale response 173 req, ok := t.pending[id] 174 if !ok { 175 m := fmt.Sprintf("%s/%s/%d/%#02x", staleMeterName, t.protocol, version, code) 176 metrics.GetOrRegisterMeter(m, nil).Mark(1) 177 return 178 } 179 // If the response is funky, it might be some active attack 180 if req.peer != peer || req.version != version || req.resCode != code { 181 log.Warn("Network response id collision", 182 "have", fmt.Sprintf("%s:%s/%d:%d", peer, t.protocol, version, code), 183 "want", fmt.Sprintf("%s:%s/%d:%d", peer, t.protocol, req.version, req.resCode), 184 ) 185 return 186 } 187 // Everything matches, mark the request serviced and meter it 188 t.expire.Remove(req.expire) 189 delete(t.pending, id) 190 if req.expire.Prev() == nil { 191 if t.wake.Stop() { 192 t.schedule() 193 } 194 } 195 g := fmt.Sprintf("%s/%s/%d/%#02x", trackedGaugeName, t.protocol, req.version, req.reqCode) 196 metrics.GetOrRegisterGauge(g, nil).Dec(1) 197 198 h := fmt.Sprintf("%s/%s/%d/%#02x", waitHistName, t.protocol, req.version, req.reqCode) 199 sampler := func() metrics.Sample { 200 return metrics.ResettingSample( 201 metrics.NewExpDecaySample(1028, 0.015), 202 ) 203 } 204 metrics.GetOrRegisterHistogramLazy(h, nil, sampler).Update(time.Since(req.time).Microseconds()) 205 }