github.com/bloxroute-labs/bor@v0.1.4/les/retrieve.go (about) 1 // Copyright 2017 The go-ethereum Authors 2 // This file is part of the go-ethereum library. 3 // 4 // The go-ethereum library is free software: you can redistribute it and/or modify 5 // it under the terms of the GNU Lesser General Public License as published by 6 // the Free Software Foundation, either version 3 of the License, or 7 // (at your option) any later version. 8 // 9 // The go-ethereum library is distributed in the hope that it will be useful, 10 // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 // GNU Lesser General Public License for more details. 13 // 14 // You should have received a copy of the GNU Lesser General Public License 15 // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 17 package les 18 19 import ( 20 "context" 21 "crypto/rand" 22 "encoding/binary" 23 "fmt" 24 "sync" 25 "time" 26 27 "github.com/maticnetwork/bor/common/mclock" 28 "github.com/maticnetwork/bor/light" 29 ) 30 31 var ( 32 retryQueue = time.Millisecond * 100 33 softRequestTimeout = time.Millisecond * 500 34 hardRequestTimeout = time.Second * 10 35 ) 36 37 // retrieveManager is a layer on top of requestDistributor which takes care of 38 // matching replies by request ID and handles timeouts and resends if necessary. 39 type retrieveManager struct { 40 dist *requestDistributor 41 peers *peerSet 42 serverPool peerSelector 43 44 lock sync.RWMutex 45 sentReqs map[uint64]*sentReq 46 } 47 48 // validatorFunc is a function that processes a reply message 49 type validatorFunc func(distPeer, *Msg) error 50 51 // peerSelector receives feedback info about response times and timeouts 52 type peerSelector interface { 53 adjustResponseTime(*poolEntry, time.Duration, bool) 54 } 55 56 // sentReq represents a request sent and tracked by retrieveManager 57 type sentReq struct { 58 rm *retrieveManager 59 req *distReq 60 id uint64 61 validate validatorFunc 62 63 eventsCh chan reqPeerEvent 64 stopCh chan struct{} 65 stopped bool 66 err error 67 68 lock sync.RWMutex // protect access to sentTo map 69 sentTo map[distPeer]sentReqToPeer 70 71 lastReqQueued bool // last request has been queued but not sent 72 lastReqSentTo distPeer // if not nil then last request has been sent to given peer but not timed out 73 reqSrtoCount int // number of requests that reached soft (but not hard) timeout 74 } 75 76 // sentReqToPeer notifies the request-from-peer goroutine (tryRequest) about a response 77 // delivered by the given peer. Only one delivery is allowed per request per peer, 78 // after which delivered is set to true, the validity of the response is sent on the 79 // valid channel and no more responses are accepted. 80 type sentReqToPeer struct { 81 delivered, frozen bool 82 event chan int 83 } 84 85 // reqPeerEvent is sent by the request-from-peer goroutine (tryRequest) to the 86 // request state machine (retrieveLoop) through the eventsCh channel. 87 type reqPeerEvent struct { 88 event int 89 peer distPeer 90 } 91 92 const ( 93 rpSent = iota // if peer == nil, not sent (no suitable peers) 94 rpSoftTimeout 95 rpHardTimeout 96 rpDeliveredValid 97 rpDeliveredInvalid 98 rpNotDelivered 99 ) 100 101 // newRetrieveManager creates the retrieve manager 102 func newRetrieveManager(peers *peerSet, dist *requestDistributor, serverPool peerSelector) *retrieveManager { 103 return &retrieveManager{ 104 peers: peers, 105 dist: dist, 106 serverPool: serverPool, 107 sentReqs: make(map[uint64]*sentReq), 108 } 109 } 110 111 // retrieve sends a request (to multiple peers if necessary) and waits for an answer 112 // that is delivered through the deliver function and successfully validated by the 113 // validator callback. It returns when a valid answer is delivered or the context is 114 // cancelled. 115 func (rm *retrieveManager) retrieve(ctx context.Context, reqID uint64, req *distReq, val validatorFunc, shutdown chan struct{}) error { 116 sentReq := rm.sendReq(reqID, req, val) 117 select { 118 case <-sentReq.stopCh: 119 case <-ctx.Done(): 120 sentReq.stop(ctx.Err()) 121 case <-shutdown: 122 sentReq.stop(fmt.Errorf("Client is shutting down")) 123 } 124 return sentReq.getError() 125 } 126 127 // sendReq starts a process that keeps trying to retrieve a valid answer for a 128 // request from any suitable peers until stopped or succeeded. 129 func (rm *retrieveManager) sendReq(reqID uint64, req *distReq, val validatorFunc) *sentReq { 130 r := &sentReq{ 131 rm: rm, 132 req: req, 133 id: reqID, 134 sentTo: make(map[distPeer]sentReqToPeer), 135 stopCh: make(chan struct{}), 136 eventsCh: make(chan reqPeerEvent, 10), 137 validate: val, 138 } 139 140 canSend := req.canSend 141 req.canSend = func(p distPeer) bool { 142 // add an extra check to canSend: the request has not been sent to the same peer before 143 r.lock.RLock() 144 _, sent := r.sentTo[p] 145 r.lock.RUnlock() 146 return !sent && canSend(p) 147 } 148 149 request := req.request 150 req.request = func(p distPeer) func() { 151 // before actually sending the request, put an entry into the sentTo map 152 r.lock.Lock() 153 r.sentTo[p] = sentReqToPeer{delivered: false, frozen: false, event: make(chan int, 1)} 154 r.lock.Unlock() 155 return request(p) 156 } 157 rm.lock.Lock() 158 rm.sentReqs[reqID] = r 159 rm.lock.Unlock() 160 161 go r.retrieveLoop() 162 return r 163 } 164 165 // deliver is called by the LES protocol manager to deliver reply messages to waiting requests 166 func (rm *retrieveManager) deliver(peer distPeer, msg *Msg) error { 167 rm.lock.RLock() 168 req, ok := rm.sentReqs[msg.ReqID] 169 rm.lock.RUnlock() 170 171 if ok { 172 return req.deliver(peer, msg) 173 } 174 return errResp(ErrUnexpectedResponse, "reqID = %v", msg.ReqID) 175 } 176 177 // frozen is called by the LES protocol manager when a server has suspended its service and we 178 // should not expect an answer for the requests already sent there 179 func (rm *retrieveManager) frozen(peer distPeer) { 180 rm.lock.RLock() 181 defer rm.lock.RUnlock() 182 183 for _, req := range rm.sentReqs { 184 req.frozen(peer) 185 } 186 } 187 188 // reqStateFn represents a state of the retrieve loop state machine 189 type reqStateFn func() reqStateFn 190 191 // retrieveLoop is the retrieval state machine event loop 192 func (r *sentReq) retrieveLoop() { 193 go r.tryRequest() 194 r.lastReqQueued = true 195 state := r.stateRequesting 196 197 for state != nil { 198 state = state() 199 } 200 201 r.rm.lock.Lock() 202 delete(r.rm.sentReqs, r.id) 203 r.rm.lock.Unlock() 204 } 205 206 // stateRequesting: a request has been queued or sent recently; when it reaches soft timeout, 207 // a new request is sent to a new peer 208 func (r *sentReq) stateRequesting() reqStateFn { 209 select { 210 case ev := <-r.eventsCh: 211 r.update(ev) 212 switch ev.event { 213 case rpSent: 214 if ev.peer == nil { 215 // request send failed, no more suitable peers 216 if r.waiting() { 217 // we are already waiting for sent requests which may succeed so keep waiting 218 return r.stateNoMorePeers 219 } 220 // nothing to wait for, no more peers to ask, return with error 221 r.stop(light.ErrNoPeers) 222 // no need to go to stopped state because waiting() already returned false 223 return nil 224 } 225 case rpSoftTimeout: 226 // last request timed out, try asking a new peer 227 go r.tryRequest() 228 r.lastReqQueued = true 229 return r.stateRequesting 230 case rpDeliveredInvalid, rpNotDelivered: 231 // if it was the last sent request (set to nil by update) then start a new one 232 if !r.lastReqQueued && r.lastReqSentTo == nil { 233 go r.tryRequest() 234 r.lastReqQueued = true 235 } 236 return r.stateRequesting 237 case rpDeliveredValid: 238 r.stop(nil) 239 return r.stateStopped 240 } 241 return r.stateRequesting 242 case <-r.stopCh: 243 return r.stateStopped 244 } 245 } 246 247 // stateNoMorePeers: could not send more requests because no suitable peers are available. 248 // Peers may become suitable for a certain request later or new peers may appear so we 249 // keep trying. 250 func (r *sentReq) stateNoMorePeers() reqStateFn { 251 select { 252 case <-time.After(retryQueue): 253 go r.tryRequest() 254 r.lastReqQueued = true 255 return r.stateRequesting 256 case ev := <-r.eventsCh: 257 r.update(ev) 258 if ev.event == rpDeliveredValid { 259 r.stop(nil) 260 return r.stateStopped 261 } 262 if r.waiting() { 263 return r.stateNoMorePeers 264 } 265 r.stop(light.ErrNoPeers) 266 return nil 267 case <-r.stopCh: 268 return r.stateStopped 269 } 270 } 271 272 // stateStopped: request succeeded or cancelled, just waiting for some peers 273 // to either answer or time out hard 274 func (r *sentReq) stateStopped() reqStateFn { 275 for r.waiting() { 276 r.update(<-r.eventsCh) 277 } 278 return nil 279 } 280 281 // update updates the queued/sent flags and timed out peers counter according to the event 282 func (r *sentReq) update(ev reqPeerEvent) { 283 switch ev.event { 284 case rpSent: 285 r.lastReqQueued = false 286 r.lastReqSentTo = ev.peer 287 case rpSoftTimeout: 288 r.lastReqSentTo = nil 289 r.reqSrtoCount++ 290 case rpHardTimeout: 291 r.reqSrtoCount-- 292 case rpDeliveredValid, rpDeliveredInvalid, rpNotDelivered: 293 if ev.peer == r.lastReqSentTo { 294 r.lastReqSentTo = nil 295 } else { 296 r.reqSrtoCount-- 297 } 298 } 299 } 300 301 // waiting returns true if the retrieval mechanism is waiting for an answer from 302 // any peer 303 func (r *sentReq) waiting() bool { 304 return r.lastReqQueued || r.lastReqSentTo != nil || r.reqSrtoCount > 0 305 } 306 307 // tryRequest tries to send the request to a new peer and waits for it to either 308 // succeed or time out if it has been sent. It also sends the appropriate reqPeerEvent 309 // messages to the request's event channel. 310 func (r *sentReq) tryRequest() { 311 sent := r.rm.dist.queue(r.req) 312 var p distPeer 313 select { 314 case p = <-sent: 315 case <-r.stopCh: 316 if r.rm.dist.cancel(r.req) { 317 p = nil 318 } else { 319 p = <-sent 320 } 321 } 322 323 r.eventsCh <- reqPeerEvent{rpSent, p} 324 if p == nil { 325 return 326 } 327 328 reqSent := mclock.Now() 329 srto, hrto := false, false 330 331 r.lock.RLock() 332 s, ok := r.sentTo[p] 333 r.lock.RUnlock() 334 if !ok { 335 panic(nil) 336 } 337 338 defer func() { 339 // send feedback to server pool and remove peer if hard timeout happened 340 pp, ok := p.(*peer) 341 if ok && r.rm.serverPool != nil { 342 respTime := time.Duration(mclock.Now() - reqSent) 343 r.rm.serverPool.adjustResponseTime(pp.poolEntry, respTime, srto) 344 } 345 if hrto { 346 pp.Log().Debug("Request timed out hard") 347 if r.rm.peers != nil { 348 r.rm.peers.Unregister(pp.id) 349 } 350 } 351 352 r.lock.Lock() 353 delete(r.sentTo, p) 354 r.lock.Unlock() 355 }() 356 357 select { 358 case event := <-s.event: 359 if event == rpNotDelivered { 360 r.lock.Lock() 361 delete(r.sentTo, p) 362 r.lock.Unlock() 363 } 364 r.eventsCh <- reqPeerEvent{event, p} 365 return 366 case <-time.After(softRequestTimeout): 367 srto = true 368 r.eventsCh <- reqPeerEvent{rpSoftTimeout, p} 369 } 370 371 select { 372 case event := <-s.event: 373 if event == rpNotDelivered { 374 r.lock.Lock() 375 delete(r.sentTo, p) 376 r.lock.Unlock() 377 } 378 r.eventsCh <- reqPeerEvent{event, p} 379 case <-time.After(hardRequestTimeout): 380 hrto = true 381 r.eventsCh <- reqPeerEvent{rpHardTimeout, p} 382 } 383 } 384 385 // deliver a reply belonging to this request 386 func (r *sentReq) deliver(peer distPeer, msg *Msg) error { 387 r.lock.Lock() 388 defer r.lock.Unlock() 389 390 s, ok := r.sentTo[peer] 391 if !ok || s.delivered { 392 return errResp(ErrUnexpectedResponse, "reqID = %v", msg.ReqID) 393 } 394 if s.frozen { 395 return nil 396 } 397 valid := r.validate(peer, msg) == nil 398 r.sentTo[peer] = sentReqToPeer{delivered: true, frozen: false, event: s.event} 399 if valid { 400 s.event <- rpDeliveredValid 401 } else { 402 s.event <- rpDeliveredInvalid 403 } 404 if !valid { 405 return errResp(ErrInvalidResponse, "reqID = %v", msg.ReqID) 406 } 407 return nil 408 } 409 410 // frozen sends a "not delivered" event to the peer event channel belonging to the 411 // given peer if the request has been sent there, causing the state machine to not 412 // expect an answer and potentially even send the request to the same peer again 413 // when canSend allows it. 414 func (r *sentReq) frozen(peer distPeer) { 415 r.lock.Lock() 416 defer r.lock.Unlock() 417 418 s, ok := r.sentTo[peer] 419 if ok && !s.delivered && !s.frozen { 420 r.sentTo[peer] = sentReqToPeer{delivered: false, frozen: true, event: s.event} 421 s.event <- rpNotDelivered 422 } 423 } 424 425 // stop stops the retrieval process and sets an error code that will be returned 426 // by getError 427 func (r *sentReq) stop(err error) { 428 r.lock.Lock() 429 if !r.stopped { 430 r.stopped = true 431 r.err = err 432 close(r.stopCh) 433 } 434 r.lock.Unlock() 435 } 436 437 // getError returns any retrieval error (either internally generated or set by the 438 // stop function) after stopCh has been closed 439 func (r *sentReq) getError() error { 440 return r.err 441 } 442 443 // genReqID generates a new random request ID 444 func genReqID() uint64 { 445 var rnd [8]byte 446 rand.Read(rnd[:]) 447 return binary.BigEndian.Uint64(rnd[:]) 448 }