// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package retrieval provides the retrieval protocol
// implementation. The protocol is used to retrieve
// chunks over the network using forwarding-kademlia
// routing.
package retrieval

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/ethersphere/bee/v2/pkg/accounting"
	"github.com/ethersphere/bee/v2/pkg/cac"
	"github.com/ethersphere/bee/v2/pkg/log"
	"github.com/ethersphere/bee/v2/pkg/p2p"
	"github.com/ethersphere/bee/v2/pkg/p2p/protobuf"
	"github.com/ethersphere/bee/v2/pkg/pricer"
	pb "github.com/ethersphere/bee/v2/pkg/retrieval/pb"
	"github.com/ethersphere/bee/v2/pkg/skippeers"
	"github.com/ethersphere/bee/v2/pkg/soc"
	storage "github.com/ethersphere/bee/v2/pkg/storage"
	"github.com/ethersphere/bee/v2/pkg/swarm"
	"github.com/ethersphere/bee/v2/pkg/topology"
	"github.com/ethersphere/bee/v2/pkg/tracing"
	"github.com/opentracing/opentracing-go"
	"github.com/opentracing/opentracing-go/ext"
	olog "github.com/opentracing/opentracing-go/log"
	"resenje.org/singleflight"
)

// loggerName is the tree path name of the logger for this package.
const loggerName = "retrieval"

// Protocol identity: name, semantic version and the single stream the
// protocol exposes. Peers match on all three when opening a stream.
const (
	protocolName    = "retrieval"
	protocolVersion = "1.4.0"
	streamName      = "retrieval"
)

// Compile-time check that Service satisfies Interface.
var _ Interface = (*Service)(nil)

type Interface interface {
	// RetrieveChunk retrieves a chunk from the network using the retrieval protocol.
	// it takes as parameters a context, a chunk address to retrieve (content-addressed or single-owner) and
	// a source peer address, for the case that we are requesting the chunk for another peer. In case the request
	// originates at the current node (i.e. no forwarding involved), the caller should use swarm.ZeroAddress
	// as the value for sourcePeerAddress.
	RetrieveChunk(ctx context.Context, address, sourcePeerAddr swarm.Address) (chunk swarm.Chunk, err error)
}

// retrievalResult carries the outcome of a single per-peer retrieval
// attempt back to the coordinating loop in RetrieveChunk.
type retrievalResult struct {
	chunk swarm.Chunk   // the retrieved chunk, nil on failure
	peer  swarm.Address // the peer the attempt was made against
	err   error         // non-nil when the attempt failed
}

// Storer is the subset of chunk storage the retrieval service needs:
// a cache to put forwarded chunks into and a getter for local lookups.
type Storer interface {
	Cache() storage.Putter
	Lookup() storage.Getter
}

// Service implements the retrieval protocol (see Interface).
type Service struct {
	addr          swarm.Address                          // this node's overlay address
	radiusFunc    func() (uint8, error)                  // current storage radius provider, used to detect neighborhood proximity
	streamer      p2p.Streamer                           // opens protocol streams to peers
	peerSuggester topology.ClosestPeerer                 // supplies the closest known peer to a chunk address
	storer        Storer                                 // local lookup and forwarder cache
	singleflight  singleflight.Group[string, swarm.Chunk] // deduplicates concurrent requests for the same chunk
	logger        log.Logger
	accounting    accounting.Interface // credit/debit bookkeeping with peers
	metrics       metrics
	pricer        pricer.Interface // chunk price oracle
	tracer        *tracing.Tracer
	caching       bool           // when true, forwarded chunks are cached locally
	errSkip       *skippeers.List // peers that recently failed delivery, skipped temporarily
}

// New constructs a retrieval Service wired to the given dependencies.
// forwarderCaching controls whether chunks retrieved on behalf of other
// peers are put into the local cache.
func New(
	addr swarm.Address,
	radiusFunc func() (uint8, error),
	storer Storer,
	streamer p2p.Streamer,
	chunkPeerer topology.ClosestPeerer,
	logger log.Logger,
	accounting accounting.Interface,
	pricer pricer.Interface,
	tracer *tracing.Tracer,
	forwarderCaching bool,
) *Service {
	return &Service{
		addr:          addr,
		radiusFunc:    radiusFunc,
		streamer:      streamer,
		peerSuggester: chunkPeerer,
		storer:        storer,
		logger:        logger.WithName(loggerName).Register(),
		accounting:    accounting,
		pricer:        pricer,
		metrics:       newMetrics(),
		tracer:        tracer,
		caching:       forwarderCaching,
		errSkip:       skippeers.NewList(),
	}
}

// Protocol returns the p2p protocol specification served by this node,
// registering the single retrieval stream handler.
func (s *Service) Protocol() p2p.ProtocolSpec {
	return p2p.ProtocolSpec{
		Name:    protocolName,
		Version: protocolVersion,
		StreamSpecs: []p2p.StreamSpec{
			{
				Name:    streamName,
				Handler: s.handler,
			},
		},
	}
}

const (
	// RetrieveChunkTimeout bounds one per-peer retrieval attempt.
	RetrieveChunkTimeout = time.Second * 30
	// preemptiveInterval is how often an origin request fires an extra
	// parallel attempt while earlier attempts are still in flight.
	preemptiveInterval = time.Second
	// overDraftRefresh is how long an overdrafted peer is skipped before
	// its accounting balance is considered refreshed.
	overDraftRefresh = time.Millisecond * 600
	// skiplistDur is how long a peer that failed delivery stays on the
	// cross-request error skip list.
	skiplistDur = time.Minute
	// originSuffix distinguishes origin requests from forwarded ones in
	// the singleflight key space.
	originSuffix = "_origin"
	// maxOriginErrors is the per-request error budget for origin requests.
	maxOriginErrors = 32
	// maxMultiplexForwards is the number of extra parallel forwards made
	// when the selected peer is within the chunk's neighborhood.
	maxMultiplexForwards = 2
)
// RetrieveChunk implements Interface.RetrieveChunk. It coordinates one or
// more parallel per-peer retrieval attempts for chunkAddr, deduplicated via
// singleflight so concurrent callers for the same chunk share one request.
// A zero sourcePeerAddr marks the request as originating at this node.
func (s *Service) RetrieveChunk(ctx context.Context, chunkAddr, sourcePeerAddr swarm.Address) (swarm.Chunk, error) {
	loggerV1 := s.logger

	s.metrics.RequestCounter.Inc()

	// origin: no forwarding involved, the request started at this node.
	origin := sourcePeerAddr.IsZero()

	if chunkAddr.IsZero() || chunkAddr.IsEmpty() || !chunkAddr.IsValidLength() {
		return nil, fmt.Errorf("invalid address queried")
	}

	// Origin and forwarded requests use distinct singleflight keys so an
	// origin request is never collapsed into a forwarded one (they have
	// different retry budgets and skip behavior).
	flightRoute := chunkAddr.String()
	if origin {
		flightRoute = chunkAddr.String() + originSuffix
	}

	totalRetrieveAttempts := 0
	requestStartTime := time.Now()
	defer func() {
		s.metrics.RequestDurationTime.Observe(time.Since(requestStartTime).Seconds())
		s.metrics.RequestAttempts.Observe(float64(totalRetrieveAttempts))
	}()

	// Spans for per-peer attempts are detached from the caller's
	// cancellation so tracing survives the request's own deadline.
	spanCtx := context.WithoutCancel(ctx)

	v, _, err := s.singleflight.Do(ctx, flightRoute, func(ctx context.Context) (swarm.Chunk, error) {

		// skip is the per-request skip list; errSkip (on the Service) is
		// the longer-lived, cross-request one.
		skip := skippeers.NewList()
		defer skip.Close()

		var preemptiveTicker <-chan time.Time

		// Never ask the peer that forwarded the request to us.
		if !sourcePeerAddr.IsZero() {
			skip.Forever(chunkAddr, sourcePeerAddr)
		}

		// quit tells in-flight attempt goroutines that nobody will read
		// their result anymore (closed when this closure returns).
		quit := make(chan struct{})
		defer close(quit)

		var forwards = maxMultiplexForwards

		// if we are the origin node, allow many preemptive retries to speed up the retrieval of the chunk.
		errorsLeft := 1
		if origin {
			ticker := time.NewTicker(preemptiveInterval)
			defer ticker.Stop()
			preemptiveTicker = ticker.C
			errorsLeft = maxOriginErrors
		}

		resultC := make(chan retrievalResult, 1)
		// Buffered so every multiplex forward plus the initial attempt can
		// be queued without blocking.
		retryC := make(chan struct{}, forwards+1)

		// retry queues one more attempt; non-blocking, so a full retry
		// queue simply drops the extra signal.
		retry := func() {
			select {
			case retryC <- struct{}{}:
			case <-ctx.Done():
			default:
			}
		}

		retry()

		// inflight counts attempt goroutines whose result is still pending.
		inflight := 0

		for errorsLeft > 0 {

			select {
			case <-ctx.Done():
				return nil, ctx.Err()
			case <-preemptiveTicker:
				// Origin only: periodically launch an extra parallel attempt.
				retry()
			case <-retryC:

				totalRetrieveAttempts++
				s.metrics.PeerRequestCounter.Inc()

				fullSkip := append(skip.ChunkPeers(chunkAddr), s.errSkip.ChunkPeers(chunkAddr)...)
				peer, err := s.closestPeer(chunkAddr, fullSkip, origin)

				if errors.Is(err, topology.ErrNotFound) {
					if skip.PruneExpiresAfter(chunkAddr, overDraftRefresh) == 0 { //no overdraft peers, we have depleted ALL peers
						if inflight == 0 {
							loggerV1.Debug("no peers left", "chunk_address", chunkAddr, "errors_left", errorsLeft, "isOrigin", origin, "own_proximity", swarm.Proximity(s.addr.Bytes(), chunkAddr.Bytes()), "error", err)
							return nil, err
						}
						continue // there is still an inflight request, wait for it's result
					}

					loggerV1.Debug("sleeping to refresh overdraft balance", "chunk_address", chunkAddr)

					select {
					case <-time.After(overDraftRefresh):
						retry()
						continue
					case <-ctx.Done():
						return nil, ctx.Err()
					}
				}

				if err != nil {
					if inflight == 0 {
						loggerV1.Debug("peer selection", "chunk_address", chunkAddr, "error", err)
						return nil, err
					}
					continue
				}

				// since we can reach into the neighborhood of the chunk
				// act as the multiplexer and push the chunk in parallel to multiple peers.
				// neighbor peers will also have multiple retries, which means almost the entire neighborhood
				// will be scanned for the chunk, starting from the closest to the furthest peer in the neighborhood.
				if radius, err := s.radiusFunc(); err == nil && swarm.Proximity(peer.Bytes(), chunkAddr.Bytes()) >= radius {
					for ; forwards > 0; forwards-- {
						retry()
						errorsLeft++
					}
				}

				// Reserve credit before dialing; an overdraft puts the peer
				// on a short-lived skip and tries another peer instead.
				action, err := s.prepareCredit(ctx, peer, chunkAddr, origin)
				if err != nil {
					skip.Add(chunkAddr, peer, overDraftRefresh)
					retry()
					continue
				}
				skip.Forever(chunkAddr, peer)

				inflight++

				go func() {
					span, _, ctx := s.tracer.FollowSpanFromContext(spanCtx, "retrieve-chunk", s.logger, opentracing.Tag{Key: "address", Value: chunkAddr.String()})
					defer span.Finish()
					s.retrieveChunk(ctx, quit, chunkAddr, peer, resultC, action, span)
				}()

			case res := <-resultC:

				inflight--

				if res.err == nil {
					loggerV1.Debug("retrieved chunk", "chunk_address", chunkAddr, "peer_address", res.peer, "peer_proximity", swarm.Proximity(res.peer.Bytes(), chunkAddr.Bytes()))
					return res.chunk, nil
				}

				loggerV1.Debug("failed to get chunk", "chunk_address", chunkAddr, "peer_address", res.peer,
					"peer_proximity", swarm.Proximity(res.peer.Bytes(), chunkAddr.Bytes()), "error", res.err)

				// A failed delivery burns one error from the budget and
				// bans the peer for all requests for skiplistDur.
				errorsLeft--
				s.errSkip.Add(chunkAddr, res.peer, skiplistDur)
				retry()
			}
		}

		return nil, storage.ErrNotFound
	})
	if err != nil {
		s.metrics.RequestFailureCounter.Inc()
		s.logger.Debug("retrieval failed", "chunk_address", chunkAddr, "error", err)
		return nil, err
	}

	s.metrics.RequestSuccessCounter.Inc()

	return v, nil
}

// retrieveChunk performs one retrieval attempt against a single peer:
// it opens a protocol stream, sends the request, reads the delivery,
// validates the chunk (content-addressed or single-owner) and, on success,
// applies the pre-reserved credit. The outcome is sent on result unless
// quit is closed, in which case nobody is listening anymore.
func (s *Service) retrieveChunk(ctx context.Context, quit chan struct{}, chunkAddr, peer swarm.Address, result chan retrievalResult, action accounting.Action, span opentracing.Span) {

	var (
		startTime = time.Now()
		err       error
		chunk     swarm.Chunk
	)

	defer func() {
		// Release the credit reservation whether or not it was applied.
		action.Cleanup()
		if err != nil {
			ext.LogError(span, err)
			s.metrics.TotalErrors.Inc()
		} else {
			span.LogFields(olog.Bool("success", true))
		}
		select {
		case result <- retrievalResult{err: err, chunk: chunk, peer: peer}:
		case <-quit:
			return
		}
	}()

	ctx, cancel := context.WithTimeout(ctx, RetrieveChunkTimeout)
	defer cancel()

	stream, err := s.streamer.NewStream(ctx, peer, nil, protocolName, protocolVersion, streamName)
	if err != nil {
		err = fmt.Errorf("new stream: %w", err)
		return
	}

	defer func() {
		if err != nil {
			_ = stream.Reset()
		} else {
			_ = stream.FullClose()
		}
	}()

	w, r := protobuf.NewWriterAndReader(stream)
	err = w.WriteMsgWithContext(ctx, &pb.Request{Addr: chunkAddr.Bytes()})
	if err != nil {
		err = fmt.Errorf("write request: %w peer %s", err, peer.String())
		return
	}

	var d pb.Delivery
	if err = r.ReadMsgWithContext(ctx, &d); err != nil {
		err = fmt.Errorf("read delivery: %w peer %s", err, peer.String())
		return
	}
	// A non-empty Err field is a remote-side failure delivered in-band.
	if d.Err != "" {
		err = p2p.NewChunkDeliveryError(d.Err)
		return
	}

	s.metrics.ChunkRetrieveTime.Observe(time.Since(startTime).Seconds())
	s.metrics.TotalRetrieved.Inc()

	// The delivered data must validate either as a content-addressed
	// chunk or as a single-owner chunk for the requested address.
	chunk = swarm.NewChunk(chunkAddr, d.Data)
	if !cac.Valid(chunk) {
		if !soc.Valid(chunk) {
			s.metrics.InvalidChunkRetrieved.Inc()
			err = swarm.ErrInvalidChunk
			return
		}
	}

	// Only pay for a chunk that arrived and validated.
	err = action.Apply()
}

// prepareCredit reserves credit for retrieving chunk from peer at the
// peer-specific price, returning the accounting action to later Apply
// (on success) or Cleanup (always).
func (s *Service) prepareCredit(ctx context.Context, peer, chunk swarm.Address, origin bool) (accounting.Action, error) {

	price := s.pricer.PeerPrice(peer, chunk)
	s.metrics.ChunkPrice.Observe(float64(price))

	creditAction, err := s.accounting.PrepareCredit(ctx, peer, price, origin)
	if err != nil {
		return nil, err
	}

	return creditAction, nil
}
the peer that is closest to the chunk with 386 // provided address addr. This function will ignore peers with addresses 387 // provided in skipPeers and if allowUpstream is true, peers that are further of 388 // the chunk than this node is, could also be returned, allowing the upstream 389 // retrieve request. 390 func (s *Service) closestPeer(addr swarm.Address, skipPeers []swarm.Address, allowUpstream bool) (swarm.Address, error) { 391 392 var ( 393 closest swarm.Address 394 err error 395 ) 396 397 closest, err = s.peerSuggester.ClosestPeer(addr, false, topology.Select{Reachable: true, Healthy: true}, skipPeers...) 398 if errors.Is(err, topology.ErrNotFound) { 399 closest, err = s.peerSuggester.ClosestPeer(addr, false, topology.Select{Reachable: true}, skipPeers...) 400 if errors.Is(err, topology.ErrNotFound) { 401 closest, err = s.peerSuggester.ClosestPeer(addr, false, topology.Select{}, skipPeers...) 402 } 403 } 404 405 if err != nil { 406 return swarm.Address{}, err 407 } 408 409 if allowUpstream { 410 return closest, nil 411 } 412 413 closer, err := closest.Closer(addr, s.addr) 414 if err != nil { 415 return swarm.Address{}, fmt.Errorf("distance compare addr %s closest %s base address %s: %w", addr.String(), closest.String(), s.addr.String(), err) 416 } 417 if !closer { 418 return swarm.Address{}, topology.ErrNotFound 419 } 420 421 return closest, nil 422 } 423 424 func (s *Service) handler(p2pctx context.Context, p p2p.Peer, stream p2p.Stream) (err error) { 425 ctx, cancel := context.WithTimeout(p2pctx, RetrieveChunkTimeout) 426 defer cancel() 427 428 w, r := protobuf.NewWriterAndReader(stream) 429 var attemptedWrite bool 430 431 defer func() { 432 if err != nil { 433 if !attemptedWrite { 434 _ = w.WriteMsgWithContext(ctx, &pb.Delivery{Err: err.Error()}) 435 } 436 _ = stream.Reset() 437 } else { 438 _ = stream.FullClose() 439 } 440 }() 441 var req pb.Request 442 if err := r.ReadMsgWithContext(ctx, &req); err != nil { 443 return fmt.Errorf("read request: %w 
peer %s", err, p.Address.String()) 444 } 445 446 addr := swarm.NewAddress(req.Addr) 447 448 if addr.IsZero() || addr.IsEmpty() || !addr.IsValidLength() { 449 return fmt.Errorf("invalid address queried by peer %s", p.Address.String()) 450 } 451 452 var forwarded bool 453 454 span, _, ctx := s.tracer.StartSpanFromContext(ctx, "handle-retrieve-chunk", s.logger, opentracing.Tag{Key: "address", Value: addr.String()}) 455 defer func() { 456 if err != nil { 457 ext.LogError(span, err) 458 } else { 459 span.LogFields(olog.Bool("success", true)) 460 } 461 span.LogFields(olog.Bool("forwarded", forwarded)) 462 span.Finish() 463 }() 464 465 chunk, err := s.storer.Lookup().Get(ctx, addr) 466 if err != nil { 467 if errors.Is(err, storage.ErrNotFound) { 468 // forward the request 469 chunk, err = s.RetrieveChunk(ctx, addr, p.Address) 470 if err != nil { 471 return fmt.Errorf("retrieve chunk: %w", err) 472 } 473 forwarded = true 474 } else { 475 return fmt.Errorf("get from store: %w", err) 476 } 477 } 478 479 chunkPrice := s.pricer.Price(chunk.Address()) 480 debit, err := s.accounting.PrepareDebit(ctx, p.Address, chunkPrice) 481 if err != nil { 482 return fmt.Errorf("prepare debit to peer %s before writeback: %w", p.Address.String(), err) 483 } 484 defer debit.Cleanup() 485 486 attemptedWrite = true 487 488 if err := w.WriteMsgWithContext(ctx, &pb.Delivery{ 489 Data: chunk.Data(), 490 }); err != nil { 491 return fmt.Errorf("write delivery: %w peer %s", err, p.Address.String()) 492 } 493 494 // debit price from p's balance 495 if err := debit.Apply(); err != nil { 496 return fmt.Errorf("apply debit: %w", err) 497 } 498 499 // cache the request last, so that putting to the localstore does not slow down the request flow 500 if s.caching && forwarded { 501 if err := s.storer.Cache().Put(p2pctx, chunk); err != nil { 502 s.logger.Debug("retrieve cache put", "error", err) 503 } 504 } 505 506 return nil 507 } 508 509 func (s *Service) Close() error { 510 return s.errSkip.Close() 511 }