github.com/ethersphere/bee/v2@v2.2.0/pkg/pushsync/pushsync.go

// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package pushsync provides the pushsync protocol
// implementation.
package pushsync

import (
	"context"
	"errors"
	"fmt"
	"strconv"
	"time"

	"github.com/ethersphere/bee/v2/pkg/accounting"
	"github.com/ethersphere/bee/v2/pkg/cac"
	"github.com/ethersphere/bee/v2/pkg/crypto"
	"github.com/ethersphere/bee/v2/pkg/log"
	"github.com/ethersphere/bee/v2/pkg/p2p"
	"github.com/ethersphere/bee/v2/pkg/p2p/protobuf"
	"github.com/ethersphere/bee/v2/pkg/postage"
	"github.com/ethersphere/bee/v2/pkg/pricer"
	"github.com/ethersphere/bee/v2/pkg/pushsync/pb"
	"github.com/ethersphere/bee/v2/pkg/skippeers"
	"github.com/ethersphere/bee/v2/pkg/soc"
	storage "github.com/ethersphere/bee/v2/pkg/storage"
	"github.com/ethersphere/bee/v2/pkg/swarm"
	"github.com/ethersphere/bee/v2/pkg/topology"
	"github.com/ethersphere/bee/v2/pkg/tracing"
	opentracing "github.com/opentracing/opentracing-go"
	"github.com/opentracing/opentracing-go/ext"
	olog "github.com/opentracing/opentracing-go/log"
)

// loggerName is the tree path name of the logger for this package.
const loggerName = "pushsync"

const (
	protocolName    = "pushsync"
	protocolVersion = "1.3.0"
	streamName      = "pushsync"
)

const (
	defaultTTL         = 30 * time.Second // request time to live
	preemptiveInterval = 5 * time.Second  // P90 request time to live
	skiplistDur        = 5 * time.Minute
	overDraftRefresh   = time.Millisecond * 600
)

const (
	maxMultiplexForwards = 2 // number of extra peers to forward the request from the multiplex node
	maxPushErrors        = 32
)

var (
	ErrNoPush            = errors.New("could not push chunk")
	ErrOutOfDepthStoring = errors.New("storing outside of the neighborhood")
	ErrWarmup            = errors.New("node warmup time not complete")
	ErrShallowReceipt    = errors.New("shallow receipt")
)

type PushSyncer interface {
	PushChunkToClosest(ctx context.Context, ch swarm.Chunk) (*Receipt, error)
}

type Receipt struct {
	Address   swarm.Address
	Signature []byte
	Nonce     []byte
}

type Storer interface {
	storage.PushReporter
	ReservePutter() storage.Putter
}

type PushSync struct {
	address        swarm.Address
	networkID      uint64
	radius         func() (uint8, error)
	nonce          []byte
	streamer       p2p.StreamerDisconnecter
	store          Storer
	topologyDriver topology.Driver
	unwrap         func(swarm.Chunk)
	logger         log.Logger
	accounting     accounting.Interface
	pricer         pricer.Interface
	metrics        metrics
	tracer         *tracing.Tracer
	validStamp     postage.ValidStampFn
	signer         crypto.Signer
	fullNode       bool
	errSkip        *skippeers.List
	warmupPeriod   time.Time
}

type receiptResult struct {
	pushTime time.Time
	peer     swarm.Address
	receipt  *pb.Receipt
	err      error
}

func New(
	address swarm.Address,
	networkID uint64,
	nonce []byte,
	streamer p2p.StreamerDisconnecter,
	store Storer,
	radius func() (uint8, error),
	topology topology.Driver,
	fullNode bool,
	unwrap func(swarm.Chunk),
	validStamp postage.ValidStampFn,
	logger log.Logger,
	accounting accounting.Interface,
	pricer pricer.Interface,
	signer crypto.Signer,
	tracer *tracing.Tracer,
	warmupTime time.Duration,
) *PushSync {
	ps := &PushSync{
		address:        address,
		radius:         radius,
		networkID:      networkID,
		nonce:          nonce,
		streamer:       streamer,
		store:          store,
		topologyDriver: topology,
		fullNode:       fullNode,
		unwrap:         unwrap,
		logger:         logger.WithName(loggerName).Register(),
		accounting:     accounting,
		pricer:         pricer,
		metrics:        newMetrics(),
		tracer:         tracer,
		signer:         signer,
		errSkip:        skippeers.NewList(),
		warmupPeriod:   time.Now().Add(warmupTime),
	}

	ps.validStamp = ps.validStampWrapper(validStamp)
	return ps
}

func (s *PushSync) Protocol() p2p.ProtocolSpec {
	return p2p.ProtocolSpec{
		Name:    protocolName,
		Version: protocolVersion,
		StreamSpecs: []p2p.StreamSpec{
			{
				Name:    streamName,
				Handler: s.handler,
			},
		},
	}
}

// handler handles chunk delivery from other nodes and forwards it to its destination node.
// If the current node is the destination, it stores the chunk in the local store and sends a receipt.
func (ps *PushSync) handler(ctx context.Context, p p2p.Peer, stream p2p.Stream) (err error) {
	now := time.Now()

	w, r := protobuf.NewWriterAndReader(stream)
	var attemptedWrite bool

	ctx, cancel := context.WithTimeout(ctx, defaultTTL)
	defer cancel()

	defer func() {
		if err != nil {
			ps.metrics.TotalHandlerTime.WithLabelValues("failure").Observe(time.Since(now).Seconds())
			ps.metrics.TotalHandlerErrors.Inc()
			if !attemptedWrite {
				_ = w.WriteMsgWithContext(ctx, &pb.Receipt{Err: err.Error()})
			}
			_ = stream.Reset()
		} else {
			ps.metrics.TotalHandlerTime.WithLabelValues("success").Observe(time.Since(now).Seconds())
			_ = stream.FullClose()
		}
	}()

	var ch pb.Delivery
	if err = r.ReadMsgWithContext(ctx, &ch); err != nil {
		return fmt.Errorf("pushsync read delivery: %w", err)
	}

	ps.metrics.TotalReceived.Inc()

	chunk := swarm.NewChunk(swarm.NewAddress(ch.Address), ch.Data)
	chunkAddress := chunk.Address()

	span, _, ctx := ps.tracer.StartSpanFromContext(ctx, "pushsync-handler", ps.logger, opentracing.Tag{Key: "address", Value: chunkAddress.String()}, opentracing.Tag{Key: "tagID", Value: chunk.TagID()}, opentracing.Tag{Key: "sender_address", Value: p.Address.String()})

	var (
		stored bool
		reason string
	)

	defer func() {
		if err != nil {
			ext.LogError(span, err)
		} else {
			var logs []olog.Field
			logs = append(logs, olog.Bool("success", true))
			if stored {
				logs = append(logs, olog.Bool("stored", true))
				logs = append(logs, olog.String("reason", reason))
			}
			span.LogFields(logs...)
		}
		span.Finish()
	}()

	stamp := new(postage.Stamp)
	err = stamp.UnmarshalBinary(ch.Stamp)
	if err != nil {
		return fmt.Errorf("pushsync stamp unmarshall: %w", err)
	}
	chunk.WithStamp(stamp)

	if cac.Valid(chunk) {
		go ps.unwrap(chunk)
	} else if !soc.Valid(chunk) {
		return swarm.ErrInvalidChunk
	}

	price := ps.pricer.Price(chunkAddress)

	store := func(ctx context.Context) error {
		ps.metrics.Storer.Inc()

		chunkToPut, err := ps.validStamp(chunk)
		if err != nil {
			return fmt.Errorf("invalid stamp: %w", err)
		}

		err = ps.store.ReservePutter().Put(ctx, chunkToPut)
		if err != nil {
			return fmt.Errorf("reserve put: %w", err)
		}

		signature, err := ps.signer.Sign(chunkToPut.Address().Bytes())
		if err != nil {
			return fmt.Errorf("receipt signature: %w", err)
		}

		// send the receipt back to the requesting peer
		debit, err := ps.accounting.PrepareDebit(ctx, p.Address, price)
		if err != nil {
			return fmt.Errorf("prepare debit to peer %s before writeback: %w", p.Address.String(), err)
		}
		defer debit.Cleanup()

		attemptedWrite = true

		receipt := pb.Receipt{Address: chunkToPut.Address().Bytes(), Signature: signature, Nonce: ps.nonce}
		if err := w.WriteMsgWithContext(ctx, &receipt); err != nil {
			return fmt.Errorf("send receipt to peer %s: %w", p.Address.String(), err)
		}

		return debit.Apply()
	}

	rad, err := ps.radius()
	if err != nil {
		return fmt.Errorf("pushsync: storage radius: %w", err)
	}

	if ps.topologyDriver.IsReachable() && swarm.Proximity(ps.address.Bytes(), chunkAddress.Bytes()) >= rad {
		stored, reason = true, "is within AOR"
		return store(ctx)
	}

	switch receipt, err := ps.pushToClosest(ctx, chunk, false); {
	case errors.Is(err, topology.ErrWantSelf):
		stored, reason = true, "want self"
		return store(ctx)
	case errors.Is(err, ErrShallowReceipt):
		fallthrough
	case err == nil:
		ps.metrics.Forwarder.Inc()

		debit, err := ps.accounting.PrepareDebit(ctx, p.Address, price)
		if err != nil {
			return fmt.Errorf("prepare debit to peer %s before writeback: %w", p.Address.String(), err)
		}
		defer debit.Cleanup()

		attemptedWrite = true

		// pass back the receipt
		if err := w.WriteMsgWithContext(ctx, receipt); err != nil {
			return fmt.Errorf("send receipt to peer %s: %w", p.Address.String(), err)
		}

		return debit.Apply()
	default:
		ps.metrics.Forwarder.Inc()
		return fmt.Errorf("handler: push to closest chunk %s: %w", chunkAddress, err)
	}
}

// PushChunkToClosest sends the chunk to the closest peer by opening a stream. It then waits for
// a receipt from that peer and returns an error or nil depending on whether the
// receipt was received and is valid.
func (ps *PushSync) PushChunkToClosest(ctx context.Context, ch swarm.Chunk) (*Receipt, error) {
	ps.metrics.TotalOutgoing.Inc()
	r, err := ps.pushToClosest(ctx, ch, true)
	if errors.Is(err, ErrShallowReceipt) {
		return &Receipt{
			Address:   swarm.NewAddress(r.Address),
			Signature: r.Signature,
			Nonce:     r.Nonce,
		}, err
	}

	if err != nil {
		return nil, err
	}

	return &Receipt{
		Address:   swarm.NewAddress(r.Address),
		Signature: r.Signature,
		Nonce:     r.Nonce,
	}, nil
}
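
// pushExample is an illustrative usage sketch: a hypothetical helper, not part
// of the upstream API, showing how a caller outside this package might push a
// chunk through the PushSyncer interface. It assumes ch already carries a
// valid postage stamp.
func pushExample(ctx context.Context, syncer PushSyncer, ch swarm.Chunk) (*Receipt, error) {
	receipt, err := syncer.PushChunkToClosest(ctx, ch)
	if errors.Is(err, ErrShallowReceipt) {
		// A receipt was still returned, but it was signed by a peer outside
		// the expected storage radius; the caller can inspect it before
		// deciding whether to retry.
		return receipt, err
	}
	if err != nil {
		return nil, err
	}
	return receipt, nil
}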

// pushToClosest attempts to push the chunk into the network.
func (ps *PushSync) pushToClosest(ctx context.Context, ch swarm.Chunk, origin bool) (*pb.Receipt, error) {

	if !ps.warmedUp() {
		return nil, ErrWarmup
	}

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	ps.metrics.TotalRequests.Inc()

	var (
		sentErrorsLeft   = 1
		preemptiveTicker <-chan time.Time
		inflight         int
		parallelForwards = maxMultiplexForwards
	)

	if origin {
		ticker := time.NewTicker(preemptiveInterval)
		defer ticker.Stop()
		preemptiveTicker = ticker.C
		sentErrorsLeft = maxPushErrors
	}

	resultChan := make(chan receiptResult)

	retryC := make(chan struct{}, max(1, parallelForwards))

	retry := func() {
		select {
		case retryC <- struct{}{}:
		case <-ctx.Done():
		default:
		}
	}

	retry()

	rad, err := ps.radius()
	if err != nil {
		return nil, fmt.Errorf("pushsync: storage radius: %w", err)
	}

	skip := skippeers.NewList()
	defer skip.Close()

	neighborsOnly := false

	for sentErrorsLeft > 0 {
		select {
		case <-ctx.Done():
			return nil, ErrNoPush
		case <-preemptiveTicker:
			retry()
		case <-retryC:

			// Origin peers should not store the chunk initially so that the chunk is always forwarded into the network.
			// If no peer can be found from an origin peer, the origin peer may store the chunk.
			// Non-origin peers store the chunk if the chunk is within depth.
			// For non-origin peers, if the chunk is not within depth, they may store the chunk if they are the closest peer to the chunk.
			fullSkip := append(skip.ChunkPeers(ch.Address()), ps.errSkip.ChunkPeers(ch.Address())...)
			peer, err := ps.closestPeer(ch.Address(), origin, fullSkip)
			if errors.Is(err, topology.ErrNotFound) {
				if skip.PruneExpiresAfter(ch.Address(), overDraftRefresh) == 0 { // no overdraft peers, we have depleted ALL peers
					if inflight == 0 {
						if ps.fullNode {
							if cac.Valid(ch) {
								go ps.unwrap(ch)
							}
							return nil, topology.ErrWantSelf
						}
						ps.logger.Debug("no peers left", "chunk_address", ch.Address(), "error", err)
						return nil, err
					}
					continue // there is still an inflight request, wait for its result
				}

				ps.logger.Debug("sleeping to refresh overdraft balance", "chunk_address", ch.Address())

				select {
				case <-time.After(overDraftRefresh):
					retry()
					continue
				case <-ctx.Done():
					return nil, ctx.Err()
				}
			}

			if err != nil {
				if inflight == 0 {
					return nil, err
				}
				ps.logger.Debug("next peer", "chunk_address", ch.Address(), "error", err)
				continue
			}

			peerPO := swarm.Proximity(peer.Bytes(), ch.Address().Bytes())

			// all future requests should land directly in the neighborhood
			if neighborsOnly && peerPO < rad {
				skip.Forever(ch.Address(), peer)
				continue
			}

			// since we can reach into the neighborhood of the chunk,
			// act as the multiplexer and push the chunk in parallel to multiple peers
			if peerPO >= rad {
				neighborsOnly = true
				for ; parallelForwards > 0; parallelForwards-- {
					retry()
					sentErrorsLeft++
				}
			}

			action, err := ps.prepareCredit(ctx, peer, ch, origin)
			if err != nil {
				retry()
				skip.Add(ch.Address(), peer, overDraftRefresh)
				continue
			}
			skip.Forever(ch.Address(), peer)

			ps.metrics.TotalSendAttempts.Inc()
			inflight++

			go ps.push(ctx, resultChan, peer, ch, action)

		case result := <-resultChan:

			inflight--

			ps.measurePushPeer(result.pushTime, result.err)

			if result.err == nil {
				switch err := ps.checkReceipt(result.receipt); {
				case err == nil:
					return result.receipt, nil
				case errors.Is(err, ErrShallowReceipt):
					ps.errSkip.Add(ch.Address(), result.peer, skiplistDur)
					return result.receipt, err
				}
			}

			ps.metrics.TotalFailedSendAttempts.Inc()
			ps.logger.Debug("could not push to peer", "chunk_address", ch.Address(), "peer_address", result.peer, "error", result.err)

			sentErrorsLeft--
			ps.errSkip.Add(ch.Address(), result.peer, skiplistDur)

			retry()
		}
	}

	return nil, ErrNoPush
}

func (ps *PushSync) closestPeer(chunkAddress swarm.Address, origin bool, skipList []swarm.Address) (swarm.Address, error) {

	includeSelf := ps.fullNode && !origin

	peer, err := ps.topologyDriver.ClosestPeer(chunkAddress, includeSelf, topology.Select{Reachable: true, Healthy: true}, skipList...)
	if errors.Is(err, topology.ErrNotFound) {
		peer, err := ps.topologyDriver.ClosestPeer(chunkAddress, includeSelf, topology.Select{Reachable: true}, skipList...)
		if errors.Is(err, topology.ErrNotFound) {
			return ps.topologyDriver.ClosestPeer(chunkAddress, includeSelf, topology.Select{}, skipList...)
		}
		return peer, err
	}

	return peer, err
}

func (ps *PushSync) push(parentCtx context.Context, resultChan chan<- receiptResult, peer swarm.Address, ch swarm.Chunk, action accounting.Action) {
	ctx, cancel := context.WithTimeout(context.Background(), defaultTTL)
	defer cancel()

	var (
		err     error
		receipt *pb.Receipt
	)

	now := time.Now()

	spanInner, _, _ := ps.tracer.FollowSpanFromContext(context.WithoutCancel(parentCtx), "push-chunk-async", ps.logger, opentracing.Tag{Key: "address", Value: ch.Address().String()})

	defer func() {
		if err != nil {
			ext.LogError(spanInner, err)
		} else {
			spanInner.LogFields(olog.Bool("success", true))
		}
		spanInner.Finish()
		select {
		case resultChan <- receiptResult{pushTime: now, peer: peer, err: err, receipt: receipt}:
		case <-parentCtx.Done():
		}
	}()

	defer action.Cleanup()

	spanInner.LogFields(olog.String("peer_address", peer.String()))

	receipt, err = ps.pushChunkToPeer(tracing.WithContext(ctx, spanInner.Context()), peer, ch)
	if err != nil {
		return
	}

	ps.metrics.TotalSent.Inc()

	err = action.Apply()
}

func (ps *PushSync) checkReceipt(receipt *pb.Receipt) error {

	addr := swarm.NewAddress(receipt.Address)

	publicKey, err := crypto.Recover(receipt.Signature, addr.Bytes())
	if err != nil {
		return fmt.Errorf("pushsync: receipt recover: %w", err)
	}

	peer, err := crypto.NewOverlayAddress(*publicKey, ps.networkID, receipt.Nonce)
	if err != nil {
		return fmt.Errorf("pushsync: receipt storer address: %w", err)
	}

	po := swarm.Proximity(addr.Bytes(), peer.Bytes())

	d, err := ps.radius()
	if err != nil {
		return fmt.Errorf("pushsync: storage radius: %w", err)
	}

	if po < d {
		ps.metrics.ShallowReceiptDepth.WithLabelValues(strconv.Itoa(int(po))).Inc()
		ps.metrics.ShallowReceipt.Inc()
		ps.logger.Debug("shallow receipt", "chunk_address", addr, "peer_address", peer, "proximity_order", po)
		return ErrShallowReceipt
	}

	ps.metrics.ReceiptDepth.WithLabelValues(strconv.Itoa(int(po))).Inc()
"chunk_address", addr, "peer_address", peer, "proximity_order", po) 578 579 return nil 580 } 581 582 func (ps *PushSync) pushChunkToPeer(ctx context.Context, peer swarm.Address, ch swarm.Chunk) (receipt *pb.Receipt, err error) { 583 584 streamer, err := ps.streamer.NewStream(ctx, peer, nil, protocolName, protocolVersion, streamName) 585 if err != nil { 586 return nil, fmt.Errorf("new stream for peer %s: %w", peer.String(), err) 587 } 588 589 defer func() { 590 if err != nil { 591 _ = streamer.Reset() 592 } else { 593 _ = streamer.FullClose() 594 } 595 }() 596 597 w, r := protobuf.NewWriterAndReader(streamer) 598 stamp, err := ch.Stamp().MarshalBinary() 599 if err != nil { 600 return nil, err 601 } 602 err = w.WriteMsgWithContext(ctx, &pb.Delivery{ 603 Address: ch.Address().Bytes(), 604 Data: ch.Data(), 605 Stamp: stamp, 606 }) 607 if err != nil { 608 return nil, err 609 } 610 611 err = ps.store.Report(ctx, ch, storage.ChunkSent) 612 if err != nil && !errors.Is(err, storage.ErrNotFound) { 613 err = fmt.Errorf("tag %d increment: %w", ch.TagID(), err) 614 return 615 } 616 617 var rec pb.Receipt 618 if err = r.ReadMsgWithContext(ctx, &rec); err != nil { 619 return nil, err 620 } 621 if rec.Err != "" { 622 return nil, p2p.NewChunkDeliveryError(rec.Err) 623 } 624 625 if !ch.Address().Equal(swarm.NewAddress(rec.Address)) { 626 return nil, fmt.Errorf("invalid receipt. chunk %s, peer %s", ch.Address(), peer) 627 } 628 629 return &rec, nil 630 } 631 632 func (ps *PushSync) prepareCredit(ctx context.Context, peer swarm.Address, ch swarm.Chunk, origin bool) (accounting.Action, error) { 633 creditAction, err := ps.accounting.PrepareCredit(ctx, peer, ps.pricer.PeerPrice(peer, ch.Address()), origin) 634 if err != nil { 635 return nil, err 636 } 637 638 return creditAction, nil 639 } 640 641 func (ps *PushSync) measurePushPeer(t time.Time, err error) { 642 var status string 643 if err != nil { 644 status = "failure" 645 } else { 646 status = "success" 647 } 648 ps.metrics.PushToPeerTime.WithLabelValues(status).Observe(time.Since(t).Seconds()) 649 } 650 651 func (ps *PushSync) validStampWrapper(f postage.ValidStampFn) postage.ValidStampFn { 652 return func(c swarm.Chunk) (swarm.Chunk, error) { 653 t := time.Now() 654 chunk, err := f(c) 655 if err != nil { 656 ps.metrics.InvalidStampErrors.Inc() 657 ps.metrics.StampValidationTime.WithLabelValues("failure").Observe(time.Since(t).Seconds()) 658 } else { 659 ps.metrics.StampValidationTime.WithLabelValues("success").Observe(time.Since(t).Seconds()) 660 } 661 return chunk, err 662 } 663 } 664 665 func (s *PushSync) Close() error { 666 return s.errSkip.Close() 667 } 668 669 func (ps *PushSync) warmedUp() bool { 670 return time.Now().After(ps.warmupPeriod) 671 }