github.com/ethersphere/bee/v2@v2.2.0/pkg/pusher/pusher.go

// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package pusher provides protocol-orchestrating functionality
// over the pushsync protocol. It makes sure that chunks meant
// to be distributed over the network are sent using the
// pushsync protocol.
package pusher

import (
	"context"
	"encoding/hex"
	"errors"
	"sync"
	"time"

	"github.com/ethersphere/bee/v2/pkg/log"
	"github.com/ethersphere/bee/v2/pkg/postage"
	"github.com/ethersphere/bee/v2/pkg/pushsync"
	storage "github.com/ethersphere/bee/v2/pkg/storage"
	"github.com/ethersphere/bee/v2/pkg/swarm"
	"github.com/ethersphere/bee/v2/pkg/topology"
	"github.com/ethersphere/bee/v2/pkg/tracing"
	"github.com/opentracing/opentracing-go"
	"github.com/opentracing/opentracing-go/ext"
	olog "github.com/opentracing/opentracing-go/log"
)

// loggerName is the tree path name of the logger for this package.
const loggerName = "pusher"

// Op describes a single push operation: the chunk to push, an optional
// error channel on which the result is reported, whether the push is a
// direct upload, and an optional tracing span.
type Op struct {
	Chunk  swarm.Chunk
	Err    chan error
	Direct bool
	Span   opentracing.Span
}

type OpChan <-chan *Op

type Storer interface {
	storage.PushReporter
	storage.PushSubscriber
	ReservePutter() storage.Putter
}

type Service struct {
	networkID         uint64
	storer            Storer
	pushSyncer        pushsync.PushSyncer
	validStamp        postage.ValidStampFn
	logger            log.Logger
	metrics           metrics
	quit              chan struct{}
	chunksWorkerQuitC chan struct{}
	inflight          *inflight
	attempts          *attempts
	smuggler          chan OpChan
}

const (
	traceDuration     = 30 * time.Second // duration for every root tracing span
	ConcurrentPushes  = 100              // how many chunks to push simultaneously
	DefaultRetryCount = 6
)

var (
	ErrInvalidAddress = errors.New("invalid address")
)

func New(
	networkID uint64,
	storer Storer,
	pushSyncer pushsync.PushSyncer,
	validStamp postage.ValidStampFn,
	logger log.Logger,
	warmupTime time.Duration,
	retryCount int,
) *Service {
	p := &Service{
		networkID:         networkID,
		storer:            storer,
		pushSyncer:        pushSyncer,
		validStamp:        validStamp,
		logger:            logger.WithName(loggerName).Register(),
		metrics:           newMetrics(),
		quit:              make(chan struct{}),
		chunksWorkerQuitC: make(chan struct{}),
		inflight:          newInflight(),
		attempts:          &attempts{retryCount: retryCount, attempts: make(map[string]int)},
		smuggler:          make(chan OpChan),
	}
	go p.chunksWorker(warmupTime)
	return p
}
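// A minimal construction sketch from the caller's side (illustrative only;
// assumes concrete storer, pushSyncer, validStamp, and logger values from
// the node's wiring, and a one-minute warmup chosen arbitrarily):
//
//	p := pusher.New(networkID, storer, pushSyncer, validStamp, logger,
//		time.Minute, pusher.DefaultRetryCount)
//	defer p.Close()
//
//	// Register a feed of direct-upload operations; see AddFeed below.
//	ops := make(chan *pusher.Op)
//	p.AddFeed(ops)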
// chunksWorker is a loop that keeps looking for chunks that are locally
// uploaded (by monitoring pushIndex) and pushes them to the closest peer
// to get a receipt.
func (s *Service) chunksWorker(warmupTime time.Duration) {
	defer close(s.chunksWorkerQuitC)
	select {
	case <-time.After(warmupTime):
	case <-s.quit:
		return
	}

	var (
		ctx, cancel = context.WithCancel(context.Background())
		sem         = make(chan struct{}, ConcurrentPushes)
		cc          = make(chan *Op)
	)

	// inflight.set handles the backpressure for the maximum amount of inflight chunks
	// and duplicate handling.
	chunks, unsubscribe := s.storer.SubscribePush(ctx)
	defer func() {
		unsubscribe()
		cancel()
	}()

	var wg sync.WaitGroup

	push := func(op *Op) {
		var (
			err      error
			doRepeat bool
		)

		defer func() {
			// No peer was found, which may mean that the node is suffering from
			// connection issues; we must slow down the pusher to prevent
			// constant retries.
			if errors.Is(err, topology.ErrNotFound) {
				select {
				case <-time.After(time.Second * 5):
				case <-s.quit:
				}
			}

			wg.Done()
			<-sem
			if doRepeat {
				select {
				case cc <- op:
				case <-s.quit:
				}
			}
		}()

		s.metrics.TotalToPush.Inc()
		startTime := time.Now()

		spanCtx := ctx
		if op.Span != nil {
			spanCtx = tracing.WithContext(spanCtx, op.Span.Context())
		} else {
			op.Span = opentracing.NoopTracer{}.StartSpan("noOp")
		}

		if op.Direct {
			err = s.pushDirect(spanCtx, s.logger, op)
		} else {
			doRepeat, err = s.pushDeferred(spanCtx, s.logger, op)
		}

		if err != nil {
			s.metrics.TotalErrors.Inc()
			s.metrics.ErrorTime.Observe(time.Since(startTime).Seconds())
			ext.LogError(op.Span, err)
		} else {
			op.Span.LogFields(olog.Bool("success", true))
		}

		s.metrics.SyncTime.Observe(time.Since(startTime).Seconds())
		s.metrics.TotalSynced.Inc()
	}

	go func() {
		for {
			select {
			case ch, ok := <-chunks:
				if !ok {
					chunks = nil
					continue
				}
				select {
				case cc <- &Op{Chunk: ch, Direct: false}:
				case <-s.quit:
					return
				}
			case apiC := <-s.smuggler:
				go func() {
					for {
						select {
						case op := <-apiC:
							select {
							case cc <- op:
							case <-s.quit:
								return
							}
						case <-s.quit:
							return
						}
					}
				}()
			case <-s.quit:
				return
			}
		}
	}()

	defer wg.Wait()

	for {
		select {
		case op := <-cc:
			if s.inflight.set(op.Chunk) {
				if op.Direct {
					select {
					case op.Err <- nil:
					default:
						s.logger.Debug("chunk already in flight, skipping", "chunk", op.Chunk.Address())
					}
				}
				continue
			}
			select {
			case sem <- struct{}{}:
				wg.Add(1)
				go push(op)
			case <-s.quit:
				return
			}
		case <-s.quit:
			return
		}
	}
}
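// pushDeferred pushes a chunk stemming from a deferred (local) upload and
// reports the chunk's sync status back to the storer. The returned bool
// signals whether the operation should be requeued: push, store, and
// report failures are retried, a shallow receipt is retried up to the
// configured retryCount, and an invalid stamp drops the chunk for good.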
func (s *Service) pushDeferred(ctx context.Context, logger log.Logger, op *Op) (bool, error) {
	loggerV1 := logger.V(1).Build()

	defer s.inflight.delete(op.Chunk)

	if _, err := s.validStamp(op.Chunk); err != nil {
		loggerV1.Warning(
			"stamp is no longer valid, skipping syncing for chunk",
			"batch_id", hex.EncodeToString(op.Chunk.Stamp().BatchID()),
			"chunk_address", op.Chunk.Address(),
			"error", err,
		)

		return false, errors.Join(err, s.storer.Report(ctx, op.Chunk, storage.ChunkCouldNotSync))
	}

	switch receipt, err := s.pushSyncer.PushChunkToClosest(ctx, op.Chunk); {
	case errors.Is(err, topology.ErrWantSelf):
		// store the chunk
		loggerV1.Debug("chunk stays here, i'm the closest node", "chunk_address", op.Chunk.Address())
		err = s.storer.ReservePutter().Put(ctx, op.Chunk)
		if err != nil {
			loggerV1.Error(err, "pusher: failed to store chunk")
			return true, err
		}
		err = s.storer.Report(ctx, op.Chunk, storage.ChunkStored)
		if err != nil {
			loggerV1.Error(err, "pusher: failed reporting chunk")
			return true, err
		}
	case errors.Is(err, pushsync.ErrShallowReceipt):
		if retry := s.shallowReceipt(receipt); retry {
			return true, err
		}
		if err := s.storer.Report(ctx, op.Chunk, storage.ChunkSynced); err != nil {
			loggerV1.Error(err, "pusher: failed to report sync status")
			return true, err
		}
	case err == nil:
		if err := s.storer.Report(ctx, op.Chunk, storage.ChunkSynced); err != nil {
			loggerV1.Error(err, "pusher: failed to report sync status")
			return true, err
		}
	default:
		loggerV1.Error(err, "pusher: failed PushChunkToClosest")
		return true, err
	}

	return false, nil
}

// pushDirect pushes a chunk stemming from a direct upload. The result is
// delivered on op.Err; the send is non-blocking, so the caller must have a
// receiver ready or use a buffered channel.
func (s *Service) pushDirect(ctx context.Context, logger log.Logger, op *Op) error {
	loggerV1 := logger.V(1).Build()

	var err error

	defer func() {
		s.inflight.delete(op.Chunk)
		select {
		case op.Err <- err:
		default:
			loggerV1.Error(err, "pusher: failed to return error for direct upload")
		}
	}()

	_, err = s.validStamp(op.Chunk)
	if err != nil {
		logger.Warning(
			"stamp is no longer valid, skipping direct upload for chunk",
			"batch_id", hex.EncodeToString(op.Chunk.Stamp().BatchID()),
			"chunk_address", op.Chunk.Address(),
			"error", err,
		)
		return err
	}

	switch _, err = s.pushSyncer.PushChunkToClosest(ctx, op.Chunk); {
	case errors.Is(err, topology.ErrWantSelf):
		// store the chunk
		loggerV1.Debug("chunk stays here, i'm the closest node", "chunk_address", op.Chunk.Address())
		err = s.storer.ReservePutter().Put(ctx, op.Chunk)
		if err != nil {
			loggerV1.Error(err, "pusher: failed to store chunk")
		}
	case err != nil:
		loggerV1.Error(err, "pusher: failed PushChunkToClosest")
	}

	return err
}

// shallowReceipt reports whether a push that yielded a shallow receipt
// should be retried, allowing up to retryCount attempts per chunk address.
func (s *Service) shallowReceipt(receipt *pushsync.Receipt) bool {
	if s.attempts.try(receipt.Address) {
		return true
	}
	s.attempts.delete(receipt.Address)
	return false
}

// AddFeed registers an external channel of push operations (e.g. direct
// uploads from the API) with the worker loop.
func (s *Service) AddFeed(c <-chan *Op) {
	go func() {
		select {
		case s.smuggler <- c:
			s.logger.Info("got a chunk being smuggled")
		case <-s.quit:
		}
	}()
}

// Close signals the worker loop to stop and waits for it to finish, up to
// a fixed timeout.
func (s *Service) Close() error {
	s.logger.Info("pusher shutting down")
	close(s.quit)

	// Wait for chunks worker to finish.
	select {
	case <-s.chunksWorkerQuitC:
	case <-time.After(6 * time.Second):
	}
	return nil
}
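// A minimal direct-upload sketch from the caller's side (illustrative only;
// chunk construction and the ops channel previously registered via AddFeed
// are assumed):
//
//	op := &pusher.Op{
//		Chunk:  ch,
//		Err:    make(chan error, 1), // buffered: the pusher's send is non-blocking
//		Direct: true,
//	}
//	ops <- op
//	if err := <-op.Err; err != nil {
//		// push failed: invalid stamp, no reachable peers, etc.
//	}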