github.com/celestiaorg/celestia-node@v0.15.0-beta.1/share/getters/shrex.go (about) 1 package getters 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "time" 8 9 "go.opentelemetry.io/otel" 10 "go.opentelemetry.io/otel/attribute" 11 "go.opentelemetry.io/otel/metric" 12 "go.opentelemetry.io/otel/trace" 13 14 "github.com/celestiaorg/rsmt2d" 15 16 "github.com/celestiaorg/celestia-node/header" 17 "github.com/celestiaorg/celestia-node/libs/utils" 18 "github.com/celestiaorg/celestia-node/share" 19 "github.com/celestiaorg/celestia-node/share/ipld" 20 "github.com/celestiaorg/celestia-node/share/p2p" 21 "github.com/celestiaorg/celestia-node/share/p2p/peers" 22 "github.com/celestiaorg/celestia-node/share/p2p/shrexeds" 23 "github.com/celestiaorg/celestia-node/share/p2p/shrexnd" 24 ) 25 26 var _ share.Getter = (*ShrexGetter)(nil) 27 28 const ( 29 // defaultMinRequestTimeout value is set according to observed time taken by healthy peer to 30 // serve getEDS request for block size 256 31 defaultMinRequestTimeout = time.Minute // should be >= shrexeds server write timeout 32 defaultMinAttemptsCount = 3 33 ) 34 35 var meter = otel.Meter("shrex/getter") 36 37 type metrics struct { 38 edsAttempts metric.Int64Histogram 39 ndAttempts metric.Int64Histogram 40 } 41 42 func (m *metrics) recordEDSAttempt(ctx context.Context, attemptCount int, success bool) { 43 if m == nil { 44 return 45 } 46 ctx = utils.ResetContextOnError(ctx) 47 m.edsAttempts.Record(ctx, int64(attemptCount), 48 metric.WithAttributes( 49 attribute.Bool("success", success))) 50 } 51 52 func (m *metrics) recordNDAttempt(ctx context.Context, attemptCount int, success bool) { 53 if m == nil { 54 return 55 } 56 ctx = utils.ResetContextOnError(ctx) 57 m.ndAttempts.Record(ctx, int64(attemptCount), 58 metric.WithAttributes( 59 attribute.Bool("success", success))) 60 } 61 62 func (sg *ShrexGetter) WithMetrics() error { 63 edsAttemptHistogram, err := meter.Int64Histogram( 64 "getters_shrex_eds_attempts_per_request", 65 metric.WithDescription("Number of attempts per shrex/eds request"), 66 ) 67 if err != nil { 68 return err 69 } 70 71 ndAttemptHistogram, err := meter.Int64Histogram( 72 "getters_shrex_nd_attempts_per_request", 73 metric.WithDescription("Number of attempts per shrex/nd request"), 74 ) 75 if err != nil { 76 return err 77 } 78 79 sg.metrics = &metrics{ 80 edsAttempts: edsAttemptHistogram, 81 ndAttempts: ndAttemptHistogram, 82 } 83 return nil 84 } 85 86 // ShrexGetter is a share.Getter that uses the shrex/eds and shrex/nd protocol to retrieve shares. 87 type ShrexGetter struct { 88 edsClient *shrexeds.Client 89 ndClient *shrexnd.Client 90 91 peerManager *peers.Manager 92 93 // minRequestTimeout limits minimal timeout given to single peer by getter for serving the request. 94 minRequestTimeout time.Duration 95 // minAttemptsCount will be used to split request timeout into multiple attempts. It will allow to 96 // attempt multiple peers in scope of one request before context timeout is reached 97 minAttemptsCount int 98 99 metrics *metrics 100 } 101 102 func NewShrexGetter(edsClient *shrexeds.Client, ndClient *shrexnd.Client, peerManager *peers.Manager) *ShrexGetter { 103 return &ShrexGetter{ 104 edsClient: edsClient, 105 ndClient: ndClient, 106 peerManager: peerManager, 107 minRequestTimeout: defaultMinRequestTimeout, 108 minAttemptsCount: defaultMinAttemptsCount, 109 } 110 } 111 112 func (sg *ShrexGetter) Start(ctx context.Context) error { 113 return sg.peerManager.Start(ctx) 114 } 115 116 func (sg *ShrexGetter) Stop(ctx context.Context) error { 117 return sg.peerManager.Stop(ctx) 118 } 119 120 func (sg *ShrexGetter) GetShare(context.Context, *header.ExtendedHeader, int, int) (share.Share, error) { 121 return nil, fmt.Errorf("getter/shrex: GetShare %w", errOperationNotSupported) 122 } 123 124 func (sg *ShrexGetter) GetEDS(ctx context.Context, header *header.ExtendedHeader) (*rsmt2d.ExtendedDataSquare, error) { 125 var ( 126 attempt int 127 err error 128 ) 129 ctx, span := tracer.Start(ctx, "shrex/get-eds") 130 defer func() { 131 utils.SetStatusAndEnd(span, err) 132 }() 133 134 // short circuit if the data root is empty 135 if header.DAH.Equals(share.EmptyRoot()) { 136 return share.EmptyExtendedDataSquare(), nil 137 } 138 for { 139 if ctx.Err() != nil { 140 sg.metrics.recordEDSAttempt(ctx, attempt, false) 141 return nil, errors.Join(err, ctx.Err()) 142 } 143 attempt++ 144 start := time.Now() 145 peer, setStatus, getErr := sg.peerManager.Peer(ctx, header.DAH.Hash(), header.Height()) 146 if getErr != nil { 147 log.Debugw("eds: couldn't find peer", 148 "hash", header.DAH.String(), 149 "err", getErr, 150 "finished (s)", time.Since(start)) 151 sg.metrics.recordEDSAttempt(ctx, attempt, false) 152 return nil, errors.Join(err, getErr) 153 } 154 155 reqStart := time.Now() 156 reqCtx, cancel := ctxWithSplitTimeout(ctx, sg.minAttemptsCount-attempt+1, sg.minRequestTimeout) 157 eds, getErr := sg.edsClient.RequestEDS(reqCtx, header.DAH.Hash(), peer) 158 cancel() 159 switch { 160 case getErr == nil: 161 setStatus(peers.ResultNoop) 162 sg.metrics.recordEDSAttempt(ctx, attempt, true) 163 return eds, nil 164 case errors.Is(getErr, context.DeadlineExceeded), 165 errors.Is(getErr, context.Canceled): 166 setStatus(peers.ResultCooldownPeer) 167 case errors.Is(getErr, p2p.ErrNotFound): 168 getErr = share.ErrNotFound 169 setStatus(peers.ResultCooldownPeer) 170 case errors.Is(getErr, p2p.ErrInvalidResponse): 171 setStatus(peers.ResultBlacklistPeer) 172 default: 173 setStatus(peers.ResultCooldownPeer) 174 } 175 176 if !ErrorContains(err, getErr) { 177 err = errors.Join(err, getErr) 178 } 179 log.Debugw("eds: request failed", 180 "hash", header.DAH.String(), 181 "peer", peer.String(), 182 "attempt", attempt, 183 "err", getErr, 184 "finished (s)", time.Since(reqStart)) 185 } 186 } 187 188 func (sg *ShrexGetter) GetSharesByNamespace( 189 ctx context.Context, 190 header *header.ExtendedHeader, 191 namespace share.Namespace, 192 ) (share.NamespacedShares, error) { 193 if err := namespace.ValidateForData(); err != nil { 194 return nil, err 195 } 196 var ( 197 attempt int 198 err error 199 ) 200 ctx, span := tracer.Start(ctx, "shrex/get-shares-by-namespace", trace.WithAttributes( 201 attribute.String("namespace", namespace.String()), 202 )) 203 defer func() { 204 utils.SetStatusAndEnd(span, err) 205 }() 206 207 // verify that the namespace could exist inside the roots before starting network requests 208 dah := header.DAH 209 roots := ipld.FilterRootByNamespace(dah, namespace) 210 if len(roots) == 0 { 211 return []share.NamespacedRow{}, nil 212 } 213 214 for { 215 if ctx.Err() != nil { 216 sg.metrics.recordNDAttempt(ctx, attempt, false) 217 return nil, errors.Join(err, ctx.Err()) 218 } 219 attempt++ 220 start := time.Now() 221 peer, setStatus, getErr := sg.peerManager.Peer(ctx, header.DAH.Hash(), header.Height()) 222 if getErr != nil { 223 log.Debugw("nd: couldn't find peer", 224 "hash", dah.String(), 225 "namespace", namespace.String(), 226 "err", getErr, 227 "finished (s)", time.Since(start)) 228 sg.metrics.recordNDAttempt(ctx, attempt, false) 229 return nil, errors.Join(err, getErr) 230 } 231 232 reqStart := time.Now() 233 reqCtx, cancel := ctxWithSplitTimeout(ctx, sg.minAttemptsCount-attempt+1, sg.minRequestTimeout) 234 nd, getErr := sg.ndClient.RequestND(reqCtx, dah, namespace, peer) 235 cancel() 236 switch { 237 case getErr == nil: 238 // both inclusion and non-inclusion cases needs verification 239 if verErr := nd.Verify(dah, namespace); verErr != nil { 240 getErr = verErr 241 setStatus(peers.ResultBlacklistPeer) 242 break 243 } 244 setStatus(peers.ResultNoop) 245 sg.metrics.recordNDAttempt(ctx, attempt, true) 246 return nd, nil 247 case errors.Is(getErr, context.DeadlineExceeded), 248 errors.Is(getErr, context.Canceled): 249 setStatus(peers.ResultCooldownPeer) 250 case errors.Is(getErr, p2p.ErrNotFound): 251 getErr = share.ErrNotFound 252 setStatus(peers.ResultCooldownPeer) 253 case errors.Is(getErr, p2p.ErrInvalidResponse): 254 setStatus(peers.ResultBlacklistPeer) 255 default: 256 setStatus(peers.ResultCooldownPeer) 257 } 258 259 if !ErrorContains(err, getErr) { 260 err = errors.Join(err, getErr) 261 } 262 log.Debugw("nd: request failed", 263 "hash", dah.String(), 264 "namespace", namespace.String(), 265 "peer", peer.String(), 266 "attempt", attempt, 267 "err", getErr, 268 "finished (s)", time.Since(reqStart)) 269 } 270 }