github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/trace/trace.go (about) 1 package trace 2 3 import ( 4 "context" 5 "fmt" 6 "time" 7 8 lru "github.com/hashicorp/golang-lru/v2" 9 "github.com/rs/zerolog" 10 "go.opentelemetry.io/otel" 11 "go.opentelemetry.io/otel/attribute" 12 "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" 13 "go.opentelemetry.io/otel/sdk/resource" 14 sdktrace "go.opentelemetry.io/otel/sdk/trace" 15 semconv "go.opentelemetry.io/otel/semconv/v1.10.0" 16 "go.opentelemetry.io/otel/trace" 17 18 "github.com/onflow/flow-go/model/flow" 19 ) 20 21 const DefaultEntityCacheSize = 1000 22 23 const SensitivityCaptureAll = 0 24 const EntityTypeBlock = "Block" 25 const EntityTypeCollection = "Collection" 26 const EntityTypeTransaction = "Transaction" 27 28 type SpanName string 29 30 func (s SpanName) Child(subOp string) SpanName { 31 return SpanName(string(s) + "." + subOp) 32 } 33 34 func IsSampled(span trace.Span) bool { 35 return span.SpanContext().IsSampled() 36 } 37 38 // Tracer is the implementation of the Tracer interface 39 // TODO(rbtz): make private 40 type Tracer struct { 41 tracer trace.Tracer 42 shutdown func(context.Context) error 43 log zerolog.Logger 44 spanCache *lru.Cache[flow.Identifier, trace.Span] 45 chainID string 46 sensitivity uint 47 } 48 49 // NewTracer creates a new OpenTelemetry-based tracer. 50 func NewTracer( 51 log zerolog.Logger, 52 serviceName string, 53 chainID string, 54 sensitivity uint, 55 ) ( 56 *Tracer, 57 error, 58 ) { 59 ctx := context.TODO() 60 res, err := resource.New( 61 ctx, 62 resource.WithAttributes( 63 semconv.ServiceNameKey.String(serviceName), 64 ), 65 resource.WithFromEnv(), 66 ) 67 if err != nil { 68 return nil, fmt.Errorf("failed to create resource: %w", err) 69 } 70 71 // OLTP trace gRPC client initialization. Connection parameters for the exporter are extracted 72 // from environment variables. e.g.: `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`. 73 // 74 // For more information, see OpenTelemetry specification: 75 // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.12.0/specification/protocol/exporter.md 76 traceExporter, err := otlptracegrpc.New(ctx) 77 if err != nil { 78 return nil, fmt.Errorf("failed to create trace exporter: %w", err) 79 } 80 81 tracerProvider := sdktrace.NewTracerProvider( 82 sdktrace.WithResource(res), 83 sdktrace.WithBatcher(traceExporter), 84 ) 85 86 otel.SetTracerProvider(tracerProvider) 87 otel.SetErrorHandler(otel.ErrorHandlerFunc(func(err error) { 88 log.Debug().Err(err).Msg("tracing error") 89 })) 90 91 spanCache, err := lru.New[flow.Identifier, trace.Span](int(DefaultEntityCacheSize)) 92 if err != nil { 93 return nil, err 94 } 95 96 return &Tracer{ 97 tracer: tracerProvider.Tracer(""), 98 shutdown: tracerProvider.Shutdown, 99 log: log, 100 spanCache: spanCache, 101 sensitivity: sensitivity, 102 chainID: chainID, 103 }, nil 104 } 105 106 // Ready returns a channel that will close when the network stack is ready. 107 func (t *Tracer) Ready() <-chan struct{} { 108 ready := make(chan struct{}) 109 close(ready) 110 return ready 111 } 112 113 // Done returns a channel that will close when shutdown is complete. 114 func (t *Tracer) Done() <-chan struct{} { 115 done := make(chan struct{}) 116 go func() { 117 ctx, cancel := context.WithTimeout(context.Background(), time.Second) 118 defer cancel() 119 120 if err := t.shutdown(ctx); err != nil { 121 t.log.Error().Err(err).Msg("failed to shutdown tracer") 122 } 123 124 t.spanCache.Purge() 125 close(done) 126 }() 127 return done 128 } 129 130 func (t *Tracer) startEntitySpan( 131 ctx context.Context, 132 entityID flow.Identifier, 133 entityType string, 134 spanName SpanName, 135 opts ...trace.SpanStartOption, 136 ) ( 137 trace.Span, 138 context.Context, 139 ) { 140 if !t.ShouldSample(entityID) { 141 return NoopSpan, ctx 142 } 143 144 ctx, rootSpan := t.entityRootSpan(ctx, entityID, entityType) 145 return t.StartSpanFromParent(rootSpan, spanName, opts...), ctx 146 } 147 148 // entityRootSpan returns the root span for the given entity from the cache 149 // and if not exist it would construct it and cache it and return it 150 // This should be used mostly for the very first span created for an entity on the service 151 func (t *Tracer) entityRootSpan( 152 ctx context.Context, 153 entityID flow.Identifier, 154 entityType string, 155 ) ( 156 context.Context, 157 trace.Span, 158 ) { 159 if c, ok := t.spanCache.Get(entityID); ok { 160 return trace.ContextWithSpan(ctx, c), c 161 } 162 163 traceID := (*trace.TraceID)(entityID[:16]) 164 spanConfig := trace.SpanContextConfig{ 165 TraceID: *traceID, 166 TraceFlags: trace.TraceFlags(0).WithSampled(true), 167 } 168 ctx = trace.ContextWithSpanContext(ctx, trace.NewSpanContext(spanConfig)) 169 ctx, span := t.tracer.Start(ctx, string(entityType)) 170 171 span.SetAttributes( 172 attribute.String("entity_id", entityID.String()), 173 attribute.String("chainID", t.chainID), 174 ) 175 t.spanCache.Add(entityID, span) 176 177 span.End() // end span right away 178 return ctx, span 179 } 180 181 func (t *Tracer) BlockRootSpan(blockID flow.Identifier) trace.Span { 182 _, span := t.entityRootSpan(context.Background(), blockID, EntityTypeBlock) 183 return span 184 } 185 186 func (t *Tracer) StartBlockSpan( 187 ctx context.Context, 188 blockID flow.Identifier, 189 spanName SpanName, 190 opts ...trace.SpanStartOption, 191 ) ( 192 trace.Span, 193 context.Context, 194 ) { 195 return t.startEntitySpan(ctx, blockID, EntityTypeBlock, spanName, opts...) 196 } 197 198 func (t *Tracer) StartCollectionSpan( 199 ctx context.Context, 200 collectionID flow.Identifier, 201 spanName SpanName, 202 opts ...trace.SpanStartOption, 203 ) ( 204 trace.Span, 205 context.Context, 206 ) { 207 return t.startEntitySpan(ctx, collectionID, EntityTypeCollection, spanName, opts...) 208 } 209 210 func (t *Tracer) StartSpanFromContext( 211 ctx context.Context, 212 operationName SpanName, 213 opts ...trace.SpanStartOption, 214 ) ( 215 trace.Span, 216 context.Context, 217 ) { 218 ctx, span := t.tracer.Start(ctx, string(operationName), opts...) 219 return span, ctx 220 } 221 222 func (t *Tracer) StartSpanFromParent( 223 parentSpan trace.Span, 224 operationName SpanName, 225 opts ...trace.SpanStartOption, 226 ) trace.Span { 227 if !IsSampled(parentSpan) { 228 return NoopSpan 229 } 230 231 ctx := trace.ContextWithSpan(context.Background(), parentSpan) 232 _, span := t.tracer.Start(ctx, string(operationName), opts...) 233 return span 234 } 235 236 func (t *Tracer) ShouldSample(entityID flow.Identifier) bool { 237 return entityID.IsSampled(t.sensitivity) 238 } 239 240 func (t *Tracer) StartSampledSpanFromParent( 241 parentSpan trace.Span, 242 entityID flow.Identifier, 243 operationName SpanName, 244 opts ...trace.SpanStartOption, 245 ) trace.Span { 246 if !t.ShouldSample(entityID) { 247 return NoopSpan 248 } 249 250 return t.StartSpanFromParent(parentSpan, operationName, opts...) 251 } 252 253 func (t *Tracer) RecordSpanFromParent( 254 parentSpan trace.Span, 255 operationName SpanName, 256 duration time.Duration, 257 attrs []attribute.KeyValue, 258 opts ...trace.SpanStartOption, 259 ) { 260 if !IsSampled(parentSpan) { 261 return 262 } 263 end := time.Now() 264 start := end.Add(-duration) 265 ctx := trace.ContextWithSpanContext(context.Background(), parentSpan.SpanContext()) 266 opts = append(opts, 267 trace.WithAttributes(attrs...), 268 trace.WithTimestamp(start), 269 ) 270 _, span := t.tracer.Start(ctx, string(operationName), opts...) 271 span.End(trace.WithTimestamp(end)) 272 } 273 274 // WithSpanFromContext encapsulates executing a function within an span, i.e., it starts a span with the specified SpanName from the context, 275 // executes the function f, and finishes the span once the function returns. 276 func (t *Tracer) WithSpanFromContext(ctx context.Context, 277 operationName SpanName, 278 f func(), 279 opts ...trace.SpanStartOption, 280 ) { 281 span, _ := t.StartSpanFromContext(ctx, operationName, opts...) 282 defer span.End() 283 284 f() 285 }