github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/trace/trace.go (about)

     1  package trace
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"time"
     7  
     8  	lru "github.com/hashicorp/golang-lru/v2"
     9  	"github.com/rs/zerolog"
    10  	"go.opentelemetry.io/otel"
    11  	"go.opentelemetry.io/otel/attribute"
    12  	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
    13  	"go.opentelemetry.io/otel/sdk/resource"
    14  	sdktrace "go.opentelemetry.io/otel/sdk/trace"
    15  	semconv "go.opentelemetry.io/otel/semconv/v1.10.0"
    16  	"go.opentelemetry.io/otel/trace"
    17  
    18  	"github.com/onflow/flow-go/model/flow"
    19  )
    20  
    21  const DefaultEntityCacheSize = 1000
    22  
    23  const SensitivityCaptureAll = 0
    24  const EntityTypeBlock = "Block"
    25  const EntityTypeCollection = "Collection"
    26  const EntityTypeTransaction = "Transaction"
    27  
    28  type SpanName string
    29  
    30  func (s SpanName) Child(subOp string) SpanName {
    31  	return SpanName(string(s) + "." + subOp)
    32  }
    33  
    34  func IsSampled(span trace.Span) bool {
    35  	return span.SpanContext().IsSampled()
    36  }
    37  
    38  // Tracer is the implementation of the Tracer interface
    39  // TODO(rbtz): make private
    40  type Tracer struct {
    41  	tracer      trace.Tracer
    42  	shutdown    func(context.Context) error
    43  	log         zerolog.Logger
    44  	spanCache   *lru.Cache[flow.Identifier, trace.Span]
    45  	chainID     string
    46  	sensitivity uint
    47  }
    48  
    49  // NewTracer creates a new OpenTelemetry-based tracer.
    50  func NewTracer(
    51  	log zerolog.Logger,
    52  	serviceName string,
    53  	chainID string,
    54  	sensitivity uint,
    55  ) (
    56  	*Tracer,
    57  	error,
    58  ) {
    59  	ctx := context.TODO()
    60  	res, err := resource.New(
    61  		ctx,
    62  		resource.WithAttributes(
    63  			semconv.ServiceNameKey.String(serviceName),
    64  		),
    65  		resource.WithFromEnv(),
    66  	)
    67  	if err != nil {
    68  		return nil, fmt.Errorf("failed to create resource: %w", err)
    69  	}
    70  
    71  	// OLTP trace gRPC client initialization. Connection parameters for the exporter are extracted
    72  	// from environment variables. e.g.: `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`.
    73  	//
    74  	// For more information, see OpenTelemetry specification:
    75  	// https://github.com/open-telemetry/opentelemetry-specification/blob/v1.12.0/specification/protocol/exporter.md
    76  	traceExporter, err := otlptracegrpc.New(ctx)
    77  	if err != nil {
    78  		return nil, fmt.Errorf("failed to create trace exporter: %w", err)
    79  	}
    80  
    81  	tracerProvider := sdktrace.NewTracerProvider(
    82  		sdktrace.WithResource(res),
    83  		sdktrace.WithBatcher(traceExporter),
    84  	)
    85  
    86  	otel.SetTracerProvider(tracerProvider)
    87  	otel.SetErrorHandler(otel.ErrorHandlerFunc(func(err error) {
    88  		log.Debug().Err(err).Msg("tracing error")
    89  	}))
    90  
    91  	spanCache, err := lru.New[flow.Identifier, trace.Span](int(DefaultEntityCacheSize))
    92  	if err != nil {
    93  		return nil, err
    94  	}
    95  
    96  	return &Tracer{
    97  		tracer:      tracerProvider.Tracer(""),
    98  		shutdown:    tracerProvider.Shutdown,
    99  		log:         log,
   100  		spanCache:   spanCache,
   101  		sensitivity: sensitivity,
   102  		chainID:     chainID,
   103  	}, nil
   104  }
   105  
   106  // Ready returns a channel that will close when the network stack is ready.
   107  func (t *Tracer) Ready() <-chan struct{} {
   108  	ready := make(chan struct{})
   109  	close(ready)
   110  	return ready
   111  }
   112  
   113  // Done returns a channel that will close when shutdown is complete.
   114  func (t *Tracer) Done() <-chan struct{} {
   115  	done := make(chan struct{})
   116  	go func() {
   117  		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
   118  		defer cancel()
   119  
   120  		if err := t.shutdown(ctx); err != nil {
   121  			t.log.Error().Err(err).Msg("failed to shutdown tracer")
   122  		}
   123  
   124  		t.spanCache.Purge()
   125  		close(done)
   126  	}()
   127  	return done
   128  }
   129  
   130  func (t *Tracer) startEntitySpan(
   131  	ctx context.Context,
   132  	entityID flow.Identifier,
   133  	entityType string,
   134  	spanName SpanName,
   135  	opts ...trace.SpanStartOption,
   136  ) (
   137  	trace.Span,
   138  	context.Context,
   139  ) {
   140  	if !t.ShouldSample(entityID) {
   141  		return NoopSpan, ctx
   142  	}
   143  
   144  	ctx, rootSpan := t.entityRootSpan(ctx, entityID, entityType)
   145  	return t.StartSpanFromParent(rootSpan, spanName, opts...), ctx
   146  }
   147  
   148  // entityRootSpan returns the root span for the given entity from the cache
   149  // and if not exist it would construct it and cache it and return it
   150  // This should be used mostly for the very first span created for an entity on the service
   151  func (t *Tracer) entityRootSpan(
   152  	ctx context.Context,
   153  	entityID flow.Identifier,
   154  	entityType string,
   155  ) (
   156  	context.Context,
   157  	trace.Span,
   158  ) {
   159  	if c, ok := t.spanCache.Get(entityID); ok {
   160  		return trace.ContextWithSpan(ctx, c), c
   161  	}
   162  
   163  	traceID := (*trace.TraceID)(entityID[:16])
   164  	spanConfig := trace.SpanContextConfig{
   165  		TraceID:    *traceID,
   166  		TraceFlags: trace.TraceFlags(0).WithSampled(true),
   167  	}
   168  	ctx = trace.ContextWithSpanContext(ctx, trace.NewSpanContext(spanConfig))
   169  	ctx, span := t.tracer.Start(ctx, string(entityType))
   170  
   171  	span.SetAttributes(
   172  		attribute.String("entity_id", entityID.String()),
   173  		attribute.String("chainID", t.chainID),
   174  	)
   175  	t.spanCache.Add(entityID, span)
   176  
   177  	span.End() // end span right away
   178  	return ctx, span
   179  }
   180  
   181  func (t *Tracer) BlockRootSpan(blockID flow.Identifier) trace.Span {
   182  	_, span := t.entityRootSpan(context.Background(), blockID, EntityTypeBlock)
   183  	return span
   184  }
   185  
   186  func (t *Tracer) StartBlockSpan(
   187  	ctx context.Context,
   188  	blockID flow.Identifier,
   189  	spanName SpanName,
   190  	opts ...trace.SpanStartOption,
   191  ) (
   192  	trace.Span,
   193  	context.Context,
   194  ) {
   195  	return t.startEntitySpan(ctx, blockID, EntityTypeBlock, spanName, opts...)
   196  }
   197  
   198  func (t *Tracer) StartCollectionSpan(
   199  	ctx context.Context,
   200  	collectionID flow.Identifier,
   201  	spanName SpanName,
   202  	opts ...trace.SpanStartOption,
   203  ) (
   204  	trace.Span,
   205  	context.Context,
   206  ) {
   207  	return t.startEntitySpan(ctx, collectionID, EntityTypeCollection, spanName, opts...)
   208  }
   209  
   210  func (t *Tracer) StartSpanFromContext(
   211  	ctx context.Context,
   212  	operationName SpanName,
   213  	opts ...trace.SpanStartOption,
   214  ) (
   215  	trace.Span,
   216  	context.Context,
   217  ) {
   218  	ctx, span := t.tracer.Start(ctx, string(operationName), opts...)
   219  	return span, ctx
   220  }
   221  
   222  func (t *Tracer) StartSpanFromParent(
   223  	parentSpan trace.Span,
   224  	operationName SpanName,
   225  	opts ...trace.SpanStartOption,
   226  ) trace.Span {
   227  	if !IsSampled(parentSpan) {
   228  		return NoopSpan
   229  	}
   230  
   231  	ctx := trace.ContextWithSpan(context.Background(), parentSpan)
   232  	_, span := t.tracer.Start(ctx, string(operationName), opts...)
   233  	return span
   234  }
   235  
   236  func (t *Tracer) ShouldSample(entityID flow.Identifier) bool {
   237  	return entityID.IsSampled(t.sensitivity)
   238  }
   239  
   240  func (t *Tracer) StartSampledSpanFromParent(
   241  	parentSpan trace.Span,
   242  	entityID flow.Identifier,
   243  	operationName SpanName,
   244  	opts ...trace.SpanStartOption,
   245  ) trace.Span {
   246  	if !t.ShouldSample(entityID) {
   247  		return NoopSpan
   248  	}
   249  
   250  	return t.StartSpanFromParent(parentSpan, operationName, opts...)
   251  }
   252  
   253  func (t *Tracer) RecordSpanFromParent(
   254  	parentSpan trace.Span,
   255  	operationName SpanName,
   256  	duration time.Duration,
   257  	attrs []attribute.KeyValue,
   258  	opts ...trace.SpanStartOption,
   259  ) {
   260  	if !IsSampled(parentSpan) {
   261  		return
   262  	}
   263  	end := time.Now()
   264  	start := end.Add(-duration)
   265  	ctx := trace.ContextWithSpanContext(context.Background(), parentSpan.SpanContext())
   266  	opts = append(opts,
   267  		trace.WithAttributes(attrs...),
   268  		trace.WithTimestamp(start),
   269  	)
   270  	_, span := t.tracer.Start(ctx, string(operationName), opts...)
   271  	span.End(trace.WithTimestamp(end))
   272  }
   273  
   274  // WithSpanFromContext encapsulates executing a function within an span, i.e., it starts a span with the specified SpanName from the context,
   275  // executes the function f, and finishes the span once the function returns.
   276  func (t *Tracer) WithSpanFromContext(ctx context.Context,
   277  	operationName SpanName,
   278  	f func(),
   279  	opts ...trace.SpanStartOption,
   280  ) {
   281  	span, _ := t.StartSpanFromContext(ctx, operationName, opts...)
   282  	defer span.End()
   283  
   284  	f()
   285  }