github.com/koko1123/flow-go-1@v0.29.6/module/trace/trace.go (about)

     1  package trace
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"time"
     7  
     8  	lru "github.com/hashicorp/golang-lru"
     9  	"github.com/rs/zerolog"
    10  	"go.opentelemetry.io/otel"
    11  	"go.opentelemetry.io/otel/attribute"
    12  	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
    13  	"go.opentelemetry.io/otel/sdk/resource"
    14  	sdktrace "go.opentelemetry.io/otel/sdk/trace"
    15  	semconv "go.opentelemetry.io/otel/semconv/v1.10.0"
    16  	"go.opentelemetry.io/otel/trace"
    17  
    18  	"github.com/koko1123/flow-go-1/model/flow"
    19  )
    20  
    21  const DefaultEntityCacheSize = 1000
    22  
    23  const SensitivityCaptureAll = 0
    24  const EntityTypeBlock = "Block"
    25  const EntityTypeCollection = "Collection"
    26  const EntityTypeTransaction = "Transaction"
    27  
    28  type SpanName string
    29  
    30  func (s SpanName) Child(subOp string) SpanName {
    31  	return SpanName(string(s) + "." + subOp)
    32  }
    33  
    34  func IsSampled(span trace.Span) bool {
    35  	return span.SpanContext().IsSampled()
    36  }
    37  
    38  // Tracer is the implementation of the Tracer interface
    39  // TODO(rbtz): make private
    40  type Tracer struct {
    41  	tracer      trace.Tracer
    42  	shutdown    func(context.Context) error
    43  	log         zerolog.Logger
    44  	spanCache   *lru.Cache
    45  	chainID     string
    46  	sensitivity uint
    47  }
    48  
    49  // NewTracer creates a new OpenTelemetry-based tracer.
    50  func NewTracer(
    51  	log zerolog.Logger,
    52  	serviceName string,
    53  	chainID string,
    54  	sensitivity uint,
    55  ) (
    56  	*Tracer,
    57  	error,
    58  ) {
    59  	ctx := context.TODO()
    60  	res, err := resource.New(
    61  		ctx,
    62  		resource.WithAttributes(
    63  			semconv.ServiceNameKey.String(serviceName),
    64  		),
    65  		resource.WithFromEnv(),
    66  	)
    67  	if err != nil {
    68  		return nil, fmt.Errorf("failed to create resource: %w", err)
    69  	}
    70  
    71  	// OLTP trace gRPC client initialization. Connection parameters for the exporter are extracted
    72  	// from environment variables. e.g.: `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`.
    73  	//
    74  	// For more information, see OpenTelemetry specification:
    75  	// https://github.com/open-telemetry/opentelemetry-specification/blob/v1.12.0/specification/protocol/exporter.md
    76  	traceExporter, err := otlptracegrpc.New(ctx)
    77  	if err != nil {
    78  		return nil, fmt.Errorf("failed to create trace exporter: %w", err)
    79  	}
    80  
    81  	tracerProvider := sdktrace.NewTracerProvider(
    82  		sdktrace.WithResource(res),
    83  		sdktrace.WithBatcher(traceExporter),
    84  	)
    85  
    86  	otel.SetTracerProvider(tracerProvider)
    87  	otel.SetErrorHandler(otel.ErrorHandlerFunc(func(err error) {
    88  		log.Debug().Err(err).Msg("tracing error")
    89  	}))
    90  
    91  	spanCache, err := lru.New(int(DefaultEntityCacheSize))
    92  	if err != nil {
    93  		return nil, err
    94  	}
    95  
    96  	return &Tracer{
    97  		tracer:      tracerProvider.Tracer(""),
    98  		shutdown:    tracerProvider.Shutdown,
    99  		log:         log,
   100  		spanCache:   spanCache,
   101  		sensitivity: sensitivity,
   102  		chainID:     chainID,
   103  	}, nil
   104  }
   105  
   106  // Ready returns a channel that will close when the network stack is ready.
   107  func (t *Tracer) Ready() <-chan struct{} {
   108  	ready := make(chan struct{})
   109  	close(ready)
   110  	return ready
   111  }
   112  
   113  // Done returns a channel that will close when shutdown is complete.
   114  func (t *Tracer) Done() <-chan struct{} {
   115  	done := make(chan struct{})
   116  	go func() {
   117  		ctx, cancel := context.WithTimeout(context.Background(), time.Second)
   118  		defer cancel()
   119  
   120  		if err := t.shutdown(ctx); err != nil {
   121  			t.log.Error().Err(err).Msg("failed to shutdown tracer")
   122  		}
   123  
   124  		t.spanCache.Purge()
   125  		close(done)
   126  	}()
   127  	return done
   128  }
   129  
   130  func (t *Tracer) startEntitySpan(
   131  	ctx context.Context,
   132  	entityID flow.Identifier,
   133  	entityType string,
   134  	spanName SpanName,
   135  	opts ...trace.SpanStartOption,
   136  ) (
   137  	trace.Span,
   138  	context.Context,
   139  ) {
   140  	if !entityID.IsSampled(t.sensitivity) {
   141  		return NoopSpan, ctx
   142  	}
   143  
   144  	ctx, rootSpan := t.entityRootSpan(ctx, entityID, entityType)
   145  	return t.StartSpanFromParent(rootSpan, spanName, opts...), ctx
   146  }
   147  
   148  // entityRootSpan returns the root span for the given entity from the cache
   149  // and if not exist it would construct it and cache it and return it
   150  // This should be used mostly for the very first span created for an entity on the service
   151  func (t *Tracer) entityRootSpan(
   152  	ctx context.Context,
   153  	entityID flow.Identifier,
   154  	entityType string,
   155  	opts ...trace.SpanStartOption,
   156  ) (
   157  	context.Context,
   158  	trace.Span,
   159  ) {
   160  	if c, ok := t.spanCache.Get(entityID); ok {
   161  		span := c.(trace.Span)
   162  		return trace.ContextWithSpan(ctx, span), span
   163  	}
   164  
   165  	traceID := (*trace.TraceID)(entityID[:16])
   166  	spanConfig := trace.SpanContextConfig{
   167  		TraceID:    *traceID,
   168  		TraceFlags: trace.TraceFlags(0).WithSampled(true),
   169  	}
   170  	ctx = trace.ContextWithSpanContext(ctx, trace.NewSpanContext(spanConfig))
   171  	ctx, span := t.tracer.Start(ctx, string(entityType))
   172  
   173  	span.SetAttributes(
   174  		attribute.String("entity_id", entityID.String()),
   175  		attribute.String("chainID", t.chainID),
   176  	)
   177  	t.spanCache.Add(entityID, span)
   178  
   179  	span.End() // end span right away
   180  	return ctx, span
   181  }
   182  
   183  func (t *Tracer) StartBlockSpan(
   184  	ctx context.Context,
   185  	blockID flow.Identifier,
   186  	spanName SpanName,
   187  	opts ...trace.SpanStartOption,
   188  ) (
   189  	trace.Span,
   190  	context.Context,
   191  ) {
   192  	return t.startEntitySpan(ctx, blockID, EntityTypeBlock, spanName, opts...)
   193  }
   194  
   195  func (t *Tracer) StartCollectionSpan(
   196  	ctx context.Context,
   197  	collectionID flow.Identifier,
   198  	spanName SpanName,
   199  	opts ...trace.SpanStartOption,
   200  ) (
   201  	trace.Span,
   202  	context.Context,
   203  ) {
   204  	return t.startEntitySpan(ctx, collectionID, EntityTypeCollection, spanName, opts...)
   205  }
   206  
   207  // StartTransactionSpan starts a span that will be aggregated under the given
   208  // transaction.
   209  // All spans for the same transaction will be aggregated under a root span
   210  func (t *Tracer) StartTransactionSpan(
   211  	ctx context.Context,
   212  	transactionID flow.Identifier,
   213  	spanName SpanName,
   214  	opts ...trace.SpanStartOption,
   215  ) (
   216  	trace.Span,
   217  	context.Context,
   218  ) {
   219  	return t.startEntitySpan(ctx, transactionID, EntityTypeTransaction, spanName, opts...)
   220  }
   221  
   222  func (t *Tracer) StartSpanFromContext(
   223  	ctx context.Context,
   224  	operationName SpanName,
   225  	opts ...trace.SpanStartOption,
   226  ) (
   227  	trace.Span,
   228  	context.Context,
   229  ) {
   230  	ctx, span := t.tracer.Start(ctx, string(operationName), opts...)
   231  	return span, ctx
   232  }
   233  
   234  func (t *Tracer) StartSpanFromParent(
   235  	parentSpan trace.Span,
   236  	operationName SpanName,
   237  	opts ...trace.SpanStartOption,
   238  ) trace.Span {
   239  	if !IsSampled(parentSpan) {
   240  		return NoopSpan
   241  	}
   242  	ctx := trace.ContextWithSpan(context.Background(), parentSpan)
   243  	_, span := t.tracer.Start(ctx, string(operationName), opts...)
   244  	return span
   245  }
   246  
   247  func (t *Tracer) RecordSpanFromParent(
   248  	parentSpan trace.Span,
   249  	operationName SpanName,
   250  	duration time.Duration,
   251  	attrs []attribute.KeyValue,
   252  	opts ...trace.SpanStartOption,
   253  ) {
   254  	if !IsSampled(parentSpan) {
   255  		return
   256  	}
   257  	end := time.Now()
   258  	start := end.Add(-duration)
   259  	ctx := trace.ContextWithSpanContext(context.Background(), parentSpan.SpanContext())
   260  	opts = append(opts,
   261  		trace.WithAttributes(attrs...),
   262  		trace.WithTimestamp(start),
   263  	)
   264  	_, span := t.tracer.Start(ctx, string(operationName), opts...)
   265  	span.End(trace.WithTimestamp(end))
   266  }
   267  
   268  // WithSpanFromContext encapsulates executing a function within an span, i.e., it starts a span with the specified SpanName from the context,
   269  // executes the function f, and finishes the span once the function returns.
   270  func (t *Tracer) WithSpanFromContext(ctx context.Context,
   271  	operationName SpanName,
   272  	f func(),
   273  	opts ...trace.SpanStartOption,
   274  ) {
   275  	span, _ := t.StartSpanFromContext(ctx, operationName, opts...)
   276  	defer span.End()
   277  
   278  	f()
   279  }