github.com/pachyderm/pachyderm@v1.13.4/src/client/pkg/tracing/tracing.go (about)

     1  package tracing
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"os"
     8  	"strings"
     9  	"sync"
    10  	"time"
    11  
    12  	otgrpc "github.com/opentracing-contrib/go-grpc"
    13  	opentracing "github.com/opentracing/opentracing-go"
    14  	log "github.com/sirupsen/logrus"
    15  	"github.com/uber/jaeger-client-go"
    16  	jaegercfg "github.com/uber/jaeger-client-go/config"
    17  	"google.golang.org/grpc"
    18  )
    19  
    20  // JaegerServiceName is the name pachd (and the pachyderm client) uses to
    21  // describe itself when it reports traces to Jaeger
    22  const JaegerServiceName = "pachd"
    23  
    24  // If you have Jaeger deployed and the JAEGER_ENDPOINT environment variable set
    25  // to the address of your Jaeger instance's HTTP collection API, setting this
    26  // environment variable to "true" will cause pachyderm to attach a Jaeger trace
    27  // to any RPCs that it sends (this is primarily intended to be set in pachctl
    28  // though any binary that includes our go client library will be able to use
    29  // this env var)
    30  //
    31  // Note that tracing calls can slow them down somewhat and make interesting
    32  // traces hard to find in Jaeger, so you may not want this variable set for
    33  // every call.
    34  const jaegerEndpointEnvVar = "JAEGER_ENDPOINT"
    35  
    36  // ShortTraceEnvVar is what the client reads to decide whether to send a trace.
    37  // Below, this is implemented by the span inclusion func
    38  // addTraceIfTracingEnabled() (which is itself used by the GRPC interceptor)
    39  const ShortTraceEnvVar = "PACH_TRACE"
    40  
    41  // jaegerOnce is used to ensure that the Jaeger tracer is only initialized once
    42  var jaegerOnce sync.Once
    43  
    44  // jaegerEndpoint is set using jaegerOnce on startup, and then returned by
    45  // future calls to InstallJaegerTracerFromEnv
    46  var jaegerEndpoint string
    47  
    48  // TagAnySpan tags any span associated with 'spanBox' (which must be either a
    49  // span itself or a context.Context) with 'kvs'
    50  func TagAnySpan(spanBox interface{}, kvs ...interface{}) opentracing.Span {
    51  	if spanBox == nil {
    52  		return nil
    53  	}
    54  
    55  	// extract span from 'spanBox'
    56  	var span opentracing.Span
    57  	switch v := spanBox.(type) {
    58  	case opentracing.Span:
    59  		span = v
    60  	case context.Context:
    61  		span = opentracing.SpanFromContext(v) // may return nil
    62  	default:
    63  		log.Errorf("invalid type %T passed to TagAnySpan", spanBox)
    64  	}
    65  	if span == nil {
    66  		return nil
    67  	}
    68  
    69  	// tag 'span'
    70  	for i := 0; i < len(kvs); i += 2 {
    71  		if len(kvs) == i+1 {
    72  			span = span.SetTag("extra", kvs[i]) // likely forgot key or value--best effort
    73  			break
    74  		}
    75  		if key, ok := kvs[i].(string); ok {
    76  			span = span.SetTag(key, kvs[i+1]) // common case -- skip printf
    77  		} else {
    78  			span = span.SetTag(fmt.Sprintf("%v", kvs[i]), kvs[i+1])
    79  		}
    80  	}
    81  	return span
    82  }
    83  
    84  // AddSpanToAnyExisting checks 'ctx' for Jaeger tracing information, and if
    85  // tracing metadata is present, it generates a new span for 'operation', marks
    86  // it as a child of the existing span, and returns it.
    87  func AddSpanToAnyExisting(ctx context.Context, operation string, kvs ...interface{}) (opentracing.Span, context.Context) {
    88  	if parentSpan := opentracing.SpanFromContext(ctx); parentSpan != nil {
    89  		span := opentracing.StartSpan(operation, opentracing.ChildOf(parentSpan.Context()))
    90  		span = TagAnySpan(span, kvs...)
    91  		return span, opentracing.ContextWithSpan(ctx, span)
    92  	}
    93  	return nil, ctx
    94  }
    95  
    96  // FinishAnySpan calls span.Finish() if span is not nil. Pairs with
    97  // AddSpanToAnyExisting
    98  func FinishAnySpan(span opentracing.Span, kvs ...interface{}) {
    99  	span = TagAnySpan(span, kvs...)
   100  	if span != nil {
   101  		span.Finish()
   102  	}
   103  }
   104  
   105  // InstallJaegerTracerFromEnv installs a Jaeger client as the opentracing global
   106  // tracer, relying on environment variables to configure the client
   107  func InstallJaegerTracerFromEnv() string {
   108  	jaegerOnce.Do(func() {
   109  		var onUserMachine bool
   110  		jaegerEndpoint, onUserMachine = os.LookupEnv(jaegerEndpointEnvVar)
   111  		if !onUserMachine {
   112  			if host, ok := os.LookupEnv("JAEGER_COLLECTOR_SERVICE_HOST"); ok {
   113  				port := os.Getenv("JAEGER_COLLECTOR_SERVICE_PORT_JAEGER_COLLECTOR_HTTP")
   114  				jaegerEndpoint = fmt.Sprintf("%s:%s", host, port)
   115  			}
   116  		}
   117  		if jaegerEndpoint == "" {
   118  			return // break early -- not using Jaeger
   119  		}
   120  
   121  		// canonicalize jaegerEndpoint as http://<hostport>/api/traces
   122  		jaegerEndpoint = strings.TrimPrefix(jaegerEndpoint, "http://")
   123  		jaegerEndpoint = strings.TrimSuffix(jaegerEndpoint, "/api/traces")
   124  		jaegerEndpoint = fmt.Sprintf("http://%s/api/traces", jaegerEndpoint)
   125  		cfg := jaegercfg.Configuration{
   126  			ServiceName: JaegerServiceName,
   127  			// Configure Jaeger to sample every call, but use the SpanInclusionFunc
   128  			// addTraceIfTracingEnabled (defined below) to skip sampling every RPC
   129  			// unless the PACH_TRACE environment variable is set
   130  			Sampler: &jaegercfg.SamplerConfig{
   131  				Type:  "const",
   132  				Param: 1,
   133  			},
   134  			Reporter: &jaegercfg.ReporterConfig{
   135  				LogSpans:            true,
   136  				BufferFlushInterval: 1 * time.Second,
   137  				CollectorEndpoint:   jaegerEndpoint,
   138  			},
   139  		}
   140  
   141  		// configure jaeger logger
   142  		logger := jaeger.Logger(jaeger.NullLogger)
   143  		if !onUserMachine {
   144  			logger = jaeger.StdLogger
   145  		}
   146  
   147  		// Hack: ignore second argument (io.Closer) because the Jaeger
   148  		// implementation of opentracing.Tracer also implements io.Closer (i.e. the
   149  		// first and second return values from cfg.New(), here, are two interfaces
   150  		// that wrap the same underlying type). Instead of storing the second return
   151  		// value here, just cast the tracer to io.Closer in CloseAndReportTraces()
   152  		// (below) and call 'Close()' on it there.
   153  		tracer, _, err := cfg.NewTracer(jaegercfg.Logger(logger))
   154  		if err != nil {
   155  			log.Errorf("jaeger-collector service is deployed, but Pachyderm could not install Jaeger tracer: %v", err)
   156  			return
   157  		}
   158  		opentracing.SetGlobalTracer(tracer)
   159  	})
   160  	return jaegerEndpoint
   161  }
   162  
   163  // addTraceIfTracingEnabled is an otgrpc span inclusion func that propagates
   164  // existing traces, but won't start any new ones
   165  func addTraceIfTracingEnabled(
   166  	parentSpanCtx opentracing.SpanContext,
   167  	method string,
   168  	req, resp interface{}) bool {
   169  	// Always trace if PACH_TRACE is on
   170  	if _, shortTracingOn := os.LookupEnv(ShortTraceEnvVar); shortTracingOn {
   171  		if !IsActive() {
   172  			log.Error("PACH_TRACE is set, indicating tracing is requested, but no connection to Jaeger has been established")
   173  		}
   174  		return true
   175  	}
   176  
   177  	// Otherwise, only propagate an existing trace
   178  	if parentSpanCtx == nil {
   179  		return false
   180  	}
   181  	if jaegerCtx, ok := parentSpanCtx.(jaeger.SpanContext); ok {
   182  		return jaegerCtx.IsValid()
   183  	}
   184  	// Non-Jaeger context. This shouldn't happen, unless some Pachyderm user is
   185  	// propagating e.g. Zipkin traces through the Pachyderm client. In that
   186  	// case, we wouldn't know where to report traces anyway
   187  	return false
   188  }
   189  
   190  // IsActive returns true if a connection to Jaeger has been established and a
   191  // global tracer has been installed
   192  func IsActive() bool {
   193  	return opentracing.IsGlobalTracerRegistered()
   194  }
   195  
   196  // UnaryClientInterceptor returns a GRPC interceptor for non-streaming GRPC RPCs
   197  func UnaryClientInterceptor() grpc.UnaryClientInterceptor {
   198  	return otgrpc.OpenTracingClientInterceptor(opentracing.GlobalTracer(),
   199  		otgrpc.IncludingSpans(otgrpc.SpanInclusionFunc(addTraceIfTracingEnabled)))
   200  }
   201  
   202  // StreamClientInterceptor returns a GRPC interceptor for non-streaming GRPC RPCs
   203  func StreamClientInterceptor() grpc.StreamClientInterceptor {
   204  	return otgrpc.OpenTracingStreamClientInterceptor(opentracing.GlobalTracer(),
   205  		otgrpc.IncludingSpans(otgrpc.SpanInclusionFunc(addTraceIfTracingEnabled)))
   206  }
   207  
   208  // UnaryServerInterceptor returns a GRPC interceptor for non-streaming GRPC RPCs
   209  func UnaryServerInterceptor() grpc.UnaryServerInterceptor {
   210  	return otgrpc.OpenTracingServerInterceptor(opentracing.GlobalTracer(),
   211  		otgrpc.IncludingSpans(otgrpc.SpanInclusionFunc(addTraceIfTracingEnabled)))
   212  }
   213  
   214  // StreamServerInterceptor returns a GRPC interceptor for non-streaming GRPC RPCs
   215  func StreamServerInterceptor() grpc.StreamServerInterceptor {
   216  	return otgrpc.OpenTracingStreamServerInterceptor(opentracing.GlobalTracer(),
   217  		otgrpc.IncludingSpans(otgrpc.SpanInclusionFunc(addTraceIfTracingEnabled)))
   218  }
   219  
   220  // CloseAndReportTraces tries to close the global tracer, which, in the case of
   221  // the Jaeger tracer, causes it to send any unreported traces to the collector
   222  func CloseAndReportTraces() {
   223  	if c, ok := opentracing.GlobalTracer().(io.Closer); ok {
   224  		c.Close()
   225  	}
   226  }