github.com/rudderlabs/rudder-go-kit@v0.30.0/stats/internal/otel/otel.go (about) 1 package otel 2 3 import ( 4 "context" 5 "fmt" 6 "time" 7 8 promClient "github.com/prometheus/client_golang/prometheus" 9 "go.opentelemetry.io/otel" 10 "go.opentelemetry.io/otel/attribute" 11 "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" 12 "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" 13 "go.opentelemetry.io/otel/exporters/zipkin" 14 "go.opentelemetry.io/otel/propagation" 15 sdkmetric "go.opentelemetry.io/otel/sdk/metric" 16 "go.opentelemetry.io/otel/sdk/resource" 17 sdktrace "go.opentelemetry.io/otel/sdk/trace" 18 semconv "go.opentelemetry.io/otel/semconv/v1.24.0" 19 "golang.org/x/sync/errgroup" 20 21 "github.com/rudderlabs/rudder-go-kit/stats/internal/otel/prometheus" 22 ) 23 24 // DefaultRetryConfig represents the default retry configuration 25 var DefaultRetryConfig = RetryConfig{ 26 Enabled: true, 27 InitialInterval: 5 * time.Second, 28 MaxInterval: 30 * time.Second, 29 MaxElapsedTime: time.Minute, 30 } 31 32 type Manager struct { 33 tp *sdktrace.TracerProvider 34 mp *sdkmetric.MeterProvider 35 } 36 37 // Setup simplifies the creation of tracer and meter providers with GRPC 38 func (m *Manager) Setup( 39 ctx context.Context, res *resource.Resource, opts ...Option, 40 ) ( 41 *sdktrace.TracerProvider, 42 *sdkmetric.MeterProvider, 43 error, 44 ) { 45 var c config 46 for _, opt := range opts { 47 opt(&c) 48 } 49 if c.retryConfig == nil { 50 c.retryConfig = &DefaultRetryConfig 51 } 52 if c.logger == nil { 53 c.logger = nopLogger{} 54 } 55 56 if !c.tracerProviderConfig.enabled && !c.meterProviderConfig.enabled { 57 return nil, nil, fmt.Errorf("no trace provider or meter provider to initialize") 58 } 59 60 if c.tracerProviderConfig.enabled { 61 if c.tracerProviderConfig.customSpanExporter != nil { 62 m.tp = sdktrace.NewTracerProvider(m.buildTracerProviderOptions( 63 &c, res, c.tracerProviderConfig.customSpanExporter)..., 64 ) 65 } else if c.tracerProviderConfig.withZipkin { 66 traceExporter, err := zipkin.New(c.tracesEndpoint) 67 if err != nil { 68 return nil, nil, fmt.Errorf("failed to create zipkin trace exporter: %w", err) 69 } 70 71 m.tp = sdktrace.NewTracerProvider(m.buildTracerProviderOptions(&c, res, traceExporter)...) 72 } else { 73 tracerProviderOptions := []otlptracegrpc.Option{ 74 otlptracegrpc.WithEndpoint(c.tracesEndpoint), 75 otlptracegrpc.WithRetry(otlptracegrpc.RetryConfig{ 76 Enabled: c.retryConfig.Enabled, 77 InitialInterval: c.retryConfig.InitialInterval, 78 MaxInterval: c.retryConfig.MaxInterval, 79 MaxElapsedTime: c.retryConfig.MaxElapsedTime, 80 }), 81 } 82 if c.withInsecure { 83 tracerProviderOptions = append(tracerProviderOptions, otlptracegrpc.WithInsecure()) 84 } 85 traceExporter, err := otlptracegrpc.New(ctx, tracerProviderOptions...) 86 if err != nil { 87 return nil, nil, fmt.Errorf("failed to create trace exporter: %w", err) 88 } 89 90 m.tp = sdktrace.NewTracerProvider(m.buildTracerProviderOptions(&c, res, traceExporter)...) 91 } 92 93 if c.tracerProviderConfig.textMapPropagator != nil { 94 otel.SetTextMapPropagator(c.tracerProviderConfig.textMapPropagator) 95 } 96 97 if c.tracerProviderConfig.global { 98 otel.SetTracerProvider(m.tp) 99 } 100 } 101 102 if c.meterProviderConfig.enabled { 103 var err error 104 m.mp, err = m.buildMeterProvider(ctx, c, res) 105 if err != nil { 106 return nil, nil, err 107 } 108 if c.meterProviderConfig.global { 109 otel.SetMeterProvider(m.mp) 110 } 111 } 112 113 return m.tp, m.mp, nil 114 } 115 116 func (m *Manager) buildTracerProviderOptions( 117 c *config, 118 res *resource.Resource, exp sdktrace.SpanExporter, 119 ) []sdktrace.TracerProviderOption { 120 opts := []sdktrace.TracerProviderOption{ 121 sdktrace.WithResource(res), 122 sdktrace.WithSampler(sdktrace.TraceIDRatioBased(c.tracerProviderConfig.samplingRate)), 123 } 124 125 if c.tracerProviderConfig.withSyncer { 126 opts = append(opts, sdktrace.WithSyncer(exp)) 127 } else { 128 opts = append(opts, sdktrace.WithSpanProcessor(sdktrace.NewBatchSpanProcessor(exp))) 129 } 130 131 return opts 132 } 133 134 func (m *Manager) buildMeterProvider( 135 ctx context.Context, c config, res *resource.Resource, 136 ) (*sdkmetric.MeterProvider, error) { 137 if c.meterProviderConfig.grpcEndpoint == nil && c.meterProviderConfig.prometheusRegisterer == nil { 138 return nil, fmt.Errorf("no grpc endpoint or prometheus registerer to initialize meter provider") 139 } 140 if c.meterProviderConfig.grpcEndpoint != nil && c.meterProviderConfig.prometheusRegisterer != nil { 141 return nil, fmt.Errorf("cannot initialize meter provider with both grpc endpoint and prometheus registerer") 142 } 143 if c.meterProviderConfig.prometheusRegisterer != nil { 144 return m.buildPrometheusMeterProvider(c, res) 145 } 146 return m.buildOTLPMeterProvider(ctx, c, res) 147 } 148 149 func (m *Manager) buildPrometheusMeterProvider(c config, res *resource.Resource) (*sdkmetric.MeterProvider, error) { 150 exporterOptions := []prometheus.Option{ 151 prometheus.WithRegisterer(c.meterProviderConfig.prometheusRegisterer), 152 prometheus.WithLogger(c.logger), 153 } 154 exp, err := prometheus.New(exporterOptions...) 155 if err != nil { 156 return nil, fmt.Errorf("prometheus: failed to create metric exporter: %w", err) 157 } 158 159 return sdkmetric.NewMeterProvider(m.getMeterProviderOptions(c, res, exp)...), nil 160 } 161 162 func (m *Manager) buildOTLPMeterProvider( 163 ctx context.Context, c config, res *resource.Resource, 164 ) (*sdkmetric.MeterProvider, error) { 165 opts := []otlpmetricgrpc.Option{ 166 otlpmetricgrpc.WithEndpoint(*c.meterProviderConfig.grpcEndpoint), 167 otlpmetricgrpc.WithRetry(otlpmetricgrpc.RetryConfig{ 168 Enabled: c.retryConfig.Enabled, 169 InitialInterval: c.retryConfig.InitialInterval, 170 MaxInterval: c.retryConfig.MaxInterval, 171 MaxElapsedTime: c.retryConfig.MaxElapsedTime, 172 }), 173 } 174 if c.withInsecure { 175 opts = append(opts, otlpmetricgrpc.WithInsecure()) 176 } 177 if len(c.meterProviderConfig.otlpMetricGRPCOptions) > 0 { 178 opts = append(opts, c.meterProviderConfig.otlpMetricGRPCOptions...) 179 } 180 exp, err := otlpmetricgrpc.New(ctx, opts...) 181 if err != nil { 182 return nil, fmt.Errorf("otlp: failed to create metric exporter: %w", err) 183 } 184 185 reader := sdkmetric.NewPeriodicReader( 186 exp, 187 sdkmetric.WithInterval(c.meterProviderConfig.exportsInterval), 188 ) 189 190 return sdkmetric.NewMeterProvider(m.getMeterProviderOptions(c, res, reader)...), nil 191 } 192 193 func (m *Manager) getMeterProviderOptions(c config, res *resource.Resource, r sdkmetric.Reader) []sdkmetric.Option { 194 opts := []sdkmetric.Option{ 195 sdkmetric.WithResource(res), 196 sdkmetric.WithReader(r), 197 } 198 var views []sdkmetric.View 199 if len(c.meterProviderConfig.views) > 0 { 200 views = append(views, c.meterProviderConfig.views...) 201 } 202 if c.meterProviderConfig.defaultHistogramBuckets != nil { 203 views = append(views, c.meterProviderConfig.defaultHistogramBuckets) 204 } 205 if len(views) > 0 { 206 opts = append(opts, sdkmetric.WithView(views...)) 207 } 208 return opts 209 } 210 211 // Shutdown allows you to gracefully clean up after the OTel manager (e.g. close underlying gRPC connection) 212 func (m *Manager) Shutdown(ctx context.Context) error { 213 var g errgroup.Group 214 if m.tp != nil { 215 g.Go(func() error { 216 return m.tp.Shutdown(ctx) 217 }) 218 } 219 if m.mp != nil { 220 g.Go(func() error { 221 return m.mp.Shutdown(ctx) 222 }) 223 } 224 225 done := make(chan error) 226 go func() { 227 done <- g.Wait() 228 close(done) 229 }() 230 231 select { 232 case <-ctx.Done(): 233 return ctx.Err() 234 case err := <-done: 235 return err 236 } 237 } 238 239 // NewResource allows the creation of an OpenTelemetry resource 240 // https://opentelemetry.io/docs/concepts/glossary/#resource 241 func NewResource(svcName, svcVersion string, attrs ...attribute.KeyValue) (*resource.Resource, error) { 242 defaultAttrs := []attribute.KeyValue{ 243 semconv.ServiceNameKey.String(svcName), 244 semconv.ServiceVersionKey.String(svcVersion), 245 } 246 return resource.Merge( 247 resource.Default(), 248 resource.NewWithAttributes(semconv.SchemaURL, append(defaultAttrs, attrs...)...), 249 ) 250 } 251 252 // RetryConfig defines configuration for retrying batches in case of export failure 253 // using an exponential backoff. 254 type RetryConfig struct { 255 // Enabled indicates whether to not retry sending batches in case of 256 // export failure. 257 Enabled bool 258 // InitialInterval the time to wait after the first failure before 259 // retrying. 260 InitialInterval time.Duration 261 // MaxInterval is the upper bound on backoff interval. Once this value is 262 // reached the delay between consecutive retries will always be 263 // `MaxInterval`. 264 MaxInterval time.Duration 265 // MaxElapsedTime is the maximum amount of time (including retries) spent 266 // trying to send a request/batch. Once this value is reached, the data 267 // is discarded. 268 MaxElapsedTime time.Duration 269 } 270 271 type config struct { 272 retryConfig *RetryConfig 273 withInsecure bool 274 275 tracesEndpoint string 276 tracerProviderConfig tracerProviderConfig 277 meterProviderConfig meterProviderConfig 278 279 logger logger 280 } 281 282 type tracerProviderConfig struct { 283 enabled bool 284 global bool 285 samplingRate float64 286 textMapPropagator propagation.TextMapPropagator 287 customSpanExporter SpanExporter 288 withSyncer bool 289 withZipkin bool 290 } 291 292 type meterProviderConfig struct { 293 enabled bool 294 global bool 295 exportsInterval time.Duration 296 views []sdkmetric.View 297 // defaultHistogramBuckets is not part of the above "views" because the order 298 // by which we add views matter. We have to add the default view last because the 299 // views criteria are applied in order and the default one is the more generic. 300 // Thus, if we put it first it will be applied to all histogram instruments removing 301 // the ability to customize the buckets of specific histograms. 302 defaultHistogramBuckets sdkmetric.View 303 grpcEndpoint *string 304 prometheusRegisterer promClient.Registerer 305 otlpMetricGRPCOptions []otlpmetricgrpc.Option 306 } 307 308 type logger interface { 309 Info(...interface{}) 310 Error(...interface{}) 311 } 312 313 type nopLogger struct{} 314 315 func (nopLogger) Info(...interface{}) {} 316 func (nopLogger) Error(...interface{}) {}