go.temporal.io/server@v1.23.0/common/telemetry/config.go (about) 1 // The MIT License 2 // 3 // Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. 4 // 5 // Copyright (c) 2020 Uber Technologies, Inc. 6 // 7 // Permission is hereby granted, free of charge, to any person obtaining a copy 8 // of this software and associated documentation files (the "Software"), to deal 9 // in the Software without restriction, including without limitation the rights 10 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 // copies of the Software, and to permit persons to whom the Software is 12 // furnished to do so, subject to the following conditions: 13 // 14 // The above copyright notice and this permission notice shall be included in 15 // all copies or substantial portions of the Software. 16 // 17 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 // THE SOFTWARE. 24 25 package telemetry 26 27 import ( 28 "context" 29 "fmt" 30 "strings" 31 "sync" 32 "time" 33 34 "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" 35 "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" 36 "go.opentelemetry.io/otel/sdk/metric" 37 otelsdktrace "go.opentelemetry.io/otel/sdk/trace" 38 "google.golang.org/grpc" 39 "google.golang.org/grpc/backoff" 40 "google.golang.org/grpc/credentials/insecure" 41 "gopkg.in/yaml.v3" 42 43 "go.temporal.io/server/common/util" 44 ) 45 46 const ( 47 // the following defaults were taken from the grpc docs as of grpc v1.46. 48 // they are not available programatically 49 50 defaultReadBufferSize = 32 * 1024 51 defaultWriteBufferSize = 32 * 1024 52 defaultMinConnectTimeout = 10 * time.Second 53 54 // the following defaults were taken from the otel library as of v1.7. 55 // they are not available programatically 56 57 retryDefaultEnabled = true 58 retryDefaultInitialInterval = 5 * time.Second 59 retryDefaultMaxInterval = 30 * time.Second 60 retryDefaultMaxElapsedTime = 1 * time.Minute 61 ) 62 63 type ( 64 metadata struct { 65 Name string 66 Labels map[string]string 67 } 68 69 connection struct { 70 Kind string 71 Metadata metadata 72 Spec interface{} `yaml:"-"` 73 } 74 75 grpcconn struct { 76 Endpoint string 77 Block bool 78 ConnectParams struct { 79 MinConnectTimeout time.Duration `yaml:"min_connect_timeout"` 80 Backoff struct { 81 BaseDelay time.Duration `yaml:"base_delay"` 82 Multiplier float64 83 Jitter float64 84 MaxDelay time.Duration `yaml:"max_delay"` 85 } 86 } `yaml:"connect_params"` 87 UserAgent string `yaml:"user_agent"` 88 ReadBufferSize int `yaml:"read_buffer_size"` 89 WriteBufferSize int `yaml:"write_buffer_size"` 90 Authority string 91 Insecure bool 92 93 cc *grpc.ClientConn 94 } 95 96 exporter struct { 97 Kind struct { 98 Signal string 99 Model string 100 Protocol string 101 } 102 Metadata metadata 103 Spec interface{} `yaml:"-"` 104 } 105 106 otlpGrpcExporter struct { 107 ConnectionName string `yaml:"connection_name"` 108 Connection grpcconn 109 Headers map[string]string 110 Timeout time.Duration 111 Retry struct { 112 Enabled bool 113 InitialInterval time.Duration `yaml:"initial_interval"` 114 MaxInterval time.Duration `yaml:"max_interval"` 115 MaxElapsedTime time.Duration `yaml:"max_elapsed_time"` 116 } 117 } 118 119 otlpGrpcSpanExporter struct { 120 otlpGrpcExporter `yaml:",inline"` 121 } 122 otlpGrpcMetricExporter struct { 123 otlpGrpcExporter `yaml:",inline"` 124 } 125 126 exportConfig struct { 127 Connections []connection 128 Exporters []exporter 129 } 130 131 // sharedConnSpanExporter and sharedConnMetricExporter exist to wrap a span 132 // exporter that uses a shared *grpc.ClientConn so that the grpc.Dial call 133 // doesn't happen until Start() is called. Without this wrapper the 134 // grpc.ClientConn (which can only be created via grpc.Dial or 135 // grpc.DialContext) would need to exist at _construction_ time, meaning 136 // that we would need to dial at construction rather then during the start 137 // phase. 138 139 sharedConnSpanExporter struct { 140 baseOpts []otlptracegrpc.Option 141 dialer interface { 142 Dial(context.Context) (*grpc.ClientConn, error) 143 } 144 startOnce sync.Once 145 otelsdktrace.SpanExporter 146 } 147 148 sharedConnMetricExporter struct { 149 baseOpts []otlpmetricgrpc.Option 150 dialer interface { 151 Dial(context.Context) (*grpc.ClientConn, error) 152 } 153 startOnce sync.Once 154 metric.Exporter 155 } 156 157 // ExportConfig represents YAML structured configuration for a set of OTEL 158 // trace/span/log exporters. 159 ExportConfig struct { 160 inner exportConfig `yaml:",inline"` 161 } 162 ) 163 164 // UnmarshalYAML loads the state of an ExportConfig from parsed YAML 165 func (ec *ExportConfig) UnmarshalYAML(n *yaml.Node) error { 166 return n.Decode(&ec.inner) 167 } 168 169 func (ec *ExportConfig) SpanExporters() ([]otelsdktrace.SpanExporter, error) { 170 return ec.inner.SpanExporters() 171 } 172 173 func (ec *ExportConfig) MetricExporters() ([]metric.Exporter, error) { 174 return ec.inner.MetricExporters() 175 } 176 177 // Dial returns the cached *grpc.ClientConn instance or creates a new one, 178 // caches and then returns it. This function is not threadsafe. 179 func (g *grpcconn) Dial(ctx context.Context) (*grpc.ClientConn, error) { 180 var err error 181 if g.cc == nil { 182 g.cc, err = grpc.DialContext(ctx, g.Endpoint, g.dialOpts()...) 183 } 184 return g.cc, err 185 } 186 187 func (g *grpcconn) dialOpts() []grpc.DialOption { 188 out := []grpc.DialOption{ 189 grpc.WithReadBufferSize(util.Coalesce(g.ReadBufferSize, defaultReadBufferSize)), 190 grpc.WithWriteBufferSize(util.Coalesce(g.WriteBufferSize, defaultWriteBufferSize)), 191 grpc.WithUserAgent(g.UserAgent), 192 grpc.WithConnectParams(grpc.ConnectParams{ 193 MinConnectTimeout: util.Coalesce(g.ConnectParams.MinConnectTimeout, defaultMinConnectTimeout), 194 Backoff: backoff.Config{ 195 BaseDelay: util.Coalesce(g.ConnectParams.Backoff.BaseDelay, backoff.DefaultConfig.BaseDelay), 196 MaxDelay: util.Coalesce(g.ConnectParams.Backoff.MaxDelay, backoff.DefaultConfig.MaxDelay), 197 Jitter: util.Coalesce(g.ConnectParams.Backoff.Jitter, backoff.DefaultConfig.Jitter), 198 Multiplier: util.Coalesce(g.ConnectParams.Backoff.Multiplier, backoff.DefaultConfig.Multiplier), 199 }, 200 }), 201 } 202 if g.Insecure { 203 out = append(out, grpc.WithTransportCredentials(insecure.NewCredentials())) 204 } 205 if g.Block { 206 out = append(out, grpc.WithBlock()) 207 } 208 if g.Authority != "" { 209 out = append(out, grpc.WithAuthority(g.Authority)) 210 } 211 return out 212 } 213 214 // SpanExporters builds the set of OTEL SpanExporter objects defined by the YAML 215 // unmarshaled into this ExportConfig object. The returned SpanExporters have 216 // not been started. 217 func (ec *exportConfig) SpanExporters() ([]otelsdktrace.SpanExporter, error) { 218 out := make([]otelsdktrace.SpanExporter, 0, len(ec.Exporters)) 219 for _, expcfg := range ec.Exporters { 220 if !strings.HasPrefix(expcfg.Kind.Signal, "trace") { 221 continue 222 } 223 switch spec := expcfg.Spec.(type) { 224 case *otlpGrpcSpanExporter: 225 spanexp, err := ec.buildOtlpGrpcSpanExporter(spec) 226 if err != nil { 227 return nil, err 228 } 229 out = append(out, spanexp) 230 default: 231 return nil, fmt.Errorf("unsupported span exporter type: %T", spec) 232 } 233 } 234 return out, nil 235 } 236 237 func (ec *exportConfig) MetricExporters() ([]metric.Exporter, error) { 238 out := make([]metric.Exporter, 0, len(ec.Exporters)) 239 for _, expcfg := range ec.Exporters { 240 if !strings.HasPrefix(expcfg.Kind.Signal, "metric") { 241 continue 242 } 243 switch spec := expcfg.Spec.(type) { 244 case *otlpGrpcMetricExporter: 245 metricexp, err := ec.buildOtlpGrpcMetricExporter(spec) 246 if err != nil { 247 return nil, err 248 } 249 out = append(out, metricexp) 250 default: 251 return nil, fmt.Errorf("unsupported metric exporter type: %T", spec) 252 } 253 } 254 return out, nil 255 256 } 257 258 func (ec *exportConfig) buildOtlpGrpcMetricExporter( 259 cfg *otlpGrpcMetricExporter, 260 ) (metric.Exporter, error) { 261 dopts := cfg.Connection.dialOpts() 262 opts := []otlpmetricgrpc.Option{ 263 otlpmetricgrpc.WithEndpoint(cfg.Connection.Endpoint), 264 otlpmetricgrpc.WithHeaders(cfg.Headers), 265 otlpmetricgrpc.WithTimeout(util.Coalesce(cfg.Timeout, 10*time.Second)), 266 otlpmetricgrpc.WithDialOption(dopts...), 267 otlpmetricgrpc.WithRetry(otlpmetricgrpc.RetryConfig{ 268 Enabled: util.Coalesce(cfg.Retry.Enabled, retryDefaultEnabled), 269 InitialInterval: util.Coalesce(cfg.Retry.InitialInterval, retryDefaultInitialInterval), 270 MaxInterval: util.Coalesce(cfg.Retry.MaxInterval, retryDefaultMaxInterval), 271 MaxElapsedTime: util.Coalesce(cfg.Retry.MaxElapsedTime, retryDefaultMaxElapsedTime), 272 }), 273 } 274 275 // work around https://github.com/open-telemetry/opentelemetry-go/issues/2940 276 if cfg.Connection.Insecure { 277 opts = append(opts, otlpmetricgrpc.WithInsecure()) 278 } 279 280 if cfg.ConnectionName == "" { 281 return otlpmetricgrpc.New(context.Background(), opts...) 282 } 283 284 conncfg, ok := ec.findNamedGrpcConnCfg(cfg.ConnectionName) 285 if !ok { 286 return nil, fmt.Errorf("OTEL exporter connection %q not found", cfg.ConnectionName) 287 } 288 return &sharedConnMetricExporter{ 289 baseOpts: opts, 290 dialer: conncfg, 291 }, nil 292 } 293 294 func (ec *exportConfig) buildOtlpGrpcSpanExporter( 295 cfg *otlpGrpcSpanExporter, 296 ) (otelsdktrace.SpanExporter, error) { 297 opts := []otlptracegrpc.Option{ 298 otlptracegrpc.WithEndpoint(cfg.Connection.Endpoint), 299 otlptracegrpc.WithHeaders(cfg.Headers), 300 otlptracegrpc.WithTimeout(util.Coalesce(cfg.Timeout, 10*time.Second)), 301 otlptracegrpc.WithDialOption(cfg.Connection.dialOpts()...), 302 otlptracegrpc.WithRetry(otlptracegrpc.RetryConfig{ 303 Enabled: util.Coalesce(cfg.Retry.Enabled, retryDefaultEnabled), 304 InitialInterval: util.Coalesce(cfg.Retry.InitialInterval, retryDefaultInitialInterval), 305 MaxInterval: util.Coalesce(cfg.Retry.MaxInterval, retryDefaultMaxInterval), 306 MaxElapsedTime: util.Coalesce(cfg.Retry.MaxElapsedTime, retryDefaultMaxElapsedTime), 307 }), 308 } 309 310 // work around https://github.com/open-telemetry/opentelemetry-go/issues/2940 311 if cfg.Connection.Insecure { 312 opts = append(opts, otlptracegrpc.WithInsecure()) 313 } 314 315 if cfg.ConnectionName == "" { 316 return otlptracegrpc.NewUnstarted(opts...), nil 317 } 318 319 conncfg, ok := ec.findNamedGrpcConnCfg(cfg.ConnectionName) 320 if !ok { 321 return nil, fmt.Errorf("OTEL exporter connection %q not found", cfg.ConnectionName) 322 } 323 return &sharedConnSpanExporter{ 324 baseOpts: opts, 325 dialer: conncfg, 326 }, nil 327 } 328 329 // Start initiates the connection to an upstream grpc OTLP server 330 func (scse *sharedConnSpanExporter) Start(ctx context.Context) error { 331 var err error 332 scse.startOnce.Do(func() { 333 var cc *grpc.ClientConn 334 cc, err = scse.dialer.Dial(ctx) 335 if err != nil { 336 return 337 } 338 opts := append(scse.baseOpts, otlptracegrpc.WithGRPCConn(cc)) 339 scse.SpanExporter, err = otlptracegrpc.New(ctx, opts...) 340 }) 341 return err 342 } 343 344 // Start initiates the connection to an upstream grpc OTLP server 345 func (scme *sharedConnMetricExporter) Start(ctx context.Context) error { 346 var err error 347 scme.startOnce.Do(func() { 348 var cc *grpc.ClientConn 349 cc, err = scme.dialer.Dial(ctx) 350 if err != nil { 351 return 352 } 353 opts := append(scme.baseOpts, otlpmetricgrpc.WithGRPCConn(cc)) 354 scme.Exporter, err = otlpmetricgrpc.New(ctx, opts...) 355 }) 356 return err 357 } 358 359 func (ec *exportConfig) findNamedGrpcConnCfg(name string) (*grpcconn, bool) { 360 if name == "" { 361 return nil, false 362 } 363 for _, conn := range ec.Connections { 364 if gconn, ok := conn.Spec.(*grpcconn); ok && conn.Metadata.Name == name { 365 return gconn, true 366 } 367 } 368 return nil, false 369 } 370 371 // UnmarshalYAML loads the state of a generic connection from parsed YAML 372 func (c *connection) UnmarshalYAML(n *yaml.Node) error { 373 type conn connection 374 type overlay struct { 375 *conn `yaml:",inline"` 376 Spec yaml.Node `yaml:"spec"` 377 } 378 obj := overlay{conn: (*conn)(c)} 379 err := n.Decode(&obj) 380 if err != nil { 381 return err 382 } 383 switch c.Kind { 384 case "grpc": 385 c.Spec = &grpcconn{} 386 default: 387 return fmt.Errorf("unsupported connection kind: %q", c.Kind) 388 } 389 return obj.Spec.Decode(c.Spec) 390 } 391 392 // UnmarshalYAML loads the state of a generic exporter from parsed YAML 393 func (e *exporter) UnmarshalYAML(n *yaml.Node) error { 394 type exp exporter 395 type overlay struct { 396 *exp `yaml:",inline"` 397 Spec yaml.Node `yaml:"spec"` 398 } 399 obj := overlay{exp: (*exp)(e)} 400 err := n.Decode(&obj) 401 if err != nil { 402 return err 403 } 404 descriptor := fmt.Sprintf("%v+%v+%v", e.Kind.Signal, e.Kind.Model, e.Kind.Protocol) 405 switch descriptor { 406 case "traces+otlp+grpc", "trace+otlp+grpc": 407 e.Spec = new(otlpGrpcSpanExporter) 408 case "metrics+otlp+grpc", "metric+otlp+grpc": 409 e.Spec = new(otlpGrpcMetricExporter) 410 default: 411 return fmt.Errorf( 412 "unsupported exporter kind: signal=%q; model=%q; protocol=%q", 413 e.Kind.Signal, 414 e.Kind.Model, 415 e.Kind.Protocol, 416 ) 417 } 418 return obj.Spec.Decode(e.Spec) 419 }