go.temporal.io/server@v1.23.0/common/telemetry/config.go (about)

     1  // The MIT License
     2  //
     3  // Copyright (c) 2020 Temporal Technologies Inc.  All rights reserved.
     4  //
     5  // Copyright (c) 2020 Uber Technologies, Inc.
     6  //
     7  // Permission is hereby granted, free of charge, to any person obtaining a copy
     8  // of this software and associated documentation files (the "Software"), to deal
     9  // in the Software without restriction, including without limitation the rights
    10  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    11  // copies of the Software, and to permit persons to whom the Software is
    12  // furnished to do so, subject to the following conditions:
    13  //
    14  // The above copyright notice and this permission notice shall be included in
    15  // all copies or substantial portions of the Software.
    16  //
    17  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    18  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    19  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    20  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    21  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    22  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    23  // THE SOFTWARE.
    24  
    25  package telemetry
    26  
    27  import (
    28  	"context"
    29  	"fmt"
    30  	"strings"
    31  	"sync"
    32  	"time"
    33  
    34  	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
    35  	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
    36  	"go.opentelemetry.io/otel/sdk/metric"
    37  	otelsdktrace "go.opentelemetry.io/otel/sdk/trace"
    38  	"google.golang.org/grpc"
    39  	"google.golang.org/grpc/backoff"
    40  	"google.golang.org/grpc/credentials/insecure"
    41  	"gopkg.in/yaml.v3"
    42  
    43  	"go.temporal.io/server/common/util"
    44  )
    45  
    46  const (
    47  	// the following defaults were taken from the grpc docs as of grpc v1.46.
    48  	// they are not available programatically
    49  
    50  	defaultReadBufferSize    = 32 * 1024
    51  	defaultWriteBufferSize   = 32 * 1024
    52  	defaultMinConnectTimeout = 10 * time.Second
    53  
    54  	// the following defaults were taken from the otel library as of v1.7.
    55  	// they are not available programatically
    56  
    57  	retryDefaultEnabled         = true
    58  	retryDefaultInitialInterval = 5 * time.Second
    59  	retryDefaultMaxInterval     = 30 * time.Second
    60  	retryDefaultMaxElapsedTime  = 1 * time.Minute
    61  )
    62  
    63  type (
    64  	metadata struct {
    65  		Name   string
    66  		Labels map[string]string
    67  	}
    68  
    69  	connection struct {
    70  		Kind     string
    71  		Metadata metadata
    72  		Spec     interface{} `yaml:"-"`
    73  	}
    74  
    75  	grpcconn struct {
    76  		Endpoint      string
    77  		Block         bool
    78  		ConnectParams struct {
    79  			MinConnectTimeout time.Duration `yaml:"min_connect_timeout"`
    80  			Backoff           struct {
    81  				BaseDelay  time.Duration `yaml:"base_delay"`
    82  				Multiplier float64
    83  				Jitter     float64
    84  				MaxDelay   time.Duration `yaml:"max_delay"`
    85  			}
    86  		} `yaml:"connect_params"`
    87  		UserAgent       string `yaml:"user_agent"`
    88  		ReadBufferSize  int    `yaml:"read_buffer_size"`
    89  		WriteBufferSize int    `yaml:"write_buffer_size"`
    90  		Authority       string
    91  		Insecure        bool
    92  
    93  		cc *grpc.ClientConn
    94  	}
    95  
    96  	exporter struct {
    97  		Kind struct {
    98  			Signal   string
    99  			Model    string
   100  			Protocol string
   101  		}
   102  		Metadata metadata
   103  		Spec     interface{} `yaml:"-"`
   104  	}
   105  
   106  	otlpGrpcExporter struct {
   107  		ConnectionName string `yaml:"connection_name"`
   108  		Connection     grpcconn
   109  		Headers        map[string]string
   110  		Timeout        time.Duration
   111  		Retry          struct {
   112  			Enabled         bool
   113  			InitialInterval time.Duration `yaml:"initial_interval"`
   114  			MaxInterval     time.Duration `yaml:"max_interval"`
   115  			MaxElapsedTime  time.Duration `yaml:"max_elapsed_time"`
   116  		}
   117  	}
   118  
   119  	otlpGrpcSpanExporter struct {
   120  		otlpGrpcExporter `yaml:",inline"`
   121  	}
   122  	otlpGrpcMetricExporter struct {
   123  		otlpGrpcExporter `yaml:",inline"`
   124  	}
   125  
   126  	exportConfig struct {
   127  		Connections []connection
   128  		Exporters   []exporter
   129  	}
   130  
   131  	// sharedConnSpanExporter and sharedConnMetricExporter exist to wrap a span
   132  	// exporter that uses a shared *grpc.ClientConn so that the grpc.Dial call
   133  	// doesn't happen until Start() is called. Without this wrapper the
   134  	// grpc.ClientConn (which can only be created via grpc.Dial or
   135  	// grpc.DialContext) would need to exist at _construction_ time, meaning
   136  	// that we would need to dial at construction rather then during the start
   137  	// phase.
   138  
   139  	sharedConnSpanExporter struct {
   140  		baseOpts []otlptracegrpc.Option
   141  		dialer   interface {
   142  			Dial(context.Context) (*grpc.ClientConn, error)
   143  		}
   144  		startOnce sync.Once
   145  		otelsdktrace.SpanExporter
   146  	}
   147  
   148  	sharedConnMetricExporter struct {
   149  		baseOpts []otlpmetricgrpc.Option
   150  		dialer   interface {
   151  			Dial(context.Context) (*grpc.ClientConn, error)
   152  		}
   153  		startOnce sync.Once
   154  		metric.Exporter
   155  	}
   156  
   157  	// ExportConfig represents YAML structured configuration for a set of OTEL
   158  	// trace/span/log exporters.
   159  	ExportConfig struct {
   160  		inner exportConfig `yaml:",inline"`
   161  	}
   162  )
   163  
   164  // UnmarshalYAML loads the state of an ExportConfig from parsed YAML
   165  func (ec *ExportConfig) UnmarshalYAML(n *yaml.Node) error {
   166  	return n.Decode(&ec.inner)
   167  }
   168  
   169  func (ec *ExportConfig) SpanExporters() ([]otelsdktrace.SpanExporter, error) {
   170  	return ec.inner.SpanExporters()
   171  }
   172  
   173  func (ec *ExportConfig) MetricExporters() ([]metric.Exporter, error) {
   174  	return ec.inner.MetricExporters()
   175  }
   176  
   177  // Dial returns the cached *grpc.ClientConn instance or creates a new one,
   178  // caches and then returns it. This function is not threadsafe.
   179  func (g *grpcconn) Dial(ctx context.Context) (*grpc.ClientConn, error) {
   180  	var err error
   181  	if g.cc == nil {
   182  		g.cc, err = grpc.DialContext(ctx, g.Endpoint, g.dialOpts()...)
   183  	}
   184  	return g.cc, err
   185  }
   186  
   187  func (g *grpcconn) dialOpts() []grpc.DialOption {
   188  	out := []grpc.DialOption{
   189  		grpc.WithReadBufferSize(util.Coalesce(g.ReadBufferSize, defaultReadBufferSize)),
   190  		grpc.WithWriteBufferSize(util.Coalesce(g.WriteBufferSize, defaultWriteBufferSize)),
   191  		grpc.WithUserAgent(g.UserAgent),
   192  		grpc.WithConnectParams(grpc.ConnectParams{
   193  			MinConnectTimeout: util.Coalesce(g.ConnectParams.MinConnectTimeout, defaultMinConnectTimeout),
   194  			Backoff: backoff.Config{
   195  				BaseDelay:  util.Coalesce(g.ConnectParams.Backoff.BaseDelay, backoff.DefaultConfig.BaseDelay),
   196  				MaxDelay:   util.Coalesce(g.ConnectParams.Backoff.MaxDelay, backoff.DefaultConfig.MaxDelay),
   197  				Jitter:     util.Coalesce(g.ConnectParams.Backoff.Jitter, backoff.DefaultConfig.Jitter),
   198  				Multiplier: util.Coalesce(g.ConnectParams.Backoff.Multiplier, backoff.DefaultConfig.Multiplier),
   199  			},
   200  		}),
   201  	}
   202  	if g.Insecure {
   203  		out = append(out, grpc.WithTransportCredentials(insecure.NewCredentials()))
   204  	}
   205  	if g.Block {
   206  		out = append(out, grpc.WithBlock())
   207  	}
   208  	if g.Authority != "" {
   209  		out = append(out, grpc.WithAuthority(g.Authority))
   210  	}
   211  	return out
   212  }
   213  
   214  // SpanExporters builds the set of OTEL SpanExporter objects defined by the YAML
   215  // unmarshaled into this ExportConfig object. The returned SpanExporters have
   216  // not been started.
   217  func (ec *exportConfig) SpanExporters() ([]otelsdktrace.SpanExporter, error) {
   218  	out := make([]otelsdktrace.SpanExporter, 0, len(ec.Exporters))
   219  	for _, expcfg := range ec.Exporters {
   220  		if !strings.HasPrefix(expcfg.Kind.Signal, "trace") {
   221  			continue
   222  		}
   223  		switch spec := expcfg.Spec.(type) {
   224  		case *otlpGrpcSpanExporter:
   225  			spanexp, err := ec.buildOtlpGrpcSpanExporter(spec)
   226  			if err != nil {
   227  				return nil, err
   228  			}
   229  			out = append(out, spanexp)
   230  		default:
   231  			return nil, fmt.Errorf("unsupported span exporter type: %T", spec)
   232  		}
   233  	}
   234  	return out, nil
   235  }
   236  
   237  func (ec *exportConfig) MetricExporters() ([]metric.Exporter, error) {
   238  	out := make([]metric.Exporter, 0, len(ec.Exporters))
   239  	for _, expcfg := range ec.Exporters {
   240  		if !strings.HasPrefix(expcfg.Kind.Signal, "metric") {
   241  			continue
   242  		}
   243  		switch spec := expcfg.Spec.(type) {
   244  		case *otlpGrpcMetricExporter:
   245  			metricexp, err := ec.buildOtlpGrpcMetricExporter(spec)
   246  			if err != nil {
   247  				return nil, err
   248  			}
   249  			out = append(out, metricexp)
   250  		default:
   251  			return nil, fmt.Errorf("unsupported metric exporter type: %T", spec)
   252  		}
   253  	}
   254  	return out, nil
   255  
   256  }
   257  
   258  func (ec *exportConfig) buildOtlpGrpcMetricExporter(
   259  	cfg *otlpGrpcMetricExporter,
   260  ) (metric.Exporter, error) {
   261  	dopts := cfg.Connection.dialOpts()
   262  	opts := []otlpmetricgrpc.Option{
   263  		otlpmetricgrpc.WithEndpoint(cfg.Connection.Endpoint),
   264  		otlpmetricgrpc.WithHeaders(cfg.Headers),
   265  		otlpmetricgrpc.WithTimeout(util.Coalesce(cfg.Timeout, 10*time.Second)),
   266  		otlpmetricgrpc.WithDialOption(dopts...),
   267  		otlpmetricgrpc.WithRetry(otlpmetricgrpc.RetryConfig{
   268  			Enabled:         util.Coalesce(cfg.Retry.Enabled, retryDefaultEnabled),
   269  			InitialInterval: util.Coalesce(cfg.Retry.InitialInterval, retryDefaultInitialInterval),
   270  			MaxInterval:     util.Coalesce(cfg.Retry.MaxInterval, retryDefaultMaxInterval),
   271  			MaxElapsedTime:  util.Coalesce(cfg.Retry.MaxElapsedTime, retryDefaultMaxElapsedTime),
   272  		}),
   273  	}
   274  
   275  	// work around https://github.com/open-telemetry/opentelemetry-go/issues/2940
   276  	if cfg.Connection.Insecure {
   277  		opts = append(opts, otlpmetricgrpc.WithInsecure())
   278  	}
   279  
   280  	if cfg.ConnectionName == "" {
   281  		return otlpmetricgrpc.New(context.Background(), opts...)
   282  	}
   283  
   284  	conncfg, ok := ec.findNamedGrpcConnCfg(cfg.ConnectionName)
   285  	if !ok {
   286  		return nil, fmt.Errorf("OTEL exporter connection %q not found", cfg.ConnectionName)
   287  	}
   288  	return &sharedConnMetricExporter{
   289  		baseOpts: opts,
   290  		dialer:   conncfg,
   291  	}, nil
   292  }
   293  
   294  func (ec *exportConfig) buildOtlpGrpcSpanExporter(
   295  	cfg *otlpGrpcSpanExporter,
   296  ) (otelsdktrace.SpanExporter, error) {
   297  	opts := []otlptracegrpc.Option{
   298  		otlptracegrpc.WithEndpoint(cfg.Connection.Endpoint),
   299  		otlptracegrpc.WithHeaders(cfg.Headers),
   300  		otlptracegrpc.WithTimeout(util.Coalesce(cfg.Timeout, 10*time.Second)),
   301  		otlptracegrpc.WithDialOption(cfg.Connection.dialOpts()...),
   302  		otlptracegrpc.WithRetry(otlptracegrpc.RetryConfig{
   303  			Enabled:         util.Coalesce(cfg.Retry.Enabled, retryDefaultEnabled),
   304  			InitialInterval: util.Coalesce(cfg.Retry.InitialInterval, retryDefaultInitialInterval),
   305  			MaxInterval:     util.Coalesce(cfg.Retry.MaxInterval, retryDefaultMaxInterval),
   306  			MaxElapsedTime:  util.Coalesce(cfg.Retry.MaxElapsedTime, retryDefaultMaxElapsedTime),
   307  		}),
   308  	}
   309  
   310  	// work around https://github.com/open-telemetry/opentelemetry-go/issues/2940
   311  	if cfg.Connection.Insecure {
   312  		opts = append(opts, otlptracegrpc.WithInsecure())
   313  	}
   314  
   315  	if cfg.ConnectionName == "" {
   316  		return otlptracegrpc.NewUnstarted(opts...), nil
   317  	}
   318  
   319  	conncfg, ok := ec.findNamedGrpcConnCfg(cfg.ConnectionName)
   320  	if !ok {
   321  		return nil, fmt.Errorf("OTEL exporter connection %q not found", cfg.ConnectionName)
   322  	}
   323  	return &sharedConnSpanExporter{
   324  		baseOpts: opts,
   325  		dialer:   conncfg,
   326  	}, nil
   327  }
   328  
   329  // Start initiates the connection to an upstream grpc OTLP server
   330  func (scse *sharedConnSpanExporter) Start(ctx context.Context) error {
   331  	var err error
   332  	scse.startOnce.Do(func() {
   333  		var cc *grpc.ClientConn
   334  		cc, err = scse.dialer.Dial(ctx)
   335  		if err != nil {
   336  			return
   337  		}
   338  		opts := append(scse.baseOpts, otlptracegrpc.WithGRPCConn(cc))
   339  		scse.SpanExporter, err = otlptracegrpc.New(ctx, opts...)
   340  	})
   341  	return err
   342  }
   343  
   344  // Start initiates the connection to an upstream grpc OTLP server
   345  func (scme *sharedConnMetricExporter) Start(ctx context.Context) error {
   346  	var err error
   347  	scme.startOnce.Do(func() {
   348  		var cc *grpc.ClientConn
   349  		cc, err = scme.dialer.Dial(ctx)
   350  		if err != nil {
   351  			return
   352  		}
   353  		opts := append(scme.baseOpts, otlpmetricgrpc.WithGRPCConn(cc))
   354  		scme.Exporter, err = otlpmetricgrpc.New(ctx, opts...)
   355  	})
   356  	return err
   357  }
   358  
   359  func (ec *exportConfig) findNamedGrpcConnCfg(name string) (*grpcconn, bool) {
   360  	if name == "" {
   361  		return nil, false
   362  	}
   363  	for _, conn := range ec.Connections {
   364  		if gconn, ok := conn.Spec.(*grpcconn); ok && conn.Metadata.Name == name {
   365  			return gconn, true
   366  		}
   367  	}
   368  	return nil, false
   369  }
   370  
   371  // UnmarshalYAML loads the state of a generic connection from parsed YAML
   372  func (c *connection) UnmarshalYAML(n *yaml.Node) error {
   373  	type conn connection
   374  	type overlay struct {
   375  		*conn `yaml:",inline"`
   376  		Spec  yaml.Node `yaml:"spec"`
   377  	}
   378  	obj := overlay{conn: (*conn)(c)}
   379  	err := n.Decode(&obj)
   380  	if err != nil {
   381  		return err
   382  	}
   383  	switch c.Kind {
   384  	case "grpc":
   385  		c.Spec = &grpcconn{}
   386  	default:
   387  		return fmt.Errorf("unsupported connection kind: %q", c.Kind)
   388  	}
   389  	return obj.Spec.Decode(c.Spec)
   390  }
   391  
   392  // UnmarshalYAML loads the state of a generic exporter from parsed YAML
   393  func (e *exporter) UnmarshalYAML(n *yaml.Node) error {
   394  	type exp exporter
   395  	type overlay struct {
   396  		*exp `yaml:",inline"`
   397  		Spec yaml.Node `yaml:"spec"`
   398  	}
   399  	obj := overlay{exp: (*exp)(e)}
   400  	err := n.Decode(&obj)
   401  	if err != nil {
   402  		return err
   403  	}
   404  	descriptor := fmt.Sprintf("%v+%v+%v", e.Kind.Signal, e.Kind.Model, e.Kind.Protocol)
   405  	switch descriptor {
   406  	case "traces+otlp+grpc", "trace+otlp+grpc":
   407  		e.Spec = new(otlpGrpcSpanExporter)
   408  	case "metrics+otlp+grpc", "metric+otlp+grpc":
   409  		e.Spec = new(otlpGrpcMetricExporter)
   410  	default:
   411  		return fmt.Errorf(
   412  			"unsupported exporter kind: signal=%q; model=%q; protocol=%q",
   413  			e.Kind.Signal,
   414  			e.Kind.Model,
   415  			e.Kind.Protocol,
   416  		)
   417  	}
   418  	return obj.Spec.Decode(e.Spec)
   419  }