github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/x/instrument/config.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package instrument
    22  
    23  import (
    24  	"errors"
    25  	"fmt"
    26  	"io"
    27  	"net"
    28  	"net/http"
    29  	"time"
    30  
    31  	prom "github.com/m3db/prometheus_client_golang/prometheus"
    32  	"github.com/uber-go/tally"
    33  	"github.com/uber-go/tally/m3"
    34  	"github.com/uber-go/tally/multi"
    35  	"github.com/uber-go/tally/prometheus"
    36  	"go.uber.org/zap"
    37  )
    38  
// errNoReporterConfigured is returned by NewRootScopeAndReporters when the
// configuration enables neither the M3 nor the Prometheus reporter.
var errNoReporterConfigured = errors.New("no reporter configured")
    40  
// ScopeConfiguration configures a metric scope.
type ScopeConfiguration struct {
	// Prefix of metrics in this scope. NewRootScopeReporter passes it to the
	// root scope as its prefix.
	Prefix string `yaml:"prefix"`

	// Metrics reporting interval. When zero, ReportInterval falls back to
	// defaultReportingInterval.
	ReportingInterval time.Duration `yaml:"reportingInterval"`

	// Common tags shared by metrics reported. NewRootScopeReporter passes them
	// to the root scope as its tags.
	CommonTags map[string]string `yaml:"tags"`
}
    52  
// MetricsConfiguration configures options for emitting metrics.
type MetricsConfiguration struct {
	// Root scope configuration. May be nil, in which case the root scope gets
	// no prefix or tags and the default reporting interval is used.
	RootScope *ScopeConfiguration `yaml:"scope"`

	// M3 reporter configuration. An M3 reporter is only constructed when this
	// is non-nil.
	M3Reporter *m3.Configuration `yaml:"m3"`

	// Prometheus reporter configuration. A Prometheus reporter is only
	// constructed when this is non-nil.
	PrometheusReporter *PrometheusConfiguration `yaml:"prometheus"`

	// Metrics sampling rate. SampleRate returns this value when it lies in
	// (0.0, 1.0] and defaultSamplingRate otherwise.
	SamplingRate float64 `yaml:"samplingRate" validate:"nonzero,min=0.0,max=1.0"`

	// Extended metrics type. When non-nil, NewRootScopeReporter starts
	// extended metrics reporting on the root scope.
	ExtendedMetrics *ExtendedMetricsType `yaml:"extended"`

	// Metric sanitization type. When non-nil, its NewOptions result is used as
	// the root scope's sanitize options.
	Sanitization *MetricSanitizationType `yaml:"sanitization"`
}
    73  
    74  // NewRootScope creates a new tally.Scope based on a tally.CachedStatsReporter
    75  // based on the the the config.
    76  func (mc *MetricsConfiguration) NewRootScope() (tally.Scope, io.Closer, error) {
    77  	opts := NewRootScopeAndReportersOptions{}
    78  	scope, closer, _, err := mc.NewRootScopeAndReporters(opts)
    79  	return scope, closer, err
    80  }
    81  
// MetricsConfigurationReporters is the reporters constructed.
//
// AllReporters holds every reporter that NewRootScopeAndReporters built (the
// M3 reporter first, then the Prometheus reporter). M3Reporter and
// PrometheusReporter are nil when the corresponding reporter was not
// configured.
type MetricsConfigurationReporters struct {
	AllReporters       []tally.CachedStatsReporter
	M3Reporter         *MetricsConfigurationM3Reporter
	PrometheusReporter *MetricsConfigurationPrometheusReporter
}
    88  
// MetricsConfigurationM3Reporter is the M3 reporter if constructed.
// Reporter is the value returned by the M3 configuration's NewReporter.
type MetricsConfigurationM3Reporter struct {
	Reporter m3.Reporter
}
    93  
// MetricsConfigurationPrometheusReporter is the Prometheus reporter if constructed.
//
// Registry is the registry NewRootScopeAndReporters created for the reporter;
// it has the package's Prometheus process collector registered with open FD
// collection disabled.
type MetricsConfigurationPrometheusReporter struct {
	Reporter prometheus.Reporter
	Registry *prom.Registry
}
    99  
// NewRootScopeAndReportersOptions is a set of options for
// NewRootScopeAndReporters. All fields are optional and only affect the
// Prometheus reporter.
//
// PrometheusHandlerListener, PrometheusDefaultServeMux and
// PrometheusExternalRegistries are forwarded unchanged to the Prometheus
// reporter's configuration options. PrometheusOnError, when non-nil, replaces
// the default handler that logs metric registration errors.
type NewRootScopeAndReportersOptions struct {
	PrometheusHandlerListener    net.Listener
	PrometheusDefaultServeMux    *http.ServeMux
	PrometheusExternalRegistries []PrometheusExternalRegistry
	PrometheusOnError            func(e error)
	// CommonLabels will be appended to every metric gathered.
	CommonLabels map[string]string
}
   109  
   110  type metricsClosers struct {
   111  	// serverCloser is responsible for closing the http server handling /metrics
   112  	// if one was started up as a part of reporter creation.
   113  	serverCloser io.Closer
   114  	// reporterClose is responsible for closing the underlying tally.Reporter
   115  	// responsible for reporting metrics for all registered scopes.
   116  	reporterCloser io.Closer
   117  }
   118  
   119  func (m metricsClosers) Close() error {
   120  	if err := m.reporterCloser.Close(); err != nil {
   121  		return err
   122  	}
   123  
   124  	if m.serverCloser != nil {
   125  		return m.serverCloser.Close()
   126  	}
   127  
   128  	return nil
   129  }
   130  
   131  // NewRootScopeAndReporters creates a new tally.Scope based on a tally.CachedStatsReporter
   132  // based on the the the config along with the reporters used.
   133  func (mc *MetricsConfiguration) NewRootScopeAndReporters(
   134  	opts NewRootScopeAndReportersOptions,
   135  ) (
   136  	tally.Scope,
   137  	io.Closer,
   138  	MetricsConfigurationReporters,
   139  	error,
   140  ) {
   141  	var (
   142  		result  MetricsConfigurationReporters
   143  		closers metricsClosers
   144  	)
   145  	if mc.M3Reporter != nil {
   146  		r, err := mc.M3Reporter.NewReporter()
   147  		if err != nil {
   148  			return nil, nil, MetricsConfigurationReporters{}, err
   149  		}
   150  		result.AllReporters = append(result.AllReporters, r)
   151  		result.M3Reporter = &MetricsConfigurationM3Reporter{
   152  			Reporter: r,
   153  		}
   154  	}
   155  	if mc.PrometheusReporter != nil {
   156  		// Set a default on error method for sane handling when registering metrics
   157  		// results in an error with the Prometheus reporter.
   158  		onError := func(e error) {
   159  			logger := NewOptions().Logger()
   160  			logger.Error("register metrics error", zap.Error(e))
   161  		}
   162  		if opts.PrometheusOnError != nil {
   163  			onError = opts.PrometheusOnError
   164  		}
   165  
   166  		// Override the default registry with an empty one that does not have the default
   167  		// registered collectors (Go and Process). The M3 reporters will emit the Go metrics
   168  		// and the Process metrics are reported by both the M3 process reporter and a
   169  		// modified Prometheus process collector, which reports everything except the
   170  		// number of open FDs.
   171  		//
   172  		// Collecting the number of F.Ds for a process that has many of them can take a long
   173  		// time and be very CPU intensive, especially the default Prometheus collector
   174  		// implementation which is less optimized than the M3 implementation.
   175  		//
   176  		// TODO: Emit the Prometheus process stats from our own process reporter so we
   177  		// get the same stats regardless of the reporter used. See issue:
   178  		// https://github.com/m3db/m3/issues/1649
   179  		registry := prom.NewRegistry()
   180  		if err := registry.Register(NewPrometheusProcessCollector(ProcessCollectorOpts{
   181  			DisableOpenFDs: true,
   182  		})); err != nil {
   183  			return nil, nil, MetricsConfigurationReporters{}, fmt.Errorf("could not create process collector: %v", err)
   184  		}
   185  		opts := PrometheusConfigurationOptions{
   186  			Registry:           registry,
   187  			ExternalRegistries: opts.PrometheusExternalRegistries,
   188  			HandlerListener:    opts.PrometheusHandlerListener,
   189  			DefaultServeMux:    opts.PrometheusDefaultServeMux,
   190  			OnError:            onError,
   191  			CommonLabels:       opts.CommonLabels,
   192  		}
   193  
   194  		// Use default instrument package default histogram buckets if not set.
   195  		if len(mc.PrometheusReporter.DefaultHistogramBuckets) == 0 {
   196  			for _, v := range DefaultHistogramTimerHistogramBuckets().AsValues() {
   197  				bucket := prometheus.HistogramObjective{
   198  					Upper: v,
   199  				}
   200  				mc.PrometheusReporter.DefaultHistogramBuckets =
   201  					append(mc.PrometheusReporter.DefaultHistogramBuckets, bucket)
   202  			}
   203  		}
   204  
   205  		if len(mc.PrometheusReporter.DefaultSummaryObjectives) == 0 {
   206  			for k, v := range DefaultSummaryQuantileObjectives() {
   207  				q := prometheus.SummaryObjective{
   208  					Percentile:   k,
   209  					AllowedError: v,
   210  				}
   211  				mc.PrometheusReporter.DefaultSummaryObjectives =
   212  					append(mc.PrometheusReporter.DefaultSummaryObjectives, q)
   213  			}
   214  		}
   215  
   216  		r, srvCloser, err := mc.PrometheusReporter.NewReporter(opts)
   217  		if err != nil {
   218  			return nil, nil, MetricsConfigurationReporters{}, err
   219  		}
   220  		closers.serverCloser = srvCloser
   221  
   222  		result.AllReporters = append(result.AllReporters, r)
   223  		result.PrometheusReporter = &MetricsConfigurationPrometheusReporter{
   224  			Reporter: r,
   225  			Registry: registry,
   226  		}
   227  	}
   228  	if len(result.AllReporters) == 0 {
   229  		return nil, nil, MetricsConfigurationReporters{}, errNoReporterConfigured
   230  	}
   231  
   232  	var r tally.CachedStatsReporter
   233  	if len(result.AllReporters) == 1 {
   234  		r = result.AllReporters[0]
   235  	} else {
   236  		r = multi.NewMultiCachedReporter(result.AllReporters...)
   237  	}
   238  
   239  	scope, closer := mc.NewRootScopeReporter(r)
   240  	closers.reporterCloser = closer
   241  
   242  	return scope, closers, result, nil
   243  }
   244  
   245  // NewRootScopeReporter creates a new tally.Scope based on a given tally.CachedStatsReporter
   246  // and given root scope config. In most cases NewRootScope should be used, but for cases such
   247  // as hooking into the reporter to manually flush it.
   248  func (mc *MetricsConfiguration) NewRootScopeReporter(
   249  	r tally.CachedStatsReporter,
   250  ) (tally.Scope, io.Closer) {
   251  	var (
   252  		prefix string
   253  		tags   map[string]string
   254  	)
   255  
   256  	if mc.RootScope != nil {
   257  		if mc.RootScope.Prefix != "" {
   258  			prefix = mc.RootScope.Prefix
   259  		}
   260  		if mc.RootScope.CommonTags != nil {
   261  			tags = mc.RootScope.CommonTags
   262  		}
   263  	}
   264  
   265  	var sanitizeOpts *tally.SanitizeOptions
   266  	if mc.Sanitization != nil {
   267  		sanitizeOpts = mc.Sanitization.NewOptions()
   268  	}
   269  
   270  	scopeOpts := tally.ScopeOptions{
   271  		Tags:            tags,
   272  		Prefix:          prefix,
   273  		CachedReporter:  r,
   274  		SanitizeOptions: sanitizeOpts,
   275  	}
   276  	reportInterval := mc.ReportInterval()
   277  	scope, closer := tally.NewRootScope(scopeOpts, reportInterval)
   278  	if mc.ExtendedMetrics != nil {
   279  		StartReportingExtendedMetrics(scope, reportInterval, *mc.ExtendedMetrics)
   280  	}
   281  
   282  	return scope, closer
   283  }
   284  
   285  // SampleRate returns the metrics sampling rate.
   286  func (mc *MetricsConfiguration) SampleRate() float64 {
   287  	if mc.SamplingRate > 0.0 && mc.SamplingRate <= 1.0 {
   288  		return mc.SamplingRate
   289  	}
   290  	return defaultSamplingRate
   291  }
   292  
   293  // ReportInterval returns the metrics reporting interval.
   294  func (mc *MetricsConfiguration) ReportInterval() time.Duration {
   295  	if mc.RootScope != nil && mc.RootScope.ReportingInterval != 0 {
   296  		return mc.RootScope.ReportingInterval
   297  	}
   298  	return defaultReportingInterval
   299  }