github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/changefeedccl/metrics.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Licensed as a CockroachDB Enterprise file under the Cockroach Community
     4  // License (the "License"); you may not use this file except in compliance with
     5  // the License. You may obtain a copy of the License at
     6  //
     7  //     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
     8  
     9  package changefeedccl
    10  
    11  import (
    12  	"context"
    13  	"time"
    14  
    15  	"github.com/cockroachdb/cockroach/pkg/ccl/changefeedccl/kvfeed"
    16  	"github.com/cockroachdb/cockroach/pkg/jobs"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
    18  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    19  	"github.com/cockroachdb/cockroach/pkg/util/metric"
    20  	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
    21  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    22  )
    23  
    24  type metricsSink struct {
    25  	metrics *Metrics
    26  	wrapped Sink
    27  }
    28  
    29  func makeMetricsSink(metrics *Metrics, s Sink) *metricsSink {
    30  	m := &metricsSink{
    31  		metrics: metrics,
    32  		wrapped: s,
    33  	}
    34  	return m
    35  }
    36  
    37  func (s *metricsSink) EmitRow(
    38  	ctx context.Context, table *sqlbase.TableDescriptor, key, value []byte, updated hlc.Timestamp,
    39  ) error {
    40  	start := timeutil.Now()
    41  	err := s.wrapped.EmitRow(ctx, table, key, value, updated)
    42  	if err == nil {
    43  		s.metrics.EmittedMessages.Inc(1)
    44  		s.metrics.EmittedBytes.Inc(int64(len(key) + len(value)))
    45  		s.metrics.EmitNanos.Inc(timeutil.Since(start).Nanoseconds())
    46  	}
    47  	return err
    48  }
    49  
    50  func (s *metricsSink) EmitResolvedTimestamp(
    51  	ctx context.Context, encoder Encoder, resolved hlc.Timestamp,
    52  ) error {
    53  	start := timeutil.Now()
    54  	err := s.wrapped.EmitResolvedTimestamp(ctx, encoder, resolved)
    55  	if err == nil {
    56  		s.metrics.EmittedMessages.Inc(1)
    57  		// TODO(dan): This wasn't correct. The wrapped sink may emit the payload
    58  		// any number of times.
    59  		// s.metrics.EmittedBytes.Inc(int64(len(payload)))
    60  		s.metrics.EmitNanos.Inc(timeutil.Since(start).Nanoseconds())
    61  	}
    62  	return err
    63  }
    64  
    65  func (s *metricsSink) Flush(ctx context.Context) error {
    66  	start := timeutil.Now()
    67  	err := s.wrapped.Flush(ctx)
    68  	if err == nil {
    69  		s.metrics.Flushes.Inc(1)
    70  		s.metrics.FlushNanos.Inc(timeutil.Since(start).Nanoseconds())
    71  	}
    72  	return err
    73  }
    74  
    75  func (s *metricsSink) Close() error {
    76  	return s.wrapped.Close()
    77  }
    78  
    79  var (
    80  	metaChangefeedEmittedMessages = metric.Metadata{
    81  		Name:        "changefeed.emitted_messages",
    82  		Help:        "Messages emitted by all feeds",
    83  		Measurement: "Messages",
    84  		Unit:        metric.Unit_COUNT,
    85  	}
    86  	metaChangefeedEmittedBytes = metric.Metadata{
    87  		Name:        "changefeed.emitted_bytes",
    88  		Help:        "Bytes emitted by all feeds",
    89  		Measurement: "Bytes",
    90  		Unit:        metric.Unit_BYTES,
    91  	}
    92  	metaChangefeedFlushes = metric.Metadata{
    93  		Name:        "changefeed.flushes",
    94  		Help:        "Total flushes across all feeds",
    95  		Measurement: "Flushes",
    96  		Unit:        metric.Unit_COUNT,
    97  	}
    98  	metaChangefeedErrorRetries = metric.Metadata{
    99  		Name:        "changefeed.error_retries",
   100  		Help:        "Total retryable errors encountered by all changefeeds",
   101  		Measurement: "Errors",
   102  		Unit:        metric.Unit_COUNT,
   103  	}
   104  
   105  	metaChangefeedProcessingNanos = metric.Metadata{
   106  		Name:        "changefeed.processing_nanos",
   107  		Help:        "Time spent processing KV changes into SQL rows",
   108  		Measurement: "Nanoseconds",
   109  		Unit:        metric.Unit_NANOSECONDS,
   110  	}
   111  	metaChangefeedTableMetadataNanos = metric.Metadata{
   112  		Name:        "changefeed.table_metadata_nanos",
   113  		Help:        "Time blocked while verifying table metadata histories",
   114  		Measurement: "Nanoseconds",
   115  		Unit:        metric.Unit_NANOSECONDS,
   116  	}
   117  	metaChangefeedEmitNanos = metric.Metadata{
   118  		Name:        "changefeed.emit_nanos",
   119  		Help:        "Total time spent emitting all feeds",
   120  		Measurement: "Nanoseconds",
   121  		Unit:        metric.Unit_NANOSECONDS,
   122  	}
   123  	metaChangefeedFlushNanos = metric.Metadata{
   124  		Name:        "changefeed.flush_nanos",
   125  		Help:        "Total time spent flushing all feeds",
   126  		Measurement: "Nanoseconds",
   127  		Unit:        metric.Unit_NANOSECONDS,
   128  	}
   129  
   130  	// TODO(dan): This was intended to be a measure of the minimum distance of
   131  	// any changefeed ahead of its gc ttl threshold, but keeping that correct in
   132  	// the face of changing zone configs is much harder, so this will have to do
   133  	// for now.
   134  	metaChangefeedMaxBehindNanos = metric.Metadata{
   135  		Name:        "changefeed.max_behind_nanos",
   136  		Help:        "Largest commit-to-emit duration of any running feed",
   137  		Measurement: "Nanoseconds",
   138  		Unit:        metric.Unit_NANOSECONDS,
   139  	}
   140  )
   141  
   142  // Metrics are for production monitoring of changefeeds.
   143  type Metrics struct {
   144  	KVFeedMetrics   kvfeed.Metrics
   145  	EmittedMessages *metric.Counter
   146  	EmittedBytes    *metric.Counter
   147  	Flushes         *metric.Counter
   148  	ErrorRetries    *metric.Counter
   149  
   150  	ProcessingNanos    *metric.Counter
   151  	TableMetadataNanos *metric.Counter
   152  	EmitNanos          *metric.Counter
   153  	FlushNanos         *metric.Counter
   154  
   155  	mu struct {
   156  		syncutil.Mutex
   157  		id       int
   158  		resolved map[int]hlc.Timestamp
   159  	}
   160  	MaxBehindNanos *metric.Gauge
   161  }
   162  
   163  // MetricStruct implements the metric.Struct interface.
   164  func (*Metrics) MetricStruct() {}
   165  
   166  // MakeMetrics makes the metrics for changefeed monitoring.
   167  func MakeMetrics(histogramWindow time.Duration) metric.Struct {
   168  	m := &Metrics{
   169  		KVFeedMetrics:   kvfeed.MakeMetrics(histogramWindow),
   170  		EmittedMessages: metric.NewCounter(metaChangefeedEmittedMessages),
   171  		EmittedBytes:    metric.NewCounter(metaChangefeedEmittedBytes),
   172  		Flushes:         metric.NewCounter(metaChangefeedFlushes),
   173  		ErrorRetries:    metric.NewCounter(metaChangefeedErrorRetries),
   174  
   175  		ProcessingNanos:    metric.NewCounter(metaChangefeedProcessingNanos),
   176  		TableMetadataNanos: metric.NewCounter(metaChangefeedTableMetadataNanos),
   177  		EmitNanos:          metric.NewCounter(metaChangefeedEmitNanos),
   178  		FlushNanos:         metric.NewCounter(metaChangefeedFlushNanos),
   179  	}
   180  	m.mu.resolved = make(map[int]hlc.Timestamp)
   181  
   182  	m.MaxBehindNanos = metric.NewFunctionalGauge(metaChangefeedMaxBehindNanos, func() int64 {
   183  		now := timeutil.Now()
   184  		var maxBehind time.Duration
   185  		m.mu.Lock()
   186  		for _, resolved := range m.mu.resolved {
   187  			if behind := now.Sub(resolved.GoTime()); behind > maxBehind {
   188  				maxBehind = behind
   189  			}
   190  		}
   191  		m.mu.Unlock()
   192  		return maxBehind.Nanoseconds()
   193  	})
   194  	return m
   195  }
   196  
   197  func init() {
   198  	jobs.MakeChangefeedMetricsHook = MakeMetrics
   199  }