github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/metric/metric.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package metric provides primitives for collecting metrics.
    16  package metric
    17  
    18  import (
    19  	"errors"
    20  	"fmt"
    21  	"sort"
    22  	"sync/atomic"
    23  
    24  	"github.com/SagerNet/gvisor/pkg/eventchannel"
    25  	"github.com/SagerNet/gvisor/pkg/log"
    26  	pb "github.com/SagerNet/gvisor/pkg/metric/metric_go_proto"
    27  	"github.com/SagerNet/gvisor/pkg/sync"
    28  )
    29  
var (
	// ErrNameInUse indicates that another metric is already defined for
	// the given name. It is returned by RegisterCustomUint64Metric when the
	// name is already present in the set of registered metrics.
	ErrNameInUse = errors.New("metric name already in use")

	// ErrInitializationDone indicates that the caller tried to create a
	// new metric after initialization, i.e. after Initialize or Disable
	// has completed successfully.
	ErrInitializationDone = errors.New("metric cannot be created after initialization is complete")

	// WeirdnessMetric is a metric with fields created to track the number
	// of weird occurrences such as time fallback, partial_result, vsyscall
	// count, watchdog startup timeouts and stuck tasks.
	//
	// It has a single field, "weirdness_type", and must be incremented with
	// exactly one of the allowed values listed below. It is registered
	// while package-level variables are initialized; a registration failure
	// panics (see MustCreateNewUint64Metric).
	WeirdnessMetric = MustCreateNewUint64Metric("/weirdness", true /* sync */, "Increment for weird occurrences of problems such as time fallback, partial result, vsyscalls invoked in the sandbox, watchdog startup timeouts and stuck tasks.",
		Field{
			name:          "weirdness_type",
			allowedValues: []string{"time_fallback", "partial_result", "vsyscall_count", "watchdog_stuck_startup", "watchdog_stuck_tasks"},
		})

	// SuspiciousOperationsMetric is a metric with fields created to detect
	// operations such as opening an executable file to write from a gofer.
	//
	// It has a single field, "operation_type", and must be incremented with
	// exactly one of the allowed values listed below. It is registered
	// while package-level variables are initialized; a registration failure
	// panics (see MustCreateNewUint64Metric).
	SuspiciousOperationsMetric = MustCreateNewUint64Metric("/suspicious_operations", true /* sync */, "Increment for suspicious operations such as opening an executable file to write from a gofer.",
		Field{
			name:          "operation_type",
			allowedValues: []string{"opened_write_execute_file"},
		})
)
    56  
// Uint64Metric encapsulates a uint64 that represents some kind of metric to be
// monitored. We currently support metrics with at most one field.
//
// A metric without fields stores its count in value; a metric with one field
// stores one count per allowed field value in fields.
//
// Metrics are not saved across save/restore and thus reset to zero on restore.
//
// TODO(b/67298427): Support metric fields.
// NOTE(review): a single field is already supported by this type; confirm
// whether the TODO above now tracks support for more than one field.
type Uint64Metric struct {
	// value is the actual value of the metric. It must be accessed atomically.
	// It is only used when the metric has no fields.
	//
	// NOTE(review): value is the first field of the struct, which keeps it
	// 64-bit aligned for sync/atomic on 32-bit platforms; presumably this
	// ordering is intentional, so do not move it without checking.
	value uint64

	// numFields is the number of metric fields. It is immutable once
	// initialized.
	numFields int

	// mu protects the below fields.
	mu sync.RWMutex `state:"nosave"`

	// fields is the map of fields in the metric. It maps each allowed field
	// value to its current count and is only populated when numFields is 1.
	fields map[string]uint64
}
    77  
var (
	// initialized indicates that all metrics are registered. allMetrics is
	// immutable once initialized is true.
	//
	// It is set by a successful Initialize or Disable and checked by
	// RegisterCustomUint64Metric. No lock guards it in this file, so callers
	// are expected to respect the documented preconditions.
	initialized bool

	// allMetrics are the registered metrics, keyed by metric name.
	allMetrics = makeMetricSet()
)
    86  
    87  // Initialize sends a metric registration event over the event channel.
    88  //
    89  // Precondition:
    90  //  * All metrics are registered.
    91  //  * Initialize/Disable has not been called.
    92  func Initialize() error {
    93  	if initialized {
    94  		return errors.New("metric.Initialize called after metric.Initialize or metric.Disable")
    95  	}
    96  
    97  	m := pb.MetricRegistration{}
    98  	for _, v := range allMetrics.m {
    99  		m.Metrics = append(m.Metrics, v.metadata)
   100  	}
   101  	if err := eventchannel.Emit(&m); err != nil {
   102  		return fmt.Errorf("unable to emit metric initialize event: %w", err)
   103  	}
   104  
   105  	initialized = true
   106  	return nil
   107  }
   108  
   109  // Disable sends an empty metric registration event over the event channel,
   110  // disabling metric collection.
   111  //
   112  // Precondition:
   113  //  * All metrics are registered.
   114  //  * Initialize/Disable has not been called.
   115  func Disable() error {
   116  	if initialized {
   117  		return errors.New("metric.Disable called after metric.Initialize or metric.Disable")
   118  	}
   119  
   120  	m := pb.MetricRegistration{}
   121  	if err := eventchannel.Emit(&m); err != nil {
   122  		return fmt.Errorf("unable to emit metric disable event: %w", err)
   123  	}
   124  
   125  	initialized = true
   126  	return nil
   127  }
   128  
// customUint64Metric is the registry entry for a single metric: its static
// description plus a callback used to read its current value.
type customUint64Metric struct {
	// metadata describes the metric. It is immutable.
	metadata *pb.MetricMetadata

	// value returns the current value of the metric for the given set of
	// fields. It takes a variadic number of field values as argument.
	// It is called with no arguments for metrics without fields and with a
	// single field value for metrics with one field (see metricSet.Values).
	value func(fieldValues ...string) uint64
}
   137  
// Field contains the field name and allowed values for the metric which is
// used in registration of the metric.
//
// Both members are unexported, so a Field can only be populated from within
// this package.
type Field struct {
	// name is the metric field name.
	name string

	// allowedValues is the list of allowed values for the field. Using any
	// other value with Uint64Metric.Value or IncrementBy panics.
	allowedValues []string
}
   147  
   148  // RegisterCustomUint64Metric registers a metric with the given name.
   149  //
   150  // Register must only be called at init and will return and error if called
   151  // after Initialized.
   152  //
   153  // Preconditions:
   154  // * name must be globally unique.
   155  // * Initialize/Disable have not been called.
   156  // * value is expected to accept exactly len(fields) arguments.
   157  func RegisterCustomUint64Metric(name string, cumulative, sync bool, units pb.MetricMetadata_Units, description string, value func(...string) uint64, fields ...Field) error {
   158  	if initialized {
   159  		return ErrInitializationDone
   160  	}
   161  
   162  	if _, ok := allMetrics.m[name]; ok {
   163  		return ErrNameInUse
   164  	}
   165  
   166  	allMetrics.m[name] = customUint64Metric{
   167  		metadata: &pb.MetricMetadata{
   168  			Name:        name,
   169  			Description: description,
   170  			Cumulative:  cumulative,
   171  			Sync:        sync,
   172  			Type:        pb.MetricMetadata_TYPE_UINT64,
   173  			Units:       units,
   174  		},
   175  		value: value,
   176  	}
   177  
   178  	// Metrics can exist without fields.
   179  	if l := len(fields); l > 1 {
   180  		return fmt.Errorf("%d fields provided, must be <= 1", l)
   181  	}
   182  
   183  	for _, field := range fields {
   184  		allMetrics.m[name].metadata.Fields = append(allMetrics.m[name].metadata.Fields, &pb.MetricMetadata_Field{
   185  			FieldName:     field.name,
   186  			AllowedValues: field.allowedValues,
   187  		})
   188  	}
   189  	return nil
   190  }
   191  
   192  // MustRegisterCustomUint64Metric calls RegisterCustomUint64Metric for metrics
   193  // without fields and panics if it returns an error.
   194  func MustRegisterCustomUint64Metric(name string, cumulative, sync bool, description string, value func(...string) uint64, fields ...Field) {
   195  	if err := RegisterCustomUint64Metric(name, cumulative, sync, pb.MetricMetadata_UNITS_NONE, description, value, fields...); err != nil {
   196  		panic(fmt.Sprintf("Unable to register metric %q: %s", name, err))
   197  	}
   198  }
   199  
   200  // NewUint64Metric creates and registers a new cumulative metric with the given
   201  // name.
   202  //
   203  // Metrics must be statically defined (i.e., at init).
   204  func NewUint64Metric(name string, sync bool, units pb.MetricMetadata_Units, description string, fields ...Field) (*Uint64Metric, error) {
   205  	m := Uint64Metric{
   206  		numFields: len(fields),
   207  	}
   208  
   209  	if m.numFields == 1 {
   210  		m.fields = make(map[string]uint64)
   211  		for _, fieldValue := range fields[0].allowedValues {
   212  			m.fields[fieldValue] = 0
   213  		}
   214  	}
   215  	return &m, RegisterCustomUint64Metric(name, true /* cumulative */, sync, units, description, m.Value, fields...)
   216  }
   217  
   218  // MustCreateNewUint64Metric calls NewUint64Metric and panics if it returns an
   219  // error.
   220  func MustCreateNewUint64Metric(name string, sync bool, description string, fields ...Field) *Uint64Metric {
   221  	m, err := NewUint64Metric(name, sync, pb.MetricMetadata_UNITS_NONE, description, fields...)
   222  	if err != nil {
   223  		panic(fmt.Sprintf("Unable to create metric %q: %s", name, err))
   224  	}
   225  	return m
   226  }
   227  
   228  // MustCreateNewUint64NanosecondsMetric calls NewUint64Metric and panics if it
   229  // returns an error.
   230  func MustCreateNewUint64NanosecondsMetric(name string, sync bool, description string) *Uint64Metric {
   231  	m, err := NewUint64Metric(name, sync, pb.MetricMetadata_UNITS_NANOSECONDS, description)
   232  	if err != nil {
   233  		panic(fmt.Sprintf("Unable to create metric %q: %s", name, err))
   234  	}
   235  	return m
   236  }
   237  
   238  // Value returns the current value of the metric for the given set of fields.
   239  func (m *Uint64Metric) Value(fieldValues ...string) uint64 {
   240  	if m.numFields != len(fieldValues) {
   241  		panic(fmt.Sprintf("Number of fieldValues %d is not equal to the number of metric fields %d", len(fieldValues), m.numFields))
   242  	}
   243  
   244  	switch m.numFields {
   245  	case 0:
   246  		return atomic.LoadUint64(&m.value)
   247  	case 1:
   248  		m.mu.RLock()
   249  		defer m.mu.RUnlock()
   250  
   251  		fieldValue := fieldValues[0]
   252  		if _, ok := m.fields[fieldValue]; !ok {
   253  			panic(fmt.Sprintf("Metric does not allow to have field value %s", fieldValue))
   254  		}
   255  		return m.fields[fieldValue]
   256  	default:
   257  		panic("Sentry metrics do not support more than one field")
   258  	}
   259  }
   260  
   261  // Increment increments the metric field by 1.
   262  func (m *Uint64Metric) Increment(fieldValues ...string) {
   263  	m.IncrementBy(1, fieldValues...)
   264  }
   265  
   266  // IncrementBy increments the metric by v.
   267  func (m *Uint64Metric) IncrementBy(v uint64, fieldValues ...string) {
   268  	if m.numFields != len(fieldValues) {
   269  		panic(fmt.Sprintf("Number of fieldValues %d is not equal to the number of metric fields %d", len(fieldValues), m.numFields))
   270  	}
   271  
   272  	switch m.numFields {
   273  	case 0:
   274  		atomic.AddUint64(&m.value, v)
   275  		return
   276  	case 1:
   277  		fieldValue := fieldValues[0]
   278  		m.mu.Lock()
   279  		defer m.mu.Unlock()
   280  
   281  		if _, ok := m.fields[fieldValue]; !ok {
   282  			panic(fmt.Sprintf("Metric does not allow to have field value %s", fieldValue))
   283  		}
   284  		m.fields[fieldValue] += v
   285  	default:
   286  		panic("Sentry metrics do not support more than one field")
   287  	}
   288  }
   289  
// metricSet holds named metrics.
type metricSet struct {
	// m maps a metric name to its registered metadata and value callback.
	m map[string]customUint64Metric
}
   294  
   295  // makeMetricSet returns a new metricSet.
   296  func makeMetricSet() metricSet {
   297  	return metricSet{
   298  		m: make(map[string]customUint64Metric),
   299  	}
   300  }
   301  
   302  // Values returns a snapshot of all values in m.
   303  func (m *metricSet) Values() metricValues {
   304  	vals := make(metricValues)
   305  
   306  	for k, v := range m.m {
   307  		fields := v.metadata.GetFields()
   308  		switch len(fields) {
   309  		case 0:
   310  			vals[k] = v.value()
   311  		case 1:
   312  			values := fields[0].GetAllowedValues()
   313  			fieldsMap := make(map[string]uint64)
   314  			for _, fieldValue := range values {
   315  				fieldsMap[fieldValue] = v.value(fieldValue)
   316  			}
   317  			vals[k] = fieldsMap
   318  		default:
   319  			panic(fmt.Sprintf("Unsupported number of metric fields: %d", len(fields)))
   320  		}
   321  	}
   322  	return vals
   323  }
   324  
// metricValues contains a copy of the values of all metrics. It is a map
// with key as metric name and value can be either uint64 or map[string]uint64
// to support metrics with one field. For the map form, the inner key is the
// field value and the inner value is that field value's count.
type metricValues map[string]interface{}
   329  
var (
	// emitMu protects metricsAtLastEmit and ensures that all emitted
	// metrics are strongly ordered (older metrics are never emitted after
	// newer metrics). It is held for the whole of EmitMetricUpdate.
	emitMu sync.Mutex

	// metricsAtLastEmit contains the state of the metrics at the last emit event.
	// It is nil until the first call to EmitMetricUpdate.
	metricsAtLastEmit metricValues
)
   339  
   340  // EmitMetricUpdate emits a MetricUpdate over the event channel.
   341  //
   342  // Only metrics that have changed since the last call are emitted.
   343  //
   344  // EmitMetricUpdate is thread-safe.
   345  //
   346  // Preconditions:
   347  // * Initialize has been called.
   348  func EmitMetricUpdate() {
   349  	emitMu.Lock()
   350  	defer emitMu.Unlock()
   351  
   352  	snapshot := allMetrics.Values()
   353  
   354  	m := pb.MetricUpdate{}
   355  	// On the first call metricsAtLastEmit will be empty. Include all
   356  	// metrics then.
   357  	for k, v := range snapshot {
   358  		prev, ok := metricsAtLastEmit[k]
   359  		switch t := v.(type) {
   360  		case uint64:
   361  			// Metric exists and value did not change.
   362  			if ok && prev.(uint64) == t {
   363  				continue
   364  			}
   365  
   366  			m.Metrics = append(m.Metrics, &pb.MetricValue{
   367  				Name:  k,
   368  				Value: &pb.MetricValue_Uint64Value{Uint64Value: t},
   369  			})
   370  		case map[string]uint64:
   371  			for fieldValue, metricValue := range t {
   372  				// Emit data on the first call only if the field
   373  				// value has been incremented. For all other
   374  				// calls, emit data if the field value has been
   375  				// changed from the previous emit.
   376  				if (!ok && metricValue == 0) || (ok && prev.(map[string]uint64)[fieldValue] == metricValue) {
   377  					continue
   378  				}
   379  
   380  				m.Metrics = append(m.Metrics, &pb.MetricValue{
   381  					Name:        k,
   382  					FieldValues: []string{fieldValue},
   383  					Value:       &pb.MetricValue_Uint64Value{Uint64Value: metricValue},
   384  				})
   385  			}
   386  		}
   387  	}
   388  
   389  	metricsAtLastEmit = snapshot
   390  	if len(m.Metrics) == 0 {
   391  		return
   392  	}
   393  
   394  	if log.IsLogging(log.Debug) {
   395  		sort.Slice(m.Metrics, func(i, j int) bool {
   396  			return m.Metrics[i].Name < m.Metrics[j].Name
   397  		})
   398  		log.Debugf("Emitting metrics:")
   399  		for _, metric := range m.Metrics {
   400  			log.Debugf("%s: %+v", metric.Name, metric.Value)
   401  		}
   402  	}
   403  
   404  	if err := eventchannel.Emit(&m); err != nil {
   405  		log.Warningf("Unable to emit metrics: %s", err)
   406  	}
   407  }