gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/prometheus/prometheus_verify.go

// Copyright 2022 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package prometheus

import (
	"errors"
	"fmt"
	"math"
	"strings"
	"sync"
	"time"
	"unicode"

	pb "gvisor.dev/gvisor/pkg/metric/metric_go_proto"
)

const (
	// maxExportStaleness is the maximum allowed age of a snapshot when it is verified.
	// Used to avoid exporting snapshots with bogus timestamps from ages past.
	maxExportStaleness = 10 * time.Second

	// MetaMetricPrefix is a prefix used for metrics defined by the metric server,
	// as opposed to metrics generated by each sandbox.
	// For this reason, this prefix is not allowed to be used in sandbox metrics.
	MetaMetricPrefix = "meta_"
)

// Prometheus process-level metric names and definitions.
// These are not necessarily exported, but we enforce that sandboxes may not
// export metrics sharing the same names.
// https://prometheus.io/docs/instrumenting/writing_clientlibs/#process-metrics
var (
	ProcessCPUSecondsTotal = Metric{
		Name: "process_cpu_seconds_total",
		Type: TypeGauge,
		Help: "Total user and system CPU time spent in seconds.",
	}
	ProcessOpenFDs = Metric{
		Name: "process_open_fds",
		Type: TypeGauge,
		Help: "Number of open file descriptors.",
	}
	ProcessMaxFDs = Metric{
		Name: "process_max_fds",
		Type: TypeGauge,
		Help: "Maximum number of open file descriptors.",
	}
	ProcessVirtualMemoryBytes = Metric{
		Name: "process_virtual_memory_bytes",
		Type: TypeGauge,
		Help: "Virtual memory size in bytes.",
	}
	ProcessVirtualMemoryMaxBytes = Metric{
		Name: "process_virtual_memory_max_bytes",
		Type: TypeGauge,
		Help: "Maximum amount of virtual memory available in bytes.",
	}
	ProcessResidentMemoryBytes = Metric{
		Name: "process_resident_memory_bytes",
		Type: TypeGauge,
		Help: "Resident memory size in bytes.",
	}
	ProcessHeapBytes = Metric{
		Name: "process_heap_bytes",
		Type: TypeGauge,
		Help: "Process heap size in bytes.",
	}
	ProcessStartTimeSeconds = Metric{
		Name: "process_start_time_seconds",
		Type: TypeGauge,
		Help: "Start time of the process since unix epoch in seconds.",
	}
	ProcessThreads = Metric{
		Name: "process_threads",
		Type: TypeGauge,
		Help: "Number of OS threads in the process.",
	}
)

// processMetrics is the set of process-level metrics.
var processMetrics = [9]*Metric{
	&ProcessCPUSecondsTotal,
	&ProcessOpenFDs,
	&ProcessMaxFDs,
	&ProcessVirtualMemoryBytes,
	&ProcessVirtualMemoryMaxBytes,
	&ProcessResidentMemoryBytes,
	&ProcessHeapBytes,
	&ProcessStartTimeSeconds,
	&ProcessThreads,
}

// internedStringMap allows for interning strings.
type internedStringMap map[string]*string

// Intern returns the interned version of the given string.
// If it is not already interned in the map, this function interns it.
func (m internedStringMap) Intern(s string) string {
	if existing, found := m[s]; found {
		return *existing
	}
	m[s] = &s
	return s
}

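// Usage sketch: repeated occurrences of the same string share one long-lived
// backing entry, so interning field-value combinations keeps per-snapshot
// memory bounded:
//
//	m := make(internedStringMap)
//	a := m.Intern("io_errors")
//	b := m.Intern("io_errors") // returns the previously-stored string
//	_ = a == b                 // true; only one copy is retained in m
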
// globalInternMap is a string intern map used for globally-relevant data that repeats across
// verifiers, such as metric names and field names, but not field values or combinations of field
// values.
var (
	globalInternMu  sync.Mutex
	verifierCount   uint64
	globalInternMap = make(internedStringMap)
)

// globalIntern returns the interned version of the given string.
// If it is not already interned in the map, this function interns it.
func globalIntern(s string) string {
	globalInternMu.Lock()
	defer globalInternMu.Unlock()
	return globalInternMap.Intern(s)
}

func globalInternVerifierCreated() {
	globalInternMu.Lock()
	defer globalInternMu.Unlock()
	verifierCount++
}

func globalInternVerifierReleased() {
	globalInternMu.Lock()
	defer globalInternMu.Unlock()
	verifierCount--
	if verifierCount <= 0 {
		verifierCount = 0
		// No more verifiers active, so release the global map to not keep consuming needless resources.
		globalInternMap = make(internedStringMap)
	}
}

// numberPacker holds packedNumber data. It is useful to store large amounts of Number structs in a
// small memory footprint.
type numberPacker struct {
	// `data` *must* be pre-allocated if there is any number to be stored in it.
	// Attempts to pack a number that cannot fit into the existing space
	// allocated for this slice will cause a panic.
	// Callers may use `needsPackerStorage` to determine whether a number needs
	// space in this slice or not ahead of packing it.
	data []uint64
}

// packedNumber is a non-serializable but smaller-memory-footprint container for a numerical value.
// It can be unpacked out to a Number struct.
// This contains 4 bytes where we try to pack as much as possible.
// For the overwhelmingly-common case of integers that fit in 30 bits (i.e. 32-bit values where the
// first 2 bits are zero), we store them directly here. Otherwise, we store the offset of a 64-bit
// number within numberPacker.
// Layout, going from highest to lowest bit:
// Bit 0 is the type: 0 for integer, 1 for float.
// Bit 1 is 0 if the number's value is stored within the next 30 bits, or 1 if the next 30 bits
// refer to an offset within numberPacker instead.
// In the case of a float, the next two bits (bits 2 and 3) may be used to encode a special value:
//   - 00 means not a special value
//   - 01 means NaN
//   - 10 means -infinity
//   - 11 means +infinity
//
// When not using a special value, the float32 exponent must fit in 5 bits, and is encoded using a
// bias of 15 (i.e. 2^4 - 1), meaning it ranges from -15 (encoded as 0b00000) to 16 (encoded as
// 0b11111), and an exponent of 0 is encoded as 0b01111.
// Floats that do not fit within this range must be encoded indirectly as float64s, similar to
// integers that don't fit in 30 bits.
type packedNumber uint32

// Useful masks and other bit-twiddling stuff for packedNumber.
const (
	typeField                = uint32(1 << 31)
	typeFieldInteger         = uint32(0)
	typeFieldFloat           = uint32(typeField)
	storageField             = uint32(1 << 30)
	storageFieldDirect       = uint32(0)
	storageFieldIndirect     = uint32(storageField)
	valueField               = uint32(1<<30 - 1)
	maxDirectUint            = uint64(valueField)
	float32ExponentField     = uint32(0x7f800000)
	float32ExponentShift     = uint32(23)
	float32ExponentBias      = uint32(127)
	float32FractionField     = uint32(0x7fffff)
	packedFloatExponentField = uint32(0x0f800000)
	packedFloatExponentBias  = uint32(15)
	packedFloatNaN           = packedNumber(typeFieldFloat | storageFieldDirect | 0x10000000)
	packedFloatNegInf        = packedNumber(typeFieldFloat | storageFieldDirect | 0x20000000)
	packedFloatInf           = packedNumber(typeFieldFloat | storageFieldDirect | 0x30000000)
)

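// Worked example of the layout above (a sketch, not exhaustive):
//
//	// The integer 42 fits in 30 bits, so it is stored directly:
//	// type bit = 0 (integer), storage bit = 0 (direct), low 30 bits = 42.
//	n := packedNumber(typeFieldInteger | storageFieldDirect | 42)
//
//	// An integer such as 1<<40 cannot fit in 30 bits, so the low 30 bits
//	// instead hold an index into numberPacker.data:
//	// type bit = 0, storage bit = 1 (indirect), low 30 bits = offset.
//
//	// Special float values are fixed constants; e.g. packedFloatNaN has
//	// type bit = 1 (float), storage bit = 0 (direct), special bits = 01.
//	_ = n
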
// needsPackerStorage returns 0 for numbers that can be
// stored directly into the 32 bits of a packedNumber, or 1 for numbers that
// need more bits and would need to be stored into a numberPacker's `data`
// field.
//
//go:nosplit
func needsPackerStorage(n *Number) uint64 {
	if n.Float == 0.0 {
		v := n.Int
		if v >= 0 && v <= int64(valueField) {
			return 0
		}
		return 1
	}
	// n is a float.
	v := n.Float
	if math.IsNaN(v) || v == math.Inf(-1) || v == math.Inf(1) {
		return 0
	}
	if v >= 0.0 && float64(float32(v)) == v {
		float32Bits := math.Float32bits(float32(v))
		exponent := (float32Bits&float32ExponentField)>>float32ExponentShift - float32ExponentBias
		packedExponent := (exponent + packedFloatExponentBias) << float32ExponentShift
		if packedExponent&packedFloatExponentField == packedExponent {
			return 0
		}
	}
	return 1
}

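// For instance (sketch): small non-negative integers and floats with a small
// exponent are direct; everything else needs a slot in numberPacker.data:
//
//	needsPackerStorage(NewInt(1000))    // 0: fits in 30 bits
//	needsPackerStorage(NewInt(-5))      // 1: negative values are indirect
//	needsPackerStorage(NewInt(1 << 40)) // 1: too large for 30 bits
//	needsPackerStorage(NewFloat(0.5))   // 0: exponent fits in 5 biased bits
//	needsPackerStorage(NewFloat(1e300)) // 1: needs a full float64 slot
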
// isIndirect returns 1 iff this packedNumber needs storage in a numberPacker.
//
//go:nosplit
func (n packedNumber) isIndirect() uint64 {
	if uint32(n)&storageField == storageFieldIndirect {
		return 1
	}
	return 0
}

// errOutOfPackerMemory is used as a panic value when a number cannot be packed into a numberPacker.
var errOutOfPackerMemory = errors.New("out of numberPacker memory")

// pack packs a Number into a packedNumber.
//
//go:nosplit
func (p *numberPacker) pack(n *Number) packedNumber {
	if n.Float == 0.0 {
		v := n.Int
		if v >= 0 && v <= int64(maxDirectUint) {
			// We can store the integer value directly.
			return packedNumber(typeFieldInteger | storageFieldDirect | uint32(v))
		}
		if len(p.data) == cap(p.data) {
			panic(errOutOfPackerMemory)
		}
		p.data = append(p.data, uint64(v))
		return packedNumber(typeFieldInteger | storageFieldIndirect | uint32(len(p.data)-1))
	}
	// n is a float.
	v := n.Float
	if math.IsNaN(v) {
		return packedFloatNaN
	}
	if v == math.Inf(-1) {
		return packedFloatNegInf
	}
	if v == math.Inf(1) {
		return packedFloatInf
	}
	if v >= 0.0 && float64(float32(v)) == v {
		float32Bits := math.Float32bits(float32(v))
		exponent := (float32Bits&float32ExponentField)>>float32ExponentShift - float32ExponentBias
		packedExponent := (exponent + packedFloatExponentBias) << float32ExponentShift
		if packedExponent&packedFloatExponentField == packedExponent {
			float32Fraction := float32Bits & float32FractionField
			return packedNumber(typeFieldFloat | storageFieldDirect | packedExponent | float32Fraction)
		}
	}
	if len(p.data) == cap(p.data) {
		panic(errOutOfPackerMemory)
	}
	p.data = append(p.data, math.Float64bits(v))
	return packedNumber(typeFieldFloat | storageFieldIndirect | uint32(len(p.data)-1))
}

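// Usage sketch: `data` must be pre-sized for every indirect number ahead of
// time (e.g. by summing needsPackerStorage over all numbers to be packed),
// because pack panics instead of growing the slice:
//
//	p := &numberPacker{data: make([]uint64, 0, 1)}
//	small := p.pack(NewInt(7))     // direct; does not touch p.data
//	big := p.pack(NewInt(1 << 40)) // indirect; consumes the one slot
//	_, _ = small, big
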
// packInt packs an integer.
//
//go:nosplit
func (p *numberPacker) packInt(val int64) packedNumber {
	n := Number{Int: val}
	return p.pack(&n)
}

// packFloat packs a floating-point number.
//
//go:nosplit
func (p *numberPacker) packFloat(val float64) packedNumber {
	n := Number{Float: val}
	return p.pack(&n)
}

// unpack unpacks a packedNumber back into a Number.
func (p *numberPacker) unpack(n packedNumber) *Number {
	switch uint32(n) & typeField {
	case typeFieldInteger:
		switch uint32(n) & storageField {
		case storageFieldDirect:
			return NewInt(int64(uint32(n) & valueField))
		case storageFieldIndirect:
			return NewInt(int64(p.data[uint32(n)&valueField]))
		}
	case typeFieldFloat:
		switch uint32(n) & storageField {
		case storageFieldDirect:
			switch n {
			case packedFloatNaN:
				return NewFloat(math.NaN())
			case packedFloatNegInf:
				return NewFloat(math.Inf(-1))
			case packedFloatInf:
				return NewFloat(math.Inf(1))
			default:
				exponent := ((uint32(n) & packedFloatExponentField) >> float32ExponentShift) - packedFloatExponentBias
				float32Bits := ((exponent + float32ExponentBias) << float32ExponentShift) | (uint32(n) & float32FractionField)
				return NewFloat(float64(math.Float32frombits(float32Bits)))
			}
		case storageFieldIndirect:
			return NewFloat(math.Float64frombits(p.data[uint32(n)&valueField]))
		}
	}
	panic("unreachable")
}

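// Round-trip sketch: unpack is the inverse of pack, so packing a Number and
// unpacking the result yields an equal Number:
//
//	p := &numberPacker{data: make([]uint64, 0, 1)}
//	orig := NewFloat(math.Pi) // no exact float32 form, so stored indirectly
//	got := p.unpack(p.pack(orig))
//	_ = got.Float == orig.Float // true
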
// mustUnpackInt unpacks an integer.
// It panics if the packedNumber is not an integer.
func (p *numberPacker) mustUnpackInt(n packedNumber) int64 {
	num := p.unpack(n)
	if !num.IsInteger() {
		panic("not an integer")
	}
	return num.Int
}

// mustUnpackFloat unpacks a floating-point number.
// It panics if the packedNumber is not a floating-point number.
func (p *numberPacker) mustUnpackFloat(n packedNumber) float64 {
	num := p.unpack(n)
	if *num == zero {
		return 0.0
	}
	if num.IsInteger() {
		panic("not a float")
	}
	return num.Float
}

// portTo ports over a packedNumber from this numberPacker to a new one.
// It is equivalent to `other.pack(p.unpack(n))` but avoids
// allocations in the overwhelmingly-common case where the number is direct.
func (p *numberPacker) portTo(other *numberPacker, n packedNumber) packedNumber {
	if uint32(n)&storageField == storageFieldDirect {
		// `n` is self-contained, just return as-is.
		return n
	}
	if len(other.data) == cap(other.data) {
		panic(errOutOfPackerMemory)
	}
	other.data = append(other.data, p.data[uint32(n)&valueField])
	return packedNumber(uint32(n)&(typeField|storageField) | uint32(len(other.data)-1))
}

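// portTo supports building a fresh packer for each verified snapshot: direct
// numbers are copied bit-for-bit for free, and only indirect numbers consume
// a slot in the destination packer. A sketch:
//
//	old := &numberPacker{data: make([]uint64, 0, 1)}
//	n := old.pack(NewInt(1 << 40)) // indirect, backed by old.data
//	fresh := &numberPacker{data: make([]uint64, 0, 1)}
//	n2 := old.portTo(fresh, n) // same value, now backed by fresh.data
//	_ = fresh.unpack(n2)
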
// distributionSnapshot contains the data for a single field combination of a
// distribution ("histogram") metric.
type distributionSnapshot struct {
	// sum is the sum of all samples across all buckets.
	sum packedNumber

	// count is the number of samples across all buckets.
	count packedNumber

	// min is the lowest-recorded sample in the distribution.
	// It is only meaningful when count >= 1.
	min packedNumber

	// max is the highest-recorded sample in the distribution.
	// It is only meaningful when count >= 1.
	max packedNumber

	// ssd is the sum-of-squared-deviations computation of the distribution.
	// If non-zero, it is always a floating-point number.
	// It is only meaningful when count >= 2.
	ssd packedNumber

	// numSamples is the number of samples in each bucket.
	numSamples []packedNumber
}

// verifiableMetric verifies a single metric within a Verifier.
type verifiableMetric struct {
	metadata              *pb.MetricMetadata
	wantMetric            Metric
	numFields             uint32
	verifier              *Verifier
	allowedFieldValues    map[string]map[string]struct{}
	wantBucketUpperBounds []Number

	// The following fields are used to verify that values are actually increasing monotonically.
	// They are only read and modified when the parent Verifier.mu is held.
	// They are mapped by their combination of field values.

	// lastCounterValue is used for counter metrics.
	lastCounterValue map[string]packedNumber

	// lastDistributionSnapshot is used for distribution ("histogram") metrics.
	lastDistributionSnapshot map[string]*distributionSnapshot
}

// newVerifiableMetric creates a new verifiableMetric that can verify the
// values of a metric with the given metadata.
func newVerifiableMetric(metadata *pb.MetricMetadata, verifier *Verifier) (*verifiableMetric, error) {
	promName := metadata.GetPrometheusName()
	if metadata.GetName() == "" || promName == "" {
		return nil, errors.New("metric has no name")
	}
	for _, processMetric := range processMetrics {
		if promName == processMetric.Name {
			return nil, fmt.Errorf("metric name %q is reserved by Prometheus for process-level metrics", promName)
		}
	}
	if strings.HasPrefix(promName, MetaMetricPrefix) {
		return nil, fmt.Errorf("metric name %q starts with %q which is a reserved prefix", promName, "meta_")
	}
	if !unicode.IsLower(rune(promName[0])) {
		return nil, fmt.Errorf("invalid initial character in prometheus metric name: %q", promName)
	}
	for _, r := range promName {
		if !unicode.IsLower(r) && !unicode.IsDigit(r) && r != '_' {
			return nil, fmt.Errorf("invalid character %c in prometheus metric name %q", r, promName)
		}
	}
	numFields := uint32(len(metadata.GetFields()))
	var allowedFieldValues map[string]map[string]struct{}
	if numFields > 0 {
		seenFields := make(map[string]struct{}, numFields)
		allowedFieldValues = make(map[string]map[string]struct{}, numFields)
		for _, field := range metadata.GetFields() {
			fieldName := field.GetFieldName()
			if _, alreadyExists := seenFields[fieldName]; alreadyExists {
				return nil, fmt.Errorf("field %s is defined twice", fieldName)
			}
			seenFields[fieldName] = struct{}{}
			if len(field.GetAllowedValues()) == 0 {
				return nil, fmt.Errorf("field %s has no allowed values", fieldName)
			}
			fieldValues := make(map[string]struct{}, len(field.GetAllowedValues()))
			for _, value := range field.GetAllowedValues() {
				if _, alreadyExists := fieldValues[value]; alreadyExists {
					return nil, fmt.Errorf("field %s has duplicate allowed value %q", fieldName, value)
				}
				fieldValues[globalIntern(value)] = struct{}{}
			}
			allowedFieldValues[globalIntern(fieldName)] = fieldValues
		}
	}
	v := &verifiableMetric{
		metadata: metadata,
		verifier: verifier,
		wantMetric: Metric{
			Name: globalIntern(promName),
			Help: globalIntern(metadata.GetDescription()),
		},
		numFields:          numFields,
		allowedFieldValues: allowedFieldValues,
	}
	numFieldCombinations := len(allowedFieldValues)
	switch metadata.GetType() {
	case pb.MetricMetadata_TYPE_UINT64:
		v.wantMetric.Type = TypeGauge
		if metadata.GetCumulative() {
			v.wantMetric.Type = TypeCounter
			v.lastCounterValue = make(map[string]packedNumber, numFieldCombinations)
		}
	case pb.MetricMetadata_TYPE_DISTRIBUTION:
		v.wantMetric.Type = TypeHistogram
		numBuckets := len(metadata.GetDistributionBucketLowerBounds()) + 1
		if numBuckets <= 1 || numBuckets > 256 {
			return nil, fmt.Errorf("unsupported number of buckets: %d", numBuckets)
		}
		v.wantBucketUpperBounds = make([]Number, numBuckets)
		for i, boundary := range metadata.GetDistributionBucketLowerBounds() {
			v.wantBucketUpperBounds[i] = Number{Int: boundary}
		}
		v.wantBucketUpperBounds[numBuckets-1] = Number{Float: math.Inf(1)}
		v.lastDistributionSnapshot = make(map[string]*distributionSnapshot, numFieldCombinations)
	default:
		return nil, fmt.Errorf("invalid type: %v", metadata.GetType())
	}
	return v, nil
}

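// To illustrate the validation rules above (a sketch; the struct field names
// assume the usual protoc-generated form of MetricMetadata):
//
//	meta := &pb.MetricMetadata{
//		Name:           "fs_reads",
//		PrometheusName: "fs_reads_total", // lowercase, digits, '_' only
//		Description:    "Number of file reads.",
//		Cumulative:     true, // verified as a monotonic counter
//		Type:           pb.MetricMetadata_TYPE_UINT64,
//	}
//	// Names such as "process_open_fds" (reserved process metric),
//	// "meta_foo" (reserved prefix), or "2fast" (bad initial character)
//	// would be rejected here with a descriptive error.
//	vm, err := newVerifiableMetric(meta, verifier)
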
func (v *verifiableMetric) numFieldCombinations() int {
	return len(v.allowedFieldValues)
}

// verify does read-only checks on `data`.
// `metricFieldsSeen` is passed across calls to `verify`. It is used to track the set of metric
// field values that have already been seen. `verify` should populate this.
// `dataToFieldsSeen` is passed across calls to `verify` and other methods of `verifiableMetric`.
// It is used to store the canonical representation of the field values seen for each *Data.
//
// Precondition: `Verifier.mu` is held.
func (v *verifiableMetric) verify(data *Data, metricFieldsSeen map[string]struct{}, dataToFieldsSeen map[*Data]string) error {
	if *data.Metric != v.wantMetric {
		return fmt.Errorf("invalid metric definition: got %+v want %+v", data.Metric, v.wantMetric)
	}

	// Verify fields.
	if uint32(len(data.Labels)) != v.numFields {
		return fmt.Errorf("invalid number of fields: got %d want %d", len(data.Labels), v.numFields)
	}
	var fieldValues strings.Builder
	firstField := true
	for _, field := range v.metadata.GetFields() {
		fieldName := field.GetFieldName()
		value, found := data.Labels[fieldName]
		if !found {
			return fmt.Errorf("did not specify field %q", fieldName)
		}
		if _, allowed := v.allowedFieldValues[fieldName][value]; !allowed {
			return fmt.Errorf("value %q is not allowed for field %s", value, fieldName)
		}
		if !firstField {
			fieldValues.WriteRune(',')
		}
		fieldValues.WriteString(value)
		firstField = false
	}
	fieldValuesStr := fieldValues.String()
	if _, alreadySeen := metricFieldsSeen[fieldValuesStr]; alreadySeen {
		return fmt.Errorf("combination of field values %q was already seen", fieldValuesStr)
	}

	// Verify value.
	gotNumber := data.Number != nil
	gotHistogram := data.HistogramValue != nil
	numSpecified := 0
	if gotNumber {
		numSpecified++
	}
	if gotHistogram {
		numSpecified++
	}
	if numSpecified != 1 {
		return fmt.Errorf("invalid number of value fields specified: %d", numSpecified)
	}
	switch v.metadata.GetType() {
	case pb.MetricMetadata_TYPE_UINT64:
		if !gotNumber {
			return errors.New("expected number value for gauge or counter")
		}
		if !data.Number.IsInteger() {
			return fmt.Errorf("integer metric got non-integer value: %v", data.Number)
		}
	case pb.MetricMetadata_TYPE_DISTRIBUTION:
		if !gotHistogram {
			return errors.New("expected histogram value for histogram")
		}
		if len(data.HistogramValue.Buckets) != len(v.wantBucketUpperBounds) {
			return fmt.Errorf("invalid number of buckets: got %d want %d", len(data.HistogramValue.Buckets), len(v.wantBucketUpperBounds))
		}
		if data.HistogramValue.SumOfSquaredDeviations.IsInteger() && data.HistogramValue.SumOfSquaredDeviations.Int != 0 {
			return fmt.Errorf("sum of squared deviations must be a floating-point value, got %v", data.HistogramValue.SumOfSquaredDeviations)
		}
		for i, b := range data.HistogramValue.Buckets {
			if want := v.wantBucketUpperBounds[i]; b.UpperBound != want {
				return fmt.Errorf("invalid upper bound for bucket %d (0-based): got %v want %v", i, b.UpperBound, want)
			}
		}
	default:
		return fmt.Errorf("invalid metric type: %v", v.wantMetric.Type)
	}

	// All passed. Update the maps that are shared across calls.
	fieldValuesStr = v.verifier.internMap.Intern(fieldValuesStr)
	dataToFieldsSeen[data] = fieldValuesStr
	metricFieldsSeen[fieldValuesStr] = struct{}{}
	return nil
}

// verifyIncrement verifies that incremental metrics are monotonically increasing.
//
// Preconditions: `verify` has succeeded on the given `data`, and `Verifier.mu` is held.
func (v *verifiableMetric) verifyIncrement(data *Data, fieldValues string, packer *numberPacker) error {
	switch v.wantMetric.Type {
	case TypeCounter:
		last := packer.unpack(v.lastCounterValue[v.verifier.internMap.Intern(fieldValues)])
		if !last.SameType(data.Number) {
			return fmt.Errorf("counter number type changed: %v vs %v", last, data.Number)
		}
		if last.GreaterThan(data.Number) {
			return fmt.Errorf("counter value decreased from %v to %v", last, data.Number)
		}
	case TypeHistogram:
		lastDistributionSnapshot := v.lastDistributionSnapshot[v.verifier.internMap.Intern(fieldValues)]
		if lastDistributionSnapshot == nil {
			lastDistributionSnapshot = &distributionSnapshot{
				numSamples: make([]packedNumber, len(v.wantBucketUpperBounds)),
			}
			v.lastDistributionSnapshot[v.verifier.internMap.Intern(fieldValues)] = lastDistributionSnapshot
		}
		lastCount := packer.mustUnpackInt(lastDistributionSnapshot.count)
		if lastCount >= 1 {
			lastMin := packer.unpack(lastDistributionSnapshot.min)
			if !lastMin.SameType(&data.HistogramValue.Min) {
				return fmt.Errorf("minimum value type changed: %v vs %v", lastMin, data.HistogramValue.Min)
			}
			if data.HistogramValue.Min.GreaterThan(lastMin) {
				return fmt.Errorf("minimum value strictly increased: from %v to %v", lastMin, data.HistogramValue.Min)
			}
			lastMax := packer.unpack(lastDistributionSnapshot.max)
			if !lastMax.SameType(&data.HistogramValue.Max) {
				return fmt.Errorf("maximum value type changed: %v vs %v", lastMax, data.HistogramValue.Max)
			}
			if lastMax.GreaterThan(&data.HistogramValue.Max) {
				return fmt.Errorf("maximum value strictly decreased: from %v to %v", lastMax, data.HistogramValue.Max)
			}
		}
		if lastCount >= 2 {
			// We already verified that the new data is a floating-point number
			// earlier, no need to double-check here.
			lastSSD := packer.mustUnpackFloat(lastDistributionSnapshot.ssd)
			if data.HistogramValue.SumOfSquaredDeviations.Float < lastSSD {
				return fmt.Errorf("sum of squared deviations decreased from %v to %v", lastSSD, data.HistogramValue.SumOfSquaredDeviations.Float)
			}
		}
		numSamples := lastDistributionSnapshot.numSamples
		for i, b := range data.HistogramValue.Buckets {
			if uint64(packer.mustUnpackInt(numSamples[i])) > b.Samples {
				return fmt.Errorf("number of samples in bucket %d (0-based) decreased from %d to %d", i, packer.mustUnpackInt(numSamples[i]), b.Samples)
			}
		}
	}
	return nil
}

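// Concretely (sketch): a counter that went from 10 in the last verified
// snapshot to 7 in this one is rejected, as is a histogram whose recorded
// minimum rises, maximum falls, per-bucket sample count shrinks, or
// sum-of-squared-deviations decreases; none of these transitions is possible
// for an append-only distribution.
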
// packerCapacityNeededForData returns the `numberPacker` capacity needed to store `data`.
func (v *verifiableMetric) packerCapacityNeededForData(data *Data, fieldValues string) uint64 {
	switch v.wantMetric.Type {
	case TypeCounter:
		return needsPackerStorage(data.Number)
	case TypeHistogram:
		var toPack uint64
		var totalSamples uint64
		var buf Number
		for _, b := range data.HistogramValue.Buckets {
			buf = Number{Int: int64(b.Samples)}
			toPack += needsPackerStorage(&buf)
			totalSamples += b.Samples
		}
		toPack += needsPackerStorage(&data.HistogramValue.Total)
		toPack += needsPackerStorage(&data.HistogramValue.Min)
		toPack += needsPackerStorage(&data.HistogramValue.Max)
		toPack += needsPackerStorage(&data.HistogramValue.SumOfSquaredDeviations)
		buf = Number{Int: int64(totalSamples)}
		toPack += needsPackerStorage(&buf)
		return toPack
	default:
		return 0
	}
}

// packerCapacityNeededForLast returns the `numberPacker` capacity needed to
// store the last snapshot's data that was not seen in the current snapshot
// (aka not in metricFieldsSeen).
func (v *verifiableMetric) packerCapacityNeededForLast(metricFieldsSeen map[string]struct{}) uint64 {
	var capacity uint64
	switch v.wantMetric.Type {
	case TypeCounter:
		for fieldValues, lastCounterValue := range v.lastCounterValue {
			if _, found := metricFieldsSeen[fieldValues]; found {
				continue
			}
			capacity += lastCounterValue.isIndirect()
		}
	case TypeHistogram:
		for fieldValues, distributionSnapshot := range v.lastDistributionSnapshot {
			if _, found := metricFieldsSeen[fieldValues]; found {
				continue
			}
			for _, b := range distributionSnapshot.numSamples {
				capacity += b.isIndirect()
			}
			capacity += distributionSnapshot.sum.isIndirect()
			capacity += distributionSnapshot.count.isIndirect()
			capacity += distributionSnapshot.min.isIndirect()
			capacity += distributionSnapshot.max.isIndirect()
			capacity += distributionSnapshot.ssd.isIndirect()
		}
	}
	return capacity
}

// update updates incremental metrics' "last seen" data.
//
// Preconditions: `verifyIncrement` has succeeded on the given `data`, `Verifier.mu` is held,
// and `packer` is guaranteed to have enough room to store all numbers.
func (v *verifiableMetric) update(data *Data, fieldValues string, packer *numberPacker) {
	switch v.wantMetric.Type {
	case TypeCounter:
		v.lastCounterValue[v.verifier.internMap.Intern(fieldValues)] = packer.pack(data.Number)
	case TypeHistogram:
		lastDistributionSnapshot := v.lastDistributionSnapshot[v.verifier.internMap.Intern(fieldValues)]
		lastBucketSamples := lastDistributionSnapshot.numSamples
		var count uint64
		for i, b := range data.HistogramValue.Buckets {
			lastBucketSamples[i] = packer.packInt(int64(b.Samples))
			count += b.Samples
		}
		lastDistributionSnapshot.sum = packer.pack(&data.HistogramValue.Total)
		lastDistributionSnapshot.count = packer.packInt(int64(count))
		lastDistributionSnapshot.min = packer.pack(&data.HistogramValue.Min)
		lastDistributionSnapshot.max = packer.pack(&data.HistogramValue.Max)
		lastDistributionSnapshot.ssd = packer.pack(&data.HistogramValue.SumOfSquaredDeviations)
	}
}

// repackUnseen packs all numbers that must be carried over from snapshot to snapshot and which were
// not seen in the latest snapshot's data.
// This function should carry over all numbers typically packed in `v.update` but for all metric
// field combinations that are not in `metricFieldsSeen`.
//
// Preconditions: `verifyIncrement` has succeeded on all data in the latest snapshot,
// and `newPacker` is guaranteed to have enough room to store all numbers.
func (v *verifiableMetric) repackUnseen(metricFieldsSeen map[string]struct{}, oldPacker, newPacker *numberPacker) {
	switch v.wantMetric.Type {
	case TypeCounter:
		for fieldValues, lastCounterValue := range v.lastCounterValue {
			if _, found := metricFieldsSeen[fieldValues]; found {
				continue
			}
			v.lastCounterValue[fieldValues] = oldPacker.portTo(newPacker, lastCounterValue)
		}
	case TypeHistogram:
		for fieldValues, lastDistributionSnapshot := range v.lastDistributionSnapshot {
			if _, found := metricFieldsSeen[fieldValues]; found {
				continue
			}
			lastBucketSamples := lastDistributionSnapshot.numSamples
			for i, b := range lastBucketSamples {
				lastBucketSamples[i] = oldPacker.portTo(newPacker, b)
			}
			lastDistributionSnapshot.sum = oldPacker.portTo(newPacker, lastDistributionSnapshot.sum)
			lastDistributionSnapshot.count = oldPacker.portTo(newPacker, lastDistributionSnapshot.count)
			lastDistributionSnapshot.min = oldPacker.portTo(newPacker, lastDistributionSnapshot.min)
			lastDistributionSnapshot.max = oldPacker.portTo(newPacker, lastDistributionSnapshot.max)
			lastDistributionSnapshot.ssd = oldPacker.portTo(newPacker, lastDistributionSnapshot.ssd)
		}
	}
}

// Verifier allows verifying metric snapshots against metric registration data.
// The aim is to prevent a compromised Sentry from emitting bogus data or DoS'ing metric ingestion.
// A single Verifier should be used per sandbox. It is expected to be reused across exports such
// that it can enforce that export snapshot timestamps never move backwards.
type Verifier struct {
	knownMetrics map[string]*verifiableMetric

	// mu protects the fields below.
	mu sync.Mutex

	// internMap is used to intern strings relevant to this verifier only.
	// Globally-relevant strings should be interned in globalInternMap.
	internMap internedStringMap

	// lastPacker is a reference to the numberPacker used to pack numbers in the last successful
	// verification round.
	lastPacker *numberPacker

	// lastTimestamp is the snapshot timestamp of the last successfully-verified snapshot.
	lastTimestamp time.Time
}

// NewVerifier returns a new metric verifier that can verify the integrity of snapshots against
// the given metric registration data.
// It returns a cleanup function that must be called when the Verifier is no longer needed.
func NewVerifier(registration *pb.MetricRegistration) (*Verifier, func(), error) {
	globalInternVerifierCreated()
	verifier := &Verifier{
		knownMetrics: make(map[string]*verifiableMetric),
		internMap:    make(internedStringMap),
	}
	for _, metric := range registration.GetMetrics() {
		metricName := metric.GetPrometheusName()
		if _, alreadyExists := verifier.knownMetrics[metricName]; alreadyExists {
			globalInternVerifierReleased()
			return nil, func() {}, fmt.Errorf("metric %q registered twice", metricName)
		}
		verifiableM, err := newVerifiableMetric(metric, verifier)
		if err != nil {
			globalInternVerifierReleased()
			return nil, func() {}, fmt.Errorf("metric %q: %v", metricName, err)
		}
		verifier.knownMetrics[globalIntern(metricName)] = verifiableM
	}
	return verifier, globalInternVerifierReleased, nil
}

// Verify verifies the integrity of a snapshot against the metric registration data of the Verifier.
// It assumes that it will be called on snapshots obtained chronologically over time.
func (v *Verifier) Verify(snapshot *Snapshot) error {
	var err error

	// Basic timestamp checks.
	now := timeNow()
	if snapshot.When.After(now) {
		return errors.New("snapshot is from the future")
	}
	if snapshot.When.Before(now.Add(-maxExportStaleness)) {
		return fmt.Errorf("snapshot is too old; it is from %v, expected at least %v (%v from now)", snapshot.When, now.Add(-maxExportStaleness), maxExportStaleness)
	}

	// Start critical section.
	v.mu.Lock()
	defer v.mu.Unlock()

	// Metrics checks.
	fieldsSeen := make(map[string]map[string]struct{}, len(v.knownMetrics))
	dataToFieldsSeen := make(map[*Data]string, len(snapshot.Data))
	for _, data := range snapshot.Data {
		metricName := data.Metric.Name
		verifiableM, found := v.knownMetrics[metricName]
		if !found {
			return fmt.Errorf("snapshot contains unknown metric %q", metricName)
		}
		metricName = globalIntern(metricName)
		metricFieldsSeen, found := fieldsSeen[metricName]
		if !found {
			metricFieldsSeen = make(map[string]struct{}, verifiableM.numFieldCombinations())
			fieldsSeen[metricName] = metricFieldsSeen
		}
		if err = verifiableM.verify(data, metricFieldsSeen, dataToFieldsSeen); err != nil {
			return fmt.Errorf("metric %q: %v", metricName, err)
		}
	}

	if v.lastTimestamp.After(snapshot.When) {
		return fmt.Errorf("consecutive snapshots are not chronologically ordered: last verified snapshot was exported at %v, this one is from %v", v.lastTimestamp, snapshot.When)
	}

	for _, data := range snapshot.Data {
		if err := v.knownMetrics[data.Metric.Name].verifyIncrement(data, dataToFieldsSeen[data], v.lastPacker); err != nil {
			return fmt.Errorf("metric %q: %v", data.Metric.Name, err)
		}
	}
	var neededPackerCapacity uint64
	for _, data := range snapshot.Data {
		neededPackerCapacity += v.knownMetrics[data.Metric.Name].packerCapacityNeededForData(data, dataToFieldsSeen[data])
	}
	for name, metric := range v.knownMetrics {
		neededPackerCapacity += metric.packerCapacityNeededForLast(fieldsSeen[name])
	}
	if neededPackerCapacity > uint64(valueField) {
		return fmt.Errorf("snapshot contains too many large numbers to fit into packer memory (%d numbers needing indirection)", neededPackerCapacity)
	}

	// All checks succeeded, update last-seen data.
	// We need to be guaranteed to not fail past this point in the function.
	newPacker := &numberPacker{}
	if neededPackerCapacity != 0 {
		newPacker.data = make([]uint64, 0, neededPackerCapacity)
	}
	v.lastTimestamp = snapshot.When
	for _, data := range snapshot.Data {
		v.knownMetrics[globalIntern(data.Metric.Name)].update(data, v.internMap.Intern(dataToFieldsSeen[data]), newPacker)
	}
	if uint64(len(newPacker.data)) != neededPackerCapacity {
		for name, metric := range v.knownMetrics {
			metric.repackUnseen(fieldsSeen[name], v.lastPacker, newPacker)
		}
	}
	if uint64(len(newPacker.data)) != neededPackerCapacity {
		// We panic here because this represents an internal logic error,
		// not something the user did wrong.
		panic(fmt.Sprintf("did not pack the expected number of numbers in numberPacker: packed %d, expected %d; this indicates a logic error in verifyIncrement", len(newPacker.data), neededPackerCapacity))
	}
	v.lastPacker = newPacker
	return nil
}

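// Typical usage (sketch; `registration` and `snapshots` are assumed to be
// provided by the caller, e.g. from a sandbox's metric registration data and
// its periodic exports):
//
//	verifier, cleanup, err := NewVerifier(registration)
//	if err != nil {
//		return err
//	}
//	defer cleanup()
//	for _, snapshot := range snapshots { // must be in chronological order
//		if err := verifier.Verify(snapshot); err != nil {
//			return fmt.Errorf("rejecting snapshot: %w", err)
//		}
//	}
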
// AllMetrics returns the metadata of all the metrics that were declared as
// part of this Verifier.
func (v *Verifier) AllMetrics() []*pb.MetricMetadata {
	metrics := make([]*pb.MetricMetadata, 0, len(v.knownMetrics))
	for _, m := range v.knownMetrics {
		metrics = append(metrics, m.metadata)
	}
	return metrics
}