github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/prometheus/prometheus_verify.go

// Copyright 2022 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package prometheus

import (
	"errors"
	"fmt"
	"math"
	"strings"
	"sync"
	"time"
	"unicode"

	pb "github.com/metacubex/gvisor/pkg/metric/metric_go_proto"
)

const (
	// maxExportStaleness is the maximum allowed age of a snapshot when it is verified.
	// Used to avoid exporting snapshots from bogus times from ages past.
	maxExportStaleness = 10 * time.Second

	// MetaMetricPrefix is a prefix used for metrics defined by the metric server,
	// as opposed to metrics generated by each sandbox.
	// For this reason, this prefix is not allowed to be used in sandbox metrics.
	MetaMetricPrefix = "meta_"
)

// Prometheus process-level metric names and definitions.
// These are not necessarily exported, but we enforce that sandboxes may not
// export metrics sharing the same names.
// https://prometheus.io/docs/instrumenting/writing_clientlibs/#process-metrics
var (
	ProcessCPUSecondsTotal = Metric{
		Name: "process_cpu_seconds_total",
		Type: TypeGauge,
		Help: "Total user and system CPU time spent in seconds.",
	}
	ProcessOpenFDs = Metric{
		Name: "process_open_fds",
		Type: TypeGauge,
		Help: "Number of open file descriptors.",
	}
	ProcessMaxFDs = Metric{
		Name: "process_max_fds",
		Type: TypeGauge,
		Help: "Maximum number of open file descriptors.",
	}
	ProcessVirtualMemoryBytes = Metric{
		Name: "process_virtual_memory_bytes",
		Type: TypeGauge,
		Help: "Virtual memory size in bytes.",
	}
	ProcessVirtualMemoryMaxBytes = Metric{
		Name: "process_virtual_memory_max_bytes",
		Type: TypeGauge,
		Help: "Maximum amount of virtual memory available in bytes.",
	}
	ProcessResidentMemoryBytes = Metric{
		Name: "process_resident_memory_bytes",
		Type: TypeGauge,
		Help: "Resident memory size in bytes.",
	}
	ProcessHeapBytes = Metric{
		Name: "process_heap_bytes",
		Type: TypeGauge,
		Help: "Process heap size in bytes.",
	}
	ProcessStartTimeSeconds = Metric{
		Name: "process_start_time_seconds",
		Type: TypeGauge,
		Help: "Start time of the process since unix epoch in seconds.",
	}
	ProcessThreads = Metric{
		Name: "process_threads",
		Type: TypeGauge,
		Help: "Number of OS threads in the process.",
	}
)

// processMetrics is the set of process-level metrics.
var processMetrics = [9]*Metric{
	&ProcessCPUSecondsTotal,
	&ProcessOpenFDs,
	&ProcessMaxFDs,
	&ProcessVirtualMemoryBytes,
	&ProcessVirtualMemoryMaxBytes,
	&ProcessResidentMemoryBytes,
	&ProcessHeapBytes,
	&ProcessStartTimeSeconds,
	&ProcessThreads,
}
// internedStringMap allows for interning strings.
type internedStringMap map[string]*string

// Intern returns the interned version of the given string.
// If it is not already interned in the map, this function interns it.
func (m internedStringMap) Intern(s string) string {
	if existing, found := m[s]; found {
		return *existing
	}
	m[s] = &s
	return s
}
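// An illustrative sketch of the interning behavior (this example is not part
// of the original file): repeated lookups of equal strings return the first
// stored copy, so callers holding many duplicate keys share one backing
// string instead of retaining every duplicate:
//
//	m := make(internedStringMap)
//	a := m.Intern("tcp,ipv4") // first sighting: stored and returned
//	b := m.Intern("tcp,ipv4") // later sighting: returns the stored copy
//	_, _ = a, b               // equal contents; b reuses a's backing data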
// globalInternMap is a string intern map used for globally-relevant data that repeats across
// verifiers, such as metric names and field names, but not field values or combinations of field
// values.
var (
	globalInternMu  sync.Mutex
	verifierCount   uint64
	globalInternMap = make(internedStringMap)
)

// globalIntern returns the interned version of the given string.
// If it is not already interned in the map, this function interns it.
func globalIntern(s string) string {
	globalInternMu.Lock()
	defer globalInternMu.Unlock()
	return globalInternMap.Intern(s)
}

func globalInternVerifierCreated() {
	globalInternMu.Lock()
	defer globalInternMu.Unlock()
	verifierCount++
}

func globalInternVerifierReleased() {
	globalInternMu.Lock()
	defer globalInternMu.Unlock()
	verifierCount--
	if verifierCount <= 0 {
		verifierCount = 0
		// No more verifiers active, so release the global map to not keep consuming needless resources.
		globalInternMap = make(internedStringMap)
	}
}

// numberPacker holds packedNumber data. It is useful for storing large numbers of Number structs
// in a small memory footprint.
type numberPacker struct {
	// `data` *must* be pre-allocated if there is any number to be stored in it.
	// Attempts to pack a number that cannot fit into the existing space
	// allocated for this slice will cause a panic.
	// Callers may use `needsPackerStorage` to determine whether a number needs
	// space in this slice or not ahead of packing it.
	data []uint64
}

// packedNumber is a non-serializable but smaller-memory-footprint container for a numerical value.
// It can be unpacked out to a Number struct.
// This contains 4 bytes where we try to pack as much as possible.
// For the overwhelmingly-common case of integers that fit in 30 bits (i.e. 32-bit values where
// the first 2 bits are zero), we store them directly here. Otherwise, we store the offset of a
// 64-bit number within numberPacker.
// Layout, going from highest to lowest bit:
// Bit 0 is the type: 0 for integer, 1 for float.
// Bit 1 is 0 if the number's value is stored within the next 30 bits, or 1 if the next 30 bits
// refer to an offset within numberPacker instead.
// In the case of a float, the next two bits (bits 2 and 3) may be used to encode a special value:
//   - 00 means not a special value
//   - 01 means NaN
//   - 10 means -infinity
//   - 11 means +infinity
//
// When not using a special value, the float32 exponent must fit in 5 bits, and is encoded using a
// bias of 15, meaning it ranges from -15 (encoded as 0b00000) to 16 (encoded as 0b11111), and an
// exponent of 0 is encoded as 0b01111.
// Floats that do not fit within this range must be encoded indirectly as float64s, similar to
// integers that don't fit in 30 bits.
type packedNumber uint32

// Useful masks and other bit-twiddling stuff for packedNumber.
const (
	typeField                = uint32(1 << 31)
	typeFieldInteger         = uint32(0)
	typeFieldFloat           = uint32(typeField)
	storageField             = uint32(1 << 30)
	storageFieldDirect       = uint32(0)
	storageFieldIndirect     = uint32(storageField)
	valueField               = uint32(1<<30 - 1)
	maxDirectUint            = uint64(valueField)
	float32ExponentField     = uint32(0x7f800000)
	float32ExponentShift     = uint32(23)
	float32ExponentBias      = uint32(127)
	float32FractionField     = uint32(0x7fffff)
	packedFloatExponentField = uint32(0x0f800000)
	packedFloatExponentBias  = uint32(15)
	packedFloatNaN           = packedNumber(typeFieldFloat | storageFieldDirect | 0x10000000)
	packedFloatNegInf        = packedNumber(typeFieldFloat | storageFieldDirect | 0x20000000)
	packedFloatInf           = packedNumber(typeFieldFloat | storageFieldDirect | 0x30000000)
)
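// A worked sketch of the resulting encodings (illustrative values, not from
// the original file):
//
//	packInt(42)      -> integer, direct: 42 fits in the low 30 bits
//	packInt(1 << 40) -> integer, indirect: the low 30 bits hold an offset into `data`
//	packFloat(0.5)   -> float, direct: exponent -1 fits the 5-bit packed exponent
//	packFloat(1e300) -> float, indirect: stored as raw float64 bits in `data`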
// needsPackerStorage returns 0 for numbers that can be
// stored directly into the 32 bits of a packedNumber, or 1 for numbers that
// need more bits and would need to be stored into a numberPacker's `data`
// field.
//
//go:nosplit
func needsPackerStorage(n *Number) uint64 {
	if n.Float == 0.0 {
		v := n.Int
		if v >= 0 && v <= int64(valueField) {
			return 0
		}
		return 1
	}
	// n is a float.
	v := n.Float
	if math.IsNaN(v) || v == math.Inf(-1) || v == math.Inf(1) {
		return 0
	}
	if v >= 0.0 && float64(float32(v)) == v {
		float32Bits := math.Float32bits(float32(v))
		exponent := (float32Bits&float32ExponentField)>>float32ExponentShift - float32ExponentBias
		packedExponent := (exponent + packedFloatExponentBias) << float32ExponentShift
		if packedExponent&packedFloatExponentField == packedExponent {
			return 0
		}
	}
	return 1
}

// isIndirect returns 1 iff this packedNumber needs storage in a numberPacker.
//
//go:nosplit
func (n packedNumber) isIndirect() uint64 {
	if uint32(n)&storageField == storageFieldIndirect {
		return 1
	}
	return 0
}

// errOutOfPackerMemory is emitted when the number cannot be packed into a numberPacker.
var errOutOfPackerMemory = errors.New("out of numberPacker memory")

// pack packs a Number into a packedNumber.
//
//go:nosplit
func (p *numberPacker) pack(n *Number) packedNumber {
	if n.Float == 0.0 {
		v := n.Int
		if v >= 0 && v <= int64(maxDirectUint) {
			// We can store the integer value directly.
			return packedNumber(typeFieldInteger | storageFieldDirect | uint32(v))
		}
		if len(p.data) == cap(p.data) {
			panic(errOutOfPackerMemory)
		}
		p.data = append(p.data, uint64(v))
		return packedNumber(typeFieldInteger | storageFieldIndirect | uint32(len(p.data)-1))
	}
	// n is a float.
	v := n.Float
	if math.IsNaN(v) {
		return packedFloatNaN
	}
	if v == math.Inf(-1) {
		return packedFloatNegInf
	}
	if v == math.Inf(1) {
		return packedFloatInf
	}
	if v >= 0.0 && float64(float32(v)) == v {
		float32Bits := math.Float32bits(float32(v))
		exponent := (float32Bits&float32ExponentField)>>float32ExponentShift - float32ExponentBias
		packedExponent := (exponent + packedFloatExponentBias) << float32ExponentShift
		if packedExponent&packedFloatExponentField == packedExponent {
			float32Fraction := float32Bits & float32FractionField
			return packedNumber(typeFieldFloat | storageFieldDirect | packedExponent | float32Fraction)
		}
	}
	if len(p.data) == cap(p.data) {
		panic(errOutOfPackerMemory)
	}
	p.data = append(p.data, math.Float64bits(v))
	return packedNumber(typeFieldFloat | storageFieldIndirect | uint32(len(p.data)-1))
}

// packInt packs an integer.
//
//go:nosplit
func (p *numberPacker) packInt(val int64) packedNumber {
	n := Number{Int: val}
	return p.pack(&n)
}

// packFloat packs a floating-point number.
//
//go:nosplit
func (p *numberPacker) packFloat(val float64) packedNumber {
	n := Number{Float: val}
	return p.pack(&n)
}

// unpack unpacks a packedNumber back into a Number.
func (p *numberPacker) unpack(n packedNumber) *Number {
	switch uint32(n) & typeField {
	case typeFieldInteger:
		switch uint32(n) & storageField {
		case storageFieldDirect:
			return NewInt(int64(uint32(n) & valueField))
		case storageFieldIndirect:
			return NewInt(int64(p.data[uint32(n)&valueField]))
		}
	case typeFieldFloat:
		switch uint32(n) & storageField {
		case storageFieldDirect:
			switch n {
			case packedFloatNaN:
				return NewFloat(math.NaN())
			case packedFloatNegInf:
				return NewFloat(math.Inf(-1))
			case packedFloatInf:
				return NewFloat(math.Inf(1))
			default:
				exponent := ((uint32(n) & packedFloatExponentField) >> float32ExponentShift) - packedFloatExponentBias
				float32Bits := ((exponent + float32ExponentBias) << float32ExponentShift) | (uint32(n) & float32FractionField)
				return NewFloat(float64(math.Float32frombits(float32Bits)))
			}
		case storageFieldIndirect:
			return NewFloat(math.Float64frombits(p.data[uint32(n)&valueField]))
		}
	}
	panic("unreachable")
}
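// A minimal round-trip sketch (illustrative only): since pack panics rather
// than grow `data`, callers size the packer up front with needsPackerStorage:
//
//	nums := []Number{{Int: 7}, {Int: 1 << 40}, {Float: 0.25}}
//	var capacity uint64
//	for i := range nums {
//		capacity += needsPackerStorage(&nums[i]) // here: 0 + 1 + 0
//	}
//	p := &numberPacker{data: make([]uint64, 0, capacity)}
//	for i := range nums {
//		packed := p.pack(&nums[i])
//		_ = p.unpack(packed) // numerically equal to nums[i]
//	}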
// mustUnpackInt unpacks an integer.
// It panics if the packedNumber is not an integer.
func (p *numberPacker) mustUnpackInt(n packedNumber) int64 {
	num := p.unpack(n)
	if !num.IsInteger() {
		panic("not an integer")
	}
	return num.Int
}

// mustUnpackFloat unpacks a floating-point number.
// It panics if the packedNumber is not a floating-point number.
func (p *numberPacker) mustUnpackFloat(n packedNumber) float64 {
	num := p.unpack(n)
	if *num == zero {
		return 0.0
	}
	if num.IsInteger() {
		panic("not a float")
	}
	return num.Float
}

// portTo ports over a packedNumber from this numberPacker to a new one.
// It is equivalent to `other.pack(p.unpack(n))` but avoids
// allocations in the overwhelmingly-common case where the number is direct.
func (p *numberPacker) portTo(other *numberPacker, n packedNumber) packedNumber {
	if uint32(n)&storageField == storageFieldDirect {
		// `n` is self-contained, just return as-is.
		return n
	}
	if len(other.data) == cap(other.data) {
		panic(errOutOfPackerMemory)
	}
	other.data = append(other.data, p.data[uint32(n)&valueField])
	return packedNumber(uint32(n)&(typeField|storageField) | uint32(len(other.data)-1))
}
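// Sketch of the intended use (illustrative only): when numbers are carried
// into a fresh packer, direct values copy for free while each indirect value
// claims exactly one slot in the destination:
//
//	old := &numberPacker{data: make([]uint64, 0, 1)}
//	n := old.packInt(1 << 40) // indirect: stored in old.data
//	fresh := &numberPacker{data: make([]uint64, 0, 1)}
//	moved := old.portTo(fresh, n) // copies the one uint64 across
//	_ = fresh.unpack(moved)       // still 1 << 40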
// distributionSnapshot contains the data for a single field combination of a
// distribution ("histogram") metric.
type distributionSnapshot struct {
	// sum is the sum of all samples across all buckets.
	sum packedNumber

	// count is the number of samples across all buckets.
	count packedNumber

	// min is the lowest-recorded sample in the distribution.
	// It is only meaningful when count >= 1.
	min packedNumber

	// max is the highest-recorded sample in the distribution.
	// It is only meaningful when count >= 1.
	max packedNumber

	// ssd is the sum-of-squared-deviations computation of the distribution.
	// If non-zero, it is always a floating-point number.
	// It is only meaningful when count >= 2.
	ssd packedNumber

	// numSamples is the number of samples in each bucket.
	numSamples []packedNumber
}

// verifiableMetric verifies a single metric within a Verifier.
type verifiableMetric struct {
	metadata              *pb.MetricMetadata
	wantMetric            Metric
	numFields             uint32
	verifier              *Verifier
	allowedFieldValues    map[string]map[string]struct{}
	wantBucketUpperBounds []Number

	// The following fields are used to verify that values are actually increasing monotonically.
	// They are only read and modified when the parent Verifier.mu is held.
	// They are mapped by their combination of field values.

	// lastCounterValue is used for counter metrics.
	lastCounterValue map[string]packedNumber

	// lastDistributionSnapshot is used for distribution ("histogram") metrics.
	lastDistributionSnapshot map[string]*distributionSnapshot
}

// newVerifiableMetric creates a new verifiableMetric that can verify the
// values of a metric with the given metadata.
func newVerifiableMetric(metadata *pb.MetricMetadata, verifier *Verifier) (*verifiableMetric, error) {
	promName := metadata.GetPrometheusName()
	if metadata.GetName() == "" || promName == "" {
		return nil, errors.New("metric has no name")
	}
	for _, processMetric := range processMetrics {
		if promName == processMetric.Name {
			return nil, fmt.Errorf("metric name %q is reserved by Prometheus for process-level metrics", promName)
		}
	}
	if strings.HasPrefix(promName, MetaMetricPrefix) {
		return nil, fmt.Errorf("metric name %q starts with %q which is a reserved prefix", promName, "meta_")
	}
	if !unicode.IsLower(rune(promName[0])) {
		return nil, fmt.Errorf("invalid initial character in prometheus metric name: %q", promName)
	}
	for _, r := range promName {
		if !unicode.IsLower(r) && !unicode.IsDigit(r) && r != '_' {
			return nil, fmt.Errorf("invalid character %c in prometheus metric name %q", r, promName)
		}
	}
	numFields := uint32(len(metadata.GetFields()))
	var allowedFieldValues map[string]map[string]struct{}
	if numFields > 0 {
		seenFields := make(map[string]struct{}, numFields)
		allowedFieldValues = make(map[string]map[string]struct{}, numFields)
		for _, field := range metadata.GetFields() {
			fieldName := field.GetFieldName()
			if _, alreadyExists := seenFields[fieldName]; alreadyExists {
				return nil, fmt.Errorf("field %s is defined twice", fieldName)
			}
			seenFields[fieldName] = struct{}{}
			if len(field.GetAllowedValues()) == 0 {
				return nil, fmt.Errorf("field %s has no allowed values", fieldName)
			}
			fieldValues := make(map[string]struct{}, len(field.GetAllowedValues()))
			for _, value := range field.GetAllowedValues() {
				if _, alreadyExists := fieldValues[value]; alreadyExists {
					return nil, fmt.Errorf("field %s has duplicate allowed value %q", fieldName, value)
				}
				fieldValues[globalIntern(value)] = struct{}{}
			}
			allowedFieldValues[globalIntern(fieldName)] = fieldValues
		}
	}
	v := &verifiableMetric{
		metadata: metadata,
		verifier: verifier,
		wantMetric: Metric{
			Name: globalIntern(promName),
			Help: globalIntern(metadata.GetDescription()),
		},
		numFields:          numFields,
		allowedFieldValues: allowedFieldValues,
	}
	numFieldCombinations := len(allowedFieldValues)
	switch metadata.GetType() {
	case pb.MetricMetadata_TYPE_UINT64:
		v.wantMetric.Type = TypeGauge
		if metadata.GetCumulative() {
			v.wantMetric.Type = TypeCounter
			v.lastCounterValue = make(map[string]packedNumber, numFieldCombinations)
		}
	case pb.MetricMetadata_TYPE_DISTRIBUTION:
		v.wantMetric.Type = TypeHistogram
		numBuckets := len(metadata.GetDistributionBucketLowerBounds()) + 1
		if numBuckets <= 1 || numBuckets > 256 {
			return nil, fmt.Errorf("unsupported number of buckets: %d", numBuckets)
		}
		v.wantBucketUpperBounds = make([]Number, numBuckets)
		for i, boundary := range metadata.GetDistributionBucketLowerBounds() {
			v.wantBucketUpperBounds[i] = Number{Int: boundary}
		}
		v.wantBucketUpperBounds[numBuckets-1] = Number{Float: math.Inf(1)}
		v.lastDistributionSnapshot = make(map[string]*distributionSnapshot, numFieldCombinations)
	default:
		return nil, fmt.Errorf("invalid type: %v", metadata.GetType())
	}
	return v, nil
}

func (v *verifiableMetric) numFieldCombinations() int {
	return len(v.allowedFieldValues)
}
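// Illustrative examples of names accepted and rejected by the checks above
// (the names themselves are made up):
//
//	"fs_opens_total"    -> accepted: lowercase start, only [a-z0-9_]
//	"meta_export_count" -> rejected: uses the reserved MetaMetricPrefix
//	"process_open_fds"  -> rejected: collides with a process-level metric
//	"FsOpens"           -> rejected: contains uppercase characters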
// verify does read-only checks on `data`.
// `metricFieldsSeen` is passed across calls to `verify`. It is used to track the set of metric
// field values that have already been seen. `verify` should populate this.
// `dataToFieldsSeen` is passed across calls to `verify` and other methods of `verifiableMetric`.
// It is used to store the canonical representation of the field values seen for each *Data.
//
// Precondition: `Verifier.mu` is held.
func (v *verifiableMetric) verify(data *Data, metricFieldsSeen map[string]struct{}, dataToFieldsSeen map[*Data]string) error {
	if *data.Metric != v.wantMetric {
		return fmt.Errorf("invalid metric definition: got %+v want %+v", data.Metric, v.wantMetric)
	}

	// Verify fields.
	if uint32(len(data.Labels)) != v.numFields {
		return fmt.Errorf("invalid number of fields: got %d want %d", len(data.Labels), v.numFields)
	}
	var fieldValues strings.Builder
	firstField := true
	for _, field := range v.metadata.GetFields() {
		fieldName := field.GetFieldName()
		value, found := data.Labels[fieldName]
		if !found {
			return fmt.Errorf("did not specify field %q", fieldName)
		}
		if _, allowed := v.allowedFieldValues[fieldName][value]; !allowed {
			return fmt.Errorf("value %q is not allowed for field %s", value, fieldName)
		}
		if !firstField {
			fieldValues.WriteRune(',')
		}
		fieldValues.WriteString(value)
		firstField = false
	}
	fieldValuesStr := fieldValues.String()
	if _, alreadySeen := metricFieldsSeen[fieldValuesStr]; alreadySeen {
		return fmt.Errorf("combination of field values %q was already seen", fieldValuesStr)
	}

	// Verify value.
	gotNumber := data.Number != nil
	gotHistogram := data.HistogramValue != nil
	numSpecified := 0
	if gotNumber {
		numSpecified++
	}
	if gotHistogram {
		numSpecified++
	}
	if numSpecified != 1 {
		return fmt.Errorf("invalid number of value fields specified: %d", numSpecified)
	}
	switch v.metadata.GetType() {
	case pb.MetricMetadata_TYPE_UINT64:
		if !gotNumber {
			return errors.New("expected number value for gauge or counter")
		}
		if !data.Number.IsInteger() {
			return fmt.Errorf("integer metric got non-integer value: %v", data.Number)
		}
	case pb.MetricMetadata_TYPE_DISTRIBUTION:
		if !gotHistogram {
			return errors.New("expected histogram value for histogram")
		}
		if len(data.HistogramValue.Buckets) != len(v.wantBucketUpperBounds) {
			return fmt.Errorf("invalid number of buckets: got %d want %d", len(data.HistogramValue.Buckets), len(v.wantBucketUpperBounds))
		}
		if data.HistogramValue.SumOfSquaredDeviations.IsInteger() && data.HistogramValue.SumOfSquaredDeviations.Int != 0 {
			return fmt.Errorf("sum of squared deviations must be a floating-point value, got %v", data.HistogramValue.SumOfSquaredDeviations)
		}
		for i, b := range data.HistogramValue.Buckets {
			if want := v.wantBucketUpperBounds[i]; b.UpperBound != want {
				return fmt.Errorf("invalid upper bound for bucket %d (0-based): got %v want %v", i, b.UpperBound, want)
			}
		}
	default:
		return fmt.Errorf("invalid metric type: %v", v.wantMetric.Type)
	}

	// All passed. Update the maps that are shared across calls.
	fieldValuesStr = v.verifier.internMap.Intern(fieldValuesStr)
	dataToFieldsSeen[data] = fieldValuesStr
	metricFieldsSeen[fieldValuesStr] = struct{}{}
	return nil
}
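// An illustrative example of the canonical key built above (assumed metadata,
// not from the original file): for a metric declaring its fields in the order
// [protocol, family], a Data labeled {protocol: "tcp", family: "ipv4"}
// canonicalizes to "tcp,ipv4"; that exact string is what metricFieldsSeen and
// dataToFieldsSeen record.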
// verifyIncrement verifies that incremental metrics are monotonically increasing.
//
// Preconditions: `verify` has succeeded on the given `data`, and `Verifier.mu` is held.
func (v *verifiableMetric) verifyIncrement(data *Data, fieldValues string, packer *numberPacker) error {
	switch v.wantMetric.Type {
	case TypeCounter:
		last := packer.unpack(v.lastCounterValue[v.verifier.internMap.Intern(fieldValues)])
		if !last.SameType(data.Number) {
			return fmt.Errorf("counter number type changed: %v vs %v", last, data.Number)
		}
		if last.GreaterThan(data.Number) {
			return fmt.Errorf("counter value decreased from %v to %v", last, data.Number)
		}
	case TypeHistogram:
		lastDistributionSnapshot := v.lastDistributionSnapshot[v.verifier.internMap.Intern(fieldValues)]
		if lastDistributionSnapshot == nil {
			lastDistributionSnapshot = &distributionSnapshot{
				numSamples: make([]packedNumber, len(v.wantBucketUpperBounds)),
			}
			v.lastDistributionSnapshot[v.verifier.internMap.Intern(fieldValues)] = lastDistributionSnapshot
		}
		lastCount := packer.mustUnpackInt(lastDistributionSnapshot.count)
		if lastCount >= 1 {
			lastMin := packer.unpack(lastDistributionSnapshot.min)
			if !lastMin.SameType(&data.HistogramValue.Min) {
				return fmt.Errorf("minimum value type changed: %v vs %v", lastMin, data.HistogramValue.Min)
			}
			if data.HistogramValue.Min.GreaterThan(lastMin) {
				return fmt.Errorf("minimum value strictly increased: from %v to %v", lastMin, data.HistogramValue.Min)
			}
			lastMax := packer.unpack(lastDistributionSnapshot.max)
			if !lastMax.SameType(&data.HistogramValue.Max) {
				return fmt.Errorf("maximum value type changed: %v vs %v", lastMax, data.HistogramValue.Max)
			}
			if lastMax.GreaterThan(&data.HistogramValue.Max) {
				return fmt.Errorf("maximum value strictly decreased: from %v to %v", lastMax, data.HistogramValue.Max)
			}
		}
		if lastCount >= 2 {
			// We already verified that the new data is a floating-point number
			// earlier, no need to double-check here.
			lastSSD := packer.mustUnpackFloat(lastDistributionSnapshot.ssd)
			if data.HistogramValue.SumOfSquaredDeviations.Float < lastSSD {
				return fmt.Errorf("sum of squared deviations decreased from %v to %v", lastSSD, data.HistogramValue.SumOfSquaredDeviations.Float)
			}
		}
		numSamples := lastDistributionSnapshot.numSamples
		for i, b := range data.HistogramValue.Buckets {
			if uint64(packer.mustUnpackInt(numSamples[i])) > b.Samples {
				return fmt.Errorf("number of samples in bucket %d (0-based) decreased from %d to %d", i, packer.mustUnpackInt(numSamples[i]), b.Samples)
			}
		}
	}
	return nil
}
// packerCapacityNeededForData returns the `numberPacker` capacity needed to store `data`.
func (v *verifiableMetric) packerCapacityNeededForData(data *Data, fieldValues string) uint64 {
	switch v.wantMetric.Type {
	case TypeCounter:
		return needsPackerStorage(data.Number)
	case TypeHistogram:
		var toPack uint64
		var totalSamples uint64
		var buf Number
		for _, b := range data.HistogramValue.Buckets {
			buf = Number{Int: int64(b.Samples)}
			toPack += needsPackerStorage(&buf)
			totalSamples += b.Samples
		}
		toPack += needsPackerStorage(&data.HistogramValue.Total)
		toPack += needsPackerStorage(&data.HistogramValue.Min)
		toPack += needsPackerStorage(&data.HistogramValue.Max)
		toPack += needsPackerStorage(&data.HistogramValue.SumOfSquaredDeviations)
		buf = Number{Int: int64(totalSamples)}
		toPack += needsPackerStorage(&buf)
		return toPack
	default:
		return 0
	}
}

// packerCapacityNeededForLast returns the `numberPacker` capacity needed to
// store the last snapshot's data that was not seen in the current snapshot
// (aka not in metricFieldsSeen).
func (v *verifiableMetric) packerCapacityNeededForLast(metricFieldsSeen map[string]struct{}) uint64 {
	var capacity uint64
	switch v.wantMetric.Type {
	case TypeCounter:
		for fieldValues, lastCounterValue := range v.lastCounterValue {
			if _, found := metricFieldsSeen[fieldValues]; found {
				continue
			}
			capacity += lastCounterValue.isIndirect()
		}
	case TypeHistogram:
		for fieldValues, distributionSnapshot := range v.lastDistributionSnapshot {
			if _, found := metricFieldsSeen[fieldValues]; found {
				continue
			}
			for _, b := range distributionSnapshot.numSamples {
				capacity += b.isIndirect()
			}
			capacity += distributionSnapshot.sum.isIndirect()
			capacity += distributionSnapshot.count.isIndirect()
			capacity += distributionSnapshot.min.isIndirect()
			capacity += distributionSnapshot.max.isIndirect()
			capacity += distributionSnapshot.ssd.isIndirect()
		}
	}
	return capacity
}

// update updates incremental metrics' "last seen" data.
//
// Preconditions: `verifyIncrement` has succeeded on the given `data`, `Verifier.mu` is held,
// and `packer` is guaranteed to have enough room to store all numbers.
func (v *verifiableMetric) update(data *Data, fieldValues string, packer *numberPacker) {
	switch v.wantMetric.Type {
	case TypeCounter:
		v.lastCounterValue[v.verifier.internMap.Intern(fieldValues)] = packer.pack(data.Number)
	case TypeHistogram:
		lastDistributionSnapshot := v.lastDistributionSnapshot[v.verifier.internMap.Intern(fieldValues)]
		lastBucketSamples := lastDistributionSnapshot.numSamples
		var count uint64
		for i, b := range data.HistogramValue.Buckets {
			lastBucketSamples[i] = packer.packInt(int64(b.Samples))
			count += b.Samples
		}
		lastDistributionSnapshot.sum = packer.pack(&data.HistogramValue.Total)
		lastDistributionSnapshot.count = packer.packInt(int64(count))
		lastDistributionSnapshot.min = packer.pack(&data.HistogramValue.Min)
		lastDistributionSnapshot.max = packer.pack(&data.HistogramValue.Max)
		lastDistributionSnapshot.ssd = packer.pack(&data.HistogramValue.SumOfSquaredDeviations)
	}
}
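// A worked sketch of the capacity accounting (assumed values): for a
// histogram Data whose per-bucket sample counts, Min, Max, and total sample
// count all fit in 30 bits, but whose Total is 1<<40 and whose
// SumOfSquaredDeviations is 1e300, packerCapacityNeededForData returns 2:
// one indirect slot per oversized value.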
// repackUnseen packs all numbers that must be carried over from snapshot to snapshot and which were
// not seen in the latest snapshot's data.
// This function should carry over all numbers typically packed in `v.update` but for all metric
// field combinations that are not in `metricFieldsSeen`.
//
// Preconditions: `verifyIncrement` has succeeded on all of the latest snapshot's data,
// and `newPacker` is guaranteed to have enough room to store all numbers.
func (v *verifiableMetric) repackUnseen(metricFieldsSeen map[string]struct{}, oldPacker, newPacker *numberPacker) {
	switch v.wantMetric.Type {
	case TypeCounter:
		for fieldValues, lastCounterValue := range v.lastCounterValue {
			if _, found := metricFieldsSeen[fieldValues]; found {
				continue
			}
			v.lastCounterValue[fieldValues] = oldPacker.portTo(newPacker, lastCounterValue)
		}
	case TypeHistogram:
		for fieldValues, lastDistributionSnapshot := range v.lastDistributionSnapshot {
			if _, found := metricFieldsSeen[fieldValues]; found {
				continue
			}
			lastBucketSamples := lastDistributionSnapshot.numSamples
			for i, b := range lastBucketSamples {
				lastBucketSamples[i] = oldPacker.portTo(newPacker, b)
			}
			lastDistributionSnapshot.sum = oldPacker.portTo(newPacker, lastDistributionSnapshot.sum)
			lastDistributionSnapshot.count = oldPacker.portTo(newPacker, lastDistributionSnapshot.count)
			lastDistributionSnapshot.min = oldPacker.portTo(newPacker, lastDistributionSnapshot.min)
			lastDistributionSnapshot.max = oldPacker.portTo(newPacker, lastDistributionSnapshot.max)
			lastDistributionSnapshot.ssd = oldPacker.portTo(newPacker, lastDistributionSnapshot.ssd)
		}
	}
}
// Verifier allows verifying metric snapshots against metric registration data.
// The aim is to prevent a compromised Sentry from emitting bogus data or DoS'ing metric ingestion.
// A single Verifier should be used per sandbox. It is expected to be reused across exports such
// that it can enforce that export snapshot timestamps are strictly monotonically increasing.
type Verifier struct {
	knownMetrics map[string]*verifiableMetric

	// mu protects the fields below.
	mu sync.Mutex

	// internMap is used to intern strings relevant to this verifier only.
	// Globally-relevant strings should be interned in globalInternMap.
	internMap internedStringMap

	// lastPacker is a reference to the numberPacker used to pack numbers in the last successful
	// verification round.
	lastPacker *numberPacker

	// lastTimestamp is the snapshot timestamp of the last successfully-verified snapshot.
	lastTimestamp time.Time
}

// NewVerifier returns a new metric verifier that can verify the integrity of snapshots against
// the given metric registration data.
// It returns a cleanup function that must be called when the Verifier is no longer needed.
func NewVerifier(registration *pb.MetricRegistration) (*Verifier, func(), error) {
	globalInternVerifierCreated()
	verifier := &Verifier{
		knownMetrics: make(map[string]*verifiableMetric),
		internMap:    make(internedStringMap),
	}
	for _, metric := range registration.GetMetrics() {
		metricName := metric.GetPrometheusName()
		if _, alreadyExists := verifier.knownMetrics[metricName]; alreadyExists {
			globalInternVerifierReleased()
			return nil, func() {}, fmt.Errorf("metric %q registered twice", metricName)
		}
		verifiableM, err := newVerifiableMetric(metric, verifier)
		if err != nil {
			globalInternVerifierReleased()
			return nil, func() {}, fmt.Errorf("metric %q: %v", metricName, err)
		}
		verifier.knownMetrics[globalIntern(metricName)] = verifiableM
	}
	return verifier, globalInternVerifierReleased, nil
}
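// A minimal usage sketch (illustrative; `registration` and `snapshot` are
// assumed inputs, and error handling is abbreviated):
//
//	verifier, cleanup, err := NewVerifier(registration)
//	if err != nil {
//		return err
//	}
//	defer cleanup() // releases the global intern map reference
//	if err := verifier.Verify(snapshot); err != nil {
//		return err // snapshot rejected; do not export it
//	}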
// Verify verifies the integrity of a snapshot against the metric registration data of the Verifier.
// It assumes that it will be called on snapshots obtained chronologically over time.
func (v *Verifier) Verify(snapshot *Snapshot) error {
	var err error

	// Basic timestamp checks.
	now := timeNow()
	if snapshot.When.After(now) {
		return errors.New("snapshot is from the future")
	}
	if snapshot.When.Before(now.Add(-maxExportStaleness)) {
		return fmt.Errorf("snapshot is too old; it is from %v, expected at least %v (%v from now)", snapshot.When, now.Add(-maxExportStaleness), maxExportStaleness)
	}

	// Start critical section.
	v.mu.Lock()
	defer v.mu.Unlock()

	// Metrics checks.
	fieldsSeen := make(map[string]map[string]struct{}, len(v.knownMetrics))
	dataToFieldsSeen := make(map[*Data]string, len(snapshot.Data))
	for _, data := range snapshot.Data {
		metricName := data.Metric.Name
		verifiableM, found := v.knownMetrics[metricName]
		if !found {
			return fmt.Errorf("snapshot contains unknown metric %q", metricName)
		}
		metricName = globalIntern(metricName)
		metricFieldsSeen, found := fieldsSeen[metricName]
		if !found {
			metricFieldsSeen = make(map[string]struct{}, verifiableM.numFieldCombinations())
			fieldsSeen[metricName] = metricFieldsSeen
		}
		if err = verifiableM.verify(data, metricFieldsSeen, dataToFieldsSeen); err != nil {
			return fmt.Errorf("metric %q: %v", metricName, err)
		}
	}

	if v.lastTimestamp.After(snapshot.When) {
		return fmt.Errorf("consecutive snapshots are not chronologically ordered: last verified snapshot was exported at %v, this one is from %v", v.lastTimestamp, snapshot.When)
	}

	for _, data := range snapshot.Data {
		if err := v.knownMetrics[data.Metric.Name].verifyIncrement(data, dataToFieldsSeen[data], v.lastPacker); err != nil {
			return fmt.Errorf("metric %q: %v", data.Metric.Name, err)
		}
	}
	var neededPackerCapacity uint64
	for _, data := range snapshot.Data {
		neededPackerCapacity += v.knownMetrics[data.Metric.Name].packerCapacityNeededForData(data, dataToFieldsSeen[data])
	}
	for name, metric := range v.knownMetrics {
		neededPackerCapacity += metric.packerCapacityNeededForLast(fieldsSeen[name])
	}
	if neededPackerCapacity > uint64(valueField) {
		return fmt.Errorf("snapshot contains too many large numbers to fit into packer memory (%d numbers needing indirection)", neededPackerCapacity)
	}

	// All checks succeeded, update last-seen data.
	// We need to be guaranteed to not fail past this point in the function.
	newPacker := &numberPacker{}
	if neededPackerCapacity != 0 {
		newPacker.data = make([]uint64, 0, neededPackerCapacity)
	}
	v.lastTimestamp = snapshot.When
	for _, data := range snapshot.Data {
		v.knownMetrics[globalIntern(data.Metric.Name)].update(data, v.internMap.Intern(dataToFieldsSeen[data]), newPacker)
	}
	if uint64(len(newPacker.data)) != neededPackerCapacity {
		for name, metric := range v.knownMetrics {
			metric.repackUnseen(fieldsSeen[name], v.lastPacker, newPacker)
		}
	}
	if uint64(len(newPacker.data)) != neededPackerCapacity {
		// We panic here because this represents an internal logic error,
		// not something the user did wrong.
		panic(fmt.Sprintf("did not pack the expected number of numbers in numberPacker: packed %d, expected %d; this indicates a logic error in verifyIncrement", len(newPacker.data), neededPackerCapacity))
	}
	v.lastPacker = newPacker
	return nil
}

// AllMetrics returns the metadata of all the metrics that were declared as
// part of this Verifier.
func (v *Verifier) AllMetrics() []*pb.MetricMetadata {
	metrics := make([]*pb.MetricMetadata, 0, len(v.knownMetrics))
	for _, m := range v.knownMetrics {
		metrics = append(metrics, m.metadata)
	}
	return metrics
}