github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/metric/metric.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package metric provides primitives for collecting metrics. 16 package metric 17 18 import ( 19 "errors" 20 "fmt" 21 "sort" 22 "sync/atomic" 23 24 "github.com/SagerNet/gvisor/pkg/eventchannel" 25 "github.com/SagerNet/gvisor/pkg/log" 26 pb "github.com/SagerNet/gvisor/pkg/metric/metric_go_proto" 27 "github.com/SagerNet/gvisor/pkg/sync" 28 ) 29 30 var ( 31 // ErrNameInUse indicates that another metric is already defined for 32 // the given name. 33 ErrNameInUse = errors.New("metric name already in use") 34 35 // ErrInitializationDone indicates that the caller tried to create a 36 // new metric after initialization. 37 ErrInitializationDone = errors.New("metric cannot be created after initialization is complete") 38 39 // WeirdnessMetric is a metric with fields created to track the number 40 // of weird occurrences such as time fallback, partial_result, vsyscall 41 // count, watchdog startup timeouts and stuck tasks. 42 WeirdnessMetric = MustCreateNewUint64Metric("/weirdness", true /* sync */, "Increment for weird occurrences of problems such as time fallback, partial result, vsyscalls invoked in the sandbox, watchdog startup timeouts and stuck tasks.", 43 Field{ 44 name: "weirdness_type", 45 allowedValues: []string{"time_fallback", "partial_result", "vsyscall_count", "watchdog_stuck_startup", "watchdog_stuck_tasks"}, 46 }) 47 48 // SuspiciousOperationsMetric is a metric with fields created to detect 49 // operations such as opening an executable file to write from a gofer. 50 SuspiciousOperationsMetric = MustCreateNewUint64Metric("/suspicious_operations", true /* sync */, "Increment for suspicious operations such as opening an executable file to write from a gofer.", 51 Field{ 52 name: "operation_type", 53 allowedValues: []string{"opened_write_execute_file"}, 54 }) 55 ) 56 57 // Uint64Metric encapsulates a uint64 that represents some kind of metric to be 58 // monitored. We currently support metrics with at most one field. 59 // 60 // Metrics are not saved across save/restore and thus reset to zero on restore. 61 // 62 // TODO(b/67298427): Support metric fields. 63 type Uint64Metric struct { 64 // value is the actual value of the metric. It must be accessed atomically. 65 value uint64 66 67 // numFields is the number of metric fields. It is immutable once 68 // initialized. 69 numFields int 70 71 // mu protects the below fields. 72 mu sync.RWMutex `state:"nosave"` 73 74 // fields is the map of fields in the metric. 75 fields map[string]uint64 76 } 77 78 var ( 79 // initialized indicates that all metrics are registered. allMetrics is 80 // immutable once initialized is true. 81 initialized bool 82 83 // allMetrics are the registered metrics. 84 allMetrics = makeMetricSet() 85 ) 86 87 // Initialize sends a metric registration event over the event channel. 88 // 89 // Precondition: 90 // * All metrics are registered. 91 // * Initialize/Disable has not been called. 92 func Initialize() error { 93 if initialized { 94 return errors.New("metric.Initialize called after metric.Initialize or metric.Disable") 95 } 96 97 m := pb.MetricRegistration{} 98 for _, v := range allMetrics.m { 99 m.Metrics = append(m.Metrics, v.metadata) 100 } 101 if err := eventchannel.Emit(&m); err != nil { 102 return fmt.Errorf("unable to emit metric initialize event: %w", err) 103 } 104 105 initialized = true 106 return nil 107 } 108 109 // Disable sends an empty metric registration event over the event channel, 110 // disabling metric collection. 111 // 112 // Precondition: 113 // * All metrics are registered. 114 // * Initialize/Disable has not been called. 115 func Disable() error { 116 if initialized { 117 return errors.New("metric.Disable called after metric.Initialize or metric.Disable") 118 } 119 120 m := pb.MetricRegistration{} 121 if err := eventchannel.Emit(&m); err != nil { 122 return fmt.Errorf("unable to emit metric disable event: %w", err) 123 } 124 125 initialized = true 126 return nil 127 } 128 129 type customUint64Metric struct { 130 // metadata describes the metric. It is immutable. 131 metadata *pb.MetricMetadata 132 133 // value returns the current value of the metric for the given set of 134 // fields. It takes a variadic number of field values as argument. 135 value func(fieldValues ...string) uint64 136 } 137 138 // Field contains the field name and allowed values for the metric which is 139 // used in registration of the metric. 140 type Field struct { 141 // name is the metric field name. 142 name string 143 144 // allowedValues is the list of allowed values for the field. 145 allowedValues []string 146 } 147 148 // RegisterCustomUint64Metric registers a metric with the given name. 149 // 150 // Register must only be called at init and will return and error if called 151 // after Initialized. 152 // 153 // Preconditions: 154 // * name must be globally unique. 155 // * Initialize/Disable have not been called. 156 // * value is expected to accept exactly len(fields) arguments. 157 func RegisterCustomUint64Metric(name string, cumulative, sync bool, units pb.MetricMetadata_Units, description string, value func(...string) uint64, fields ...Field) error { 158 if initialized { 159 return ErrInitializationDone 160 } 161 162 if _, ok := allMetrics.m[name]; ok { 163 return ErrNameInUse 164 } 165 166 allMetrics.m[name] = customUint64Metric{ 167 metadata: &pb.MetricMetadata{ 168 Name: name, 169 Description: description, 170 Cumulative: cumulative, 171 Sync: sync, 172 Type: pb.MetricMetadata_TYPE_UINT64, 173 Units: units, 174 }, 175 value: value, 176 } 177 178 // Metrics can exist without fields. 179 if l := len(fields); l > 1 { 180 return fmt.Errorf("%d fields provided, must be <= 1", l) 181 } 182 183 for _, field := range fields { 184 allMetrics.m[name].metadata.Fields = append(allMetrics.m[name].metadata.Fields, &pb.MetricMetadata_Field{ 185 FieldName: field.name, 186 AllowedValues: field.allowedValues, 187 }) 188 } 189 return nil 190 } 191 192 // MustRegisterCustomUint64Metric calls RegisterCustomUint64Metric for metrics 193 // without fields and panics if it returns an error. 194 func MustRegisterCustomUint64Metric(name string, cumulative, sync bool, description string, value func(...string) uint64, fields ...Field) { 195 if err := RegisterCustomUint64Metric(name, cumulative, sync, pb.MetricMetadata_UNITS_NONE, description, value, fields...); err != nil { 196 panic(fmt.Sprintf("Unable to register metric %q: %s", name, err)) 197 } 198 } 199 200 // NewUint64Metric creates and registers a new cumulative metric with the given 201 // name. 202 // 203 // Metrics must be statically defined (i.e., at init). 204 func NewUint64Metric(name string, sync bool, units pb.MetricMetadata_Units, description string, fields ...Field) (*Uint64Metric, error) { 205 m := Uint64Metric{ 206 numFields: len(fields), 207 } 208 209 if m.numFields == 1 { 210 m.fields = make(map[string]uint64) 211 for _, fieldValue := range fields[0].allowedValues { 212 m.fields[fieldValue] = 0 213 } 214 } 215 return &m, RegisterCustomUint64Metric(name, true /* cumulative */, sync, units, description, m.Value, fields...) 216 } 217 218 // MustCreateNewUint64Metric calls NewUint64Metric and panics if it returns an 219 // error. 220 func MustCreateNewUint64Metric(name string, sync bool, description string, fields ...Field) *Uint64Metric { 221 m, err := NewUint64Metric(name, sync, pb.MetricMetadata_UNITS_NONE, description, fields...) 222 if err != nil { 223 panic(fmt.Sprintf("Unable to create metric %q: %s", name, err)) 224 } 225 return m 226 } 227 228 // MustCreateNewUint64NanosecondsMetric calls NewUint64Metric and panics if it 229 // returns an error. 230 func MustCreateNewUint64NanosecondsMetric(name string, sync bool, description string) *Uint64Metric { 231 m, err := NewUint64Metric(name, sync, pb.MetricMetadata_UNITS_NANOSECONDS, description) 232 if err != nil { 233 panic(fmt.Sprintf("Unable to create metric %q: %s", name, err)) 234 } 235 return m 236 } 237 238 // Value returns the current value of the metric for the given set of fields. 239 func (m *Uint64Metric) Value(fieldValues ...string) uint64 { 240 if m.numFields != len(fieldValues) { 241 panic(fmt.Sprintf("Number of fieldValues %d is not equal to the number of metric fields %d", len(fieldValues), m.numFields)) 242 } 243 244 switch m.numFields { 245 case 0: 246 return atomic.LoadUint64(&m.value) 247 case 1: 248 m.mu.RLock() 249 defer m.mu.RUnlock() 250 251 fieldValue := fieldValues[0] 252 if _, ok := m.fields[fieldValue]; !ok { 253 panic(fmt.Sprintf("Metric does not allow to have field value %s", fieldValue)) 254 } 255 return m.fields[fieldValue] 256 default: 257 panic("Sentry metrics do not support more than one field") 258 } 259 } 260 261 // Increment increments the metric field by 1. 262 func (m *Uint64Metric) Increment(fieldValues ...string) { 263 m.IncrementBy(1, fieldValues...) 264 } 265 266 // IncrementBy increments the metric by v. 267 func (m *Uint64Metric) IncrementBy(v uint64, fieldValues ...string) { 268 if m.numFields != len(fieldValues) { 269 panic(fmt.Sprintf("Number of fieldValues %d is not equal to the number of metric fields %d", len(fieldValues), m.numFields)) 270 } 271 272 switch m.numFields { 273 case 0: 274 atomic.AddUint64(&m.value, v) 275 return 276 case 1: 277 fieldValue := fieldValues[0] 278 m.mu.Lock() 279 defer m.mu.Unlock() 280 281 if _, ok := m.fields[fieldValue]; !ok { 282 panic(fmt.Sprintf("Metric does not allow to have field value %s", fieldValue)) 283 } 284 m.fields[fieldValue] += v 285 default: 286 panic("Sentry metrics do not support more than one field") 287 } 288 } 289 290 // metricSet holds named metrics. 291 type metricSet struct { 292 m map[string]customUint64Metric 293 } 294 295 // makeMetricSet returns a new metricSet. 296 func makeMetricSet() metricSet { 297 return metricSet{ 298 m: make(map[string]customUint64Metric), 299 } 300 } 301 302 // Values returns a snapshot of all values in m. 303 func (m *metricSet) Values() metricValues { 304 vals := make(metricValues) 305 306 for k, v := range m.m { 307 fields := v.metadata.GetFields() 308 switch len(fields) { 309 case 0: 310 vals[k] = v.value() 311 case 1: 312 values := fields[0].GetAllowedValues() 313 fieldsMap := make(map[string]uint64) 314 for _, fieldValue := range values { 315 fieldsMap[fieldValue] = v.value(fieldValue) 316 } 317 vals[k] = fieldsMap 318 default: 319 panic(fmt.Sprintf("Unsupported number of metric fields: %d", len(fields))) 320 } 321 } 322 return vals 323 } 324 325 // metricValues contains a copy of the values of all metrics. It is a map 326 // with key as metric name and value can be either uint64 or map[string]uint64 327 // to support metrics with one field. 328 type metricValues map[string]interface{} 329 330 var ( 331 // emitMu protects metricsAtLastEmit and ensures that all emitted 332 // metrics are strongly ordered (older metrics are never emitted after 333 // newer metrics). 334 emitMu sync.Mutex 335 336 // metricsAtLastEmit contains the state of the metrics at the last emit event. 337 metricsAtLastEmit metricValues 338 ) 339 340 // EmitMetricUpdate emits a MetricUpdate over the event channel. 341 // 342 // Only metrics that have changed since the last call are emitted. 343 // 344 // EmitMetricUpdate is thread-safe. 345 // 346 // Preconditions: 347 // * Initialize has been called. 348 func EmitMetricUpdate() { 349 emitMu.Lock() 350 defer emitMu.Unlock() 351 352 snapshot := allMetrics.Values() 353 354 m := pb.MetricUpdate{} 355 // On the first call metricsAtLastEmit will be empty. Include all 356 // metrics then. 357 for k, v := range snapshot { 358 prev, ok := metricsAtLastEmit[k] 359 switch t := v.(type) { 360 case uint64: 361 // Metric exists and value did not change. 362 if ok && prev.(uint64) == t { 363 continue 364 } 365 366 m.Metrics = append(m.Metrics, &pb.MetricValue{ 367 Name: k, 368 Value: &pb.MetricValue_Uint64Value{Uint64Value: t}, 369 }) 370 case map[string]uint64: 371 for fieldValue, metricValue := range t { 372 // Emit data on the first call only if the field 373 // value has been incremented. For all other 374 // calls, emit data if the field value has been 375 // changed from the previous emit. 376 if (!ok && metricValue == 0) || (ok && prev.(map[string]uint64)[fieldValue] == metricValue) { 377 continue 378 } 379 380 m.Metrics = append(m.Metrics, &pb.MetricValue{ 381 Name: k, 382 FieldValues: []string{fieldValue}, 383 Value: &pb.MetricValue_Uint64Value{Uint64Value: metricValue}, 384 }) 385 } 386 } 387 } 388 389 metricsAtLastEmit = snapshot 390 if len(m.Metrics) == 0 { 391 return 392 } 393 394 if log.IsLogging(log.Debug) { 395 sort.Slice(m.Metrics, func(i, j int) bool { 396 return m.Metrics[i].Name < m.Metrics[j].Name 397 }) 398 log.Debugf("Emitting metrics:") 399 for _, metric := range m.Metrics { 400 log.Debugf("%s: %+v", metric.Name, metric.Value) 401 } 402 } 403 404 if err := eventchannel.Emit(&m); err != nil { 405 log.Warningf("Unable to emit metrics: %s", err) 406 } 407 }