github.com/cilium/cilium@v1.16.2/pkg/endpointmanager/policymap_pressure.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package endpointmanager
     5  
     6  import (
     7  	"golang.org/x/exp/maps"
     8  
     9  	"github.com/cilium/cilium/pkg/endpoint"
    10  	"github.com/cilium/cilium/pkg/lock"
    11  	"github.com/cilium/cilium/pkg/logging/logfields"
    12  	"github.com/cilium/cilium/pkg/maps/policymap"
    13  	"github.com/cilium/cilium/pkg/metrics"
    14  	"github.com/cilium/cilium/pkg/time"
    15  	"github.com/cilium/cilium/pkg/trigger"
    16  )
    17  
    18  // Update upserts the endpoint ID and updates the max endpoint policy map pressure metric.
    19  func (p *policyMapPressure) Update(ev endpoint.PolicyMapPressureEvent) {
    20  	val := ev.Value
    21  	p.Lock()
    22  	p.current[ev.EndpointID] = val
    23  	p.Unlock()
    24  
    25  	log.WithField(logfields.Value, val).Debug("EndpointManager policymap received event")
    26  
    27  	p.trigger.Trigger()
    28  }
    29  
    30  // Remove removes an endpoints policy map pressure by endpoint ID.
    31  // Should be called to clean up the metric when an endpoint is removed.
    32  func (p *policyMapPressure) Remove(id uint16) {
    33  	p.Lock()
    34  	delete(p.current, id)
    35  	p.Unlock()
    36  
    37  	p.trigger.Trigger()
    38  }
    39  
// policyMapPressureMinInterval is the MinInterval passed to the trigger that
// refreshes the policy map pressure gauge (see newPolicyMapPressure).
var policyMapPressureMinInterval = 10 * time.Second
    41  
    42  func newPolicyMapPressure() *policyMapPressure {
    43  	if !metrics.BPFMapPressure {
    44  		return nil
    45  	}
    46  
    47  	p := new(policyMapPressure)
    48  	p.gauge = metrics.NewBPFMapPressureGauge(policymap.MapName+"*", policymap.PressureMetricThreshold)
    49  	p.current = make(map[uint16]float64)
    50  
    51  	var err error
    52  	p.trigger, err = trigger.NewTrigger(trigger.Parameters{
    53  		// It seems like 10s is a small enough window of time where the user
    54  		// can still reasonably react to a rising BPF map pressure. Keep it
    55  		// below the default Prometheus scrape interval of 15s anyway.
    56  		MinInterval: policyMapPressureMinInterval,
    57  		TriggerFunc: func([]string) { p.update() },
    58  		Name:        "endpointmanager-policymap-max-size-metrics",
    59  	})
    60  	if err != nil {
    61  		log.WithError(err).Panic("Failed to initialize trigger for policymap pressure metric")
    62  	}
    63  
    64  	return p
    65  }
    66  
    67  func (mgr *policyMapPressure) update() {
    68  	log.Debug("EndpointManager policymap event metric update triggered")
    69  
    70  	if mgr.gauge == nil {
    71  		return
    72  	}
    73  
    74  	mgr.RLock()
    75  	max := float64(0)
    76  	for _, value := range maps.Values(mgr.current) {
    77  		if value > max {
    78  			max = value
    79  		}
    80  	}
    81  	mgr.RUnlock()
    82  	mgr.gauge.Set(max)
    83  }
    84  
// metricsGauge is the minimal gauge interface policyMapPressure depends on:
// a single method that accepts a float64 value.
type metricsGauge interface {
	// Set is called with the value the gauge should report.
	Set(value float64)
}
    88  
// policyMapPressure tracks the policymap pressure reported for each endpoint
// and provides the endpoints' policymap pressure metric. It only exports the
// maximum policymap pressure from all endpoints within the EndpointManager to
// reduce cardinality of the metric.
type policyMapPressure struct {
	// RWMutex guards current.
	lock.RWMutex

	// current holds the most recently recorded policymap pressure value for
	// each endpoint, keyed by endpoint ID. The maximum of these values is
	// pushed into gauge via trigger.
	current map[uint16]float64

	// gauge is the gauge metric.
	gauge metricsGauge

	// trigger handles exporting / updating the gauge with the value in current
	// on an interval.
	trigger *trigger.Trigger
}