github.com/cilium/cilium@v1.16.2/pkg/bpf/map_linux.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  //go:build linux
     5  
     6  package bpf
     7  
     8  import (
     9  	"context"
    10  	"errors"
    11  	"fmt"
    12  	"io/fs"
    13  	"os"
    14  	"path"
    15  	"reflect"
    16  	"strings"
    17  
    18  	"github.com/cilium/ebpf"
    19  	"github.com/sirupsen/logrus"
    20  
    21  	"github.com/cilium/cilium/api/v1/models"
    22  	"github.com/cilium/cilium/pkg/controller"
    23  	"github.com/cilium/cilium/pkg/lock"
    24  	"github.com/cilium/cilium/pkg/logging/logfields"
    25  	"github.com/cilium/cilium/pkg/metrics"
    26  	"github.com/cilium/cilium/pkg/option"
    27  	"github.com/cilium/cilium/pkg/spanstat"
    28  	"github.com/cilium/cilium/pkg/time"
    29  )
    30  
var (
	// ErrMaxLookup is returned when the maximum number of map element lookups has
	// been reached.
	ErrMaxLookup = errors.New("maximum number of lookups reached")

	// bpfMapSyncControllerGroup is the controller group assigned to the
	// per-map error resolver controllers set up by scheduleErrorResolver.
	bpfMapSyncControllerGroup = controller.NewGroup("bpf-map-sync")
)
    38  
// MapKey is the interface implemented by the key types of a Map. The string
// form of a key is used, among other things, to index the user space cache.
type MapKey interface {
	fmt.Stringer

	// New must return a pointer to a new MapKey.
	New() MapKey
}
    45  
// MapValue is the interface implemented by the value types of a Map. The
// string form of a value is used when dumping the map into strings.
type MapValue interface {
	fmt.Stringer

	// New must return a pointer to a new MapValue.
	New() MapValue
}
    52  
// cacheEntry is the user space record of a single map entry. It is stored in
// Map.cache and is also the payload handed to the events buffer.
type cacheEntry struct {
	// Key and Value are the map key and value of the entry. Value is left
	// unset for entries that only describe a deletion.
	Key   MapKey
	Value MapValue

	// DesiredAction is OK after a successful update, Insert after a failed
	// update and Delete for a pending or failed deletion.
	DesiredAction DesiredAction
	// LastError is the error returned by the last operation on this entry,
	// nil if it succeeded.
	LastError error
}
    60  
// Map is the user space handle of a BPF map. It wraps the underlying ebpf.Map
// once opened or created, plus optional user space state: a cache of entries,
// a pressure gauge and an events buffer.
type Map struct {
	// m is the opened kernel map; it is nil while the map is not open.
	m *ebpf.Map
	// spec will be nil after the map has been created
	spec *ebpf.MapSpec

	// key and value are prototype instances whose New() methods are used to
	// allocate fresh keys and values, e.g. while dumping the map.
	key   MapKey
	value MapValue

	// name is the basename of the map; path is the location of its pin on
	// the filesystem and is derived from name via MapPath when unset.
	name string
	path string
	// lock is taken by the exported operations (Open, Update, Delete, ...)
	// around accesses to the state of the Map.
	lock lock.RWMutex

	// cachedCommonName is the common portion of the name excluding any
	// endpoint ID
	cachedCommonName string

	// enableSync is true when synchronization retries have been enabled.
	enableSync bool

	// withValueCache is true when map cache has been enabled
	withValueCache bool

	// cache as key/value entries when map cache is enabled or as key-only when
	// pressure metric is enabled
	cache map[string]*cacheEntry

	// errorResolverLastScheduled is the timestamp when the error resolver
	// was last scheduled
	errorResolverLastScheduled time.Time

	// outstandingErrors states whether there are outstanding errors, occurred while
	// syncing an entry with the kernel, that need to be resolved. This variable exists
	// to avoid iterating over the full cache to check if reconciliation is necessary,
	// but it is possible that it gets out of sync if an error is automatically
	// resolved while performing a subsequent Update/Delete operation on the same key.
	outstandingErrors bool

	// pressureGauge is a metric that tracks the pressure on this map
	pressureGauge *metrics.GaugeWithThreshold

	// eventsBufferEnabled is true when events buffer is enabled.
	eventsBufferEnabled bool

	// events contains the optional event buffer which stores the last n bpf
	// map events.
	events *eventsBuffer

	// group is the metric group name for this map, it classifies maps of the same
	// type that share the same metric group.
	group string
}
   111  
   112  func (m *Map) Type() ebpf.MapType {
   113  	if m.m != nil {
   114  		return m.m.Type()
   115  	}
   116  	if m.spec != nil {
   117  		return m.spec.Type
   118  	}
   119  	return ebpf.UnspecifiedMap
   120  }
   121  
   122  func (m *Map) KeySize() uint32 {
   123  	if m.m != nil {
   124  		return m.m.KeySize()
   125  	}
   126  	if m.spec != nil {
   127  		return m.spec.KeySize
   128  	}
   129  	return 0
   130  }
   131  
   132  func (m *Map) ValueSize() uint32 {
   133  	if m.m != nil {
   134  		return m.m.ValueSize()
   135  	}
   136  	if m.spec != nil {
   137  		return m.spec.ValueSize
   138  	}
   139  	return 0
   140  }
   141  
   142  func (m *Map) MaxEntries() uint32 {
   143  	if m.m != nil {
   144  		return m.m.MaxEntries()
   145  	}
   146  	if m.spec != nil {
   147  		return m.spec.MaxEntries
   148  	}
   149  	return 0
   150  }
   151  
   152  func (m *Map) Flags() uint32 {
   153  	if m.m != nil {
   154  		return m.m.Flags()
   155  	}
   156  	if m.spec != nil {
   157  		return m.spec.Flags
   158  	}
   159  	return 0
   160  }
   161  
   162  func (m *Map) updateMetrics() {
   163  	if m.group == "" {
   164  		return
   165  	}
   166  	metrics.UpdateMapCapacity(m.group, m.MaxEntries())
   167  }
   168  
   169  // NewMap creates a new Map instance - object representing a BPF map
   170  func NewMap(name string, mapType ebpf.MapType, mapKey MapKey, mapValue MapValue,
   171  	maxEntries int, flags uint32) *Map {
   172  
   173  	keySize := reflect.TypeOf(mapKey).Elem().Size()
   174  	valueSize := reflect.TypeOf(mapValue).Elem().Size()
   175  
   176  	return &Map{
   177  		spec: &ebpf.MapSpec{
   178  			Type:       mapType,
   179  			Name:       path.Base(name),
   180  			KeySize:    uint32(keySize),
   181  			ValueSize:  uint32(valueSize),
   182  			MaxEntries: uint32(maxEntries),
   183  			Flags:      flags,
   184  		},
   185  		name:  path.Base(name),
   186  		key:   mapKey,
   187  		value: mapValue,
   188  		group: name,
   189  	}
   190  }
   191  
   192  // NewMap creates a new Map instance - object representing a BPF map
   193  func NewMapWithInnerSpec(name string, mapType ebpf.MapType, mapKey MapKey, mapValue MapValue,
   194  	maxEntries int, flags uint32, innerSpec *ebpf.MapSpec) *Map {
   195  
   196  	keySize := reflect.TypeOf(mapKey).Elem().Size()
   197  	valueSize := reflect.TypeOf(mapValue).Elem().Size()
   198  
   199  	return &Map{
   200  		spec: &ebpf.MapSpec{
   201  			Type:       mapType,
   202  			Name:       path.Base(name),
   203  			KeySize:    uint32(keySize),
   204  			ValueSize:  uint32(valueSize),
   205  			MaxEntries: uint32(maxEntries),
   206  			Flags:      flags,
   207  			InnerMap:   innerSpec,
   208  		},
   209  		name:  path.Base(name),
   210  		key:   mapKey,
   211  		value: mapValue,
   212  	}
   213  }
   214  
   215  func (m *Map) commonName() string {
   216  	if m.cachedCommonName != "" {
   217  		return m.cachedCommonName
   218  	}
   219  
   220  	m.cachedCommonName = extractCommonName(m.name)
   221  	return m.cachedCommonName
   222  }
   223  
   224  func (m *Map) NonPrefixedName() string {
   225  	return strings.TrimPrefix(m.name, metrics.Namespace+"_")
   226  }
   227  
   228  // scheduleErrorResolver schedules a periodic resolver controller that scans
   229  // all BPF map caches for unresolved errors and attempts to resolve them. On
   230  // error of resolution, the controller is-rescheduled in an expedited manner
   231  // with an exponential back-off.
   232  //
   233  // m.lock must be held for writing
   234  func (m *Map) scheduleErrorResolver() {
   235  	m.outstandingErrors = true
   236  
   237  	if time.Since(m.errorResolverLastScheduled) <= errorResolverSchedulerMinInterval {
   238  		return
   239  	}
   240  
   241  	m.errorResolverLastScheduled = time.Now()
   242  
   243  	go func() {
   244  		time.Sleep(errorResolverSchedulerDelay)
   245  		mapControllers.UpdateController(m.controllerName(),
   246  			controller.ControllerParams{
   247  				Group:       bpfMapSyncControllerGroup,
   248  				DoFunc:      m.resolveErrors,
   249  				RunInterval: errorResolverSchedulerMinInterval,
   250  			},
   251  		)
   252  	}()
   253  
   254  }
   255  
   256  // WithCache enables use of a cache. This will store all entries inserted from
   257  // user space in a local cache (map) and will indicate the status of each
   258  // individual entry.
   259  func (m *Map) WithCache() *Map {
   260  	if m.cache == nil {
   261  		m.cache = map[string]*cacheEntry{}
   262  	}
   263  	m.withValueCache = true
   264  	m.enableSync = true
   265  	return m
   266  }
   267  
   268  // WithEvents enables use of the event buffer, if the buffer is enabled.
   269  // This stores all map events (i.e. add/update/delete) in a bounded event buffer.
   270  // If eventTTL is not zero, than events that are older than the TTL
   271  // will periodically be removed from the buffer.
   272  // Enabling events will use aprox proportional to 100MB for every million capacity
   273  // in maxSize.
   274  //
   275  // TODO: The IPCache map have many periodic update events added by a controller for entries such as the 0.0.0.0/0 range.
   276  // These fill the event buffer with possibly unnecessary events.
   277  // We should either provide an option to aggregate these events, ignore hem from the ipcache event buffer or store them in a separate buffer.
   278  func (m *Map) WithEvents(c option.BPFEventBufferConfig) *Map {
   279  	if !c.Enabled {
   280  		return m
   281  	}
   282  	m.scopedLogger().WithFields(logrus.Fields{
   283  		"size": c.MaxSize,
   284  		"ttl":  c.TTL,
   285  	}).Debug("enabling events buffer")
   286  	m.eventsBufferEnabled = true
   287  	m.initEventsBuffer(c.MaxSize, c.TTL)
   288  	return m
   289  }
   290  
// WithGroupName sets the metric group name of the map, i.e. the group under
// which updateMetrics reports the map capacity. Returns m to allow chaining.
func (m *Map) WithGroupName(group string) *Map {
	m.group = group
	return m
}
   295  
   296  // WithPressureMetricThreshold enables the tracking of a metric that measures
   297  // the pressure of this map. This metric is only reported if over the
   298  // threshold.
   299  func (m *Map) WithPressureMetricThreshold(threshold float64) *Map {
   300  	// When pressure metric is enabled, we keep track of map keys in cache
   301  	if m.cache == nil {
   302  		m.cache = map[string]*cacheEntry{}
   303  	}
   304  
   305  	m.pressureGauge = metrics.NewBPFMapPressureGauge(m.NonPrefixedName(), threshold)
   306  
   307  	return m
   308  }
   309  
   310  // WithPressureMetric enables tracking and reporting of this map pressure with
   311  // threshold 0.
   312  func (m *Map) WithPressureMetric() *Map {
   313  	return m.WithPressureMetricThreshold(0.0)
   314  }
   315  
   316  // UpdatePressureMetricWithSize updates map pressure metric using the given map size.
   317  func (m *Map) UpdatePressureMetricWithSize(size int32) {
   318  	if m.pressureGauge == nil {
   319  		return
   320  	}
   321  
   322  	// Do a lazy check of MetricsConfig as it is not available at map static
   323  	// initialization.
   324  	if !metrics.BPFMapPressure {
   325  		if !m.withValueCache {
   326  			m.cache = nil
   327  		}
   328  		m.pressureGauge = nil
   329  		return
   330  	}
   331  
   332  	pvalue := float64(size) / float64(m.MaxEntries())
   333  	m.pressureGauge.Set(pvalue)
   334  }
   335  
   336  func (m *Map) updatePressureMetric() {
   337  	// Skipping pressure metric gauge updates for LRU map as the cache size
   338  	// does not accurately represent the actual map sie.
   339  	if m.spec != nil && m.spec.Type == ebpf.LRUHash {
   340  		return
   341  	}
   342  	m.UpdatePressureMetricWithSize(int32(len(m.cache)))
   343  }
   344  
// FD returns the file descriptor of the underlying ebpf.Map. The map must be
// open: m.m is dereferenced without a nil check.
func (m *Map) FD() int {
	return m.m.FD()
}
   348  
// Name returns the basename of this map.
func (m *Map) Name() string {
	return m.name
}
   353  
   354  // Path returns the path to this map on the filesystem.
   355  func (m *Map) Path() (string, error) {
   356  	if err := m.setPathIfUnset(); err != nil {
   357  		return "", err
   358  	}
   359  
   360  	return m.path, nil
   361  }
   362  
   363  // Unpin attempts to unpin (remove) the map from the filesystem.
   364  func (m *Map) Unpin() error {
   365  	path, err := m.Path()
   366  	if err != nil {
   367  		return err
   368  	}
   369  
   370  	return os.RemoveAll(path)
   371  }
   372  
   373  // UnpinIfExists tries to unpin (remove) the map only if it exists.
   374  func (m *Map) UnpinIfExists() error {
   375  	found, err := m.exist()
   376  	if err != nil {
   377  		return err
   378  	}
   379  
   380  	if !found {
   381  		return nil
   382  	}
   383  
   384  	return m.Unpin()
   385  }
   386  
   387  func (m *Map) controllerName() string {
   388  	return fmt.Sprintf("bpf-map-sync-%s", m.name)
   389  }
   390  
   391  // OpenMap opens the map at pinPath.
   392  func OpenMap(pinPath string, key MapKey, value MapValue) (*Map, error) {
   393  	if !path.IsAbs(pinPath) {
   394  		return nil, fmt.Errorf("pinPath must be absolute: %s", pinPath)
   395  	}
   396  
   397  	em, err := ebpf.LoadPinnedMap(pinPath, nil)
   398  	if err != nil {
   399  		return nil, err
   400  	}
   401  
   402  	m := &Map{
   403  		m:     em,
   404  		name:  path.Base(pinPath),
   405  		path:  pinPath,
   406  		key:   key,
   407  		value: value,
   408  	}
   409  
   410  	m.updateMetrics()
   411  	registerMap(pinPath, m)
   412  
   413  	return m, nil
   414  }
   415  
   416  func (m *Map) setPathIfUnset() error {
   417  	if m.path == "" {
   418  		if m.name == "" {
   419  			return fmt.Errorf("either path or name must be set")
   420  		}
   421  
   422  		m.path = MapPath(m.name)
   423  	}
   424  
   425  	return nil
   426  }
   427  
   428  // Recreate removes any pin at the Map's pin path, recreates and re-pins it.
   429  func (m *Map) Recreate() error {
   430  	m.lock.Lock()
   431  	defer m.lock.Unlock()
   432  
   433  	if m.m != nil {
   434  		return fmt.Errorf("map already open: %s", m.name)
   435  	}
   436  
   437  	if err := m.setPathIfUnset(); err != nil {
   438  		return err
   439  	}
   440  
   441  	if err := os.Remove(m.path); err != nil && !errors.Is(err, fs.ErrNotExist) {
   442  		return fmt.Errorf("removing pinned map %s: %w", m.name, err)
   443  	}
   444  
   445  	m.scopedLogger().Infof("Removed map pin at %s, recreating and re-pinning map %s", m.path, m.name)
   446  
   447  	return m.openOrCreate(true)
   448  }
   449  
   450  // IsOpen returns true if the map has been opened.
   451  func (m *Map) IsOpen() bool {
   452  	m.lock.Lock()
   453  	defer m.lock.Unlock()
   454  	return m.m != nil
   455  }
   456  
// OpenOrCreate attempts to open the Map, or if it does not yet exist, create
// the Map. If the existing map's attributes such as map type, key/value size,
// capacity, etc. do not match the Map's attributes, then the map will be
// deleted and reopened without any attempt to retain its previous contents.
// If the map is marked as non-persistent, it will always be recreated.
//
// The map is pinned by name. Calling OpenOrCreate on an already open map is a
// no-op. Returns an error if the map could not be opened or created.
func (m *Map) OpenOrCreate() error {
	m.lock.Lock()
	defer m.lock.Unlock()

	return m.openOrCreate(true)
}
   470  
// CreateUnpinned creates the map without pinning it to the file system. It is
// the same as OpenOrCreate except that no pinning is requested in the MapSpec.
//
// TODO(tb): Remove this when all map creation takes MapSpec.
func (m *Map) CreateUnpinned() error {
	m.lock.Lock()
	defer m.lock.Unlock()

	return m.openOrCreate(false)
}
   480  
   481  // Create is similar to OpenOrCreate, but closes the map after creating or
   482  // opening it.
   483  func (m *Map) Create() error {
   484  	if err := m.OpenOrCreate(); err != nil {
   485  		return err
   486  	}
   487  	return m.Close()
   488  }
   489  
   490  func (m *Map) openOrCreate(pin bool) error {
   491  	if m.m != nil {
   492  		return nil
   493  	}
   494  
   495  	if m.spec == nil {
   496  		return fmt.Errorf("attempted to create map %s without MapSpec", m.name)
   497  	}
   498  
   499  	if err := m.setPathIfUnset(); err != nil {
   500  		return err
   501  	}
   502  
   503  	m.spec.Flags |= GetPreAllocateMapFlags(m.spec.Type)
   504  
   505  	if m.spec.InnerMap != nil {
   506  		m.spec.InnerMap.Flags |= GetPreAllocateMapFlags(m.spec.InnerMap.Type)
   507  	}
   508  
   509  	if pin {
   510  		m.spec.Pinning = ebpf.PinByName
   511  	}
   512  
   513  	em, err := OpenOrCreateMap(m.spec, path.Dir(m.path))
   514  	if err != nil {
   515  		return err
   516  	}
   517  
   518  	m.updateMetrics()
   519  	registerMap(m.path, m)
   520  
   521  	// Consume the MapSpec.
   522  	m.spec = nil
   523  
   524  	// Retain the Map.
   525  	m.m = em
   526  
   527  	return nil
   528  }
   529  
// Open opens the BPF map from its pin path. All calls to Open() are
// serialized due to acquiring m.lock. Opening an already open map is a no-op.
func (m *Map) Open() error {
	m.lock.Lock()
	defer m.lock.Unlock()

	return m.open()
}
   538  
   539  // open opens the BPF map. It is identical to Open() but should be used when
   540  // m.lock is already held. open() may only be used if m.lock is held for
   541  // writing.
   542  func (m *Map) open() error {
   543  	if m.m != nil {
   544  		return nil
   545  	}
   546  
   547  	if err := m.setPathIfUnset(); err != nil {
   548  		return err
   549  	}
   550  
   551  	em, err := ebpf.LoadPinnedMap(m.path, nil)
   552  	if err != nil {
   553  		return fmt.Errorf("loading pinned map %s: %w", m.path, err)
   554  	}
   555  
   556  	m.updateMetrics()
   557  	registerMap(m.path, m)
   558  
   559  	m.m = em
   560  
   561  	return nil
   562  }
   563  
   564  func (m *Map) Close() error {
   565  	m.lock.Lock()
   566  	defer m.lock.Unlock()
   567  
   568  	if m.enableSync {
   569  		mapControllers.RemoveController(m.controllerName())
   570  	}
   571  
   572  	if m.m != nil {
   573  		m.m.Close()
   574  		m.m = nil
   575  	}
   576  
   577  	unregisterMap(m.path, m)
   578  
   579  	return nil
   580  }
   581  
   582  func (m *Map) NextKey(key, nextKeyOut interface{}) error {
   583  	var duration *spanstat.SpanStat
   584  	if metrics.BPFSyscallDuration.IsEnabled() {
   585  		duration = spanstat.Start()
   586  	}
   587  
   588  	err := m.m.NextKey(key, nextKeyOut)
   589  
   590  	if metrics.BPFSyscallDuration.IsEnabled() {
   591  		metrics.BPFSyscallDuration.WithLabelValues(metricOpGetNextKey, metrics.Error2Outcome(err)).Observe(duration.End(err == nil).Total().Seconds())
   592  	}
   593  
   594  	return err
   595  }
   596  
// DumpCallback is the function invoked for each key/value pair found while
// dumping a Map.
type DumpCallback func(key MapKey, value MapValue)
   598  
   599  // DumpWithCallback iterates over the Map and calls the given DumpCallback for
   600  // each map entry. With the current implementation, it is safe for callbacks to
   601  // retain the values received, as they are guaranteed to be new instances.
   602  //
   603  // TODO(tb): This package currently doesn't support dumping per-cpu maps, as
   604  // ReadValueSize is always set to the size of a single value.
   605  func (m *Map) DumpWithCallback(cb DumpCallback) error {
   606  	if cb == nil {
   607  		return errors.New("empty callback")
   608  	}
   609  
   610  	if err := m.Open(); err != nil {
   611  		return err
   612  	}
   613  
   614  	m.lock.RLock()
   615  	defer m.lock.RUnlock()
   616  
   617  	// Don't need deep copies here, only fresh pointers.
   618  	mk := m.key.New()
   619  	mv := m.value.New()
   620  
   621  	i := m.m.Iterate()
   622  	for i.Next(mk, mv) {
   623  		cb(mk, mv)
   624  
   625  		mk = m.key.New()
   626  		mv = m.value.New()
   627  	}
   628  
   629  	return i.Err()
   630  }
   631  
   632  // DumpWithCallbackIfExists is similar to DumpWithCallback, but returns earlier
   633  // if the given map does not exist.
   634  func (m *Map) DumpWithCallbackIfExists(cb DumpCallback) error {
   635  	found, err := m.exist()
   636  	if err != nil {
   637  		return err
   638  	}
   639  
   640  	if found {
   641  		return m.DumpWithCallback(cb)
   642  	}
   643  
   644  	return nil
   645  }
   646  
   647  // DumpReliablyWithCallback is similar to DumpWithCallback, but performs
   648  // additional tracking of the current and recently seen keys, so that if an
   649  // element is removed from the underlying kernel map during the dump, the dump
   650  // can continue from a recently seen key rather than restarting from scratch.
   651  // In addition, it caps the maximum number of map entry iterations at 4 times
   652  // the maximum map size. If this limit is reached, ErrMaxLookup is returned.
   653  //
   654  // The caller must provide a callback for handling each entry, and a stats
   655  // object initialized via a call to NewDumpStats().
   656  func (m *Map) DumpReliablyWithCallback(cb DumpCallback, stats *DumpStats) error {
   657  	if cb == nil {
   658  		return errors.New("empty callback")
   659  	}
   660  
   661  	if stats == nil {
   662  		return errors.New("stats is nil")
   663  	}
   664  
   665  	var (
   666  		prevKey    = m.key.New()
   667  		currentKey = m.key.New()
   668  		nextKey    = m.key.New()
   669  		value      = m.value.New()
   670  
   671  		prevKeyValid = false
   672  	)
   673  
   674  	stats.start()
   675  	defer stats.finish()
   676  
   677  	if err := m.Open(); err != nil {
   678  		return err
   679  	}
   680  
   681  	// Get the first map key.
   682  	if err := m.NextKey(nil, currentKey); err != nil {
   683  		stats.Lookup = 1
   684  		if errors.Is(err, ebpf.ErrKeyNotExist) {
   685  			// Empty map, nothing to iterate.
   686  			stats.Completed = true
   687  			return nil
   688  		}
   689  	}
   690  
   691  	// maxLookup is an upper bound limit to prevent backtracking forever
   692  	// when iterating over the map's elements (the map might be concurrently
   693  	// updated while being iterated)
   694  	maxLookup := stats.MaxEntries * 4
   695  
   696  	// This loop stops when all elements have been iterated (Map.NextKey() returns
   697  	// ErrKeyNotExist) OR, in order to avoid hanging if
   698  	// the map is continuously updated, when maxLookup has been reached
   699  	for stats.Lookup = 1; stats.Lookup <= maxLookup; stats.Lookup++ {
   700  		// currentKey was set by the first m.NextKey() above. We know it existed in
   701  		// the map, but it may have been deleted by a concurrent map operation.
   702  		//
   703  		// If currentKey is no longer in the map, nextKey may be the first key in
   704  		// the map again. Continue with nextKey only if we still find currentKey in
   705  		// the Lookup() after the call to m.NextKey(), this way we know nextKey is
   706  		// NOT the first key in the map and iteration hasn't reset.
   707  		nextKeyErr := m.NextKey(currentKey, nextKey)
   708  
   709  		if err := m.m.Lookup(currentKey, value); err != nil {
   710  			stats.LookupFailed++
   711  			// Restarting from a invalid key starts the iteration again from the beginning.
   712  			// If we have a previously found key, try to restart from there instead
   713  			if prevKeyValid {
   714  				currentKey = prevKey
   715  				// Restart from a given previous key only once, otherwise if the prevKey is
   716  				// concurrently deleted we might loop forever trying to look it up.
   717  				prevKeyValid = false
   718  				stats.KeyFallback++
   719  			} else {
   720  				// Depending on exactly when currentKey was deleted from the
   721  				// map, nextKey may be the actual key element after the deleted
   722  				// one, or the first element in the map.
   723  				currentKey = nextKey
   724  				// To avoid having nextKey and currentKey pointing at the same memory
   725  				// we allocate a new key for nextKey. Without this currentKey and nextKey
   726  				// would be the same pointer value and would get double iterated on the next
   727  				// iterations m.NextKey(...) call.
   728  				nextKey = m.key.New()
   729  				stats.Interrupted++
   730  			}
   731  			continue
   732  		}
   733  
   734  		cb(currentKey, value)
   735  
   736  		if nextKeyErr != nil {
   737  			if errors.Is(nextKeyErr, ebpf.ErrKeyNotExist) {
   738  				stats.Completed = true
   739  				return nil // end of map, we're done iterating
   740  			}
   741  			return nextKeyErr
   742  		}
   743  
   744  		// Prepare keys to move to the next iteration.
   745  		prevKey = currentKey
   746  		currentKey = nextKey
   747  		nextKey = m.key.New()
   748  		prevKeyValid = true
   749  	}
   750  
   751  	return ErrMaxLookup
   752  }
   753  
   754  // Dump returns the map (type map[string][]string) which contains all
   755  // data stored in BPF map.
   756  func (m *Map) Dump(hash map[string][]string) error {
   757  	callback := func(key MapKey, value MapValue) {
   758  		// No need to deep copy since we are creating strings.
   759  		hash[key.String()] = append(hash[key.String()], value.String())
   760  	}
   761  
   762  	if err := m.DumpWithCallback(callback); err != nil {
   763  		return err
   764  	}
   765  
   766  	return nil
   767  }
   768  
// BatchLookup returns the count of elements in the map by dumping the map
// using batch lookup.
//
// It forwards its arguments to BatchLookup of the underlying ebpf.Map and
// returns its results unchanged. The map must be open: m.m is dereferenced
// without a nil check.
func (m *Map) BatchLookup(cursor *ebpf.MapBatchCursor, keysOut, valuesOut interface{}, opts *ebpf.BatchOptions) (int, error) {
	return m.m.BatchLookup(cursor, keysOut, valuesOut, opts)
}
   774  
   775  // DumpIfExists dumps the contents of the map into hash via Dump() if the map
   776  // file exists
   777  func (m *Map) DumpIfExists(hash map[string][]string) error {
   778  	found, err := m.exist()
   779  	if err != nil {
   780  		return err
   781  	}
   782  
   783  	if found {
   784  		return m.Dump(hash)
   785  	}
   786  
   787  	return nil
   788  }
   789  
   790  func (m *Map) Lookup(key MapKey) (MapValue, error) {
   791  	if err := m.Open(); err != nil {
   792  		return nil, err
   793  	}
   794  
   795  	m.lock.RLock()
   796  	defer m.lock.RUnlock()
   797  
   798  	var duration *spanstat.SpanStat
   799  	if metrics.BPFSyscallDuration.IsEnabled() {
   800  		duration = spanstat.Start()
   801  	}
   802  
   803  	value := m.value.New()
   804  	err := m.m.Lookup(key, value)
   805  
   806  	if metrics.BPFSyscallDuration.IsEnabled() {
   807  		metrics.BPFSyscallDuration.WithLabelValues(metricOpLookup, metrics.Error2Outcome(err)).Observe(duration.End(err == nil).Total().Seconds())
   808  	}
   809  
   810  	if err != nil {
   811  		return nil, err
   812  	}
   813  
   814  	return value, nil
   815  }
   816  
   817  func (m *Map) Update(key MapKey, value MapValue) error {
   818  	var err error
   819  
   820  	m.lock.Lock()
   821  	defer m.lock.Unlock()
   822  
   823  	defer func() {
   824  		desiredAction := OK
   825  		if err != nil {
   826  			desiredAction = Insert
   827  		}
   828  		entry := &cacheEntry{
   829  			Key:           key,
   830  			Value:         value,
   831  			DesiredAction: desiredAction,
   832  			LastError:     err,
   833  		}
   834  		m.addToEventsLocked(MapUpdate, *entry)
   835  
   836  		if m.cache == nil {
   837  			return
   838  		}
   839  
   840  		if m.withValueCache {
   841  			if err != nil {
   842  				m.scheduleErrorResolver()
   843  			}
   844  			m.cache[key.String()] = &cacheEntry{
   845  				Key:           key,
   846  				Value:         value,
   847  				DesiredAction: desiredAction,
   848  				LastError:     err,
   849  			}
   850  			m.updatePressureMetric()
   851  		} else if err == nil {
   852  			m.cache[key.String()] = nil
   853  			m.updatePressureMetric()
   854  		}
   855  	}()
   856  
   857  	if err = m.open(); err != nil {
   858  		return err
   859  	}
   860  
   861  	err = m.m.Update(key, value, ebpf.UpdateAny)
   862  
   863  	if metrics.BPFMapOps.IsEnabled() {
   864  		metrics.BPFMapOps.WithLabelValues(m.commonName(), metricOpUpdate, metrics.Error2Outcome(err)).Inc()
   865  	}
   866  
   867  	if err != nil {
   868  		return fmt.Errorf("update map %s: %w", m.Name(), err)
   869  	}
   870  
   871  	return nil
   872  }
   873  
   874  // deleteMapEvent is run at every delete map event.
   875  // If cache is enabled, it will update the cache to reflect the delete.
   876  // As well, if event buffer is enabled, it adds a new event to the buffer.
   877  func (m *Map) deleteMapEvent(key MapKey, err error) {
   878  	m.addToEventsLocked(MapDelete, cacheEntry{
   879  		Key:           key,
   880  		DesiredAction: Delete,
   881  		LastError:     err,
   882  	})
   883  	m.deleteCacheEntry(key, err)
   884  }
   885  
   886  func (m *Map) deleteAllMapEvent() {
   887  	m.addToEventsLocked(MapDeleteAll, cacheEntry{})
   888  }
   889  
   890  // deleteCacheEntry evaluates the specified error, if nil the map key is
   891  // removed from the cache to indicate successful deletion. If non-nil, the map
   892  // key entry in the cache is updated to indicate deletion failure with the
   893  // specified error.
   894  //
   895  // Caller must hold m.lock for writing
   896  func (m *Map) deleteCacheEntry(key MapKey, err error) {
   897  	if m.cache == nil {
   898  		return
   899  	}
   900  
   901  	k := key.String()
   902  	if err == nil {
   903  		delete(m.cache, k)
   904  	} else if !m.withValueCache {
   905  		return
   906  	} else {
   907  		entry, ok := m.cache[k]
   908  		if !ok {
   909  			m.cache[k] = &cacheEntry{
   910  				Key: key,
   911  			}
   912  			entry = m.cache[k]
   913  		}
   914  
   915  		entry.DesiredAction = Delete
   916  		entry.LastError = err
   917  		m.scheduleErrorResolver()
   918  	}
   919  }
   920  
   921  // delete deletes the map entry corresponding to the given key. If ignoreMissing
   922  // is set to true and the entry was not found, the error metric is not
   923  // incremented for missing entries and nil error is returned.
   924  func (m *Map) delete(key MapKey, ignoreMissing bool) (_ bool, err error) {
   925  	defer func() {
   926  		m.deleteMapEvent(key, err)
   927  		if err != nil {
   928  			m.updatePressureMetric()
   929  		}
   930  	}()
   931  
   932  	if err = m.open(); err != nil {
   933  		return false, err
   934  	}
   935  
   936  	var duration *spanstat.SpanStat
   937  	if metrics.BPFSyscallDuration.IsEnabled() {
   938  		duration = spanstat.Start()
   939  	}
   940  
   941  	err = m.m.Delete(key)
   942  
   943  	if metrics.BPFSyscallDuration.IsEnabled() {
   944  		metrics.BPFSyscallDuration.WithLabelValues(metricOpDelete, metrics.Error2Outcome(err)).Observe(duration.End(err == nil).Total().Seconds())
   945  	}
   946  
   947  	if errors.Is(err, ebpf.ErrKeyNotExist) && ignoreMissing {
   948  		// Error and metrics handling is skipped in case ignoreMissing is set and
   949  		// the map key did not exist. This removes false positives in the delete
   950  		// metrics and skips the deferred cleanup of nonexistent entries. This
   951  		// situation occurs at least in the context of cleanup of NAT mappings from
   952  		// CT GC.
   953  		return false, nil
   954  	}
   955  
   956  	if metrics.BPFMapOps.IsEnabled() {
   957  		// err can be nil or any error other than ebpf.ErrKeyNotExist.
   958  		metrics.BPFMapOps.WithLabelValues(m.commonName(), metricOpDelete, metrics.Error2Outcome(err)).Inc()
   959  	}
   960  
   961  	if err != nil {
   962  		return false, fmt.Errorf("unable to delete element %s from map %s: %w", key, m.name, err)
   963  	}
   964  
   965  	return true, nil
   966  }
   967  
// SilentDelete deletes the map entry corresponding to the given key.
// If a map entry is not found this returns (false, nil). On success, deleted
// is true. m.lock is held for writing for the duration of the call.
func (m *Map) SilentDelete(key MapKey) (deleted bool, err error) {
	m.lock.Lock()
	defer m.lock.Unlock()

	return m.delete(key, true)
}
   976  
   977  // Delete deletes the map entry corresponding to the given key.
   978  func (m *Map) Delete(key MapKey) error {
   979  	m.lock.Lock()
   980  	defer m.lock.Unlock()
   981  
   982  	_, err := m.delete(key, false)
   983  	return err
   984  }
   985  
   986  // scopedLogger returns a logger scoped for the map. m.lock must be held.
   987  func (m *Map) scopedLogger() *logrus.Entry {
   988  	return log.WithFields(logrus.Fields{logfields.Path: m.path, "name": m.name})
   989  }
   990  
   991  // DeleteAll deletes all entries of a map by traversing the map and deleting individual
   992  // entries. Note that if entries are added while the taversal is in progress,
   993  // such entries may survive the deletion process.
   994  func (m *Map) DeleteAll() error {
   995  	m.lock.Lock()
   996  	defer m.lock.Unlock()
   997  	defer m.updatePressureMetric()
   998  	scopedLog := m.scopedLogger()
   999  	scopedLog.Debug("deleting all entries in map")
  1000  
  1001  	if m.withValueCache {
  1002  		// Mark all entries for deletion, upon successful deletion,
  1003  		// entries will be removed or the LastError will be updated
  1004  		for _, entry := range m.cache {
  1005  			entry.DesiredAction = Delete
  1006  			entry.LastError = fmt.Errorf("deletion pending")
  1007  		}
  1008  	}
  1009  
  1010  	if err := m.open(); err != nil {
  1011  		return err
  1012  	}
  1013  
  1014  	mk := m.key.New()
  1015  	mv := make([]byte, m.ValueSize())
  1016  
  1017  	defer m.deleteAllMapEvent()
  1018  
  1019  	i := m.m.Iterate()
  1020  	for i.Next(mk, &mv) {
  1021  		err := m.m.Delete(mk)
  1022  
  1023  		m.deleteCacheEntry(mk, err)
  1024  
  1025  		if err != nil {
  1026  			return err
  1027  		}
  1028  	}
  1029  
  1030  	err := i.Err()
  1031  	if err != nil {
  1032  		scopedLog.WithError(err).Warningf("Unable to correlate iteration key %v with cache entry. Inconsistent cache.", mk)
  1033  	}
  1034  
  1035  	return err
  1036  }
  1037  
  1038  // GetModel returns a BPF map in the representation served via the API
  1039  func (m *Map) GetModel() *models.BPFMap {
  1040  
  1041  	mapModel := &models.BPFMap{
  1042  		Path: m.path,
  1043  	}
  1044  
  1045  	mapModel.Cache = make([]*models.BPFMapEntry, 0, len(m.cache))
  1046  	if m.withValueCache {
  1047  		m.lock.RLock()
  1048  		defer m.lock.RUnlock()
  1049  		for k, entry := range m.cache {
  1050  			model := &models.BPFMapEntry{
  1051  				Key:           k,
  1052  				DesiredAction: entry.DesiredAction.String(),
  1053  			}
  1054  
  1055  			if entry.LastError != nil {
  1056  				model.LastError = entry.LastError.Error()
  1057  			}
  1058  
  1059  			if entry.Value != nil {
  1060  				model.Value = entry.Value.String()
  1061  			}
  1062  			mapModel.Cache = append(mapModel.Cache, model)
  1063  		}
  1064  		return mapModel
  1065  	}
  1066  
  1067  	stats := NewDumpStats(m)
  1068  	filterCallback := func(key MapKey, value MapValue) {
  1069  		mapModel.Cache = append(mapModel.Cache, &models.BPFMapEntry{
  1070  			Key:   key.String(),
  1071  			Value: value.String(),
  1072  		})
  1073  	}
  1074  
  1075  	m.DumpReliablyWithCallback(filterCallback, stats)
  1076  	return mapModel
  1077  }
  1078  
  1079  func (m *Map) addToEventsLocked(action Action, entry cacheEntry) {
  1080  	if !m.eventsBufferEnabled {
  1081  		return
  1082  	}
  1083  	m.events.add(&Event{
  1084  		action:     action,
  1085  		Timestamp:  time.Now(),
  1086  		cacheEntry: entry,
  1087  	})
  1088  }
  1089  
  1090  // resolveErrors is schedule by scheduleErrorResolver() and runs periodically.
  1091  // It resolves up to maxSyncErrors discrepancies between cache and BPF map in
  1092  // the kernel.
  1093  func (m *Map) resolveErrors(ctx context.Context) error {
  1094  	started := time.Now()
  1095  
  1096  	m.lock.Lock()
  1097  	defer m.lock.Unlock()
  1098  
  1099  	if m.cache == nil {
  1100  		return nil
  1101  	}
  1102  
  1103  	if !m.outstandingErrors {
  1104  		return nil
  1105  	}
  1106  
  1107  	outstanding := 0
  1108  	for _, e := range m.cache {
  1109  		switch e.DesiredAction {
  1110  		case Insert, Delete:
  1111  			outstanding++
  1112  		}
  1113  	}
  1114  
  1115  	// Errors appear to have already been resolved. This can happen if a subsequent
  1116  	// Update/Delete operation acting on the same key succeeded.
  1117  	if outstanding == 0 {
  1118  		m.outstandingErrors = false
  1119  		return nil
  1120  	}
  1121  
  1122  	if err := m.open(); err != nil {
  1123  		return err
  1124  	}
  1125  
  1126  	scopedLogger := m.scopedLogger()
  1127  	scopedLogger.WithField("remaining", outstanding).
  1128  		Debug("Starting periodic BPF map error resolver")
  1129  
  1130  	resolved := 0
  1131  	scanned := 0
  1132  	nerr := 0
  1133  	for k, e := range m.cache {
  1134  		scanned++
  1135  
  1136  		switch e.DesiredAction {
  1137  		case OK:
  1138  		case Insert:
  1139  			// Call into ebpf-go's Map.Update() directly, don't go through the cache.
  1140  			err := m.m.Update(e.Key, e.Value, ebpf.UpdateAny)
  1141  			if metrics.BPFMapOps.IsEnabled() {
  1142  				metrics.BPFMapOps.WithLabelValues(m.commonName(), metricOpUpdate, metrics.Error2Outcome(err)).Inc()
  1143  			}
  1144  			if err == nil {
  1145  				e.DesiredAction = OK
  1146  				e.LastError = nil
  1147  				resolved++
  1148  				outstanding--
  1149  			} else {
  1150  				e.LastError = err
  1151  				nerr++
  1152  			}
  1153  			m.cache[k] = e
  1154  			m.addToEventsLocked(MapUpdate, *e)
  1155  		case Delete:
  1156  			// Holding lock, issue direct delete on map.
  1157  			err := m.m.Delete(e.Key)
  1158  			if metrics.BPFMapOps.IsEnabled() {
  1159  				metrics.BPFMapOps.WithLabelValues(m.commonName(), metricOpDelete, metrics.Error2Outcome(err)).Inc()
  1160  			}
  1161  			if err == nil || errors.Is(err, ebpf.ErrKeyNotExist) {
  1162  				delete(m.cache, k)
  1163  				resolved++
  1164  				outstanding--
  1165  			} else {
  1166  				e.LastError = err
  1167  				nerr++
  1168  				m.cache[k] = e
  1169  			}
  1170  
  1171  			m.addToEventsLocked(MapDelete, *e)
  1172  		}
  1173  
  1174  		// bail out if maximum errors are reached to relax the map lock
  1175  		if nerr > maxSyncErrors {
  1176  			break
  1177  		}
  1178  	}
  1179  
  1180  	m.updatePressureMetric()
  1181  
  1182  	scopedLogger.WithFields(logrus.Fields{
  1183  		"remaining": outstanding,
  1184  		"resolved":  resolved,
  1185  		"scanned":   scanned,
  1186  		"duration":  time.Since(started),
  1187  	}).Debug("BPF map error resolver completed")
  1188  
  1189  	m.outstandingErrors = outstanding > 0
  1190  	if m.outstandingErrors {
  1191  		return fmt.Errorf("%d map sync errors", outstanding)
  1192  	}
  1193  
  1194  	return nil
  1195  }
  1196  
  1197  // CheckAndUpgrade checks the received map's properties (for the map currently
  1198  // loaded into the kernel) against the desired properties, and if they do not
  1199  // match, deletes the map.
  1200  //
  1201  // Returns true if the map was upgraded.
  1202  func (m *Map) CheckAndUpgrade(desired *Map) bool {
  1203  	flags := desired.Flags() | GetPreAllocateMapFlags(desired.Type())
  1204  
  1205  	return objCheck(
  1206  		m.m,
  1207  		m.path,
  1208  		desired.Type(),
  1209  		desired.KeySize(),
  1210  		desired.ValueSize(),
  1211  		desired.MaxEntries(),
  1212  		flags,
  1213  	)
  1214  }
  1215  
  1216  func (m *Map) exist() (bool, error) {
  1217  	path, err := m.Path()
  1218  	if err != nil {
  1219  		return false, err
  1220  	}
  1221  
  1222  	if _, err := os.Stat(path); err == nil {
  1223  		return true, nil
  1224  	}
  1225  
  1226  	return false, nil
  1227  }