github.com/cilium/ebpf@v0.15.1-0.20240517100537-8079b37aa138/map.go (about)

     1  package ebpf
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"math/rand"
     9  	"os"
    10  	"path/filepath"
    11  	"reflect"
    12  	"strings"
    13  	"sync"
    14  	"time"
    15  	"unsafe"
    16  
    17  	"github.com/cilium/ebpf/btf"
    18  	"github.com/cilium/ebpf/internal"
    19  	"github.com/cilium/ebpf/internal/sys"
    20  	"github.com/cilium/ebpf/internal/sysenc"
    21  	"github.com/cilium/ebpf/internal/unix"
    22  )
    23  
// Errors returned by Map and MapIterator methods.
var (
	// ErrKeyNotExist reports that a key is not present in the map. Lookup,
	// LookupAndDelete, Delete and NextKey document returning it.
	ErrKeyNotExist      = errors.New("key does not exist")
	// ErrKeyExist reports that a key is already present in the map.
	ErrKeyExist         = errors.New("key already exists")
	// ErrIterationAborted reports that an iteration was aborted.
	ErrIterationAborted = errors.New("iteration aborted")
	// ErrMapIncompatible is wrapped by MapSpec.Compatible when an existing
	// map does not match a spec.
	ErrMapIncompatible  = errors.New("map spec is incompatible with existing map")
	// errMapNoBTFValue is returned by MapSpec.dataSection when the spec has
	// no BTF value type.
	errMapNoBTFValue    = errors.New("map spec does not contain a BTF Value")
)
    32  
// MapOptions control loading a map into the kernel.
type MapOptions struct {
	// The base path to pin maps in if requested via PinByName.
	// Existing maps will be re-used if they are compatible, otherwise an
	// error is returned.
	PinPath        string
	// LoadPinOptions is handed to LoadPinnedMap when an already pinned map
	// is opened for a spec that uses PinByName.
	LoadPinOptions LoadPinOptions
}
    41  
// MapID represents the unique ID of an eBPF map.
type MapID uint32
    44  
// MapSpec defines a Map.
//
// A spec is a plain description; it is turned into a kernel object by
// NewMap or NewMapWithOptions.
type MapSpec struct {
	// Name is passed to the kernel as a debug aid. Must only contain
	// alpha numeric and '_' characters.
	Name       string
	// Type selects the kind of map. KeySize, ValueSize and MaxEntries may be
	// left at zero for some types (maps of maps, perf event arrays); the
	// missing values are filled in before the map is created.
	Type       MapType
	KeySize    uint32
	ValueSize  uint32
	MaxEntries uint32

	// Flags is passed to the kernel and specifies additional map
	// creation attributes.
	Flags uint32

	// Automatically pin and load a map from MapOptions.PinPath.
	// Generates an error if an existing pinned map is incompatible with the MapSpec.
	Pinning PinType

	// Specify numa node during map creation
	// (effective only if unix.BPF_F_NUMA_NODE flag is set,
	// which can be imported from golang.org/x/sys/unix)
	NumaNode uint32

	// The initial contents of the map. May be nil.
	Contents []MapKV

	// Whether to freeze a map after setting its initial contents.
	Freeze bool

	// InnerMap is used as a template for ArrayOfMaps and HashOfMaps.
	// It is required for those two types and must not itself be pinned.
	InnerMap *MapSpec

	// Extra trailing bytes found in the ELF map definition when using structs
	// larger than libbpf's bpf_map_def. nil if no trailing bytes were present.
	// Must be nil or empty before instantiating the MapSpec into a Map.
	Extra *bytes.Reader

	// The key and value type of this map. May be nil.
	Key, Value btf.Type
}
    85  
    86  func (ms *MapSpec) String() string {
    87  	return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags)
    88  }
    89  
    90  // Copy returns a copy of the spec.
    91  //
    92  // MapSpec.Contents is a shallow copy.
    93  func (ms *MapSpec) Copy() *MapSpec {
    94  	if ms == nil {
    95  		return nil
    96  	}
    97  
    98  	cpy := *ms
    99  
   100  	cpy.Contents = make([]MapKV, len(ms.Contents))
   101  	copy(cpy.Contents, ms.Contents)
   102  
   103  	cpy.InnerMap = ms.InnerMap.Copy()
   104  
   105  	return &cpy
   106  }
   107  
   108  // fixupMagicFields fills fields of MapSpec which are usually
   109  // left empty in ELF or which depend on runtime information.
   110  //
   111  // The method doesn't modify Spec, instead returning a copy.
   112  // The copy is only performed if fixups are necessary, so callers mustn't mutate
   113  // the returned spec.
   114  func (spec *MapSpec) fixupMagicFields() (*MapSpec, error) {
   115  	switch spec.Type {
   116  	case ArrayOfMaps, HashOfMaps:
   117  		if spec.ValueSize != 0 && spec.ValueSize != 4 {
   118  			return nil, errors.New("ValueSize must be zero or four for map of map")
   119  		}
   120  
   121  		spec = spec.Copy()
   122  		spec.ValueSize = 4
   123  
   124  	case PerfEventArray:
   125  		if spec.KeySize != 0 && spec.KeySize != 4 {
   126  			return nil, errors.New("KeySize must be zero or four for perf event array")
   127  		}
   128  
   129  		if spec.ValueSize != 0 && spec.ValueSize != 4 {
   130  			return nil, errors.New("ValueSize must be zero or four for perf event array")
   131  		}
   132  
   133  		spec = spec.Copy()
   134  		spec.KeySize = 4
   135  		spec.ValueSize = 4
   136  
   137  		n, err := PossibleCPU()
   138  		if err != nil {
   139  			return nil, fmt.Errorf("fixup perf event array: %w", err)
   140  		}
   141  
   142  		if n := uint32(n); spec.MaxEntries == 0 || spec.MaxEntries > n {
   143  			// MaxEntries should be zero most of the time, but there is code
   144  			// out there which hardcodes large constants. Clamp the number
   145  			// of entries to the number of CPUs at most. Allow creating maps with
   146  			// less than n items since some kernel selftests relied on this
   147  			// behaviour in the past.
   148  			spec.MaxEntries = n
   149  		}
   150  	}
   151  
   152  	return spec, nil
   153  }
   154  
   155  // dataSection returns the contents and BTF Datasec descriptor of the spec.
   156  func (ms *MapSpec) dataSection() ([]byte, *btf.Datasec, error) {
   157  	if ms.Value == nil {
   158  		return nil, nil, errMapNoBTFValue
   159  	}
   160  
   161  	ds, ok := ms.Value.(*btf.Datasec)
   162  	if !ok {
   163  		return nil, nil, fmt.Errorf("map value BTF is a %T, not a *btf.Datasec", ms.Value)
   164  	}
   165  
   166  	if n := len(ms.Contents); n != 1 {
   167  		return nil, nil, fmt.Errorf("expected one key, found %d", n)
   168  	}
   169  
   170  	kv := ms.Contents[0]
   171  	value, ok := kv.Value.([]byte)
   172  	if !ok {
   173  		return nil, nil, fmt.Errorf("value at first map key is %T, not []byte", kv.Value)
   174  	}
   175  
   176  	return value, ds, nil
   177  }
   178  
// MapKV is used to initialize the contents of a Map.
//
// A MapSpec carries a slice of these in its Contents field.
type MapKV struct {
	// Key is the key of the entry.
	Key   interface{}
	// Value is the value stored under Key.
	Value interface{}
}
   184  
   185  // Compatible returns nil if an existing map may be used instead of creating
   186  // one from the spec.
   187  //
   188  // Returns an error wrapping [ErrMapIncompatible] otherwise.
   189  func (ms *MapSpec) Compatible(m *Map) error {
   190  	ms, err := ms.fixupMagicFields()
   191  	if err != nil {
   192  		return err
   193  	}
   194  
   195  	diffs := []string{}
   196  	if m.typ != ms.Type {
   197  		diffs = append(diffs, fmt.Sprintf("Type: %s changed to %s", m.typ, ms.Type))
   198  	}
   199  	if m.keySize != ms.KeySize {
   200  		diffs = append(diffs, fmt.Sprintf("KeySize: %d changed to %d", m.keySize, ms.KeySize))
   201  	}
   202  	if m.valueSize != ms.ValueSize {
   203  		diffs = append(diffs, fmt.Sprintf("ValueSize: %d changed to %d", m.valueSize, ms.ValueSize))
   204  	}
   205  	if m.maxEntries != ms.MaxEntries {
   206  		diffs = append(diffs, fmt.Sprintf("MaxEntries: %d changed to %d", m.maxEntries, ms.MaxEntries))
   207  	}
   208  
   209  	// BPF_F_RDONLY_PROG is set unconditionally for devmaps. Explicitly allow this
   210  	// mismatch.
   211  	if !((ms.Type == DevMap || ms.Type == DevMapHash) && m.flags^ms.Flags == unix.BPF_F_RDONLY_PROG) &&
   212  		m.flags != ms.Flags {
   213  		diffs = append(diffs, fmt.Sprintf("Flags: %d changed to %d", m.flags, ms.Flags))
   214  	}
   215  
   216  	if len(diffs) == 0 {
   217  		return nil
   218  	}
   219  
   220  	return fmt.Errorf("%s: %w", strings.Join(diffs, ", "), ErrMapIncompatible)
   221  }
   222  
// Map represents a Map file descriptor.
//
// It is not safe to close a map which is used by other goroutines.
//
// Methods which take interface{} arguments by default encode
// them using binary.Read/Write in the machine's native endianness.
//
// Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler
// if you require custom encoding.
type Map struct {
	// name is the map name, taken from the spec or from the map info.
	name       string
	// fd is the file descriptor used for all syscalls on this map.
	fd         *sys.FD
	// typ, keySize, valueSize, maxEntries and flags mirror the attributes
	// the map was created or loaded with.
	typ        MapType
	keySize    uint32
	valueSize  uint32
	maxEntries uint32
	flags      uint32
	// pinnedPath is left empty by newMap.
	pinnedPath string
	// Per CPU maps return values larger than the size in the spec.
	// For those maps this is valueSize aligned to 8 bytes, multiplied by the
	// number of possible CPUs; for all other maps it equals valueSize.
	fullValueSize int
}
   244  
   245  // NewMapFromFD creates a map from a raw fd.
   246  //
   247  // You should not use fd after calling this function.
   248  func NewMapFromFD(fd int) (*Map, error) {
   249  	f, err := sys.NewFD(fd)
   250  	if err != nil {
   251  		return nil, err
   252  	}
   253  
   254  	return newMapFromFD(f)
   255  }
   256  
   257  func newMapFromFD(fd *sys.FD) (*Map, error) {
   258  	info, err := newMapInfoFromFd(fd)
   259  	if err != nil {
   260  		fd.Close()
   261  		return nil, fmt.Errorf("get map info: %w", err)
   262  	}
   263  
   264  	return newMap(fd, info.Name, info.Type, info.KeySize, info.ValueSize, info.MaxEntries, info.Flags)
   265  }
   266  
   267  // NewMap creates a new Map.
   268  //
   269  // It's equivalent to calling NewMapWithOptions with default options.
   270  func NewMap(spec *MapSpec) (*Map, error) {
   271  	return NewMapWithOptions(spec, MapOptions{})
   272  }
   273  
   274  // NewMapWithOptions creates a new Map.
   275  //
   276  // Creating a map for the first time will perform feature detection
   277  // by creating small, temporary maps.
   278  //
   279  // The caller is responsible for ensuring the process' rlimit is set
   280  // sufficiently high for locking memory during map creation. This can be done
   281  // by calling rlimit.RemoveMemlock() prior to calling NewMapWithOptions.
   282  //
   283  // May return an error wrapping ErrMapIncompatible.
   284  func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) {
   285  	m, err := newMapWithOptions(spec, opts)
   286  	if err != nil {
   287  		return nil, fmt.Errorf("creating map: %w", err)
   288  	}
   289  
   290  	if err := m.finalize(spec); err != nil {
   291  		m.Close()
   292  		return nil, fmt.Errorf("populating map: %w", err)
   293  	}
   294  
   295  	return m, nil
   296  }
   297  
// newMapWithOptions loads an already pinned map or creates a new one from
// spec. It does not populate or freeze the map.
//
// For PinByName the map pinned at opts.PinPath/spec.Name is opened and
// returned if it is compatible with spec. If no such pin exists, a new map is
// created and pinned at that path. For PinNone a new map is always created.
// Any other pin type yields an error wrapping ErrNotSupported.
//
// ArrayOfMaps and HashOfMaps require spec.InnerMap, from which a temporary
// template map is created and closed again when the function returns.
func newMapWithOptions(spec *MapSpec, opts MapOptions) (_ *Map, err error) {
	// closeOnError is meant to be deferred: it inspects the named result err
	// at the time the function returns and closes c only on failure.
	closeOnError := func(c io.Closer) {
		if err != nil {
			c.Close()
		}
	}

	switch spec.Pinning {
	case PinByName:
		if spec.Name == "" {
			return nil, fmt.Errorf("pin by name: missing Name")
		}

		if opts.PinPath == "" {
			return nil, fmt.Errorf("pin by name: missing MapOptions.PinPath")
		}

		path := filepath.Join(opts.PinPath, spec.Name)
		// Note: this err shadows the named result inside the case. The
		// return statements below still assign the named result.
		m, err := LoadPinnedMap(path, &opts.LoadPinOptions)
		if errors.Is(err, unix.ENOENT) {
			// Nothing is pinned yet: leave the switch and create the map.
			break
		}
		if err != nil {
			return nil, fmt.Errorf("load pinned map: %w", err)
		}
		defer closeOnError(m)

		if err := spec.Compatible(m); err != nil {
			return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err)
		}

		return m, nil

	case PinNone:
		// Nothing to do here

	default:
		return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported)
	}

	var innerFd *sys.FD
	if spec.Type == ArrayOfMaps || spec.Type == HashOfMaps {
		if spec.InnerMap == nil {
			return nil, fmt.Errorf("%s requires InnerMap", spec.Type)
		}

		if spec.InnerMap.Pinning != PinNone {
			return nil, errors.New("inner maps cannot be pinned")
		}

		template, err := spec.InnerMap.createMap(nil, opts)
		if err != nil {
			return nil, fmt.Errorf("inner map: %w", err)
		}
		// The template is only needed while the outer map is being created.
		defer template.Close()

		// Intentionally skip populating and freezing (finalizing)
		// the inner map template since it will be removed shortly.

		innerFd = template.fd
	}

	m, err := spec.createMap(innerFd, opts)
	if err != nil {
		return nil, err
	}
	defer closeOnError(m)

	// Reached for PinByName only when no pinned map existed above.
	if spec.Pinning == PinByName {
		path := filepath.Join(opts.PinPath, spec.Name)
		if err := m.Pin(path); err != nil {
			return nil, fmt.Errorf("pin map to %s: %w", path, err)
		}
	}

	return m, nil
}
   375  
// createMap validates the spec's properties and creates the map in the kernel
// using the given opts. It does not populate or freeze the map.
//
// inner, when non-nil, supplies the file descriptor passed as InnerMapFd.
// NOTE(review): opts is not read anywhere in this function body.
//
// Errors from the map creation syscall are routed through
// handleMapCreateError to add hints about likely causes.
func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions) (_ *Map, err error) {
	// closeOnError is meant to be deferred: it closes closer only if the
	// named result err is non-nil when the function returns.
	closeOnError := func(closer io.Closer) {
		if err != nil {
			closer.Close()
		}
	}

	// Kernels 4.13 through 5.4 used a struct bpf_map_def that contained
	// additional 'inner_map_idx' and later 'numa_node' fields.
	// In order to support loading these definitions, tolerate the presence of
	// extra bytes, but require them to be zeroes.
	if spec.Extra != nil {
		if _, err := io.Copy(internal.DiscardZeroes{}, spec.Extra); err != nil {
			return nil, errors.New("extra contains unhandled non-zero bytes, drain before creating map")
		}
	}

	// From here on spec may refer to a fixed-up copy of the receiver.
	spec, err = spec.fixupMagicFields()
	if err != nil {
		return nil, err
	}

	attr := sys.MapCreateAttr{
		MapType:    sys.MapType(spec.Type),
		KeySize:    spec.KeySize,
		ValueSize:  spec.ValueSize,
		MaxEntries: spec.MaxEntries,
		MapFlags:   sys.MapFlags(spec.Flags),
		NumaNode:   spec.NumaNode,
	}

	if inner != nil {
		attr.InnerMapFd = inner.Uint()
	}

	// Only pass a name if the haveObjName probe reports no error.
	if haveObjName() == nil {
		attr.MapName = sys.NewObjName(spec.Name)
	}

	if spec.Key != nil || spec.Value != nil {
		handle, keyTypeID, valueTypeID, err := btf.MarshalMapKV(spec.Key, spec.Value)
		// btf.ErrNotSupported is tolerated: the map is then created
		// without BTF key/value information.
		if err != nil && !errors.Is(err, btf.ErrNotSupported) {
			return nil, fmt.Errorf("load BTF: %w", err)
		}

		if handle != nil {
			defer handle.Close()

			// Use BTF k/v during map creation.
			attr.BtfFd = uint32(handle.FD())
			attr.BtfKeyTypeId = keyTypeID
			attr.BtfValueTypeId = valueTypeID
		}
	}

	fd, err := sys.MapCreate(&attr)

	// Some map types don't support BTF k/v in earlier kernel versions.
	// Remove BTF metadata and retry map creation.
	if (errors.Is(err, sys.ENOTSUPP) || errors.Is(err, unix.EINVAL)) && attr.BtfFd != 0 {
		attr.BtfFd, attr.BtfKeyTypeId, attr.BtfValueTypeId = 0, 0, 0
		fd, err = sys.MapCreate(&attr)
	}
	if err != nil {
		return nil, handleMapCreateError(attr, spec, err)
	}

	// Release the descriptor again if wrapping it in a Map fails.
	defer closeOnError(fd)
	m, err := newMap(fd, spec.Name, spec.Type, spec.KeySize, spec.ValueSize, spec.MaxEntries, spec.Flags)
	if err != nil {
		return nil, fmt.Errorf("map create: %w", err)
	}
	return m, nil
}
   452  
   453  func handleMapCreateError(attr sys.MapCreateAttr, spec *MapSpec, err error) error {
   454  	if errors.Is(err, unix.EPERM) {
   455  		return fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err)
   456  	}
   457  	if errors.Is(err, unix.EINVAL) && spec.MaxEntries == 0 {
   458  		return fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err)
   459  	}
   460  	if errors.Is(err, unix.EINVAL) && spec.Type == UnspecifiedMap {
   461  		return fmt.Errorf("map create: cannot use type %s", UnspecifiedMap)
   462  	}
   463  	if errors.Is(err, unix.EINVAL) && spec.Flags&unix.BPF_F_NO_PREALLOC > 0 {
   464  		return fmt.Errorf("map create: %w (noPrealloc flag may be incompatible with map type %s)", err, spec.Type)
   465  	}
   466  
   467  	switch spec.Type {
   468  	case ArrayOfMaps, HashOfMaps:
   469  		if haveFeatErr := haveNestedMaps(); haveFeatErr != nil {
   470  			return fmt.Errorf("map create: %w", haveFeatErr)
   471  		}
   472  	}
   473  	if spec.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze {
   474  		if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil {
   475  			return fmt.Errorf("map create: %w", haveFeatErr)
   476  		}
   477  	}
   478  	if spec.Flags&unix.BPF_F_MMAPABLE > 0 {
   479  		if haveFeatErr := haveMmapableMaps(); haveFeatErr != nil {
   480  			return fmt.Errorf("map create: %w", haveFeatErr)
   481  		}
   482  	}
   483  	if spec.Flags&unix.BPF_F_INNER_MAP > 0 {
   484  		if haveFeatErr := haveInnerMaps(); haveFeatErr != nil {
   485  			return fmt.Errorf("map create: %w", haveFeatErr)
   486  		}
   487  	}
   488  	if spec.Flags&unix.BPF_F_NO_PREALLOC > 0 {
   489  		if haveFeatErr := haveNoPreallocMaps(); haveFeatErr != nil {
   490  			return fmt.Errorf("map create: %w", haveFeatErr)
   491  		}
   492  	}
   493  	// BPF_MAP_TYPE_RINGBUF's max_entries must be a power-of-2 multiple of kernel's page size.
   494  	if errors.Is(err, unix.EINVAL) &&
   495  		(attr.MapType == sys.BPF_MAP_TYPE_RINGBUF || attr.MapType == sys.BPF_MAP_TYPE_USER_RINGBUF) {
   496  		pageSize := uint32(os.Getpagesize())
   497  		maxEntries := attr.MaxEntries
   498  		if maxEntries%pageSize != 0 || !internal.IsPow(maxEntries) {
   499  			return fmt.Errorf("map create: %w (ring map size %d not a multiple of page size %d)", err, maxEntries, pageSize)
   500  		}
   501  	}
   502  	if attr.BtfFd == 0 {
   503  		return fmt.Errorf("map create: %w (without BTF k/v)", err)
   504  	}
   505  
   506  	return fmt.Errorf("map create: %w", err)
   507  }
   508  
   509  // newMap allocates and returns a new Map structure.
   510  // Sets the fullValueSize on per-CPU maps.
   511  func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) {
   512  	m := &Map{
   513  		name,
   514  		fd,
   515  		typ,
   516  		keySize,
   517  		valueSize,
   518  		maxEntries,
   519  		flags,
   520  		"",
   521  		int(valueSize),
   522  	}
   523  
   524  	if !typ.hasPerCPUValue() {
   525  		return m, nil
   526  	}
   527  
   528  	possibleCPUs, err := PossibleCPU()
   529  	if err != nil {
   530  		return nil, err
   531  	}
   532  
   533  	m.fullValueSize = int(internal.Align(valueSize, 8)) * possibleCPUs
   534  	return m, nil
   535  }
   536  
   537  func (m *Map) String() string {
   538  	if m.name != "" {
   539  		return fmt.Sprintf("%s(%s)#%v", m.typ, m.name, m.fd)
   540  	}
   541  	return fmt.Sprintf("%s#%v", m.typ, m.fd)
   542  }
   543  
// Type returns the underlying type of the map.
//
// The value is the one stored in the Map when it was constructed.
func (m *Map) Type() MapType {
	return m.typ
}
   548  
// KeySize returns the size of the map key in bytes.
//
// The value is the one stored in the Map when it was constructed.
func (m *Map) KeySize() uint32 {
	return m.keySize
}
   553  
// ValueSize returns the size of the map value in bytes.
//
// For maps with per-CPU values this is the size of a single value, not the
// size of the combined buffer kept internally as fullValueSize.
func (m *Map) ValueSize() uint32 {
	return m.valueSize
}
   558  
// MaxEntries returns the maximum number of elements the map can hold.
//
// The value is the one stored in the Map when it was constructed.
func (m *Map) MaxEntries() uint32 {
	return m.maxEntries
}
   563  
// Flags returns the flags of the map.
//
// The value is the one stored in the Map when it was constructed.
func (m *Map) Flags() uint32 {
	return m.flags
}
   568  
// Info returns metadata about the map.
//
// The metadata is obtained anew on every call by passing the map's file
// descriptor to newMapInfoFromFd; errors from that call are returned as is.
func (m *Map) Info() (*MapInfo, error) {
	return newMapInfoFromFd(m.fd)
}
   573  
// MapLookupFlags controls the behaviour of the map lookup calls.
type MapLookupFlags uint64

// LookupLock looks up the value of a spin-locked map.
// It has the value of unix.BPF_F_LOCK.
const LookupLock MapLookupFlags = unix.BPF_F_LOCK
   579  
   580  // Lookup retrieves a value from a Map.
   581  //
   582  // Calls Close() on valueOut if it is of type **Map or **Program,
   583  // and *valueOut is not nil.
   584  //
   585  // Returns an error if the key doesn't exist, see ErrKeyNotExist.
   586  func (m *Map) Lookup(key, valueOut interface{}) error {
   587  	return m.LookupWithFlags(key, valueOut, 0)
   588  }
   589  
   590  // LookupWithFlags retrieves a value from a Map with flags.
   591  //
   592  // Passing LookupLock flag will look up the value of a spin-locked
   593  // map without returning the lock. This must be specified if the
   594  // elements contain a spinlock.
   595  //
   596  // Calls Close() on valueOut if it is of type **Map or **Program,
   597  // and *valueOut is not nil.
   598  //
   599  // Returns an error if the key doesn't exist, see ErrKeyNotExist.
   600  func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
   601  	if m.typ.hasPerCPUValue() {
   602  		return m.lookupPerCPU(key, valueOut, flags)
   603  	}
   604  
   605  	valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize)
   606  	if err := m.lookup(key, valueBytes.Pointer(), flags); err != nil {
   607  		return err
   608  	}
   609  
   610  	return m.unmarshalValue(valueOut, valueBytes)
   611  }
   612  
   613  // LookupAndDelete retrieves and deletes a value from a Map.
   614  //
   615  // Returns ErrKeyNotExist if the key doesn't exist.
   616  func (m *Map) LookupAndDelete(key, valueOut interface{}) error {
   617  	return m.LookupAndDeleteWithFlags(key, valueOut, 0)
   618  }
   619  
   620  // LookupAndDeleteWithFlags retrieves and deletes a value from a Map.
   621  //
   622  // Passing LookupLock flag will look up and delete the value of a spin-locked
   623  // map without returning the lock. This must be specified if the elements
   624  // contain a spinlock.
   625  //
   626  // Returns ErrKeyNotExist if the key doesn't exist.
   627  func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
   628  	if m.typ.hasPerCPUValue() {
   629  		return m.lookupAndDeletePerCPU(key, valueOut, flags)
   630  	}
   631  
   632  	valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize)
   633  	if err := m.lookupAndDelete(key, valueBytes.Pointer(), flags); err != nil {
   634  		return err
   635  	}
   636  	return m.unmarshalValue(valueOut, valueBytes)
   637  }
   638  
   639  // LookupBytes gets a value from Map.
   640  //
   641  // Returns a nil value if a key doesn't exist.
   642  func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
   643  	valueBytes := make([]byte, m.fullValueSize)
   644  	valuePtr := sys.NewSlicePointer(valueBytes)
   645  
   646  	err := m.lookup(key, valuePtr, 0)
   647  	if errors.Is(err, ErrKeyNotExist) {
   648  		return nil, nil
   649  	}
   650  
   651  	return valueBytes, err
   652  }
   653  
   654  func (m *Map) lookupPerCPU(key, valueOut any, flags MapLookupFlags) error {
   655  	slice, err := ensurePerCPUSlice(valueOut, int(m.valueSize))
   656  	if err != nil {
   657  		return err
   658  	}
   659  	valueBytes := make([]byte, m.fullValueSize)
   660  	if err := m.lookup(key, sys.NewSlicePointer(valueBytes), flags); err != nil {
   661  		return err
   662  	}
   663  	return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes)
   664  }
   665  
   666  func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags) error {
   667  	keyPtr, err := m.marshalKey(key)
   668  	if err != nil {
   669  		return fmt.Errorf("can't marshal key: %w", err)
   670  	}
   671  
   672  	attr := sys.MapLookupElemAttr{
   673  		MapFd: m.fd.Uint(),
   674  		Key:   keyPtr,
   675  		Value: valueOut,
   676  		Flags: uint64(flags),
   677  	}
   678  
   679  	if err = sys.MapLookupElem(&attr); err != nil {
   680  		return fmt.Errorf("lookup: %w", wrapMapError(err))
   681  	}
   682  	return nil
   683  }
   684  
   685  func (m *Map) lookupAndDeletePerCPU(key, valueOut any, flags MapLookupFlags) error {
   686  	slice, err := ensurePerCPUSlice(valueOut, int(m.valueSize))
   687  	if err != nil {
   688  		return err
   689  	}
   690  	valueBytes := make([]byte, m.fullValueSize)
   691  	if err := m.lookupAndDelete(key, sys.NewSlicePointer(valueBytes), flags); err != nil {
   692  		return err
   693  	}
   694  	return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes)
   695  }
   696  
   697  // ensurePerCPUSlice allocates a slice for a per-CPU value if necessary.
   698  func ensurePerCPUSlice(sliceOrPtr any, elemLength int) (any, error) {
   699  	sliceOrPtrType := reflect.TypeOf(sliceOrPtr)
   700  	if sliceOrPtrType.Kind() == reflect.Slice {
   701  		// The target is a slice, the caller is responsible for ensuring that
   702  		// size is correct.
   703  		return sliceOrPtr, nil
   704  	}
   705  
   706  	slicePtrType := sliceOrPtrType
   707  	if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice {
   708  		return nil, fmt.Errorf("per-cpu value requires a slice or a pointer to slice")
   709  	}
   710  
   711  	possibleCPUs, err := PossibleCPU()
   712  	if err != nil {
   713  		return nil, err
   714  	}
   715  
   716  	sliceType := slicePtrType.Elem()
   717  	slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs)
   718  
   719  	sliceElemType := sliceType.Elem()
   720  	sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr
   721  	reflect.ValueOf(sliceOrPtr).Elem().Set(slice)
   722  	if !sliceElemIsPointer {
   723  		return slice.Interface(), nil
   724  	}
   725  	sliceElemType = sliceElemType.Elem()
   726  
   727  	for i := 0; i < possibleCPUs; i++ {
   728  		newElem := reflect.New(sliceElemType)
   729  		slice.Index(i).Set(newElem)
   730  	}
   731  
   732  	return slice.Interface(), nil
   733  }
   734  
   735  func (m *Map) lookupAndDelete(key any, valuePtr sys.Pointer, flags MapLookupFlags) error {
   736  	keyPtr, err := m.marshalKey(key)
   737  	if err != nil {
   738  		return fmt.Errorf("can't marshal key: %w", err)
   739  	}
   740  
   741  	attr := sys.MapLookupAndDeleteElemAttr{
   742  		MapFd: m.fd.Uint(),
   743  		Key:   keyPtr,
   744  		Value: valuePtr,
   745  		Flags: uint64(flags),
   746  	}
   747  
   748  	if err := sys.MapLookupAndDeleteElem(&attr); err != nil {
   749  		return fmt.Errorf("lookup and delete: %w", wrapMapError(err))
   750  	}
   751  
   752  	return nil
   753  }
   754  
// MapUpdateFlags controls the behaviour of the Map.Update call.
//
// The exact semantics depend on the specific MapType.
type MapUpdateFlags uint64

// The iota expressions below evaluate to 0, 1, 2 and 4 respectively.
const (
	// UpdateAny creates a new element or updates an existing one.
	UpdateAny MapUpdateFlags = iota
	// UpdateNoExist creates a new element.
	UpdateNoExist MapUpdateFlags = 1 << (iota - 1)
	// UpdateExist updates an existing element.
	UpdateExist
	// UpdateLock updates elements under bpf_spin_lock.
	UpdateLock
)
   770  
   771  // Put replaces or creates a value in map.
   772  //
   773  // It is equivalent to calling Update with UpdateAny.
   774  func (m *Map) Put(key, value interface{}) error {
   775  	return m.Update(key, value, UpdateAny)
   776  }
   777  
   778  // Update changes the value of a key.
   779  func (m *Map) Update(key, value any, flags MapUpdateFlags) error {
   780  	if m.typ.hasPerCPUValue() {
   781  		return m.updatePerCPU(key, value, flags)
   782  	}
   783  
   784  	valuePtr, err := m.marshalValue(value)
   785  	if err != nil {
   786  		return fmt.Errorf("marshal value: %w", err)
   787  	}
   788  
   789  	return m.update(key, valuePtr, flags)
   790  }
   791  
   792  func (m *Map) updatePerCPU(key, value any, flags MapUpdateFlags) error {
   793  	valuePtr, err := marshalPerCPUValue(value, int(m.valueSize))
   794  	if err != nil {
   795  		return fmt.Errorf("marshal value: %w", err)
   796  	}
   797  
   798  	return m.update(key, valuePtr, flags)
   799  }
   800  
   801  func (m *Map) update(key any, valuePtr sys.Pointer, flags MapUpdateFlags) error {
   802  	keyPtr, err := m.marshalKey(key)
   803  	if err != nil {
   804  		return fmt.Errorf("marshal key: %w", err)
   805  	}
   806  
   807  	attr := sys.MapUpdateElemAttr{
   808  		MapFd: m.fd.Uint(),
   809  		Key:   keyPtr,
   810  		Value: valuePtr,
   811  		Flags: uint64(flags),
   812  	}
   813  
   814  	if err = sys.MapUpdateElem(&attr); err != nil {
   815  		return fmt.Errorf("update: %w", wrapMapError(err))
   816  	}
   817  
   818  	return nil
   819  }
   820  
   821  // Delete removes a value.
   822  //
   823  // Returns ErrKeyNotExist if the key does not exist.
   824  func (m *Map) Delete(key interface{}) error {
   825  	keyPtr, err := m.marshalKey(key)
   826  	if err != nil {
   827  		return fmt.Errorf("can't marshal key: %w", err)
   828  	}
   829  
   830  	attr := sys.MapDeleteElemAttr{
   831  		MapFd: m.fd.Uint(),
   832  		Key:   keyPtr,
   833  	}
   834  
   835  	if err = sys.MapDeleteElem(&attr); err != nil {
   836  		return fmt.Errorf("delete: %w", wrapMapError(err))
   837  	}
   838  	return nil
   839  }
   840  
   841  // NextKey finds the key following an initial key.
   842  //
   843  // See NextKeyBytes for details.
   844  //
   845  // Returns ErrKeyNotExist if there is no next key.
   846  func (m *Map) NextKey(key, nextKeyOut interface{}) error {
   847  	nextKeyBytes := makeMapSyscallOutput(nextKeyOut, int(m.keySize))
   848  
   849  	if err := m.nextKey(key, nextKeyBytes.Pointer()); err != nil {
   850  		return err
   851  	}
   852  
   853  	if err := nextKeyBytes.Unmarshal(nextKeyOut); err != nil {
   854  		return fmt.Errorf("can't unmarshal next key: %w", err)
   855  	}
   856  	return nil
   857  }
   858  
   859  // NextKeyBytes returns the key following an initial key as a byte slice.
   860  //
   861  // Passing nil will return the first key.
   862  //
   863  // Use Iterate if you want to traverse all entries in the map.
   864  //
   865  // Returns nil if there are no more keys.
   866  func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
   867  	nextKey := make([]byte, m.keySize)
   868  	nextKeyPtr := sys.NewSlicePointer(nextKey)
   869  
   870  	err := m.nextKey(key, nextKeyPtr)
   871  	if errors.Is(err, ErrKeyNotExist) {
   872  		return nil, nil
   873  	}
   874  
   875  	return nextKey, err
   876  }
   877  
// nextKey asks the kernel for the key following key and has it written to
// nextKeyOut.
//
// A nil key leaves the key pointer unset, which requests the first key of
// the map. If the kernel answers that request with EFAULT, the call is
// retried once with a key obtained from guessNonExistentKey.
//
// Syscall errors are passed through wrapMapError and prefixed with
// "next key".
func (m *Map) nextKey(key interface{}, nextKeyOut sys.Pointer) error {
	var (
		keyPtr sys.Pointer
		err    error
	)

	// Only marshal when a key was given; otherwise keyPtr stays at its
	// zero value.
	if key != nil {
		keyPtr, err = m.marshalKey(key)
		if err != nil {
			return fmt.Errorf("can't marshal key: %w", err)
		}
	}

	attr := sys.MapGetNextKeyAttr{
		MapFd:   m.fd.Uint(),
		Key:     keyPtr,
		NextKey: nextKeyOut,
	}

	if err = sys.MapGetNextKey(&attr); err != nil {
		// Kernels 4.4.131 and earlier return EFAULT instead of a pointer to the
		// first map element when a nil key pointer is specified.
		if key == nil && errors.Is(err, unix.EFAULT) {
			var guessKey []byte
			guessKey, err = m.guessNonExistentKey()
			if err != nil {
				return err
			}

			// Retry the syscall with a valid non-existing key.
			attr.Key = sys.NewSlicePointer(guessKey)
			if err = sys.MapGetNextKey(&attr); err == nil {
				return nil
			}
			// A failed retry falls through and is reported below.
		}

		return fmt.Errorf("next key: %w", wrapMapError(err))
	}

	return nil
}
   919  
   920  var mmapProtectedPage = sync.OnceValues(func() ([]byte, error) {
   921  	return unix.Mmap(-1, 0, os.Getpagesize(), unix.PROT_NONE, unix.MAP_ANON|unix.MAP_SHARED)
   922  })
   923  
   924  // guessNonExistentKey attempts to perform a map lookup that returns ENOENT.
   925  // This is necessary on kernels before 4.4.132, since those don't support
   926  // iterating maps from the start by providing an invalid key pointer.
   927  func (m *Map) guessNonExistentKey() ([]byte, error) {
   928  	// Map a protected page and use that as the value pointer. This saves some
   929  	// work copying out the value, which we're not interested in.
   930  	page, err := mmapProtectedPage()
   931  	if err != nil {
   932  		return nil, err
   933  	}
   934  	valuePtr := sys.NewSlicePointer(page)
   935  
   936  	randKey := make([]byte, int(m.keySize))
   937  
   938  	for i := 0; i < 4; i++ {
   939  		switch i {
   940  		// For hash maps, the 0 key is less likely to be occupied. They're often
   941  		// used for storing data related to pointers, and their access pattern is
   942  		// generally scattered across the keyspace.
   943  		case 0:
   944  		// An all-0xff key is guaranteed to be out of bounds of any array, since
   945  		// those have a fixed key size of 4 bytes. The only corner case being
   946  		// arrays with 2^32 max entries, but those are prohibitively expensive
   947  		// in many environments.
   948  		case 1:
   949  			for r := range randKey {
   950  				randKey[r] = 0xff
   951  			}
   952  		// Inspired by BCC, 0x55 is an alternating binary pattern (0101), so
   953  		// is unlikely to be taken.
   954  		case 2:
   955  			for r := range randKey {
   956  				randKey[r] = 0x55
   957  			}
   958  		// Last ditch effort, generate a random key.
   959  		case 3:
   960  			rand.New(rand.NewSource(time.Now().UnixNano())).Read(randKey)
   961  		}
   962  
   963  		err := m.lookup(randKey, valuePtr, 0)
   964  		if errors.Is(err, ErrKeyNotExist) {
   965  			return randKey, nil
   966  		}
   967  	}
   968  
   969  	return nil, errors.New("couldn't find non-existing key")
   970  }
   971  
// BatchLookup looks up many elements in a map at once.
//
// "keysOut" and "valuesOut" must be of type slice, a pointer
// to a slice or buffer will not work.
// "cursor" is a pointer to an opaque handle. It must be non-nil. Pass
// "cursor" to subsequent calls of this function to continue the batching
// operation in the case of chunking.
//
// Warning: This API is not very safe to use as the kernel implementation for
// batching relies on the user to be aware of subtle details with regard to
// different map type implementations.
//
// ErrKeyNotExist is returned when the batch lookup has reached
// the end of all possible results, even when partial results
// are returned. It should be used to evaluate when lookup is "done".
//
// The returned int is the number of elements reported by the lookup.
func (m *Map) BatchLookup(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
	return m.batchLookup(sys.BPF_MAP_LOOKUP_BATCH, cursor, keysOut, valuesOut, opts)
}
   990  
// BatchLookupAndDelete looks up many elements in a map at once.
//
// It then deletes all those elements.
// "keysOut" and "valuesOut" must be of type slice, a pointer
// to a slice or buffer will not work.
// "cursor" is a pointer to an opaque handle. It must be non-nil. Pass
// "cursor" to subsequent calls of this function to continue the batching
// operation in the case of chunking.
//
// Warning: This API is not very safe to use as the kernel implementation for
// batching relies on the user to be aware of subtle details with regard to
// different map type implementations.
//
// ErrKeyNotExist is returned when the batch lookup has reached
// the end of all possible results, even when partial results
// are returned. It should be used to evaluate when lookup is "done".
//
// The returned int is the number of elements reported by the lookup.
func (m *Map) BatchLookupAndDelete(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
	return m.batchLookup(sys.BPF_MAP_LOOKUP_AND_DELETE_BATCH, cursor, keysOut, valuesOut, opts)
}
  1010  
// MapBatchCursor represents a starting point for a batch operation.
//
// The zero value denotes the start of a map. A cursor is bound to the first
// Map it is used with and is rejected when passed to a different Map.
type MapBatchCursor struct {
	// m is the Map this cursor was first used with.
	m *Map
	// opaque holds the batch token exchanged with the kernel. It is nil until
	// the first lookup allocates it.
	opaque []byte
}
  1016  
  1017  func (m *Map) batchLookup(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
  1018  	if m.typ.hasPerCPUValue() {
  1019  		return m.batchLookupPerCPU(cmd, cursor, keysOut, valuesOut, opts)
  1020  	}
  1021  
  1022  	count, err := batchCount(keysOut, valuesOut)
  1023  	if err != nil {
  1024  		return 0, err
  1025  	}
  1026  
  1027  	valueBuf := sysenc.SyscallOutput(valuesOut, count*int(m.fullValueSize))
  1028  
  1029  	n, err := m.batchLookupCmd(cmd, cursor, count, keysOut, valueBuf.Pointer(), opts)
  1030  	if err != nil {
  1031  		return n, err
  1032  	}
  1033  
  1034  	err = valueBuf.Unmarshal(valuesOut)
  1035  	if err != nil {
  1036  		return 0, err
  1037  	}
  1038  
  1039  	return n, nil
  1040  }
  1041  
  1042  func (m *Map) batchLookupPerCPU(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
  1043  	count, err := sliceLen(keysOut)
  1044  	if err != nil {
  1045  		return 0, fmt.Errorf("keys: %w", err)
  1046  	}
  1047  
  1048  	valueBuf := make([]byte, count*int(m.fullValueSize))
  1049  	valuePtr := sys.NewSlicePointer(valueBuf)
  1050  
  1051  	n, sysErr := m.batchLookupCmd(cmd, cursor, count, keysOut, valuePtr, opts)
  1052  	if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) {
  1053  		return 0, err
  1054  	}
  1055  
  1056  	err = unmarshalBatchPerCPUValue(valuesOut, count, int(m.valueSize), valueBuf)
  1057  	if err != nil {
  1058  		return 0, err
  1059  	}
  1060  
  1061  	return n, sysErr
  1062  }
  1063  
// batchLookupCmd issues a single batch lookup syscall identified by cmd and
// decodes the returned keys into keysOut. Values are written to valuePtr and
// must be decoded by the caller.
//
// cursor must be non-nil, it is dereferenced unconditionally. On first use the
// cursor is bound to m and given a token buffer; using it with another Map
// afterwards is an error.
//
// It returns attr.Count as read back after the syscall together with the
// wrapped syscall error. An error matching unix.ENOENT is not treated as
// fatal: keys are still decoded and the error is returned next to the count.
// All other failures return a count of zero.
func (m *Map) batchLookupCmd(cmd sys.Cmd, cursor *MapBatchCursor, count int, keysOut any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) {
	cursorLen := int(m.keySize)
	if cursorLen < 4 {
		// * generic_map_lookup_batch requires that batch_out is key_size bytes.
		//   This is used by array and LPM maps.
		//
		// * __htab_map_lookup_and_delete_batch requires u32. This is used by the
		//   various hash maps.
		//
		// Use a minimum of 4 bytes to avoid having to distinguish between the two.
		cursorLen = 4
	}

	// Capture the input token before a buffer is allocated below: on the first
	// lookup inBatch stays nil while cursor.opaque becomes the output buffer.
	inBatch := cursor.opaque
	if inBatch == nil {
		// This is the first lookup, allocate a buffer to hold the cursor.
		cursor.opaque = make([]byte, cursorLen)
		cursor.m = m
	} else if cursor.m != m {
		// Prevent reuse of a cursor across maps. First, it's unlikely to work.
		// Second, the maps may require different cursorLen and cursor.opaque
		// may therefore be too short. This could lead to the kernel clobbering
		// user space memory.
		return 0, errors.New("a cursor may not be reused across maps")
	}

	if err := haveBatchAPI(); err != nil {
		return 0, err
	}

	keyBuf := sysenc.SyscallOutput(keysOut, count*int(m.keySize))

	// On subsequent calls InBatch and OutBatch refer to the same buffer, so
	// the token is updated in place.
	attr := sys.MapLookupBatchAttr{
		MapFd:    m.fd.Uint(),
		Keys:     keyBuf.Pointer(),
		Values:   valuePtr,
		Count:    uint32(count),
		InBatch:  sys.NewSlicePointer(inBatch),
		OutBatch: sys.NewSlicePointer(cursor.opaque),
	}

	if opts != nil {
		attr.ElemFlags = opts.ElemFlags
		attr.Flags = opts.Flags
	}

	_, sysErr := sys.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
	sysErr = wrapMapError(sysErr)
	// ENOENT is let through so that partial results can still be decoded.
	if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) {
		return 0, sysErr
	}

	if err := keyBuf.Unmarshal(keysOut); err != nil {
		return 0, err
	}

	// NOTE(review): attr.Count is presumably updated by the kernel to the
	// number of elements actually returned; confirm against sys.BPF.
	return int(attr.Count), sysErr
}
  1122  
  1123  // BatchUpdate updates the map with multiple keys and values
  1124  // simultaneously.
  1125  // "keys" and "values" must be of type slice, a pointer
  1126  // to a slice or buffer will not work.
  1127  func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, error) {
  1128  	if m.typ.hasPerCPUValue() {
  1129  		return m.batchUpdatePerCPU(keys, values, opts)
  1130  	}
  1131  
  1132  	count, err := batchCount(keys, values)
  1133  	if err != nil {
  1134  		return 0, err
  1135  	}
  1136  
  1137  	valuePtr, err := marshalMapSyscallInput(values, count*int(m.valueSize))
  1138  	if err != nil {
  1139  		return 0, err
  1140  	}
  1141  
  1142  	return m.batchUpdate(count, keys, valuePtr, opts)
  1143  }
  1144  
  1145  func (m *Map) batchUpdate(count int, keys any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) {
  1146  	keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize))
  1147  	if err != nil {
  1148  		return 0, err
  1149  	}
  1150  
  1151  	attr := sys.MapUpdateBatchAttr{
  1152  		MapFd:  m.fd.Uint(),
  1153  		Keys:   keyPtr,
  1154  		Values: valuePtr,
  1155  		Count:  uint32(count),
  1156  	}
  1157  	if opts != nil {
  1158  		attr.ElemFlags = opts.ElemFlags
  1159  		attr.Flags = opts.Flags
  1160  	}
  1161  
  1162  	err = sys.MapUpdateBatch(&attr)
  1163  	if err != nil {
  1164  		if haveFeatErr := haveBatchAPI(); haveFeatErr != nil {
  1165  			return 0, haveFeatErr
  1166  		}
  1167  		return int(attr.Count), fmt.Errorf("batch update: %w", wrapMapError(err))
  1168  	}
  1169  
  1170  	return int(attr.Count), nil
  1171  }
  1172  
  1173  func (m *Map) batchUpdatePerCPU(keys, values any, opts *BatchOptions) (int, error) {
  1174  	count, err := sliceLen(keys)
  1175  	if err != nil {
  1176  		return 0, fmt.Errorf("keys: %w", err)
  1177  	}
  1178  
  1179  	valueBuf, err := marshalBatchPerCPUValue(values, count, int(m.valueSize))
  1180  	if err != nil {
  1181  		return 0, err
  1182  	}
  1183  
  1184  	return m.batchUpdate(count, keys, sys.NewSlicePointer(valueBuf), opts)
  1185  }
  1186  
  1187  // BatchDelete batch deletes entries in the map by keys.
  1188  // "keys" must be of type slice, a pointer to a slice or buffer will not work.
  1189  func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) {
  1190  	count, err := sliceLen(keys)
  1191  	if err != nil {
  1192  		return 0, fmt.Errorf("keys: %w", err)
  1193  	}
  1194  
  1195  	keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize))
  1196  	if err != nil {
  1197  		return 0, fmt.Errorf("cannot marshal keys: %v", err)
  1198  	}
  1199  
  1200  	attr := sys.MapDeleteBatchAttr{
  1201  		MapFd: m.fd.Uint(),
  1202  		Keys:  keyPtr,
  1203  		Count: uint32(count),
  1204  	}
  1205  
  1206  	if opts != nil {
  1207  		attr.ElemFlags = opts.ElemFlags
  1208  		attr.Flags = opts.Flags
  1209  	}
  1210  
  1211  	if err = sys.MapDeleteBatch(&attr); err != nil {
  1212  		if haveFeatErr := haveBatchAPI(); haveFeatErr != nil {
  1213  			return 0, haveFeatErr
  1214  		}
  1215  		return int(attr.Count), fmt.Errorf("batch delete: %w", wrapMapError(err))
  1216  	}
  1217  
  1218  	return int(attr.Count), nil
  1219  }
  1220  
  1221  func batchCount(keys, values any) (int, error) {
  1222  	keysLen, err := sliceLen(keys)
  1223  	if err != nil {
  1224  		return 0, fmt.Errorf("keys: %w", err)
  1225  	}
  1226  
  1227  	valuesLen, err := sliceLen(values)
  1228  	if err != nil {
  1229  		return 0, fmt.Errorf("values: %w", err)
  1230  	}
  1231  
  1232  	if keysLen != valuesLen {
  1233  		return 0, fmt.Errorf("keys and values must have the same length")
  1234  	}
  1235  
  1236  	return keysLen, nil
  1237  }
  1238  
// Iterate traverses a map.
//
// It returns a new MapIterator for m; call Next on it to step through the
// entries.
//
// It's safe to create multiple iterators at the same time.
//
// It's not possible to guarantee that all keys in a map will be
// returned if there are concurrent modifications to the map.
func (m *Map) Iterate() *MapIterator {
	return newMapIterator(m)
}
  1248  
  1249  // Close the Map's underlying file descriptor, which could unload the
  1250  // Map from the kernel if it is not pinned or in use by a loaded Program.
  1251  func (m *Map) Close() error {
  1252  	if m == nil {
  1253  		// This makes it easier to clean up when iterating maps
  1254  		// of maps / programs.
  1255  		return nil
  1256  	}
  1257  
  1258  	return m.fd.Close()
  1259  }
  1260  
// FD gets the file descriptor of the Map.
//
// The descriptor remains owned by the Map.
// Calling this function is invalid after Close has been called.
func (m *Map) FD() int {
	return m.fd.Int()
}
  1267  
  1268  // Clone creates a duplicate of the Map.
  1269  //
  1270  // Closing the duplicate does not affect the original, and vice versa.
  1271  // Changes made to the map are reflected by both instances however.
  1272  // If the original map was pinned, the cloned map will not be pinned by default.
  1273  //
  1274  // Cloning a nil Map returns nil.
  1275  func (m *Map) Clone() (*Map, error) {
  1276  	if m == nil {
  1277  		return nil, nil
  1278  	}
  1279  
  1280  	dup, err := m.fd.Dup()
  1281  	if err != nil {
  1282  		return nil, fmt.Errorf("can't clone map: %w", err)
  1283  	}
  1284  
  1285  	return &Map{
  1286  		m.name,
  1287  		dup,
  1288  		m.typ,
  1289  		m.keySize,
  1290  		m.valueSize,
  1291  		m.maxEntries,
  1292  		m.flags,
  1293  		"",
  1294  		m.fullValueSize,
  1295  	}, nil
  1296  }
  1297  
  1298  // Pin persists the map on the BPF virtual file system past the lifetime of
  1299  // the process that created it .
  1300  //
  1301  // Calling Pin on a previously pinned map will overwrite the path, except when
  1302  // the new path already exists. Re-pinning across filesystems is not supported.
  1303  // You can Clone a map to pin it to a different path.
  1304  //
  1305  // This requires bpffs to be mounted above fileName.
  1306  // See https://docs.cilium.io/en/stable/network/kubernetes/configuration/#mounting-bpffs-with-systemd
  1307  func (m *Map) Pin(fileName string) error {
  1308  	if err := internal.Pin(m.pinnedPath, fileName, m.fd); err != nil {
  1309  		return err
  1310  	}
  1311  	m.pinnedPath = fileName
  1312  	return nil
  1313  }
  1314  
  1315  // Unpin removes the persisted state for the map from the BPF virtual filesystem.
  1316  //
  1317  // Failed calls to Unpin will not alter the state returned by IsPinned.
  1318  //
  1319  // Unpinning an unpinned Map returns nil.
  1320  func (m *Map) Unpin() error {
  1321  	if err := internal.Unpin(m.pinnedPath); err != nil {
  1322  		return err
  1323  	}
  1324  	m.pinnedPath = ""
  1325  	return nil
  1326  }
  1327  
  1328  // IsPinned returns true if the map has a non-empty pinned path.
  1329  func (m *Map) IsPinned() bool {
  1330  	return m.pinnedPath != ""
  1331  }
  1332  
  1333  // Freeze prevents a map to be modified from user space.
  1334  //
  1335  // It makes no changes to kernel-side restrictions.
  1336  func (m *Map) Freeze() error {
  1337  	attr := sys.MapFreezeAttr{
  1338  		MapFd: m.fd.Uint(),
  1339  	}
  1340  
  1341  	if err := sys.MapFreeze(&attr); err != nil {
  1342  		if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil {
  1343  			return fmt.Errorf("can't freeze map: %w", haveFeatErr)
  1344  		}
  1345  		return fmt.Errorf("can't freeze map: %w", err)
  1346  	}
  1347  	return nil
  1348  }
  1349  
  1350  // finalize populates the Map according to the Contents specified
  1351  // in spec and freezes the Map if requested by spec.
  1352  func (m *Map) finalize(spec *MapSpec) error {
  1353  	for _, kv := range spec.Contents {
  1354  		if err := m.Put(kv.Key, kv.Value); err != nil {
  1355  			return fmt.Errorf("putting value: key %v: %w", kv.Key, err)
  1356  		}
  1357  	}
  1358  
  1359  	if spec.Freeze {
  1360  		if err := m.Freeze(); err != nil {
  1361  			return fmt.Errorf("freezing map: %w", err)
  1362  		}
  1363  	}
  1364  
  1365  	return nil
  1366  }
  1367  
  1368  func (m *Map) marshalKey(data interface{}) (sys.Pointer, error) {
  1369  	if data == nil {
  1370  		if m.keySize == 0 {
  1371  			// Queues have a key length of zero, so passing nil here is valid.
  1372  			return sys.NewPointer(nil), nil
  1373  		}
  1374  		return sys.Pointer{}, errors.New("can't use nil as key of map")
  1375  	}
  1376  
  1377  	return marshalMapSyscallInput(data, int(m.keySize))
  1378  }
  1379  
  1380  func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) {
  1381  	var (
  1382  		buf []byte
  1383  		err error
  1384  	)
  1385  
  1386  	switch value := data.(type) {
  1387  	case *Map:
  1388  		if !m.typ.canStoreMap() {
  1389  			return sys.Pointer{}, fmt.Errorf("can't store map in %s", m.typ)
  1390  		}
  1391  		buf, err = marshalMap(value, int(m.valueSize))
  1392  
  1393  	case *Program:
  1394  		if !m.typ.canStoreProgram() {
  1395  			return sys.Pointer{}, fmt.Errorf("can't store program in %s", m.typ)
  1396  		}
  1397  		buf, err = marshalProgram(value, int(m.valueSize))
  1398  
  1399  	default:
  1400  		return marshalMapSyscallInput(data, int(m.valueSize))
  1401  	}
  1402  
  1403  	if err != nil {
  1404  		return sys.Pointer{}, err
  1405  	}
  1406  
  1407  	return sys.NewSlicePointer(buf), nil
  1408  }
  1409  
  1410  func (m *Map) unmarshalValue(value any, buf sysenc.Buffer) error {
  1411  	switch value := value.(type) {
  1412  	case **Map:
  1413  		if !m.typ.canStoreMap() {
  1414  			return fmt.Errorf("can't read a map from %s", m.typ)
  1415  		}
  1416  
  1417  		other, err := unmarshalMap(buf)
  1418  		if err != nil {
  1419  			return err
  1420  		}
  1421  
  1422  		// The caller might close the map externally, so ignore errors.
  1423  		_ = (*value).Close()
  1424  
  1425  		*value = other
  1426  		return nil
  1427  
  1428  	case *Map:
  1429  		if !m.typ.canStoreMap() {
  1430  			return fmt.Errorf("can't read a map from %s", m.typ)
  1431  		}
  1432  		return errors.New("require pointer to *Map")
  1433  
  1434  	case **Program:
  1435  		if !m.typ.canStoreProgram() {
  1436  			return fmt.Errorf("can't read a program from %s", m.typ)
  1437  		}
  1438  
  1439  		other, err := unmarshalProgram(buf)
  1440  		if err != nil {
  1441  			return err
  1442  		}
  1443  
  1444  		// The caller might close the program externally, so ignore errors.
  1445  		_ = (*value).Close()
  1446  
  1447  		*value = other
  1448  		return nil
  1449  
  1450  	case *Program:
  1451  		if !m.typ.canStoreProgram() {
  1452  			return fmt.Errorf("can't read a program from %s", m.typ)
  1453  		}
  1454  		return errors.New("require pointer to *Program")
  1455  	}
  1456  
  1457  	return buf.Unmarshal(value)
  1458  }
  1459  
  1460  // LoadPinnedMap loads a Map from a BPF file.
  1461  func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) {
  1462  	fd, err := sys.ObjGet(&sys.ObjGetAttr{
  1463  		Pathname:  sys.NewStringPointer(fileName),
  1464  		FileFlags: opts.Marshal(),
  1465  	})
  1466  	if err != nil {
  1467  		return nil, err
  1468  	}
  1469  
  1470  	m, err := newMapFromFD(fd)
  1471  	if err == nil {
  1472  		m.pinnedPath = fileName
  1473  	}
  1474  
  1475  	return m, err
  1476  }
  1477  
  1478  // unmarshalMap creates a map from a map ID encoded in host endianness.
  1479  func unmarshalMap(buf sysenc.Buffer) (*Map, error) {
  1480  	var id uint32
  1481  	if err := buf.Unmarshal(&id); err != nil {
  1482  		return nil, err
  1483  	}
  1484  	return NewMapFromID(MapID(id))
  1485  }
  1486  
  1487  // marshalMap marshals the fd of a map into a buffer in host endianness.
  1488  func marshalMap(m *Map, length int) ([]byte, error) {
  1489  	if length != 4 {
  1490  		return nil, fmt.Errorf("can't marshal map to %d bytes", length)
  1491  	}
  1492  
  1493  	buf := make([]byte, 4)
  1494  	internal.NativeEndian.PutUint32(buf, m.fd.Uint())
  1495  	return buf, nil
  1496  }
  1497  
// MapIterator iterates a Map.
//
// See Map.Iterate.
type MapIterator struct {
	// target is the Map being iterated.
	target *Map
	// Temporary storage to avoid allocations in Next(). This is any instead
	// of []byte to avoid allocations.
	// It holds the most recently fetched key and is nil until the first call
	// to Next.
	cursor any
	// count is the number of keys fetched so far. maxEntries is copied from
	// the target Map and bounds the number of keys Next will fetch.
	count, maxEntries uint32
	// done is set once the end of the map has been reached.
	done bool
	// err is the error reported by Err. Once set, Next returns false.
	err error
}
  1510  
  1511  func newMapIterator(target *Map) *MapIterator {
  1512  	return &MapIterator{
  1513  		target:     target,
  1514  		maxEntries: target.maxEntries,
  1515  	}
  1516  }
  1517  
// Next decodes the next key and value.
//
// Iterating a hash map from which keys are being deleted is not
// safe. You may see the same key multiple times. Iteration may
// also abort with an error, in which case Err returns an error
// wrapping ErrIterationAborted.
//
// Returns false if there are no more entries. You must check
// the result of Err afterwards.
//
// keyOut may also be an unsafe.Pointer, in which case the raw key bytes are
// copied to the memory it points to.
//
// See Map.Get for further caveats around valueOut.
func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
	// A previous error or the end of the map is sticky.
	if mi.err != nil || mi.done {
		return false
	}

	// For array-like maps NextKey returns nil only after maxEntries
	// iterations.
	for mi.count <= mi.maxEntries {
		if mi.cursor == nil {
			// Pass nil interface to NextKey to make sure the Map's first key
			// is returned. If we pass an uninitialized []byte instead, it'll see a
			// non-nil interface and try to marshal it.
			mi.cursor = make([]byte, mi.target.keySize)
			mi.err = mi.target.NextKey(nil, mi.cursor)
		} else {
			// The cursor serves as both the start key and the output buffer.
			mi.err = mi.target.NextKey(mi.cursor, mi.cursor)
		}

		if errors.Is(mi.err, ErrKeyNotExist) {
			// No further keys: this is the regular end of the iteration.
			mi.done = true
			mi.err = nil
			return false
		} else if mi.err != nil {
			mi.err = fmt.Errorf("get next key: %w", mi.err)
			return false
		}

		mi.count++
		mi.err = mi.target.Lookup(mi.cursor, valueOut)
		if errors.Is(mi.err, ErrKeyNotExist) {
			// Even though the key should be valid, we couldn't look up
			// its value. If we're iterating a hash map this is probably
			// because a concurrent delete removed the value before we
			// could get it. This means that the next call to NextKeyBytes
			// is very likely to restart iteration.
			// If we're iterating one of the fd maps like
			// ProgramArray it means that a given slot doesn't have
			// a valid fd associated. It's OK to continue to the next slot.
			continue
		}
		if mi.err != nil {
			mi.err = fmt.Errorf("look up next key: %w", mi.err)
			return false
		}

		// Hand the key to the caller, either as a raw copy or decoded.
		buf := mi.cursor.([]byte)
		if ptr, ok := keyOut.(unsafe.Pointer); ok {
			copy(unsafe.Slice((*byte)(ptr), len(buf)), buf)
		} else {
			mi.err = sysenc.Unmarshal(keyOut, buf)
		}

		return mi.err == nil
	}

	// More keys were fetched than the map can hold, so give up.
	mi.err = fmt.Errorf("%w", ErrIterationAborted)
	return false
}
  1586  
// Err returns any encountered error.
//
// The method must be called after Next returns false.
//
// Returns ErrIterationAborted if it wasn't possible to do a full iteration.
func (mi *MapIterator) Err() error {
	return mi.err
}
  1595  
  1596  // MapGetNextID returns the ID of the next eBPF map.
  1597  //
  1598  // Returns ErrNotExist, if there is no next eBPF map.
  1599  func MapGetNextID(startID MapID) (MapID, error) {
  1600  	attr := &sys.MapGetNextIdAttr{Id: uint32(startID)}
  1601  	return MapID(attr.NextId), sys.MapGetNextId(attr)
  1602  }
  1603  
  1604  // NewMapFromID returns the map for a given id.
  1605  //
  1606  // Returns ErrNotExist, if there is no eBPF map with the given id.
  1607  func NewMapFromID(id MapID) (*Map, error) {
  1608  	fd, err := sys.MapGetFdById(&sys.MapGetFdByIdAttr{
  1609  		Id: uint32(id),
  1610  	})
  1611  	if err != nil {
  1612  		return nil, err
  1613  	}
  1614  
  1615  	return newMapFromFD(fd)
  1616  }
  1617  
  1618  // sliceLen returns the length if the value is a slice or an error otherwise.
  1619  func sliceLen(slice any) (int, error) {
  1620  	sliceValue := reflect.ValueOf(slice)
  1621  	if sliceValue.Kind() != reflect.Slice {
  1622  		return 0, fmt.Errorf("%T is not a slice", slice)
  1623  	}
  1624  	return sliceValue.Len(), nil
  1625  }