github.com/cilium/ebpf@v0.16.0/map.go (about)

     1  package ebpf
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"math/rand"
     9  	"os"
    10  	"path/filepath"
    11  	"reflect"
    12  	"slices"
    13  	"strings"
    14  	"sync"
    15  	"time"
    16  	"unsafe"
    17  
    18  	"github.com/cilium/ebpf/btf"
    19  	"github.com/cilium/ebpf/internal"
    20  	"github.com/cilium/ebpf/internal/sys"
    21  	"github.com/cilium/ebpf/internal/sysenc"
    22  	"github.com/cilium/ebpf/internal/unix"
    23  )
    24  
    25  // Errors returned by Map and MapIterator methods.
    26  var (
    27  	ErrKeyNotExist      = errors.New("key does not exist")
    28  	ErrKeyExist         = errors.New("key already exists")
    29  	ErrIterationAborted = errors.New("iteration aborted")
    30  	ErrMapIncompatible  = errors.New("map spec is incompatible with existing map")
    31  	errMapNoBTFValue    = errors.New("map spec does not contain a BTF Value")
    32  
    33  	// pre-allocating these errors here since they may get called in hot code paths
    34  	// and cause unnecessary memory allocations
    35  	errMapLookupKeyNotExist = fmt.Errorf("lookup: %w", sysErrKeyNotExist)
    36  )
    37  
    38  // MapOptions control loading a map into the kernel.
    39  type MapOptions struct {
    40  	// The base path to pin maps in if requested via PinByName.
    41  	// Existing maps will be re-used if they are compatible, otherwise an
    42  	// error is returned.
    43  	PinPath        string
    44  	LoadPinOptions LoadPinOptions
    45  }
    46  
    47  // MapID represents the unique ID of an eBPF map
    48  type MapID uint32
    49  
    50  // MapSpec defines a Map.
    51  type MapSpec struct {
    52  	// Name is passed to the kernel as a debug aid. Must only contain
    53  	// alpha numeric and '_' characters.
    54  	Name       string
    55  	Type       MapType
    56  	KeySize    uint32
    57  	ValueSize  uint32
    58  	MaxEntries uint32
    59  
    60  	// Flags is passed to the kernel and specifies additional map
    61  	// creation attributes.
    62  	Flags uint32
    63  
    64  	// Automatically pin and load a map from MapOptions.PinPath.
    65  	// Generates an error if an existing pinned map is incompatible with the MapSpec.
    66  	Pinning PinType
    67  
    68  	// Specify numa node during map creation
    69  	// (effective only if unix.BPF_F_NUMA_NODE flag is set,
    70  	// which can be imported from golang.org/x/sys/unix)
    71  	NumaNode uint32
    72  
    73  	// The initial contents of the map. May be nil.
    74  	Contents []MapKV
    75  
    76  	// Whether to freeze a map after setting its initial contents.
    77  	Freeze bool
    78  
    79  	// InnerMap is used as a template for ArrayOfMaps and HashOfMaps
    80  	InnerMap *MapSpec
    81  
    82  	// Extra trailing bytes found in the ELF map definition when using structs
    83  	// larger than libbpf's bpf_map_def. nil if no trailing bytes were present.
    84  	// Must be nil or empty before instantiating the MapSpec into a Map.
    85  	Extra *bytes.Reader
    86  
    87  	// The key and value type of this map. May be nil.
    88  	Key, Value btf.Type
    89  }
    90  
    91  func (ms *MapSpec) String() string {
    92  	return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags)
    93  }
    94  
    95  // Copy returns a copy of the spec.
    96  //
    97  // MapSpec.Contents is a shallow copy.
    98  func (ms *MapSpec) Copy() *MapSpec {
    99  	if ms == nil {
   100  		return nil
   101  	}
   102  
   103  	cpy := *ms
   104  	cpy.Contents = slices.Clone(cpy.Contents)
   105  	cpy.Key = btf.Copy(cpy.Key)
   106  	cpy.Value = btf.Copy(cpy.Value)
   107  
   108  	if cpy.InnerMap == ms {
   109  		cpy.InnerMap = &cpy
   110  	} else {
   111  		cpy.InnerMap = ms.InnerMap.Copy()
   112  	}
   113  
   114  	if cpy.Extra != nil {
   115  		extra := *cpy.Extra
   116  		cpy.Extra = &extra
   117  	}
   118  
   119  	return &cpy
   120  }
   121  
   122  // fixupMagicFields fills fields of MapSpec which are usually
   123  // left empty in ELF or which depend on runtime information.
   124  //
   125  // The method doesn't modify Spec, instead returning a copy.
   126  // The copy is only performed if fixups are necessary, so callers mustn't mutate
   127  // the returned spec.
   128  func (spec *MapSpec) fixupMagicFields() (*MapSpec, error) {
   129  	switch spec.Type {
   130  	case ArrayOfMaps, HashOfMaps:
   131  		if spec.ValueSize != 0 && spec.ValueSize != 4 {
   132  			return nil, errors.New("ValueSize must be zero or four for map of map")
   133  		}
   134  
   135  		spec = spec.Copy()
   136  		spec.ValueSize = 4
   137  
   138  	case PerfEventArray:
   139  		if spec.KeySize != 0 && spec.KeySize != 4 {
   140  			return nil, errors.New("KeySize must be zero or four for perf event array")
   141  		}
   142  
   143  		if spec.ValueSize != 0 && spec.ValueSize != 4 {
   144  			return nil, errors.New("ValueSize must be zero or four for perf event array")
   145  		}
   146  
   147  		spec = spec.Copy()
   148  		spec.KeySize = 4
   149  		spec.ValueSize = 4
   150  
   151  		n, err := PossibleCPU()
   152  		if err != nil {
   153  			return nil, fmt.Errorf("fixup perf event array: %w", err)
   154  		}
   155  
   156  		if n := uint32(n); spec.MaxEntries == 0 || spec.MaxEntries > n {
   157  			// MaxEntries should be zero most of the time, but there is code
   158  			// out there which hardcodes large constants. Clamp the number
   159  			// of entries to the number of CPUs at most. Allow creating maps with
   160  			// less than n items since some kernel selftests relied on this
   161  			// behaviour in the past.
   162  			spec.MaxEntries = n
   163  		}
   164  	}
   165  
   166  	return spec, nil
   167  }
   168  
   169  // dataSection returns the contents and BTF Datasec descriptor of the spec.
   170  func (ms *MapSpec) dataSection() ([]byte, *btf.Datasec, error) {
   171  	if ms.Value == nil {
   172  		return nil, nil, errMapNoBTFValue
   173  	}
   174  
   175  	ds, ok := ms.Value.(*btf.Datasec)
   176  	if !ok {
   177  		return nil, nil, fmt.Errorf("map value BTF is a %T, not a *btf.Datasec", ms.Value)
   178  	}
   179  
   180  	if n := len(ms.Contents); n != 1 {
   181  		return nil, nil, fmt.Errorf("expected one key, found %d", n)
   182  	}
   183  
   184  	kv := ms.Contents[0]
   185  	value, ok := kv.Value.([]byte)
   186  	if !ok {
   187  		return nil, nil, fmt.Errorf("value at first map key is %T, not []byte", kv.Value)
   188  	}
   189  
   190  	return value, ds, nil
   191  }
   192  
   193  // MapKV is used to initialize the contents of a Map.
   194  type MapKV struct {
   195  	Key   interface{}
   196  	Value interface{}
   197  }
   198  
   199  // Compatible returns nil if an existing map may be used instead of creating
   200  // one from the spec.
   201  //
   202  // Returns an error wrapping [ErrMapIncompatible] otherwise.
   203  func (ms *MapSpec) Compatible(m *Map) error {
   204  	ms, err := ms.fixupMagicFields()
   205  	if err != nil {
   206  		return err
   207  	}
   208  
   209  	diffs := []string{}
   210  	if m.typ != ms.Type {
   211  		diffs = append(diffs, fmt.Sprintf("Type: %s changed to %s", m.typ, ms.Type))
   212  	}
   213  	if m.keySize != ms.KeySize {
   214  		diffs = append(diffs, fmt.Sprintf("KeySize: %d changed to %d", m.keySize, ms.KeySize))
   215  	}
   216  	if m.valueSize != ms.ValueSize {
   217  		diffs = append(diffs, fmt.Sprintf("ValueSize: %d changed to %d", m.valueSize, ms.ValueSize))
   218  	}
   219  	if m.maxEntries != ms.MaxEntries {
   220  		diffs = append(diffs, fmt.Sprintf("MaxEntries: %d changed to %d", m.maxEntries, ms.MaxEntries))
   221  	}
   222  
   223  	// BPF_F_RDONLY_PROG is set unconditionally for devmaps. Explicitly allow this
   224  	// mismatch.
   225  	if !((ms.Type == DevMap || ms.Type == DevMapHash) && m.flags^ms.Flags == unix.BPF_F_RDONLY_PROG) &&
   226  		m.flags != ms.Flags {
   227  		diffs = append(diffs, fmt.Sprintf("Flags: %d changed to %d", m.flags, ms.Flags))
   228  	}
   229  
   230  	if len(diffs) == 0 {
   231  		return nil
   232  	}
   233  
   234  	return fmt.Errorf("%s: %w", strings.Join(diffs, ", "), ErrMapIncompatible)
   235  }
   236  
   237  // Map represents a Map file descriptor.
   238  //
   239  // It is not safe to close a map which is used by other goroutines.
   240  //
   241  // Methods which take interface{} arguments by default encode
   242  // them using binary.Read/Write in the machine's native endianness.
   243  //
   244  // Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler
   245  // if you require custom encoding.
   246  type Map struct {
   247  	name       string
   248  	fd         *sys.FD
   249  	typ        MapType
   250  	keySize    uint32
   251  	valueSize  uint32
   252  	maxEntries uint32
   253  	flags      uint32
   254  	pinnedPath string
   255  	// Per CPU maps return values larger than the size in the spec
   256  	fullValueSize int
   257  }
   258  
   259  // NewMapFromFD creates a map from a raw fd.
   260  //
   261  // You should not use fd after calling this function.
   262  func NewMapFromFD(fd int) (*Map, error) {
   263  	f, err := sys.NewFD(fd)
   264  	if err != nil {
   265  		return nil, err
   266  	}
   267  
   268  	return newMapFromFD(f)
   269  }
   270  
   271  func newMapFromFD(fd *sys.FD) (*Map, error) {
   272  	info, err := newMapInfoFromFd(fd)
   273  	if err != nil {
   274  		fd.Close()
   275  		return nil, fmt.Errorf("get map info: %w", err)
   276  	}
   277  
   278  	return newMap(fd, info.Name, info.Type, info.KeySize, info.ValueSize, info.MaxEntries, info.Flags)
   279  }
   280  
   281  // NewMap creates a new Map.
   282  //
   283  // It's equivalent to calling NewMapWithOptions with default options.
   284  func NewMap(spec *MapSpec) (*Map, error) {
   285  	return NewMapWithOptions(spec, MapOptions{})
   286  }
   287  
   288  // NewMapWithOptions creates a new Map.
   289  //
   290  // Creating a map for the first time will perform feature detection
   291  // by creating small, temporary maps.
   292  //
   293  // The caller is responsible for ensuring the process' rlimit is set
   294  // sufficiently high for locking memory during map creation. This can be done
   295  // by calling rlimit.RemoveMemlock() prior to calling NewMapWithOptions.
   296  //
   297  // May return an error wrapping ErrMapIncompatible.
   298  func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) {
   299  	m, err := newMapWithOptions(spec, opts)
   300  	if err != nil {
   301  		return nil, fmt.Errorf("creating map: %w", err)
   302  	}
   303  
   304  	if err := m.finalize(spec); err != nil {
   305  		m.Close()
   306  		return nil, fmt.Errorf("populating map: %w", err)
   307  	}
   308  
   309  	return m, nil
   310  }
   311  
   312  func newMapWithOptions(spec *MapSpec, opts MapOptions) (_ *Map, err error) {
   313  	closeOnError := func(c io.Closer) {
   314  		if err != nil {
   315  			c.Close()
   316  		}
   317  	}
   318  
   319  	switch spec.Pinning {
   320  	case PinByName:
   321  		if spec.Name == "" {
   322  			return nil, fmt.Errorf("pin by name: missing Name")
   323  		}
   324  
   325  		if opts.PinPath == "" {
   326  			return nil, fmt.Errorf("pin by name: missing MapOptions.PinPath")
   327  		}
   328  
   329  		path := filepath.Join(opts.PinPath, spec.Name)
   330  		m, err := LoadPinnedMap(path, &opts.LoadPinOptions)
   331  		if errors.Is(err, unix.ENOENT) {
   332  			break
   333  		}
   334  		if err != nil {
   335  			return nil, fmt.Errorf("load pinned map: %w", err)
   336  		}
   337  		defer closeOnError(m)
   338  
   339  		if err := spec.Compatible(m); err != nil {
   340  			return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err)
   341  		}
   342  
   343  		return m, nil
   344  
   345  	case PinNone:
   346  		// Nothing to do here
   347  
   348  	default:
   349  		return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported)
   350  	}
   351  
   352  	var innerFd *sys.FD
   353  	if spec.Type == ArrayOfMaps || spec.Type == HashOfMaps {
   354  		if spec.InnerMap == nil {
   355  			return nil, fmt.Errorf("%s requires InnerMap", spec.Type)
   356  		}
   357  
   358  		if spec.InnerMap.Pinning != PinNone {
   359  			return nil, errors.New("inner maps cannot be pinned")
   360  		}
   361  
   362  		template, err := spec.InnerMap.createMap(nil, opts)
   363  		if err != nil {
   364  			return nil, fmt.Errorf("inner map: %w", err)
   365  		}
   366  		defer template.Close()
   367  
   368  		// Intentionally skip populating and freezing (finalizing)
   369  		// the inner map template since it will be removed shortly.
   370  
   371  		innerFd = template.fd
   372  	}
   373  
   374  	m, err := spec.createMap(innerFd, opts)
   375  	if err != nil {
   376  		return nil, err
   377  	}
   378  	defer closeOnError(m)
   379  
   380  	if spec.Pinning == PinByName {
   381  		path := filepath.Join(opts.PinPath, spec.Name)
   382  		if err := m.Pin(path); err != nil {
   383  			return nil, fmt.Errorf("pin map to %s: %w", path, err)
   384  		}
   385  	}
   386  
   387  	return m, nil
   388  }
   389  
   390  // createMap validates the spec's properties and creates the map in the kernel
   391  // using the given opts. It does not populate or freeze the map.
   392  func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions) (_ *Map, err error) {
   393  	closeOnError := func(closer io.Closer) {
   394  		if err != nil {
   395  			closer.Close()
   396  		}
   397  	}
   398  
   399  	// Kernels 4.13 through 5.4 used a struct bpf_map_def that contained
   400  	// additional 'inner_map_idx' and later 'numa_node' fields.
   401  	// In order to support loading these definitions, tolerate the presence of
   402  	// extra bytes, but require them to be zeroes.
   403  	if spec.Extra != nil {
   404  		if _, err := io.Copy(internal.DiscardZeroes{}, spec.Extra); err != nil {
   405  			return nil, errors.New("extra contains unhandled non-zero bytes, drain before creating map")
   406  		}
   407  	}
   408  
   409  	spec, err = spec.fixupMagicFields()
   410  	if err != nil {
   411  		return nil, err
   412  	}
   413  
   414  	attr := sys.MapCreateAttr{
   415  		MapType:    sys.MapType(spec.Type),
   416  		KeySize:    spec.KeySize,
   417  		ValueSize:  spec.ValueSize,
   418  		MaxEntries: spec.MaxEntries,
   419  		MapFlags:   sys.MapFlags(spec.Flags),
   420  		NumaNode:   spec.NumaNode,
   421  	}
   422  
   423  	if inner != nil {
   424  		attr.InnerMapFd = inner.Uint()
   425  	}
   426  
   427  	if haveObjName() == nil {
   428  		attr.MapName = sys.NewObjName(spec.Name)
   429  	}
   430  
   431  	if spec.Key != nil || spec.Value != nil {
   432  		handle, keyTypeID, valueTypeID, err := btf.MarshalMapKV(spec.Key, spec.Value)
   433  		if err != nil && !errors.Is(err, btf.ErrNotSupported) {
   434  			return nil, fmt.Errorf("load BTF: %w", err)
   435  		}
   436  
   437  		if handle != nil {
   438  			defer handle.Close()
   439  
   440  			// Use BTF k/v during map creation.
   441  			attr.BtfFd = uint32(handle.FD())
   442  			attr.BtfKeyTypeId = keyTypeID
   443  			attr.BtfValueTypeId = valueTypeID
   444  		}
   445  	}
   446  
   447  	fd, err := sys.MapCreate(&attr)
   448  
   449  	// Some map types don't support BTF k/v in earlier kernel versions.
   450  	// Remove BTF metadata and retry map creation.
   451  	if (errors.Is(err, sys.ENOTSUPP) || errors.Is(err, unix.EINVAL)) && attr.BtfFd != 0 {
   452  		attr.BtfFd, attr.BtfKeyTypeId, attr.BtfValueTypeId = 0, 0, 0
   453  		fd, err = sys.MapCreate(&attr)
   454  	}
   455  	if err != nil {
   456  		return nil, handleMapCreateError(attr, spec, err)
   457  	}
   458  
   459  	defer closeOnError(fd)
   460  	m, err := newMap(fd, spec.Name, spec.Type, spec.KeySize, spec.ValueSize, spec.MaxEntries, spec.Flags)
   461  	if err != nil {
   462  		return nil, fmt.Errorf("map create: %w", err)
   463  	}
   464  	return m, nil
   465  }
   466  
   467  func handleMapCreateError(attr sys.MapCreateAttr, spec *MapSpec, err error) error {
   468  	if errors.Is(err, unix.EPERM) {
   469  		return fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err)
   470  	}
   471  	if errors.Is(err, unix.EINVAL) && spec.MaxEntries == 0 {
   472  		return fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err)
   473  	}
   474  	if errors.Is(err, unix.EINVAL) && spec.Type == UnspecifiedMap {
   475  		return fmt.Errorf("map create: cannot use type %s", UnspecifiedMap)
   476  	}
   477  	if errors.Is(err, unix.EINVAL) && spec.Flags&unix.BPF_F_NO_PREALLOC > 0 {
   478  		return fmt.Errorf("map create: %w (noPrealloc flag may be incompatible with map type %s)", err, spec.Type)
   479  	}
   480  
   481  	switch spec.Type {
   482  	case ArrayOfMaps, HashOfMaps:
   483  		if haveFeatErr := haveNestedMaps(); haveFeatErr != nil {
   484  			return fmt.Errorf("map create: %w", haveFeatErr)
   485  		}
   486  	}
   487  	if spec.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze {
   488  		if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil {
   489  			return fmt.Errorf("map create: %w", haveFeatErr)
   490  		}
   491  	}
   492  	if spec.Flags&unix.BPF_F_MMAPABLE > 0 {
   493  		if haveFeatErr := haveMmapableMaps(); haveFeatErr != nil {
   494  			return fmt.Errorf("map create: %w", haveFeatErr)
   495  		}
   496  	}
   497  	if spec.Flags&unix.BPF_F_INNER_MAP > 0 {
   498  		if haveFeatErr := haveInnerMaps(); haveFeatErr != nil {
   499  			return fmt.Errorf("map create: %w", haveFeatErr)
   500  		}
   501  	}
   502  	if spec.Flags&unix.BPF_F_NO_PREALLOC > 0 {
   503  		if haveFeatErr := haveNoPreallocMaps(); haveFeatErr != nil {
   504  			return fmt.Errorf("map create: %w", haveFeatErr)
   505  		}
   506  	}
   507  	// BPF_MAP_TYPE_RINGBUF's max_entries must be a power-of-2 multiple of kernel's page size.
   508  	if errors.Is(err, unix.EINVAL) &&
   509  		(attr.MapType == sys.BPF_MAP_TYPE_RINGBUF || attr.MapType == sys.BPF_MAP_TYPE_USER_RINGBUF) {
   510  		pageSize := uint32(os.Getpagesize())
   511  		maxEntries := attr.MaxEntries
   512  		if maxEntries%pageSize != 0 || !internal.IsPow(maxEntries) {
   513  			return fmt.Errorf("map create: %w (ring map size %d not a multiple of page size %d)", err, maxEntries, pageSize)
   514  		}
   515  	}
   516  
   517  	return fmt.Errorf("map create: %w", err)
   518  }
   519  
   520  // newMap allocates and returns a new Map structure.
   521  // Sets the fullValueSize on per-CPU maps.
   522  func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) {
   523  	m := &Map{
   524  		name,
   525  		fd,
   526  		typ,
   527  		keySize,
   528  		valueSize,
   529  		maxEntries,
   530  		flags,
   531  		"",
   532  		int(valueSize),
   533  	}
   534  
   535  	if !typ.hasPerCPUValue() {
   536  		return m, nil
   537  	}
   538  
   539  	possibleCPUs, err := PossibleCPU()
   540  	if err != nil {
   541  		return nil, err
   542  	}
   543  
   544  	m.fullValueSize = int(internal.Align(valueSize, 8)) * possibleCPUs
   545  	return m, nil
   546  }
   547  
   548  func (m *Map) String() string {
   549  	if m.name != "" {
   550  		return fmt.Sprintf("%s(%s)#%v", m.typ, m.name, m.fd)
   551  	}
   552  	return fmt.Sprintf("%s#%v", m.typ, m.fd)
   553  }
   554  
   555  // Type returns the underlying type of the map.
   556  func (m *Map) Type() MapType {
   557  	return m.typ
   558  }
   559  
   560  // KeySize returns the size of the map key in bytes.
   561  func (m *Map) KeySize() uint32 {
   562  	return m.keySize
   563  }
   564  
   565  // ValueSize returns the size of the map value in bytes.
   566  func (m *Map) ValueSize() uint32 {
   567  	return m.valueSize
   568  }
   569  
   570  // MaxEntries returns the maximum number of elements the map can hold.
   571  func (m *Map) MaxEntries() uint32 {
   572  	return m.maxEntries
   573  }
   574  
   575  // Flags returns the flags of the map.
   576  func (m *Map) Flags() uint32 {
   577  	return m.flags
   578  }
   579  
   580  // Info returns metadata about the map.
   581  func (m *Map) Info() (*MapInfo, error) {
   582  	return newMapInfoFromFd(m.fd)
   583  }
   584  
   585  // Handle returns a reference to the Map's type information in the kernel.
   586  //
   587  // Returns ErrNotSupported if the kernel has no BTF support, or if there is no
   588  // BTF associated with the Map.
   589  func (m *Map) Handle() (*btf.Handle, error) {
   590  	info, err := m.Info()
   591  	if err != nil {
   592  		return nil, err
   593  	}
   594  
   595  	id, ok := info.BTFID()
   596  	if !ok {
   597  		return nil, fmt.Errorf("map %s: retrieve BTF ID: %w", m, ErrNotSupported)
   598  	}
   599  
   600  	return btf.NewHandleFromID(id)
   601  }
   602  
   603  // MapLookupFlags controls the behaviour of the map lookup calls.
   604  type MapLookupFlags uint64
   605  
   606  // LookupLock look up the value of a spin-locked map.
   607  const LookupLock MapLookupFlags = unix.BPF_F_LOCK
   608  
   609  // Lookup retrieves a value from a Map.
   610  //
   611  // Calls Close() on valueOut if it is of type **Map or **Program,
   612  // and *valueOut is not nil.
   613  //
   614  // Returns an error if the key doesn't exist, see ErrKeyNotExist.
   615  func (m *Map) Lookup(key, valueOut interface{}) error {
   616  	return m.LookupWithFlags(key, valueOut, 0)
   617  }
   618  
   619  // LookupWithFlags retrieves a value from a Map with flags.
   620  //
   621  // Passing LookupLock flag will look up the value of a spin-locked
   622  // map without returning the lock. This must be specified if the
   623  // elements contain a spinlock.
   624  //
   625  // Calls Close() on valueOut if it is of type **Map or **Program,
   626  // and *valueOut is not nil.
   627  //
   628  // Returns an error if the key doesn't exist, see ErrKeyNotExist.
   629  func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
   630  	if m.typ.hasPerCPUValue() {
   631  		return m.lookupPerCPU(key, valueOut, flags)
   632  	}
   633  
   634  	valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize)
   635  	if err := m.lookup(key, valueBytes.Pointer(), flags); err != nil {
   636  		return err
   637  	}
   638  
   639  	return m.unmarshalValue(valueOut, valueBytes)
   640  }
   641  
   642  // LookupAndDelete retrieves and deletes a value from a Map.
   643  //
   644  // Returns ErrKeyNotExist if the key doesn't exist.
   645  func (m *Map) LookupAndDelete(key, valueOut interface{}) error {
   646  	return m.LookupAndDeleteWithFlags(key, valueOut, 0)
   647  }
   648  
   649  // LookupAndDeleteWithFlags retrieves and deletes a value from a Map.
   650  //
   651  // Passing LookupLock flag will look up and delete the value of a spin-locked
   652  // map without returning the lock. This must be specified if the elements
   653  // contain a spinlock.
   654  //
   655  // Returns ErrKeyNotExist if the key doesn't exist.
   656  func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
   657  	if m.typ.hasPerCPUValue() {
   658  		return m.lookupAndDeletePerCPU(key, valueOut, flags)
   659  	}
   660  
   661  	valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize)
   662  	if err := m.lookupAndDelete(key, valueBytes.Pointer(), flags); err != nil {
   663  		return err
   664  	}
   665  	return m.unmarshalValue(valueOut, valueBytes)
   666  }
   667  
   668  // LookupBytes gets a value from Map.
   669  //
   670  // Returns a nil value if a key doesn't exist.
   671  func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
   672  	valueBytes := make([]byte, m.fullValueSize)
   673  	valuePtr := sys.NewSlicePointer(valueBytes)
   674  
   675  	err := m.lookup(key, valuePtr, 0)
   676  	if errors.Is(err, ErrKeyNotExist) {
   677  		return nil, nil
   678  	}
   679  
   680  	return valueBytes, err
   681  }
   682  
   683  func (m *Map) lookupPerCPU(key, valueOut any, flags MapLookupFlags) error {
   684  	slice, err := ensurePerCPUSlice(valueOut)
   685  	if err != nil {
   686  		return err
   687  	}
   688  	valueBytes := make([]byte, m.fullValueSize)
   689  	if err := m.lookup(key, sys.NewSlicePointer(valueBytes), flags); err != nil {
   690  		return err
   691  	}
   692  	return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes)
   693  }
   694  
   695  func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags) error {
   696  	keyPtr, err := m.marshalKey(key)
   697  	if err != nil {
   698  		return fmt.Errorf("can't marshal key: %w", err)
   699  	}
   700  
   701  	attr := sys.MapLookupElemAttr{
   702  		MapFd: m.fd.Uint(),
   703  		Key:   keyPtr,
   704  		Value: valueOut,
   705  		Flags: uint64(flags),
   706  	}
   707  
   708  	if err = sys.MapLookupElem(&attr); err != nil {
   709  		if errors.Is(err, unix.ENOENT) {
   710  			return errMapLookupKeyNotExist
   711  		}
   712  		return fmt.Errorf("lookup: %w", wrapMapError(err))
   713  	}
   714  	return nil
   715  }
   716  
   717  func (m *Map) lookupAndDeletePerCPU(key, valueOut any, flags MapLookupFlags) error {
   718  	slice, err := ensurePerCPUSlice(valueOut)
   719  	if err != nil {
   720  		return err
   721  	}
   722  	valueBytes := make([]byte, m.fullValueSize)
   723  	if err := m.lookupAndDelete(key, sys.NewSlicePointer(valueBytes), flags); err != nil {
   724  		return err
   725  	}
   726  	return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes)
   727  }
   728  
   729  // ensurePerCPUSlice allocates a slice for a per-CPU value if necessary.
   730  func ensurePerCPUSlice(sliceOrPtr any) (any, error) {
   731  	sliceOrPtrType := reflect.TypeOf(sliceOrPtr)
   732  	if sliceOrPtrType.Kind() == reflect.Slice {
   733  		// The target is a slice, the caller is responsible for ensuring that
   734  		// size is correct.
   735  		return sliceOrPtr, nil
   736  	}
   737  
   738  	slicePtrType := sliceOrPtrType
   739  	if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice {
   740  		return nil, fmt.Errorf("per-cpu value requires a slice or a pointer to slice")
   741  	}
   742  
   743  	possibleCPUs, err := PossibleCPU()
   744  	if err != nil {
   745  		return nil, err
   746  	}
   747  
   748  	sliceType := slicePtrType.Elem()
   749  	slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs)
   750  
   751  	sliceElemType := sliceType.Elem()
   752  	sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr
   753  	reflect.ValueOf(sliceOrPtr).Elem().Set(slice)
   754  	if !sliceElemIsPointer {
   755  		return slice.Interface(), nil
   756  	}
   757  	sliceElemType = sliceElemType.Elem()
   758  
   759  	for i := 0; i < possibleCPUs; i++ {
   760  		newElem := reflect.New(sliceElemType)
   761  		slice.Index(i).Set(newElem)
   762  	}
   763  
   764  	return slice.Interface(), nil
   765  }
   766  
   767  func (m *Map) lookupAndDelete(key any, valuePtr sys.Pointer, flags MapLookupFlags) error {
   768  	keyPtr, err := m.marshalKey(key)
   769  	if err != nil {
   770  		return fmt.Errorf("can't marshal key: %w", err)
   771  	}
   772  
   773  	attr := sys.MapLookupAndDeleteElemAttr{
   774  		MapFd: m.fd.Uint(),
   775  		Key:   keyPtr,
   776  		Value: valuePtr,
   777  		Flags: uint64(flags),
   778  	}
   779  
   780  	if err := sys.MapLookupAndDeleteElem(&attr); err != nil {
   781  		return fmt.Errorf("lookup and delete: %w", wrapMapError(err))
   782  	}
   783  
   784  	return nil
   785  }
   786  
   787  // MapUpdateFlags controls the behaviour of the Map.Update call.
   788  //
   789  // The exact semantics depend on the specific MapType.
   790  type MapUpdateFlags uint64
   791  
   792  const (
   793  	// UpdateAny creates a new element or update an existing one.
   794  	UpdateAny MapUpdateFlags = iota
   795  	// UpdateNoExist creates a new element.
   796  	UpdateNoExist MapUpdateFlags = 1 << (iota - 1)
   797  	// UpdateExist updates an existing element.
   798  	UpdateExist
   799  	// UpdateLock updates elements under bpf_spin_lock.
   800  	UpdateLock
   801  )
   802  
   803  // Put replaces or creates a value in map.
   804  //
   805  // It is equivalent to calling Update with UpdateAny.
   806  func (m *Map) Put(key, value interface{}) error {
   807  	return m.Update(key, value, UpdateAny)
   808  }
   809  
   810  // Update changes the value of a key.
   811  func (m *Map) Update(key, value any, flags MapUpdateFlags) error {
   812  	if m.typ.hasPerCPUValue() {
   813  		return m.updatePerCPU(key, value, flags)
   814  	}
   815  
   816  	valuePtr, err := m.marshalValue(value)
   817  	if err != nil {
   818  		return fmt.Errorf("marshal value: %w", err)
   819  	}
   820  
   821  	return m.update(key, valuePtr, flags)
   822  }
   823  
   824  func (m *Map) updatePerCPU(key, value any, flags MapUpdateFlags) error {
   825  	valuePtr, err := marshalPerCPUValue(value, int(m.valueSize))
   826  	if err != nil {
   827  		return fmt.Errorf("marshal value: %w", err)
   828  	}
   829  
   830  	return m.update(key, valuePtr, flags)
   831  }
   832  
   833  func (m *Map) update(key any, valuePtr sys.Pointer, flags MapUpdateFlags) error {
   834  	keyPtr, err := m.marshalKey(key)
   835  	if err != nil {
   836  		return fmt.Errorf("marshal key: %w", err)
   837  	}
   838  
   839  	attr := sys.MapUpdateElemAttr{
   840  		MapFd: m.fd.Uint(),
   841  		Key:   keyPtr,
   842  		Value: valuePtr,
   843  		Flags: uint64(flags),
   844  	}
   845  
   846  	if err = sys.MapUpdateElem(&attr); err != nil {
   847  		return fmt.Errorf("update: %w", wrapMapError(err))
   848  	}
   849  
   850  	return nil
   851  }
   852  
   853  // Delete removes a value.
   854  //
   855  // Returns ErrKeyNotExist if the key does not exist.
   856  func (m *Map) Delete(key interface{}) error {
   857  	keyPtr, err := m.marshalKey(key)
   858  	if err != nil {
   859  		return fmt.Errorf("can't marshal key: %w", err)
   860  	}
   861  
   862  	attr := sys.MapDeleteElemAttr{
   863  		MapFd: m.fd.Uint(),
   864  		Key:   keyPtr,
   865  	}
   866  
   867  	if err = sys.MapDeleteElem(&attr); err != nil {
   868  		return fmt.Errorf("delete: %w", wrapMapError(err))
   869  	}
   870  	return nil
   871  }
   872  
   873  // NextKey finds the key following an initial key.
   874  //
   875  // See NextKeyBytes for details.
   876  //
   877  // Returns ErrKeyNotExist if there is no next key.
   878  func (m *Map) NextKey(key, nextKeyOut interface{}) error {
   879  	nextKeyBytes := makeMapSyscallOutput(nextKeyOut, int(m.keySize))
   880  
   881  	if err := m.nextKey(key, nextKeyBytes.Pointer()); err != nil {
   882  		return err
   883  	}
   884  
   885  	if err := nextKeyBytes.Unmarshal(nextKeyOut); err != nil {
   886  		return fmt.Errorf("can't unmarshal next key: %w", err)
   887  	}
   888  	return nil
   889  }
   890  
   891  // NextKeyBytes returns the key following an initial key as a byte slice.
   892  //
   893  // Passing nil will return the first key.
   894  //
   895  // Use Iterate if you want to traverse all entries in the map.
   896  //
   897  // Returns nil if there are no more keys.
   898  func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
   899  	nextKey := make([]byte, m.keySize)
   900  	nextKeyPtr := sys.NewSlicePointer(nextKey)
   901  
   902  	err := m.nextKey(key, nextKeyPtr)
   903  	if errors.Is(err, ErrKeyNotExist) {
   904  		return nil, nil
   905  	}
   906  
   907  	return nextKey, err
   908  }
   909  
   910  func (m *Map) nextKey(key interface{}, nextKeyOut sys.Pointer) error {
   911  	var (
   912  		keyPtr sys.Pointer
   913  		err    error
   914  	)
   915  
   916  	if key != nil {
   917  		keyPtr, err = m.marshalKey(key)
   918  		if err != nil {
   919  			return fmt.Errorf("can't marshal key: %w", err)
   920  		}
   921  	}
   922  
   923  	attr := sys.MapGetNextKeyAttr{
   924  		MapFd:   m.fd.Uint(),
   925  		Key:     keyPtr,
   926  		NextKey: nextKeyOut,
   927  	}
   928  
   929  	if err = sys.MapGetNextKey(&attr); err != nil {
   930  		// Kernels 4.4.131 and earlier return EFAULT instead of a pointer to the
   931  		// first map element when a nil key pointer is specified.
   932  		if key == nil && errors.Is(err, unix.EFAULT) {
   933  			var guessKey []byte
   934  			guessKey, err = m.guessNonExistentKey()
   935  			if err != nil {
   936  				return err
   937  			}
   938  
   939  			// Retry the syscall with a valid non-existing key.
   940  			attr.Key = sys.NewSlicePointer(guessKey)
   941  			if err = sys.MapGetNextKey(&attr); err == nil {
   942  				return nil
   943  			}
   944  		}
   945  
   946  		return fmt.Errorf("next key: %w", wrapMapError(err))
   947  	}
   948  
   949  	return nil
   950  }
   951  
   952  var mmapProtectedPage = sync.OnceValues(func() ([]byte, error) {
   953  	return unix.Mmap(-1, 0, os.Getpagesize(), unix.PROT_NONE, unix.MAP_ANON|unix.MAP_SHARED)
   954  })
   955  
   956  // guessNonExistentKey attempts to perform a map lookup that returns ENOENT.
   957  // This is necessary on kernels before 4.4.132, since those don't support
   958  // iterating maps from the start by providing an invalid key pointer.
   959  func (m *Map) guessNonExistentKey() ([]byte, error) {
   960  	// Map a protected page and use that as the value pointer. This saves some
   961  	// work copying out the value, which we're not interested in.
   962  	page, err := mmapProtectedPage()
   963  	if err != nil {
   964  		return nil, err
   965  	}
   966  	valuePtr := sys.NewSlicePointer(page)
   967  
   968  	randKey := make([]byte, int(m.keySize))
   969  
   970  	for i := 0; i < 4; i++ {
   971  		switch i {
   972  		// For hash maps, the 0 key is less likely to be occupied. They're often
   973  		// used for storing data related to pointers, and their access pattern is
   974  		// generally scattered across the keyspace.
   975  		case 0:
   976  		// An all-0xff key is guaranteed to be out of bounds of any array, since
   977  		// those have a fixed key size of 4 bytes. The only corner case being
   978  		// arrays with 2^32 max entries, but those are prohibitively expensive
   979  		// in many environments.
   980  		case 1:
   981  			for r := range randKey {
   982  				randKey[r] = 0xff
   983  			}
   984  		// Inspired by BCC, 0x55 is an alternating binary pattern (0101), so
   985  		// is unlikely to be taken.
   986  		case 2:
   987  			for r := range randKey {
   988  				randKey[r] = 0x55
   989  			}
   990  		// Last ditch effort, generate a random key.
   991  		case 3:
   992  			rand.New(rand.NewSource(time.Now().UnixNano())).Read(randKey)
   993  		}
   994  
   995  		err := m.lookup(randKey, valuePtr, 0)
   996  		if errors.Is(err, ErrKeyNotExist) {
   997  			return randKey, nil
   998  		}
   999  	}
  1000  
  1001  	return nil, errors.New("couldn't find non-existing key")
  1002  }
  1003  
  1004  // BatchLookup looks up many elements in a map at once.
  1005  //
  1006  // "keysOut" and "valuesOut" must be of type slice, a pointer
  1007  // to a slice or buffer will not work.
  1008  // "cursor" is an pointer to an opaque handle. It must be non-nil. Pass
  1009  // "cursor" to subsequent calls of this function to continue the batching
  1010  // operation in the case of chunking.
  1011  //
  1012  // Warning: This API is not very safe to use as the kernel implementation for
  1013  // batching relies on the user to be aware of subtle details with regarding to
  1014  // different map type implementations.
  1015  //
  1016  // ErrKeyNotExist is returned when the batch lookup has reached
  1017  // the end of all possible results, even when partial results
  1018  // are returned. It should be used to evaluate when lookup is "done".
  1019  func (m *Map) BatchLookup(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
  1020  	n, err := m.batchLookup(sys.BPF_MAP_LOOKUP_BATCH, cursor, keysOut, valuesOut, opts)
  1021  	if err != nil {
  1022  		return n, fmt.Errorf("map batch lookup: %w", err)
  1023  	}
  1024  	return n, nil
  1025  }
  1026  
  1027  // BatchLookupAndDelete looks up many elements in a map at once,
  1028  //
  1029  // It then deletes all those elements.
  1030  // "keysOut" and "valuesOut" must be of type slice, a pointer
  1031  // to a slice or buffer will not work.
  1032  // "cursor" is an pointer to an opaque handle. It must be non-nil. Pass
  1033  // "cursor" to subsequent calls of this function to continue the batching
  1034  // operation in the case of chunking.
  1035  //
  1036  // Warning: This API is not very safe to use as the kernel implementation for
  1037  // batching relies on the user to be aware of subtle details with regarding to
  1038  // different map type implementations.
  1039  //
  1040  // ErrKeyNotExist is returned when the batch lookup has reached
  1041  // the end of all possible results, even when partial results
  1042  // are returned. It should be used to evaluate when lookup is "done".
  1043  func (m *Map) BatchLookupAndDelete(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
  1044  	n, err := m.batchLookup(sys.BPF_MAP_LOOKUP_AND_DELETE_BATCH, cursor, keysOut, valuesOut, opts)
  1045  	if err != nil {
  1046  		return n, fmt.Errorf("map batch lookup and delete: %w", err)
  1047  	}
  1048  	return n, nil
  1049  }
  1050  
// MapBatchCursor represents a starting point for a batch operation.
//
// The zero value is a fresh cursor. The first batch lookup that uses it
// allocates the opaque buffer and records the Map it was used with; using
// the cursor with a different Map afterwards is rejected with an error.
type MapBatchCursor struct {
	m      *Map   // Map the cursor was first used with.
	opaque []byte // Batch token buffer passed to the kernel; nil until first use.
}
  1056  
  1057  func (m *Map) batchLookup(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
  1058  	if m.typ.hasPerCPUValue() {
  1059  		return m.batchLookupPerCPU(cmd, cursor, keysOut, valuesOut, opts)
  1060  	}
  1061  
  1062  	count, err := batchCount(keysOut, valuesOut)
  1063  	if err != nil {
  1064  		return 0, err
  1065  	}
  1066  
  1067  	valueBuf := sysenc.SyscallOutput(valuesOut, count*int(m.fullValueSize))
  1068  
  1069  	n, err := m.batchLookupCmd(cmd, cursor, count, keysOut, valueBuf.Pointer(), opts)
  1070  	if errors.Is(err, unix.ENOSPC) {
  1071  		// Hash tables return ENOSPC when the size of the batch is smaller than
  1072  		// any bucket.
  1073  		return n, fmt.Errorf("%w (batch size too small?)", err)
  1074  	} else if err != nil {
  1075  		return n, err
  1076  	}
  1077  
  1078  	err = valueBuf.Unmarshal(valuesOut)
  1079  	if err != nil {
  1080  		return 0, err
  1081  	}
  1082  
  1083  	return n, nil
  1084  }
  1085  
  1086  func (m *Map) batchLookupPerCPU(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
  1087  	count, err := sliceLen(keysOut)
  1088  	if err != nil {
  1089  		return 0, fmt.Errorf("keys: %w", err)
  1090  	}
  1091  
  1092  	valueBuf := make([]byte, count*int(m.fullValueSize))
  1093  	valuePtr := sys.NewSlicePointer(valueBuf)
  1094  
  1095  	n, sysErr := m.batchLookupCmd(cmd, cursor, count, keysOut, valuePtr, opts)
  1096  	if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) {
  1097  		return 0, err
  1098  	}
  1099  
  1100  	err = unmarshalBatchPerCPUValue(valuesOut, count, int(m.valueSize), valueBuf)
  1101  	if err != nil {
  1102  		return 0, err
  1103  	}
  1104  
  1105  	return n, sysErr
  1106  }
  1107  
  1108  func (m *Map) batchLookupCmd(cmd sys.Cmd, cursor *MapBatchCursor, count int, keysOut any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) {
  1109  	cursorLen := int(m.keySize)
  1110  	if cursorLen < 4 {
  1111  		// * generic_map_lookup_batch requires that batch_out is key_size bytes.
  1112  		//   This is used by array and LPM maps.
  1113  		//
  1114  		// * __htab_map_lookup_and_delete_batch requires u32. This is used by the
  1115  		//   various hash maps.
  1116  		//
  1117  		// Use a minimum of 4 bytes to avoid having to distinguish between the two.
  1118  		cursorLen = 4
  1119  	}
  1120  
  1121  	inBatch := cursor.opaque
  1122  	if inBatch == nil {
  1123  		// This is the first lookup, allocate a buffer to hold the cursor.
  1124  		cursor.opaque = make([]byte, cursorLen)
  1125  		cursor.m = m
  1126  	} else if cursor.m != m {
  1127  		// Prevent reuse of a cursor across maps. First, it's unlikely to work.
  1128  		// Second, the maps may require different cursorLen and cursor.opaque
  1129  		// may therefore be too short. This could lead to the kernel clobbering
  1130  		// user space memory.
  1131  		return 0, errors.New("a cursor may not be reused across maps")
  1132  	}
  1133  
  1134  	if err := haveBatchAPI(); err != nil {
  1135  		return 0, err
  1136  	}
  1137  
  1138  	keyBuf := sysenc.SyscallOutput(keysOut, count*int(m.keySize))
  1139  
  1140  	attr := sys.MapLookupBatchAttr{
  1141  		MapFd:    m.fd.Uint(),
  1142  		Keys:     keyBuf.Pointer(),
  1143  		Values:   valuePtr,
  1144  		Count:    uint32(count),
  1145  		InBatch:  sys.NewSlicePointer(inBatch),
  1146  		OutBatch: sys.NewSlicePointer(cursor.opaque),
  1147  	}
  1148  
  1149  	if opts != nil {
  1150  		attr.ElemFlags = opts.ElemFlags
  1151  		attr.Flags = opts.Flags
  1152  	}
  1153  
  1154  	_, sysErr := sys.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
  1155  	sysErr = wrapMapError(sysErr)
  1156  	if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) {
  1157  		return 0, sysErr
  1158  	}
  1159  
  1160  	if err := keyBuf.Unmarshal(keysOut); err != nil {
  1161  		return 0, err
  1162  	}
  1163  
  1164  	return int(attr.Count), sysErr
  1165  }
  1166  
  1167  // BatchUpdate updates the map with multiple keys and values
  1168  // simultaneously.
  1169  // "keys" and "values" must be of type slice, a pointer
  1170  // to a slice or buffer will not work.
  1171  func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, error) {
  1172  	if m.typ.hasPerCPUValue() {
  1173  		return m.batchUpdatePerCPU(keys, values, opts)
  1174  	}
  1175  
  1176  	count, err := batchCount(keys, values)
  1177  	if err != nil {
  1178  		return 0, err
  1179  	}
  1180  
  1181  	valuePtr, err := marshalMapSyscallInput(values, count*int(m.valueSize))
  1182  	if err != nil {
  1183  		return 0, err
  1184  	}
  1185  
  1186  	return m.batchUpdate(count, keys, valuePtr, opts)
  1187  }
  1188  
  1189  func (m *Map) batchUpdate(count int, keys any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) {
  1190  	keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize))
  1191  	if err != nil {
  1192  		return 0, err
  1193  	}
  1194  
  1195  	attr := sys.MapUpdateBatchAttr{
  1196  		MapFd:  m.fd.Uint(),
  1197  		Keys:   keyPtr,
  1198  		Values: valuePtr,
  1199  		Count:  uint32(count),
  1200  	}
  1201  	if opts != nil {
  1202  		attr.ElemFlags = opts.ElemFlags
  1203  		attr.Flags = opts.Flags
  1204  	}
  1205  
  1206  	err = sys.MapUpdateBatch(&attr)
  1207  	if err != nil {
  1208  		if haveFeatErr := haveBatchAPI(); haveFeatErr != nil {
  1209  			return 0, haveFeatErr
  1210  		}
  1211  		return int(attr.Count), fmt.Errorf("batch update: %w", wrapMapError(err))
  1212  	}
  1213  
  1214  	return int(attr.Count), nil
  1215  }
  1216  
  1217  func (m *Map) batchUpdatePerCPU(keys, values any, opts *BatchOptions) (int, error) {
  1218  	count, err := sliceLen(keys)
  1219  	if err != nil {
  1220  		return 0, fmt.Errorf("keys: %w", err)
  1221  	}
  1222  
  1223  	valueBuf, err := marshalBatchPerCPUValue(values, count, int(m.valueSize))
  1224  	if err != nil {
  1225  		return 0, err
  1226  	}
  1227  
  1228  	return m.batchUpdate(count, keys, sys.NewSlicePointer(valueBuf), opts)
  1229  }
  1230  
  1231  // BatchDelete batch deletes entries in the map by keys.
  1232  // "keys" must be of type slice, a pointer to a slice or buffer will not work.
  1233  func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) {
  1234  	count, err := sliceLen(keys)
  1235  	if err != nil {
  1236  		return 0, fmt.Errorf("keys: %w", err)
  1237  	}
  1238  
  1239  	keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize))
  1240  	if err != nil {
  1241  		return 0, fmt.Errorf("cannot marshal keys: %v", err)
  1242  	}
  1243  
  1244  	attr := sys.MapDeleteBatchAttr{
  1245  		MapFd: m.fd.Uint(),
  1246  		Keys:  keyPtr,
  1247  		Count: uint32(count),
  1248  	}
  1249  
  1250  	if opts != nil {
  1251  		attr.ElemFlags = opts.ElemFlags
  1252  		attr.Flags = opts.Flags
  1253  	}
  1254  
  1255  	if err = sys.MapDeleteBatch(&attr); err != nil {
  1256  		if haveFeatErr := haveBatchAPI(); haveFeatErr != nil {
  1257  			return 0, haveFeatErr
  1258  		}
  1259  		return int(attr.Count), fmt.Errorf("batch delete: %w", wrapMapError(err))
  1260  	}
  1261  
  1262  	return int(attr.Count), nil
  1263  }
  1264  
  1265  func batchCount(keys, values any) (int, error) {
  1266  	keysLen, err := sliceLen(keys)
  1267  	if err != nil {
  1268  		return 0, fmt.Errorf("keys: %w", err)
  1269  	}
  1270  
  1271  	valuesLen, err := sliceLen(values)
  1272  	if err != nil {
  1273  		return 0, fmt.Errorf("values: %w", err)
  1274  	}
  1275  
  1276  	if keysLen != valuesLen {
  1277  		return 0, fmt.Errorf("keys and values must have the same length")
  1278  	}
  1279  
  1280  	return keysLen, nil
  1281  }
  1282  
  1283  // Iterate traverses a map.
  1284  //
  1285  // It's safe to create multiple iterators at the same time.
  1286  //
  1287  // It's not possible to guarantee that all keys in a map will be
  1288  // returned if there are concurrent modifications to the map.
  1289  func (m *Map) Iterate() *MapIterator {
  1290  	return newMapIterator(m)
  1291  }
  1292  
  1293  // Close the Map's underlying file descriptor, which could unload the
  1294  // Map from the kernel if it is not pinned or in use by a loaded Program.
  1295  func (m *Map) Close() error {
  1296  	if m == nil {
  1297  		// This makes it easier to clean up when iterating maps
  1298  		// of maps / programs.
  1299  		return nil
  1300  	}
  1301  
  1302  	return m.fd.Close()
  1303  }
  1304  
// FD returns the file descriptor of the Map as a plain int.
//
// Calling this function is invalid after Close has been called.
func (m *Map) FD() int {
	return m.fd.Int()
}
  1311  
  1312  // Clone creates a duplicate of the Map.
  1313  //
  1314  // Closing the duplicate does not affect the original, and vice versa.
  1315  // Changes made to the map are reflected by both instances however.
  1316  // If the original map was pinned, the cloned map will not be pinned by default.
  1317  //
  1318  // Cloning a nil Map returns nil.
  1319  func (m *Map) Clone() (*Map, error) {
  1320  	if m == nil {
  1321  		return nil, nil
  1322  	}
  1323  
  1324  	dup, err := m.fd.Dup()
  1325  	if err != nil {
  1326  		return nil, fmt.Errorf("can't clone map: %w", err)
  1327  	}
  1328  
  1329  	return &Map{
  1330  		m.name,
  1331  		dup,
  1332  		m.typ,
  1333  		m.keySize,
  1334  		m.valueSize,
  1335  		m.maxEntries,
  1336  		m.flags,
  1337  		"",
  1338  		m.fullValueSize,
  1339  	}, nil
  1340  }
  1341  
  1342  // Pin persists the map on the BPF virtual file system past the lifetime of
  1343  // the process that created it .
  1344  //
  1345  // Calling Pin on a previously pinned map will overwrite the path, except when
  1346  // the new path already exists. Re-pinning across filesystems is not supported.
  1347  // You can Clone a map to pin it to a different path.
  1348  //
  1349  // This requires bpffs to be mounted above fileName.
  1350  // See https://docs.cilium.io/en/stable/network/kubernetes/configuration/#mounting-bpffs-with-systemd
  1351  func (m *Map) Pin(fileName string) error {
  1352  	if err := internal.Pin(m.pinnedPath, fileName, m.fd); err != nil {
  1353  		return err
  1354  	}
  1355  	m.pinnedPath = fileName
  1356  	return nil
  1357  }
  1358  
  1359  // Unpin removes the persisted state for the map from the BPF virtual filesystem.
  1360  //
  1361  // Failed calls to Unpin will not alter the state returned by IsPinned.
  1362  //
  1363  // Unpinning an unpinned Map returns nil.
  1364  func (m *Map) Unpin() error {
  1365  	if err := internal.Unpin(m.pinnedPath); err != nil {
  1366  		return err
  1367  	}
  1368  	m.pinnedPath = ""
  1369  	return nil
  1370  }
  1371  
  1372  // IsPinned returns true if the map has a non-empty pinned path.
  1373  func (m *Map) IsPinned() bool {
  1374  	return m.pinnedPath != ""
  1375  }
  1376  
  1377  // Freeze prevents a map to be modified from user space.
  1378  //
  1379  // It makes no changes to kernel-side restrictions.
  1380  func (m *Map) Freeze() error {
  1381  	attr := sys.MapFreezeAttr{
  1382  		MapFd: m.fd.Uint(),
  1383  	}
  1384  
  1385  	if err := sys.MapFreeze(&attr); err != nil {
  1386  		if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil {
  1387  			return fmt.Errorf("can't freeze map: %w", haveFeatErr)
  1388  		}
  1389  		return fmt.Errorf("can't freeze map: %w", err)
  1390  	}
  1391  	return nil
  1392  }
  1393  
  1394  // finalize populates the Map according to the Contents specified
  1395  // in spec and freezes the Map if requested by spec.
  1396  func (m *Map) finalize(spec *MapSpec) error {
  1397  	for _, kv := range spec.Contents {
  1398  		if err := m.Put(kv.Key, kv.Value); err != nil {
  1399  			return fmt.Errorf("putting value: key %v: %w", kv.Key, err)
  1400  		}
  1401  	}
  1402  
  1403  	if spec.Freeze {
  1404  		if err := m.Freeze(); err != nil {
  1405  			return fmt.Errorf("freezing map: %w", err)
  1406  		}
  1407  	}
  1408  
  1409  	return nil
  1410  }
  1411  
  1412  func (m *Map) marshalKey(data interface{}) (sys.Pointer, error) {
  1413  	if data == nil {
  1414  		if m.keySize == 0 {
  1415  			// Queues have a key length of zero, so passing nil here is valid.
  1416  			return sys.NewPointer(nil), nil
  1417  		}
  1418  		return sys.Pointer{}, errors.New("can't use nil as key of map")
  1419  	}
  1420  
  1421  	return marshalMapSyscallInput(data, int(m.keySize))
  1422  }
  1423  
  1424  func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) {
  1425  	var (
  1426  		buf []byte
  1427  		err error
  1428  	)
  1429  
  1430  	switch value := data.(type) {
  1431  	case *Map:
  1432  		if !m.typ.canStoreMap() {
  1433  			return sys.Pointer{}, fmt.Errorf("can't store map in %s", m.typ)
  1434  		}
  1435  		buf, err = marshalMap(value, int(m.valueSize))
  1436  
  1437  	case *Program:
  1438  		if !m.typ.canStoreProgram() {
  1439  			return sys.Pointer{}, fmt.Errorf("can't store program in %s", m.typ)
  1440  		}
  1441  		buf, err = marshalProgram(value, int(m.valueSize))
  1442  
  1443  	default:
  1444  		return marshalMapSyscallInput(data, int(m.valueSize))
  1445  	}
  1446  
  1447  	if err != nil {
  1448  		return sys.Pointer{}, err
  1449  	}
  1450  
  1451  	return sys.NewSlicePointer(buf), nil
  1452  }
  1453  
  1454  func (m *Map) unmarshalValue(value any, buf sysenc.Buffer) error {
  1455  	switch value := value.(type) {
  1456  	case **Map:
  1457  		if !m.typ.canStoreMap() {
  1458  			return fmt.Errorf("can't read a map from %s", m.typ)
  1459  		}
  1460  
  1461  		other, err := unmarshalMap(buf)
  1462  		if err != nil {
  1463  			return err
  1464  		}
  1465  
  1466  		// The caller might close the map externally, so ignore errors.
  1467  		_ = (*value).Close()
  1468  
  1469  		*value = other
  1470  		return nil
  1471  
  1472  	case *Map:
  1473  		if !m.typ.canStoreMap() {
  1474  			return fmt.Errorf("can't read a map from %s", m.typ)
  1475  		}
  1476  		return errors.New("require pointer to *Map")
  1477  
  1478  	case **Program:
  1479  		if !m.typ.canStoreProgram() {
  1480  			return fmt.Errorf("can't read a program from %s", m.typ)
  1481  		}
  1482  
  1483  		other, err := unmarshalProgram(buf)
  1484  		if err != nil {
  1485  			return err
  1486  		}
  1487  
  1488  		// The caller might close the program externally, so ignore errors.
  1489  		_ = (*value).Close()
  1490  
  1491  		*value = other
  1492  		return nil
  1493  
  1494  	case *Program:
  1495  		if !m.typ.canStoreProgram() {
  1496  			return fmt.Errorf("can't read a program from %s", m.typ)
  1497  		}
  1498  		return errors.New("require pointer to *Program")
  1499  	}
  1500  
  1501  	return buf.Unmarshal(value)
  1502  }
  1503  
  1504  // LoadPinnedMap loads a Map from a BPF file.
  1505  func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) {
  1506  	fd, err := sys.ObjGet(&sys.ObjGetAttr{
  1507  		Pathname:  sys.NewStringPointer(fileName),
  1508  		FileFlags: opts.Marshal(),
  1509  	})
  1510  	if err != nil {
  1511  		return nil, err
  1512  	}
  1513  
  1514  	m, err := newMapFromFD(fd)
  1515  	if err == nil {
  1516  		m.pinnedPath = fileName
  1517  	}
  1518  
  1519  	return m, err
  1520  }
  1521  
  1522  // unmarshalMap creates a map from a map ID encoded in host endianness.
  1523  func unmarshalMap(buf sysenc.Buffer) (*Map, error) {
  1524  	var id uint32
  1525  	if err := buf.Unmarshal(&id); err != nil {
  1526  		return nil, err
  1527  	}
  1528  	return NewMapFromID(MapID(id))
  1529  }
  1530  
  1531  // marshalMap marshals the fd of a map into a buffer in host endianness.
  1532  func marshalMap(m *Map, length int) ([]byte, error) {
  1533  	if length != 4 {
  1534  		return nil, fmt.Errorf("can't marshal map to %d bytes", length)
  1535  	}
  1536  
  1537  	buf := make([]byte, 4)
  1538  	internal.NativeEndian.PutUint32(buf, m.fd.Uint())
  1539  	return buf, nil
  1540  }
  1541  
// MapIterator iterates a Map.
//
// See Map.Iterate.
//
// target is the Map being traversed. count is the number of keys fetched so
// far and maxEntries is copied from the target when the iterator is created;
// Next aborts with ErrIterationAborted once count exceeds maxEntries. done is
// set when the end of the map has been reached, and err holds the error
// reported by Err.
type MapIterator struct {
	target *Map
	// Temporary storage to avoid allocations in Next(). This is any instead
	// of []byte to avoid allocations.
	cursor            any
	count, maxEntries uint32
	done              bool
	err               error
}
  1554  
  1555  func newMapIterator(target *Map) *MapIterator {
  1556  	return &MapIterator{
  1557  		target:     target,
  1558  		maxEntries: target.maxEntries,
  1559  	}
  1560  }
  1561  
// Next decodes the next key and value.
//
// Iterating a hash map from which keys are being deleted is not
// safe. You may see the same key multiple times. Iteration may
// also abort with an error, see ErrIterationAborted.
//
// Returns false if there are no more entries. You must check
// the result of Err afterwards.
//
// keyOut may be an unsafe.Pointer, in which case the raw key bytes are
// copied to it; otherwise the key is decoded with sysenc.Unmarshal.
//
// See Map.Lookup for further caveats around valueOut.
func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
	// A previous error or a finished iteration is sticky.
	if mi.err != nil || mi.done {
		return false
	}

	// For array-like maps NextKey returns nil only after maxEntries
	// iterations.
	for mi.count <= mi.maxEntries {
		if mi.cursor == nil {
			// Pass nil interface to NextKey to make sure the Map's first key
			// is returned. If we pass an uninitialized []byte instead, it'll see a
			// non-nil interface and try to marshal it.
			mi.cursor = make([]byte, mi.target.keySize)
			mi.err = mi.target.NextKey(nil, mi.cursor)
		} else {
			// The cursor doubles as input (previous key) and output (next key).
			mi.err = mi.target.NextKey(mi.cursor, mi.cursor)
		}

		if errors.Is(mi.err, ErrKeyNotExist) {
			// End of the map: this is a regular stop, not an error.
			mi.done = true
			mi.err = nil
			return false
		} else if mi.err != nil {
			mi.err = fmt.Errorf("get next key: %w", mi.err)
			return false
		}

		// Count every key fetched, including ones whose lookup is skipped
		// below, so that the loop bound above eventually trips.
		mi.count++
		mi.err = mi.target.Lookup(mi.cursor, valueOut)
		if errors.Is(mi.err, ErrKeyNotExist) {
			// Even though the key should be valid, we couldn't look up
			// its value. If we're iterating a hash map this is probably
			// because a concurrent delete removed the value before we
			// could get it. This means that the next call to NextKey
			// is very likely to restart iteration.
			// If we're iterating one of the fd maps like
			// ProgramArray it means that a given slot doesn't have
			// a valid fd associated. It's OK to continue to the next slot.
			continue
		}
		if mi.err != nil {
			mi.err = fmt.Errorf("look up next key: %w", mi.err)
			return false
		}

		buf := mi.cursor.([]byte)
		if ptr, ok := keyOut.(unsafe.Pointer); ok {
			// Raw pointer target: copy the key bytes verbatim.
			copy(unsafe.Slice((*byte)(ptr), len(buf)), buf)
		} else {
			mi.err = sysenc.Unmarshal(keyOut, buf)
		}

		return mi.err == nil
	}

	// More keys were fetched than the map can hold: give up.
	mi.err = fmt.Errorf("%w", ErrIterationAborted)
	return false
}
  1630  
// Err returns any encountered error.
//
// The method must be called after Next returns false.
//
// Returns ErrIterationAborted if it wasn't possible to do a full iteration.
func (mi *MapIterator) Err() error {
	return mi.err
}
  1639  
  1640  // MapGetNextID returns the ID of the next eBPF map.
  1641  //
  1642  // Returns ErrNotExist, if there is no next eBPF map.
  1643  func MapGetNextID(startID MapID) (MapID, error) {
  1644  	attr := &sys.MapGetNextIdAttr{Id: uint32(startID)}
  1645  	return MapID(attr.NextId), sys.MapGetNextId(attr)
  1646  }
  1647  
  1648  // NewMapFromID returns the map for a given id.
  1649  //
  1650  // Returns ErrNotExist, if there is no eBPF map with the given id.
  1651  func NewMapFromID(id MapID) (*Map, error) {
  1652  	fd, err := sys.MapGetFdById(&sys.MapGetFdByIdAttr{
  1653  		Id: uint32(id),
  1654  	})
  1655  	if err != nil {
  1656  		return nil, err
  1657  	}
  1658  
  1659  	return newMapFromFD(fd)
  1660  }
  1661  
  1662  // sliceLen returns the length if the value is a slice or an error otherwise.
  1663  func sliceLen(slice any) (int, error) {
  1664  	sliceValue := reflect.ValueOf(slice)
  1665  	if sliceValue.Kind() != reflect.Slice {
  1666  		return 0, fmt.Errorf("%T is not a slice", slice)
  1667  	}
  1668  	return sliceValue.Len(), nil
  1669  }