package ebpf

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"os"
	"path/filepath"
	"reflect"
	"slices"
	"strings"
	"sync"
	"time"
	"unsafe"

	"github.com/cilium/ebpf/btf"
	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/sysenc"
	"github.com/cilium/ebpf/internal/unix"
)

// Errors returned by Map and MapIterator methods.
var (
	ErrKeyNotExist      = errors.New("key does not exist")
	ErrKeyExist         = errors.New("key already exists")
	ErrIterationAborted = errors.New("iteration aborted")
	ErrMapIncompatible  = errors.New("map spec is incompatible with existing map")
	errMapNoBTFValue    = errors.New("map spec does not contain a BTF Value")

	// pre-allocating these errors here since they may get called in hot code paths
	// and cause unnecessary memory allocations
	errMapLookupKeyNotExist = fmt.Errorf("lookup: %w", sysErrKeyNotExist)
)

// MapOptions control loading a map into the kernel.
type MapOptions struct {
	// The base path to pin maps in if requested via PinByName.
	// Existing maps will be re-used if they are compatible, otherwise an
	// error is returned.
	PinPath        string
	LoadPinOptions LoadPinOptions
}

// MapID represents the unique ID of an eBPF map
type MapID uint32

// MapSpec defines a Map.
type MapSpec struct {
	// Name is passed to the kernel as a debug aid. Must only contain
	// alpha numeric and '_' characters.
	Name       string
	Type       MapType
	KeySize    uint32
	ValueSize  uint32
	MaxEntries uint32

	// Flags is passed to the kernel and specifies additional map
	// creation attributes.
	Flags uint32

	// Automatically pin and load a map from MapOptions.PinPath.
	// Generates an error if an existing pinned map is incompatible with the MapSpec.
	Pinning PinType

	// Specify numa node during map creation
	// (effective only if unix.BPF_F_NUMA_NODE flag is set,
	// which can be imported from golang.org/x/sys/unix)
	NumaNode uint32

	// The initial contents of the map. May be nil.
	Contents []MapKV

	// Whether to freeze a map after setting its initial contents.
	Freeze bool

	// InnerMap is used as a template for ArrayOfMaps and HashOfMaps
	InnerMap *MapSpec

	// Extra trailing bytes found in the ELF map definition when using structs
	// larger than libbpf's bpf_map_def. nil if no trailing bytes were present.
	// Must be nil or empty before instantiating the MapSpec into a Map.
	Extra *bytes.Reader

	// The key and value type of this map. May be nil.
	Key, Value btf.Type
}

// String returns a human-readable one-line summary of the spec's
// type and size parameters.
func (ms *MapSpec) String() string {
	return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags)
}

// Copy returns a copy of the spec.
//
// MapSpec.Contents is a shallow copy.
func (ms *MapSpec) Copy() *MapSpec {
	if ms == nil {
		return nil
	}

	cpy := *ms
	cpy.Contents = slices.Clone(cpy.Contents)
	cpy.Key = btf.Copy(cpy.Key)
	cpy.Value = btf.Copy(cpy.Value)

	// Preserve self-referential inner maps: if the spec uses itself as the
	// inner map template, point the copy at itself rather than recursing.
	if cpy.InnerMap == ms {
		cpy.InnerMap = &cpy
	} else {
		cpy.InnerMap = ms.InnerMap.Copy()
	}

	// bytes.Reader carries read-position state, so clone the value to keep
	// the copy's cursor independent of the original's.
	if cpy.Extra != nil {
		extra := *cpy.Extra
		cpy.Extra = &extra
	}

	return &cpy
}

// fixupMagicFields fills fields of MapSpec which are usually
// left empty in ELF or which depend on runtime information.
//
// The method doesn't modify Spec, instead returning a copy.
// The copy is only performed if fixups are necessary, so callers mustn't mutate
// the returned spec.
128 func (spec *MapSpec) fixupMagicFields() (*MapSpec, error) { 129 switch spec.Type { 130 case ArrayOfMaps, HashOfMaps: 131 if spec.ValueSize != 0 && spec.ValueSize != 4 { 132 return nil, errors.New("ValueSize must be zero or four for map of map") 133 } 134 135 spec = spec.Copy() 136 spec.ValueSize = 4 137 138 case PerfEventArray: 139 if spec.KeySize != 0 && spec.KeySize != 4 { 140 return nil, errors.New("KeySize must be zero or four for perf event array") 141 } 142 143 if spec.ValueSize != 0 && spec.ValueSize != 4 { 144 return nil, errors.New("ValueSize must be zero or four for perf event array") 145 } 146 147 spec = spec.Copy() 148 spec.KeySize = 4 149 spec.ValueSize = 4 150 151 n, err := PossibleCPU() 152 if err != nil { 153 return nil, fmt.Errorf("fixup perf event array: %w", err) 154 } 155 156 if n := uint32(n); spec.MaxEntries == 0 || spec.MaxEntries > n { 157 // MaxEntries should be zero most of the time, but there is code 158 // out there which hardcodes large constants. Clamp the number 159 // of entries to the number of CPUs at most. Allow creating maps with 160 // less than n items since some kernel selftests relied on this 161 // behaviour in the past. 162 spec.MaxEntries = n 163 } 164 } 165 166 return spec, nil 167 } 168 169 // dataSection returns the contents and BTF Datasec descriptor of the spec. 
// dataSection returns the contents and BTF Datasec descriptor of the spec.
func (ms *MapSpec) dataSection() ([]byte, *btf.Datasec, error) {
	if ms.Value == nil {
		return nil, nil, errMapNoBTFValue
	}

	ds, ok := ms.Value.(*btf.Datasec)
	if !ok {
		return nil, nil, fmt.Errorf("map value BTF is a %T, not a *btf.Datasec", ms.Value)
	}

	// A data section is represented as a map with exactly one entry whose
	// value holds the section's raw bytes.
	if n := len(ms.Contents); n != 1 {
		return nil, nil, fmt.Errorf("expected one key, found %d", n)
	}

	kv := ms.Contents[0]
	value, ok := kv.Value.([]byte)
	if !ok {
		return nil, nil, fmt.Errorf("value at first map key is %T, not []byte", kv.Value)
	}

	return value, ds, nil
}

// MapKV is used to initialize the contents of a Map.
type MapKV struct {
	Key   interface{}
	Value interface{}
}

// Compatible returns nil if an existing map may be used instead of creating
// one from the spec.
//
// Returns an error wrapping [ErrMapIncompatible] otherwise.
func (ms *MapSpec) Compatible(m *Map) error {
	// Compare against the fixed-up spec so runtime-derived fields (e.g. perf
	// event array sizes) don't produce spurious mismatches.
	ms, err := ms.fixupMagicFields()
	if err != nil {
		return err
	}

	diffs := []string{}
	if m.typ != ms.Type {
		diffs = append(diffs, fmt.Sprintf("Type: %s changed to %s", m.typ, ms.Type))
	}
	if m.keySize != ms.KeySize {
		diffs = append(diffs, fmt.Sprintf("KeySize: %d changed to %d", m.keySize, ms.KeySize))
	}
	if m.valueSize != ms.ValueSize {
		diffs = append(diffs, fmt.Sprintf("ValueSize: %d changed to %d", m.valueSize, ms.ValueSize))
	}
	if m.maxEntries != ms.MaxEntries {
		diffs = append(diffs, fmt.Sprintf("MaxEntries: %d changed to %d", m.maxEntries, ms.MaxEntries))
	}

	// BPF_F_RDONLY_PROG is set unconditionally for devmaps. Explicitly allow this
	// mismatch: the XOR isolates the differing bits and accepts the case where
	// BPF_F_RDONLY_PROG is the only difference.
	if !((ms.Type == DevMap || ms.Type == DevMapHash) && m.flags^ms.Flags == unix.BPF_F_RDONLY_PROG) &&
		m.flags != ms.Flags {
		diffs = append(diffs, fmt.Sprintf("Flags: %d changed to %d", m.flags, ms.Flags))
	}

	if len(diffs) == 0 {
		return nil
	}

	return fmt.Errorf("%s: %w", strings.Join(diffs, ", "), ErrMapIncompatible)
}

// Map represents a Map file descriptor.
//
// It is not safe to close a map which is used by other goroutines.
//
// Methods which take interface{} arguments by default encode
// them using binary.Read/Write in the machine's native endianness.
//
// Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler
// if you require custom encoding.
type Map struct {
	name       string
	fd         *sys.FD
	typ        MapType
	keySize    uint32
	valueSize  uint32
	maxEntries uint32
	flags      uint32
	pinnedPath string
	// Per CPU maps return values larger than the size in the spec
	fullValueSize int
}

// NewMapFromFD creates a map from a raw fd.
//
// You should not use fd after calling this function.
func NewMapFromFD(fd int) (*Map, error) {
	f, err := sys.NewFD(fd)
	if err != nil {
		return nil, err
	}

	return newMapFromFD(f)
}

// newMapFromFD queries the kernel for the map's metadata and wraps the fd.
// The fd is closed on failure.
func newMapFromFD(fd *sys.FD) (*Map, error) {
	info, err := newMapInfoFromFd(fd)
	if err != nil {
		fd.Close()
		return nil, fmt.Errorf("get map info: %w", err)
	}

	return newMap(fd, info.Name, info.Type, info.KeySize, info.ValueSize, info.MaxEntries, info.Flags)
}

// NewMap creates a new Map.
//
// It's equivalent to calling NewMapWithOptions with default options.
func NewMap(spec *MapSpec) (*Map, error) {
	return NewMapWithOptions(spec, MapOptions{})
}

// NewMapWithOptions creates a new Map.
//
// Creating a map for the first time will perform feature detection
// by creating small, temporary maps.
//
// The caller is responsible for ensuring the process' rlimit is set
// sufficiently high for locking memory during map creation. This can be done
// by calling rlimit.RemoveMemlock() prior to calling NewMapWithOptions.
//
// May return an error wrapping ErrMapIncompatible.
func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) {
	m, err := newMapWithOptions(spec, opts)
	if err != nil {
		return nil, fmt.Errorf("creating map: %w", err)
	}

	// finalize populates initial contents and freezes the map if requested.
	if err := m.finalize(spec); err != nil {
		m.Close()
		return nil, fmt.Errorf("populating map: %w", err)
	}

	return m, nil
}

// newMapWithOptions handles pinning and inner-map templates before
// delegating actual creation to MapSpec.createMap.
func newMapWithOptions(spec *MapSpec, opts MapOptions) (_ *Map, err error) {
	// closeOnError is deferred on intermediate objects; it only closes them
	// if the named return err is non-nil when the function returns.
	closeOnError := func(c io.Closer) {
		if err != nil {
			c.Close()
		}
	}

	switch spec.Pinning {
	case PinByName:
		if spec.Name == "" {
			return nil, fmt.Errorf("pin by name: missing Name")
		}

		if opts.PinPath == "" {
			return nil, fmt.Errorf("pin by name: missing MapOptions.PinPath")
		}

		// Try to reuse an existing pinned map; fall through to creation only
		// if the pin doesn't exist yet.
		path := filepath.Join(opts.PinPath, spec.Name)
		m, err := LoadPinnedMap(path, &opts.LoadPinOptions)
		if errors.Is(err, unix.ENOENT) {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("load pinned map: %w", err)
		}
		defer closeOnError(m)

		if err := spec.Compatible(m); err != nil {
			return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err)
		}

		return m, nil

	case PinNone:
		// Nothing to do here

	default:
		return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported)
	}

	var innerFd *sys.FD
	if spec.Type == ArrayOfMaps || spec.Type == HashOfMaps {
		if spec.InnerMap == nil {
			return nil, fmt.Errorf("%s requires InnerMap", spec.Type)
		}

		if spec.InnerMap.Pinning != PinNone {
			return nil, errors.New("inner maps cannot be pinned")
		}

		template, err := spec.InnerMap.createMap(nil, opts)
		if err != nil {
			return nil, fmt.Errorf("inner map: %w", err)
		}
		defer template.Close()

		// Intentionally skip populating and freezing (finalizing)
		// the inner map template since it will be removed shortly.

		innerFd = template.fd
	}

	m, err := spec.createMap(innerFd, opts)
	if err != nil {
		return nil, err
	}
	defer closeOnError(m)

	if spec.Pinning == PinByName {
		path := filepath.Join(opts.PinPath, spec.Name)
		if err := m.Pin(path); err != nil {
			return nil, fmt.Errorf("pin map to %s: %w", path, err)
		}
	}

	return m, nil
}

// createMap validates the spec's properties and creates the map in the kernel
// using the given opts. It does not populate or freeze the map.
func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions) (_ *Map, err error) {
	closeOnError := func(closer io.Closer) {
		if err != nil {
			closer.Close()
		}
	}

	// Kernels 4.13 through 5.4 used a struct bpf_map_def that contained
	// additional 'inner_map_idx' and later 'numa_node' fields.
	// In order to support loading these definitions, tolerate the presence of
	// extra bytes, but require them to be zeroes.
	if spec.Extra != nil {
		if _, err := io.Copy(internal.DiscardZeroes{}, spec.Extra); err != nil {
			return nil, errors.New("extra contains unhandled non-zero bytes, drain before creating map")
		}
	}

	spec, err = spec.fixupMagicFields()
	if err != nil {
		return nil, err
	}

	attr := sys.MapCreateAttr{
		MapType:    sys.MapType(spec.Type),
		KeySize:    spec.KeySize,
		ValueSize:  spec.ValueSize,
		MaxEntries: spec.MaxEntries,
		MapFlags:   sys.MapFlags(spec.Flags),
		NumaNode:   spec.NumaNode,
	}

	if inner != nil {
		attr.InnerMapFd = inner.Uint()
	}

	// Only pass a name if the kernel supports object names.
	if haveObjName() == nil {
		attr.MapName = sys.NewObjName(spec.Name)
	}

	if spec.Key != nil || spec.Value != nil {
		handle, keyTypeID, valueTypeID, err := btf.MarshalMapKV(spec.Key, spec.Value)
		if err != nil && !errors.Is(err, btf.ErrNotSupported) {
			return nil, fmt.Errorf("load BTF: %w", err)
		}

		if handle != nil {
			defer handle.Close()

			// Use BTF k/v during map creation.
			attr.BtfFd = uint32(handle.FD())
			attr.BtfKeyTypeId = keyTypeID
			attr.BtfValueTypeId = valueTypeID
		}
	}

	fd, err := sys.MapCreate(&attr)

	// Some map types don't support BTF k/v in earlier kernel versions.
	// Remove BTF metadata and retry map creation.
	if (errors.Is(err, sys.ENOTSUPP) || errors.Is(err, unix.EINVAL)) && attr.BtfFd != 0 {
		attr.BtfFd, attr.BtfKeyTypeId, attr.BtfValueTypeId = 0, 0, 0
		fd, err = sys.MapCreate(&attr)
	}
	if err != nil {
		return nil, handleMapCreateError(attr, spec, err)
	}

	defer closeOnError(fd)
	m, err := newMap(fd, spec.Name, spec.Type, spec.KeySize, spec.ValueSize, spec.MaxEntries, spec.Flags)
	if err != nil {
		return nil, fmt.Errorf("map create: %w", err)
	}
	return m, nil
}

// handleMapCreateError decorates a BPF_MAP_CREATE failure with hints about
// the most likely cause, probing feature support where relevant.
func handleMapCreateError(attr sys.MapCreateAttr, spec *MapSpec, err error) error {
	if errors.Is(err, unix.EPERM) {
		return fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err)
	}
	if errors.Is(err, unix.EINVAL) && spec.MaxEntries == 0 {
		return fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err)
	}
	if errors.Is(err, unix.EINVAL) && spec.Type == UnspecifiedMap {
		return fmt.Errorf("map create: cannot use type %s", UnspecifiedMap)
	}
	if errors.Is(err, unix.EINVAL) && spec.Flags&unix.BPF_F_NO_PREALLOC > 0 {
		return fmt.Errorf("map create: %w (noPrealloc flag may be incompatible with map type %s)", err, spec.Type)
	}

	// Check whether the kernel lacks a feature the spec relies on.
	switch spec.Type {
	case ArrayOfMaps, HashOfMaps:
		if haveFeatErr := haveNestedMaps(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	if spec.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze {
		if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	if spec.Flags&unix.BPF_F_MMAPABLE > 0 {
		if haveFeatErr := haveMmapableMaps(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	if spec.Flags&unix.BPF_F_INNER_MAP > 0 {
		if haveFeatErr := haveInnerMaps(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	if spec.Flags&unix.BPF_F_NO_PREALLOC > 0 {
		if haveFeatErr := haveNoPreallocMaps(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	// BPF_MAP_TYPE_RINGBUF's max_entries must be a power-of-2 multiple of kernel's page size.
	if errors.Is(err, unix.EINVAL) &&
		(attr.MapType == sys.BPF_MAP_TYPE_RINGBUF || attr.MapType == sys.BPF_MAP_TYPE_USER_RINGBUF) {
		pageSize := uint32(os.Getpagesize())
		maxEntries := attr.MaxEntries
		if maxEntries%pageSize != 0 || !internal.IsPow(maxEntries) {
			return fmt.Errorf("map create: %w (ring map size %d not a multiple of page size %d)", err, maxEntries, pageSize)
		}
	}

	return fmt.Errorf("map create: %w", err)
}

// newMap allocates and returns a new Map structure.
// Sets the fullValueSize on per-CPU maps.
func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) {
	m := &Map{
		name,
		fd,
		typ,
		keySize,
		valueSize,
		maxEntries,
		flags,
		"",
		int(valueSize),
	}

	if !typ.hasPerCPUValue() {
		return m, nil
	}

	possibleCPUs, err := PossibleCPU()
	if err != nil {
		return nil, err
	}

	// Per-CPU values are padded to 8 bytes per CPU slot.
	m.fullValueSize = int(internal.Align(valueSize, 8)) * possibleCPUs
	return m, nil
}

// String returns a human-readable identifier including type, name and fd.
func (m *Map) String() string {
	if m.name != "" {
		return fmt.Sprintf("%s(%s)#%v", m.typ, m.name, m.fd)
	}
	return fmt.Sprintf("%s#%v", m.typ, m.fd)
}

// Type returns the underlying type of the map.
func (m *Map) Type() MapType {
	return m.typ
}

// KeySize returns the size of the map key in bytes.
func (m *Map) KeySize() uint32 {
	return m.keySize
}

// ValueSize returns the size of the map value in bytes.
func (m *Map) ValueSize() uint32 {
	return m.valueSize
}

// MaxEntries returns the maximum number of elements the map can hold.
func (m *Map) MaxEntries() uint32 {
	return m.maxEntries
}

// Flags returns the flags of the map.
func (m *Map) Flags() uint32 {
	return m.flags
}

// Info returns metadata about the map.
func (m *Map) Info() (*MapInfo, error) {
	return newMapInfoFromFd(m.fd)
}

// Handle returns a reference to the Map's type information in the kernel.
//
// Returns ErrNotSupported if the kernel has no BTF support, or if there is no
// BTF associated with the Map.
func (m *Map) Handle() (*btf.Handle, error) {
	info, err := m.Info()
	if err != nil {
		return nil, err
	}

	id, ok := info.BTFID()
	if !ok {
		return nil, fmt.Errorf("map %s: retrieve BTF ID: %w", m, ErrNotSupported)
	}

	return btf.NewHandleFromID(id)
}

// MapLookupFlags controls the behaviour of the map lookup calls.
type MapLookupFlags uint64

// LookupLock look up the value of a spin-locked map.
const LookupLock MapLookupFlags = unix.BPF_F_LOCK

// Lookup retrieves a value from a Map.
//
// Calls Close() on valueOut if it is of type **Map or **Program,
// and *valueOut is not nil.
//
// Returns an error if the key doesn't exist, see ErrKeyNotExist.
func (m *Map) Lookup(key, valueOut interface{}) error {
	return m.LookupWithFlags(key, valueOut, 0)
}

// LookupWithFlags retrieves a value from a Map with flags.
//
// Passing LookupLock flag will look up the value of a spin-locked
// map without returning the lock. This must be specified if the
// elements contain a spinlock.
//
// Calls Close() on valueOut if it is of type **Map or **Program,
// and *valueOut is not nil.
//
// Returns an error if the key doesn't exist, see ErrKeyNotExist.
func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
	// Per-CPU maps need one value slot per possible CPU; handled separately.
	if m.typ.hasPerCPUValue() {
		return m.lookupPerCPU(key, valueOut, flags)
	}

	valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize)
	if err := m.lookup(key, valueBytes.Pointer(), flags); err != nil {
		return err
	}

	return m.unmarshalValue(valueOut, valueBytes)
}

// LookupAndDelete retrieves and deletes a value from a Map.
//
// Returns ErrKeyNotExist if the key doesn't exist.
func (m *Map) LookupAndDelete(key, valueOut interface{}) error {
	return m.LookupAndDeleteWithFlags(key, valueOut, 0)
}

// LookupAndDeleteWithFlags retrieves and deletes a value from a Map.
//
// Passing LookupLock flag will look up and delete the value of a spin-locked
// map without returning the lock. This must be specified if the elements
// contain a spinlock.
//
// Returns ErrKeyNotExist if the key doesn't exist.
func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
	if m.typ.hasPerCPUValue() {
		return m.lookupAndDeletePerCPU(key, valueOut, flags)
	}

	valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize)
	if err := m.lookupAndDelete(key, valueBytes.Pointer(), flags); err != nil {
		return err
	}
	return m.unmarshalValue(valueOut, valueBytes)
}

// LookupBytes gets a value from Map.
//
// Returns a nil value if a key doesn't exist.
func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
	valueBytes := make([]byte, m.fullValueSize)
	valuePtr := sys.NewSlicePointer(valueBytes)

	err := m.lookup(key, valuePtr, 0)
	// A missing key is reported as (nil, nil) by this method.
	if errors.Is(err, ErrKeyNotExist) {
		return nil, nil
	}

	return valueBytes, err
}

// lookupPerCPU looks up a key and unmarshals one value per possible CPU
// into valueOut, which must be a slice or pointer to slice.
func (m *Map) lookupPerCPU(key, valueOut any, flags MapLookupFlags) error {
	slice, err := ensurePerCPUSlice(valueOut)
	if err != nil {
		return err
	}
	valueBytes := make([]byte, m.fullValueSize)
	if err := m.lookup(key, sys.NewSlicePointer(valueBytes), flags); err != nil {
		return err
	}
	return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes)
}

// lookup performs BPF_MAP_LOOKUP_ELEM, writing the raw value through valueOut.
func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags) error {
	keyPtr, err := m.marshalKey(key)
	if err != nil {
		return fmt.Errorf("can't marshal key: %w", err)
	}

	attr := sys.MapLookupElemAttr{
		MapFd: m.fd.Uint(),
		Key:   keyPtr,
		Value: valueOut,
		Flags: uint64(flags),
	}

	if err = sys.MapLookupElem(&attr); err != nil {
		// Use the pre-allocated error to avoid allocating on this hot path.
		if errors.Is(err, unix.ENOENT) {
			return errMapLookupKeyNotExist
		}
		return fmt.Errorf("lookup: %w", wrapMapError(err))
	}
	return nil
}

// lookupAndDeletePerCPU is the per-CPU variant of LookupAndDelete.
func (m *Map) lookupAndDeletePerCPU(key, valueOut any, flags MapLookupFlags) error {
	slice, err := ensurePerCPUSlice(valueOut)
	if err != nil {
		return err
	}
	valueBytes := make([]byte, m.fullValueSize)
	if err := m.lookupAndDelete(key, sys.NewSlicePointer(valueBytes), flags); err != nil {
		return err
	}
	return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes)
}

// ensurePerCPUSlice allocates a slice for a per-CPU value if necessary.
func ensurePerCPUSlice(sliceOrPtr any) (any, error) {
	sliceOrPtrType := reflect.TypeOf(sliceOrPtr)
	if sliceOrPtrType.Kind() == reflect.Slice {
		// The target is a slice, the caller is responsible for ensuring that
		// size is correct.
		return sliceOrPtr, nil
	}

	slicePtrType := sliceOrPtrType
	if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice {
		return nil, fmt.Errorf("per-cpu value requires a slice or a pointer to slice")
	}

	possibleCPUs, err := PossibleCPU()
	if err != nil {
		return nil, err
	}

	// Allocate a slice with one element per possible CPU and store it
	// through the caller's pointer.
	sliceType := slicePtrType.Elem()
	slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs)

	sliceElemType := sliceType.Elem()
	sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr
	reflect.ValueOf(sliceOrPtr).Elem().Set(slice)
	if !sliceElemIsPointer {
		return slice.Interface(), nil
	}
	sliceElemType = sliceElemType.Elem()

	// Pointer elements need to point at freshly allocated values.
	for i := 0; i < possibleCPUs; i++ {
		newElem := reflect.New(sliceElemType)
		slice.Index(i).Set(newElem)
	}

	return slice.Interface(), nil
}

// lookupAndDelete performs BPF_MAP_LOOKUP_AND_DELETE_ELEM.
func (m *Map) lookupAndDelete(key any, valuePtr sys.Pointer, flags MapLookupFlags) error {
	keyPtr, err := m.marshalKey(key)
	if err != nil {
		return fmt.Errorf("can't marshal key: %w", err)
	}

	attr := sys.MapLookupAndDeleteElemAttr{
		MapFd: m.fd.Uint(),
		Key:   keyPtr,
		Value: valuePtr,
		Flags: uint64(flags),
	}

	if err := sys.MapLookupAndDeleteElem(&attr); err != nil {
		return fmt.Errorf("lookup and delete: %w", wrapMapError(err))
	}

	return nil
}

// MapUpdateFlags controls the behaviour of the Map.Update call.
//
// The exact semantics depend on the specific MapType.
type MapUpdateFlags uint64

const (
	// UpdateAny creates a new element or update an existing one.
	UpdateAny MapUpdateFlags = iota
	// UpdateNoExist creates a new element.
	UpdateNoExist MapUpdateFlags = 1 << (iota - 1)
	// UpdateExist updates an existing element.
	UpdateExist
	// UpdateLock updates elements under bpf_spin_lock.
	UpdateLock
)

// Put replaces or creates a value in map.
//
// It is equivalent to calling Update with UpdateAny.
func (m *Map) Put(key, value interface{}) error {
	return m.Update(key, value, UpdateAny)
}

// Update changes the value of a key.
func (m *Map) Update(key, value any, flags MapUpdateFlags) error {
	if m.typ.hasPerCPUValue() {
		return m.updatePerCPU(key, value, flags)
	}

	valuePtr, err := m.marshalValue(value)
	if err != nil {
		return fmt.Errorf("marshal value: %w", err)
	}

	return m.update(key, valuePtr, flags)
}

// updatePerCPU marshals one value per possible CPU before updating.
func (m *Map) updatePerCPU(key, value any, flags MapUpdateFlags) error {
	valuePtr, err := marshalPerCPUValue(value, int(m.valueSize))
	if err != nil {
		return fmt.Errorf("marshal value: %w", err)
	}

	return m.update(key, valuePtr, flags)
}

// update performs BPF_MAP_UPDATE_ELEM with an already-marshaled value.
func (m *Map) update(key any, valuePtr sys.Pointer, flags MapUpdateFlags) error {
	keyPtr, err := m.marshalKey(key)
	if err != nil {
		return fmt.Errorf("marshal key: %w", err)
	}

	attr := sys.MapUpdateElemAttr{
		MapFd: m.fd.Uint(),
		Key:   keyPtr,
		Value: valuePtr,
		Flags: uint64(flags),
	}

	if err = sys.MapUpdateElem(&attr); err != nil {
		return fmt.Errorf("update: %w", wrapMapError(err))
	}

	return nil
}

// Delete removes a value.
//
// Returns ErrKeyNotExist if the key does not exist.
func (m *Map) Delete(key interface{}) error {
	keyPtr, err := m.marshalKey(key)
	if err != nil {
		return fmt.Errorf("can't marshal key: %w", err)
	}

	attr := sys.MapDeleteElemAttr{
		MapFd: m.fd.Uint(),
		Key:   keyPtr,
	}

	if err = sys.MapDeleteElem(&attr); err != nil {
		return fmt.Errorf("delete: %w", wrapMapError(err))
	}
	return nil
}

// NextKey finds the key following an initial key.
//
// See NextKeyBytes for details.
//
// Returns ErrKeyNotExist if there is no next key.
func (m *Map) NextKey(key, nextKeyOut interface{}) error {
	nextKeyBytes := makeMapSyscallOutput(nextKeyOut, int(m.keySize))

	if err := m.nextKey(key, nextKeyBytes.Pointer()); err != nil {
		return err
	}

	if err := nextKeyBytes.Unmarshal(nextKeyOut); err != nil {
		return fmt.Errorf("can't unmarshal next key: %w", err)
	}
	return nil
}

// NextKeyBytes returns the key following an initial key as a byte slice.
//
// Passing nil will return the first key.
//
// Use Iterate if you want to traverse all entries in the map.
//
// Returns nil if there are no more keys.
func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
	nextKey := make([]byte, m.keySize)
	nextKeyPtr := sys.NewSlicePointer(nextKey)

	err := m.nextKey(key, nextKeyPtr)
	if errors.Is(err, ErrKeyNotExist) {
		return nil, nil
	}

	return nextKey, err
}

// nextKey performs BPF_MAP_GET_NEXT_KEY, working around old-kernel behaviour
// for nil start keys.
func (m *Map) nextKey(key interface{}, nextKeyOut sys.Pointer) error {
	var (
		keyPtr sys.Pointer
		err    error
	)

	// A nil key requests the first key in the map; leave keyPtr zeroed.
	if key != nil {
		keyPtr, err = m.marshalKey(key)
		if err != nil {
			return fmt.Errorf("can't marshal key: %w", err)
		}
	}

	attr := sys.MapGetNextKeyAttr{
		MapFd:   m.fd.Uint(),
		Key:     keyPtr,
		NextKey: nextKeyOut,
	}

	if err = sys.MapGetNextKey(&attr); err != nil {
		// Kernels 4.4.131 and earlier return EFAULT instead of a pointer to the
		// first map element when a nil key pointer is specified.
		if key == nil && errors.Is(err, unix.EFAULT) {
			var guessKey []byte
			guessKey, err = m.guessNonExistentKey()
			if err != nil {
				return err
			}

			// Retry the syscall with a valid non-existing key.
			attr.Key = sys.NewSlicePointer(guessKey)
			if err = sys.MapGetNextKey(&attr); err == nil {
				return nil
			}
		}

		return fmt.Errorf("next key: %w", wrapMapError(err))
	}

	return nil
}

// mmapProtectedPage lazily maps a single PROT_NONE page, shared by all
// callers of guessNonExistentKey as a write-discarding value buffer.
var mmapProtectedPage = sync.OnceValues(func() ([]byte, error) {
	return unix.Mmap(-1, 0, os.Getpagesize(), unix.PROT_NONE, unix.MAP_ANON|unix.MAP_SHARED)
})

// guessNonExistentKey attempts to perform a map lookup that returns ENOENT.
// This is necessary on kernels before 4.4.132, since those don't support
// iterating maps from the start by providing an invalid key pointer.
func (m *Map) guessNonExistentKey() ([]byte, error) {
	// Map a protected page and use that as the value pointer. This saves some
	// work copying out the value, which we're not interested in.
	page, err := mmapProtectedPage()
	if err != nil {
		return nil, err
	}
	valuePtr := sys.NewSlicePointer(page)

	randKey := make([]byte, int(m.keySize))

	for i := 0; i < 4; i++ {
		switch i {
		// For hash maps, the 0 key is less likely to be occupied. They're often
		// used for storing data related to pointers, and their access pattern is
		// generally scattered across the keyspace.
		case 0:
		// An all-0xff key is guaranteed to be out of bounds of any array, since
		// those have a fixed key size of 4 bytes. The only corner case being
		// arrays with 2^32 max entries, but those are prohibitively expensive
		// in many environments.
		case 1:
			for r := range randKey {
				randKey[r] = 0xff
			}
		// Inspired by BCC, 0x55 is an alternating binary pattern (0101), so
		// is unlikely to be taken.
		case 2:
			for r := range randKey {
				randKey[r] = 0x55
			}
		// Last ditch effort, generate a random key.
		case 3:
			rand.New(rand.NewSource(time.Now().UnixNano())).Read(randKey)
		}

		err := m.lookup(randKey, valuePtr, 0)
		if errors.Is(err, ErrKeyNotExist) {
			return randKey, nil
		}
	}

	return nil, errors.New("couldn't find non-existing key")
}

// BatchLookup looks up many elements in a map at once.
//
// "keysOut" and "valuesOut" must be of type slice, a pointer
// to a slice or buffer will not work.
// "cursor" is a pointer to an opaque handle. It must be non-nil. Pass
// "cursor" to subsequent calls of this function to continue the batching
// operation in the case of chunking.
//
// Warning: This API is not very safe to use as the kernel implementation for
// batching relies on the user to be aware of subtle details with regard to
// different map type implementations.
1015 // 1016 // ErrKeyNotExist is returned when the batch lookup has reached 1017 // the end of all possible results, even when partial results 1018 // are returned. It should be used to evaluate when lookup is "done". 1019 func (m *Map) BatchLookup(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { 1020 n, err := m.batchLookup(sys.BPF_MAP_LOOKUP_BATCH, cursor, keysOut, valuesOut, opts) 1021 if err != nil { 1022 return n, fmt.Errorf("map batch lookup: %w", err) 1023 } 1024 return n, nil 1025 } 1026 1027 // BatchLookupAndDelete looks up many elements in a map at once, 1028 // 1029 // It then deletes all those elements. 1030 // "keysOut" and "valuesOut" must be of type slice, a pointer 1031 // to a slice or buffer will not work. 1032 // "cursor" is an pointer to an opaque handle. It must be non-nil. Pass 1033 // "cursor" to subsequent calls of this function to continue the batching 1034 // operation in the case of chunking. 1035 // 1036 // Warning: This API is not very safe to use as the kernel implementation for 1037 // batching relies on the user to be aware of subtle details with regarding to 1038 // different map type implementations. 1039 // 1040 // ErrKeyNotExist is returned when the batch lookup has reached 1041 // the end of all possible results, even when partial results 1042 // are returned. It should be used to evaluate when lookup is "done". 1043 func (m *Map) BatchLookupAndDelete(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { 1044 n, err := m.batchLookup(sys.BPF_MAP_LOOKUP_AND_DELETE_BATCH, cursor, keysOut, valuesOut, opts) 1045 if err != nil { 1046 return n, fmt.Errorf("map batch lookup and delete: %w", err) 1047 } 1048 return n, nil 1049 } 1050 1051 // MapBatchCursor represents a starting point for a batch operation. 
1052 type MapBatchCursor struct { 1053 m *Map 1054 opaque []byte 1055 } 1056 1057 func (m *Map) batchLookup(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { 1058 if m.typ.hasPerCPUValue() { 1059 return m.batchLookupPerCPU(cmd, cursor, keysOut, valuesOut, opts) 1060 } 1061 1062 count, err := batchCount(keysOut, valuesOut) 1063 if err != nil { 1064 return 0, err 1065 } 1066 1067 valueBuf := sysenc.SyscallOutput(valuesOut, count*int(m.fullValueSize)) 1068 1069 n, err := m.batchLookupCmd(cmd, cursor, count, keysOut, valueBuf.Pointer(), opts) 1070 if errors.Is(err, unix.ENOSPC) { 1071 // Hash tables return ENOSPC when the size of the batch is smaller than 1072 // any bucket. 1073 return n, fmt.Errorf("%w (batch size too small?)", err) 1074 } else if err != nil { 1075 return n, err 1076 } 1077 1078 err = valueBuf.Unmarshal(valuesOut) 1079 if err != nil { 1080 return 0, err 1081 } 1082 1083 return n, nil 1084 } 1085 1086 func (m *Map) batchLookupPerCPU(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { 1087 count, err := sliceLen(keysOut) 1088 if err != nil { 1089 return 0, fmt.Errorf("keys: %w", err) 1090 } 1091 1092 valueBuf := make([]byte, count*int(m.fullValueSize)) 1093 valuePtr := sys.NewSlicePointer(valueBuf) 1094 1095 n, sysErr := m.batchLookupCmd(cmd, cursor, count, keysOut, valuePtr, opts) 1096 if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) { 1097 return 0, err 1098 } 1099 1100 err = unmarshalBatchPerCPUValue(valuesOut, count, int(m.valueSize), valueBuf) 1101 if err != nil { 1102 return 0, err 1103 } 1104 1105 return n, sysErr 1106 } 1107 1108 func (m *Map) batchLookupCmd(cmd sys.Cmd, cursor *MapBatchCursor, count int, keysOut any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) { 1109 cursorLen := int(m.keySize) 1110 if cursorLen < 4 { 1111 // * generic_map_lookup_batch requires that batch_out is key_size bytes. 
1112 // This is used by array and LPM maps. 1113 // 1114 // * __htab_map_lookup_and_delete_batch requires u32. This is used by the 1115 // various hash maps. 1116 // 1117 // Use a minimum of 4 bytes to avoid having to distinguish between the two. 1118 cursorLen = 4 1119 } 1120 1121 inBatch := cursor.opaque 1122 if inBatch == nil { 1123 // This is the first lookup, allocate a buffer to hold the cursor. 1124 cursor.opaque = make([]byte, cursorLen) 1125 cursor.m = m 1126 } else if cursor.m != m { 1127 // Prevent reuse of a cursor across maps. First, it's unlikely to work. 1128 // Second, the maps may require different cursorLen and cursor.opaque 1129 // may therefore be too short. This could lead to the kernel clobbering 1130 // user space memory. 1131 return 0, errors.New("a cursor may not be reused across maps") 1132 } 1133 1134 if err := haveBatchAPI(); err != nil { 1135 return 0, err 1136 } 1137 1138 keyBuf := sysenc.SyscallOutput(keysOut, count*int(m.keySize)) 1139 1140 attr := sys.MapLookupBatchAttr{ 1141 MapFd: m.fd.Uint(), 1142 Keys: keyBuf.Pointer(), 1143 Values: valuePtr, 1144 Count: uint32(count), 1145 InBatch: sys.NewSlicePointer(inBatch), 1146 OutBatch: sys.NewSlicePointer(cursor.opaque), 1147 } 1148 1149 if opts != nil { 1150 attr.ElemFlags = opts.ElemFlags 1151 attr.Flags = opts.Flags 1152 } 1153 1154 _, sysErr := sys.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) 1155 sysErr = wrapMapError(sysErr) 1156 if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) { 1157 return 0, sysErr 1158 } 1159 1160 if err := keyBuf.Unmarshal(keysOut); err != nil { 1161 return 0, err 1162 } 1163 1164 return int(attr.Count), sysErr 1165 } 1166 1167 // BatchUpdate updates the map with multiple keys and values 1168 // simultaneously. 1169 // "keys" and "values" must be of type slice, a pointer 1170 // to a slice or buffer will not work. 
1171 func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, error) { 1172 if m.typ.hasPerCPUValue() { 1173 return m.batchUpdatePerCPU(keys, values, opts) 1174 } 1175 1176 count, err := batchCount(keys, values) 1177 if err != nil { 1178 return 0, err 1179 } 1180 1181 valuePtr, err := marshalMapSyscallInput(values, count*int(m.valueSize)) 1182 if err != nil { 1183 return 0, err 1184 } 1185 1186 return m.batchUpdate(count, keys, valuePtr, opts) 1187 } 1188 1189 func (m *Map) batchUpdate(count int, keys any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) { 1190 keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize)) 1191 if err != nil { 1192 return 0, err 1193 } 1194 1195 attr := sys.MapUpdateBatchAttr{ 1196 MapFd: m.fd.Uint(), 1197 Keys: keyPtr, 1198 Values: valuePtr, 1199 Count: uint32(count), 1200 } 1201 if opts != nil { 1202 attr.ElemFlags = opts.ElemFlags 1203 attr.Flags = opts.Flags 1204 } 1205 1206 err = sys.MapUpdateBatch(&attr) 1207 if err != nil { 1208 if haveFeatErr := haveBatchAPI(); haveFeatErr != nil { 1209 return 0, haveFeatErr 1210 } 1211 return int(attr.Count), fmt.Errorf("batch update: %w", wrapMapError(err)) 1212 } 1213 1214 return int(attr.Count), nil 1215 } 1216 1217 func (m *Map) batchUpdatePerCPU(keys, values any, opts *BatchOptions) (int, error) { 1218 count, err := sliceLen(keys) 1219 if err != nil { 1220 return 0, fmt.Errorf("keys: %w", err) 1221 } 1222 1223 valueBuf, err := marshalBatchPerCPUValue(values, count, int(m.valueSize)) 1224 if err != nil { 1225 return 0, err 1226 } 1227 1228 return m.batchUpdate(count, keys, sys.NewSlicePointer(valueBuf), opts) 1229 } 1230 1231 // BatchDelete batch deletes entries in the map by keys. 1232 // "keys" must be of type slice, a pointer to a slice or buffer will not work. 
1233 func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) { 1234 count, err := sliceLen(keys) 1235 if err != nil { 1236 return 0, fmt.Errorf("keys: %w", err) 1237 } 1238 1239 keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize)) 1240 if err != nil { 1241 return 0, fmt.Errorf("cannot marshal keys: %v", err) 1242 } 1243 1244 attr := sys.MapDeleteBatchAttr{ 1245 MapFd: m.fd.Uint(), 1246 Keys: keyPtr, 1247 Count: uint32(count), 1248 } 1249 1250 if opts != nil { 1251 attr.ElemFlags = opts.ElemFlags 1252 attr.Flags = opts.Flags 1253 } 1254 1255 if err = sys.MapDeleteBatch(&attr); err != nil { 1256 if haveFeatErr := haveBatchAPI(); haveFeatErr != nil { 1257 return 0, haveFeatErr 1258 } 1259 return int(attr.Count), fmt.Errorf("batch delete: %w", wrapMapError(err)) 1260 } 1261 1262 return int(attr.Count), nil 1263 } 1264 1265 func batchCount(keys, values any) (int, error) { 1266 keysLen, err := sliceLen(keys) 1267 if err != nil { 1268 return 0, fmt.Errorf("keys: %w", err) 1269 } 1270 1271 valuesLen, err := sliceLen(values) 1272 if err != nil { 1273 return 0, fmt.Errorf("values: %w", err) 1274 } 1275 1276 if keysLen != valuesLen { 1277 return 0, fmt.Errorf("keys and values must have the same length") 1278 } 1279 1280 return keysLen, nil 1281 } 1282 1283 // Iterate traverses a map. 1284 // 1285 // It's safe to create multiple iterators at the same time. 1286 // 1287 // It's not possible to guarantee that all keys in a map will be 1288 // returned if there are concurrent modifications to the map. 1289 func (m *Map) Iterate() *MapIterator { 1290 return newMapIterator(m) 1291 } 1292 1293 // Close the Map's underlying file descriptor, which could unload the 1294 // Map from the kernel if it is not pinned or in use by a loaded Program. 1295 func (m *Map) Close() error { 1296 if m == nil { 1297 // This makes it easier to clean up when iterating maps 1298 // of maps / programs. 
1299 return nil 1300 } 1301 1302 return m.fd.Close() 1303 } 1304 1305 // FD gets the file descriptor of the Map. 1306 // 1307 // Calling this function is invalid after Close has been called. 1308 func (m *Map) FD() int { 1309 return m.fd.Int() 1310 } 1311 1312 // Clone creates a duplicate of the Map. 1313 // 1314 // Closing the duplicate does not affect the original, and vice versa. 1315 // Changes made to the map are reflected by both instances however. 1316 // If the original map was pinned, the cloned map will not be pinned by default. 1317 // 1318 // Cloning a nil Map returns nil. 1319 func (m *Map) Clone() (*Map, error) { 1320 if m == nil { 1321 return nil, nil 1322 } 1323 1324 dup, err := m.fd.Dup() 1325 if err != nil { 1326 return nil, fmt.Errorf("can't clone map: %w", err) 1327 } 1328 1329 return &Map{ 1330 m.name, 1331 dup, 1332 m.typ, 1333 m.keySize, 1334 m.valueSize, 1335 m.maxEntries, 1336 m.flags, 1337 "", 1338 m.fullValueSize, 1339 }, nil 1340 } 1341 1342 // Pin persists the map on the BPF virtual file system past the lifetime of 1343 // the process that created it . 1344 // 1345 // Calling Pin on a previously pinned map will overwrite the path, except when 1346 // the new path already exists. Re-pinning across filesystems is not supported. 1347 // You can Clone a map to pin it to a different path. 1348 // 1349 // This requires bpffs to be mounted above fileName. 1350 // See https://docs.cilium.io/en/stable/network/kubernetes/configuration/#mounting-bpffs-with-systemd 1351 func (m *Map) Pin(fileName string) error { 1352 if err := internal.Pin(m.pinnedPath, fileName, m.fd); err != nil { 1353 return err 1354 } 1355 m.pinnedPath = fileName 1356 return nil 1357 } 1358 1359 // Unpin removes the persisted state for the map from the BPF virtual filesystem. 1360 // 1361 // Failed calls to Unpin will not alter the state returned by IsPinned. 1362 // 1363 // Unpinning an unpinned Map returns nil. 
1364 func (m *Map) Unpin() error { 1365 if err := internal.Unpin(m.pinnedPath); err != nil { 1366 return err 1367 } 1368 m.pinnedPath = "" 1369 return nil 1370 } 1371 1372 // IsPinned returns true if the map has a non-empty pinned path. 1373 func (m *Map) IsPinned() bool { 1374 return m.pinnedPath != "" 1375 } 1376 1377 // Freeze prevents a map to be modified from user space. 1378 // 1379 // It makes no changes to kernel-side restrictions. 1380 func (m *Map) Freeze() error { 1381 attr := sys.MapFreezeAttr{ 1382 MapFd: m.fd.Uint(), 1383 } 1384 1385 if err := sys.MapFreeze(&attr); err != nil { 1386 if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil { 1387 return fmt.Errorf("can't freeze map: %w", haveFeatErr) 1388 } 1389 return fmt.Errorf("can't freeze map: %w", err) 1390 } 1391 return nil 1392 } 1393 1394 // finalize populates the Map according to the Contents specified 1395 // in spec and freezes the Map if requested by spec. 1396 func (m *Map) finalize(spec *MapSpec) error { 1397 for _, kv := range spec.Contents { 1398 if err := m.Put(kv.Key, kv.Value); err != nil { 1399 return fmt.Errorf("putting value: key %v: %w", kv.Key, err) 1400 } 1401 } 1402 1403 if spec.Freeze { 1404 if err := m.Freeze(); err != nil { 1405 return fmt.Errorf("freezing map: %w", err) 1406 } 1407 } 1408 1409 return nil 1410 } 1411 1412 func (m *Map) marshalKey(data interface{}) (sys.Pointer, error) { 1413 if data == nil { 1414 if m.keySize == 0 { 1415 // Queues have a key length of zero, so passing nil here is valid. 
1416 return sys.NewPointer(nil), nil 1417 } 1418 return sys.Pointer{}, errors.New("can't use nil as key of map") 1419 } 1420 1421 return marshalMapSyscallInput(data, int(m.keySize)) 1422 } 1423 1424 func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) { 1425 var ( 1426 buf []byte 1427 err error 1428 ) 1429 1430 switch value := data.(type) { 1431 case *Map: 1432 if !m.typ.canStoreMap() { 1433 return sys.Pointer{}, fmt.Errorf("can't store map in %s", m.typ) 1434 } 1435 buf, err = marshalMap(value, int(m.valueSize)) 1436 1437 case *Program: 1438 if !m.typ.canStoreProgram() { 1439 return sys.Pointer{}, fmt.Errorf("can't store program in %s", m.typ) 1440 } 1441 buf, err = marshalProgram(value, int(m.valueSize)) 1442 1443 default: 1444 return marshalMapSyscallInput(data, int(m.valueSize)) 1445 } 1446 1447 if err != nil { 1448 return sys.Pointer{}, err 1449 } 1450 1451 return sys.NewSlicePointer(buf), nil 1452 } 1453 1454 func (m *Map) unmarshalValue(value any, buf sysenc.Buffer) error { 1455 switch value := value.(type) { 1456 case **Map: 1457 if !m.typ.canStoreMap() { 1458 return fmt.Errorf("can't read a map from %s", m.typ) 1459 } 1460 1461 other, err := unmarshalMap(buf) 1462 if err != nil { 1463 return err 1464 } 1465 1466 // The caller might close the map externally, so ignore errors. 1467 _ = (*value).Close() 1468 1469 *value = other 1470 return nil 1471 1472 case *Map: 1473 if !m.typ.canStoreMap() { 1474 return fmt.Errorf("can't read a map from %s", m.typ) 1475 } 1476 return errors.New("require pointer to *Map") 1477 1478 case **Program: 1479 if !m.typ.canStoreProgram() { 1480 return fmt.Errorf("can't read a program from %s", m.typ) 1481 } 1482 1483 other, err := unmarshalProgram(buf) 1484 if err != nil { 1485 return err 1486 } 1487 1488 // The caller might close the program externally, so ignore errors. 
1489 _ = (*value).Close() 1490 1491 *value = other 1492 return nil 1493 1494 case *Program: 1495 if !m.typ.canStoreProgram() { 1496 return fmt.Errorf("can't read a program from %s", m.typ) 1497 } 1498 return errors.New("require pointer to *Program") 1499 } 1500 1501 return buf.Unmarshal(value) 1502 } 1503 1504 // LoadPinnedMap loads a Map from a BPF file. 1505 func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) { 1506 fd, err := sys.ObjGet(&sys.ObjGetAttr{ 1507 Pathname: sys.NewStringPointer(fileName), 1508 FileFlags: opts.Marshal(), 1509 }) 1510 if err != nil { 1511 return nil, err 1512 } 1513 1514 m, err := newMapFromFD(fd) 1515 if err == nil { 1516 m.pinnedPath = fileName 1517 } 1518 1519 return m, err 1520 } 1521 1522 // unmarshalMap creates a map from a map ID encoded in host endianness. 1523 func unmarshalMap(buf sysenc.Buffer) (*Map, error) { 1524 var id uint32 1525 if err := buf.Unmarshal(&id); err != nil { 1526 return nil, err 1527 } 1528 return NewMapFromID(MapID(id)) 1529 } 1530 1531 // marshalMap marshals the fd of a map into a buffer in host endianness. 1532 func marshalMap(m *Map, length int) ([]byte, error) { 1533 if length != 4 { 1534 return nil, fmt.Errorf("can't marshal map to %d bytes", length) 1535 } 1536 1537 buf := make([]byte, 4) 1538 internal.NativeEndian.PutUint32(buf, m.fd.Uint()) 1539 return buf, nil 1540 } 1541 1542 // MapIterator iterates a Map. 1543 // 1544 // See Map.Iterate. 1545 type MapIterator struct { 1546 target *Map 1547 // Temporary storage to avoid allocations in Next(). This is any instead 1548 // of []byte to avoid allocations. 1549 cursor any 1550 count, maxEntries uint32 1551 done bool 1552 err error 1553 } 1554 1555 func newMapIterator(target *Map) *MapIterator { 1556 return &MapIterator{ 1557 target: target, 1558 maxEntries: target.maxEntries, 1559 } 1560 } 1561 1562 // Next decodes the next key and value. 1563 // 1564 // Iterating a hash map from which keys are being deleted is not 1565 // safe. 
You may see the same key multiple times. Iteration may 1566 // also abort with an error, see IsIterationAborted. 1567 // 1568 // Returns false if there are no more entries. You must check 1569 // the result of Err afterwards. 1570 // 1571 // See Map.Get for further caveats around valueOut. 1572 func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool { 1573 if mi.err != nil || mi.done { 1574 return false 1575 } 1576 1577 // For array-like maps NextKey returns nil only after maxEntries 1578 // iterations. 1579 for mi.count <= mi.maxEntries { 1580 if mi.cursor == nil { 1581 // Pass nil interface to NextKey to make sure the Map's first key 1582 // is returned. If we pass an uninitialized []byte instead, it'll see a 1583 // non-nil interface and try to marshal it. 1584 mi.cursor = make([]byte, mi.target.keySize) 1585 mi.err = mi.target.NextKey(nil, mi.cursor) 1586 } else { 1587 mi.err = mi.target.NextKey(mi.cursor, mi.cursor) 1588 } 1589 1590 if errors.Is(mi.err, ErrKeyNotExist) { 1591 mi.done = true 1592 mi.err = nil 1593 return false 1594 } else if mi.err != nil { 1595 mi.err = fmt.Errorf("get next key: %w", mi.err) 1596 return false 1597 } 1598 1599 mi.count++ 1600 mi.err = mi.target.Lookup(mi.cursor, valueOut) 1601 if errors.Is(mi.err, ErrKeyNotExist) { 1602 // Even though the key should be valid, we couldn't look up 1603 // its value. If we're iterating a hash map this is probably 1604 // because a concurrent delete removed the value before we 1605 // could get it. This means that the next call to NextKeyBytes 1606 // is very likely to restart iteration. 1607 // If we're iterating one of the fd maps like 1608 // ProgramArray it means that a given slot doesn't have 1609 // a valid fd associated. It's OK to continue to the next slot. 
1610 continue 1611 } 1612 if mi.err != nil { 1613 mi.err = fmt.Errorf("look up next key: %w", mi.err) 1614 return false 1615 } 1616 1617 buf := mi.cursor.([]byte) 1618 if ptr, ok := keyOut.(unsafe.Pointer); ok { 1619 copy(unsafe.Slice((*byte)(ptr), len(buf)), buf) 1620 } else { 1621 mi.err = sysenc.Unmarshal(keyOut, buf) 1622 } 1623 1624 return mi.err == nil 1625 } 1626 1627 mi.err = fmt.Errorf("%w", ErrIterationAborted) 1628 return false 1629 } 1630 1631 // Err returns any encountered error. 1632 // 1633 // The method must be called after Next returns nil. 1634 // 1635 // Returns ErrIterationAborted if it wasn't possible to do a full iteration. 1636 func (mi *MapIterator) Err() error { 1637 return mi.err 1638 } 1639 1640 // MapGetNextID returns the ID of the next eBPF map. 1641 // 1642 // Returns ErrNotExist, if there is no next eBPF map. 1643 func MapGetNextID(startID MapID) (MapID, error) { 1644 attr := &sys.MapGetNextIdAttr{Id: uint32(startID)} 1645 return MapID(attr.NextId), sys.MapGetNextId(attr) 1646 } 1647 1648 // NewMapFromID returns the map for a given id. 1649 // 1650 // Returns ErrNotExist, if there is no eBPF map with the given id. 1651 func NewMapFromID(id MapID) (*Map, error) { 1652 fd, err := sys.MapGetFdById(&sys.MapGetFdByIdAttr{ 1653 Id: uint32(id), 1654 }) 1655 if err != nil { 1656 return nil, err 1657 } 1658 1659 return newMapFromFD(fd) 1660 } 1661 1662 // sliceLen returns the length if the value is a slice or an error otherwise. 1663 func sliceLen(slice any) (int, error) { 1664 sliceValue := reflect.ValueOf(slice) 1665 if sliceValue.Kind() != reflect.Slice { 1666 return 0, fmt.Errorf("%T is not a slice", slice) 1667 } 1668 return sliceValue.Len(), nil 1669 }