// github.com/cilium/ebpf@v0.15.1-0.20240517100537-8079b37aa138/map.go

package ebpf

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"os"
	"path/filepath"
	"reflect"
	"strings"
	"sync"
	"time"
	"unsafe"

	"github.com/cilium/ebpf/btf"
	"github.com/cilium/ebpf/internal"
	"github.com/cilium/ebpf/internal/sys"
	"github.com/cilium/ebpf/internal/sysenc"
	"github.com/cilium/ebpf/internal/unix"
)

// Errors returned by Map and MapIterator methods.
var (
	ErrKeyNotExist      = errors.New("key does not exist")
	ErrKeyExist         = errors.New("key already exists")
	ErrIterationAborted = errors.New("iteration aborted")
	ErrMapIncompatible  = errors.New("map spec is incompatible with existing map")
	errMapNoBTFValue    = errors.New("map spec does not contain a BTF Value")
)

// MapOptions control loading a map into the kernel.
type MapOptions struct {
	// The base path to pin maps in if requested via PinByName.
	// Existing maps will be re-used if they are compatible, otherwise an
	// error is returned.
	PinPath        string
	LoadPinOptions LoadPinOptions
}

// MapID represents the unique ID of an eBPF map.
type MapID uint32

// MapSpec defines a Map.
type MapSpec struct {
	// Name is passed to the kernel as a debug aid. Must only contain
	// alphanumeric and '_' characters.
	Name       string
	Type       MapType
	KeySize    uint32
	ValueSize  uint32
	MaxEntries uint32

	// Flags is passed to the kernel and specifies additional map
	// creation attributes.
	Flags uint32

	// Automatically pin and load a map from MapOptions.PinPath.
	// Generates an error if an existing pinned map is incompatible with the MapSpec.
	Pinning PinType

	// Specify numa node during map creation
	// (effective only if unix.BPF_F_NUMA_NODE flag is set,
	// which can be imported from golang.org/x/sys/unix)
	NumaNode uint32

	// The initial contents of the map. May be nil.
	Contents []MapKV

	// Whether to freeze a map after setting its initial contents.
	Freeze bool

	// InnerMap is used as a template for ArrayOfMaps and HashOfMaps.
	InnerMap *MapSpec

	// Extra trailing bytes found in the ELF map definition when using structs
	// larger than libbpf's bpf_map_def. nil if no trailing bytes were present.
	// Must be nil or empty before instantiating the MapSpec into a Map.
	Extra *bytes.Reader

	// The key and value type of this map. May be nil.
	Key, Value btf.Type
}

func (ms *MapSpec) String() string {
	return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags)
}

// Copy returns a copy of the spec.
//
// MapSpec.Contents is a shallow copy.
func (ms *MapSpec) Copy() *MapSpec {
	if ms == nil {
		return nil
	}

	cpy := *ms

	cpy.Contents = make([]MapKV, len(ms.Contents))
	copy(cpy.Contents, ms.Contents)

	cpy.InnerMap = ms.InnerMap.Copy()

	return &cpy
}

// fixupMagicFields fills fields of MapSpec which are usually
// left empty in ELF or which depend on runtime information.
//
// The method doesn't modify the spec, instead returning a copy.
// The copy is only performed if fixups are necessary, so callers mustn't mutate
// the returned spec.
func (spec *MapSpec) fixupMagicFields() (*MapSpec, error) {
	switch spec.Type {
	case ArrayOfMaps, HashOfMaps:
		if spec.ValueSize != 0 && spec.ValueSize != 4 {
			return nil, errors.New("ValueSize must be zero or four for map of map")
		}

		spec = spec.Copy()
		spec.ValueSize = 4

	case PerfEventArray:
		if spec.KeySize != 0 && spec.KeySize != 4 {
			return nil, errors.New("KeySize must be zero or four for perf event array")
		}

		if spec.ValueSize != 0 && spec.ValueSize != 4 {
			return nil, errors.New("ValueSize must be zero or four for perf event array")
		}

		spec = spec.Copy()
		spec.KeySize = 4
		spec.ValueSize = 4

		n, err := PossibleCPU()
		if err != nil {
			return nil, fmt.Errorf("fixup perf event array: %w", err)
		}

		if n := uint32(n); spec.MaxEntries == 0 || spec.MaxEntries > n {
			// MaxEntries should be zero most of the time, but there is code
			// out there which hardcodes large constants. Clamp the number
			// of entries to the number of CPUs at most. Allow creating maps with
			// less than n items since some kernel selftests relied on this
			// behaviour in the past.
			spec.MaxEntries = n
		}
	}

	return spec, nil
}

// dataSection returns the contents and BTF Datasec descriptor of the spec.
func (ms *MapSpec) dataSection() ([]byte, *btf.Datasec, error) {
	if ms.Value == nil {
		return nil, nil, errMapNoBTFValue
	}

	ds, ok := ms.Value.(*btf.Datasec)
	if !ok {
		return nil, nil, fmt.Errorf("map value BTF is a %T, not a *btf.Datasec", ms.Value)
	}

	if n := len(ms.Contents); n != 1 {
		return nil, nil, fmt.Errorf("expected one key, found %d", n)
	}

	kv := ms.Contents[0]
	value, ok := kv.Value.([]byte)
	if !ok {
		return nil, nil, fmt.Errorf("value at first map key is %T, not []byte", kv.Value)
	}

	return value, ds, nil
}

// MapKV is used to initialize the contents of a Map.
type MapKV struct {
	Key   interface{}
	Value interface{}
}

// Compatible returns nil if an existing map may be used instead of creating
// one from the spec.
//
// Returns an error wrapping [ErrMapIncompatible] otherwise.
func (ms *MapSpec) Compatible(m *Map) error {
	ms, err := ms.fixupMagicFields()
	if err != nil {
		return err
	}

	diffs := []string{}
	if m.typ != ms.Type {
		diffs = append(diffs, fmt.Sprintf("Type: %s changed to %s", m.typ, ms.Type))
	}
	if m.keySize != ms.KeySize {
		diffs = append(diffs, fmt.Sprintf("KeySize: %d changed to %d", m.keySize, ms.KeySize))
	}
	if m.valueSize != ms.ValueSize {
		diffs = append(diffs, fmt.Sprintf("ValueSize: %d changed to %d", m.valueSize, ms.ValueSize))
	}
	if m.maxEntries != ms.MaxEntries {
		diffs = append(diffs, fmt.Sprintf("MaxEntries: %d changed to %d", m.maxEntries, ms.MaxEntries))
	}

	// BPF_F_RDONLY_PROG is set unconditionally for devmaps. Explicitly allow this
	// mismatch.
	if !((ms.Type == DevMap || ms.Type == DevMapHash) && m.flags^ms.Flags == unix.BPF_F_RDONLY_PROG) &&
		m.flags != ms.Flags {
		diffs = append(diffs, fmt.Sprintf("Flags: %d changed to %d", m.flags, ms.Flags))
	}

	if len(diffs) == 0 {
		return nil
	}

	return fmt.Errorf("%s: %w", strings.Join(diffs, ", "), ErrMapIncompatible)
}

// Map represents a Map file descriptor.
//
// It is not safe to close a map which is used by other goroutines.
//
// Methods which take interface{} arguments by default encode
// them using binary.Read/Write in the machine's native endianness.
//
// Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler
// if you require custom encoding.
type Map struct {
	name       string
	fd         *sys.FD
	typ        MapType
	keySize    uint32
	valueSize  uint32
	maxEntries uint32
	flags      uint32
	pinnedPath string
	// Per-CPU maps return values larger than the size in the spec.
	fullValueSize int
}

// NewMapFromFD creates a map from a raw fd.
//
// You should not use fd after calling this function.
func NewMapFromFD(fd int) (*Map, error) {
	f, err := sys.NewFD(fd)
	if err != nil {
		return nil, err
	}

	return newMapFromFD(f)
}

func newMapFromFD(fd *sys.FD) (*Map, error) {
	info, err := newMapInfoFromFd(fd)
	if err != nil {
		fd.Close()
		return nil, fmt.Errorf("get map info: %w", err)
	}

	return newMap(fd, info.Name, info.Type, info.KeySize, info.ValueSize, info.MaxEntries, info.Flags)
}

// NewMap creates a new Map.
//
// It's equivalent to calling NewMapWithOptions with default options.
func NewMap(spec *MapSpec) (*Map, error) {
	return NewMapWithOptions(spec, MapOptions{})
}

// NewMapWithOptions creates a new Map.
//
// Creating a map for the first time will perform feature detection
// by creating small, temporary maps.
//
// The caller is responsible for ensuring the process' rlimit is set
// sufficiently high for locking memory during map creation. This can be done
// by calling rlimit.RemoveMemlock() prior to calling NewMapWithOptions.
//
// May return an error wrapping ErrMapIncompatible.
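//
// A minimal usage sketch (illustrative only, not part of the original
// documentation; the spec values below are made up):
//
//	spec := &MapSpec{
//		Name:       "example_array",
//		Type:       Array,
//		KeySize:    4,
//		ValueSize:  8,
//		MaxEntries: 16,
//	}
//	m, err := NewMapWithOptions(spec, MapOptions{})
//	if err != nil {
//		// handle error, e.g. a memlock rlimit that is too low
//	}
//	defer m.Close()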
func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) {
	m, err := newMapWithOptions(spec, opts)
	if err != nil {
		return nil, fmt.Errorf("creating map: %w", err)
	}

	if err := m.finalize(spec); err != nil {
		m.Close()
		return nil, fmt.Errorf("populating map: %w", err)
	}

	return m, nil
}

func newMapWithOptions(spec *MapSpec, opts MapOptions) (_ *Map, err error) {
	closeOnError := func(c io.Closer) {
		if err != nil {
			c.Close()
		}
	}

	switch spec.Pinning {
	case PinByName:
		if spec.Name == "" {
			return nil, fmt.Errorf("pin by name: missing Name")
		}

		if opts.PinPath == "" {
			return nil, fmt.Errorf("pin by name: missing MapOptions.PinPath")
		}

		path := filepath.Join(opts.PinPath, spec.Name)
		m, err := LoadPinnedMap(path, &opts.LoadPinOptions)
		if errors.Is(err, unix.ENOENT) {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("load pinned map: %w", err)
		}
		defer closeOnError(m)

		if err := spec.Compatible(m); err != nil {
			return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err)
		}

		return m, nil

	case PinNone:
		// Nothing to do here

	default:
		return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported)
	}

	var innerFd *sys.FD
	if spec.Type == ArrayOfMaps || spec.Type == HashOfMaps {
		if spec.InnerMap == nil {
			return nil, fmt.Errorf("%s requires InnerMap", spec.Type)
		}

		if spec.InnerMap.Pinning != PinNone {
			return nil, errors.New("inner maps cannot be pinned")
		}

		template, err := spec.InnerMap.createMap(nil, opts)
		if err != nil {
			return nil, fmt.Errorf("inner map: %w", err)
		}
		defer template.Close()

		// Intentionally skip populating and freezing (finalizing)
		// the inner map template since it will be removed shortly.

		innerFd = template.fd
	}

	m, err := spec.createMap(innerFd, opts)
	if err != nil {
		return nil, err
	}
	defer closeOnError(m)

	if spec.Pinning == PinByName {
		path := filepath.Join(opts.PinPath, spec.Name)
		if err := m.Pin(path); err != nil {
			return nil, fmt.Errorf("pin map to %s: %w", path, err)
		}
	}

	return m, nil
}

// createMap validates the spec's properties and creates the map in the kernel
// using the given opts. It does not populate or freeze the map.
func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions) (_ *Map, err error) {
	closeOnError := func(closer io.Closer) {
		if err != nil {
			closer.Close()
		}
	}

	// Kernels 4.13 through 5.4 used a struct bpf_map_def that contained
	// additional 'inner_map_idx' and later 'numa_node' fields.
	// In order to support loading these definitions, tolerate the presence of
	// extra bytes, but require them to be zeroes.
	if spec.Extra != nil {
		if _, err := io.Copy(internal.DiscardZeroes{}, spec.Extra); err != nil {
			return nil, errors.New("extra contains unhandled non-zero bytes, drain before creating map")
		}
	}

	spec, err = spec.fixupMagicFields()
	if err != nil {
		return nil, err
	}

	attr := sys.MapCreateAttr{
		MapType:    sys.MapType(spec.Type),
		KeySize:    spec.KeySize,
		ValueSize:  spec.ValueSize,
		MaxEntries: spec.MaxEntries,
		MapFlags:   sys.MapFlags(spec.Flags),
		NumaNode:   spec.NumaNode,
	}

	if inner != nil {
		attr.InnerMapFd = inner.Uint()
	}

	if haveObjName() == nil {
		attr.MapName = sys.NewObjName(spec.Name)
	}

	if spec.Key != nil || spec.Value != nil {
		handle, keyTypeID, valueTypeID, err := btf.MarshalMapKV(spec.Key, spec.Value)
		if err != nil && !errors.Is(err, btf.ErrNotSupported) {
			return nil, fmt.Errorf("load BTF: %w", err)
		}

		if handle != nil {
			defer handle.Close()

			// Use BTF k/v during map creation.
			attr.BtfFd = uint32(handle.FD())
			attr.BtfKeyTypeId = keyTypeID
			attr.BtfValueTypeId = valueTypeID
		}
	}

	fd, err := sys.MapCreate(&attr)

	// Some map types don't support BTF k/v in earlier kernel versions.
	// Remove BTF metadata and retry map creation.
	if (errors.Is(err, sys.ENOTSUPP) || errors.Is(err, unix.EINVAL)) && attr.BtfFd != 0 {
		attr.BtfFd, attr.BtfKeyTypeId, attr.BtfValueTypeId = 0, 0, 0
		fd, err = sys.MapCreate(&attr)
	}
	if err != nil {
		return nil, handleMapCreateError(attr, spec, err)
	}

	defer closeOnError(fd)
	m, err := newMap(fd, spec.Name, spec.Type, spec.KeySize, spec.ValueSize, spec.MaxEntries, spec.Flags)
	if err != nil {
		return nil, fmt.Errorf("map create: %w", err)
	}
	return m, nil
}

func handleMapCreateError(attr sys.MapCreateAttr, spec *MapSpec, err error) error {
	if errors.Is(err, unix.EPERM) {
		return fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err)
	}
	if errors.Is(err, unix.EINVAL) && spec.MaxEntries == 0 {
		return fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err)
	}
	if errors.Is(err, unix.EINVAL) && spec.Type == UnspecifiedMap {
		return fmt.Errorf("map create: cannot use type %s", UnspecifiedMap)
	}
	if errors.Is(err, unix.EINVAL) && spec.Flags&unix.BPF_F_NO_PREALLOC > 0 {
		return fmt.Errorf("map create: %w (noPrealloc flag may be incompatible with map type %s)", err, spec.Type)
	}

	switch spec.Type {
	case ArrayOfMaps, HashOfMaps:
		if haveFeatErr := haveNestedMaps(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	if spec.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze {
		if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	if spec.Flags&unix.BPF_F_MMAPABLE > 0 {
		if haveFeatErr := haveMmapableMaps(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	if spec.Flags&unix.BPF_F_INNER_MAP > 0 {
		if haveFeatErr := haveInnerMaps(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	if spec.Flags&unix.BPF_F_NO_PREALLOC > 0 {
		if haveFeatErr := haveNoPreallocMaps(); haveFeatErr != nil {
			return fmt.Errorf("map create: %w", haveFeatErr)
		}
	}
	// BPF_MAP_TYPE_RINGBUF's max_entries must be a power-of-2 multiple of the kernel's page size.
	if errors.Is(err, unix.EINVAL) &&
		(attr.MapType == sys.BPF_MAP_TYPE_RINGBUF || attr.MapType == sys.BPF_MAP_TYPE_USER_RINGBUF) {
		pageSize := uint32(os.Getpagesize())
		maxEntries := attr.MaxEntries
		if maxEntries%pageSize != 0 || !internal.IsPow(maxEntries) {
			return fmt.Errorf("map create: %w (ring map size %d not a multiple of page size %d)", err, maxEntries, pageSize)
		}
	}
	if attr.BtfFd == 0 {
		return fmt.Errorf("map create: %w (without BTF k/v)", err)
	}

	return fmt.Errorf("map create: %w", err)
}

// newMap allocates and returns a new Map structure.
// Sets the fullValueSize on per-CPU maps.
func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) {
	m := &Map{
		name,
		fd,
		typ,
		keySize,
		valueSize,
		maxEntries,
		flags,
		"",
		int(valueSize),
	}

	if !typ.hasPerCPUValue() {
		return m, nil
	}

	possibleCPUs, err := PossibleCPU()
	if err != nil {
		return nil, err
	}

	m.fullValueSize = int(internal.Align(valueSize, 8)) * possibleCPUs
	return m, nil
}

func (m *Map) String() string {
	if m.name != "" {
		return fmt.Sprintf("%s(%s)#%v", m.typ, m.name, m.fd)
	}
	return fmt.Sprintf("%s#%v", m.typ, m.fd)
}

// Type returns the underlying type of the map.
func (m *Map) Type() MapType {
	return m.typ
}

// KeySize returns the size of the map key in bytes.
func (m *Map) KeySize() uint32 {
	return m.keySize
}

// ValueSize returns the size of the map value in bytes.
func (m *Map) ValueSize() uint32 {
	return m.valueSize
}

// MaxEntries returns the maximum number of elements the map can hold.
func (m *Map) MaxEntries() uint32 {
	return m.maxEntries
}

// Flags returns the flags of the map.
func (m *Map) Flags() uint32 {
	return m.flags
}

// Info returns metadata about the map.
func (m *Map) Info() (*MapInfo, error) {
	return newMapInfoFromFd(m.fd)
}

// MapLookupFlags controls the behaviour of the map lookup calls.
type MapLookupFlags uint64

// LookupLock looks up the value of a spin-locked map.
const LookupLock MapLookupFlags = unix.BPF_F_LOCK

// Lookup retrieves a value from a Map.
//
// Calls Close() on valueOut if it is of type **Map or **Program,
// and *valueOut is not nil.
//
// Returns an error if the key doesn't exist, see ErrKeyNotExist.
func (m *Map) Lookup(key, valueOut interface{}) error {
	return m.LookupWithFlags(key, valueOut, 0)
}

// LookupWithFlags retrieves a value from a Map with flags.
//
// Passing the LookupLock flag will look up the value of a spin-locked
// map without returning the lock. This must be specified if the
// elements contain a spinlock.
//
// Calls Close() on valueOut if it is of type **Map or **Program,
// and *valueOut is not nil.
//
// Returns an error if the key doesn't exist, see ErrKeyNotExist.
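//
// A short sketch (illustrative only), assuming a map with 4-byte keys and
// 8-byte values:
//
//	var key uint32 = 1
//	var value uint64
//	err := m.LookupWithFlags(&key, &value, 0)
//	if errors.Is(err, ErrKeyNotExist) {
//		// the key is not present in the map
//	} else if err != nil {
//		// handle other errors
//	}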
func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
	if m.typ.hasPerCPUValue() {
		return m.lookupPerCPU(key, valueOut, flags)
	}

	valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize)
	if err := m.lookup(key, valueBytes.Pointer(), flags); err != nil {
		return err
	}

	return m.unmarshalValue(valueOut, valueBytes)
}

// LookupAndDelete retrieves and deletes a value from a Map.
//
// Returns ErrKeyNotExist if the key doesn't exist.
func (m *Map) LookupAndDelete(key, valueOut interface{}) error {
	return m.LookupAndDeleteWithFlags(key, valueOut, 0)
}

// LookupAndDeleteWithFlags retrieves and deletes a value from a Map.
//
// Passing the LookupLock flag will look up and delete the value of a spin-locked
// map without returning the lock. This must be specified if the elements
// contain a spinlock.
//
// Returns ErrKeyNotExist if the key doesn't exist.
func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLookupFlags) error {
	if m.typ.hasPerCPUValue() {
		return m.lookupAndDeletePerCPU(key, valueOut, flags)
	}

	valueBytes := makeMapSyscallOutput(valueOut, m.fullValueSize)
	if err := m.lookupAndDelete(key, valueBytes.Pointer(), flags); err != nil {
		return err
	}
	return m.unmarshalValue(valueOut, valueBytes)
}

// LookupBytes gets a value from the Map.
//
// Returns a nil value if the key doesn't exist.
func (m *Map) LookupBytes(key interface{}) ([]byte, error) {
	valueBytes := make([]byte, m.fullValueSize)
	valuePtr := sys.NewSlicePointer(valueBytes)

	err := m.lookup(key, valuePtr, 0)
	if errors.Is(err, ErrKeyNotExist) {
		return nil, nil
	}

	return valueBytes, err
}

func (m *Map) lookupPerCPU(key, valueOut any, flags MapLookupFlags) error {
	slice, err := ensurePerCPUSlice(valueOut, int(m.valueSize))
	if err != nil {
		return err
	}
	valueBytes := make([]byte, m.fullValueSize)
	if err := m.lookup(key, sys.NewSlicePointer(valueBytes), flags); err != nil {
		return err
	}
	return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes)
}

func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags) error {
	keyPtr, err := m.marshalKey(key)
	if err != nil {
		return fmt.Errorf("can't marshal key: %w", err)
	}

	attr := sys.MapLookupElemAttr{
		MapFd: m.fd.Uint(),
		Key:   keyPtr,
		Value: valueOut,
		Flags: uint64(flags),
	}

	if err = sys.MapLookupElem(&attr); err != nil {
		return fmt.Errorf("lookup: %w", wrapMapError(err))
	}
	return nil
}

func (m *Map) lookupAndDeletePerCPU(key, valueOut any, flags MapLookupFlags) error {
	slice, err := ensurePerCPUSlice(valueOut, int(m.valueSize))
	if err != nil {
		return err
	}
	valueBytes := make([]byte, m.fullValueSize)
	if err := m.lookupAndDelete(key, sys.NewSlicePointer(valueBytes), flags); err != nil {
		return err
	}
	return unmarshalPerCPUValue(slice, int(m.valueSize), valueBytes)
}

// ensurePerCPUSlice allocates a slice for a per-CPU value if necessary.
func ensurePerCPUSlice(sliceOrPtr any, elemLength int) (any, error) {
	sliceOrPtrType := reflect.TypeOf(sliceOrPtr)
	if sliceOrPtrType.Kind() == reflect.Slice {
		// The target is a slice; the caller is responsible for ensuring that
		// its size is correct.
		return sliceOrPtr, nil
	}

	slicePtrType := sliceOrPtrType
	if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice {
		return nil, fmt.Errorf("per-cpu value requires a slice or a pointer to slice")
	}

	possibleCPUs, err := PossibleCPU()
	if err != nil {
		return nil, err
	}

	sliceType := slicePtrType.Elem()
	slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs)

	sliceElemType := sliceType.Elem()
	sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr
	reflect.ValueOf(sliceOrPtr).Elem().Set(slice)
	if !sliceElemIsPointer {
		return slice.Interface(), nil
	}
	sliceElemType = sliceElemType.Elem()

	for i := 0; i < possibleCPUs; i++ {
		newElem := reflect.New(sliceElemType)
		slice.Index(i).Set(newElem)
	}

	return slice.Interface(), nil
}

func (m *Map) lookupAndDelete(key any, valuePtr sys.Pointer, flags MapLookupFlags) error {
	keyPtr, err := m.marshalKey(key)
	if err != nil {
		return fmt.Errorf("can't marshal key: %w", err)
	}

	attr := sys.MapLookupAndDeleteElemAttr{
		MapFd: m.fd.Uint(),
		Key:   keyPtr,
		Value: valuePtr,
		Flags: uint64(flags),
	}

	if err := sys.MapLookupAndDeleteElem(&attr); err != nil {
		return fmt.Errorf("lookup and delete: %w", wrapMapError(err))
	}

	return nil
}

// MapUpdateFlags controls the behaviour of the Map.Update call.
//
// The exact semantics depend on the specific MapType.
type MapUpdateFlags uint64

const (
	// UpdateAny creates a new element or updates an existing one.
	UpdateAny MapUpdateFlags = iota
	// UpdateNoExist creates a new element.
	UpdateNoExist MapUpdateFlags = 1 << (iota - 1)
	// UpdateExist updates an existing element.
	UpdateExist
	// UpdateLock updates elements under bpf_spin_lock.
	UpdateLock
)

// Put replaces or creates a value in the map.
//
// It is equivalent to calling Update with UpdateAny.
func (m *Map) Put(key, value interface{}) error {
	return m.Update(key, value, UpdateAny)
}

// Update changes the value of a key.
func (m *Map) Update(key, value any, flags MapUpdateFlags) error {
	if m.typ.hasPerCPUValue() {
		return m.updatePerCPU(key, value, flags)
	}

	valuePtr, err := m.marshalValue(value)
	if err != nil {
		return fmt.Errorf("marshal value: %w", err)
	}

	return m.update(key, valuePtr, flags)
}

func (m *Map) updatePerCPU(key, value any, flags MapUpdateFlags) error {
	valuePtr, err := marshalPerCPUValue(value, int(m.valueSize))
	if err != nil {
		return fmt.Errorf("marshal value: %w", err)
	}

	return m.update(key, valuePtr, flags)
}

func (m *Map) update(key any, valuePtr sys.Pointer, flags MapUpdateFlags) error {
	keyPtr, err := m.marshalKey(key)
	if err != nil {
		return fmt.Errorf("marshal key: %w", err)
	}

	attr := sys.MapUpdateElemAttr{
		MapFd: m.fd.Uint(),
		Key:   keyPtr,
		Value: valuePtr,
		Flags: uint64(flags),
	}

	if err = sys.MapUpdateElem(&attr); err != nil {
		return fmt.Errorf("update: %w", wrapMapError(err))
	}

	return nil
}

// Delete removes a value.
//
// Returns ErrKeyNotExist if the key does not exist.
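//
// A tolerant delete that ignores missing keys might look like this
// (illustrative sketch, assuming a 4-byte key):
//
//	var key uint32 = 1
//	if err := m.Delete(&key); err != nil && !errors.Is(err, ErrKeyNotExist) {
//		// handle error
//	}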
func (m *Map) Delete(key interface{}) error {
	keyPtr, err := m.marshalKey(key)
	if err != nil {
		return fmt.Errorf("can't marshal key: %w", err)
	}

	attr := sys.MapDeleteElemAttr{
		MapFd: m.fd.Uint(),
		Key:   keyPtr,
	}

	if err = sys.MapDeleteElem(&attr); err != nil {
		return fmt.Errorf("delete: %w", wrapMapError(err))
	}
	return nil
}

// NextKey finds the key following an initial key.
//
// See NextKeyBytes for details.
//
// Returns ErrKeyNotExist if there is no next key.
func (m *Map) NextKey(key, nextKeyOut interface{}) error {
	nextKeyBytes := makeMapSyscallOutput(nextKeyOut, int(m.keySize))

	if err := m.nextKey(key, nextKeyBytes.Pointer()); err != nil {
		return err
	}

	if err := nextKeyBytes.Unmarshal(nextKeyOut); err != nil {
		return fmt.Errorf("can't unmarshal next key: %w", err)
	}
	return nil
}

// NextKeyBytes returns the key following an initial key as a byte slice.
//
// Passing nil will return the first key.
//
// Use Iterate if you want to traverse all entries in the map.
//
// Returns nil if there are no more keys.
func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) {
	nextKey := make([]byte, m.keySize)
	nextKeyPtr := sys.NewSlicePointer(nextKey)

	err := m.nextKey(key, nextKeyPtr)
	if errors.Is(err, ErrKeyNotExist) {
		return nil, nil
	}

	return nextKey, err
}

func (m *Map) nextKey(key interface{}, nextKeyOut sys.Pointer) error {
	var (
		keyPtr sys.Pointer
		err    error
	)

	if key != nil {
		keyPtr, err = m.marshalKey(key)
		if err != nil {
			return fmt.Errorf("can't marshal key: %w", err)
		}
	}

	attr := sys.MapGetNextKeyAttr{
		MapFd:   m.fd.Uint(),
		Key:     keyPtr,
		NextKey: nextKeyOut,
	}

	if err = sys.MapGetNextKey(&attr); err != nil {
		// Kernels 4.4.131 and earlier return EFAULT instead of a pointer to the
		// first map element when a nil key pointer is specified.
		if key == nil && errors.Is(err, unix.EFAULT) {
			var guessKey []byte
			guessKey, err = m.guessNonExistentKey()
			if err != nil {
				return err
			}

			// Retry the syscall with a valid non-existing key.
			attr.Key = sys.NewSlicePointer(guessKey)
			if err = sys.MapGetNextKey(&attr); err == nil {
				return nil
			}
		}

		return fmt.Errorf("next key: %w", wrapMapError(err))
	}

	return nil
}

var mmapProtectedPage = sync.OnceValues(func() ([]byte, error) {
	return unix.Mmap(-1, 0, os.Getpagesize(), unix.PROT_NONE, unix.MAP_ANON|unix.MAP_SHARED)
})

// guessNonExistentKey attempts to perform a map lookup that returns ENOENT.
// This is necessary on kernels before 4.4.132, since those don't support
// iterating maps from the start by providing an invalid key pointer.
func (m *Map) guessNonExistentKey() ([]byte, error) {
	// Map a protected page and use that as the value pointer. This saves some
	// work copying out the value, which we're not interested in.
	page, err := mmapProtectedPage()
	if err != nil {
		return nil, err
	}
	valuePtr := sys.NewSlicePointer(page)

	randKey := make([]byte, int(m.keySize))

	for i := 0; i < 4; i++ {
		switch i {
		// For hash maps, the 0 key is less likely to be occupied. They're often
		// used for storing data related to pointers, and their access pattern is
		// generally scattered across the keyspace.
		case 0:
		// An all-0xff key is guaranteed to be out of bounds of any array, since
		// those have a fixed key size of 4 bytes. The only corner case being
		// arrays with 2^32 max entries, but those are prohibitively expensive
		// in many environments.
		case 1:
			for r := range randKey {
				randKey[r] = 0xff
			}
		// Inspired by BCC, 0x55 is an alternating binary pattern (0101), so
		// is unlikely to be taken.
		case 2:
			for r := range randKey {
				randKey[r] = 0x55
			}
		// Last ditch effort, generate a random key.
		case 3:
			rand.New(rand.NewSource(time.Now().UnixNano())).Read(randKey)
		}

		err := m.lookup(randKey, valuePtr, 0)
		if errors.Is(err, ErrKeyNotExist) {
			return randKey, nil
		}
	}

	return nil, errors.New("couldn't find non-existing key")
}

// BatchLookup looks up many elements in a map at once.
//
// "keysOut" and "valuesOut" must be slices; a pointer to a slice or a
// buffer will not work.
// "cursor" is a pointer to an opaque handle. It must be non-nil. Pass
// "cursor" to subsequent calls of this function to continue the batching
// operation in the case of chunking.
//
// Warning: This API is not very safe to use as the kernel implementation for
// batching relies on the user to be aware of subtle details with regard to
// different map type implementations.
//
// ErrKeyNotExist is returned when the batch lookup has reached
// the end of all possible results, even when partial results
// are returned. It should be used to evaluate when lookup is "done".
func (m *Map) BatchLookup(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
	return m.batchLookup(sys.BPF_MAP_LOOKUP_BATCH, cursor, keysOut, valuesOut, opts)
}

// BatchLookupAndDelete looks up many elements in a map at once.
//
// It then deletes all those elements.
// "keysOut" and "valuesOut" must be slices; a pointer to a slice or a
// buffer will not work.
// "cursor" is a pointer to an opaque handle. It must be non-nil. Pass
// "cursor" to subsequent calls of this function to continue the batching
// operation in the case of chunking.
//
// Warning: This API is not very safe to use as the kernel implementation for
// batching relies on the user to be aware of subtle details with regard to
// different map type implementations.
//
// ErrKeyNotExist is returned when the batch lookup has reached
// the end of all possible results, even when partial results
// are returned. It should be used to evaluate when lookup is "done".
func (m *Map) BatchLookupAndDelete(cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
	return m.batchLookup(sys.BPF_MAP_LOOKUP_AND_DELETE_BATCH, cursor, keysOut, valuesOut, opts)
}

// MapBatchCursor represents a starting point for a batch operation.
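//
// A typical lookup loop (illustrative sketch, assuming a map with 4-byte
// keys and 8-byte values and a chunk size of 16):
//
//	var cursor MapBatchCursor
//	keys := make([]uint32, 16)
//	values := make([]uint64, 16)
//	for {
//		n, err := m.BatchLookup(&cursor, keys, values, nil)
//		// process keys[:n] and values[:n]; results may be partial even on error
//		if errors.Is(err, ErrKeyNotExist) {
//			break // the whole map has been traversed
//		}
//		if err != nil {
//			// handle error
//		}
//	}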
type MapBatchCursor struct {
	m      *Map
	opaque []byte
}

func (m *Map) batchLookup(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
	if m.typ.hasPerCPUValue() {
		return m.batchLookupPerCPU(cmd, cursor, keysOut, valuesOut, opts)
	}

	count, err := batchCount(keysOut, valuesOut)
	if err != nil {
		return 0, err
	}

	valueBuf := sysenc.SyscallOutput(valuesOut, count*int(m.fullValueSize))

	n, err := m.batchLookupCmd(cmd, cursor, count, keysOut, valueBuf.Pointer(), opts)
	if err != nil {
		return n, err
	}

	err = valueBuf.Unmarshal(valuesOut)
	if err != nil {
		return 0, err
	}

	return n, nil
}

func (m *Map) batchLookupPerCPU(cmd sys.Cmd, cursor *MapBatchCursor, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) {
	count, err := sliceLen(keysOut)
	if err != nil {
		return 0, fmt.Errorf("keys: %w", err)
	}

	valueBuf := make([]byte, count*int(m.fullValueSize))
	valuePtr := sys.NewSlicePointer(valueBuf)

	n, sysErr := m.batchLookupCmd(cmd, cursor, count, keysOut, valuePtr, opts)
	if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) {
		return 0, sysErr
	}

	err = unmarshalBatchPerCPUValue(valuesOut, count, int(m.valueSize), valueBuf)
	if err != nil {
		return 0, err
	}

	return n, sysErr
}

func (m *Map) batchLookupCmd(cmd sys.Cmd, cursor *MapBatchCursor, count int, keysOut any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) {
	cursorLen := int(m.keySize)
	if cursorLen < 4 {
		// * generic_map_lookup_batch requires that batch_out is key_size bytes.
		//   This is used by array and LPM maps.
		//
		// * __htab_map_lookup_and_delete_batch requires u32. This is used by the
		//   various hash maps.
		//
		// Use a minimum of 4 bytes to avoid having to distinguish between the two.
		cursorLen = 4
	}

	inBatch := cursor.opaque
	if inBatch == nil {
		// This is the first lookup, allocate a buffer to hold the cursor.
		cursor.opaque = make([]byte, cursorLen)
		cursor.m = m
	} else if cursor.m != m {
		// Prevent reuse of a cursor across maps. First, it's unlikely to work.
		// Second, the maps may require different cursorLen and cursor.opaque
		// may therefore be too short. This could lead to the kernel clobbering
		// user space memory.
		return 0, errors.New("a cursor may not be reused across maps")
	}

	if err := haveBatchAPI(); err != nil {
		return 0, err
	}

	keyBuf := sysenc.SyscallOutput(keysOut, count*int(m.keySize))

	attr := sys.MapLookupBatchAttr{
		MapFd:    m.fd.Uint(),
		Keys:     keyBuf.Pointer(),
		Values:   valuePtr,
		Count:    uint32(count),
		InBatch:  sys.NewSlicePointer(inBatch),
		OutBatch: sys.NewSlicePointer(cursor.opaque),
	}

	if opts != nil {
		attr.ElemFlags = opts.ElemFlags
		attr.Flags = opts.Flags
	}

	_, sysErr := sys.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr))
	sysErr = wrapMapError(sysErr)
	if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) {
		return 0, sysErr
	}

	if err := keyBuf.Unmarshal(keysOut); err != nil {
		return 0, err
	}

	return int(attr.Count), sysErr
}

// BatchUpdate updates the map with multiple keys and values
// simultaneously.
// "keys" and "values" must be slices; a pointer to a slice or a buffer
// will not work.
func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, error) {
	if m.typ.hasPerCPUValue() {
		return m.batchUpdatePerCPU(keys, values, opts)
	}

	count, err := batchCount(keys, values)
	if err != nil {
		return 0, err
	}

	valuePtr, err := marshalMapSyscallInput(values, count*int(m.valueSize))
	if err != nil {
		return 0, err
	}

	return m.batchUpdate(count, keys, valuePtr, opts)
}

func (m *Map) batchUpdate(count int, keys any, valuePtr sys.Pointer, opts *BatchOptions) (int, error) {
	keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize))
	if err != nil {
		return 0, err
	}

	attr := sys.MapUpdateBatchAttr{
		MapFd:  m.fd.Uint(),
		Keys:   keyPtr,
		Values: valuePtr,
		Count:  uint32(count),
	}
	if opts != nil {
		attr.ElemFlags = opts.ElemFlags
		attr.Flags = opts.Flags
	}

	err = sys.MapUpdateBatch(&attr)
	if err != nil {
		if haveFeatErr := haveBatchAPI(); haveFeatErr != nil {
			return 0, haveFeatErr
		}
		return int(attr.Count), fmt.Errorf("batch update: %w", wrapMapError(err))
	}

	return int(attr.Count), nil
}

func (m *Map) batchUpdatePerCPU(keys, values any, opts *BatchOptions) (int, error) {
	count, err := sliceLen(keys)
	if err != nil {
		return 0, fmt.Errorf("keys: %w", err)
	}

	valueBuf, err := marshalBatchPerCPUValue(values, count, int(m.valueSize))
	if err != nil {
		return 0, err
	}

	return m.batchUpdate(count, keys, sys.NewSlicePointer(valueBuf), opts)
}

// BatchDelete batch deletes entries in the map by keys.
// "keys" must be a slice; a pointer to a slice or a buffer will not work.
func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) {
	count, err := sliceLen(keys)
	if err != nil {
		return 0, fmt.Errorf("keys: %w", err)
	}

	keyPtr, err := marshalMapSyscallInput(keys, count*int(m.keySize))
	if err != nil {
		return 0, fmt.Errorf("cannot marshal keys: %v", err)
	}

	attr := sys.MapDeleteBatchAttr{
		MapFd: m.fd.Uint(),
		Keys:  keyPtr,
		Count: uint32(count),
	}

	if opts != nil {
		attr.ElemFlags = opts.ElemFlags
		attr.Flags = opts.Flags
	}

	if err = sys.MapDeleteBatch(&attr); err != nil {
		if haveFeatErr := haveBatchAPI(); haveFeatErr != nil {
			return 0, haveFeatErr
		}
		return int(attr.Count), fmt.Errorf("batch delete: %w", wrapMapError(err))
	}

	return int(attr.Count), nil
}

func batchCount(keys, values any) (int, error) {
	keysLen, err := sliceLen(keys)
	if err != nil {
		return 0, fmt.Errorf("keys: %w", err)
	}

	valuesLen, err := sliceLen(values)
	if err != nil {
		return 0, fmt.Errorf("values: %w", err)
	}

	if keysLen != valuesLen {
		return 0, fmt.Errorf("keys and values must have the same length")
	}

	return keysLen, nil
}

// Iterate traverses a map.
//
// It's safe to create multiple iterators at the same time.
//
// It's not possible to guarantee that all keys in a map will be
// returned if there are concurrent modifications to the map.
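//
// A typical iteration loop (illustrative sketch, assuming a map with 4-byte
// keys and 8-byte values):
//
//	var (
//		key   uint32
//		value uint64
//	)
//	iter := m.Iterate()
//	for iter.Next(&key, &value) {
//		// use key and value
//	}
//	if err := iter.Err(); err != nil {
//		// handle error, e.g. ErrIterationAborted
//	}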
func (m *Map) Iterate() *MapIterator {
	return newMapIterator(m)
}

// Close the Map's underlying file descriptor, which could unload the
// Map from the kernel if it is not pinned or in use by a loaded Program.
func (m *Map) Close() error {
	if m == nil {
		// This makes it easier to clean up when iterating maps
		// of maps / programs.
		return nil
	}

	return m.fd.Close()
}

// FD gets the file descriptor of the Map.
//
// Calling this function is invalid after Close has been called.
func (m *Map) FD() int {
	return m.fd.Int()
}

// Clone creates a duplicate of the Map.
//
// Closing the duplicate does not affect the original, and vice versa.
// Changes made to the map are, however, reflected by both instances.
// If the original map was pinned, the cloned map will not be pinned by default.
//
// Cloning a nil Map returns nil.
func (m *Map) Clone() (*Map, error) {
	if m == nil {
		return nil, nil
	}

	dup, err := m.fd.Dup()
	if err != nil {
		return nil, fmt.Errorf("can't clone map: %w", err)
	}

	return &Map{
		m.name,
		dup,
		m.typ,
		m.keySize,
		m.valueSize,
		m.maxEntries,
		m.flags,
		"",
		m.fullValueSize,
	}, nil
}

// Pin persists the map on the BPF virtual file system past the lifetime of
// the process that created it.
//
// Calling Pin on a previously pinned map will overwrite the path, except when
// the new path already exists. Re-pinning across filesystems is not supported.
// You can Clone a map to pin it to a different path.
//
// This requires bpffs to be mounted above fileName.
// See https://docs.cilium.io/en/stable/network/kubernetes/configuration/#mounting-bpffs-with-systemd
func (m *Map) Pin(fileName string) error {
	if err := internal.Pin(m.pinnedPath, fileName, m.fd); err != nil {
		return err
	}
	m.pinnedPath = fileName
	return nil
}

// Unpin removes the persisted state for the map from the BPF virtual filesystem.
//
// Failed calls to Unpin will not alter the state returned by IsPinned.
//
// Unpinning an unpinned Map returns nil.
func (m *Map) Unpin() error {
	if err := internal.Unpin(m.pinnedPath); err != nil {
		return err
	}
	m.pinnedPath = ""
	return nil
}

// IsPinned returns true if the map has a non-empty pinned path.
func (m *Map) IsPinned() bool {
	return m.pinnedPath != ""
}

// Freeze prevents a map from being modified from user space.
//
// It makes no changes to kernel-side restrictions.
func (m *Map) Freeze() error {
	attr := sys.MapFreezeAttr{
		MapFd: m.fd.Uint(),
	}

	if err := sys.MapFreeze(&attr); err != nil {
		if haveFeatErr := haveMapMutabilityModifiers(); haveFeatErr != nil {
			return fmt.Errorf("can't freeze map: %w", haveFeatErr)
		}
		return fmt.Errorf("can't freeze map: %w", err)
	}
	return nil
}

// finalize populates the Map according to the Contents specified
// in spec and freezes the Map if requested by spec.
func (m *Map) finalize(spec *MapSpec) error {
	for _, kv := range spec.Contents {
		if err := m.Put(kv.Key, kv.Value); err != nil {
			return fmt.Errorf("putting value: key %v: %w", kv.Key, err)
		}
	}

	if spec.Freeze {
		if err := m.Freeze(); err != nil {
			return fmt.Errorf("freezing map: %w", err)
		}
	}

	return nil
}

func (m *Map) marshalKey(data interface{}) (sys.Pointer, error) {
	if data == nil {
		if m.keySize == 0 {
			// Queues have a key length of zero, so passing nil here is valid.
			return sys.NewPointer(nil), nil
		}
		return sys.Pointer{}, errors.New("can't use nil as key of map")
	}

	return marshalMapSyscallInput(data, int(m.keySize))
}

func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) {
	var (
		buf []byte
		err error
	)

	switch value := data.(type) {
	case *Map:
		if !m.typ.canStoreMap() {
			return sys.Pointer{}, fmt.Errorf("can't store map in %s", m.typ)
		}
		buf, err = marshalMap(value, int(m.valueSize))

	case *Program:
		if !m.typ.canStoreProgram() {
			return sys.Pointer{}, fmt.Errorf("can't store program in %s", m.typ)
		}
		buf, err = marshalProgram(value, int(m.valueSize))

	default:
		return marshalMapSyscallInput(data, int(m.valueSize))
	}

	if err != nil {
		return sys.Pointer{}, err
	}

	return sys.NewSlicePointer(buf), nil
}

func (m *Map) unmarshalValue(value any, buf sysenc.Buffer) error {
	switch value := value.(type) {
	case **Map:
		if !m.typ.canStoreMap() {
			return fmt.Errorf("can't read a map from %s", m.typ)
		}

		other, err := unmarshalMap(buf)
		if err != nil {
			return err
		}

		// The caller might close the map externally, so ignore errors.
		_ = (*value).Close()

		*value = other
		return nil

	case *Map:
		if !m.typ.canStoreMap() {
			return fmt.Errorf("can't read a map from %s", m.typ)
		}
		return errors.New("require pointer to *Map")

	case **Program:
		if !m.typ.canStoreProgram() {
			return fmt.Errorf("can't read a program from %s", m.typ)
		}

		other, err := unmarshalProgram(buf)
		if err != nil {
			return err
		}

		// The caller might close the program externally, so ignore errors.
		_ = (*value).Close()

		*value = other
		return nil

	case *Program:
		if !m.typ.canStoreProgram() {
			return fmt.Errorf("can't read a program from %s", m.typ)
		}
		return errors.New("require pointer to *Program")
	}

	return buf.Unmarshal(value)
}

// LoadPinnedMap loads a Map from a BPF file.
func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) {
	fd, err := sys.ObjGet(&sys.ObjGetAttr{
		Pathname:  sys.NewStringPointer(fileName),
		FileFlags: opts.Marshal(),
	})
	if err != nil {
		return nil, err
	}

	m, err := newMapFromFD(fd)
	if err == nil {
		m.pinnedPath = fileName
	}

	return m, err
}

// unmarshalMap creates a map from a map ID encoded in host endianness.
func unmarshalMap(buf sysenc.Buffer) (*Map, error) {
	var id uint32
	if err := buf.Unmarshal(&id); err != nil {
		return nil, err
	}
	return NewMapFromID(MapID(id))
}

// marshalMap marshals the fd of a map into a buffer in host endianness.
func marshalMap(m *Map, length int) ([]byte, error) {
	if length != 4 {
		return nil, fmt.Errorf("can't marshal map to %d bytes", length)
	}

	buf := make([]byte, 4)
	internal.NativeEndian.PutUint32(buf, m.fd.Uint())
	return buf, nil
}

// MapIterator iterates a Map.
//
// See Map.Iterate.
type MapIterator struct {
	target *Map
	// Temporary storage to avoid allocations in Next(). This is any instead
	// of []byte to avoid allocations.
	cursor            any
	count, maxEntries uint32
	done              bool
	err               error
}

func newMapIterator(target *Map) *MapIterator {
	return &MapIterator{
		target:     target,
		maxEntries: target.maxEntries,
	}
}

// Next decodes the next key and value.
//
// Iterating a hash map from which keys are being deleted is not
// safe. You may see the same key multiple times. Iteration may
// also abort with an error, see ErrIterationAborted.
//
// Returns false if there are no more entries. You must check
// the result of Err afterwards.
//
// See Map.Lookup for further caveats around valueOut.
func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool {
	if mi.err != nil || mi.done {
		return false
	}

	// For array-like maps NextKey returns nil only after maxEntries
	// iterations.
	for mi.count <= mi.maxEntries {
		if mi.cursor == nil {
			// Pass nil interface to NextKey to make sure the Map's first key
			// is returned. If we pass an uninitialized []byte instead, it'll see a
			// non-nil interface and try to marshal it.
			mi.cursor = make([]byte, mi.target.keySize)
			mi.err = mi.target.NextKey(nil, mi.cursor)
		} else {
			mi.err = mi.target.NextKey(mi.cursor, mi.cursor)
		}

		if errors.Is(mi.err, ErrKeyNotExist) {
			mi.done = true
			mi.err = nil
			return false
		} else if mi.err != nil {
			mi.err = fmt.Errorf("get next key: %w", mi.err)
			return false
		}

		mi.count++
		mi.err = mi.target.Lookup(mi.cursor, valueOut)
		if errors.Is(mi.err, ErrKeyNotExist) {
			// Even though the key should be valid, we couldn't look up
			// its value. If we're iterating a hash map this is probably
			// because a concurrent delete removed the value before we
			// could get it. This means that the next call to NextKeyBytes
			// is very likely to restart iteration.
			// If we're iterating one of the fd maps like
			// ProgramArray it means that a given slot doesn't have
			// a valid fd associated. It's OK to continue to the next slot.
			continue
		}
		if mi.err != nil {
			mi.err = fmt.Errorf("look up next key: %w", mi.err)
			return false
		}

		buf := mi.cursor.([]byte)
		if ptr, ok := keyOut.(unsafe.Pointer); ok {
			copy(unsafe.Slice((*byte)(ptr), len(buf)), buf)
		} else {
			mi.err = sysenc.Unmarshal(keyOut, buf)
		}

		return mi.err == nil
	}

	mi.err = fmt.Errorf("%w", ErrIterationAborted)
	return false
}

// Err returns any encountered error.
//
// The method must be called after Next returns false.
//
// Returns ErrIterationAborted if it wasn't possible to do a full iteration.
func (mi *MapIterator) Err() error {
	return mi.err
}

// MapGetNextID returns the ID of the next eBPF map.
//
// Returns ErrNotExist if there is no next eBPF map.
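//
// Enumerating all maps on the system might look like this (illustrative
// sketch; it assumes that the ErrNotExist mentioned above corresponds to
// os.ErrNotExist, which errors.Is matches for an ENOENT from the kernel):
//
//	var id MapID
//	for {
//		next, err := MapGetNextID(id)
//		if errors.Is(err, os.ErrNotExist) {
//			break // no more maps
//		}
//		if err != nil {
//			// handle error
//		}
//		id = next
//		// open the map with NewMapFromID(id) if needed
//	}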
func MapGetNextID(startID MapID) (MapID, error) {
	attr := &sys.MapGetNextIdAttr{Id: uint32(startID)}
	return MapID(attr.NextId), sys.MapGetNextId(attr)
}

// NewMapFromID returns the map for a given id.
//
// Returns ErrNotExist if there is no eBPF map with the given id.
func NewMapFromID(id MapID) (*Map, error) {
	fd, err := sys.MapGetFdById(&sys.MapGetFdByIdAttr{
		Id: uint32(id),
	})
	if err != nil {
		return nil, err
	}

	return newMapFromFD(fd)
}

// sliceLen returns the length if the value is a slice or an error otherwise.
func sliceLen(slice any) (int, error) {
	sliceValue := reflect.ValueOf(slice)
	if sliceValue.Kind() != reflect.Slice {
		return 0, fmt.Errorf("%T is not a slice", slice)
	}
	return sliceValue.Len(), nil
}