github.com/cilium/ebpf@v0.10.0/map.go (about) 1 package ebpf 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "io" 8 "math/rand" 9 "path/filepath" 10 "reflect" 11 "time" 12 "unsafe" 13 14 "github.com/cilium/ebpf/btf" 15 "github.com/cilium/ebpf/internal" 16 "github.com/cilium/ebpf/internal/sys" 17 "github.com/cilium/ebpf/internal/unix" 18 ) 19 20 // Errors returned by Map and MapIterator methods. 21 var ( 22 ErrKeyNotExist = errors.New("key does not exist") 23 ErrKeyExist = errors.New("key already exists") 24 ErrIterationAborted = errors.New("iteration aborted") 25 ErrMapIncompatible = errors.New("map spec is incompatible with existing map") 26 errMapNoBTFValue = errors.New("map spec does not contain a BTF Value") 27 ) 28 29 // MapOptions control loading a map into the kernel. 30 type MapOptions struct { 31 // The base path to pin maps in if requested via PinByName. 32 // Existing maps will be re-used if they are compatible, otherwise an 33 // error is returned. 34 PinPath string 35 LoadPinOptions LoadPinOptions 36 } 37 38 // MapID represents the unique ID of an eBPF map 39 type MapID uint32 40 41 // MapSpec defines a Map. 42 type MapSpec struct { 43 // Name is passed to the kernel as a debug aid. Must only contain 44 // alpha numeric and '_' characters. 45 Name string 46 Type MapType 47 KeySize uint32 48 ValueSize uint32 49 MaxEntries uint32 50 51 // Flags is passed to the kernel and specifies additional map 52 // creation attributes. 53 Flags uint32 54 55 // Automatically pin and load a map from MapOptions.PinPath. 56 // Generates an error if an existing pinned map is incompatible with the MapSpec. 57 Pinning PinType 58 59 // Specify numa node during map creation 60 // (effective only if unix.BPF_F_NUMA_NODE flag is set, 61 // which can be imported from golang.org/x/sys/unix) 62 NumaNode uint32 63 64 // The initial contents of the map. May be nil. 65 Contents []MapKV 66 67 // Whether to freeze a map after setting its initial contents. 68 Freeze bool 69 70 // InnerMap is used as a template for ArrayOfMaps and HashOfMaps 71 InnerMap *MapSpec 72 73 // Extra trailing bytes found in the ELF map definition when using structs 74 // larger than libbpf's bpf_map_def. nil if no trailing bytes were present. 75 // Must be nil or empty before instantiating the MapSpec into a Map. 76 Extra *bytes.Reader 77 78 // The key and value type of this map. May be nil. 79 Key, Value btf.Type 80 } 81 82 func (ms *MapSpec) String() string { 83 return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags) 84 } 85 86 // Copy returns a copy of the spec. 87 // 88 // MapSpec.Contents is a shallow copy. 89 func (ms *MapSpec) Copy() *MapSpec { 90 if ms == nil { 91 return nil 92 } 93 94 cpy := *ms 95 96 cpy.Contents = make([]MapKV, len(ms.Contents)) 97 copy(cpy.Contents, ms.Contents) 98 99 cpy.InnerMap = ms.InnerMap.Copy() 100 101 return &cpy 102 } 103 104 func (ms *MapSpec) clampPerfEventArraySize() error { 105 if ms.Type != PerfEventArray { 106 return nil 107 } 108 109 n, err := internal.PossibleCPUs() 110 if err != nil { 111 return fmt.Errorf("perf event array: %w", err) 112 } 113 114 if n := uint32(n); ms.MaxEntries > n { 115 ms.MaxEntries = n 116 } 117 118 return nil 119 } 120 121 // dataSection returns the contents and BTF Datasec descriptor of the spec. 122 func (ms *MapSpec) dataSection() ([]byte, *btf.Datasec, error) { 123 124 if ms.Value == nil { 125 return nil, nil, errMapNoBTFValue 126 } 127 128 ds, ok := ms.Value.(*btf.Datasec) 129 if !ok { 130 return nil, nil, fmt.Errorf("map value BTF is a %T, not a *btf.Datasec", ms.Value) 131 } 132 133 if n := len(ms.Contents); n != 1 { 134 return nil, nil, fmt.Errorf("expected one key, found %d", n) 135 } 136 137 kv := ms.Contents[0] 138 value, ok := kv.Value.([]byte) 139 if !ok { 140 return nil, nil, fmt.Errorf("value at first map key is %T, not []byte", kv.Value) 141 } 142 143 return value, ds, nil 144 } 145 146 // MapKV is used to initialize the contents of a Map. 147 type MapKV struct { 148 Key interface{} 149 Value interface{} 150 } 151 152 // Compatible returns nil if an existing map may be used instead of creating 153 // one from the spec. 154 // 155 // Returns an error wrapping [ErrMapIncompatible] otherwise. 156 func (ms *MapSpec) Compatible(m *Map) error { 157 switch { 158 case m.typ != ms.Type: 159 return fmt.Errorf("expected type %v, got %v: %w", ms.Type, m.typ, ErrMapIncompatible) 160 161 case m.keySize != ms.KeySize: 162 return fmt.Errorf("expected key size %v, got %v: %w", ms.KeySize, m.keySize, ErrMapIncompatible) 163 164 case m.valueSize != ms.ValueSize: 165 return fmt.Errorf("expected value size %v, got %v: %w", ms.ValueSize, m.valueSize, ErrMapIncompatible) 166 167 case !(ms.Type == PerfEventArray && ms.MaxEntries == 0) && 168 m.maxEntries != ms.MaxEntries: 169 return fmt.Errorf("expected max entries %v, got %v: %w", ms.MaxEntries, m.maxEntries, ErrMapIncompatible) 170 171 case m.flags != ms.Flags: 172 return fmt.Errorf("expected flags %v, got %v: %w", ms.Flags, m.flags, ErrMapIncompatible) 173 } 174 return nil 175 } 176 177 // Map represents a Map file descriptor. 178 // 179 // It is not safe to close a map which is used by other goroutines. 180 // 181 // Methods which take interface{} arguments by default encode 182 // them using binary.Read/Write in the machine's native endianness. 183 // 184 // Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler 185 // if you require custom encoding. 186 type Map struct { 187 name string 188 fd *sys.FD 189 typ MapType 190 keySize uint32 191 valueSize uint32 192 maxEntries uint32 193 flags uint32 194 pinnedPath string 195 // Per CPU maps return values larger than the size in the spec 196 fullValueSize int 197 } 198 199 // NewMapFromFD creates a map from a raw fd. 200 // 201 // You should not use fd after calling this function. 202 func NewMapFromFD(fd int) (*Map, error) { 203 f, err := sys.NewFD(fd) 204 if err != nil { 205 return nil, err 206 } 207 208 return newMapFromFD(f) 209 } 210 211 func newMapFromFD(fd *sys.FD) (*Map, error) { 212 info, err := newMapInfoFromFd(fd) 213 if err != nil { 214 fd.Close() 215 return nil, fmt.Errorf("get map info: %w", err) 216 } 217 218 return newMap(fd, info.Name, info.Type, info.KeySize, info.ValueSize, info.MaxEntries, info.Flags) 219 } 220 221 // NewMap creates a new Map. 222 // 223 // It's equivalent to calling NewMapWithOptions with default options. 224 func NewMap(spec *MapSpec) (*Map, error) { 225 return NewMapWithOptions(spec, MapOptions{}) 226 } 227 228 // NewMapWithOptions creates a new Map. 229 // 230 // Creating a map for the first time will perform feature detection 231 // by creating small, temporary maps. 232 // 233 // The caller is responsible for ensuring the process' rlimit is set 234 // sufficiently high for locking memory during map creation. This can be done 235 // by calling rlimit.RemoveMemlock() prior to calling NewMapWithOptions. 236 // 237 // May return an error wrapping ErrMapIncompatible. 238 func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) { 239 m, err := newMapWithOptions(spec, opts) 240 if err != nil { 241 return nil, fmt.Errorf("creating map: %w", err) 242 } 243 244 if err := m.finalize(spec); err != nil { 245 m.Close() 246 return nil, fmt.Errorf("populating map: %w", err) 247 } 248 249 return m, nil 250 } 251 252 func newMapWithOptions(spec *MapSpec, opts MapOptions) (_ *Map, err error) { 253 closeOnError := func(c io.Closer) { 254 if err != nil { 255 c.Close() 256 } 257 } 258 259 switch spec.Pinning { 260 case PinByName: 261 if spec.Name == "" { 262 return nil, fmt.Errorf("pin by name: missing Name") 263 } 264 265 if opts.PinPath == "" { 266 return nil, fmt.Errorf("pin by name: missing MapOptions.PinPath") 267 } 268 269 path := filepath.Join(opts.PinPath, spec.Name) 270 m, err := LoadPinnedMap(path, &opts.LoadPinOptions) 271 if errors.Is(err, unix.ENOENT) { 272 break 273 } 274 if err != nil { 275 return nil, fmt.Errorf("load pinned map: %w", err) 276 } 277 defer closeOnError(m) 278 279 if err := spec.Compatible(m); err != nil { 280 return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err) 281 } 282 283 return m, nil 284 285 case PinNone: 286 // Nothing to do here 287 288 default: 289 return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported) 290 } 291 292 var innerFd *sys.FD 293 if spec.Type == ArrayOfMaps || spec.Type == HashOfMaps { 294 if spec.InnerMap == nil { 295 return nil, fmt.Errorf("%s requires InnerMap", spec.Type) 296 } 297 298 if spec.InnerMap.Pinning != PinNone { 299 return nil, errors.New("inner maps cannot be pinned") 300 } 301 302 template, err := spec.InnerMap.createMap(nil, opts) 303 if err != nil { 304 return nil, fmt.Errorf("inner map: %w", err) 305 } 306 defer template.Close() 307 308 // Intentionally skip populating and freezing (finalizing) 309 // the inner map template since it will be removed shortly. 310 311 innerFd = template.fd 312 } 313 314 m, err := spec.createMap(innerFd, opts) 315 if err != nil { 316 return nil, err 317 } 318 defer closeOnError(m) 319 320 if spec.Pinning == PinByName { 321 path := filepath.Join(opts.PinPath, spec.Name) 322 if err := m.Pin(path); err != nil { 323 return nil, fmt.Errorf("pin map to %s: %w", path, err) 324 } 325 } 326 327 return m, nil 328 } 329 330 // createMap validates the spec's properties and creates the map in the kernel 331 // using the given opts. It does not populate or freeze the map. 332 func (spec *MapSpec) createMap(inner *sys.FD, opts MapOptions) (_ *Map, err error) { 333 closeOnError := func(closer io.Closer) { 334 if err != nil { 335 closer.Close() 336 } 337 } 338 339 // Kernels 4.13 through 5.4 used a struct bpf_map_def that contained 340 // additional 'inner_map_idx' and later 'numa_node' fields. 341 // In order to support loading these definitions, tolerate the presence of 342 // extra bytes, but require them to be zeroes. 343 if spec.Extra != nil { 344 if _, err := io.Copy(internal.DiscardZeroes{}, spec.Extra); err != nil { 345 return nil, errors.New("extra contains unhandled non-zero bytes, drain before creating map") 346 } 347 } 348 349 switch spec.Type { 350 case ArrayOfMaps, HashOfMaps: 351 if err := haveNestedMaps(); err != nil { 352 return nil, err 353 } 354 355 if spec.ValueSize != 0 && spec.ValueSize != 4 { 356 return nil, errors.New("ValueSize must be zero or four for map of map") 357 } 358 359 spec = spec.Copy() 360 spec.ValueSize = 4 361 362 case PerfEventArray: 363 if spec.KeySize != 0 && spec.KeySize != 4 { 364 return nil, errors.New("KeySize must be zero or four for perf event array") 365 } 366 367 if spec.ValueSize != 0 && spec.ValueSize != 4 { 368 return nil, errors.New("ValueSize must be zero or four for perf event array") 369 } 370 371 spec = spec.Copy() 372 spec.KeySize = 4 373 spec.ValueSize = 4 374 375 if spec.MaxEntries == 0 { 376 n, err := internal.PossibleCPUs() 377 if err != nil { 378 return nil, fmt.Errorf("perf event array: %w", err) 379 } 380 spec.MaxEntries = uint32(n) 381 } 382 } 383 384 if spec.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze { 385 if err := haveMapMutabilityModifiers(); err != nil { 386 return nil, fmt.Errorf("map create: %w", err) 387 } 388 } 389 if spec.Flags&unix.BPF_F_MMAPABLE > 0 { 390 if err := haveMmapableMaps(); err != nil { 391 return nil, fmt.Errorf("map create: %w", err) 392 } 393 } 394 if spec.Flags&unix.BPF_F_INNER_MAP > 0 { 395 if err := haveInnerMaps(); err != nil { 396 return nil, fmt.Errorf("map create: %w", err) 397 } 398 } 399 if spec.Flags&unix.BPF_F_NO_PREALLOC > 0 { 400 if err := haveNoPreallocMaps(); err != nil { 401 return nil, fmt.Errorf("map create: %w", err) 402 } 403 } 404 405 attr := sys.MapCreateAttr{ 406 MapType: sys.MapType(spec.Type), 407 KeySize: spec.KeySize, 408 ValueSize: spec.ValueSize, 409 MaxEntries: spec.MaxEntries, 410 MapFlags: sys.MapFlags(spec.Flags), 411 NumaNode: spec.NumaNode, 412 } 413 414 if inner != nil { 415 attr.InnerMapFd = inner.Uint() 416 } 417 418 if haveObjName() == nil { 419 attr.MapName = sys.NewObjName(spec.Name) 420 } 421 422 if spec.Key != nil || spec.Value != nil { 423 handle, keyTypeID, valueTypeID, err := btf.MarshalMapKV(spec.Key, spec.Value) 424 if err != nil && !errors.Is(err, btf.ErrNotSupported) { 425 return nil, fmt.Errorf("load BTF: %w", err) 426 } 427 428 if handle != nil { 429 defer handle.Close() 430 431 // Use BTF k/v during map creation. 432 attr.BtfFd = uint32(handle.FD()) 433 attr.BtfKeyTypeId = uint32(keyTypeID) 434 attr.BtfValueTypeId = uint32(valueTypeID) 435 } 436 } 437 438 fd, err := sys.MapCreate(&attr) 439 // Some map types don't support BTF k/v in earlier kernel versions. 440 // Remove BTF metadata and retry map creation. 441 if (errors.Is(err, sys.ENOTSUPP) || errors.Is(err, unix.EINVAL)) && attr.BtfFd != 0 { 442 attr.BtfFd, attr.BtfKeyTypeId, attr.BtfValueTypeId = 0, 0, 0 443 fd, err = sys.MapCreate(&attr) 444 } 445 446 if err != nil { 447 if errors.Is(err, unix.EPERM) { 448 return nil, fmt.Errorf("map create: %w (MEMLOCK may be too low, consider rlimit.RemoveMemlock)", err) 449 } 450 if errors.Is(err, unix.EINVAL) && attr.MaxEntries == 0 { 451 return nil, fmt.Errorf("map create: %w (MaxEntries may be incorrectly set to zero)", err) 452 } 453 if attr.BtfFd == 0 { 454 return nil, fmt.Errorf("map create: %w (without BTF k/v)", err) 455 } 456 return nil, fmt.Errorf("map create: %w", err) 457 } 458 defer closeOnError(fd) 459 460 m, err := newMap(fd, spec.Name, spec.Type, spec.KeySize, spec.ValueSize, spec.MaxEntries, spec.Flags) 461 if err != nil { 462 return nil, fmt.Errorf("map create: %w", err) 463 } 464 465 return m, nil 466 } 467 468 // newMap allocates and returns a new Map structure. 469 // Sets the fullValueSize on per-CPU maps. 470 func newMap(fd *sys.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) { 471 m := &Map{ 472 name, 473 fd, 474 typ, 475 keySize, 476 valueSize, 477 maxEntries, 478 flags, 479 "", 480 int(valueSize), 481 } 482 483 if !typ.hasPerCPUValue() { 484 return m, nil 485 } 486 487 possibleCPUs, err := internal.PossibleCPUs() 488 if err != nil { 489 return nil, err 490 } 491 492 m.fullValueSize = internal.Align(int(valueSize), 8) * possibleCPUs 493 return m, nil 494 } 495 496 func (m *Map) String() string { 497 if m.name != "" { 498 return fmt.Sprintf("%s(%s)#%v", m.typ, m.name, m.fd) 499 } 500 return fmt.Sprintf("%s#%v", m.typ, m.fd) 501 } 502 503 // Type returns the underlying type of the map. 504 func (m *Map) Type() MapType { 505 return m.typ 506 } 507 508 // KeySize returns the size of the map key in bytes. 509 func (m *Map) KeySize() uint32 { 510 return m.keySize 511 } 512 513 // ValueSize returns the size of the map value in bytes. 514 func (m *Map) ValueSize() uint32 { 515 return m.valueSize 516 } 517 518 // MaxEntries returns the maximum number of elements the map can hold. 519 func (m *Map) MaxEntries() uint32 { 520 return m.maxEntries 521 } 522 523 // Flags returns the flags of the map. 524 func (m *Map) Flags() uint32 { 525 return m.flags 526 } 527 528 // Info returns metadata about the map. 529 func (m *Map) Info() (*MapInfo, error) { 530 return newMapInfoFromFd(m.fd) 531 } 532 533 // MapLookupFlags controls the behaviour of the map lookup calls. 534 type MapLookupFlags uint64 535 536 // LookupLock look up the value of a spin-locked map. 537 const LookupLock MapLookupFlags = 4 538 539 // Lookup retrieves a value from a Map. 540 // 541 // Calls Close() on valueOut if it is of type **Map or **Program, 542 // and *valueOut is not nil. 543 // 544 // Returns an error if the key doesn't exist, see ErrKeyNotExist. 545 func (m *Map) Lookup(key, valueOut interface{}) error { 546 valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) 547 if err := m.lookup(key, valuePtr, 0); err != nil { 548 return err 549 } 550 551 return m.unmarshalValue(valueOut, valueBytes) 552 } 553 554 // LookupWithFlags retrieves a value from a Map with flags. 555 // 556 // Passing LookupLock flag will look up the value of a spin-locked 557 // map without returning the lock. This must be specified if the 558 // elements contain a spinlock. 559 // 560 // Calls Close() on valueOut if it is of type **Map or **Program, 561 // and *valueOut is not nil. 562 // 563 // Returns an error if the key doesn't exist, see ErrKeyNotExist. 564 func (m *Map) LookupWithFlags(key, valueOut interface{}, flags MapLookupFlags) error { 565 valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) 566 if err := m.lookup(key, valuePtr, flags); err != nil { 567 return err 568 } 569 570 return m.unmarshalValue(valueOut, valueBytes) 571 } 572 573 // LookupAndDelete retrieves and deletes a value from a Map. 574 // 575 // Returns ErrKeyNotExist if the key doesn't exist. 576 func (m *Map) LookupAndDelete(key, valueOut interface{}) error { 577 return m.lookupAndDelete(key, valueOut, 0) 578 } 579 580 // LookupAndDeleteWithFlags retrieves and deletes a value from a Map. 581 // 582 // Passing LookupLock flag will look up and delete the value of a spin-locked 583 // map without returning the lock. This must be specified if the elements 584 // contain a spinlock. 585 // 586 // Returns ErrKeyNotExist if the key doesn't exist. 587 func (m *Map) LookupAndDeleteWithFlags(key, valueOut interface{}, flags MapLookupFlags) error { 588 return m.lookupAndDelete(key, valueOut, flags) 589 } 590 591 // LookupBytes gets a value from Map. 592 // 593 // Returns a nil value if a key doesn't exist. 594 func (m *Map) LookupBytes(key interface{}) ([]byte, error) { 595 valueBytes := make([]byte, m.fullValueSize) 596 valuePtr := sys.NewSlicePointer(valueBytes) 597 598 err := m.lookup(key, valuePtr, 0) 599 if errors.Is(err, ErrKeyNotExist) { 600 return nil, nil 601 } 602 603 return valueBytes, err 604 } 605 606 func (m *Map) lookup(key interface{}, valueOut sys.Pointer, flags MapLookupFlags) error { 607 keyPtr, err := m.marshalKey(key) 608 if err != nil { 609 return fmt.Errorf("can't marshal key: %w", err) 610 } 611 612 attr := sys.MapLookupElemAttr{ 613 MapFd: m.fd.Uint(), 614 Key: keyPtr, 615 Value: valueOut, 616 Flags: uint64(flags), 617 } 618 619 if err = sys.MapLookupElem(&attr); err != nil { 620 return fmt.Errorf("lookup: %w", wrapMapError(err)) 621 } 622 return nil 623 } 624 625 func (m *Map) lookupAndDelete(key, valueOut interface{}, flags MapLookupFlags) error { 626 valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) 627 628 keyPtr, err := m.marshalKey(key) 629 if err != nil { 630 return fmt.Errorf("can't marshal key: %w", err) 631 } 632 633 attr := sys.MapLookupAndDeleteElemAttr{ 634 MapFd: m.fd.Uint(), 635 Key: keyPtr, 636 Value: valuePtr, 637 Flags: uint64(flags), 638 } 639 640 if err := sys.MapLookupAndDeleteElem(&attr); err != nil { 641 return fmt.Errorf("lookup and delete: %w", wrapMapError(err)) 642 } 643 644 return m.unmarshalValue(valueOut, valueBytes) 645 } 646 647 // MapUpdateFlags controls the behaviour of the Map.Update call. 648 // 649 // The exact semantics depend on the specific MapType. 650 type MapUpdateFlags uint64 651 652 const ( 653 // UpdateAny creates a new element or update an existing one. 654 UpdateAny MapUpdateFlags = iota 655 // UpdateNoExist creates a new element. 656 UpdateNoExist MapUpdateFlags = 1 << (iota - 1) 657 // UpdateExist updates an existing element. 658 UpdateExist 659 // UpdateLock updates elements under bpf_spin_lock. 660 UpdateLock 661 ) 662 663 // Put replaces or creates a value in map. 664 // 665 // It is equivalent to calling Update with UpdateAny. 666 func (m *Map) Put(key, value interface{}) error { 667 return m.Update(key, value, UpdateAny) 668 } 669 670 // Update changes the value of a key. 671 func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error { 672 keyPtr, err := m.marshalKey(key) 673 if err != nil { 674 return fmt.Errorf("can't marshal key: %w", err) 675 } 676 677 valuePtr, err := m.marshalValue(value) 678 if err != nil { 679 return fmt.Errorf("can't marshal value: %w", err) 680 } 681 682 attr := sys.MapUpdateElemAttr{ 683 MapFd: m.fd.Uint(), 684 Key: keyPtr, 685 Value: valuePtr, 686 Flags: uint64(flags), 687 } 688 689 if err = sys.MapUpdateElem(&attr); err != nil { 690 return fmt.Errorf("update: %w", wrapMapError(err)) 691 } 692 693 return nil 694 } 695 696 // Delete removes a value. 697 // 698 // Returns ErrKeyNotExist if the key does not exist. 699 func (m *Map) Delete(key interface{}) error { 700 keyPtr, err := m.marshalKey(key) 701 if err != nil { 702 return fmt.Errorf("can't marshal key: %w", err) 703 } 704 705 attr := sys.MapDeleteElemAttr{ 706 MapFd: m.fd.Uint(), 707 Key: keyPtr, 708 } 709 710 if err = sys.MapDeleteElem(&attr); err != nil { 711 return fmt.Errorf("delete: %w", wrapMapError(err)) 712 } 713 return nil 714 } 715 716 // NextKey finds the key following an initial key. 717 // 718 // See NextKeyBytes for details. 719 // 720 // Returns ErrKeyNotExist if there is no next key. 721 func (m *Map) NextKey(key, nextKeyOut interface{}) error { 722 nextKeyPtr, nextKeyBytes := makeBuffer(nextKeyOut, int(m.keySize)) 723 724 if err := m.nextKey(key, nextKeyPtr); err != nil { 725 return err 726 } 727 728 if err := m.unmarshalKey(nextKeyOut, nextKeyBytes); err != nil { 729 return fmt.Errorf("can't unmarshal next key: %w", err) 730 } 731 return nil 732 } 733 734 // NextKeyBytes returns the key following an initial key as a byte slice. 735 // 736 // Passing nil will return the first key. 737 // 738 // Use Iterate if you want to traverse all entries in the map. 739 // 740 // Returns nil if there are no more keys. 741 func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) { 742 nextKey := make([]byte, m.keySize) 743 nextKeyPtr := sys.NewSlicePointer(nextKey) 744 745 err := m.nextKey(key, nextKeyPtr) 746 if errors.Is(err, ErrKeyNotExist) { 747 return nil, nil 748 } 749 750 return nextKey, err 751 } 752 753 func (m *Map) nextKey(key interface{}, nextKeyOut sys.Pointer) error { 754 var ( 755 keyPtr sys.Pointer 756 err error 757 ) 758 759 if key != nil { 760 keyPtr, err = m.marshalKey(key) 761 if err != nil { 762 return fmt.Errorf("can't marshal key: %w", err) 763 } 764 } 765 766 attr := sys.MapGetNextKeyAttr{ 767 MapFd: m.fd.Uint(), 768 Key: keyPtr, 769 NextKey: nextKeyOut, 770 } 771 772 if err = sys.MapGetNextKey(&attr); err != nil { 773 // Kernels 4.4.131 and earlier return EFAULT instead of a pointer to the 774 // first map element when a nil key pointer is specified. 775 if key == nil && errors.Is(err, unix.EFAULT) { 776 var guessKey []byte 777 guessKey, err = m.guessNonExistentKey() 778 if err != nil { 779 return err 780 } 781 782 // Retry the syscall with a valid non-existing key. 783 attr.Key = sys.NewSlicePointer(guessKey) 784 if err = sys.MapGetNextKey(&attr); err == nil { 785 return nil 786 } 787 } 788 789 return fmt.Errorf("next key: %w", wrapMapError(err)) 790 } 791 792 return nil 793 } 794 795 // guessNonExistentKey attempts to perform a map lookup that returns ENOENT. 796 // This is necessary on kernels before 4.4.132, since those don't support 797 // iterating maps from the start by providing an invalid key pointer. 798 func (m *Map) guessNonExistentKey() ([]byte, error) { 799 // Provide an invalid value pointer to prevent a copy on the kernel side. 800 valuePtr := sys.NewPointer(unsafe.Pointer(^uintptr(0))) 801 randKey := make([]byte, int(m.keySize)) 802 803 for i := 0; i < 4; i++ { 804 switch i { 805 // For hash maps, the 0 key is less likely to be occupied. They're often 806 // used for storing data related to pointers, and their access pattern is 807 // generally scattered across the keyspace. 808 case 0: 809 // An all-0xff key is guaranteed to be out of bounds of any array, since 810 // those have a fixed key size of 4 bytes. The only corner case being 811 // arrays with 2^32 max entries, but those are prohibitively expensive 812 // in many environments. 813 case 1: 814 for r := range randKey { 815 randKey[r] = 0xff 816 } 817 // Inspired by BCC, 0x55 is an alternating binary pattern (0101), so 818 // is unlikely to be taken. 819 case 2: 820 for r := range randKey { 821 randKey[r] = 0x55 822 } 823 // Last ditch effort, generate a random key. 824 case 3: 825 rand.New(rand.NewSource(time.Now().UnixNano())).Read(randKey) 826 } 827 828 err := m.lookup(randKey, valuePtr, 0) 829 if errors.Is(err, ErrKeyNotExist) { 830 return randKey, nil 831 } 832 } 833 834 return nil, errors.New("couldn't find non-existing key") 835 } 836 837 // BatchLookup looks up many elements in a map at once. 838 // 839 // "keysOut" and "valuesOut" must be of type slice, a pointer 840 // to a slice or buffer will not work. 841 // "prevKey" is the key to start the batch lookup from, it will 842 // *not* be included in the results. Use nil to start at the first key. 843 // 844 // ErrKeyNotExist is returned when the batch lookup has reached 845 // the end of all possible results, even when partial results 846 // are returned. It should be used to evaluate when lookup is "done". 847 func (m *Map) BatchLookup(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { 848 return m.batchLookup(sys.BPF_MAP_LOOKUP_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts) 849 } 850 851 // BatchLookupAndDelete looks up many elements in a map at once, 852 // 853 // It then deletes all those elements. 854 // "keysOut" and "valuesOut" must be of type slice, a pointer 855 // to a slice or buffer will not work. 856 // "prevKey" is the key to start the batch lookup from, it will 857 // *not* be included in the results. Use nil to start at the first key. 858 // 859 // ErrKeyNotExist is returned when the batch lookup has reached 860 // the end of all possible results, even when partial results 861 // are returned. It should be used to evaluate when lookup is "done". 862 func (m *Map) BatchLookupAndDelete(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { 863 return m.batchLookup(sys.BPF_MAP_LOOKUP_AND_DELETE_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts) 864 } 865 866 func (m *Map) batchLookup(cmd sys.Cmd, startKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { 867 if err := haveBatchAPI(); err != nil { 868 return 0, err 869 } 870 if m.typ.hasPerCPUValue() { 871 return 0, ErrNotSupported 872 } 873 keysValue := reflect.ValueOf(keysOut) 874 if keysValue.Kind() != reflect.Slice { 875 return 0, fmt.Errorf("keys must be a slice") 876 } 877 valuesValue := reflect.ValueOf(valuesOut) 878 if valuesValue.Kind() != reflect.Slice { 879 return 0, fmt.Errorf("valuesOut must be a slice") 880 } 881 count := keysValue.Len() 882 if count != valuesValue.Len() { 883 return 0, fmt.Errorf("keysOut and valuesOut must be the same length") 884 } 885 keyBuf := make([]byte, count*int(m.keySize)) 886 keyPtr := sys.NewSlicePointer(keyBuf) 887 valueBuf := make([]byte, count*int(m.fullValueSize)) 888 valuePtr := sys.NewSlicePointer(valueBuf) 889 nextPtr, nextBuf := makeBuffer(nextKeyOut, int(m.keySize)) 890 891 attr := sys.MapLookupBatchAttr{ 892 MapFd: m.fd.Uint(), 893 Keys: keyPtr, 894 Values: valuePtr, 895 Count: uint32(count), 896 OutBatch: nextPtr, 897 } 898 899 if opts != nil { 900 attr.ElemFlags = opts.ElemFlags 901 attr.Flags = opts.Flags 902 } 903 904 var err error 905 if startKey != nil { 906 attr.InBatch, err = marshalPtr(startKey, int(m.keySize)) 907 if err != nil { 908 return 0, err 909 } 910 } 911 912 _, sysErr := sys.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) 913 sysErr = wrapMapError(sysErr) 914 if sysErr != nil && !errors.Is(sysErr, unix.ENOENT) { 915 return 0, sysErr 916 } 917 918 err = m.unmarshalKey(nextKeyOut, nextBuf) 919 if err != nil { 920 return 0, err 921 } 922 err = unmarshalBytes(keysOut, keyBuf) 923 if err != nil { 924 return 0, err 925 } 926 err = unmarshalBytes(valuesOut, valueBuf) 927 if err != nil { 928 return 0, err 929 } 930 931 return int(attr.Count), sysErr 932 } 933 934 // BatchUpdate updates the map with multiple keys and values 935 // simultaneously. 936 // "keys" and "values" must be of type slice, a pointer 937 // to a slice or buffer will not work. 938 func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, error) { 939 if err := haveBatchAPI(); err != nil { 940 return 0, err 941 } 942 if m.typ.hasPerCPUValue() { 943 return 0, ErrNotSupported 944 } 945 keysValue := reflect.ValueOf(keys) 946 if keysValue.Kind() != reflect.Slice { 947 return 0, fmt.Errorf("keys must be a slice") 948 } 949 valuesValue := reflect.ValueOf(values) 950 if valuesValue.Kind() != reflect.Slice { 951 return 0, fmt.Errorf("values must be a slice") 952 } 953 var ( 954 count = keysValue.Len() 955 valuePtr sys.Pointer 956 err error 957 ) 958 if count != valuesValue.Len() { 959 return 0, fmt.Errorf("keys and values must be the same length") 960 } 961 keyPtr, err := marshalPtr(keys, count*int(m.keySize)) 962 if err != nil { 963 return 0, err 964 } 965 valuePtr, err = marshalPtr(values, count*int(m.valueSize)) 966 if err != nil { 967 return 0, err 968 } 969 970 attr := sys.MapUpdateBatchAttr{ 971 MapFd: m.fd.Uint(), 972 Keys: keyPtr, 973 Values: valuePtr, 974 Count: uint32(count), 975 } 976 if opts != nil { 977 attr.ElemFlags = opts.ElemFlags 978 attr.Flags = opts.Flags 979 } 980 981 err = sys.MapUpdateBatch(&attr) 982 if err != nil { 983 return int(attr.Count), fmt.Errorf("batch update: %w", wrapMapError(err)) 984 } 985 986 return int(attr.Count), nil 987 } 988 989 // BatchDelete batch deletes entries in the map by keys. 990 // "keys" must be of type slice, a pointer to a slice or buffer will not work. 991 func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) { 992 if err := haveBatchAPI(); err != nil { 993 return 0, err 994 } 995 if m.typ.hasPerCPUValue() { 996 return 0, ErrNotSupported 997 } 998 keysValue := reflect.ValueOf(keys) 999 if keysValue.Kind() != reflect.Slice { 1000 return 0, fmt.Errorf("keys must be a slice") 1001 } 1002 count := keysValue.Len() 1003 keyPtr, err := marshalPtr(keys, count*int(m.keySize)) 1004 if err != nil { 1005 return 0, fmt.Errorf("cannot marshal keys: %v", err) 1006 } 1007 1008 attr := sys.MapDeleteBatchAttr{ 1009 MapFd: m.fd.Uint(), 1010 Keys: keyPtr, 1011 Count: uint32(count), 1012 } 1013 1014 if opts != nil { 1015 attr.ElemFlags = opts.ElemFlags 1016 attr.Flags = opts.Flags 1017 } 1018 1019 if err = sys.MapDeleteBatch(&attr); err != nil { 1020 return int(attr.Count), fmt.Errorf("batch delete: %w", wrapMapError(err)) 1021 } 1022 1023 return int(attr.Count), nil 1024 } 1025 1026 // Iterate traverses a map. 1027 // 1028 // It's safe to create multiple iterators at the same time. 1029 // 1030 // It's not possible to guarantee that all keys in a map will be 1031 // returned if there are concurrent modifications to the map. 1032 func (m *Map) Iterate() *MapIterator { 1033 return newMapIterator(m) 1034 } 1035 1036 // Close the Map's underlying file descriptor, which could unload the 1037 // Map from the kernel if it is not pinned or in use by a loaded Program. 1038 func (m *Map) Close() error { 1039 if m == nil { 1040 // This makes it easier to clean up when iterating maps 1041 // of maps / programs. 1042 return nil 1043 } 1044 1045 return m.fd.Close() 1046 } 1047 1048 // FD gets the file descriptor of the Map. 1049 // 1050 // Calling this function is invalid after Close has been called. 1051 func (m *Map) FD() int { 1052 return m.fd.Int() 1053 } 1054 1055 // Clone creates a duplicate of the Map. 1056 // 1057 // Closing the duplicate does not affect the original, and vice versa. 1058 // Changes made to the map are reflected by both instances however. 1059 // If the original map was pinned, the cloned map will not be pinned by default. 1060 // 1061 // Cloning a nil Map returns nil. 1062 func (m *Map) Clone() (*Map, error) { 1063 if m == nil { 1064 return nil, nil 1065 } 1066 1067 dup, err := m.fd.Dup() 1068 if err != nil { 1069 return nil, fmt.Errorf("can't clone map: %w", err) 1070 } 1071 1072 return &Map{ 1073 m.name, 1074 dup, 1075 m.typ, 1076 m.keySize, 1077 m.valueSize, 1078 m.maxEntries, 1079 m.flags, 1080 "", 1081 m.fullValueSize, 1082 }, nil 1083 } 1084 1085 // Pin persists the map on the BPF virtual file system past the lifetime of 1086 // the process that created it . 1087 // 1088 // Calling Pin on a previously pinned map will overwrite the path, except when 1089 // the new path already exists. Re-pinning across filesystems is not supported. 1090 // You can Clone a map to pin it to a different path. 1091 // 1092 // This requires bpffs to be mounted above fileName. 1093 // See https://docs.cilium.io/en/stable/concepts/kubernetes/configuration/#mounting-bpffs-with-systemd 1094 func (m *Map) Pin(fileName string) error { 1095 if err := internal.Pin(m.pinnedPath, fileName, m.fd); err != nil { 1096 return err 1097 } 1098 m.pinnedPath = fileName 1099 return nil 1100 } 1101 1102 // Unpin removes the persisted state for the map from the BPF virtual filesystem. 1103 // 1104 // Failed calls to Unpin will not alter the state returned by IsPinned. 1105 // 1106 // Unpinning an unpinned Map returns nil. 1107 func (m *Map) Unpin() error { 1108 if err := internal.Unpin(m.pinnedPath); err != nil { 1109 return err 1110 } 1111 m.pinnedPath = "" 1112 return nil 1113 } 1114 1115 // IsPinned returns true if the map has a non-empty pinned path. 1116 func (m *Map) IsPinned() bool { 1117 return m.pinnedPath != "" 1118 } 1119 1120 // Freeze prevents a map to be modified from user space. 1121 // 1122 // It makes no changes to kernel-side restrictions. 1123 func (m *Map) Freeze() error { 1124 if err := haveMapMutabilityModifiers(); err != nil { 1125 return fmt.Errorf("can't freeze map: %w", err) 1126 } 1127 1128 attr := sys.MapFreezeAttr{ 1129 MapFd: m.fd.Uint(), 1130 } 1131 1132 if err := sys.MapFreeze(&attr); err != nil { 1133 return fmt.Errorf("can't freeze map: %w", err) 1134 } 1135 return nil 1136 } 1137 1138 // finalize populates the Map according to the Contents specified 1139 // in spec and freezes the Map if requested by spec. 1140 func (m *Map) finalize(spec *MapSpec) error { 1141 for _, kv := range spec.Contents { 1142 if err := m.Put(kv.Key, kv.Value); err != nil { 1143 return fmt.Errorf("putting value: key %v: %w", kv.Key, err) 1144 } 1145 } 1146 1147 if spec.Freeze { 1148 if err := m.Freeze(); err != nil { 1149 return fmt.Errorf("freezing map: %w", err) 1150 } 1151 } 1152 1153 return nil 1154 } 1155 1156 func (m *Map) marshalKey(data interface{}) (sys.Pointer, error) { 1157 if data == nil { 1158 if m.keySize == 0 { 1159 // Queues have a key length of zero, so passing nil here is valid. 1160 return sys.NewPointer(nil), nil 1161 } 1162 return sys.Pointer{}, errors.New("can't use nil as key of map") 1163 } 1164 1165 return marshalPtr(data, int(m.keySize)) 1166 } 1167 1168 func (m *Map) unmarshalKey(data interface{}, buf []byte) error { 1169 if buf == nil { 1170 // This is from a makeBuffer call, nothing do do here. 1171 return nil 1172 } 1173 1174 return unmarshalBytes(data, buf) 1175 } 1176 1177 func (m *Map) marshalValue(data interface{}) (sys.Pointer, error) { 1178 if m.typ.hasPerCPUValue() { 1179 return marshalPerCPUValue(data, int(m.valueSize)) 1180 } 1181 1182 var ( 1183 buf []byte 1184 err error 1185 ) 1186 1187 switch value := data.(type) { 1188 case *Map: 1189 if !m.typ.canStoreMap() { 1190 return sys.Pointer{}, fmt.Errorf("can't store map in %s", m.typ) 1191 } 1192 buf, err = marshalMap(value, int(m.valueSize)) 1193 1194 case *Program: 1195 if !m.typ.canStoreProgram() { 1196 return sys.Pointer{}, fmt.Errorf("can't store program in %s", m.typ) 1197 } 1198 buf, err = marshalProgram(value, int(m.valueSize)) 1199 1200 default: 1201 return marshalPtr(data, int(m.valueSize)) 1202 } 1203 1204 if err != nil { 1205 return sys.Pointer{}, err 1206 } 1207 1208 return sys.NewSlicePointer(buf), nil 1209 } 1210 1211 func (m *Map) unmarshalValue(value interface{}, buf []byte) error { 1212 if buf == nil { 1213 // This is from a makeBuffer call, nothing do do here. 1214 return nil 1215 } 1216 1217 if m.typ.hasPerCPUValue() { 1218 return unmarshalPerCPUValue(value, int(m.valueSize), buf) 1219 } 1220 1221 switch value := value.(type) { 1222 case **Map: 1223 if !m.typ.canStoreMap() { 1224 return fmt.Errorf("can't read a map from %s", m.typ) 1225 } 1226 1227 other, err := unmarshalMap(buf) 1228 if err != nil { 1229 return err 1230 } 1231 1232 // The caller might close the map externally, so ignore errors. 1233 _ = (*value).Close() 1234 1235 *value = other 1236 return nil 1237 1238 case *Map: 1239 if !m.typ.canStoreMap() { 1240 return fmt.Errorf("can't read a map from %s", m.typ) 1241 } 1242 return errors.New("require pointer to *Map") 1243 1244 case **Program: 1245 if !m.typ.canStoreProgram() { 1246 return fmt.Errorf("can't read a program from %s", m.typ) 1247 } 1248 1249 other, err := unmarshalProgram(buf) 1250 if err != nil { 1251 return err 1252 } 1253 1254 // The caller might close the program externally, so ignore errors. 1255 _ = (*value).Close() 1256 1257 *value = other 1258 return nil 1259 1260 case *Program: 1261 if !m.typ.canStoreProgram() { 1262 return fmt.Errorf("can't read a program from %s", m.typ) 1263 } 1264 return errors.New("require pointer to *Program") 1265 } 1266 1267 return unmarshalBytes(value, buf) 1268 } 1269 1270 // LoadPinnedMap loads a Map from a BPF file. 1271 func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) { 1272 fd, err := sys.ObjGet(&sys.ObjGetAttr{ 1273 Pathname: sys.NewStringPointer(fileName), 1274 FileFlags: opts.Marshal(), 1275 }) 1276 if err != nil { 1277 return nil, err 1278 } 1279 1280 m, err := newMapFromFD(fd) 1281 if err == nil { 1282 m.pinnedPath = fileName 1283 } 1284 1285 return m, err 1286 } 1287 1288 // unmarshalMap creates a map from a map ID encoded in host endianness. 1289 func unmarshalMap(buf []byte) (*Map, error) { 1290 if len(buf) != 4 { 1291 return nil, errors.New("map id requires 4 byte value") 1292 } 1293 1294 id := internal.NativeEndian.Uint32(buf) 1295 return NewMapFromID(MapID(id)) 1296 } 1297 1298 // marshalMap marshals the fd of a map into a buffer in host endianness. 1299 func marshalMap(m *Map, length int) ([]byte, error) { 1300 if length != 4 { 1301 return nil, fmt.Errorf("can't marshal map to %d bytes", length) 1302 } 1303 1304 buf := make([]byte, 4) 1305 internal.NativeEndian.PutUint32(buf, m.fd.Uint()) 1306 return buf, nil 1307 } 1308 1309 // MapIterator iterates a Map. 1310 // 1311 // See Map.Iterate. 1312 type MapIterator struct { 1313 target *Map 1314 prevKey interface{} 1315 prevBytes []byte 1316 count, maxEntries uint32 1317 done bool 1318 err error 1319 } 1320 1321 func newMapIterator(target *Map) *MapIterator { 1322 return &MapIterator{ 1323 target: target, 1324 maxEntries: target.maxEntries, 1325 prevBytes: make([]byte, target.keySize), 1326 } 1327 } 1328 1329 // Next decodes the next key and value. 1330 // 1331 // Iterating a hash map from which keys are being deleted is not 1332 // safe. You may see the same key multiple times. Iteration may 1333 // also abort with an error, see IsIterationAborted. 1334 // 1335 // Returns false if there are no more entries. You must check 1336 // the result of Err afterwards. 1337 // 1338 // See Map.Get for further caveats around valueOut. 1339 func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool { 1340 if mi.err != nil || mi.done { 1341 return false 1342 } 1343 1344 // For array-like maps NextKeyBytes returns nil only on after maxEntries 1345 // iterations. 1346 for mi.count <= mi.maxEntries { 1347 var nextBytes []byte 1348 nextBytes, mi.err = mi.target.NextKeyBytes(mi.prevKey) 1349 if mi.err != nil { 1350 return false 1351 } 1352 1353 if nextBytes == nil { 1354 mi.done = true 1355 return false 1356 } 1357 1358 // The user can get access to nextBytes since unmarshalBytes 1359 // does not copy when unmarshaling into a []byte. 1360 // Make a copy to prevent accidental corruption of 1361 // iterator state. 1362 copy(mi.prevBytes, nextBytes) 1363 mi.prevKey = mi.prevBytes 1364 1365 mi.count++ 1366 mi.err = mi.target.Lookup(nextBytes, valueOut) 1367 if errors.Is(mi.err, ErrKeyNotExist) { 1368 // Even though the key should be valid, we couldn't look up 1369 // its value. If we're iterating a hash map this is probably 1370 // because a concurrent delete removed the value before we 1371 // could get it. This means that the next call to NextKeyBytes 1372 // is very likely to restart iteration. 1373 // If we're iterating one of the fd maps like 1374 // ProgramArray it means that a given slot doesn't have 1375 // a valid fd associated. It's OK to continue to the next slot. 1376 continue 1377 } 1378 if mi.err != nil { 1379 return false 1380 } 1381 1382 mi.err = mi.target.unmarshalKey(keyOut, nextBytes) 1383 return mi.err == nil 1384 } 1385 1386 mi.err = fmt.Errorf("%w", ErrIterationAborted) 1387 return false 1388 } 1389 1390 // Err returns any encountered error. 1391 // 1392 // The method must be called after Next returns nil. 1393 // 1394 // Returns ErrIterationAborted if it wasn't possible to do a full iteration. 1395 func (mi *MapIterator) Err() error { 1396 return mi.err 1397 } 1398 1399 // MapGetNextID returns the ID of the next eBPF map. 1400 // 1401 // Returns ErrNotExist, if there is no next eBPF map. 1402 func MapGetNextID(startID MapID) (MapID, error) { 1403 attr := &sys.MapGetNextIdAttr{Id: uint32(startID)} 1404 return MapID(attr.NextId), sys.MapGetNextId(attr) 1405 } 1406 1407 // NewMapFromID returns the map for a given id. 1408 // 1409 // Returns ErrNotExist, if there is no eBPF map with the given id. 1410 func NewMapFromID(id MapID) (*Map, error) { 1411 fd, err := sys.MapGetFdById(&sys.MapGetFdByIdAttr{ 1412 Id: uint32(id), 1413 }) 1414 if err != nil { 1415 return nil, err 1416 } 1417 1418 return newMapFromFD(fd) 1419 }