github.com/cilium/cilium@v1.16.2/pkg/bpf/map_linux.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 //go:build linux 5 6 package bpf 7 8 import ( 9 "context" 10 "errors" 11 "fmt" 12 "io/fs" 13 "os" 14 "path" 15 "reflect" 16 "strings" 17 18 "github.com/cilium/ebpf" 19 "github.com/sirupsen/logrus" 20 21 "github.com/cilium/cilium/api/v1/models" 22 "github.com/cilium/cilium/pkg/controller" 23 "github.com/cilium/cilium/pkg/lock" 24 "github.com/cilium/cilium/pkg/logging/logfields" 25 "github.com/cilium/cilium/pkg/metrics" 26 "github.com/cilium/cilium/pkg/option" 27 "github.com/cilium/cilium/pkg/spanstat" 28 "github.com/cilium/cilium/pkg/time" 29 ) 30 31 var ( 32 // ErrMaxLookup is returned when the maximum number of map element lookups has 33 // been reached. 34 ErrMaxLookup = errors.New("maximum number of lookups reached") 35 36 bpfMapSyncControllerGroup = controller.NewGroup("bpf-map-sync") 37 ) 38 39 type MapKey interface { 40 fmt.Stringer 41 42 // New must return a pointer to a new MapKey. 43 New() MapKey 44 } 45 46 type MapValue interface { 47 fmt.Stringer 48 49 // New must return a pointer to a new MapValue. 50 New() MapValue 51 } 52 53 type cacheEntry struct { 54 Key MapKey 55 Value MapValue 56 57 DesiredAction DesiredAction 58 LastError error 59 } 60 61 type Map struct { 62 m *ebpf.Map 63 // spec will be nil after the map has been created 64 spec *ebpf.MapSpec 65 66 key MapKey 67 value MapValue 68 69 name string 70 path string 71 lock lock.RWMutex 72 73 // cachedCommonName is the common portion of the name excluding any 74 // endpoint ID 75 cachedCommonName string 76 77 // enableSync is true when synchronization retries have been enabled. 78 enableSync bool 79 80 // withValueCache is true when map cache has been enabled 81 withValueCache bool 82 83 // cache as key/value entries when map cache is enabled or as key-only when 84 // pressure metric is enabled 85 cache map[string]*cacheEntry 86 87 // errorResolverLastScheduled is the timestamp when the error resolver 88 // was last scheduled 89 errorResolverLastScheduled time.Time 90 91 // outstandingErrors states whether there are outstanding errors, occurred while 92 // syncing an entry with the kernel, that need to be resolved. This variable exists 93 // to avoid iterating over the full cache to check if reconciliation is necessary, 94 // but it is possible that it gets out of sync if an error is automatically 95 // resolved while performing a subsequent Update/Delete operation on the same key. 96 outstandingErrors bool 97 98 // pressureGauge is a metric that tracks the pressure on this map 99 pressureGauge *metrics.GaugeWithThreshold 100 101 // is true when events buffer is enabled. 102 eventsBufferEnabled bool 103 104 // contains optional event buffer which stores last n bpf map events. 105 events *eventsBuffer 106 107 // group is the metric group name for this map, it classifies maps of the same 108 // type that share the same metric group. 109 group string 110 } 111 112 func (m *Map) Type() ebpf.MapType { 113 if m.m != nil { 114 return m.m.Type() 115 } 116 if m.spec != nil { 117 return m.spec.Type 118 } 119 return ebpf.UnspecifiedMap 120 } 121 122 func (m *Map) KeySize() uint32 { 123 if m.m != nil { 124 return m.m.KeySize() 125 } 126 if m.spec != nil { 127 return m.spec.KeySize 128 } 129 return 0 130 } 131 132 func (m *Map) ValueSize() uint32 { 133 if m.m != nil { 134 return m.m.ValueSize() 135 } 136 if m.spec != nil { 137 return m.spec.ValueSize 138 } 139 return 0 140 } 141 142 func (m *Map) MaxEntries() uint32 { 143 if m.m != nil { 144 return m.m.MaxEntries() 145 } 146 if m.spec != nil { 147 return m.spec.MaxEntries 148 } 149 return 0 150 } 151 152 func (m *Map) Flags() uint32 { 153 if m.m != nil { 154 return m.m.Flags() 155 } 156 if m.spec != nil { 157 return m.spec.Flags 158 } 159 return 0 160 } 161 162 func (m *Map) updateMetrics() { 163 if m.group == "" { 164 return 165 } 166 metrics.UpdateMapCapacity(m.group, m.MaxEntries()) 167 } 168 169 // NewMap creates a new Map instance - object representing a BPF map 170 func NewMap(name string, mapType ebpf.MapType, mapKey MapKey, mapValue MapValue, 171 maxEntries int, flags uint32) *Map { 172 173 keySize := reflect.TypeOf(mapKey).Elem().Size() 174 valueSize := reflect.TypeOf(mapValue).Elem().Size() 175 176 return &Map{ 177 spec: &ebpf.MapSpec{ 178 Type: mapType, 179 Name: path.Base(name), 180 KeySize: uint32(keySize), 181 ValueSize: uint32(valueSize), 182 MaxEntries: uint32(maxEntries), 183 Flags: flags, 184 }, 185 name: path.Base(name), 186 key: mapKey, 187 value: mapValue, 188 group: name, 189 } 190 } 191 192 // NewMap creates a new Map instance - object representing a BPF map 193 func NewMapWithInnerSpec(name string, mapType ebpf.MapType, mapKey MapKey, mapValue MapValue, 194 maxEntries int, flags uint32, innerSpec *ebpf.MapSpec) *Map { 195 196 keySize := reflect.TypeOf(mapKey).Elem().Size() 197 valueSize := reflect.TypeOf(mapValue).Elem().Size() 198 199 return &Map{ 200 spec: &ebpf.MapSpec{ 201 Type: mapType, 202 Name: path.Base(name), 203 KeySize: uint32(keySize), 204 ValueSize: uint32(valueSize), 205 MaxEntries: uint32(maxEntries), 206 Flags: flags, 207 InnerMap: innerSpec, 208 }, 209 name: path.Base(name), 210 key: mapKey, 211 value: mapValue, 212 } 213 } 214 215 func (m *Map) commonName() string { 216 if m.cachedCommonName != "" { 217 return m.cachedCommonName 218 } 219 220 m.cachedCommonName = extractCommonName(m.name) 221 return m.cachedCommonName 222 } 223 224 func (m *Map) NonPrefixedName() string { 225 return strings.TrimPrefix(m.name, metrics.Namespace+"_") 226 } 227 228 // scheduleErrorResolver schedules a periodic resolver controller that scans 229 // all BPF map caches for unresolved errors and attempts to resolve them. On 230 // error of resolution, the controller is-rescheduled in an expedited manner 231 // with an exponential back-off. 232 // 233 // m.lock must be held for writing 234 func (m *Map) scheduleErrorResolver() { 235 m.outstandingErrors = true 236 237 if time.Since(m.errorResolverLastScheduled) <= errorResolverSchedulerMinInterval { 238 return 239 } 240 241 m.errorResolverLastScheduled = time.Now() 242 243 go func() { 244 time.Sleep(errorResolverSchedulerDelay) 245 mapControllers.UpdateController(m.controllerName(), 246 controller.ControllerParams{ 247 Group: bpfMapSyncControllerGroup, 248 DoFunc: m.resolveErrors, 249 RunInterval: errorResolverSchedulerMinInterval, 250 }, 251 ) 252 }() 253 254 } 255 256 // WithCache enables use of a cache. This will store all entries inserted from 257 // user space in a local cache (map) and will indicate the status of each 258 // individual entry. 259 func (m *Map) WithCache() *Map { 260 if m.cache == nil { 261 m.cache = map[string]*cacheEntry{} 262 } 263 m.withValueCache = true 264 m.enableSync = true 265 return m 266 } 267 268 // WithEvents enables use of the event buffer, if the buffer is enabled. 269 // This stores all map events (i.e. add/update/delete) in a bounded event buffer. 270 // If eventTTL is not zero, than events that are older than the TTL 271 // will periodically be removed from the buffer. 272 // Enabling events will use aprox proportional to 100MB for every million capacity 273 // in maxSize. 274 // 275 // TODO: The IPCache map have many periodic update events added by a controller for entries such as the 0.0.0.0/0 range. 276 // These fill the event buffer with possibly unnecessary events. 277 // We should either provide an option to aggregate these events, ignore hem from the ipcache event buffer or store them in a separate buffer. 278 func (m *Map) WithEvents(c option.BPFEventBufferConfig) *Map { 279 if !c.Enabled { 280 return m 281 } 282 m.scopedLogger().WithFields(logrus.Fields{ 283 "size": c.MaxSize, 284 "ttl": c.TTL, 285 }).Debug("enabling events buffer") 286 m.eventsBufferEnabled = true 287 m.initEventsBuffer(c.MaxSize, c.TTL) 288 return m 289 } 290 291 func (m *Map) WithGroupName(group string) *Map { 292 m.group = group 293 return m 294 } 295 296 // WithPressureMetricThreshold enables the tracking of a metric that measures 297 // the pressure of this map. This metric is only reported if over the 298 // threshold. 299 func (m *Map) WithPressureMetricThreshold(threshold float64) *Map { 300 // When pressure metric is enabled, we keep track of map keys in cache 301 if m.cache == nil { 302 m.cache = map[string]*cacheEntry{} 303 } 304 305 m.pressureGauge = metrics.NewBPFMapPressureGauge(m.NonPrefixedName(), threshold) 306 307 return m 308 } 309 310 // WithPressureMetric enables tracking and reporting of this map pressure with 311 // threshold 0. 312 func (m *Map) WithPressureMetric() *Map { 313 return m.WithPressureMetricThreshold(0.0) 314 } 315 316 // UpdatePressureMetricWithSize updates map pressure metric using the given map size. 317 func (m *Map) UpdatePressureMetricWithSize(size int32) { 318 if m.pressureGauge == nil { 319 return 320 } 321 322 // Do a lazy check of MetricsConfig as it is not available at map static 323 // initialization. 324 if !metrics.BPFMapPressure { 325 if !m.withValueCache { 326 m.cache = nil 327 } 328 m.pressureGauge = nil 329 return 330 } 331 332 pvalue := float64(size) / float64(m.MaxEntries()) 333 m.pressureGauge.Set(pvalue) 334 } 335 336 func (m *Map) updatePressureMetric() { 337 // Skipping pressure metric gauge updates for LRU map as the cache size 338 // does not accurately represent the actual map sie. 339 if m.spec != nil && m.spec.Type == ebpf.LRUHash { 340 return 341 } 342 m.UpdatePressureMetricWithSize(int32(len(m.cache))) 343 } 344 345 func (m *Map) FD() int { 346 return m.m.FD() 347 } 348 349 // Name returns the basename of this map. 350 func (m *Map) Name() string { 351 return m.name 352 } 353 354 // Path returns the path to this map on the filesystem. 355 func (m *Map) Path() (string, error) { 356 if err := m.setPathIfUnset(); err != nil { 357 return "", err 358 } 359 360 return m.path, nil 361 } 362 363 // Unpin attempts to unpin (remove) the map from the filesystem. 364 func (m *Map) Unpin() error { 365 path, err := m.Path() 366 if err != nil { 367 return err 368 } 369 370 return os.RemoveAll(path) 371 } 372 373 // UnpinIfExists tries to unpin (remove) the map only if it exists. 374 func (m *Map) UnpinIfExists() error { 375 found, err := m.exist() 376 if err != nil { 377 return err 378 } 379 380 if !found { 381 return nil 382 } 383 384 return m.Unpin() 385 } 386 387 func (m *Map) controllerName() string { 388 return fmt.Sprintf("bpf-map-sync-%s", m.name) 389 } 390 391 // OpenMap opens the map at pinPath. 392 func OpenMap(pinPath string, key MapKey, value MapValue) (*Map, error) { 393 if !path.IsAbs(pinPath) { 394 return nil, fmt.Errorf("pinPath must be absolute: %s", pinPath) 395 } 396 397 em, err := ebpf.LoadPinnedMap(pinPath, nil) 398 if err != nil { 399 return nil, err 400 } 401 402 m := &Map{ 403 m: em, 404 name: path.Base(pinPath), 405 path: pinPath, 406 key: key, 407 value: value, 408 } 409 410 m.updateMetrics() 411 registerMap(pinPath, m) 412 413 return m, nil 414 } 415 416 func (m *Map) setPathIfUnset() error { 417 if m.path == "" { 418 if m.name == "" { 419 return fmt.Errorf("either path or name must be set") 420 } 421 422 m.path = MapPath(m.name) 423 } 424 425 return nil 426 } 427 428 // Recreate removes any pin at the Map's pin path, recreates and re-pins it. 429 func (m *Map) Recreate() error { 430 m.lock.Lock() 431 defer m.lock.Unlock() 432 433 if m.m != nil { 434 return fmt.Errorf("map already open: %s", m.name) 435 } 436 437 if err := m.setPathIfUnset(); err != nil { 438 return err 439 } 440 441 if err := os.Remove(m.path); err != nil && !errors.Is(err, fs.ErrNotExist) { 442 return fmt.Errorf("removing pinned map %s: %w", m.name, err) 443 } 444 445 m.scopedLogger().Infof("Removed map pin at %s, recreating and re-pinning map %s", m.path, m.name) 446 447 return m.openOrCreate(true) 448 } 449 450 // IsOpen returns true if the map has been opened. 451 func (m *Map) IsOpen() bool { 452 m.lock.Lock() 453 defer m.lock.Unlock() 454 return m.m != nil 455 } 456 457 // OpenOrCreate attempts to open the Map, or if it does not yet exist, create 458 // the Map. If the existing map's attributes such as map type, key/value size, 459 // capacity, etc. do not match the Map's attributes, then the map will be 460 // deleted and reopened without any attempt to retain its previous contents. 461 // If the map is marked as non-persistent, it will always be recreated. 462 // 463 // Returns whether the map was deleted and recreated, or an optional error. 464 func (m *Map) OpenOrCreate() error { 465 m.lock.Lock() 466 defer m.lock.Unlock() 467 468 return m.openOrCreate(true) 469 } 470 471 // CreateUnpinned creates the map without pinning it to the file system. 472 // 473 // TODO(tb): Remove this when all map creation takes MapSpec. 474 func (m *Map) CreateUnpinned() error { 475 m.lock.Lock() 476 defer m.lock.Unlock() 477 478 return m.openOrCreate(false) 479 } 480 481 // Create is similar to OpenOrCreate, but closes the map after creating or 482 // opening it. 483 func (m *Map) Create() error { 484 if err := m.OpenOrCreate(); err != nil { 485 return err 486 } 487 return m.Close() 488 } 489 490 func (m *Map) openOrCreate(pin bool) error { 491 if m.m != nil { 492 return nil 493 } 494 495 if m.spec == nil { 496 return fmt.Errorf("attempted to create map %s without MapSpec", m.name) 497 } 498 499 if err := m.setPathIfUnset(); err != nil { 500 return err 501 } 502 503 m.spec.Flags |= GetPreAllocateMapFlags(m.spec.Type) 504 505 if m.spec.InnerMap != nil { 506 m.spec.InnerMap.Flags |= GetPreAllocateMapFlags(m.spec.InnerMap.Type) 507 } 508 509 if pin { 510 m.spec.Pinning = ebpf.PinByName 511 } 512 513 em, err := OpenOrCreateMap(m.spec, path.Dir(m.path)) 514 if err != nil { 515 return err 516 } 517 518 m.updateMetrics() 519 registerMap(m.path, m) 520 521 // Consume the MapSpec. 522 m.spec = nil 523 524 // Retain the Map. 525 m.m = em 526 527 return nil 528 } 529 530 // Open opens the BPF map. All calls to Open() are serialized due to acquiring 531 // m.lock 532 func (m *Map) Open() error { 533 m.lock.Lock() 534 defer m.lock.Unlock() 535 536 return m.open() 537 } 538 539 // open opens the BPF map. It is identical to Open() but should be used when 540 // m.lock is already held. open() may only be used if m.lock is held for 541 // writing. 542 func (m *Map) open() error { 543 if m.m != nil { 544 return nil 545 } 546 547 if err := m.setPathIfUnset(); err != nil { 548 return err 549 } 550 551 em, err := ebpf.LoadPinnedMap(m.path, nil) 552 if err != nil { 553 return fmt.Errorf("loading pinned map %s: %w", m.path, err) 554 } 555 556 m.updateMetrics() 557 registerMap(m.path, m) 558 559 m.m = em 560 561 return nil 562 } 563 564 func (m *Map) Close() error { 565 m.lock.Lock() 566 defer m.lock.Unlock() 567 568 if m.enableSync { 569 mapControllers.RemoveController(m.controllerName()) 570 } 571 572 if m.m != nil { 573 m.m.Close() 574 m.m = nil 575 } 576 577 unregisterMap(m.path, m) 578 579 return nil 580 } 581 582 func (m *Map) NextKey(key, nextKeyOut interface{}) error { 583 var duration *spanstat.SpanStat 584 if metrics.BPFSyscallDuration.IsEnabled() { 585 duration = spanstat.Start() 586 } 587 588 err := m.m.NextKey(key, nextKeyOut) 589 590 if metrics.BPFSyscallDuration.IsEnabled() { 591 metrics.BPFSyscallDuration.WithLabelValues(metricOpGetNextKey, metrics.Error2Outcome(err)).Observe(duration.End(err == nil).Total().Seconds()) 592 } 593 594 return err 595 } 596 597 type DumpCallback func(key MapKey, value MapValue) 598 599 // DumpWithCallback iterates over the Map and calls the given DumpCallback for 600 // each map entry. With the current implementation, it is safe for callbacks to 601 // retain the values received, as they are guaranteed to be new instances. 602 // 603 // TODO(tb): This package currently doesn't support dumping per-cpu maps, as 604 // ReadValueSize is always set to the size of a single value. 605 func (m *Map) DumpWithCallback(cb DumpCallback) error { 606 if cb == nil { 607 return errors.New("empty callback") 608 } 609 610 if err := m.Open(); err != nil { 611 return err 612 } 613 614 m.lock.RLock() 615 defer m.lock.RUnlock() 616 617 // Don't need deep copies here, only fresh pointers. 618 mk := m.key.New() 619 mv := m.value.New() 620 621 i := m.m.Iterate() 622 for i.Next(mk, mv) { 623 cb(mk, mv) 624 625 mk = m.key.New() 626 mv = m.value.New() 627 } 628 629 return i.Err() 630 } 631 632 // DumpWithCallbackIfExists is similar to DumpWithCallback, but returns earlier 633 // if the given map does not exist. 634 func (m *Map) DumpWithCallbackIfExists(cb DumpCallback) error { 635 found, err := m.exist() 636 if err != nil { 637 return err 638 } 639 640 if found { 641 return m.DumpWithCallback(cb) 642 } 643 644 return nil 645 } 646 647 // DumpReliablyWithCallback is similar to DumpWithCallback, but performs 648 // additional tracking of the current and recently seen keys, so that if an 649 // element is removed from the underlying kernel map during the dump, the dump 650 // can continue from a recently seen key rather than restarting from scratch. 651 // In addition, it caps the maximum number of map entry iterations at 4 times 652 // the maximum map size. If this limit is reached, ErrMaxLookup is returned. 653 // 654 // The caller must provide a callback for handling each entry, and a stats 655 // object initialized via a call to NewDumpStats(). 656 func (m *Map) DumpReliablyWithCallback(cb DumpCallback, stats *DumpStats) error { 657 if cb == nil { 658 return errors.New("empty callback") 659 } 660 661 if stats == nil { 662 return errors.New("stats is nil") 663 } 664 665 var ( 666 prevKey = m.key.New() 667 currentKey = m.key.New() 668 nextKey = m.key.New() 669 value = m.value.New() 670 671 prevKeyValid = false 672 ) 673 674 stats.start() 675 defer stats.finish() 676 677 if err := m.Open(); err != nil { 678 return err 679 } 680 681 // Get the first map key. 682 if err := m.NextKey(nil, currentKey); err != nil { 683 stats.Lookup = 1 684 if errors.Is(err, ebpf.ErrKeyNotExist) { 685 // Empty map, nothing to iterate. 686 stats.Completed = true 687 return nil 688 } 689 } 690 691 // maxLookup is an upper bound limit to prevent backtracking forever 692 // when iterating over the map's elements (the map might be concurrently 693 // updated while being iterated) 694 maxLookup := stats.MaxEntries * 4 695 696 // This loop stops when all elements have been iterated (Map.NextKey() returns 697 // ErrKeyNotExist) OR, in order to avoid hanging if 698 // the map is continuously updated, when maxLookup has been reached 699 for stats.Lookup = 1; stats.Lookup <= maxLookup; stats.Lookup++ { 700 // currentKey was set by the first m.NextKey() above. We know it existed in 701 // the map, but it may have been deleted by a concurrent map operation. 702 // 703 // If currentKey is no longer in the map, nextKey may be the first key in 704 // the map again. Continue with nextKey only if we still find currentKey in 705 // the Lookup() after the call to m.NextKey(), this way we know nextKey is 706 // NOT the first key in the map and iteration hasn't reset. 707 nextKeyErr := m.NextKey(currentKey, nextKey) 708 709 if err := m.m.Lookup(currentKey, value); err != nil { 710 stats.LookupFailed++ 711 // Restarting from a invalid key starts the iteration again from the beginning. 712 // If we have a previously found key, try to restart from there instead 713 if prevKeyValid { 714 currentKey = prevKey 715 // Restart from a given previous key only once, otherwise if the prevKey is 716 // concurrently deleted we might loop forever trying to look it up. 717 prevKeyValid = false 718 stats.KeyFallback++ 719 } else { 720 // Depending on exactly when currentKey was deleted from the 721 // map, nextKey may be the actual key element after the deleted 722 // one, or the first element in the map. 723 currentKey = nextKey 724 // To avoid having nextKey and currentKey pointing at the same memory 725 // we allocate a new key for nextKey. Without this currentKey and nextKey 726 // would be the same pointer value and would get double iterated on the next 727 // iterations m.NextKey(...) call. 728 nextKey = m.key.New() 729 stats.Interrupted++ 730 } 731 continue 732 } 733 734 cb(currentKey, value) 735 736 if nextKeyErr != nil { 737 if errors.Is(nextKeyErr, ebpf.ErrKeyNotExist) { 738 stats.Completed = true 739 return nil // end of map, we're done iterating 740 } 741 return nextKeyErr 742 } 743 744 // Prepare keys to move to the next iteration. 745 prevKey = currentKey 746 currentKey = nextKey 747 nextKey = m.key.New() 748 prevKeyValid = true 749 } 750 751 return ErrMaxLookup 752 } 753 754 // Dump returns the map (type map[string][]string) which contains all 755 // data stored in BPF map. 756 func (m *Map) Dump(hash map[string][]string) error { 757 callback := func(key MapKey, value MapValue) { 758 // No need to deep copy since we are creating strings. 759 hash[key.String()] = append(hash[key.String()], value.String()) 760 } 761 762 if err := m.DumpWithCallback(callback); err != nil { 763 return err 764 } 765 766 return nil 767 } 768 769 // BatchLookup returns the count of elements in the map by dumping the map 770 // using batch lookup. 771 func (m *Map) BatchLookup(cursor *ebpf.MapBatchCursor, keysOut, valuesOut interface{}, opts *ebpf.BatchOptions) (int, error) { 772 return m.m.BatchLookup(cursor, keysOut, valuesOut, opts) 773 } 774 775 // DumpIfExists dumps the contents of the map into hash via Dump() if the map 776 // file exists 777 func (m *Map) DumpIfExists(hash map[string][]string) error { 778 found, err := m.exist() 779 if err != nil { 780 return err 781 } 782 783 if found { 784 return m.Dump(hash) 785 } 786 787 return nil 788 } 789 790 func (m *Map) Lookup(key MapKey) (MapValue, error) { 791 if err := m.Open(); err != nil { 792 return nil, err 793 } 794 795 m.lock.RLock() 796 defer m.lock.RUnlock() 797 798 var duration *spanstat.SpanStat 799 if metrics.BPFSyscallDuration.IsEnabled() { 800 duration = spanstat.Start() 801 } 802 803 value := m.value.New() 804 err := m.m.Lookup(key, value) 805 806 if metrics.BPFSyscallDuration.IsEnabled() { 807 metrics.BPFSyscallDuration.WithLabelValues(metricOpLookup, metrics.Error2Outcome(err)).Observe(duration.End(err == nil).Total().Seconds()) 808 } 809 810 if err != nil { 811 return nil, err 812 } 813 814 return value, nil 815 } 816 817 func (m *Map) Update(key MapKey, value MapValue) error { 818 var err error 819 820 m.lock.Lock() 821 defer m.lock.Unlock() 822 823 defer func() { 824 desiredAction := OK 825 if err != nil { 826 desiredAction = Insert 827 } 828 entry := &cacheEntry{ 829 Key: key, 830 Value: value, 831 DesiredAction: desiredAction, 832 LastError: err, 833 } 834 m.addToEventsLocked(MapUpdate, *entry) 835 836 if m.cache == nil { 837 return 838 } 839 840 if m.withValueCache { 841 if err != nil { 842 m.scheduleErrorResolver() 843 } 844 m.cache[key.String()] = &cacheEntry{ 845 Key: key, 846 Value: value, 847 DesiredAction: desiredAction, 848 LastError: err, 849 } 850 m.updatePressureMetric() 851 } else if err == nil { 852 m.cache[key.String()] = nil 853 m.updatePressureMetric() 854 } 855 }() 856 857 if err = m.open(); err != nil { 858 return err 859 } 860 861 err = m.m.Update(key, value, ebpf.UpdateAny) 862 863 if metrics.BPFMapOps.IsEnabled() { 864 metrics.BPFMapOps.WithLabelValues(m.commonName(), metricOpUpdate, metrics.Error2Outcome(err)).Inc() 865 } 866 867 if err != nil { 868 return fmt.Errorf("update map %s: %w", m.Name(), err) 869 } 870 871 return nil 872 } 873 874 // deleteMapEvent is run at every delete map event. 875 // If cache is enabled, it will update the cache to reflect the delete. 876 // As well, if event buffer is enabled, it adds a new event to the buffer. 877 func (m *Map) deleteMapEvent(key MapKey, err error) { 878 m.addToEventsLocked(MapDelete, cacheEntry{ 879 Key: key, 880 DesiredAction: Delete, 881 LastError: err, 882 }) 883 m.deleteCacheEntry(key, err) 884 } 885 886 func (m *Map) deleteAllMapEvent() { 887 m.addToEventsLocked(MapDeleteAll, cacheEntry{}) 888 } 889 890 // deleteCacheEntry evaluates the specified error, if nil the map key is 891 // removed from the cache to indicate successful deletion. If non-nil, the map 892 // key entry in the cache is updated to indicate deletion failure with the 893 // specified error. 894 // 895 // Caller must hold m.lock for writing 896 func (m *Map) deleteCacheEntry(key MapKey, err error) { 897 if m.cache == nil { 898 return 899 } 900 901 k := key.String() 902 if err == nil { 903 delete(m.cache, k) 904 } else if !m.withValueCache { 905 return 906 } else { 907 entry, ok := m.cache[k] 908 if !ok { 909 m.cache[k] = &cacheEntry{ 910 Key: key, 911 } 912 entry = m.cache[k] 913 } 914 915 entry.DesiredAction = Delete 916 entry.LastError = err 917 m.scheduleErrorResolver() 918 } 919 } 920 921 // delete deletes the map entry corresponding to the given key. If ignoreMissing 922 // is set to true and the entry was not found, the error metric is not 923 // incremented for missing entries and nil error is returned. 924 func (m *Map) delete(key MapKey, ignoreMissing bool) (_ bool, err error) { 925 defer func() { 926 m.deleteMapEvent(key, err) 927 if err != nil { 928 m.updatePressureMetric() 929 } 930 }() 931 932 if err = m.open(); err != nil { 933 return false, err 934 } 935 936 var duration *spanstat.SpanStat 937 if metrics.BPFSyscallDuration.IsEnabled() { 938 duration = spanstat.Start() 939 } 940 941 err = m.m.Delete(key) 942 943 if metrics.BPFSyscallDuration.IsEnabled() { 944 metrics.BPFSyscallDuration.WithLabelValues(metricOpDelete, metrics.Error2Outcome(err)).Observe(duration.End(err == nil).Total().Seconds()) 945 } 946 947 if errors.Is(err, ebpf.ErrKeyNotExist) && ignoreMissing { 948 // Error and metrics handling is skipped in case ignoreMissing is set and 949 // the map key did not exist. This removes false positives in the delete 950 // metrics and skips the deferred cleanup of nonexistent entries. This 951 // situation occurs at least in the context of cleanup of NAT mappings from 952 // CT GC. 953 return false, nil 954 } 955 956 if metrics.BPFMapOps.IsEnabled() { 957 // err can be nil or any error other than ebpf.ErrKeyNotExist. 958 metrics.BPFMapOps.WithLabelValues(m.commonName(), metricOpDelete, metrics.Error2Outcome(err)).Inc() 959 } 960 961 if err != nil { 962 return false, fmt.Errorf("unable to delete element %s from map %s: %w", key, m.name, err) 963 } 964 965 return true, nil 966 } 967 968 // SilentDelete deletes the map entry corresponding to the given key. 969 // If a map entry is not found this returns (false, nil). 970 func (m *Map) SilentDelete(key MapKey) (deleted bool, err error) { 971 m.lock.Lock() 972 defer m.lock.Unlock() 973 974 return m.delete(key, true) 975 } 976 977 // Delete deletes the map entry corresponding to the given key. 978 func (m *Map) Delete(key MapKey) error { 979 m.lock.Lock() 980 defer m.lock.Unlock() 981 982 _, err := m.delete(key, false) 983 return err 984 } 985 986 // scopedLogger returns a logger scoped for the map. m.lock must be held. 987 func (m *Map) scopedLogger() *logrus.Entry { 988 return log.WithFields(logrus.Fields{logfields.Path: m.path, "name": m.name}) 989 } 990 991 // DeleteAll deletes all entries of a map by traversing the map and deleting individual 992 // entries. Note that if entries are added while the taversal is in progress, 993 // such entries may survive the deletion process. 994 func (m *Map) DeleteAll() error { 995 m.lock.Lock() 996 defer m.lock.Unlock() 997 defer m.updatePressureMetric() 998 scopedLog := m.scopedLogger() 999 scopedLog.Debug("deleting all entries in map") 1000 1001 if m.withValueCache { 1002 // Mark all entries for deletion, upon successful deletion, 1003 // entries will be removed or the LastError will be updated 1004 for _, entry := range m.cache { 1005 entry.DesiredAction = Delete 1006 entry.LastError = fmt.Errorf("deletion pending") 1007 } 1008 } 1009 1010 if err := m.open(); err != nil { 1011 return err 1012 } 1013 1014 mk := m.key.New() 1015 mv := make([]byte, m.ValueSize()) 1016 1017 defer m.deleteAllMapEvent() 1018 1019 i := m.m.Iterate() 1020 for i.Next(mk, &mv) { 1021 err := m.m.Delete(mk) 1022 1023 m.deleteCacheEntry(mk, err) 1024 1025 if err != nil { 1026 return err 1027 } 1028 } 1029 1030 err := i.Err() 1031 if err != nil { 1032 scopedLog.WithError(err).Warningf("Unable to correlate iteration key %v with cache entry. Inconsistent cache.", mk) 1033 } 1034 1035 return err 1036 } 1037 1038 // GetModel returns a BPF map in the representation served via the API 1039 func (m *Map) GetModel() *models.BPFMap { 1040 1041 mapModel := &models.BPFMap{ 1042 Path: m.path, 1043 } 1044 1045 mapModel.Cache = make([]*models.BPFMapEntry, 0, len(m.cache)) 1046 if m.withValueCache { 1047 m.lock.RLock() 1048 defer m.lock.RUnlock() 1049 for k, entry := range m.cache { 1050 model := &models.BPFMapEntry{ 1051 Key: k, 1052 DesiredAction: entry.DesiredAction.String(), 1053 } 1054 1055 if entry.LastError != nil { 1056 model.LastError = entry.LastError.Error() 1057 } 1058 1059 if entry.Value != nil { 1060 model.Value = entry.Value.String() 1061 } 1062 mapModel.Cache = append(mapModel.Cache, model) 1063 } 1064 return mapModel 1065 } 1066 1067 stats := NewDumpStats(m) 1068 filterCallback := func(key MapKey, value MapValue) { 1069 mapModel.Cache = append(mapModel.Cache, &models.BPFMapEntry{ 1070 Key: key.String(), 1071 Value: value.String(), 1072 }) 1073 } 1074 1075 m.DumpReliablyWithCallback(filterCallback, stats) 1076 return mapModel 1077 } 1078 1079 func (m *Map) addToEventsLocked(action Action, entry cacheEntry) { 1080 if !m.eventsBufferEnabled { 1081 return 1082 } 1083 m.events.add(&Event{ 1084 action: action, 1085 Timestamp: time.Now(), 1086 cacheEntry: entry, 1087 }) 1088 } 1089 1090 // resolveErrors is schedule by scheduleErrorResolver() and runs periodically. 1091 // It resolves up to maxSyncErrors discrepancies between cache and BPF map in 1092 // the kernel. 1093 func (m *Map) resolveErrors(ctx context.Context) error { 1094 started := time.Now() 1095 1096 m.lock.Lock() 1097 defer m.lock.Unlock() 1098 1099 if m.cache == nil { 1100 return nil 1101 } 1102 1103 if !m.outstandingErrors { 1104 return nil 1105 } 1106 1107 outstanding := 0 1108 for _, e := range m.cache { 1109 switch e.DesiredAction { 1110 case Insert, Delete: 1111 outstanding++ 1112 } 1113 } 1114 1115 // Errors appear to have already been resolved. This can happen if a subsequent 1116 // Update/Delete operation acting on the same key succeeded. 1117 if outstanding == 0 { 1118 m.outstandingErrors = false 1119 return nil 1120 } 1121 1122 if err := m.open(); err != nil { 1123 return err 1124 } 1125 1126 scopedLogger := m.scopedLogger() 1127 scopedLogger.WithField("remaining", outstanding). 1128 Debug("Starting periodic BPF map error resolver") 1129 1130 resolved := 0 1131 scanned := 0 1132 nerr := 0 1133 for k, e := range m.cache { 1134 scanned++ 1135 1136 switch e.DesiredAction { 1137 case OK: 1138 case Insert: 1139 // Call into ebpf-go's Map.Update() directly, don't go through the cache. 1140 err := m.m.Update(e.Key, e.Value, ebpf.UpdateAny) 1141 if metrics.BPFMapOps.IsEnabled() { 1142 metrics.BPFMapOps.WithLabelValues(m.commonName(), metricOpUpdate, metrics.Error2Outcome(err)).Inc() 1143 } 1144 if err == nil { 1145 e.DesiredAction = OK 1146 e.LastError = nil 1147 resolved++ 1148 outstanding-- 1149 } else { 1150 e.LastError = err 1151 nerr++ 1152 } 1153 m.cache[k] = e 1154 m.addToEventsLocked(MapUpdate, *e) 1155 case Delete: 1156 // Holding lock, issue direct delete on map. 1157 err := m.m.Delete(e.Key) 1158 if metrics.BPFMapOps.IsEnabled() { 1159 metrics.BPFMapOps.WithLabelValues(m.commonName(), metricOpDelete, metrics.Error2Outcome(err)).Inc() 1160 } 1161 if err == nil || errors.Is(err, ebpf.ErrKeyNotExist) { 1162 delete(m.cache, k) 1163 resolved++ 1164 outstanding-- 1165 } else { 1166 e.LastError = err 1167 nerr++ 1168 m.cache[k] = e 1169 } 1170 1171 m.addToEventsLocked(MapDelete, *e) 1172 } 1173 1174 // bail out if maximum errors are reached to relax the map lock 1175 if nerr > maxSyncErrors { 1176 break 1177 } 1178 } 1179 1180 m.updatePressureMetric() 1181 1182 scopedLogger.WithFields(logrus.Fields{ 1183 "remaining": outstanding, 1184 "resolved": resolved, 1185 "scanned": scanned, 1186 "duration": time.Since(started), 1187 }).Debug("BPF map error resolver completed") 1188 1189 m.outstandingErrors = outstanding > 0 1190 if m.outstandingErrors { 1191 return fmt.Errorf("%d map sync errors", outstanding) 1192 } 1193 1194 return nil 1195 } 1196 1197 // CheckAndUpgrade checks the received map's properties (for the map currently 1198 // loaded into the kernel) against the desired properties, and if they do not 1199 // match, deletes the map. 1200 // 1201 // Returns true if the map was upgraded. 1202 func (m *Map) CheckAndUpgrade(desired *Map) bool { 1203 flags := desired.Flags() | GetPreAllocateMapFlags(desired.Type()) 1204 1205 return objCheck( 1206 m.m, 1207 m.path, 1208 desired.Type(), 1209 desired.KeySize(), 1210 desired.ValueSize(), 1211 desired.MaxEntries(), 1212 flags, 1213 ) 1214 } 1215 1216 func (m *Map) exist() (bool, error) { 1217 path, err := m.Path() 1218 if err != nil { 1219 return false, err 1220 } 1221 1222 if _, err := os.Stat(path); err == nil { 1223 return true, nil 1224 } 1225 1226 return false, nil 1227 }