github.com/cilium/cilium@v1.16.2/pkg/bpf/collection.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package bpf 5 6 import ( 7 "encoding/binary" 8 "errors" 9 "fmt" 10 "strings" 11 12 "github.com/cilium/ebpf" 13 "github.com/cilium/ebpf/asm" 14 "github.com/cilium/ebpf/btf" 15 16 "github.com/cilium/cilium/pkg/maps/callsmap" 17 ) 18 19 const globalDataMap = ".rodata.config" 20 21 // LoadCollectionSpec loads the eBPF ELF at the given path and parses it into 22 // a CollectionSpec. This spec is only a blueprint of the contents of the ELF 23 // and does not represent any live resources that have been loaded into the 24 // kernel. 25 // 26 // This is a wrapper around ebpf.LoadCollectionSpec that parses legacy iproute2 27 // bpf_elf_map definitions (only used for prog_arrays at the time of writing) 28 // and assigns tail calls annotated with `__section_tail` macros to their 29 // intended maps and slots. 30 func LoadCollectionSpec(path string) (*ebpf.CollectionSpec, error) { 31 spec, err := ebpf.LoadCollectionSpec(path) 32 if err != nil { 33 return nil, err 34 } 35 36 if err := removeUnreachableTailcalls(spec); err != nil { 37 return nil, err 38 } 39 40 if err := iproute2Compat(spec); err != nil { 41 return nil, err 42 } 43 44 if err := classifyProgramTypes(spec); err != nil { 45 return nil, err 46 } 47 48 return spec, nil 49 } 50 51 func removeUnreachableTailcalls(spec *ebpf.CollectionSpec) error { 52 type TailCall struct { 53 referenced bool 54 visited bool 55 spec *ebpf.ProgramSpec 56 } 57 58 entrypoints := make([]*ebpf.ProgramSpec, 0) 59 tailcalls := make(map[uint32]*TailCall) 60 61 const ( 62 // Corresponds to CILIUM_MAP_CALLS. 63 cilium_calls_map = 2 64 ) 65 66 for _, prog := range spec.Programs { 67 var id, slot uint32 68 // Consider any program that doesn't follow the tailcall naming convention 69 // x/y to be an entrypoint. 70 // Any program that does follow the x/y naming convention but not part 71 // of the cilium_calls map is also considered an entrypoint. 72 if _, err := fmt.Sscanf(prog.SectionName, "%d/%v", &id, &slot); err != nil || id != cilium_calls_map { 73 entrypoints = append(entrypoints, prog) 74 continue 75 } 76 77 if tailcalls[slot] != nil { 78 return fmt.Errorf("duplicate tail call index %d", slot) 79 } 80 81 tailcalls[slot] = &TailCall{ 82 spec: prog, 83 } 84 } 85 86 // Discover all tailcalls that are reachable from the given program. 87 visit := func(prog *ebpf.ProgramSpec, tailcalls map[uint32]*TailCall) error { 88 // We look back from any tailcall, so we expect there to always be 3 instructions ahead of any tail call instr. 89 for i := 3; i < len(prog.Instructions); i++ { 90 // The `tail_call_static` C function is always used to call tail calls when 91 // the map index is known at compile time. 92 // Due to inline ASM this generates the following instructions: 93 // Mov R1, Rx 94 // Mov R2, <map> 95 // Mov R3, <index> 96 // call tail_call 97 98 // Find the tail call instruction. 99 inst := prog.Instructions[i] 100 if !inst.IsBuiltinCall() || inst.Constant != int64(asm.FnTailCall) { 101 continue 102 } 103 104 // Check that the previous instruction is a mov of the tail call index. 105 movIdx := prog.Instructions[i-1] 106 if movIdx.OpCode.ALUOp() != asm.Mov || movIdx.Dst != asm.R3 { 107 continue 108 } 109 110 // Check that the instruction before that is the load of the tail call map. 111 movR2 := prog.Instructions[i-2] 112 if movR2.OpCode != asm.LoadImmOp(asm.DWord) || movR2.Src != asm.PseudoMapFD { 113 continue 114 } 115 116 ref := movR2.Reference() 117 118 // Ignore static tail calls made to maps that are not the calls map 119 if !strings.Contains(ref, callsmap.MapName) || strings.Contains(ref, callsmap.CustomCallsMapName) { 120 log.Debugf("program '%s'/'%s', found tail call at %d, reference '%s', not a calls map, skipping", 121 prog.SectionName, prog.Name, i, ref) 122 continue 123 } 124 125 tc := tailcalls[uint32(movIdx.Constant)] 126 if tc == nil { 127 return fmt.Errorf( 128 "program '%s'/'%s' executes tail call to unknown index '%d' at %d, potential missed tailcall", 129 prog.SectionName, 130 prog.Name, 131 movIdx.Constant, 132 i, 133 ) 134 } 135 136 tc.referenced = true 137 } 138 139 return nil 140 } 141 142 // Discover all tailcalls that are reachable from the entrypoints. 143 for _, prog := range entrypoints { 144 if err := visit(prog, tailcalls); err != nil { 145 return err 146 } 147 } 148 149 // Keep visiting tailcalls until no more are discovered. 150 reset: 151 for _, tailcall := range tailcalls { 152 // If a tailcall is referenced by an entrypoint or another tailcall we should visit it 153 if tailcall.referenced && !tailcall.visited { 154 if err := visit(tailcall.spec, tailcalls); err != nil { 155 return err 156 } 157 tailcall.visited = true 158 159 // Visiting this tail call might have caused tail calls earlier in the list to become referenced, but this 160 // loop already skipped them. So reset the loop. If we already visited a tailcall we will ignore them anyway. 161 goto reset 162 } 163 } 164 165 // Remove all tailcalls that are not referenced. 166 for _, tailcall := range tailcalls { 167 if !tailcall.referenced { 168 log.Debugf("section '%s' / prog '%s', unreferenced, deleting", tailcall.spec.SectionName, tailcall.spec.Name) 169 delete(spec.Programs, tailcall.spec.Name) 170 } 171 } 172 173 return nil 174 } 175 176 // iproute2Compat parses the Extra field of each MapSpec in the CollectionSpec. 177 // This extra portion is present in legacy bpf_elf_map definitions and must be 178 // handled before the map can be loaded into the kernel. 179 // 180 // It parses the ELF section name of each ProgramSpec to extract any map/slot 181 // mappings for prog arrays used as tail call maps. The spec's programs are then 182 // inserted into the appropriate map and slot. 183 // 184 // TODO(timo): Remove when bpf_elf_map map definitions are no longer used after 185 // moving away from iproute2+libbpf. 186 func iproute2Compat(spec *ebpf.CollectionSpec) error { 187 // Parse legacy iproute2 u32 id and pinning fields. 188 maps := make(map[uint32]*ebpf.MapSpec) 189 for _, m := range spec.Maps { 190 if m.Extra != nil && m.Extra.Len() > 0 { 191 tail := struct { 192 ID uint32 193 Pinning uint32 194 _ uint64 // inner_id + inner_idx 195 }{} 196 if err := binary.Read(m.Extra, spec.ByteOrder, &tail); err != nil { 197 return fmt.Errorf("reading iproute2 map definition: %w", err) 198 } 199 200 m.Pinning = ebpf.PinType(tail.Pinning) 201 202 // Index maps by their iproute2 .id if any, so X/Y ELF section names can 203 // be matched against them. 204 if tail.ID != 0 { 205 if m2 := maps[tail.ID]; m2 != nil { 206 return fmt.Errorf("maps %s and %s have duplicate iproute2 map ID %d", m.Name, m2.Name, tail.ID) 207 } 208 maps[tail.ID] = m 209 } 210 } 211 } 212 213 for n, p := range spec.Programs { 214 // Parse the program's section name to determine which prog array and slot it 215 // needs to be inserted into. For example, a section name of '2/14' means to 216 // insert into the map with the .id field of 2 at index 14. 217 // Uses %v to automatically detect slot's mathematical base, since they can 218 // appear either in dec or hex, e.g. 1/0x0515. 219 var id, slot uint32 220 if _, err := fmt.Sscanf(p.SectionName, "%d/%v", &id, &slot); err == nil { 221 // Assign the prog name and slot to the map with the iproute2 .id obtained 222 // from the program's section name. The lib will load the ProgramSpecs 223 // and insert the corresponding Programs into the prog array at load time. 224 m := maps[id] 225 if m == nil { 226 return fmt.Errorf("no map with iproute2 map .id %d", id) 227 } 228 m.Contents = append(maps[id].Contents, ebpf.MapKV{Key: slot, Value: n}) 229 } 230 } 231 232 return nil 233 } 234 235 type CollectionOptions struct { 236 ebpf.CollectionOptions 237 238 // Replacements for constants defined using the DECLARE_CONFIG macros. 239 Constants map[string]uint64 240 } 241 242 // LoadCollection loads the given spec into the kernel with the specified opts. 243 // Returns a function that must be called after the Collection's entrypoints 244 // are attached to their respective kernel hooks. 245 // 246 // The value given in ProgramOptions.LogSize is used as the starting point for 247 // sizing the verifier's log buffer and defaults to 4MiB. On each retry, the 248 // log buffer quadruples in size, for a total of 5 attempts. If that proves 249 // insufficient, a truncated ebpf.VerifierError is returned. 250 // 251 // Any maps marked as pinned in the spec are automatically loaded from the path 252 // given in opts.Maps.PinPath and will be used instead of creating new ones. 253 // MapSpecs that differ (type/key/value/max/flags) from their pinned versions 254 // will result in an ebpf.ErrMapIncompatible here and the map must be removed 255 // before loading the CollectionSpec. 256 func LoadCollection(spec *ebpf.CollectionSpec, opts *CollectionOptions) (*ebpf.Collection, func() error, error) { 257 if spec == nil { 258 return nil, nil, errors.New("can't load nil CollectionSpec") 259 } 260 261 if opts == nil { 262 opts = &CollectionOptions{} 263 } 264 265 // Copy spec so the modifications below don't affect the input parameter, 266 // allowing the spec to be safely re-used by the caller. 267 spec = spec.Copy() 268 269 if err := inlineGlobalData(spec, opts.Constants); err != nil { 270 return nil, nil, fmt.Errorf("inlining global data: %w", err) 271 } 272 273 // Set initial size of verifier log buffer. 274 // 275 // Up until kernel 5.1, the maximum log size is (2^24)-1. In 5.2, this was 276 // increased to (2^30)-1 by 7a9f5c65abcc ("bpf: increase verifier log limit"). 277 // 278 // The default value of (2^22)-1 was chosen to be large enough to fit the log 279 // of most Cilium programs, while falling just within the 5.1 maximum size in 280 // one of the steps of the multiplication loop below. Without the -1, it would 281 // overshoot the cap to 2^24, making e.g. verifier tests unable to load the 282 // program if the previous size (2^22) was too small to fit the log. 283 if opts.Programs.LogSize == 0 { 284 opts.Programs.LogSize = 4_194_303 285 } 286 287 // Find and strip all CILIUM_PIN_REPLACE pinning flags before creating the 288 // Collection. ebpf-go will reject maps with pins it doesn't recognize. 289 toReplace := consumePinReplace(spec) 290 291 // Attempt to load the Collection. 292 coll, err := ebpf.NewCollectionWithOptions(spec, opts.CollectionOptions) 293 294 // Collect key names of maps that are not compatible with their pinned 295 // counterparts and remove their pinning flags. 296 if errors.Is(err, ebpf.ErrMapIncompatible) { 297 var incompatible []string 298 incompatible, err = incompatibleMaps(spec, opts.CollectionOptions) 299 if err != nil { 300 return nil, nil, fmt.Errorf("finding incompatible maps: %w", err) 301 } 302 toReplace = append(toReplace, incompatible...) 303 304 // Retry loading the Collection with necessary pinning flags removed. 305 coll, err = ebpf.NewCollectionWithOptions(spec, opts.CollectionOptions) 306 } 307 308 // Try to obtain the full verifier log if it was truncated. Note that 309 // VerifierError is also returned if verification was successful but the 310 // buffer was too small. 311 attempts := 5 312 for range attempts { 313 var ve *ebpf.VerifierError 314 if errors.As(err, &ve) && ve.Truncated { 315 // Retry with non-zero log level to avoid retrying with log disabled. 316 if opts.Programs.LogLevel == 0 { 317 opts.Programs.LogLevel = ebpf.LogLevelBranch 318 } 319 320 opts.Programs.LogSize *= 4 321 322 // Retry loading the Collection with increased log buffer. 323 coll, err = ebpf.NewCollectionWithOptions(spec, opts.CollectionOptions) 324 325 // Re-check error and bump attempts. 326 continue 327 } 328 329 if err != nil { 330 // Not a VerifierError or not truncated. 331 return nil, nil, err 332 } 333 } 334 if err != nil { 335 // Retry loop failed to resolve a VerifierError. 336 return nil, nil, fmt.Errorf("%d-byte truncated verifier log after %d attempts: %w", opts.CollectionOptions.Programs.LogSize, attempts, err) 337 } 338 339 // Load successful, return a function that must be invoked after attaching the 340 // Collection's entrypoint programs to their respective hooks. 341 commit := func() error { 342 // Commit maps that need their bpffs pins replaced. 343 if err := commitMapPins(toReplace, spec, coll, opts.CollectionOptions); err != nil { 344 return fmt.Errorf("replacing map pins on bpffs: %w", err) 345 } 346 return nil 347 } 348 return coll, commit, nil 349 } 350 351 // classifyProgramTypes sets the type of ProgramSpecs which the library cannot 352 // automatically classify due to them being in unrecognized ELF sections. Only 353 // programs of type UnspecifiedProgram are modified. 354 // 355 // Cilium uses the iproute2 X/Y section name convention for assigning programs 356 // to prog array slots, which is also not supported. 357 // 358 // TODO(timo): When iproute2 is no longer used for any loading, tail call progs 359 // can receive proper prefixes. 360 func classifyProgramTypes(spec *ebpf.CollectionSpec) error { 361 var t ebpf.ProgramType 362 for name, p := range spec.Programs { 363 // If the loader was able to classify a program, go with the verdict. 364 if p.Type != ebpf.UnspecifiedProgram { 365 t = p.Type 366 break 367 } 368 369 // Assign a program type based on the first recognized function name. 370 switch name { 371 // bpf_xdp.c 372 case "cil_xdp_entry": 373 t = ebpf.XDP 374 case 375 // bpf_lxc.c 376 "cil_from_container", "cil_to_container", 377 // bpf_host.c 378 "cil_from_netdev", "cil_from_host", "cil_to_netdev", "cil_to_host", 379 // bpf_network.c 380 "cil_from_network", 381 // bpf_overlay.c 382 "cil_to_overlay", "cil_from_overlay", 383 // bpf_wireguard.c 384 "cil_to_wireguard": 385 t = ebpf.SchedCLS 386 default: 387 continue 388 } 389 390 break 391 } 392 393 for _, p := range spec.Programs { 394 if p.Type == ebpf.UnspecifiedProgram { 395 p.Type = t 396 } 397 } 398 399 if t == ebpf.UnspecifiedProgram { 400 return errors.New("unable to classify program types") 401 } 402 403 return nil 404 } 405 406 // Must match the prefix used by the CONFIG macro in static_data.h. 407 const constantPrefix = "__config_" 408 409 // inlineGlobalData replaces all map loads from a global data section with 410 // immediate dword loads, effectively performing those map lookups in the 411 // loader. This is done for compatibility with kernels that don't support 412 // global data maps yet. 413 // 414 // overrides allow changing the value of the inlined global data. 415 // 416 // This code interacts with the DECLARE_CONFIG macro in the BPF C code base. 417 func inlineGlobalData(spec *ebpf.CollectionSpec, overrides map[string]uint64) error { 418 offsets, values, err := globalData(spec) 419 if err != nil { 420 return err 421 } 422 if offsets == nil { 423 // Most likely all references to global data have been compiled 424 // out. 425 return nil 426 } 427 428 for name, value := range overrides { 429 constName := constantPrefix + name 430 431 if _, ok := values[constName]; !ok { 432 return fmt.Errorf("can't override non-existent constant %q", name) 433 } 434 435 values[constName] = value 436 } 437 438 for _, prog := range spec.Programs { 439 for i, ins := range prog.Instructions { 440 if !ins.IsLoadFromMap() || ins.Src != asm.PseudoMapValue { 441 continue 442 } 443 444 if ins.Reference() != globalDataMap { 445 return fmt.Errorf("global constants must be in %s, but found reference to %s", globalDataMap, ins.Reference()) 446 } 447 448 // Get the offset of the read within the target map, 449 // stored in the 32 most-significant bits of Constant. 450 // Equivalent to Instruction.mapOffset(). 451 off := uint32(uint64(ins.Constant) >> 32) 452 453 // Look up the value of the variable stored at the Datasec offset pointed 454 // at by the instruction. 455 v, ok := offsets[off] 456 if !ok { 457 return fmt.Errorf("no global constant found in %s at offset %d", globalDataMap, off) 458 } 459 460 // Replace the map load with an immediate load. Must be a dword load 461 // to match the instruction width of a map load. 462 r := asm.LoadImm(ins.Dst, int64(values[v]), asm.DWord) 463 464 // Preserve metadata of the original instruction. Otherwise, a program's 465 // first instruction could be stripped of its func_info or Symbol 466 // (function start) annotations. 467 r.Metadata = ins.Metadata 468 469 prog.Instructions[i] = r 470 } 471 } 472 473 return nil 474 } 475 476 // globalData gets the contents of the first entry in the global data map 477 // and removes it from the spec to prevent it from being created in the kernel. 478 func globalData(spec *ebpf.CollectionSpec) (offsets map[uint32]string, values map[string]uint64, _ error) { 479 dm := spec.Maps[globalDataMap] 480 if dm == nil { 481 return nil, nil, nil 482 } 483 484 if dl := len(dm.Contents); dl != 1 { 485 return nil, nil, fmt.Errorf("expected one key in %s, found %d", globalDataMap, dl) 486 } 487 488 ds, ok := dm.Value.(*btf.Datasec) 489 if !ok { 490 return nil, nil, fmt.Errorf("no BTF datasec found for %s", globalDataMap) 491 } 492 493 data, ok := (dm.Contents[0].Value).([]byte) 494 if !ok { 495 return nil, nil, fmt.Errorf("expected %s value to be a byte slice, got: %T", 496 globalDataMap, dm.Contents[0].Value) 497 } 498 499 // Slice up the binary contents of the global data map according to the 500 // variables described in its Datasec. 501 values = make(map[string]uint64) 502 offsets = make(map[uint32]string) 503 buf := make([]byte, 8) 504 for _, vsi := range ds.Vars { 505 v, ok := vsi.Type.(*btf.Var) 506 if !ok { 507 // VarSecInfo.Type can be a Func. 508 continue 509 } 510 511 if _, ok := offsets[vsi.Offset]; ok { 512 return nil, nil, fmt.Errorf("duplicate VarSecInfo for offset %d", vsi.Offset) 513 } 514 515 copy(buf, data[vsi.Offset:vsi.Offset+vsi.Size]) 516 517 var value uint64 518 switch vsi.Size { 519 case 8: 520 value = spec.ByteOrder.Uint64(buf) 521 case 4: 522 value = uint64(spec.ByteOrder.Uint32(buf)) 523 case 2: 524 value = uint64(spec.ByteOrder.Uint16(buf)) 525 case 1: 526 value = uint64(buf[0]) 527 default: 528 return nil, nil, fmt.Errorf("invalid variable size %d", vsi.Size) 529 } 530 531 // Emit the variable's value by its offset in the datasec. 532 offsets[vsi.Offset] = v.Name 533 values[v.Name] = value 534 } 535 536 // Remove the map definition to skip loading it into the kernel. 537 delete(spec.Maps, globalDataMap) 538 539 return offsets, values, nil 540 }