github.com/cilium/cilium@v1.16.2/pkg/bpf/collection.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package bpf

import (
	"encoding/binary"
	"errors"
	"fmt"
	"strings"

	"github.com/cilium/ebpf"
	"github.com/cilium/ebpf/asm"
	"github.com/cilium/ebpf/btf"

	"github.com/cilium/cilium/pkg/maps/callsmap"
)

const globalDataMap = ".rodata.config"

// LoadCollectionSpec loads the eBPF ELF at the given path and parses it into
// a CollectionSpec. This spec is only a blueprint of the contents of the ELF
// and does not represent any live resources that have been loaded into the
// kernel.
//
// This is a wrapper around ebpf.LoadCollectionSpec that parses legacy iproute2
// bpf_elf_map definitions (only used for prog_arrays at the time of writing)
// and assigns tail calls annotated with `__section_tail` macros to their
// intended maps and slots.
func LoadCollectionSpec(path string) (*ebpf.CollectionSpec, error) {
	spec, err := ebpf.LoadCollectionSpec(path)
	if err != nil {
		return nil, err
	}

	if err := removeUnreachableTailcalls(spec); err != nil {
		return nil, err
	}

	if err := iproute2Compat(spec); err != nil {
		return nil, err
	}

	if err := classifyProgramTypes(spec); err != nil {
		return nil, err
	}

	return spec, nil
}
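
// exampleLoadSpec is an illustrative sketch and not part of the original
// source: it shows typical use of LoadCollectionSpec. The object path passed
// in by the caller is assumed to point at a Cilium BPF ELF built elsewhere.
func exampleLoadSpec(objPath string) error {
	spec, err := LoadCollectionSpec(objPath)
	if err != nil {
		return fmt.Errorf("loading %s: %w", objPath, err)
	}

	// The spec is only a blueprint at this point; nothing has been loaded
	// into the kernel yet.
	for name, p := range spec.Programs {
		fmt.Printf("program %s (section %s) classified as %s\n", name, p.SectionName, p.Type)
	}
	return nil
}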

func removeUnreachableTailcalls(spec *ebpf.CollectionSpec) error {
	type TailCall struct {
		referenced bool
		visited    bool
		spec       *ebpf.ProgramSpec
	}

	entrypoints := make([]*ebpf.ProgramSpec, 0)
	tailcalls := make(map[uint32]*TailCall)

	const (
		// Corresponds to CILIUM_MAP_CALLS.
		cilium_calls_map = 2
	)

	for _, prog := range spec.Programs {
		var id, slot uint32
		// Consider any program that doesn't follow the x/y tail call section
		// naming convention to be an entrypoint. A program that does follow
		// the x/y convention but does not target the cilium_calls map is also
		// considered an entrypoint.
		if _, err := fmt.Sscanf(prog.SectionName, "%d/%v", &id, &slot); err != nil || id != cilium_calls_map {
			entrypoints = append(entrypoints, prog)
			continue
		}

		if tailcalls[slot] != nil {
			return fmt.Errorf("duplicate tail call index %d", slot)
		}

		tailcalls[slot] = &TailCall{
			spec: prog,
		}
	}

	// Discover all tailcalls that are reachable from the given program.
	visit := func(prog *ebpf.ProgramSpec, tailcalls map[uint32]*TailCall) error {
		// The scan looks back at the 3 instructions preceding each tail call, so start iterating at index 3.
		for i := 3; i < len(prog.Instructions); i++ {
			// The `tail_call_static` C function is always used to call tail calls when
			// the map index is known at compile time.
			// Due to inline ASM this generates the following instructions:
			//   Mov R1, Rx
			//   Mov R2, <map>
			//   Mov R3, <index>
			//   call tail_call

			// Find the tail call instruction.
			inst := prog.Instructions[i]
			if !inst.IsBuiltinCall() || inst.Constant != int64(asm.FnTailCall) {
				continue
			}

			// Check that the previous instruction is a mov of the tail call index.
			movIdx := prog.Instructions[i-1]
			if movIdx.OpCode.ALUOp() != asm.Mov || movIdx.Dst != asm.R3 {
				continue
			}

			// Check that the instruction before that is the load of the tail call map.
			movR2 := prog.Instructions[i-2]
			if movR2.OpCode != asm.LoadImmOp(asm.DWord) || movR2.Src != asm.PseudoMapFD {
				continue
			}

			ref := movR2.Reference()

			// Ignore static tail calls made to maps that are not the calls map
			if !strings.Contains(ref, callsmap.MapName) || strings.Contains(ref, callsmap.CustomCallsMapName) {
				log.Debugf("program '%s'/'%s', found tail call at %d, reference '%s', not a calls map, skipping",
					prog.SectionName, prog.Name, i, ref)
				continue
			}

			tc := tailcalls[uint32(movIdx.Constant)]
			if tc == nil {
				return fmt.Errorf(
					"program '%s'/'%s' executes tail call to unknown index '%d' at %d, potential missed tailcall",
					prog.SectionName,
					prog.Name,
					movIdx.Constant,
					i,
				)
			}

			tc.referenced = true
		}

		return nil
	}

	// Discover all tailcalls that are reachable from the entrypoints.
	for _, prog := range entrypoints {
		if err := visit(prog, tailcalls); err != nil {
			return err
		}
	}

	// Keep visiting tailcalls until no more are discovered.
reset:
	for _, tailcall := range tailcalls {
		// If a tailcall is referenced by an entrypoint or by another tailcall, visit it.
		if tailcall.referenced && !tailcall.visited {
			if err := visit(tailcall.spec, tailcalls); err != nil {
				return err
			}
			tailcall.visited = true

			// Visiting this tail call may have marked tail calls earlier in the map as referenced,
			// but this loop has already skipped them. Restart the loop; tail calls that were
			// already visited are skipped on the next pass anyway.
			goto reset
		}
	}

	// Remove all tailcalls that are not referenced.
	for _, tailcall := range tailcalls {
		if !tailcall.referenced {
			log.Debugf("section '%s' / prog '%s', unreferenced, deleting", tailcall.spec.SectionName, tailcall.spec.Name)
			delete(spec.Programs, tailcall.spec.Name)
		}
	}

	return nil
}
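
// exampleTailCallPattern is an illustrative sketch and not part of the
// original source: it constructs the instruction sequence that
// tail_call_static emits for a compile-time-known slot and applies the same
// checks visit() uses to recognize it. The map reference and slot below are
// made up.
func exampleTailCallPattern() {
	insns := asm.Instructions{
		asm.Mov.Reg(asm.R1, asm.R6),
		asm.LoadMapPtr(asm.R2, 0).WithReference("cilium_calls_example"), // hypothetical calls map
		asm.Mov.Imm(asm.R3, 5),                                          // slot 5
		asm.FnTailCall.Call(),
		asm.Mov.Imm(asm.R0, 0),
		asm.Return(),
	}

	for i := 3; i < len(insns); i++ {
		ins := insns[i]
		if !ins.IsBuiltinCall() || ins.Constant != int64(asm.FnTailCall) {
			continue
		}
		movIdx, movMap := insns[i-1], insns[i-2]
		if movIdx.OpCode.ALUOp() != asm.Mov || movIdx.Dst != asm.R3 {
			continue
		}
		if movMap.OpCode != asm.LoadImmOp(asm.DWord) || movMap.Src != asm.PseudoMapFD {
			continue
		}
		fmt.Printf("tail call into %s at slot %d\n", movMap.Reference(), movIdx.Constant)
	}
}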

// iproute2Compat parses the Extra field of each MapSpec in the CollectionSpec.
// This extra portion is present in legacy bpf_elf_map definitions and must be
// handled before the map can be loaded into the kernel.
//
// It parses the ELF section name of each ProgramSpec to extract any map/slot
// mappings for prog arrays used as tail call maps. The spec's programs are then
// inserted into the appropriate map and slot.
//
// TODO(timo): Remove when bpf_elf_map map definitions are no longer used after
// moving away from iproute2+libbpf.
func iproute2Compat(spec *ebpf.CollectionSpec) error {
	// Parse legacy iproute2 u32 id and pinning fields.
	maps := make(map[uint32]*ebpf.MapSpec)
	for _, m := range spec.Maps {
		if m.Extra != nil && m.Extra.Len() > 0 {
			tail := struct {
				ID      uint32
				Pinning uint32
				_       uint64 // inner_id + inner_idx
			}{}
			if err := binary.Read(m.Extra, spec.ByteOrder, &tail); err != nil {
				return fmt.Errorf("reading iproute2 map definition: %w", err)
			}

			m.Pinning = ebpf.PinType(tail.Pinning)

			// Index maps by their iproute2 .id if any, so X/Y ELF section names can
			// be matched against them.
			if tail.ID != 0 {
				if m2 := maps[tail.ID]; m2 != nil {
					return fmt.Errorf("maps %s and %s have duplicate iproute2 map ID %d", m.Name, m2.Name, tail.ID)
				}
				maps[tail.ID] = m
			}
		}
	}

	for n, p := range spec.Programs {
		// Parse the program's section name to determine which prog array and slot it
		// needs to be inserted into. For example, a section name of '2/14' means to
		// insert into the map with the .id field of 2 at index 14.
		// The %v verb auto-detects the slot's numeric base, since slots can
		// appear in either decimal or hex, e.g. 1/0x0515.
		var id, slot uint32
		if _, err := fmt.Sscanf(p.SectionName, "%d/%v", &id, &slot); err == nil {
			// Assign the prog name and slot to the map with the iproute2 .id obtained
			// from the program's section name. The lib will load the ProgramSpecs
			// and insert the corresponding Programs into the prog array at load time.
			m := maps[id]
			if m == nil {
				return fmt.Errorf("no map with iproute2 map .id %d", id)
			}
			m.Contents = append(maps[id].Contents, ebpf.MapKV{Key: slot, Value: n})
		}
	}

	return nil
}
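
// exampleIproute2TailCallSlot is an illustrative sketch and not part of the
// original source: it shows the effect iproute2Compat has for a program whose
// ELF section is "2/14" when the prog array with iproute2 .id 2 is present.
// The map name, program name, and sizes are hypothetical.
func exampleIproute2TailCallSlot() *ebpf.MapSpec {
	calls := &ebpf.MapSpec{
		Name:       "example_calls",
		Type:       ebpf.ProgramArray,
		KeySize:    4,
		ValueSize:  4,
		MaxEntries: 16,
	}

	var id, slot uint32
	if _, err := fmt.Sscanf("2/14", "%d/%v", &id, &slot); err == nil {
		// The ebpf library resolves the program name to a loaded Program and
		// inserts it into slot 14 of the prog array at collection load time.
		calls.Contents = append(calls.Contents, ebpf.MapKV{Key: slot, Value: "example_prog"})
	}
	return calls
}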

type CollectionOptions struct {
	ebpf.CollectionOptions

	// Replacements for constants defined using the DECLARE_CONFIG macros.
	Constants map[string]uint64
}

// LoadCollection loads the given spec into the kernel with the specified opts.
// Returns a function that must be called after the Collection's entrypoints
// are attached to their respective kernel hooks.
//
// The value given in ProgramOptions.LogSize is used as the starting point for
// sizing the verifier's log buffer and defaults to just under 4MiB ((2^22)-1
// bytes). On each retry, the log buffer quadruples in size, for a total of 5
// attempts. If that proves insufficient, a truncated ebpf.VerifierError is
// returned.
//
// Any maps marked as pinned in the spec are automatically loaded from the path
// given in opts.Maps.PinPath and will be used instead of creating new ones.
// MapSpecs that differ (type/key/value/max/flags) from their pinned versions
// will result in an ebpf.ErrMapIncompatible here and the map must be removed
// before loading the CollectionSpec.
func LoadCollection(spec *ebpf.CollectionSpec, opts *CollectionOptions) (*ebpf.Collection, func() error, error) {
	if spec == nil {
		return nil, nil, errors.New("can't load nil CollectionSpec")
	}

	if opts == nil {
		opts = &CollectionOptions{}
	}

	// Copy spec so the modifications below don't affect the input parameter,
	// allowing the spec to be safely re-used by the caller.
	spec = spec.Copy()

	if err := inlineGlobalData(spec, opts.Constants); err != nil {
		return nil, nil, fmt.Errorf("inlining global data: %w", err)
	}

	// Set initial size of verifier log buffer.
	//
	// Up until kernel 5.1, the maximum log size is (2^24)-1. In 5.2, this was
	// increased to (2^30)-1 by 7a9f5c65abcc ("bpf: increase verifier log limit").
	//
	// The default value of (2^22)-1 was chosen to be large enough to fit the log
	// of most Cilium programs, while falling just within the 5.1 maximum size in
	// one of the steps of the multiplication loop below. Without the -1, it would
	// overshoot the cap to 2^24, making e.g. verifier tests unable to load the
	// program if the previous size (2^22) was too small to fit the log.
	if opts.Programs.LogSize == 0 {
		opts.Programs.LogSize = 4_194_303
	}

	// Find and strip all CILIUM_PIN_REPLACE pinning flags before creating the
	// Collection. ebpf-go will reject maps with pins it doesn't recognize.
	toReplace := consumePinReplace(spec)

	// Attempt to load the Collection.
	coll, err := ebpf.NewCollectionWithOptions(spec, opts.CollectionOptions)

	// Collect key names of maps that are not compatible with their pinned
	// counterparts and remove their pinning flags.
	if errors.Is(err, ebpf.ErrMapIncompatible) {
		var incompatible []string
		incompatible, err = incompatibleMaps(spec, opts.CollectionOptions)
		if err != nil {
			return nil, nil, fmt.Errorf("finding incompatible maps: %w", err)
		}
		toReplace = append(toReplace, incompatible...)

		// Retry loading the Collection with necessary pinning flags removed.
		coll, err = ebpf.NewCollectionWithOptions(spec, opts.CollectionOptions)
	}

	// Try to obtain the full verifier log if it was truncated. Note that
	// VerifierError is also returned if verification was successful but the
	// buffer was too small.
	attempts := 5
	for range attempts {
		var ve *ebpf.VerifierError
		if errors.As(err, &ve) && ve.Truncated {
			// Retry with non-zero log level to avoid retrying with log disabled.
			if opts.Programs.LogLevel == 0 {
				opts.Programs.LogLevel = ebpf.LogLevelBranch
			}

			opts.Programs.LogSize *= 4

			// Retry loading the Collection with increased log buffer.
			coll, err = ebpf.NewCollectionWithOptions(spec, opts.CollectionOptions)

			// Re-check error and bump attempts.
			continue
		}

		if err != nil {
			// Not a VerifierError or not truncated.
			return nil, nil, err
		}
	}
	if err != nil {
		// Retry loop failed to resolve a VerifierError.
		return nil, nil, fmt.Errorf("%d-byte truncated verifier log after %d attempts: %w", opts.CollectionOptions.Programs.LogSize, attempts, err)
	}

	// Load successful, return a function that must be invoked after attaching the
	// Collection's entrypoint programs to their respective hooks.
	commit := func() error {
		// Commit maps that need their bpffs pins replaced.
		if err := commitMapPins(toReplace, spec, coll, opts.CollectionOptions); err != nil {
			return fmt.Errorf("replacing map pins on bpffs: %w", err)
		}
		return nil
	}
	return coll, commit, nil
}
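
// exampleLoadAndCommit is an illustrative sketch and not part of the original
// source: it shows the intended calling sequence around LoadCollection. The
// object path and the constant name are hypothetical placeholders.
func exampleLoadAndCommit() error {
	spec, err := LoadCollectionSpec("bpf_example.o")
	if err != nil {
		return err
	}

	coll, commit, err := LoadCollection(spec, &CollectionOptions{
		// Override a constant declared with DECLARE_CONFIG in the C code.
		Constants: map[string]uint64{"example_setting": 1},
	})
	if err != nil {
		return err
	}
	defer coll.Close()

	// ... attach the Collection's entrypoint programs to their hooks here ...

	// Only after attaching should bpffs map pins be replaced.
	return commit()
}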

// classifyProgramTypes sets the type of ProgramSpecs that the library cannot
// classify automatically because they live in unrecognized ELF sections. Only
// programs of type UnspecifiedProgram are modified.
//
// Cilium uses the iproute2 X/Y section name convention for assigning programs
// to prog array slots, which the library also does not recognize.
//
// TODO(timo): When iproute2 is no longer used for any loading, tail call progs
// can receive proper prefixes.
func classifyProgramTypes(spec *ebpf.CollectionSpec) error {
	var t ebpf.ProgramType
	for name, p := range spec.Programs {
		// If the loader was able to classify a program, go with the verdict.
		if p.Type != ebpf.UnspecifiedProgram {
			t = p.Type
			break
		}

		// Assign a program type based on the first recognized function name.
		switch name {
		// bpf_xdp.c
		case "cil_xdp_entry":
			t = ebpf.XDP
		case
			// bpf_lxc.c
			"cil_from_container", "cil_to_container",
			// bpf_host.c
			"cil_from_netdev", "cil_from_host", "cil_to_netdev", "cil_to_host",
			// bpf_network.c
			"cil_from_network",
			// bpf_overlay.c
			"cil_to_overlay", "cil_from_overlay",
			// bpf_wireguard.c
			"cil_to_wireguard":
			t = ebpf.SchedCLS
		default:
			continue
		}

		break
	}

	for _, p := range spec.Programs {
		if p.Type == ebpf.UnspecifiedProgram {
			p.Type = t
		}
	}

	if t == ebpf.UnspecifiedProgram {
		return errors.New("unable to classify program types")
	}

	return nil
}
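
// exampleClassify is an illustrative sketch and not part of the original
// source: with every program unclassified, the presence of a recognized
// entrypoint name (here cil_from_container) causes all programs to be typed
// as SchedCLS. The second program name is made up.
func exampleClassify() error {
	spec := &ebpf.CollectionSpec{
		Programs: map[string]*ebpf.ProgramSpec{
			"cil_from_container": {Name: "cil_from_container", Type: ebpf.UnspecifiedProgram},
			"example_tail_call":  {Name: "example_tail_call", Type: ebpf.UnspecifiedProgram},
		},
	}
	if err := classifyProgramTypes(spec); err != nil {
		return err
	}

	// Both programs now report ebpf.SchedCLS.
	fmt.Println(spec.Programs["example_tail_call"].Type)
	return nil
}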

// Must match the prefix used by the CONFIG macro in static_data.h.
const constantPrefix = "__config_"

// inlineGlobalData replaces all map loads from a global data section with
// immediate dword loads, effectively performing those map lookups in the
// loader. This is done for compatibility with kernels that don't support
// global data maps yet.
//
// The overrides argument allows changing the values of the inlined global data.
//
// This code interacts with the DECLARE_CONFIG macro in the BPF C code base.
func inlineGlobalData(spec *ebpf.CollectionSpec, overrides map[string]uint64) error {
	offsets, values, err := globalData(spec)
	if err != nil {
		return err
	}
	if offsets == nil {
		// Most likely all references to global data have been compiled
		// out.
		return nil
	}

	for name, value := range overrides {
		constName := constantPrefix + name

		if _, ok := values[constName]; !ok {
			return fmt.Errorf("can't override non-existent constant %q", name)
		}

		values[constName] = value
	}

	for _, prog := range spec.Programs {
		for i, ins := range prog.Instructions {
			if !ins.IsLoadFromMap() || ins.Src != asm.PseudoMapValue {
				continue
			}

			if ins.Reference() != globalDataMap {
				return fmt.Errorf("global constants must be in %s, but found reference to %s", globalDataMap, ins.Reference())
			}

			// Get the offset of the read within the target map,
			// stored in the 32 most-significant bits of Constant.
			// Equivalent to Instruction.mapOffset().
			off := uint32(uint64(ins.Constant) >> 32)

			// Look up the value of the variable stored at the Datasec offset pointed
			// at by the instruction.
			v, ok := offsets[off]
			if !ok {
				return fmt.Errorf("no global constant found in %s at offset %d", globalDataMap, off)
			}

			// Replace the map load with an immediate load. Must be a dword load
			// to match the instruction width of a map load.
			r := asm.LoadImm(ins.Dst, int64(values[v]), asm.DWord)

			// Preserve metadata of the original instruction. Otherwise, a program's
			// first instruction could be stripped of its func_info or Symbol
			// (function start) annotations.
			r.Metadata = ins.Metadata

			prog.Instructions[i] = r
		}
	}

	return nil
}
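
// exampleInlineOverride is an illustrative sketch and not part of the original
// source: it shows the shape of the rewrite performed by inlineGlobalData. An
// override keyed "example_setting" (a made-up name) targets the datasec
// variable constantPrefix + "example_setting", i.e. "__config_example_setting";
// the matched map-value load is replaced by an immediate dword load of that
// value, preserving the original instruction's metadata.
func exampleInlineOverride(ins asm.Instruction, value uint64) asm.Instruction {
	r := asm.LoadImm(ins.Dst, int64(value), asm.DWord)
	r.Metadata = ins.Metadata
	return r
}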

// globalData gets the contents of the first entry in the global data map
// and removes the map from the spec to prevent it from being created in the kernel.
func globalData(spec *ebpf.CollectionSpec) (offsets map[uint32]string, values map[string]uint64, _ error) {
	dm := spec.Maps[globalDataMap]
	if dm == nil {
		return nil, nil, nil
	}

	if dl := len(dm.Contents); dl != 1 {
		return nil, nil, fmt.Errorf("expected one key in %s, found %d", globalDataMap, dl)
	}

	ds, ok := dm.Value.(*btf.Datasec)
	if !ok {
		return nil, nil, fmt.Errorf("no BTF datasec found for %s", globalDataMap)
	}

	data, ok := (dm.Contents[0].Value).([]byte)
	if !ok {
		return nil, nil, fmt.Errorf("expected %s value to be a byte slice, got: %T",
			globalDataMap, dm.Contents[0].Value)
	}

	// Slice up the binary contents of the global data map according to the
	// variables described in its Datasec.
	values = make(map[string]uint64)
	offsets = make(map[uint32]string)
	buf := make([]byte, 8)
	for _, vsi := range ds.Vars {
		v, ok := vsi.Type.(*btf.Var)
		if !ok {
			// VarSecInfo.Type can be a Func.
			continue
		}

		if _, ok := offsets[vsi.Offset]; ok {
			return nil, nil, fmt.Errorf("duplicate VarSecInfo for offset %d", vsi.Offset)
		}

		copy(buf, data[vsi.Offset:vsi.Offset+vsi.Size])

		var value uint64
		switch vsi.Size {
		case 8:
			value = spec.ByteOrder.Uint64(buf)
		case 4:
			value = uint64(spec.ByteOrder.Uint32(buf))
		case 2:
			value = uint64(spec.ByteOrder.Uint16(buf))
		case 1:
			value = uint64(buf[0])
		default:
			return nil, nil, fmt.Errorf("invalid variable size %d", vsi.Size)
		}

		// Record the variable's name by its offset in the datasec, and its value by name.
		offsets[vsi.Offset] = v.Name
		values[v.Name] = value
	}

	// Remove the map definition to skip loading it into the kernel.
	delete(spec.Maps, globalDataMap)

	return offsets, values, nil
}
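
// exampleDecodeGlobal is an illustrative sketch and not part of the original
// source: it widens a 4-byte datasec value into a uint64 the same way
// globalData does. The raw bytes and the little-endian assumption are made up
// for demonstration; the real code uses the CollectionSpec's ByteOrder.
func exampleDecodeGlobal() uint64 {
	data := []byte{0x2a, 0x00, 0x00, 0x00} // hypothetical 4-byte variable with value 42
	buf := make([]byte, 8)
	copy(buf, data)
	return uint64(binary.LittleEndian.Uint32(buf))
}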