github.com/cilium/cilium@v1.16.2/pkg/socketlb/cgroup.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  // attachCgroup and detachCgroup have to deal with two different kernel APIs:
     5  //
     6  // bpf_link (available with kernel version >= 5.7): in order for the program<->cgroup
     7  // association to outlive the userspace process, the link (not the program) needs to be pinned.
     8  // Removing the pinned link on bpffs breaks the association.
     9  // Cilium will only use links on fresh installs and if available in the kernel.
    10  // On upgrade, a link can be updated using link.Update(), which will atomically replace the
    11  // currently running bpf program.
    12  //
    13  // PROG_ATTACH (all kernel versions pre 5.7 that cilium supports): by definition the association
// outlives userspace as the cgroup will hold a reference to the attached program and detaching
    15  // must be done explicitly using PROG_DETACH.
    16  // This API is what cilium has been using prior to the 1.14 release and will continue to use if
    17  // bpf_link is not available.
    18  // On upgrade, cilium will continue to seamlessly replace old programs with the PROG_ATTACH API,
    19  // because updating it with a bpf_link could cause connectivity interruptions.
    20  
    21  package socketlb
    22  
    23  import (
    24  	"errors"
    25  	"fmt"
    26  	"os"
    27  	"path/filepath"
    28  
    29  	"github.com/cilium/ebpf"
    30  	"github.com/cilium/ebpf/link"
    31  	"golang.org/x/sys/unix"
    32  
    33  	"github.com/cilium/cilium/pkg/bpf"
    34  )
    35  
    36  var attachTypes = map[string]ebpf.AttachType{
    37  	Connect4:     ebpf.AttachCGroupInet4Connect,
    38  	SendMsg4:     ebpf.AttachCGroupUDP4Sendmsg,
    39  	RecvMsg4:     ebpf.AttachCGroupUDP4Recvmsg,
    40  	GetPeerName4: ebpf.AttachCgroupInet4GetPeername,
    41  	PostBind4:    ebpf.AttachCGroupInet4PostBind,
    42  	PreBind4:     ebpf.AttachCGroupInet4Bind,
    43  	Connect6:     ebpf.AttachCGroupInet6Connect,
    44  	SendMsg6:     ebpf.AttachCGroupUDP6Sendmsg,
    45  	RecvMsg6:     ebpf.AttachCGroupUDP6Recvmsg,
    46  	GetPeerName6: ebpf.AttachCgroupInet6GetPeername,
    47  	PostBind6:    ebpf.AttachCGroupInet6PostBind,
    48  	PreBind6:     ebpf.AttachCGroupInet6Bind,
    49  }
    50  
    51  // attachCgroup attaches a program from spec with the given name to cgroupRoot.
    52  // If the kernel supports it, the resulting bpf_link is pinned to pinPath.
    53  //
    54  // Upgrades from prior Cilium versions will continue to be handled by a PROG_ATTACH
    55  // to replace an old program attached to a cgroup.
    56  func attachCgroup(spec *ebpf.Collection, name, cgroupRoot, pinPath string) error {
    57  	prog := spec.Programs[name]
    58  	if prog == nil {
    59  		return fmt.Errorf("program %s not found in ELF", name)
    60  	}
    61  
    62  	// Attempt to open and update an existing link.
    63  	pin := filepath.Join(pinPath, name)
    64  	err := bpf.UpdateLink(pin, prog)
    65  	switch {
    66  	// Update successful, nothing left to do.
    67  	case err == nil:
    68  		log.Infof("Updated link %s for program %s", pin, name)
    69  
    70  		return nil
    71  
    72  	// Link exists, but is defunct, and needs to be recreated against a new
    73  	// cgroup. This can happen in environments like dind where we're attaching
    74  	// to a sub-cgroup that goes away if the container is destroyed, but the
    75  	// link persists in the host's /sys/fs/bpf. The program no longer gets
    76  	// triggered at this point and the link needs to be removed to proceed.
    77  	case errors.Is(err, unix.ENOLINK):
    78  		if err := os.Remove(pin); err != nil {
    79  			return fmt.Errorf("unpinning defunct link %s: %w", pin, err)
    80  		}
    81  
    82  		log.Infof("Unpinned defunct link %s for program %s", pin, name)
    83  
    84  	// No existing link found, continue trying to create one.
    85  	case errors.Is(err, os.ErrNotExist):
    86  		log.Infof("No existing link found at %s for program %s", pin, name)
    87  
    88  	default:
    89  		return fmt.Errorf("updating link %s for program %s: %w", pin, name, err)
    90  	}
    91  
    92  	cg, err := os.Open(cgroupRoot)
    93  	if err != nil {
    94  		return fmt.Errorf("open cgroup %s: %w", cgroupRoot, err)
    95  	}
    96  	defer cg.Close()
    97  
    98  	// Create a new link. This will only succeed on nodes that support bpf_link
    99  	// and don't have any attached PROG_ATTACH programs.
   100  	l, err := link.AttachRawLink(link.RawLinkOptions{
   101  		Target:  int(cg.Fd()),
   102  		Program: prog,
   103  		Attach:  attachTypes[name],
   104  	})
   105  	if err == nil {
   106  		defer func() {
   107  			// The program was successfully attached using bpf_link. Closing a link
   108  			// does not detach the program if the link is pinned.
   109  			if err := l.Close(); err != nil {
   110  				log.Warnf("Failed to close bpf_link for program %s", name)
   111  			}
   112  		}()
   113  
   114  		if err := l.Pin(pin); err != nil {
   115  			return fmt.Errorf("pin link at %s for program %s : %w", pin, name, err)
   116  		}
   117  
   118  		// Successfully created and pinned bpf_link.
   119  		log.Debugf("Program %s attached using bpf_link", name)
   120  
   121  		return nil
   122  	}
   123  
   124  	// Kernels before 5.7 don't support bpf_link. In that case link.AttachRawLink
   125  	// returns ErrNotSupported.
   126  	//
   127  	// If the kernel supports bpf_link, but an older version of Cilium attached a
   128  	// cgroup program without flags (old init.sh behaviour), link.AttachRawLink
   129  	// will return EPERM because bpf_link implicitly uses the multi flag.
   130  	if !errors.Is(err, unix.EPERM) && !errors.Is(err, link.ErrNotSupported) {
   131  		// Unrecoverable error from AttachRawLink.
   132  		return fmt.Errorf("attach program %s using bpf_link: %w", name, err)
   133  	}
   134  
   135  	log.Debugf("Performing PROG_ATTACH for program %s", name)
   136  
   137  	// Call PROG_ATTACH without flags to attach the program if bpf_link is not
   138  	// available or a previous PROG_ATTACH without flags has to be seamlessly
   139  	// replaced.
   140  	if err := link.RawAttachProgram(link.RawAttachProgramOptions{
   141  		Target:  int(cg.Fd()),
   142  		Program: prog,
   143  		Attach:  attachTypes[name],
   144  	}); err != nil {
   145  		return fmt.Errorf("PROG_ATTACH for program %s: %w", name, err)
   146  	}
   147  
   148  	// Nothing left to do, the cgroup now holds a reference to the prog
   149  	// so we don't need to hold a reference in the agent/bpffs to ensure
   150  	// the program stays active.
   151  	log.Debugf("Program %s was attached using PROG_ATTACH", name)
   152  
   153  	return nil
   154  
   155  }
   156  
   157  // detachCgroup detaches a program with the given name from cgroupRoot. Attempts
   158  // to open a pinned link with the given name from directory pinPath first,
   159  // falling back to PROG_DETACH if no pin is present.
   160  func detachCgroup(name, cgroupRoot, pinPath string) error {
   161  	pin := filepath.Join(pinPath, name)
   162  	err := bpf.UnpinLink(pin)
   163  	if err == nil {
   164  		return nil
   165  	}
   166  
   167  	if !errors.Is(err, os.ErrNotExist) {
   168  		// The pinned link exists, something went wrong unpinning it.
   169  		return fmt.Errorf("unpinning cgroup program using bpf_link: %w", err)
   170  	}
   171  
   172  	// No bpf_link pin found, detach all prog_attach progs.
   173  	log.Debugf("No pinned link '%s', querying cgroup", pin)
   174  	err = detachAll(attachTypes[name], cgroupRoot)
   175  	// Treat detaching unsupported attach types as successful.
   176  	if errors.Is(err, link.ErrNotSupported) {
   177  		return nil
   178  	}
   179  	return err
   180  }
   181  
   182  // detachAll detaches all programs attached to cgroupRoot with the corresponding attach type.
   183  func detachAll(attach ebpf.AttachType, cgroupRoot string) error {
   184  	cg, err := os.Open(cgroupRoot)
   185  	if err != nil {
   186  		return fmt.Errorf("open cgroup %s: %w", cg.Name(), err)
   187  	}
   188  	defer cg.Close()
   189  
   190  	// Query the program ids of all programs currently attached to the given cgroup
   191  	// with the given attach type. In ciliums case this should always return only one id.
   192  	ids, err := link.QueryPrograms(link.QueryOptions{
   193  		Target: int(cg.Fd()),
   194  		Attach: attach,
   195  	})
   196  	// We know the cgroup root exists, so EINVAL will likely mean querying
   197  	// the given attach type is not supported.
   198  	if errors.Is(err, unix.EINVAL) {
   199  		err = fmt.Errorf("%w: %w", err, link.ErrNotSupported)
   200  	}
   201  	// Even though the cgroup exists, QueryPrograms will return EBADF
   202  	// on a cgroupv1.
   203  	if errors.Is(err, unix.EBADF) {
   204  		log.Debug("The cgroup exists but is a cgroupv1. No detachment necessary")
   205  		return nil
   206  	}
   207  	if err != nil {
   208  		return fmt.Errorf("query cgroup %s for type %s: %w", cgroupRoot, attach, err)
   209  	}
   210  	if ids == nil || len(ids.Programs) == 0 {
   211  		log.Debugf("No programs in cgroup %s with attach type %s", cgroupRoot, attach)
   212  		return nil
   213  	}
   214  
   215  	// cilium owns the cgroup and assumes only one program is attached.
   216  	// This allows to remove all ids returned in the query phase.
   217  	for _, id := range ids.Programs {
   218  		prog, err := ebpf.NewProgramFromID(id.ID)
   219  		if err != nil {
   220  			return fmt.Errorf("could not open program id %d: %w", id, err)
   221  		}
   222  		defer prog.Close()
   223  
   224  		if err := link.RawDetachProgram(link.RawDetachProgramOptions{
   225  			Target:  int(cg.Fd()),
   226  			Program: prog,
   227  			Attach:  attach,
   228  		}); err != nil {
   229  			return fmt.Errorf("detach programs from cgroup %s attach type %s: %w", cgroupRoot, attach, err)
   230  		}
   231  
   232  		log.Debugf("Detached program id %d", id)
   233  	}
   234  
   235  	return nil
   236  }