// Source: github.com/cilium/cilium@v1.16.2/pkg/socketlb/cgroup.go
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

// attachCgroup and detachCgroup have to deal with two different kernel APIs:
//
// bpf_link (available with kernel version >= 5.7): in order for the program<->cgroup
// association to outlive the userspace process, the link (not the program) needs to be pinned.
// Removing the pinned link on bpffs breaks the association.
// Cilium will only use links on fresh installs and if available in the kernel.
// On upgrade, a link can be updated using link.Update(), which will atomically replace the
// currently running bpf program.
//
// PROG_ATTACH (all kernel versions pre 5.7 that cilium supports): by definition the association
// outlives userspace as the cgroup will hold a reference to the attached program and detaching
// must be done explicitly using PROG_DETACH.
// This API is what cilium has been using prior to the 1.14 release and will continue to use if
// bpf_link is not available.
// On upgrade, cilium will continue to seamlessly replace old programs with the PROG_ATTACH API,
// because updating it with a bpf_link could cause connectivity interruptions.
20 21 package socketlb 22 23 import ( 24 "errors" 25 "fmt" 26 "os" 27 "path/filepath" 28 29 "github.com/cilium/ebpf" 30 "github.com/cilium/ebpf/link" 31 "golang.org/x/sys/unix" 32 33 "github.com/cilium/cilium/pkg/bpf" 34 ) 35 36 var attachTypes = map[string]ebpf.AttachType{ 37 Connect4: ebpf.AttachCGroupInet4Connect, 38 SendMsg4: ebpf.AttachCGroupUDP4Sendmsg, 39 RecvMsg4: ebpf.AttachCGroupUDP4Recvmsg, 40 GetPeerName4: ebpf.AttachCgroupInet4GetPeername, 41 PostBind4: ebpf.AttachCGroupInet4PostBind, 42 PreBind4: ebpf.AttachCGroupInet4Bind, 43 Connect6: ebpf.AttachCGroupInet6Connect, 44 SendMsg6: ebpf.AttachCGroupUDP6Sendmsg, 45 RecvMsg6: ebpf.AttachCGroupUDP6Recvmsg, 46 GetPeerName6: ebpf.AttachCgroupInet6GetPeername, 47 PostBind6: ebpf.AttachCGroupInet6PostBind, 48 PreBind6: ebpf.AttachCGroupInet6Bind, 49 } 50 51 // attachCgroup attaches a program from spec with the given name to cgroupRoot. 52 // If the kernel supports it, the resulting bpf_link is pinned to pinPath. 53 // 54 // Upgrades from prior Cilium versions will continue to be handled by a PROG_ATTACH 55 // to replace an old program attached to a cgroup. 56 func attachCgroup(spec *ebpf.Collection, name, cgroupRoot, pinPath string) error { 57 prog := spec.Programs[name] 58 if prog == nil { 59 return fmt.Errorf("program %s not found in ELF", name) 60 } 61 62 // Attempt to open and update an existing link. 63 pin := filepath.Join(pinPath, name) 64 err := bpf.UpdateLink(pin, prog) 65 switch { 66 // Update successful, nothing left to do. 67 case err == nil: 68 log.Infof("Updated link %s for program %s", pin, name) 69 70 return nil 71 72 // Link exists, but is defunct, and needs to be recreated against a new 73 // cgroup. This can happen in environments like dind where we're attaching 74 // to a sub-cgroup that goes away if the container is destroyed, but the 75 // link persists in the host's /sys/fs/bpf. The program no longer gets 76 // triggered at this point and the link needs to be removed to proceed. 
77 case errors.Is(err, unix.ENOLINK): 78 if err := os.Remove(pin); err != nil { 79 return fmt.Errorf("unpinning defunct link %s: %w", pin, err) 80 } 81 82 log.Infof("Unpinned defunct link %s for program %s", pin, name) 83 84 // No existing link found, continue trying to create one. 85 case errors.Is(err, os.ErrNotExist): 86 log.Infof("No existing link found at %s for program %s", pin, name) 87 88 default: 89 return fmt.Errorf("updating link %s for program %s: %w", pin, name, err) 90 } 91 92 cg, err := os.Open(cgroupRoot) 93 if err != nil { 94 return fmt.Errorf("open cgroup %s: %w", cgroupRoot, err) 95 } 96 defer cg.Close() 97 98 // Create a new link. This will only succeed on nodes that support bpf_link 99 // and don't have any attached PROG_ATTACH programs. 100 l, err := link.AttachRawLink(link.RawLinkOptions{ 101 Target: int(cg.Fd()), 102 Program: prog, 103 Attach: attachTypes[name], 104 }) 105 if err == nil { 106 defer func() { 107 // The program was successfully attached using bpf_link. Closing a link 108 // does not detach the program if the link is pinned. 109 if err := l.Close(); err != nil { 110 log.Warnf("Failed to close bpf_link for program %s", name) 111 } 112 }() 113 114 if err := l.Pin(pin); err != nil { 115 return fmt.Errorf("pin link at %s for program %s : %w", pin, name, err) 116 } 117 118 // Successfully created and pinned bpf_link. 119 log.Debugf("Program %s attached using bpf_link", name) 120 121 return nil 122 } 123 124 // Kernels before 5.7 don't support bpf_link. In that case link.AttachRawLink 125 // returns ErrNotSupported. 126 // 127 // If the kernel supports bpf_link, but an older version of Cilium attached a 128 // cgroup program without flags (old init.sh behaviour), link.AttachRawLink 129 // will return EPERM because bpf_link implicitly uses the multi flag. 130 if !errors.Is(err, unix.EPERM) && !errors.Is(err, link.ErrNotSupported) { 131 // Unrecoverable error from AttachRawLink. 
132 return fmt.Errorf("attach program %s using bpf_link: %w", name, err) 133 } 134 135 log.Debugf("Performing PROG_ATTACH for program %s", name) 136 137 // Call PROG_ATTACH without flags to attach the program if bpf_link is not 138 // available or a previous PROG_ATTACH without flags has to be seamlessly 139 // replaced. 140 if err := link.RawAttachProgram(link.RawAttachProgramOptions{ 141 Target: int(cg.Fd()), 142 Program: prog, 143 Attach: attachTypes[name], 144 }); err != nil { 145 return fmt.Errorf("PROG_ATTACH for program %s: %w", name, err) 146 } 147 148 // Nothing left to do, the cgroup now holds a reference to the prog 149 // so we don't need to hold a reference in the agent/bpffs to ensure 150 // the program stays active. 151 log.Debugf("Program %s was attached using PROG_ATTACH", name) 152 153 return nil 154 155 } 156 157 // detachCgroup detaches a program with the given name from cgroupRoot. Attempts 158 // to open a pinned link with the given name from directory pinPath first, 159 // falling back to PROG_DETACH if no pin is present. 160 func detachCgroup(name, cgroupRoot, pinPath string) error { 161 pin := filepath.Join(pinPath, name) 162 err := bpf.UnpinLink(pin) 163 if err == nil { 164 return nil 165 } 166 167 if !errors.Is(err, os.ErrNotExist) { 168 // The pinned link exists, something went wrong unpinning it. 169 return fmt.Errorf("unpinning cgroup program using bpf_link: %w", err) 170 } 171 172 // No bpf_link pin found, detach all prog_attach progs. 173 log.Debugf("No pinned link '%s', querying cgroup", pin) 174 err = detachAll(attachTypes[name], cgroupRoot) 175 // Treat detaching unsupported attach types as successful. 176 if errors.Is(err, link.ErrNotSupported) { 177 return nil 178 } 179 return err 180 } 181 182 // detachAll detaches all programs attached to cgroupRoot with the corresponding attach type. 
183 func detachAll(attach ebpf.AttachType, cgroupRoot string) error { 184 cg, err := os.Open(cgroupRoot) 185 if err != nil { 186 return fmt.Errorf("open cgroup %s: %w", cg.Name(), err) 187 } 188 defer cg.Close() 189 190 // Query the program ids of all programs currently attached to the given cgroup 191 // with the given attach type. In ciliums case this should always return only one id. 192 ids, err := link.QueryPrograms(link.QueryOptions{ 193 Target: int(cg.Fd()), 194 Attach: attach, 195 }) 196 // We know the cgroup root exists, so EINVAL will likely mean querying 197 // the given attach type is not supported. 198 if errors.Is(err, unix.EINVAL) { 199 err = fmt.Errorf("%w: %w", err, link.ErrNotSupported) 200 } 201 // Even though the cgroup exists, QueryPrograms will return EBADF 202 // on a cgroupv1. 203 if errors.Is(err, unix.EBADF) { 204 log.Debug("The cgroup exists but is a cgroupv1. No detachment necessary") 205 return nil 206 } 207 if err != nil { 208 return fmt.Errorf("query cgroup %s for type %s: %w", cgroupRoot, attach, err) 209 } 210 if ids == nil || len(ids.Programs) == 0 { 211 log.Debugf("No programs in cgroup %s with attach type %s", cgroupRoot, attach) 212 return nil 213 } 214 215 // cilium owns the cgroup and assumes only one program is attached. 216 // This allows to remove all ids returned in the query phase. 217 for _, id := range ids.Programs { 218 prog, err := ebpf.NewProgramFromID(id.ID) 219 if err != nil { 220 return fmt.Errorf("could not open program id %d: %w", id, err) 221 } 222 defer prog.Close() 223 224 if err := link.RawDetachProgram(link.RawDetachProgramOptions{ 225 Target: int(cg.Fd()), 226 Program: prog, 227 Attach: attach, 228 }); err != nil { 229 return fmt.Errorf("detach programs from cgroup %s attach type %s: %w", cgroupRoot, attach, err) 230 } 231 232 log.Debugf("Detached program id %d", id) 233 } 234 235 return nil 236 }