github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/tchandler/tracer.go (about) 1 // Copyright 2022-2024 The Inspektor Gadget authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package tchandler handles how SchedCLS programs are attached to containers and network 16 // interfaces. The behavior is very similar to the network tracer implemented in 17 // pkg/networktracer/tracer.go. 18 // The main difference is that SchedCLS programs need to be attached to network interfaces and can 19 // be attached on ingress or egress. 20 package tchandler 21 22 import ( 23 "errors" 24 "fmt" 25 "net" 26 "sync" 27 28 "github.com/cilium/ebpf" 29 "github.com/florianl/go-tc" 30 "golang.org/x/sys/unix" 31 32 containercollection "github.com/inspektor-gadget/inspektor-gadget/pkg/container-collection" 33 containerutils "github.com/inspektor-gadget/inspektor-gadget/pkg/container-utils" 34 "github.com/inspektor-gadget/inspektor-gadget/pkg/netnsenter" 35 ) 36 37 //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -target bpfel -cc clang -cflags ${CFLAGS} dispatcher ./bpf/dispatcher.bpf.c -- -I./bpf/ 38 39 const ( 40 // Keep in sync with bpf/dispatcher.bpf.c 41 tailCallMapName = "gadget_tail_call" 42 ) 43 44 type attachment struct { 45 // dispatcher is a small eBPF program we attach to each network interface. This programs 46 // does a tail call to the gadget. The purpose of this program is to avoid loading multiple 47 // instances of the gadget when there are different networking interfaces it must be 48 // attached to. 49 dispatcher dispatcherObjects 50 // filter is the tc ebpf filter we attach to the network interface. This filter will execute 51 // the dispatcher above. 52 filter *tc.Object 53 54 // users keeps track of the users' pid that have called Attach(). This can happen for when 55 // there are several containers in a pod (sharing the netns, and hence the networking 56 // interface). In this case we want to attach the program once. 57 users map[uint32]struct{} 58 } 59 60 func (t *Handler) closeAttachment(a *attachment) { 61 if a.filter != nil { 62 t.tcnl.Filter().Delete(a.filter) 63 } 64 a.dispatcher.Close() 65 } 66 67 type Handler struct { 68 // dispatcher map is a program array map with a single element that is used by the 69 // dispatcher to perform a tail call to the gadget program. 70 dispatcherMap *ebpf.Map 71 // key: network interface name on the host side 72 // value: attachment 73 attachments map[string]*attachment 74 75 // socket to talk to netlink 76 // TODO: Currently we keep once instance of the socket for each Handler instance. Check if 77 // it makes sense to move this to the tracer to have one single instance per gadget. 78 // https://github.com/inspektor-gadget/inspektor-gadget/pull/2376#discussion_r1475472725 79 tcnl *tc.Tc 80 81 direction AttachmentDirection 82 83 // mu protects attachments from concurrent access 84 // AttachContainer and DetachContainer can be called in parallel 85 mu sync.Mutex 86 } 87 88 func NewHandler(direction AttachmentDirection) (*Handler, error) { 89 var err error 90 var tcnl *tc.Tc 91 92 // We need to create the client on the host network namespace, otherwise it's not able to 93 // create the qdisc and filters. 94 err = netnsenter.NetnsEnter(1, func() error { 95 // Setup tc socket for communication with the kernel 96 tcnl, err = tc.Open(&tc.Config{}) 97 if err != nil { 98 return fmt.Errorf("opening rtnetlink socket: %w", err) 99 } 100 return nil 101 }) 102 if err != nil { 103 return nil, err 104 } 105 106 t := &Handler{ 107 attachments: make(map[string]*attachment), 108 tcnl: tcnl, 109 direction: direction, 110 } 111 defer func() { 112 if err != nil { 113 t.Close() 114 } 115 }() 116 117 // Keep in sync with tail_call map in bpf/dispatcher.bpf.c 118 dispatcherMapSpec := ebpf.MapSpec{ 119 Name: tailCallMapName, 120 Type: ebpf.ProgramArray, 121 KeySize: 4, 122 ValueSize: 4, 123 MaxEntries: 1, 124 } 125 t.dispatcherMap, err = ebpf.NewMap(&dispatcherMapSpec) 126 if err != nil { 127 return nil, fmt.Errorf("creating tail call map: %w", err) 128 } 129 return t, nil 130 } 131 132 func (t *Handler) AttachProg(prog *ebpf.Program) error { 133 return t.dispatcherMap.Update(uint32(0), uint32(prog.FD()), ebpf.UpdateAny) 134 } 135 136 func (t *Handler) newAttachment(pid uint32, iface *net.Interface, netns uint64, direction AttachmentDirection) (_ *attachment, err error) { 137 a := &attachment{ 138 users: map[uint32]struct{}{pid: {}}, 139 } 140 141 var qdisc *tc.Object 142 143 defer func() { 144 if err != nil { 145 t.closeAttachment(a) 146 if qdisc != nil { 147 t.tcnl.Qdisc().Delete(qdisc) 148 } 149 } 150 }() 151 152 dispatcherSpec, err := loadDispatcher() 153 if err != nil { 154 return nil, err 155 } 156 157 consts := map[string]interface{}{ 158 "current_netns": uint32(netns), 159 } 160 if err := dispatcherSpec.RewriteConstants(consts); err != nil { 161 return nil, fmt.Errorf("RewriteConstants while attaching to pid %d: %w", pid, err) 162 } 163 164 // We create the clsact qdisc and leak it. We can't remove it because we'll break any other 165 // application (including other ig instances) that are using it. 166 if qdisc, err = createClsActQdisc(t.tcnl, iface); err != nil && !errors.Is(err, unix.EEXIST) { 167 return nil, fmt.Errorf("creating clsact qdisc: %w", err) 168 } 169 170 optsIngress := ebpf.CollectionOptions{ 171 MapReplacements: map[string]*ebpf.Map{ 172 tailCallMapName: t.dispatcherMap, 173 }, 174 } 175 if err = dispatcherSpec.LoadAndAssign(&a.dispatcher, &optsIngress); err != nil { 176 return nil, fmt.Errorf("loading ebpf program: %w", err) 177 } 178 179 a.filter, err = addTCFilter(t.tcnl, a.dispatcher.IgNetDisp, iface, direction) 180 if err != nil { 181 return nil, fmt.Errorf("attaching ebpf program to interface %s: %w", iface.Name, err) 182 } 183 184 return a, nil 185 } 186 187 func (t *Handler) AttachContainer(container *containercollection.Container) error { 188 // It's not clear what to do with hostNetwork containers. For now we just ignore them. 189 if container.HostNetwork { 190 return nil 191 } 192 193 pid := container.Pid 194 195 netns, err := containerutils.GetNetNs(int(pid)) 196 if err != nil { 197 return fmt.Errorf("getting network interfaces on the host side for pid %d: %w", pid, err) 198 } 199 200 // If we're attaching a container, we need to invert ingress and egress because ingress on the 201 // host end of the veth interface is egress on the container side and vice versa. 202 var direction AttachmentDirection 203 switch t.direction { 204 case AttachmentDirectionIngress: 205 direction = AttachmentDirectionEgress 206 case AttachmentDirectionEgress: 207 direction = AttachmentDirectionIngress 208 } 209 210 ifaces, err := containerutils.GetIfacePeers(int(pid)) 211 if err != nil { 212 return fmt.Errorf("getting network namespace of pid %d: %w", pid, err) 213 } 214 215 t.mu.Lock() 216 defer t.mu.Unlock() 217 218 // We need to perform these operations from the host network namespace, otherwise we won't 219 // be able to add the filter to the network interface. 220 err = netnsenter.NetnsEnter(1, func() error { 221 for _, iface := range ifaces { 222 if a, ok := t.attachments[iface.Name]; ok { 223 a.users[pid] = struct{}{} 224 return nil 225 } 226 227 a, err := t.newAttachment(pid, iface, netns, direction) 228 if err != nil { 229 return fmt.Errorf("creating network handler attachment for container %s: %w", 230 container.Runtime.ContainerName, err) 231 } 232 t.attachments[iface.Name] = a 233 } 234 235 return nil 236 }) 237 return err 238 } 239 240 func (t *Handler) DetachContainer(container *containercollection.Container) error { 241 // It's not clear what to do with hostNetwork containers. For now we just ignore them. 242 if container.HostNetwork { 243 return nil 244 } 245 246 pid := container.Pid 247 248 t.mu.Lock() 249 defer t.mu.Unlock() 250 251 for ifacename, a := range t.attachments { 252 if _, ok := a.users[pid]; ok { 253 delete(a.users, pid) 254 if len(a.users) == 0 { 255 t.closeAttachment(a) 256 delete(t.attachments, ifacename) 257 } 258 return nil 259 } 260 } 261 return fmt.Errorf("pid %d is not attached", pid) 262 } 263 264 // AttachIface attaches the tracer to the given interface on the host. See AttachContainer() if you 265 // want to attach to a container. 266 func (t *Handler) AttachIface(iface *net.Interface) error { 267 if _, ok := t.attachments[iface.Name]; ok { 268 return nil 269 } 270 271 hostNs, err := containerutils.GetNetNs(int(1)) 272 if err != nil { 273 return fmt.Errorf("getting network namespace of pid %d: %w", 1, err) 274 } 275 276 a, err := t.newAttachment(1, iface, hostNs, t.direction) 277 if err != nil { 278 return fmt.Errorf("creating network handler attachment for interface %s: %w", iface.Name, err) 279 } 280 t.attachments[iface.Name] = a 281 282 return nil 283 } 284 285 func (t *Handler) DetachIface(iface *net.Interface) error { 286 if a, ok := t.attachments[iface.Name]; ok { 287 t.closeAttachment(a) 288 delete(t.attachments, iface.Name) 289 return nil 290 } 291 return fmt.Errorf("interface %s is not attached", iface.Name) 292 } 293 294 func (t *Handler) Close() { 295 for _, a := range t.attachments { 296 t.closeAttachment(a) 297 } 298 if t.dispatcherMap != nil { 299 t.dispatcherMap.Close() 300 } 301 if t.tcnl != nil { 302 t.tcnl.Close() 303 } 304 }