github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/networktracer/tracer.go (about) 1 // Copyright 2022-2023 The Inspektor Gadget authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package networktracer installs the dispatcher ebpf program in each network 16 // namespace of interest. The dispatcher program runs a tail call to the actual 17 // gadget program. 18 // 19 // This is done both for builtin gadgets and containerized gadgets. In the case 20 // of containerized gadgets, the dispatcher program is installed before 21 // knowing the actual gadget program. Once it knows the actual gadget program, 22 // the tail call map is updated. 23 // 24 // In the case of builtin gadgets, the Run() method can be called to fetch and 25 // process events from ebpf. The containerized gadgets won't call Run() because 26 // run/tracer.go fetches and processes the events itself. Instead, they will 27 // just call AttachProg(). 28 // 29 // The actual gadget program is instantiated only once for performance reasons. 30 // The network namespace is passed to the actual gadget program via the 31 // skb->cb[0] variable. 
32 // 33 // https://github.com/inspektor-gadget/inspektor-gadget/blob/main/docs/devel/network-gadget-dispatcher.png 34 package networktracer 35 36 import ( 37 "errors" 38 "fmt" 39 "os" 40 "strings" 41 "sync" 42 "syscall" 43 "unsafe" 44 45 "github.com/cilium/ebpf" 46 "github.com/cilium/ebpf/perf" 47 "golang.org/x/sys/unix" 48 49 containercollection "github.com/inspektor-gadget/inspektor-gadget/pkg/container-collection" 50 containerutils "github.com/inspektor-gadget/inspektor-gadget/pkg/container-utils" 51 "github.com/inspektor-gadget/inspektor-gadget/pkg/gadgets" 52 "github.com/inspektor-gadget/inspektor-gadget/pkg/rawsock" 53 "github.com/inspektor-gadget/inspektor-gadget/pkg/socketenricher" 54 "github.com/inspektor-gadget/inspektor-gadget/pkg/types" 55 ) 56 57 //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -target bpfel -cc clang -cflags ${CFLAGS} dispatcher ./bpf/dispatcher.bpf.c -- -I./bpf/ -I../socketenricher/bpf 58 59 type attachment struct { 60 dispatcherObjs dispatcherObjects 61 62 sockFd int 63 64 // users keeps track of the users' pid that have called Attach(). This can 65 // happen for two reasons: 66 // 1. several containers in a pod (sharing the netns) 67 // 2. pods with networkHost=true 68 // In both cases, we want to attach the BPF program only once. 
69 users map[uint32]struct{} 70 } 71 72 type Tracer[Event any] struct { 73 socketEnricherMap *ebpf.Map 74 dispatcherMap *ebpf.Map 75 collection *ebpf.Collection 76 prog *ebpf.Program 77 perfRd *perf.Reader 78 79 // key: network namespace inode number 80 // value: Tracelet 81 attachments map[uint64]*attachment 82 83 eventHandler func(ev *Event) 84 85 // mu protects attachments from concurrent access 86 // AttachContainer and DetachContainer can be called in parallel 87 mu sync.Mutex 88 } 89 90 func (t *Tracer[Event]) newAttachment( 91 pid uint32, 92 netns uint64, 93 ) (_ *attachment, err error) { 94 a := &attachment{ 95 sockFd: -1, 96 users: map[uint32]struct{}{pid: {}}, 97 } 98 defer func() { 99 if err != nil { 100 if a.sockFd != -1 { 101 unix.Close(a.sockFd) 102 } 103 a.dispatcherObjs.Close() 104 } 105 }() 106 107 dispatcherSpec, err := loadDispatcher() 108 if err != nil { 109 return nil, err 110 } 111 112 u32netns := uint32(netns) 113 consts := map[string]interface{}{ 114 "current_netns": u32netns, 115 } 116 if err := dispatcherSpec.RewriteConstants(consts); err != nil { 117 return nil, fmt.Errorf("RewriteConstants while attaching to pid %d: %w", pid, err) 118 } 119 opts := ebpf.CollectionOptions{ 120 MapReplacements: map[string]*ebpf.Map{ 121 "tail_call": t.dispatcherMap, 122 }, 123 } 124 if err = dispatcherSpec.LoadAndAssign(&a.dispatcherObjs, &opts); err != nil { 125 return nil, fmt.Errorf("loading ebpf program: %w", err) 126 } 127 128 a.sockFd, err = rawsock.OpenRawSock(pid) 129 if err != nil { 130 return nil, fmt.Errorf("opening raw socket: %w", err) 131 } 132 133 if err := syscall.SetsockoptInt(a.sockFd, syscall.SOL_SOCKET, unix.SO_ATTACH_BPF, a.dispatcherObjs.IgNetDisp.FD()); err != nil { 134 return nil, fmt.Errorf("attaching BPF program: %w", err) 135 } 136 return a, nil 137 } 138 139 func NewTracer[Event any]() (_ *Tracer[Event], err error) { 140 t := &Tracer[Event]{ 141 attachments: make(map[uint64]*attachment), 142 } 143 144 // Keep in sync with 
tail_call map in bpf/dispatcher.bpf.c 145 dispatcherMapSpec := ebpf.MapSpec{ 146 Name: "tail_call", 147 Type: ebpf.ProgramArray, 148 KeySize: 4, 149 ValueSize: 4, 150 MaxEntries: 1, 151 } 152 t.dispatcherMap, err = ebpf.NewMap(&dispatcherMapSpec) 153 if err != nil { 154 return nil, fmt.Errorf("creating tail_call map: %w", err) 155 } 156 return t, nil 157 } 158 159 func (t *Tracer[Event]) SetSocketEnricherMap(m *ebpf.Map) { 160 t.socketEnricherMap = m 161 } 162 163 func (t *Tracer[Event]) Run( 164 spec *ebpf.CollectionSpec, 165 baseEvent func(ev types.Event) *Event, 166 processEvent func(rawSample []byte, netns uint64) (*Event, error), 167 ) (err error) { 168 gadgets.FixBpfKtimeGetBootNs(spec.Programs) 169 170 defer func() { 171 if err != nil { 172 if t.perfRd != nil { 173 t.perfRd.Close() 174 } 175 if t.collection != nil { 176 t.collection.Close() 177 } 178 } 179 }() 180 181 var opts ebpf.CollectionOptions 182 183 // Automatically find the socket program 184 bpfProgName := "" 185 for progName, p := range spec.Programs { 186 if p.Type == ebpf.SocketFilter && strings.HasPrefix(p.SectionName, "socket") { 187 if bpfProgName != "" { 188 return fmt.Errorf("multiple socket programs found: %s, %s", bpfProgName, progName) 189 } 190 bpfProgName = progName 191 } 192 } 193 if bpfProgName == "" { 194 return fmt.Errorf("no socket program found") 195 } 196 197 // Automatically find the perf map 198 bpfPerfMapName := "" 199 for mapName, m := range spec.Maps { 200 if m.Type == ebpf.PerfEventArray { 201 if bpfPerfMapName != "" { 202 return fmt.Errorf("multiple perf maps found: %s, %s", bpfPerfMapName, mapName) 203 } 204 bpfPerfMapName = mapName 205 } 206 } 207 if bpfPerfMapName == "" { 208 return fmt.Errorf("no perf map found") 209 } 210 211 usesSocketEnricher := false 212 for _, m := range spec.Maps { 213 if m.Name == socketenricher.SocketsMapName { 214 usesSocketEnricher = true 215 break 216 } 217 } 218 219 if usesSocketEnricher && t.socketEnricherMap != nil { 220 mapReplacements 
:= map[string]*ebpf.Map{} 221 mapReplacements[socketenricher.SocketsMapName] = t.socketEnricherMap 222 opts.MapReplacements = mapReplacements 223 } 224 225 t.collection, err = ebpf.NewCollectionWithOptions(spec, opts) 226 if err != nil { 227 return fmt.Errorf("creating BPF collection: %w", err) 228 } 229 230 t.perfRd, err = perf.NewReader(t.collection.Maps[bpfPerfMapName], gadgets.PerfBufferPages*os.Getpagesize()) 231 if err != nil { 232 return fmt.Errorf("getting a perf reader: %w", err) 233 } 234 235 var ok bool 236 t.prog, ok = t.collection.Programs[bpfProgName] 237 if !ok { 238 return fmt.Errorf("BPF program %q not found", bpfProgName) 239 } 240 241 err = t.AttachProg(t.prog) 242 if err != nil { 243 return fmt.Errorf("updating tail call map: %w", err) 244 } 245 246 go t.listen(baseEvent, processEvent) 247 248 return nil 249 } 250 251 // AttachProg is used directly by containerized gadgets 252 func (t *Tracer[Event]) AttachProg(prog *ebpf.Program) error { 253 return t.dispatcherMap.Update(uint32(0), uint32(prog.FD()), ebpf.UpdateAny) 254 } 255 256 func (t *Tracer[Event]) Attach(pid uint32) error { 257 t.mu.Lock() 258 defer t.mu.Unlock() 259 260 netns, err := containerutils.GetNetNs(int(pid)) 261 if err != nil { 262 return fmt.Errorf("getting network namespace of pid %d: %w", pid, err) 263 } 264 if a, ok := t.attachments[netns]; ok { 265 a.users[pid] = struct{}{} 266 return nil 267 } 268 269 a, err := t.newAttachment(pid, netns) 270 if err != nil { 271 return fmt.Errorf("creating network tracer attachment for pid %d: %w", pid, err) 272 } 273 t.attachments[netns] = a 274 275 return nil 276 } 277 278 func (t *Tracer[Event]) SetEventHandler(handler any) { 279 if t.eventHandler != nil { 280 panic("handler already set") 281 } 282 283 nh, ok := handler.(func(ev *Event)) 284 if !ok { 285 panic("event handler invalid") 286 } 287 t.eventHandler = nh 288 } 289 290 // EventCallback provides support for legacy pkg/gadget-collection 291 func (t *Tracer[Event]) 
EventCallback(event any) { 292 e, ok := event.(*Event) 293 if !ok { 294 panic("event handler argument invalid") 295 } 296 if t.eventHandler == nil { 297 return 298 } 299 t.eventHandler(e) 300 } 301 302 func (t *Tracer[Event]) AttachContainer(container *containercollection.Container) error { 303 return t.Attach(container.Pid) 304 } 305 306 func (t *Tracer[Event]) DetachContainer(container *containercollection.Container) error { 307 return t.Detach(container.Pid) 308 } 309 310 func (t *Tracer[Event]) GetMap(name string) *ebpf.Map { 311 return t.collection.Maps[name] 312 } 313 314 func (t *Tracer[Event]) listen( 315 baseEvent func(ev types.Event) *Event, 316 processEvent func(rawSample []byte, netns uint64) (*Event, error), 317 ) { 318 for { 319 record, err := t.perfRd.Read() 320 if err != nil { 321 if errors.Is(err, perf.ErrClosed) { 322 return 323 } 324 325 msg := fmt.Sprintf("Error reading perf ring buffer: %s", err) 326 t.eventHandler(baseEvent(types.Err(msg))) 327 return 328 } 329 330 if record.LostSamples != 0 { 331 msg := fmt.Sprintf("lost %d samples", record.LostSamples) 332 t.eventHandler(baseEvent(types.Warn(msg))) 333 continue 334 } 335 336 if len(record.RawSample) < 4 { 337 t.eventHandler(baseEvent(types.Err("record too small"))) 338 continue 339 } 340 341 // all networking gadgets have netns as first field 342 netns := *(*uint32)(unsafe.Pointer(&record.RawSample[0])) 343 event, err := processEvent(record.RawSample, uint64(netns)) 344 if err != nil { 345 t.eventHandler(baseEvent(types.Err(err.Error()))) 346 continue 347 } 348 if event == nil { 349 continue 350 } 351 t.eventHandler(event) 352 } 353 } 354 355 func (t *Tracer[Event]) releaseAttachment(netns uint64, a *attachment) { 356 unix.Close(a.sockFd) 357 a.dispatcherObjs.Close() 358 delete(t.attachments, netns) 359 } 360 361 func (t *Tracer[Event]) Detach(pid uint32) error { 362 t.mu.Lock() 363 defer t.mu.Unlock() 364 365 for netns, a := range t.attachments { 366 if _, ok := a.users[pid]; ok { 367 
delete(a.users, pid) 368 if len(a.users) == 0 { 369 t.releaseAttachment(netns, a) 370 } 371 return nil 372 } 373 } 374 return fmt.Errorf("pid %d is not attached", pid) 375 } 376 377 func (t *Tracer[Event]) Close() { 378 t.mu.Lock() 379 defer t.mu.Unlock() 380 381 if t.perfRd != nil { 382 t.perfRd.Close() 383 } 384 if t.collection != nil { 385 t.collection.Close() 386 } 387 for key, l := range t.attachments { 388 t.releaseAttachment(key, l) 389 } 390 if t.dispatcherMap != nil { 391 t.dispatcherMap.Close() 392 } 393 }