// Source: github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/uprobetracer/tracer.go

// Copyright 2024 The Inspektor Gadget authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package uprobetracer handles how uprobe/uretprobe/USDT programs are attached
// to containers. It has two running modes: `pending` mode and `running` mode.
//
// Before `AttachProg` is called, uprobetracer runs in `pending` mode, only
// maintaining the container PIDs ready to attach to.
//
// When `AttachProg` is called, uprobetracer enters the `running` mode and
// attaches to all pending containers. After that, it will never get back to
// the `pending` mode.
//
// In `running` mode, uprobetracer holds fd(s) of the executables, so we can
// use `/proc/self/fd/$fd` for attaching; this is used to avoid fd-reusing.
//
// Uprobetracer doesn't maintain ebpf.collection or perf-ring buffer by itself,
// those are held by the parent tracer.
//
// All interfaces should hold locks, while inner functions do not.
32 package uprobetracer 33 34 import ( 35 "errors" 36 "fmt" 37 "os" 38 "path" 39 "path/filepath" 40 "strings" 41 "sync" 42 43 "github.com/cilium/ebpf" 44 "github.com/cilium/ebpf/link" 45 securejoin "github.com/cyphar/filepath-securejoin" 46 47 containercollection "github.com/inspektor-gadget/inspektor-gadget/pkg/container-collection" 48 "github.com/inspektor-gadget/inspektor-gadget/pkg/kfilefields" 49 "github.com/inspektor-gadget/inspektor-gadget/pkg/logger" 50 "github.com/inspektor-gadget/inspektor-gadget/pkg/utils/host" 51 ) 52 53 type ProgType uint32 54 55 const ( 56 ProgUprobe ProgType = iota 57 ProgUretprobe 58 ProgUSDT 59 ) 60 61 // inodeKeeper holds a file object, with the counter representing its 62 // reference count. The link is not nil only when the file is attached. 63 type inodeKeeper struct { 64 counter int 65 file *os.File 66 link link.Link 67 } 68 69 func (t *inodeKeeper) close() { 70 if t.link != nil { 71 t.link.Close() 72 } 73 t.file.Close() 74 } 75 76 type Tracer[Event any] struct { 77 progName string 78 progType ProgType 79 attachFilePath string 80 attachSymbol string 81 prog *ebpf.Program 82 83 // keeps the inodes for each attached container 84 // when users write library names in ebpf section names, it's possible to 85 // find multiple libraries of different archs within the same container, 86 // making this a one-to-many mapping 87 containerPid2Inodes map[uint32][]uint64 88 // keeps the fd and refCount for each realInodePtr 89 // 90 // we are using `realInodePtr` (the address of real inode in kernel) to identify a file 91 // instead of just the inode number. 92 // Since overlayFS overwrites the FsID of files and provides its own inode implementation, 93 // we cannot uniquely identify a file on disk using `<FsID, inode>` pairs. 94 // 95 // Meanwhile, uprobe is using kernel function `d_real_inode` to get the underlying inode, 96 // and attaching onto it. 
That means if we are attaching to one container, other containers 97 // sharing the same image will also be attached. If we are attaching to multiple containers, 98 // the underlying inode might be attached multiple times, leading to duplicate records. 99 // 100 // To deduplicate, we need to identify the underlying inode hidden by overlayFS, 101 // and use it as a unique identifier. For each realInodePtr, we only attach to it once. 102 inodeRefCount map[uint64]*inodeKeeper 103 // used as a set, keeps PIDs of the pending containers 104 pendingContainerPids map[uint32]bool 105 106 logger logger.Logger 107 108 closed bool 109 mu sync.Mutex 110 } 111 112 func NewTracer[Event any](logger logger.Logger) (*Tracer[Event], error) { 113 t := &Tracer[Event]{ 114 containerPid2Inodes: make(map[uint32][]uint64), 115 inodeRefCount: make(map[uint64]*inodeKeeper), 116 pendingContainerPids: make(map[uint32]bool), 117 logger: logger, 118 closed: false, 119 } 120 return t, nil 121 } 122 123 // AttachProg loads the ebpf program, and try attaching if there are pending containers 124 func (t *Tracer[Event]) AttachProg(progName string, progType ProgType, attachTo string, prog *ebpf.Program) error { 125 if progType != ProgUprobe && progType != ProgUretprobe && progType != ProgUSDT { 126 return fmt.Errorf("unsupported uprobe prog type: %q", progType) 127 } 128 129 if prog == nil { 130 return errors.New("prog does not exist") 131 } 132 if t.prog != nil { 133 return errors.New("loading uprobe program twice") 134 } 135 136 parts := strings.SplitN(attachTo, ":", 2) 137 if len(parts) < 2 { 138 return fmt.Errorf("invalid section name %q", attachTo) 139 } 140 if !filepath.IsAbs(parts[0]) && strings.Contains(parts[0], "/") { 141 return fmt.Errorf("section name must be either an absolute path or a library name: %q", parts[0]) 142 } 143 if progType == ProgUSDT && len(strings.Split(parts[1], ":")) != 2 { 144 return fmt.Errorf("invalid USDT section name: %q", attachTo) 145 } 146 147 t.mu.Lock() 148 
defer t.mu.Unlock() 149 150 if t.closed { 151 return errors.New("uprobetracer has been closed") 152 } 153 154 t.progName = progName 155 t.progType = progType 156 t.attachFilePath = parts[0] 157 t.attachSymbol = parts[1] 158 t.prog = prog 159 160 // attach to pending containers, then release the pending list 161 for pid := range t.pendingContainerPids { 162 t.attach(pid) 163 } 164 t.pendingContainerPids = nil 165 166 return nil 167 } 168 169 func (t *Tracer[Event]) searchForLibrary(containerPid uint32) ([]string, error) { 170 var targetPaths []string 171 var securedTargetPaths []string 172 173 filePath := t.attachFilePath 174 if !filepath.IsAbs(filePath) { 175 containerLdCachePath, err := securejoin.SecureJoin(filepath.Join(host.HostProcFs, fmt.Sprint(containerPid), "root"), "etc/ld.so.cache") 176 if err != nil { 177 return nil, fmt.Errorf("path %q: %w", filePath, err) 178 } 179 ldCachePaths, err := parseLdCache(containerLdCachePath, filePath) 180 if err != nil { 181 return nil, fmt.Errorf("parsing ld cache: %w", err) 182 } 183 targetPaths = ldCachePaths 184 } else { 185 targetPaths = append(targetPaths, filePath) 186 } 187 for _, targetPath := range targetPaths { 188 securedTargetPath, err := securejoin.SecureJoin(filepath.Join(host.HostProcFs, fmt.Sprint(containerPid), "root"), targetPath) 189 if err != nil { 190 t.logger.Debugf("path %q in ld cache is not available: %s", filePath, err.Error()) 191 continue 192 } 193 securedTargetPaths = append(securedTargetPaths, securedTargetPath) 194 } 195 return securedTargetPaths, nil 196 } 197 198 // attach uprobe program to the inode of the file passed in parameter 199 func (t *Tracer[Event]) attachUprobe(file *os.File) (link.Link, error) { 200 attachPath := path.Join(host.HostProcFs, "self/fd/", fmt.Sprint(file.Fd())) 201 ex, err := link.OpenExecutable(attachPath) 202 if err != nil { 203 return nil, fmt.Errorf("opening %q: %w", attachPath, err) 204 } 205 switch t.progType { 206 case ProgUprobe: 207 return 
ex.Uprobe(t.attachSymbol, t.prog, nil) 208 case ProgUretprobe: 209 return ex.Uretprobe(t.attachSymbol, t.prog, nil) 210 case ProgUSDT: 211 attachInfo, err := getUsdtInfo(attachPath, t.attachSymbol) 212 if err != nil { 213 return nil, fmt.Errorf("reading USDT metadata: %w", err) 214 } 215 return ex.Uprobe(t.attachSymbol, t.prog, 216 &link.UprobeOptions{ 217 Address: attachInfo.attachAddress, 218 RefCtrOffset: attachInfo.semaphoreAddress, 219 }) 220 default: 221 return nil, fmt.Errorf("attaching to inode: unsupported prog type: %q", t.progType) 222 } 223 } 224 225 // try attaching to a container, will update `containerPid2Inodes` 226 func (t *Tracer[Event]) attach(containerPid uint32) { 227 var attachedRealInodes []uint64 228 attachFilePaths, err := t.searchForLibrary(containerPid) 229 if err != nil { 230 t.logger.Debugf("attaching to container %d: %s", containerPid, err.Error()) 231 } 232 233 if len(attachFilePaths) == 0 { 234 t.logger.Debugf("cannot find file to attach in container %d for symbol %q", containerPid, t.attachSymbol) 235 } 236 237 for _, filePath := range attachFilePaths { 238 // Do not use `O_PATH` flag here, because `ReadRealInodeFromFd` needs the `private_data` field 239 // in kernel "struct file", to access the underlying inode through overlayFS. 240 // Using `O_PATH` flag will cause the `private_data` field to be zero. 
241 file, err := os.Open(filePath) 242 if err != nil { 243 t.logger.Debugf("opening file '%q' for uprobe: %s", filePath, err.Error()) 244 continue 245 } 246 realInodePtr, err := kfilefields.ReadRealInodeFromFd(int(file.Fd())) 247 if err != nil { 248 t.logger.Debugf("getting inode info for '%q': %s", filePath, err.Error()) 249 file.Close() 250 continue 251 } 252 253 t.logger.Debugf("attaching uprobe %q to container %d: %q", t.progName, containerPid, filePath) 254 attachedRealInodes = append(attachedRealInodes, realInodePtr) 255 256 inode, exists := t.inodeRefCount[realInodePtr] 257 if !exists { 258 progLink, err := t.attachUprobe(file) 259 if err != nil { 260 t.logger.Debugf("failed to attach uprobe %q: %s", t.progName, err.Error()) 261 } 262 t.inodeRefCount[realInodePtr] = &inodeKeeper{1, file, progLink} 263 } else { 264 inode.counter++ 265 file.Close() 266 } 267 } 268 269 t.containerPid2Inodes[containerPid] = attachedRealInodes 270 } 271 272 // AttachContainer will attach now if the prog is ready, otherwise it will add container into the pending list 273 func (t *Tracer[Event]) AttachContainer(container *containercollection.Container) error { 274 t.mu.Lock() 275 defer t.mu.Unlock() 276 277 if t.closed { 278 return errors.New("uprobetracer has been closed") 279 } 280 281 if t.prog == nil { 282 _, exist := t.pendingContainerPids[container.Pid] 283 if exist { 284 return fmt.Errorf("container PID already exists: %d", container.Pid) 285 } 286 t.pendingContainerPids[container.Pid] = true 287 } else { 288 _, exist := t.containerPid2Inodes[container.Pid] 289 if exist { 290 return fmt.Errorf("container PID already exists: %d", container.Pid) 291 } 292 t.attach(container.Pid) 293 } 294 return nil 295 } 296 297 func (t *Tracer[Event]) DetachContainer(container *containercollection.Container) error { 298 t.mu.Lock() 299 defer t.mu.Unlock() 300 301 if t.closed { 302 return nil 303 } 304 305 if t.prog == nil { 306 // remove from pending list 307 _, exist := 
t.pendingContainerPids[container.Pid] 308 if !exist { 309 return errors.New("container has not been attached") 310 } 311 delete(t.pendingContainerPids, container.Pid) 312 } else { 313 // detach from container if attached 314 attachedRealInodes, exist := t.containerPid2Inodes[container.Pid] 315 if !exist { 316 return errors.New("container has not been attached") 317 } 318 delete(t.containerPid2Inodes, container.Pid) 319 320 for _, realInodePtr := range attachedRealInodes { 321 keeper, exist := t.inodeRefCount[realInodePtr] 322 if !exist { 323 return errors.New("internal error: finding inodeKeeper with realInodePtr") 324 } 325 keeper.counter-- 326 if keeper.counter == 0 { 327 keeper.close() 328 delete(t.inodeRefCount, realInodePtr) 329 } 330 } 331 } 332 333 return nil 334 } 335 336 func (t *Tracer[Event]) Close() { 337 t.mu.Lock() 338 defer t.mu.Unlock() 339 340 if t.closed { 341 return 342 } 343 344 for _, keeper := range t.inodeRefCount { 345 keeper.close() 346 } 347 348 t.containerPid2Inodes = nil 349 t.inodeRefCount = nil 350 t.closed = true 351 }