github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/uprobetracer/tracer.go (about)

     1  // Copyright 2024 The Inspektor Gadget authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package uprobetracer handles how uprobe/uretprobe/USDT programs are attached
    16  // to containers. It has two running modes: `pending` mode and `running` mode.
    17  //
    18  // Before `AttachProg` is called, uprobetracer runs in `pending` mode, only
    19  // maintaining the container PIDs ready to attach to.
    20  //
    21  // When `AttachProg` is called, uprobetracer enters the `running` mode and
    22  // attaches to all pending containers. After that, it will never get back to
    23  // the `pending` mode.
    24  //
// In `running` mode, uprobetracer holds fd(s) of the executables, so that
// `/proc/self/fd/$fd` can be used for attaching; this avoids fd-reusing.
    27  //
// Uprobetracer doesn't maintain the ebpf.Collection or the perf ring buffer
// by itself; those are held by the parent tracer.
    30  //
    31  // All interfaces should hold locks, while inner functions do not.
    32  package uprobetracer
    33  
    34  import (
    35  	"errors"
    36  	"fmt"
    37  	"os"
    38  	"path"
    39  	"path/filepath"
    40  	"strings"
    41  	"sync"
    42  
    43  	"github.com/cilium/ebpf"
    44  	"github.com/cilium/ebpf/link"
    45  	securejoin "github.com/cyphar/filepath-securejoin"
    46  
    47  	containercollection "github.com/inspektor-gadget/inspektor-gadget/pkg/container-collection"
    48  	"github.com/inspektor-gadget/inspektor-gadget/pkg/kfilefields"
    49  	"github.com/inspektor-gadget/inspektor-gadget/pkg/logger"
    50  	"github.com/inspektor-gadget/inspektor-gadget/pkg/utils/host"
    51  )
    52  
// ProgType selects how the eBPF program gets attached to a target file.
type ProgType uint32

const (
	// ProgUprobe attaches the program as a uprobe on the target symbol.
	ProgUprobe ProgType = iota
	// ProgUretprobe attaches the program as a uretprobe on the target symbol.
	ProgUretprobe
	// ProgUSDT attaches the program at the address of a USDT probe.
	ProgUSDT
)
    60  
// inodeKeeper holds a file object, with the counter representing its
// reference count. The link is not nil only when the file is attached.
type inodeKeeper struct {
	counter int       // references taken by attached containers; released in DetachContainer
	file    *os.File  // open handle on the target file, closed together with the keeper
	link    link.Link // uprobe link for this inode, if attaching succeeded
}
    68  
    69  func (t *inodeKeeper) close() {
    70  	if t.link != nil {
    71  		t.link.Close()
    72  	}
    73  	t.file.Close()
    74  }
    75  
// Tracer attaches one uprobe/uretprobe/USDT program to the containers it is
// told about, attaching to each underlying inode at most once.
//
// NOTE(review): the Event type parameter is not referenced by any field or
// method in this file — presumably it mirrors the parent tracer; confirm.
type Tracer[Event any] struct {
	progName       string        // program name, used in log messages
	progType       ProgType      // how the program is attached (uprobe, uretprobe or USDT)
	attachFilePath string        // section name before the first ':' (absolute path or library name)
	attachSymbol   string        // section name after the first ':'
	prog           *ebpf.Program // nil until AttachProg succeeds, i.e. while in `pending` mode

	// keeps the inodes for each attached container
	// when users write library names in ebpf section names, it's possible to
	// find multiple libraries of different archs within the same container,
	// making this a one-to-many mapping
	containerPid2Inodes map[uint32][]uint64
	// keeps the fd and refCount for each realInodePtr
	//
	// we are using `realInodePtr` (the address of real inode in kernel) to identify a file
	// instead of just the inode number.
	// Since overlayFS overwrites the FsID of files and provides its own inode implementation,
	// we cannot uniquely identify a file on disk using `<FsID, inode>` pairs.
	//
	// Meanwhile, uprobe is using kernel function `d_real_inode` to get the underlying inode,
	// and attaching onto it. That means if we are attaching to one container, other containers
	// sharing the same image will also be attached. If we are attaching to multiple containers,
	// the underlying inode might be attached multiple times, leading to duplicate records.
	//
	// To deduplicate, we need to identify the underlying inode hidden by overlayFS,
	// and use it as a unique identifier. For each realInodePtr, we only attach to it once.
	inodeRefCount map[uint64]*inodeKeeper
	// used as a set, keeps PIDs of the pending containers
	// (set to nil once AttachProg has processed the pending list)
	pendingContainerPids map[uint32]bool

	logger logger.Logger

	closed bool       // set by Close; AttachProg and AttachContainer fail afterwards
	mu     sync.Mutex // taken by the exported methods; inner helpers expect it to be held
}
   111  
   112  func NewTracer[Event any](logger logger.Logger) (*Tracer[Event], error) {
   113  	t := &Tracer[Event]{
   114  		containerPid2Inodes:  make(map[uint32][]uint64),
   115  		inodeRefCount:        make(map[uint64]*inodeKeeper),
   116  		pendingContainerPids: make(map[uint32]bool),
   117  		logger:               logger,
   118  		closed:               false,
   119  	}
   120  	return t, nil
   121  }
   122  
   123  // AttachProg loads the ebpf program, and try attaching if there are pending containers
   124  func (t *Tracer[Event]) AttachProg(progName string, progType ProgType, attachTo string, prog *ebpf.Program) error {
   125  	if progType != ProgUprobe && progType != ProgUretprobe && progType != ProgUSDT {
   126  		return fmt.Errorf("unsupported uprobe prog type: %q", progType)
   127  	}
   128  
   129  	if prog == nil {
   130  		return errors.New("prog does not exist")
   131  	}
   132  	if t.prog != nil {
   133  		return errors.New("loading uprobe program twice")
   134  	}
   135  
   136  	parts := strings.SplitN(attachTo, ":", 2)
   137  	if len(parts) < 2 {
   138  		return fmt.Errorf("invalid section name %q", attachTo)
   139  	}
   140  	if !filepath.IsAbs(parts[0]) && strings.Contains(parts[0], "/") {
   141  		return fmt.Errorf("section name must be either an absolute path or a library name: %q", parts[0])
   142  	}
   143  	if progType == ProgUSDT && len(strings.Split(parts[1], ":")) != 2 {
   144  		return fmt.Errorf("invalid USDT section name: %q", attachTo)
   145  	}
   146  
   147  	t.mu.Lock()
   148  	defer t.mu.Unlock()
   149  
   150  	if t.closed {
   151  		return errors.New("uprobetracer has been closed")
   152  	}
   153  
   154  	t.progName = progName
   155  	t.progType = progType
   156  	t.attachFilePath = parts[0]
   157  	t.attachSymbol = parts[1]
   158  	t.prog = prog
   159  
   160  	// attach to pending containers, then release the pending list
   161  	for pid := range t.pendingContainerPids {
   162  		t.attach(pid)
   163  	}
   164  	t.pendingContainerPids = nil
   165  
   166  	return nil
   167  }
   168  
   169  func (t *Tracer[Event]) searchForLibrary(containerPid uint32) ([]string, error) {
   170  	var targetPaths []string
   171  	var securedTargetPaths []string
   172  
   173  	filePath := t.attachFilePath
   174  	if !filepath.IsAbs(filePath) {
   175  		containerLdCachePath, err := securejoin.SecureJoin(filepath.Join(host.HostProcFs, fmt.Sprint(containerPid), "root"), "etc/ld.so.cache")
   176  		if err != nil {
   177  			return nil, fmt.Errorf("path %q: %w", filePath, err)
   178  		}
   179  		ldCachePaths, err := parseLdCache(containerLdCachePath, filePath)
   180  		if err != nil {
   181  			return nil, fmt.Errorf("parsing ld cache: %w", err)
   182  		}
   183  		targetPaths = ldCachePaths
   184  	} else {
   185  		targetPaths = append(targetPaths, filePath)
   186  	}
   187  	for _, targetPath := range targetPaths {
   188  		securedTargetPath, err := securejoin.SecureJoin(filepath.Join(host.HostProcFs, fmt.Sprint(containerPid), "root"), targetPath)
   189  		if err != nil {
   190  			t.logger.Debugf("path %q in ld cache is not available: %s", filePath, err.Error())
   191  			continue
   192  		}
   193  		securedTargetPaths = append(securedTargetPaths, securedTargetPath)
   194  	}
   195  	return securedTargetPaths, nil
   196  }
   197  
   198  // attach uprobe program to the inode of the file passed in parameter
   199  func (t *Tracer[Event]) attachUprobe(file *os.File) (link.Link, error) {
   200  	attachPath := path.Join(host.HostProcFs, "self/fd/", fmt.Sprint(file.Fd()))
   201  	ex, err := link.OpenExecutable(attachPath)
   202  	if err != nil {
   203  		return nil, fmt.Errorf("opening %q: %w", attachPath, err)
   204  	}
   205  	switch t.progType {
   206  	case ProgUprobe:
   207  		return ex.Uprobe(t.attachSymbol, t.prog, nil)
   208  	case ProgUretprobe:
   209  		return ex.Uretprobe(t.attachSymbol, t.prog, nil)
   210  	case ProgUSDT:
   211  		attachInfo, err := getUsdtInfo(attachPath, t.attachSymbol)
   212  		if err != nil {
   213  			return nil, fmt.Errorf("reading USDT metadata: %w", err)
   214  		}
   215  		return ex.Uprobe(t.attachSymbol, t.prog,
   216  			&link.UprobeOptions{
   217  				Address:      attachInfo.attachAddress,
   218  				RefCtrOffset: attachInfo.semaphoreAddress,
   219  			})
   220  	default:
   221  		return nil, fmt.Errorf("attaching to inode: unsupported prog type: %q", t.progType)
   222  	}
   223  }
   224  
   225  // try attaching to a container, will update `containerPid2Inodes`
   226  func (t *Tracer[Event]) attach(containerPid uint32) {
   227  	var attachedRealInodes []uint64
   228  	attachFilePaths, err := t.searchForLibrary(containerPid)
   229  	if err != nil {
   230  		t.logger.Debugf("attaching to container %d: %s", containerPid, err.Error())
   231  	}
   232  
   233  	if len(attachFilePaths) == 0 {
   234  		t.logger.Debugf("cannot find file to attach in container %d for symbol %q", containerPid, t.attachSymbol)
   235  	}
   236  
   237  	for _, filePath := range attachFilePaths {
   238  		// Do not use `O_PATH` flag here, because `ReadRealInodeFromFd` needs the `private_data` field
   239  		// in kernel "struct file", to access the underlying inode through overlayFS.
   240  		// Using `O_PATH` flag will cause the `private_data` field to be zero.
   241  		file, err := os.Open(filePath)
   242  		if err != nil {
   243  			t.logger.Debugf("opening file '%q' for uprobe: %s", filePath, err.Error())
   244  			continue
   245  		}
   246  		realInodePtr, err := kfilefields.ReadRealInodeFromFd(int(file.Fd()))
   247  		if err != nil {
   248  			t.logger.Debugf("getting inode info for '%q': %s", filePath, err.Error())
   249  			file.Close()
   250  			continue
   251  		}
   252  
   253  		t.logger.Debugf("attaching uprobe %q to container %d: %q", t.progName, containerPid, filePath)
   254  		attachedRealInodes = append(attachedRealInodes, realInodePtr)
   255  
   256  		inode, exists := t.inodeRefCount[realInodePtr]
   257  		if !exists {
   258  			progLink, err := t.attachUprobe(file)
   259  			if err != nil {
   260  				t.logger.Debugf("failed to attach uprobe %q: %s", t.progName, err.Error())
   261  			}
   262  			t.inodeRefCount[realInodePtr] = &inodeKeeper{1, file, progLink}
   263  		} else {
   264  			inode.counter++
   265  			file.Close()
   266  		}
   267  	}
   268  
   269  	t.containerPid2Inodes[containerPid] = attachedRealInodes
   270  }
   271  
   272  // AttachContainer will attach now if the prog is ready, otherwise it will add container into the pending list
   273  func (t *Tracer[Event]) AttachContainer(container *containercollection.Container) error {
   274  	t.mu.Lock()
   275  	defer t.mu.Unlock()
   276  
   277  	if t.closed {
   278  		return errors.New("uprobetracer has been closed")
   279  	}
   280  
   281  	if t.prog == nil {
   282  		_, exist := t.pendingContainerPids[container.Pid]
   283  		if exist {
   284  			return fmt.Errorf("container PID already exists: %d", container.Pid)
   285  		}
   286  		t.pendingContainerPids[container.Pid] = true
   287  	} else {
   288  		_, exist := t.containerPid2Inodes[container.Pid]
   289  		if exist {
   290  			return fmt.Errorf("container PID already exists: %d", container.Pid)
   291  		}
   292  		t.attach(container.Pid)
   293  	}
   294  	return nil
   295  }
   296  
   297  func (t *Tracer[Event]) DetachContainer(container *containercollection.Container) error {
   298  	t.mu.Lock()
   299  	defer t.mu.Unlock()
   300  
   301  	if t.closed {
   302  		return nil
   303  	}
   304  
   305  	if t.prog == nil {
   306  		// remove from pending list
   307  		_, exist := t.pendingContainerPids[container.Pid]
   308  		if !exist {
   309  			return errors.New("container has not been attached")
   310  		}
   311  		delete(t.pendingContainerPids, container.Pid)
   312  	} else {
   313  		// detach from container if attached
   314  		attachedRealInodes, exist := t.containerPid2Inodes[container.Pid]
   315  		if !exist {
   316  			return errors.New("container has not been attached")
   317  		}
   318  		delete(t.containerPid2Inodes, container.Pid)
   319  
   320  		for _, realInodePtr := range attachedRealInodes {
   321  			keeper, exist := t.inodeRefCount[realInodePtr]
   322  			if !exist {
   323  				return errors.New("internal error: finding inodeKeeper with realInodePtr")
   324  			}
   325  			keeper.counter--
   326  			if keeper.counter == 0 {
   327  				keeper.close()
   328  				delete(t.inodeRefCount, realInodePtr)
   329  			}
   330  		}
   331  	}
   332  
   333  	return nil
   334  }
   335  
   336  func (t *Tracer[Event]) Close() {
   337  	t.mu.Lock()
   338  	defer t.mu.Unlock()
   339  
   340  	if t.closed {
   341  		return
   342  	}
   343  
   344  	for _, keeper := range t.inodeRefCount {
   345  		keeper.close()
   346  	}
   347  
   348  	t.containerPid2Inodes = nil
   349  	t.inodeRefCount = nil
   350  	t.closed = true
   351  }