github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/utils/bpf-iter-ns/bpf-iter-ns.go (about)

     1  // Copyright 2023 The Inspektor Gadget authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package bpfiterns reads an eBPF iterator in a different namespace.
    16  package bpfiterns
    17  
    18  import (
    19  	"bufio"
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"os"
    25  	"path/filepath"
    26  	"sync"
    27  	"time"
    28  
    29  	"github.com/cilium/ebpf/link"
    30  	systemdDbus "github.com/coreos/go-systemd/v22/dbus"
    31  	_ "github.com/godbus/dbus/v5"
    32  	"github.com/google/uuid"
    33  
    34  	"github.com/inspektor-gadget/inspektor-gadget/pkg/utils/host"
    35  )
    36  
    37  // Read reads the iterator in the host pid namespace.
    38  // It will test if the current pid namespace is the host pid namespace.
    39  func Read(iter *link.Iter) ([]byte, error) {
    40  	hostPidNs, err := host.IsHostPidNs()
    41  	if err != nil {
    42  		return nil, fmt.Errorf("checking if current pid namespace is host pid namespace: %w", err)
    43  	}
    44  	if hostPidNs {
    45  		return ReadOnCurrentPidNs(iter)
    46  	} else {
    47  		return ReadOnHostPidNs(iter)
    48  	}
    49  }
    50  
    51  // ReadOnCurrentPidNs reads the iterator in the current pid namespace.
    52  func ReadOnCurrentPidNs(iter *link.Iter) ([]byte, error) {
    53  	file, err := iter.Open()
    54  	if err != nil {
    55  		return nil, fmt.Errorf("open BPF iterator: %w", err)
    56  	}
    57  	defer file.Close()
    58  	buf, err := io.ReadAll(file)
    59  	if err != nil {
    60  		return nil, fmt.Errorf("read BPF iterator: %w", err)
    61  	}
    62  	return buf, err
    63  }
    64  
    65  // ReadOnHostPidNs reads the iterator in the host pid namespace.
    66  // It does so by pinning the iterator in a temporary directory in the host bpffs,
    67  // and then creating a systemd service that will read the iterator and write it
    68  // to a temporary pipe. The pipe is then read and returned.
    69  func ReadOnHostPidNs(iter *link.Iter) ([]byte, error) {
    70  	selfPidOnHost, err := os.Readlink(filepath.Join(host.HostProcFs, "self"))
    71  	if err != nil {
    72  		return nil, fmt.Errorf("readlink /proc/self: %w", err)
    73  	}
    74  	if selfPidOnHost == "" {
    75  		return nil, fmt.Errorf("empty /proc/self symlink")
    76  	}
    77  
    78  	// Create a temporary directory in bpffs
    79  	bpfFS := "/sys/fs/bpf"
    80  	tmpPinDir, err := os.MkdirTemp(bpfFS, "ig-iter-")
    81  	if err != nil {
    82  		return nil, fmt.Errorf("creating temporary directory in bpffs: %w", err)
    83  	}
    84  	defer os.RemoveAll(tmpPinDir)
    85  
    86  	// Prepare the pin path from the container and host point of view
    87  	pinPathFromContainer := filepath.Join(tmpPinDir, "iter")
    88  	pinPathFromHost := filepath.Join("/proc", selfPidOnHost, "root", pinPathFromContainer)
    89  
    90  	err = iter.Pin(pinPathFromContainer)
    91  	if err != nil {
    92  		return nil, fmt.Errorf("pinning iterator: %w", err)
    93  	}
    94  
    95  	r, w, err := os.Pipe()
    96  	if err != nil {
    97  		return nil, fmt.Errorf("creating pipe: %w", err)
    98  	}
    99  	writerPath := fmt.Sprintf("/proc/%s/fd/%d", selfPidOnHost, w.Fd())
   100  
   101  	var buf []byte
   102  	var errReader error
   103  	var wg sync.WaitGroup
   104  	wg.Add(1)
   105  	go func() {
   106  		stdoutReader := bufio.NewReader(r)
   107  		// ReadAll will block until the write-side of the pipe is closed in both processes
   108  		// (the systemd service and this process)
   109  		buf, errReader = io.ReadAll(stdoutReader)
   110  		r.Close()
   111  		wg.Done()
   112  	}()
   113  
   114  	conn, err := systemdDbus.NewSystemdConnectionContext(context.TODO())
   115  	if err != nil {
   116  		return nil, fmt.Errorf("connecting to systemd: %w", err)
   117  	}
   118  	defer conn.Close()
   119  
   120  	runID := uuid.New().String()[:8]
   121  	unitName := fmt.Sprintf("ig-%s.service", runID)
   122  
   123  	statusChan := make(chan string, 1)
   124  	properties := []systemdDbus.Property{
   125  		systemdDbus.PropDescription("Inspektor Gadget job on host pidns"),
   126  		// Type=oneshot ensures that StartTransientUnitContext will only return "done" when the job is done
   127  		systemdDbus.PropType("oneshot"),
   128  		systemdDbus.PropExecStart([]string{
   129  			"/bin/sh",
   130  			"-c",
   131  			fmt.Sprintf("cat %s > %s", pinPathFromHost, writerPath),
   132  		}, true),
   133  	}
   134  
   135  	_, err = conn.StartTransientUnitContext(context.TODO(),
   136  		unitName, "fail", properties, statusChan)
   137  	if err != nil {
   138  		return nil, fmt.Errorf("starting transient unit: %w", err)
   139  	}
   140  	timeout := time.NewTimer(10 * time.Second)
   141  	defer timeout.Stop()
   142  
   143  	select {
   144  	case s := <-statusChan:
   145  		close(statusChan)
   146  
   147  		// Close writer first: this will unblock the go routine reading from the pipe
   148  		w.Close()
   149  		wg.Wait()
   150  
   151  		if errReader != nil {
   152  			return nil, fmt.Errorf("reading from pipe: %w", errReader)
   153  		}
   154  
   155  		// "done" indicates successful execution of a job
   156  		// See https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit
   157  		if s != "done" {
   158  			conn.ResetFailedUnitContext(context.TODO(), unitName)
   159  
   160  			return nil, fmt.Errorf("creating systemd unit `%s`: got `%s`", unitName, s)
   161  		}
   162  	case <-timeout.C:
   163  		w.Close()
   164  		wg.Wait()
   165  
   166  		conn.ResetFailedUnitContext(context.TODO(), unitName)
   167  		return nil, errors.New("timeout waiting for systemd to create " + unitName)
   168  	}
   169  
   170  	return buf, nil
   171  }