github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/networktracer/tracer.go (about)

     1  // Copyright 2022-2023 The Inspektor Gadget authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package networktracer installs the dispatcher ebpf program in each network
    16  // namespace of interest. The dispatcher program runs a tail call to the actual
    17  // gadget program.
    18  //
    19  // This is done both for builtin gadgets and containerized gadgets. In the case
    20  // of containerized gadgets, the dispatcher program is installed before
    21  // knowing the actual gadget program. Once it knows the actual gadget program,
    22  // the tail call map is updated.
    23  //
    24  // In the case of builtin gadgets, the Run() method can be called to fetch and
    25  // process events from ebpf. The containerized gadgets won't call Run() because
    26  // run/tracer.go fetches and processes the events themselves. Instead, it will
    27  // just call AttachProg().
    28  //
    29  // The actual gadget program is instantiated only once for performance reasons.
    30  // The network namespace is passed to the actual gadget program via the
    31  // skb->cb[0] variable.
    32  //
    33  // https://github.com/inspektor-gadget/inspektor-gadget/blob/main/docs/devel/network-gadget-dispatcher.png
    34  package networktracer
    35  
    36  import (
    37  	"errors"
    38  	"fmt"
    39  	"os"
    40  	"strings"
    41  	"sync"
    42  	"syscall"
    43  	"unsafe"
    44  
    45  	"github.com/cilium/ebpf"
    46  	"github.com/cilium/ebpf/perf"
    47  	"golang.org/x/sys/unix"
    48  
    49  	containercollection "github.com/inspektor-gadget/inspektor-gadget/pkg/container-collection"
    50  	containerutils "github.com/inspektor-gadget/inspektor-gadget/pkg/container-utils"
    51  	"github.com/inspektor-gadget/inspektor-gadget/pkg/gadgets"
    52  	"github.com/inspektor-gadget/inspektor-gadget/pkg/rawsock"
    53  	"github.com/inspektor-gadget/inspektor-gadget/pkg/socketenricher"
    54  	"github.com/inspektor-gadget/inspektor-gadget/pkg/types"
    55  )
    56  
    57  //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -target bpfel -cc clang -cflags ${CFLAGS} dispatcher ./bpf/dispatcher.bpf.c -- -I./bpf/ -I../socketenricher/bpf
    58  
    59  type attachment struct {
    60  	dispatcherObjs dispatcherObjects
    61  
    62  	sockFd int
    63  
    64  	// users keeps track of the users' pid that have called Attach(). This can
    65  	// happen for two reasons:
    66  	// 1. several containers in a pod (sharing the netns)
    67  	// 2. pods with networkHost=true
    68  	// In both cases, we want to attach the BPF program only once.
    69  	users map[uint32]struct{}
    70  }
    71  
    72  type Tracer[Event any] struct {
    73  	socketEnricherMap *ebpf.Map
    74  	dispatcherMap     *ebpf.Map
    75  	collection        *ebpf.Collection
    76  	prog              *ebpf.Program
    77  	perfRd            *perf.Reader
    78  
    79  	// key: network namespace inode number
    80  	// value: Tracelet
    81  	attachments map[uint64]*attachment
    82  
    83  	eventHandler func(ev *Event)
    84  
    85  	// mu protects attachments from concurrent access
    86  	// AttachContainer and DetachContainer can be called in parallel
    87  	mu sync.Mutex
    88  }
    89  
    90  func (t *Tracer[Event]) newAttachment(
    91  	pid uint32,
    92  	netns uint64,
    93  ) (_ *attachment, err error) {
    94  	a := &attachment{
    95  		sockFd: -1,
    96  		users:  map[uint32]struct{}{pid: {}},
    97  	}
    98  	defer func() {
    99  		if err != nil {
   100  			if a.sockFd != -1 {
   101  				unix.Close(a.sockFd)
   102  			}
   103  			a.dispatcherObjs.Close()
   104  		}
   105  	}()
   106  
   107  	dispatcherSpec, err := loadDispatcher()
   108  	if err != nil {
   109  		return nil, err
   110  	}
   111  
   112  	u32netns := uint32(netns)
   113  	consts := map[string]interface{}{
   114  		"current_netns": u32netns,
   115  	}
   116  	if err := dispatcherSpec.RewriteConstants(consts); err != nil {
   117  		return nil, fmt.Errorf("RewriteConstants while attaching to pid %d: %w", pid, err)
   118  	}
   119  	opts := ebpf.CollectionOptions{
   120  		MapReplacements: map[string]*ebpf.Map{
   121  			"tail_call": t.dispatcherMap,
   122  		},
   123  	}
   124  	if err = dispatcherSpec.LoadAndAssign(&a.dispatcherObjs, &opts); err != nil {
   125  		return nil, fmt.Errorf("loading ebpf program: %w", err)
   126  	}
   127  
   128  	a.sockFd, err = rawsock.OpenRawSock(pid)
   129  	if err != nil {
   130  		return nil, fmt.Errorf("opening raw socket: %w", err)
   131  	}
   132  
   133  	if err := syscall.SetsockoptInt(a.sockFd, syscall.SOL_SOCKET, unix.SO_ATTACH_BPF, a.dispatcherObjs.IgNetDisp.FD()); err != nil {
   134  		return nil, fmt.Errorf("attaching BPF program: %w", err)
   135  	}
   136  	return a, nil
   137  }
   138  
   139  func NewTracer[Event any]() (_ *Tracer[Event], err error) {
   140  	t := &Tracer[Event]{
   141  		attachments: make(map[uint64]*attachment),
   142  	}
   143  
   144  	// Keep in sync with tail_call map in bpf/dispatcher.bpf.c
   145  	dispatcherMapSpec := ebpf.MapSpec{
   146  		Name:       "tail_call",
   147  		Type:       ebpf.ProgramArray,
   148  		KeySize:    4,
   149  		ValueSize:  4,
   150  		MaxEntries: 1,
   151  	}
   152  	t.dispatcherMap, err = ebpf.NewMap(&dispatcherMapSpec)
   153  	if err != nil {
   154  		return nil, fmt.Errorf("creating tail_call map: %w", err)
   155  	}
   156  	return t, nil
   157  }
   158  
   159  func (t *Tracer[Event]) SetSocketEnricherMap(m *ebpf.Map) {
   160  	t.socketEnricherMap = m
   161  }
   162  
   163  func (t *Tracer[Event]) Run(
   164  	spec *ebpf.CollectionSpec,
   165  	baseEvent func(ev types.Event) *Event,
   166  	processEvent func(rawSample []byte, netns uint64) (*Event, error),
   167  ) (err error) {
   168  	gadgets.FixBpfKtimeGetBootNs(spec.Programs)
   169  
   170  	defer func() {
   171  		if err != nil {
   172  			if t.perfRd != nil {
   173  				t.perfRd.Close()
   174  			}
   175  			if t.collection != nil {
   176  				t.collection.Close()
   177  			}
   178  		}
   179  	}()
   180  
   181  	var opts ebpf.CollectionOptions
   182  
   183  	// Automatically find the socket program
   184  	bpfProgName := ""
   185  	for progName, p := range spec.Programs {
   186  		if p.Type == ebpf.SocketFilter && strings.HasPrefix(p.SectionName, "socket") {
   187  			if bpfProgName != "" {
   188  				return fmt.Errorf("multiple socket programs found: %s, %s", bpfProgName, progName)
   189  			}
   190  			bpfProgName = progName
   191  		}
   192  	}
   193  	if bpfProgName == "" {
   194  		return fmt.Errorf("no socket program found")
   195  	}
   196  
   197  	// Automatically find the perf map
   198  	bpfPerfMapName := ""
   199  	for mapName, m := range spec.Maps {
   200  		if m.Type == ebpf.PerfEventArray {
   201  			if bpfPerfMapName != "" {
   202  				return fmt.Errorf("multiple perf maps found: %s, %s", bpfPerfMapName, mapName)
   203  			}
   204  			bpfPerfMapName = mapName
   205  		}
   206  	}
   207  	if bpfPerfMapName == "" {
   208  		return fmt.Errorf("no perf map found")
   209  	}
   210  
   211  	usesSocketEnricher := false
   212  	for _, m := range spec.Maps {
   213  		if m.Name == socketenricher.SocketsMapName {
   214  			usesSocketEnricher = true
   215  			break
   216  		}
   217  	}
   218  
   219  	if usesSocketEnricher && t.socketEnricherMap != nil {
   220  		mapReplacements := map[string]*ebpf.Map{}
   221  		mapReplacements[socketenricher.SocketsMapName] = t.socketEnricherMap
   222  		opts.MapReplacements = mapReplacements
   223  	}
   224  
   225  	t.collection, err = ebpf.NewCollectionWithOptions(spec, opts)
   226  	if err != nil {
   227  		return fmt.Errorf("creating BPF collection: %w", err)
   228  	}
   229  
   230  	t.perfRd, err = perf.NewReader(t.collection.Maps[bpfPerfMapName], gadgets.PerfBufferPages*os.Getpagesize())
   231  	if err != nil {
   232  		return fmt.Errorf("getting a perf reader: %w", err)
   233  	}
   234  
   235  	var ok bool
   236  	t.prog, ok = t.collection.Programs[bpfProgName]
   237  	if !ok {
   238  		return fmt.Errorf("BPF program %q not found", bpfProgName)
   239  	}
   240  
   241  	err = t.AttachProg(t.prog)
   242  	if err != nil {
   243  		return fmt.Errorf("updating tail call map: %w", err)
   244  	}
   245  
   246  	go t.listen(baseEvent, processEvent)
   247  
   248  	return nil
   249  }
   250  
   251  // AttachProg is used directly by containerized gadgets
   252  func (t *Tracer[Event]) AttachProg(prog *ebpf.Program) error {
   253  	return t.dispatcherMap.Update(uint32(0), uint32(prog.FD()), ebpf.UpdateAny)
   254  }
   255  
   256  func (t *Tracer[Event]) Attach(pid uint32) error {
   257  	t.mu.Lock()
   258  	defer t.mu.Unlock()
   259  
   260  	netns, err := containerutils.GetNetNs(int(pid))
   261  	if err != nil {
   262  		return fmt.Errorf("getting network namespace of pid %d: %w", pid, err)
   263  	}
   264  	if a, ok := t.attachments[netns]; ok {
   265  		a.users[pid] = struct{}{}
   266  		return nil
   267  	}
   268  
   269  	a, err := t.newAttachment(pid, netns)
   270  	if err != nil {
   271  		return fmt.Errorf("creating network tracer attachment for pid %d: %w", pid, err)
   272  	}
   273  	t.attachments[netns] = a
   274  
   275  	return nil
   276  }
   277  
   278  func (t *Tracer[Event]) SetEventHandler(handler any) {
   279  	if t.eventHandler != nil {
   280  		panic("handler already set")
   281  	}
   282  
   283  	nh, ok := handler.(func(ev *Event))
   284  	if !ok {
   285  		panic("event handler invalid")
   286  	}
   287  	t.eventHandler = nh
   288  }
   289  
   290  // EventCallback provides support for legacy pkg/gadget-collection
   291  func (t *Tracer[Event]) EventCallback(event any) {
   292  	e, ok := event.(*Event)
   293  	if !ok {
   294  		panic("event handler argument invalid")
   295  	}
   296  	if t.eventHandler == nil {
   297  		return
   298  	}
   299  	t.eventHandler(e)
   300  }
   301  
   302  func (t *Tracer[Event]) AttachContainer(container *containercollection.Container) error {
   303  	return t.Attach(container.Pid)
   304  }
   305  
   306  func (t *Tracer[Event]) DetachContainer(container *containercollection.Container) error {
   307  	return t.Detach(container.Pid)
   308  }
   309  
   310  func (t *Tracer[Event]) GetMap(name string) *ebpf.Map {
   311  	return t.collection.Maps[name]
   312  }
   313  
   314  func (t *Tracer[Event]) listen(
   315  	baseEvent func(ev types.Event) *Event,
   316  	processEvent func(rawSample []byte, netns uint64) (*Event, error),
   317  ) {
   318  	for {
   319  		record, err := t.perfRd.Read()
   320  		if err != nil {
   321  			if errors.Is(err, perf.ErrClosed) {
   322  				return
   323  			}
   324  
   325  			msg := fmt.Sprintf("Error reading perf ring buffer: %s", err)
   326  			t.eventHandler(baseEvent(types.Err(msg)))
   327  			return
   328  		}
   329  
   330  		if record.LostSamples != 0 {
   331  			msg := fmt.Sprintf("lost %d samples", record.LostSamples)
   332  			t.eventHandler(baseEvent(types.Warn(msg)))
   333  			continue
   334  		}
   335  
   336  		if len(record.RawSample) < 4 {
   337  			t.eventHandler(baseEvent(types.Err("record too small")))
   338  			continue
   339  		}
   340  
   341  		// all networking gadgets have netns as first field
   342  		netns := *(*uint32)(unsafe.Pointer(&record.RawSample[0]))
   343  		event, err := processEvent(record.RawSample, uint64(netns))
   344  		if err != nil {
   345  			t.eventHandler(baseEvent(types.Err(err.Error())))
   346  			continue
   347  		}
   348  		if event == nil {
   349  			continue
   350  		}
   351  		t.eventHandler(event)
   352  	}
   353  }
   354  
   355  func (t *Tracer[Event]) releaseAttachment(netns uint64, a *attachment) {
   356  	unix.Close(a.sockFd)
   357  	a.dispatcherObjs.Close()
   358  	delete(t.attachments, netns)
   359  }
   360  
   361  func (t *Tracer[Event]) Detach(pid uint32) error {
   362  	t.mu.Lock()
   363  	defer t.mu.Unlock()
   364  
   365  	for netns, a := range t.attachments {
   366  		if _, ok := a.users[pid]; ok {
   367  			delete(a.users, pid)
   368  			if len(a.users) == 0 {
   369  				t.releaseAttachment(netns, a)
   370  			}
   371  			return nil
   372  		}
   373  	}
   374  	return fmt.Errorf("pid %d is not attached", pid)
   375  }
   376  
   377  func (t *Tracer[Event]) Close() {
   378  	t.mu.Lock()
   379  	defer t.mu.Unlock()
   380  
   381  	if t.perfRd != nil {
   382  		t.perfRd.Close()
   383  	}
   384  	if t.collection != nil {
   385  		t.collection.Close()
   386  	}
   387  	for key, l := range t.attachments {
   388  		t.releaseAttachment(key, l)
   389  	}
   390  	if t.dispatcherMap != nil {
   391  		t.dispatcherMap.Close()
   392  	}
   393  }