github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/tchandler/tracer.go (about)

     1  // Copyright 2022-2024 The Inspektor Gadget authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package tchandler handles how SchedCLS programs are attached to containers and network
    16  // interfaces. The behavior is very similar to the network tracer implemented in
    17  // pkg/networktracer/tracer.go.
    18  // The main difference is that SchedCLS programs need to be attached to network interfaces and can
    19  // be attached on ingress or egress.
    20  package tchandler
    21  
    22  import (
    23  	"errors"
    24  	"fmt"
    25  	"net"
    26  	"sync"
    27  
    28  	"github.com/cilium/ebpf"
    29  	"github.com/florianl/go-tc"
    30  	"golang.org/x/sys/unix"
    31  
    32  	containercollection "github.com/inspektor-gadget/inspektor-gadget/pkg/container-collection"
    33  	containerutils "github.com/inspektor-gadget/inspektor-gadget/pkg/container-utils"
    34  	"github.com/inspektor-gadget/inspektor-gadget/pkg/netnsenter"
    35  )
    36  
    37  //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -target bpfel -cc clang -cflags ${CFLAGS} dispatcher ./bpf/dispatcher.bpf.c -- -I./bpf/
    38  
const (
	// tailCallMapName is the name of the program array map that the dispatcher uses to tail
	// call into the gadget program. The Handler creates one map with this name and substitutes
	// it into every dispatcher it loads.
	// Keep in sync with bpf/dispatcher.bpf.c
	tailCallMapName = "gadget_tail_call"
)
    43  
// attachment groups everything that is installed on a single network interface: the loaded
// dispatcher objects, the tc filter running it, and the set of pids relying on it.
type attachment struct {
	// dispatcher is a small eBPF program we attach to each network interface. This program
	// does a tail call to the gadget. The purpose of this program is to avoid loading multiple
	// instances of the gadget when there are different networking interfaces it must be
	// attached to.
	dispatcher dispatcherObjects
	// filter is the tc ebpf filter we attach to the network interface. This filter will execute
	// the dispatcher above. It is nil until the filter has been added successfully.
	filter *tc.Object

	// users keeps track of the pids of the users that have called Attach(). There can be more
	// than one when there are several containers in a pod (sharing the netns, and hence the
	// networking interface). In this case we want to attach the program only once.
	users map[uint32]struct{}
}
    59  
    60  func (t *Handler) closeAttachment(a *attachment) {
    61  	if a.filter != nil {
    62  		t.tcnl.Filter().Delete(a.filter)
    63  	}
    64  	a.dispatcher.Close()
    65  }
    66  
// Handler attaches the dispatcher program to network interfaces, either directly or through
// the host-side peers of a container's interfaces, and keeps track of the resulting
// attachments.
type Handler struct {
	// dispatcherMap is a program array map with a single element that is used by the
	// dispatcher to perform a tail call to the gadget program.
	dispatcherMap *ebpf.Map
	// key: network interface name on the host side
	// value: attachment
	attachments map[string]*attachment

	// socket to talk to netlink
	// TODO: Currently we keep one instance of the socket for each Handler instance. Check if
	// it makes sense to move this to the tracer to have one single instance per gadget.
	// https://github.com/inspektor-gadget/inspektor-gadget/pull/2376#discussion_r1475472725
	tcnl *tc.Tc

	// direction is the direction requested when the Handler was created. It is used as is for
	// host interfaces and inverted for container attachments.
	direction AttachmentDirection

	// mu protects attachments from concurrent access
	// AttachContainer and DetachContainer can be called in parallel
	mu sync.Mutex
}
    87  
    88  func NewHandler(direction AttachmentDirection) (*Handler, error) {
    89  	var err error
    90  	var tcnl *tc.Tc
    91  
    92  	// We need to create the client on the host network namespace, otherwise it's not able to
    93  	// create the qdisc and filters.
    94  	err = netnsenter.NetnsEnter(1, func() error {
    95  		// Setup tc socket for communication with the kernel
    96  		tcnl, err = tc.Open(&tc.Config{})
    97  		if err != nil {
    98  			return fmt.Errorf("opening rtnetlink socket: %w", err)
    99  		}
   100  		return nil
   101  	})
   102  	if err != nil {
   103  		return nil, err
   104  	}
   105  
   106  	t := &Handler{
   107  		attachments: make(map[string]*attachment),
   108  		tcnl:        tcnl,
   109  		direction:   direction,
   110  	}
   111  	defer func() {
   112  		if err != nil {
   113  			t.Close()
   114  		}
   115  	}()
   116  
   117  	// Keep in sync with tail_call map in bpf/dispatcher.bpf.c
   118  	dispatcherMapSpec := ebpf.MapSpec{
   119  		Name:       tailCallMapName,
   120  		Type:       ebpf.ProgramArray,
   121  		KeySize:    4,
   122  		ValueSize:  4,
   123  		MaxEntries: 1,
   124  	}
   125  	t.dispatcherMap, err = ebpf.NewMap(&dispatcherMapSpec)
   126  	if err != nil {
   127  		return nil, fmt.Errorf("creating tail call map: %w", err)
   128  	}
   129  	return t, nil
   130  }
   131  
   132  func (t *Handler) AttachProg(prog *ebpf.Program) error {
   133  	return t.dispatcherMap.Update(uint32(0), uint32(prog.FD()), ebpf.UpdateAny)
   134  }
   135  
   136  func (t *Handler) newAttachment(pid uint32, iface *net.Interface, netns uint64, direction AttachmentDirection) (_ *attachment, err error) {
   137  	a := &attachment{
   138  		users: map[uint32]struct{}{pid: {}},
   139  	}
   140  
   141  	var qdisc *tc.Object
   142  
   143  	defer func() {
   144  		if err != nil {
   145  			t.closeAttachment(a)
   146  			if qdisc != nil {
   147  				t.tcnl.Qdisc().Delete(qdisc)
   148  			}
   149  		}
   150  	}()
   151  
   152  	dispatcherSpec, err := loadDispatcher()
   153  	if err != nil {
   154  		return nil, err
   155  	}
   156  
   157  	consts := map[string]interface{}{
   158  		"current_netns": uint32(netns),
   159  	}
   160  	if err := dispatcherSpec.RewriteConstants(consts); err != nil {
   161  		return nil, fmt.Errorf("RewriteConstants while attaching to pid %d: %w", pid, err)
   162  	}
   163  
   164  	// We create the clsact qdisc and leak it. We can't remove it because we'll break any other
   165  	// application (including other ig instances) that are using it.
   166  	if qdisc, err = createClsActQdisc(t.tcnl, iface); err != nil && !errors.Is(err, unix.EEXIST) {
   167  		return nil, fmt.Errorf("creating clsact qdisc: %w", err)
   168  	}
   169  
   170  	optsIngress := ebpf.CollectionOptions{
   171  		MapReplacements: map[string]*ebpf.Map{
   172  			tailCallMapName: t.dispatcherMap,
   173  		},
   174  	}
   175  	if err = dispatcherSpec.LoadAndAssign(&a.dispatcher, &optsIngress); err != nil {
   176  		return nil, fmt.Errorf("loading ebpf program: %w", err)
   177  	}
   178  
   179  	a.filter, err = addTCFilter(t.tcnl, a.dispatcher.IgNetDisp, iface, direction)
   180  	if err != nil {
   181  		return nil, fmt.Errorf("attaching ebpf program to interface %s: %w", iface.Name, err)
   182  	}
   183  
   184  	return a, nil
   185  }
   186  
   187  func (t *Handler) AttachContainer(container *containercollection.Container) error {
   188  	// It's not clear what to do with hostNetwork containers. For now we just ignore them.
   189  	if container.HostNetwork {
   190  		return nil
   191  	}
   192  
   193  	pid := container.Pid
   194  
   195  	netns, err := containerutils.GetNetNs(int(pid))
   196  	if err != nil {
   197  		return fmt.Errorf("getting network interfaces on the host side for pid %d: %w", pid, err)
   198  	}
   199  
   200  	// If we're attaching a container, we need to invert ingress and egress because ingress on the
   201  	// host end of the veth interface is egress on the container side and vice versa.
   202  	var direction AttachmentDirection
   203  	switch t.direction {
   204  	case AttachmentDirectionIngress:
   205  		direction = AttachmentDirectionEgress
   206  	case AttachmentDirectionEgress:
   207  		direction = AttachmentDirectionIngress
   208  	}
   209  
   210  	ifaces, err := containerutils.GetIfacePeers(int(pid))
   211  	if err != nil {
   212  		return fmt.Errorf("getting network namespace of pid %d: %w", pid, err)
   213  	}
   214  
   215  	t.mu.Lock()
   216  	defer t.mu.Unlock()
   217  
   218  	// We need to perform these operations from the host network namespace, otherwise we won't
   219  	// be able to add the filter to the network interface.
   220  	err = netnsenter.NetnsEnter(1, func() error {
   221  		for _, iface := range ifaces {
   222  			if a, ok := t.attachments[iface.Name]; ok {
   223  				a.users[pid] = struct{}{}
   224  				return nil
   225  			}
   226  
   227  			a, err := t.newAttachment(pid, iface, netns, direction)
   228  			if err != nil {
   229  				return fmt.Errorf("creating network handler attachment for container %s: %w",
   230  					container.Runtime.ContainerName, err)
   231  			}
   232  			t.attachments[iface.Name] = a
   233  		}
   234  
   235  		return nil
   236  	})
   237  	return err
   238  }
   239  
   240  func (t *Handler) DetachContainer(container *containercollection.Container) error {
   241  	// It's not clear what to do with hostNetwork containers. For now we just ignore them.
   242  	if container.HostNetwork {
   243  		return nil
   244  	}
   245  
   246  	pid := container.Pid
   247  
   248  	t.mu.Lock()
   249  	defer t.mu.Unlock()
   250  
   251  	for ifacename, a := range t.attachments {
   252  		if _, ok := a.users[pid]; ok {
   253  			delete(a.users, pid)
   254  			if len(a.users) == 0 {
   255  				t.closeAttachment(a)
   256  				delete(t.attachments, ifacename)
   257  			}
   258  			return nil
   259  		}
   260  	}
   261  	return fmt.Errorf("pid %d is not attached", pid)
   262  }
   263  
   264  // AttachIface attaches the tracer to the given interface on the host. See AttachContainer() if you
   265  // want to attach to a container.
   266  func (t *Handler) AttachIface(iface *net.Interface) error {
   267  	if _, ok := t.attachments[iface.Name]; ok {
   268  		return nil
   269  	}
   270  
   271  	hostNs, err := containerutils.GetNetNs(int(1))
   272  	if err != nil {
   273  		return fmt.Errorf("getting network namespace of pid %d: %w", 1, err)
   274  	}
   275  
   276  	a, err := t.newAttachment(1, iface, hostNs, t.direction)
   277  	if err != nil {
   278  		return fmt.Errorf("creating network handler attachment for interface %s: %w", iface.Name, err)
   279  	}
   280  	t.attachments[iface.Name] = a
   281  
   282  	return nil
   283  }
   284  
   285  func (t *Handler) DetachIface(iface *net.Interface) error {
   286  	if a, ok := t.attachments[iface.Name]; ok {
   287  		t.closeAttachment(a)
   288  		delete(t.attachments, iface.Name)
   289  		return nil
   290  	}
   291  	return fmt.Errorf("interface %s is not attached", iface.Name)
   292  }
   293  
   294  func (t *Handler) Close() {
   295  	for _, a := range t.attachments {
   296  		t.closeAttachment(a)
   297  	}
   298  	if t.dispatcherMap != nil {
   299  		t.dispatcherMap.Close()
   300  	}
   301  	if t.tcnl != nil {
   302  		t.tcnl.Close()
   303  	}
   304  }