github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/gadgettracermanager/gadgettracermanager.go (about)

     1  // Copyright 2019-2023 The Inspektor Gadget authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gadgettracermanager
    16  
    17  import (
    18  	"context"
    19  	"encoding/json"
    20  	"errors"
    21  	"fmt"
    22  	"runtime"
    23  	"sync"
    24  
    25  	"github.com/cilium/ebpf"
    26  	"github.com/cilium/ebpf/rlimit"
    27  	log "github.com/sirupsen/logrus"
    28  
    29  	ocispec "github.com/opencontainers/runtime-spec/specs-go"
    30  
    31  	containercollection "github.com/inspektor-gadget/inspektor-gadget/pkg/container-collection"
    32  	containerhook "github.com/inspektor-gadget/inspektor-gadget/pkg/container-hook"
    33  	"github.com/inspektor-gadget/inspektor-gadget/pkg/gadgets"
    34  	pb "github.com/inspektor-gadget/inspektor-gadget/pkg/gadgettracermanager/api"
    35  	containersmap "github.com/inspektor-gadget/inspektor-gadget/pkg/gadgettracermanager/containers-map"
    36  	"github.com/inspektor-gadget/inspektor-gadget/pkg/operators"
    37  	"github.com/inspektor-gadget/inspektor-gadget/pkg/runcfanotify"
    38  	tracercollection "github.com/inspektor-gadget/inspektor-gadget/pkg/tracer-collection"
    39  	eventtypes "github.com/inspektor-gadget/inspektor-gadget/pkg/types"
    40  )
    41  
    42  type GadgetTracerManager struct {
    43  	pb.UnimplementedGadgetTracerManagerServer
    44  	containercollection.ContainerCollection
    45  
    46  	// mu protects the tracers map from concurrent access
    47  	mu sync.Mutex
    48  
    49  	// node where this instance is running
    50  	nodeName string
    51  
    52  	// tracers
    53  	tracerCollection *tracercollection.TracerCollection
    54  
    55  	// containersMap is the global map at /sys/fs/bpf/gadget/containers
    56  	// exposing container details for each mount namespace.
    57  	containersMap *containersmap.ContainersMap
    58  }
    59  
    60  func (g *GadgetTracerManager) AddTracer(tracerID string, containerSelector containercollection.ContainerSelector) error {
    61  	g.mu.Lock()
    62  	defer g.mu.Unlock()
    63  
    64  	return g.tracerCollection.AddTracer(tracerID, containerSelector)
    65  }
    66  
    67  func (g *GadgetTracerManager) RemoveTracer(tracerID string) error {
    68  	g.mu.Lock()
    69  	defer g.mu.Unlock()
    70  
    71  	return g.tracerCollection.RemoveTracer(tracerID)
    72  }
    73  
    74  func (g *GadgetTracerManager) ReceiveStream(tracerID *pb.TracerID, stream pb.GadgetTracerManager_ReceiveStreamServer) error {
    75  	if tracerID.Id == "" {
    76  		return fmt.Errorf("tracer Id not set")
    77  	}
    78  
    79  	g.mu.Lock()
    80  
    81  	gadgetStream, err := g.tracerCollection.Stream(tracerID.Id)
    82  	if err != nil {
    83  		g.mu.Unlock()
    84  		return fmt.Errorf("stream for tracer %q not found", tracerID.Id)
    85  	}
    86  
    87  	ch := gadgetStream.Subscribe()
    88  	defer gadgetStream.Unsubscribe(ch)
    89  
    90  	g.mu.Unlock()
    91  
    92  	if ch == nil {
    93  		return errors.New("tracer was removed before we could subscribe to its stream")
    94  	}
    95  
    96  	for l := range ch {
    97  		if l.EventLost {
    98  			ev := eventtypes.Event{
    99  				Type: eventtypes.ERR,
   100  				CommonData: eventtypes.CommonData{
   101  					K8s: eventtypes.K8sMetadata{
   102  						Node: g.nodeName,
   103  					},
   104  				},
   105  				Message: "events lost in gadget tracer manager",
   106  			}
   107  			line, _ := json.Marshal(ev)
   108  			err := stream.Send(&pb.StreamData{Line: string(line)})
   109  			if err != nil {
   110  				return err
   111  			}
   112  
   113  			continue
   114  		}
   115  
   116  		line := &pb.StreamData{Line: l.Line}
   117  		if err := stream.Send(line); err != nil {
   118  			return err
   119  		}
   120  	}
   121  
   122  	return nil
   123  }
   124  
   125  func (g *GadgetTracerManager) PublishEvent(tracerID string, line string) error {
   126  	// TODO: reentrant locking :/
   127  	// g.mu.Lock()
   128  	// defer g.mu.Unlock()
   129  
   130  	stream, err := g.tracerCollection.Stream(tracerID)
   131  	if err != nil {
   132  		return fmt.Errorf("stream for tracer %q not found", tracerID)
   133  	}
   134  
   135  	stream.Publish(line)
   136  	return nil
   137  }
   138  
   139  func (g *GadgetTracerManager) TracerMountNsMap(tracerID string) (*ebpf.Map, error) {
   140  	g.mu.Lock()
   141  	defer g.mu.Unlock()
   142  
   143  	return g.tracerCollection.TracerMountNsMap(tracerID)
   144  }
   145  
   146  func (g *GadgetTracerManager) ContainersMap() *ebpf.Map {
   147  	if g.containersMap == nil {
   148  		return nil
   149  	}
   150  
   151  	return g.containersMap.ContainersMap()
   152  }
   153  
   154  func (g *GadgetTracerManager) AddContainer(_ context.Context, containerDefinition *pb.ContainerDefinition) (*pb.AddContainerResponse, error) {
   155  	g.mu.Lock()
   156  	defer g.mu.Unlock()
   157  
   158  	if containerDefinition.Id == "" {
   159  		return nil, fmt.Errorf("container id not set")
   160  	}
   161  	if g.ContainerCollection.GetContainer(containerDefinition.Id) != nil {
   162  		return nil, fmt.Errorf("container with id %s already exists", containerDefinition.Id)
   163  	}
   164  
   165  	container := containercollection.Container{
   166  		Runtime: containercollection.RuntimeMetadata{
   167  			BasicRuntimeMetadata: eventtypes.BasicRuntimeMetadata{
   168  				ContainerID: containerDefinition.Id,
   169  			},
   170  		},
   171  		Pid: containerDefinition.Pid,
   172  		K8s: containercollection.K8sMetadata{
   173  			BasicK8sMetadata: eventtypes.BasicK8sMetadata{
   174  				Namespace:     containerDefinition.Namespace,
   175  				PodName:       containerDefinition.Podname,
   176  				ContainerName: containerDefinition.Name,
   177  			},
   178  		},
   179  	}
   180  	if containerDefinition.LabelsSet {
   181  		container.K8s.PodLabels = make(map[string]string)
   182  		for _, l := range containerDefinition.Labels {
   183  			container.K8s.PodLabels[l.Key] = l.Value
   184  		}
   185  	}
   186  	if containerDefinition.OciConfig != "" {
   187  		containerConfig := &ocispec.Spec{}
   188  		err := json.Unmarshal([]byte(containerDefinition.OciConfig), containerConfig)
   189  		if err != nil {
   190  			return nil, fmt.Errorf("unmarshaling container config: %w", err)
   191  		}
   192  		container.OciConfig = containerConfig
   193  	}
   194  
   195  	g.ContainerCollection.AddContainer(&container)
   196  
   197  	return &pb.AddContainerResponse{}, nil
   198  }
   199  
   200  func (g *GadgetTracerManager) RemoveContainer(_ context.Context, containerDefinition *pb.ContainerDefinition) (*pb.RemoveContainerResponse, error) {
   201  	g.mu.Lock()
   202  	defer g.mu.Unlock()
   203  
   204  	if containerDefinition.Id == "" {
   205  		return nil, fmt.Errorf("container Id not set")
   206  	}
   207  
   208  	c := g.ContainerCollection.GetContainer(containerDefinition.Id)
   209  	if c == nil {
   210  		return nil, fmt.Errorf("unknown container %q", containerDefinition.Id)
   211  	}
   212  
   213  	g.ContainerCollection.RemoveContainer(containerDefinition.Id)
   214  	return &pb.RemoveContainerResponse{}, nil
   215  }
   216  
   217  func (g *GadgetTracerManager) DumpState(_ context.Context, req *pb.DumpStateRequest) (*pb.Dump, error) {
   218  	g.mu.Lock()
   219  	defer g.mu.Unlock()
   220  
   221  	containers := "List of containers:\n"
   222  	g.ContainerRange(func(c *containercollection.Container) {
   223  		containers += fmt.Sprintf("%+v\n", c)
   224  	})
   225  
   226  	traces := "List of tracers:\n"
   227  	traces += g.tracerCollection.TracerDump()
   228  
   229  	stacks := "List of stacks:\n"
   230  	buf := make([]byte, 1<<20)
   231  	stacklen := runtime.Stack(buf, true)
   232  	stacks += fmt.Sprintf("%s\n", buf[:stacklen])
   233  
   234  	return &pb.Dump{Containers: containers, Traces: traces, Stacks: stacks}, nil
   235  }
   236  
   237  func NewServer(conf *Conf) (*GadgetTracerManager, error) {
   238  	g := &GadgetTracerManager{
   239  		nodeName: conf.NodeName,
   240  	}
   241  
   242  	eventtypes.Init(conf.NodeName)
   243  	var err error
   244  	if conf.TestOnly {
   245  		g.tracerCollection, err = tracercollection.NewTracerCollectionTest(&g.ContainerCollection)
   246  	} else {
   247  		g.tracerCollection, err = tracercollection.NewTracerCollection(&g.ContainerCollection)
   248  	}
   249  	if err != nil {
   250  		return nil, err
   251  	}
   252  
   253  	opts := []containercollection.ContainerCollectionOption{
   254  		containercollection.WithNodeName(conf.NodeName),
   255  	}
   256  
   257  	if !conf.TestOnly {
   258  		if err := rlimit.RemoveMemlock(); err != nil {
   259  			return nil, err
   260  		}
   261  
   262  		var err error
   263  		if g.containersMap, err = containersmap.NewContainersMap(gadgets.PinPath); err != nil {
   264  			return nil, fmt.Errorf("creating containers map: %w", err)
   265  		}
   266  
   267  		opts = append(opts, containercollection.WithPubSub(g.containersMap.ContainersMapUpdater()))
   268  		opts = append(opts, containercollection.WithOCIConfigEnrichment())
   269  		opts = append(opts, containercollection.WithCgroupEnrichment())
   270  		opts = append(opts, containercollection.WithLinuxNamespaceEnrichment())
   271  		opts = append(opts, containercollection.WithKubernetesEnrichment(g.nodeName, nil))
   272  		opts = append(opts, containercollection.WithTracerCollection(g.tracerCollection))
   273  	}
   274  
   275  	podInformerUsed := false
   276  	switch conf.HookMode {
   277  	case "none":
   278  		// Nothing to do: grpc calls will be enough
   279  		// Used by nri and crio
   280  		log.Infof("GadgetTracerManager: hook mode: none")
   281  		if !conf.TestOnly {
   282  			opts = append(opts, containercollection.WithInitialKubernetesContainers(g.nodeName))
   283  		}
   284  	case "auto":
   285  		if containerhook.Supported() {
   286  			log.Infof("GadgetTracerManager: hook mode: fanotify+ebpf (auto)")
   287  			opts = append(opts, containercollection.WithContainerFanotifyEbpf())
   288  			opts = append(opts, containercollection.WithInitialKubernetesContainers(g.nodeName))
   289  		} else if runcfanotify.Supported() {
   290  			log.Infof("GadgetTracerManager: hook mode: fanotify (auto)")
   291  			opts = append(opts, containercollection.WithRuncFanotify())
   292  			opts = append(opts, containercollection.WithInitialKubernetesContainers(g.nodeName))
   293  		} else {
   294  			log.Infof("GadgetTracerManager: hook mode: podinformer (auto)")
   295  			opts = append(opts, containercollection.WithPodInformer(g.nodeName))
   296  			podInformerUsed = true
   297  		}
   298  	case "podinformer":
   299  		log.Infof("GadgetTracerManager: hook mode: podinformer")
   300  		opts = append(opts, containercollection.WithPodInformer(g.nodeName))
   301  		podInformerUsed = true
   302  	case "fanotify":
   303  		log.Infof("GadgetTracerManager: hook mode: fanotify")
   304  		opts = append(opts, containercollection.WithRuncFanotify())
   305  		opts = append(opts, containercollection.WithInitialKubernetesContainers(g.nodeName))
   306  	case "fanotify+ebpf":
   307  		log.Infof("GadgetTracerManager: hook mode: fanotify+ebpf")
   308  		opts = append(opts, containercollection.WithContainerFanotifyEbpf())
   309  		opts = append(opts, containercollection.WithInitialKubernetesContainers(g.nodeName))
   310  	default:
   311  		return nil, fmt.Errorf("invalid hook mode: %s", conf.HookMode)
   312  	}
   313  
   314  	if conf.FallbackPodInformer && !podInformerUsed {
   315  		log.Infof("GadgetTracerManager: enabling fallback podinformer")
   316  		opts = append(opts, containercollection.WithFallbackPodInformer(g.nodeName))
   317  	}
   318  
   319  	err = g.ContainerCollection.Initialize(opts...)
   320  	if err != nil {
   321  		return nil, err
   322  	}
   323  
   324  	// Dirty hack
   325  	op := operators.GetRaw("KubeManager")
   326  	if setter, ok := op.(SetGadgetTracerMgr); ok {
   327  		setter.SetGadgetTracerMgr(g)
   328  	}
   329  	return g, nil
   330  }
   331  
   332  // SetGadgetTracerMgr is an interface that is implemented by KubeManager to be able
   333  // to set a reference to GadgetTracerManager
   334  type SetGadgetTracerMgr interface {
   335  	SetGadgetTracerMgr(*GadgetTracerManager)
   336  }
   337  
   338  type Conf struct {
   339  	NodeName            string
   340  	HookMode            string
   341  	FallbackPodInformer bool
   342  	TestOnly            bool
   343  }
   344  
   345  // Close releases any resource that could be in use by the tracer manager, like
   346  // ebpf maps.
   347  func (g *GadgetTracerManager) Close() {
   348  	if g.containersMap != nil {
   349  		g.containersMap.Close()
   350  	}
   351  	if g.tracerCollection != nil {
   352  		g.tracerCollection.Close()
   353  	}
   354  	g.ContainerCollection.Close()
   355  }