github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/pkg/ebpftracer/tracer.go (about)

     1  package ebpftracer
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/binary"
     7  	"errors"
     8  	"fmt"
     9  	"net/netip"
    10  	"os"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/castai/kvisor/pkg/cgroup"
    15  	"github.com/castai/kvisor/pkg/containers"
    16  	"github.com/castai/kvisor/pkg/ebpftracer/events"
    17  	"github.com/castai/kvisor/pkg/ebpftracer/signature"
    18  	"github.com/castai/kvisor/pkg/ebpftracer/types"
    19  	"github.com/castai/kvisor/pkg/logging"
    20  	"github.com/castai/kvisor/pkg/metrics"
    21  	"github.com/castai/kvisor/pkg/proc"
    22  	"github.com/cilium/ebpf/perf"
    23  	"github.com/go-playground/validator/v10"
    24  	"github.com/google/gopacket/layers"
    25  	"github.com/samber/lo"
    26  	"golang.org/x/sync/errgroup"
    27  	"golang.org/x/sys/unix"
    28  )
    29  
// ActualDestinationGetter is used to find actual destination ip.
// Usually this info is obtained from conntrack.
//
// GetDestination receives a source and destination address/port pair and
// returns an address/port together with a boolean.
// NOTE(review): the boolean presumably reports whether a mapping was found —
// confirm against the implementations.
type ActualDestinationGetter interface {
	GetDestination(src, dst netip.AddrPort) (netip.AddrPort, bool)
}
    35  
// ContainerClient is the container dependency injected through Config.
type ContainerClient interface {
	// GetContainerForCgroup presumably resolves the container that owns the
	// given cgroup ID — verify against the implementation.
	GetContainerForCgroup(ctx context.Context, cgroup cgroup.ID) (*containers.Container, error)
	// CleanupCgroup is called by the tracer for every cgroup it removes.
	CleanupCgroup(cgroup cgroup.ID)
}
    40  
// CgroupClient is the cgroup dependency injected through Config.
type CgroupClient interface {
	// LoadCgroup presumably registers the cgroup with the given ID and path —
	// verify against the implementation.
	LoadCgroup(id cgroup.ID, path string)
	// CleanupCgroup is called by the tracer for every cgroup it removes.
	CleanupCgroup(cgroup cgroup.ID)
	// NOTE(review): the argument looks like a cgroup hierarchy ID — confirm.
	IsDefaultHierarchy(uint32) bool
}
    46  
// Config holds the tracer settings and its injected dependencies.
// New validates it and fills in defaults for a few zero-valued fields.
type Config struct {
	// BTFPath is handed to the eBPF module as its BTF object path.
	BTFPath string
	// EventsPerCPUBuffer is the per-CPU buffer size passed to the events and
	// signals perf readers. New defaults it to 8192 when zero.
	EventsPerCPUBuffer int
	// EventsOutputChanSize is the buffer size of the channel returned by
	// Events. New defaults it to 16384 when zero.
	EventsOutputChanSize int
	// GCInterval is defaulted to 15 seconds by New when zero.
	GCInterval time.Duration
	// DefaultCgroupsVersion must be "V1" or "V2"; "V1" sets the cgroup v1
	// option flag in the eBPF config.
	DefaultCgroupsVersion string `validate:"required,oneof=V1 V2"`
	// DebugEnabled starts the debug events loop in Run and enables logging of
	// perf read errors and decode errors.
	DebugEnabled bool
	// ContainerClient and CgroupClient are notified when cgroups are removed.
	ContainerClient ContainerClient
	CgroupClient    CgroupClient
	// SignatureEngine, when non-nil, makes ApplyPolicy also enable the
	// policy's signature events.
	SignatureEngine        *signature.SignatureEngine
	MountNamespacePIDStore *types.PIDsPerNamespace
	// All PIDs reported from ebpf will be normalized to this PID namespace
	HomePIDNS     proc.NamespaceID
	AllowAnyEvent bool
	// NetflowOutputChanSize is the buffer size of the channel returned by
	// NetflowEvents.
	NetflowOutputChanSize int
	// NetflowSampleSubmitIntervalSeconds is passed to the eBPF module on Load.
	NetflowSampleSubmitIntervalSeconds uint64
}
    64  
// cgroupCleanupRequest is a queued request to remove the tracer state of a
// cgroup once a point in time has passed.
type cgroupCleanupRequest struct {
	// cgroupID identifies the cgroup to clean up.
	cgroupID cgroup.ID
	// cleanupAfter is the earliest time at which the cleanup loop removes the cgroup.
	cleanupAfter time.Time
}
    69  
// Tracer loads the eBPF module, applies a policy to it and exposes decoded
// events through channels.
type Tracer struct {
	log *logging.Logger
	cfg Config

	// bootTime is computed in New as wall-clock time minus the monotonic
	// clock reading, in nanoseconds.
	bootTime uint64

	module *module
	// eventsSet is built from the module objects in Load.
	eventsSet map[events.ID]definition

	// policyMu guards policy, eventPoliciesMap and cgroupEventPolicy.
	policyMu         sync.Mutex
	policy           *Policy
	eventPoliciesMap map[events.ID]*EventPolicy
	// cgroupEventPolicy holds per-cgroup, per-event policy state that is
	// created lazily by getPolicy and dropped by removeCgroups.
	cgroupEventPolicy map[cgroup.ID]map[events.ID]*cgroupEventPolicy
	// signatureEventMap is filled by ApplyPolicy with the policy's signature events.
	signatureEventMap map[events.ID]struct{}

	// eventsChan and netflowEventsChan back Events and NetflowEvents.
	eventsChan        chan *types.Event
	netflowEventsChan chan *types.Event

	// removedCgroupsMu guards removedCgroups.
	removedCgroupsMu sync.Mutex
	removedCgroups   map[uint64]struct{}

	dnsPacketParser *layers.DNS

	// cgroupCleanupMu guards requestedCgroupCleanups, which is kept ordered
	// by cleanup time because entries are only ever appended.
	cgroupCleanupMu         sync.Mutex
	requestedCgroupCleanups []cgroupCleanupRequest

	// cleanupTimerTickRate is the interval of the cleanup loop ticker.
	cleanupTimerTickRate time.Duration
	// cgroupCleanupDelay is how long a queued cgroup waits before removal.
	cgroupCleanupDelay time.Duration
}
    99  
   100  func New(log *logging.Logger, cfg Config) *Tracer {
   101  	if err := validator.New().Struct(cfg); err != nil {
   102  		panic(fmt.Errorf("invalid ebpftracer config: %w", err).Error())
   103  	}
   104  
   105  	log = log.WithField("component", "ebpftracer")
   106  	m := newModule(log, moduleConfig{
   107  		BTFObjPath: cfg.BTFPath,
   108  	})
   109  
   110  	if cfg.EventsPerCPUBuffer == 0 {
   111  		cfg.EventsPerCPUBuffer = 8192
   112  	}
   113  	if cfg.EventsOutputChanSize == 0 {
   114  		cfg.EventsOutputChanSize = 16384
   115  	}
   116  	if cfg.GCInterval == 0 {
   117  		cfg.GCInterval = 15 * time.Second
   118  	}
   119  
   120  	var ts unix.Timespec
   121  	err := unix.ClockGettime(unix.CLOCK_MONOTONIC, &ts)
   122  	if err != nil {
   123  		panic(fmt.Errorf("getting clock time: %w", err).Error())
   124  	}
   125  	bootTime := time.Now().UnixNano() - ts.Nano()
   126  
   127  	t := &Tracer{
   128  		log:                  log,
   129  		cfg:                  cfg,
   130  		module:               m,
   131  		bootTime:             uint64(bootTime),
   132  		eventsChan:           make(chan *types.Event, cfg.EventsOutputChanSize),
   133  		netflowEventsChan:    make(chan *types.Event, cfg.NetflowOutputChanSize),
   134  		removedCgroups:       map[uint64]struct{}{},
   135  		eventPoliciesMap:     map[events.ID]*EventPolicy{},
   136  		cgroupEventPolicy:    map[uint64]map[events.ID]*cgroupEventPolicy{},
   137  		dnsPacketParser:      &layers.DNS{},
   138  		signatureEventMap:    map[events.ID]struct{}{},
   139  		cleanupTimerTickRate: 1 * time.Minute,
   140  		cgroupCleanupDelay:   1 * time.Minute,
   141  	}
   142  
   143  	return t
   144  }
   145  
   146  func (t *Tracer) Load() error {
   147  	if err := t.module.load(t.cfg.HomePIDNS, t.cfg.NetflowSampleSubmitIntervalSeconds); err != nil {
   148  		return fmt.Errorf("loading ebpf module: %w", err)
   149  	}
   150  	t.eventsSet = newEventsDefinitionSet(t.module.objects)
   151  	return nil
   152  }
   153  
   154  func (t *Tracer) Close() error {
   155  	return t.module.close()
   156  }
   157  
   158  func (t *Tracer) Run(ctx context.Context) error {
   159  	t.log.Infof("running")
   160  	defer t.log.Infof("stopping")
   161  
   162  	if !t.module.loaded.Load() {
   163  		return errors.New("tracer is not loaded")
   164  	}
   165  	errg, ctx := errgroup.WithContext(ctx)
   166  	if t.cfg.DebugEnabled {
   167  		errg.Go(func() error {
   168  			return t.debugEventsLoop(ctx)
   169  		})
   170  	}
   171  	errg.Go(func() error {
   172  		return t.eventsReadLoop(ctx)
   173  	})
   174  	errg.Go(func() error {
   175  		return t.signalReadLoop(ctx)
   176  	})
   177  	errg.Go(func() error {
   178  		return t.cgroupCleanupLoop(ctx)
   179  	})
   180  
   181  	return errg.Wait()
   182  }
   183  
   184  func (t *Tracer) Events() <-chan *types.Event {
   185  	return t.eventsChan
   186  }
   187  
   188  func (t *Tracer) NetflowEvents() <-chan *types.Event {
   189  	return t.netflowEventsChan
   190  }
   191  
   192  func (t *Tracer) GetEventName(id events.ID) string {
   193  	if def, found := t.eventsSet[id]; found {
   194  		return def.name
   195  	}
   196  	return ""
   197  }
   198  
   199  func (t *Tracer) signalReadLoop(ctx context.Context) error {
   200  	eventsReader, err := perf.NewReader(t.module.objects.Signals, t.cfg.EventsPerCPUBuffer)
   201  	if err != nil {
   202  		return err
   203  	}
   204  	defer eventsReader.Close()
   205  
   206  	for {
   207  		select {
   208  		case <-ctx.Done():
   209  			return ctx.Err()
   210  		default:
   211  		}
   212  		record, err := eventsReader.Read()
   213  		if err != nil {
   214  			if t.cfg.DebugEnabled {
   215  				t.log.Warnf("reading signals: %v", err)
   216  			}
   217  			continue
   218  		}
   219  		if record.LostSamples > 0 {
   220  			t.log.Warnf("lost %d signals", record.LostSamples)
   221  			metrics.AgentKernelLostEventsTotal.Add(float64(record.LostSamples))
   222  			continue
   223  		}
   224  		metrics.AgentPulledEventsTotal.Inc()
   225  
   226  		if err := t.decodeAndHandleSignal(ctx, record.RawSample); err != nil {
   227  			if t.cfg.DebugEnabled || errors.Is(err, ErrPanic) {
   228  				t.log.Errorf("decoding signal: %v", err)
   229  			}
   230  			metrics.AgentDecodeEventErrorsTotal.Inc()
   231  			continue
   232  		}
   233  	}
   234  }
   235  
   236  func (t *Tracer) eventsReadLoop(ctx context.Context) error {
   237  	eventsReader, err := perf.NewReader(t.module.objects.Events, t.cfg.EventsPerCPUBuffer)
   238  	if err != nil {
   239  		return err
   240  	}
   241  	defer eventsReader.Close()
   242  
   243  	for {
   244  		select {
   245  		case <-ctx.Done():
   246  			return ctx.Err()
   247  		default:
   248  		}
   249  
   250  		record, err := eventsReader.Read()
   251  		if err != nil {
   252  			if t.cfg.DebugEnabled {
   253  				t.log.Warnf("reading event: %v", err)
   254  			}
   255  			continue
   256  		}
   257  		if record.LostSamples > 0 {
   258  			t.log.Warnf("lost %d events", record.LostSamples)
   259  			metrics.AgentKernelLostEventsTotal.Add(float64(record.LostSamples))
   260  			continue
   261  		}
   262  		metrics.AgentPulledEventsTotal.Inc()
   263  
   264  		if err := t.decodeAndExportEvent(ctx, record.RawSample); err != nil {
   265  			if t.cfg.DebugEnabled || errors.Is(err, ErrPanic) {
   266  				t.log.Errorf("decoding event: %v", err)
   267  			}
   268  			metrics.AgentDecodeEventErrorsTotal.Inc()
   269  			continue
   270  		}
   271  	}
   272  }
   273  
   274  func (t *Tracer) findAllRequiredEvents(id events.ID, out map[events.ID]struct{}) {
   275  	// No need to load the whole dependency tree twice
   276  	if _, found := out[id]; found {
   277  		return
   278  	}
   279  
   280  	def := t.eventsSet[id]
   281  	out[id] = struct{}{}
   282  	for _, def := range def.dependencies.ids {
   283  		t.findAllRequiredEvents(def, out)
   284  	}
   285  }
   286  
   287  func (t *Tracer) ApplyPolicy(policy *Policy) error {
   288  	if !t.module.loaded.Load() {
   289  		return errors.New("tracer is not loaded")
   290  	}
   291  	t.policyMu.Lock()
   292  	defer t.policyMu.Unlock()
   293  
   294  	if t.policy != nil {
   295  		// TODO(Kvisord): Here we can add policy diff with previous one and dynamically update policy.
   296  		return errors.New("policy update is not supported yet")
   297  	}
   298  
   299  	t.policy = policy
   300  	for _, event := range t.policy.Events {
   301  		event := event
   302  		t.eventPoliciesMap[event.ID] = event
   303  	}
   304  
   305  	eventsParams := getParamTypes(t.eventsSet)
   306  	requiredEventsIDs := make(map[events.ID]struct{})
   307  	for _, event := range policy.Events {
   308  		event := event
   309  		t.eventPoliciesMap[event.ID] = event
   310  		t.findAllRequiredEvents(event.ID, requiredEventsIDs)
   311  	}
   312  	if t.cfg.SignatureEngine != nil {
   313  		requiredSignatureEvents := policy.SignatureEvents
   314  		for _, eventID := range requiredSignatureEvents {
   315  			t.signatureEventMap[eventID] = struct{}{}
   316  		}
   317  		for _, eventID := range requiredSignatureEvents {
   318  			t.findAllRequiredEvents(eventID, requiredEventsIDs)
   319  		}
   320  	}
   321  
   322  	for _, eventID := range policy.SystemEvents {
   323  		t.findAllRequiredEvents(eventID, requiredEventsIDs)
   324  	}
   325  
   326  	eventsBpfMapConfig := make(map[events.ID][]byte)
   327  
   328  	objs := t.module.objects
   329  
   330  	var tailCalls []TailCall
   331  	probesToAttach := map[handle]bool{}
   332  	for id := range requiredEventsIDs {
   333  		def, found := t.eventsSet[id]
   334  		if !found {
   335  			return fmt.Errorf("missing event definition for id %d", id)
   336  		}
   337  
   338  		tailCalls = append(tailCalls, def.dependencies.tailCalls...)
   339  		if def.syscall {
   340  			probesToAttach[ProbeSyscallEnter__Internal] = true
   341  			probesToAttach[ProbeSyscallExit__Internal] = true
   342  			// Add default tail calls for syscall events.
   343  			if len(def.dependencies.tailCalls) == 0 && !def.dependencies.skipDefaultTailCalls {
   344  				tailCalls = append(tailCalls, getDefaultSyscallTailCalls(objs, def)...)
   345  			}
   346  		}
   347  		for _, dep := range def.dependencies.probes {
   348  			if required, found := probesToAttach[dep.handle]; found {
   349  				if !required {
   350  					probesToAttach[dep.handle] = dep.required
   351  				}
   352  			} else {
   353  				probesToAttach[dep.handle] = dep.required
   354  			}
   355  		}
   356  
   357  		eventConfigVal := marshalEventConfig(eventsParams, id)
   358  		eventsBpfMapConfig[id] = eventConfigVal
   359  	}
   360  
   361  	// Attach selected probes.
   362  	for handle, required := range probesToAttach {
   363  		if err := t.module.attachProbe(handle); err != nil {
   364  			if required {
   365  				return fmt.Errorf("attaching probe %d: %w", handle, err)
   366  			} else {
   367  				t.log.Warnf("attaching optional probe %d: %v", handle, err)
   368  			}
   369  		}
   370  	}
   371  
   372  	// Send events configs in events ebpf map.
   373  	for id, cfg := range eventsBpfMapConfig {
   374  		if err := t.module.objects.EventsMap.Update(&id, cfg, 0); err != nil {
   375  			return fmt.Errorf("updating events map, event %d: %w", id, err)
   376  		}
   377  	}
   378  	config := t.computeConfigValues(policy)
   379  	if err := t.module.objects.ConfigMap.Update(uint32(0), config, 0); err != nil {
   380  		return fmt.Errorf("updating config map: %w", err)
   381  	}
   382  
   383  	// Initialize tail call dependencies.
   384  	for _, tailCall := range tailCalls {
   385  		err := t.initTailCall(tailCall)
   386  		if err != nil {
   387  			return fmt.Errorf("failed to initialize tail call: %w", err)
   388  		}
   389  	}
   390  
   391  	return nil
   392  }
   393  
   394  func marshalEventConfig(eventsParams map[events.ID][]ArgType, id events.ID) []byte {
   395  	eventConfigVal := make([]byte, 16)
   396  	// bitmap of policies that require this event to be submitted
   397  	binary.LittleEndian.PutUint64(eventConfigVal[0:8], 1)
   398  	// encoded event's parameter types
   399  	var paramTypes uint64
   400  	params := eventsParams[id]
   401  	for n, paramType := range params {
   402  		paramTypes = paramTypes | (uint64(paramType) << (8 * n))
   403  	}
   404  	binary.LittleEndian.PutUint64(eventConfigVal[8:16], paramTypes)
   405  	return eventConfigVal
   406  }
   407  
   408  func getDefaultSyscallTailCalls(objs *tracerObjects, def definition) []TailCall {
   409  	return []TailCall{
   410  		{objs.SysEnterInitTail, objs.SysEnterInit, []uint32{uint32(def.ID)}},
   411  		{objs.SysEnterSubmitTail, objs.SysEnterSubmit, []uint32{uint32(def.ID)}},
   412  		{objs.SysExitInitTail, objs.SysExitInit, []uint32{uint32(def.ID)}},
   413  		{objs.SysExitSubmitTail, objs.SysExitSubmit, []uint32{uint32(def.ID)}},
   414  	}
   415  }
   416  
   417  func getParamTypes(eventsSet map[events.ID]definition) map[events.ID][]ArgType {
   418  	eventsParams := make(map[events.ID][]ArgType)
   419  	for _, eventDefinition := range eventsSet {
   420  		id := eventDefinition.ID
   421  		params := eventDefinition.params
   422  		for _, param := range params {
   423  			eventsParams[id] = append(eventsParams[id], getParamType(param.Type))
   424  		}
   425  	}
   426  	return eventsParams
   427  }
   428  
// Option bit flags combined by getOptionsConfig into the options word of the
// eBPF config entry. Each constant occupies its own bit (1 << iota), so the
// declaration order defines the bit positions and must not be changed
// independently of the eBPF side.
// NOTE(review): the matching flag definitions presumably live in the eBPF C
// code — confirm before reordering.
const (
	optExecEnv uint32 = 1 << iota
	optCaptureFilesWrite
	optExtractDynCode
	optStackAddresses
	optCaptureModules
	optCgroupV1
	optTranslateFDFilePath
	optCaptureBpf
	optCaptureFileRead
)
   440  
   441  func (t *Tracer) getOptionsConfig(p *Policy) uint32 {
   442  	var cOptVal uint32
   443  
   444  	if p.Output.ExecEnv {
   445  		cOptVal = cOptVal | optExecEnv
   446  	}
   447  	if p.Output.StackAddresses {
   448  		cOptVal = cOptVal | optStackAddresses
   449  	}
   450  	// TODO: Check other options.
   451  	//if t.config.Capture.FileWrite.Capture {
   452  	//	cOptVal = cOptVal | optCaptureFilesWrite
   453  	//}
   454  	//if t.config.Capture.FileRead.Capture {
   455  	//	cOptVal = cOptVal | optCaptureFileRead
   456  	//}
   457  	//if t.config.Capture.Module {
   458  	//	cOptVal = cOptVal | optCaptureModules
   459  	//}
   460  	//if t.config.Capture.Bpf {
   461  	//	cOptVal = cOptVal | optCaptureBpf
   462  	//}
   463  	//if t.config.Capture.Mem {
   464  	//	cOptVal = cOptVal | optExtractDynCode
   465  	//}
   466  	//if t.config.Output.ParseArgumentsFDs {
   467  	//	cOptVal = cOptVal | optTranslateFDFilePath
   468  	//}
   469  	if t.cfg.DefaultCgroupsVersion == "V1" {
   470  		cOptVal = cOptVal | optCgroupV1
   471  	}
   472  	return cOptVal
   473  }
   474  
   475  func (t *Tracer) computeConfigValues(p *Policy) []byte {
   476  	// config_entry
   477  	configVal := make([]byte, 256)
   478  
   479  	// tracee_pid
   480  	binary.LittleEndian.PutUint32(configVal[0:4], uint32(os.Getpid()))
   481  	// options
   482  	binary.LittleEndian.PutUint32(configVal[4:8], t.getOptionsConfig(p))
   483  	// cgroup_v1_hid
   484  	//binary.LittleEndian.PutUint32(configVal[8:12], uint32(t.containers.GetDefaultCgroupHierarchyID()))
   485  	binary.LittleEndian.PutUint32(configVal[8:12], 0)
   486  	// padding
   487  	binary.LittleEndian.PutUint32(configVal[12:16], 0)
   488  
   489  	id := 0
   490  	byteIndex := id / 8
   491  	bitOffset := id % 8
   492  
   493  	// enabled_scopes
   494  	configVal[216+byteIndex] |= 1 << bitOffset
   495  
   496  	// compute all policies internals
   497  	//t.config.Policies.Compute()
   498  
   499  	// uid_max
   500  	//binary.LittleEndian.PutUint64(configVal[224:232], t.config.Policies.UIDFilterMax())
   501  	//// uid_min
   502  	//binary.LittleEndian.PutUint64(configVal[232:240], t.config.Policies.UIDFilterMin())
   503  	//// pid_max
   504  	//binary.LittleEndian.PutUint64(configVal[240:248], t.config.Policies.PIDFilterMax())
   505  	//// pid_min
   506  	//binary.LittleEndian.PutUint64(configVal[248:256], t.config.Policies.PIDFilterMin())
   507  
   508  	return configVal
   509  }
   510  
   511  func (t *Tracer) initTailCall(tailCall TailCall) error {
   512  	tailCallIndexes := tailCall.indexes
   513  	// Pick eBPF program file descriptor.
   514  	bpfProgFD := uint32(tailCall.ebpfProg.FD())
   515  	if tailCall.ebpfProg.FD() < 0 {
   516  		return fmt.Errorf("ebpf tail call map fd is negative")
   517  	}
   518  
   519  	t.log.Debugf("init tail call, map=%s, prog=%s", tailCall.ebpfMap.String(), tailCall.ebpfProg.String())
   520  
   521  	// Pick all indexes (event, or syscall, IDs) the BPF program should be related to.
   522  	for _, index := range tailCallIndexes {
   523  		index := index
   524  		// Special treatment for indexes of syscall events.
   525  		if t.eventsSet[events.ID(index)].syscall {
   526  			// Workaround: Do not map eBPF program to unsupported syscalls (arm64, e.g.)
   527  			if index >= uint32(events.Unsupported) {
   528  				continue
   529  			}
   530  		}
   531  		// Update given eBPF map with the eBPF program file descriptor at given index.
   532  		err := tailCall.ebpfMap.Update(&index, &bpfProgFD, 0)
   533  		if err != nil {
   534  			return err
   535  		}
   536  	}
   537  
   538  	return nil
   539  }
   540  
   541  func (t *Tracer) debugEventsLoop(ctx context.Context) error {
   542  	rd, err := perf.NewReader(t.module.objects.DebugEvents, 2048)
   543  	if err != nil {
   544  		return fmt.Errorf("creating debug events perf reader: %w", err)
   545  	}
   546  
   547  	var e types.RawDebugEvent
   548  	for {
   549  		select {
   550  		case <-ctx.Done():
   551  			return ctx.Err()
   552  		default:
   553  		}
   554  
   555  		v, err := rd.Read()
   556  		if err != nil {
   557  			if errors.Is(err, perf.ErrClosed) {
   558  				return nil
   559  			}
   560  			continue
   561  		}
   562  
   563  		if v.LostSamples > 0 {
   564  			t.log.Warnf("lost samples %d", v.LostSamples)
   565  		}
   566  		if len(v.RawSample) == 0 {
   567  			continue
   568  		}
   569  		if err := binary.Read(bytes.NewBuffer(v.RawSample), binary.LittleEndian, &e); err != nil {
   570  			return fmt.Errorf("read event binary: %w", err)
   571  		}
   572  
   573  		msg := e.String()
   574  		fmt.Printf("%s\n", msg)
   575  	}
   576  }
   577  
   578  func (t *Tracer) allowedByPolicyPre(ctx *types.EventContext) error {
   579  	policy := t.getPolicy(ctx.EventID, ctx.CgroupID)
   580  
   581  	if policy != nil {
   582  		return policy.allowPre(ctx)
   583  	}
   584  
   585  	// No policy.
   586  	return nil
   587  }
   588  
   589  func (t *Tracer) allowedByPolicy(eventID events.ID, cgroupID uint64, event *types.Event) error {
   590  	policy := t.getPolicy(eventID, cgroupID)
   591  
   592  	if policy != nil {
   593  		return policy.allow(event)
   594  	}
   595  
   596  	// No policy.
   597  	return nil
   598  }
   599  
   600  func (t *Tracer) getPolicy(eventID events.ID, cgroupID uint64) *cgroupEventPolicy {
   601  	t.policyMu.Lock()
   602  	defer t.policyMu.Unlock()
   603  
   604  	eventPolicy, found := t.eventPoliciesMap[eventID]
   605  	if found {
   606  		cgPolicyMap, found := t.cgroupEventPolicy[cgroupID]
   607  
   608  		if !found {
   609  			cgPolicyMap = make(map[events.ID]*cgroupEventPolicy)
   610  			t.cgroupEventPolicy[cgroupID] = cgPolicyMap
   611  		}
   612  
   613  		cgPolicy, found := cgPolicyMap[eventID]
   614  
   615  		if !found {
   616  			cgPolicy = newCgroupEventPolicy(eventPolicy)
   617  			t.cgroupEventPolicy[cgroupID][eventID] = cgPolicy
   618  		}
   619  		return cgPolicy
   620  	}
   621  
   622  	return nil
   623  }
   624  
   625  func (t *Tracer) cgroupCleanupLoop(ctx context.Context) error {
   626  	cleanupTimer := time.NewTicker(t.cleanupTimerTickRate)
   627  	defer func() {
   628  		cleanupTimer.Stop()
   629  	}()
   630  
   631  	for {
   632  		select {
   633  		case <-ctx.Done():
   634  			return ctx.Err()
   635  		case <-cleanupTimer.C:
   636  		}
   637  
   638  		now := time.Now()
   639  		var toCleanup []cgroupCleanupRequest
   640  
   641  		t.cgroupCleanupMu.Lock()
   642  		toCleanup, t.requestedCgroupCleanups = splitCleanupRequests(now, t.requestedCgroupCleanups)
   643  		t.cgroupCleanupMu.Unlock()
   644  
   645  		cgroupsToCleanup := lo.Map(toCleanup, func(item cgroupCleanupRequest, index int) cgroup.ID {
   646  			return item.cgroupID
   647  		})
   648  		t.removeCgroups(cgroupsToCleanup)
   649  	}
   650  }
   651  
   652  // splitCleanupRequests will split the given slice by the first index that is after the provided `now`. The provided
   653  // requests need to be sorted by cleanup date.
   654  func splitCleanupRequests(now time.Time, requests []cgroupCleanupRequest) ([]cgroupCleanupRequest, []cgroupCleanupRequest) {
   655  	splitIdx := len(requests)
   656  	// Requests have to be orderd by cleanup date.
   657  	for i, r := range requests {
   658  		if now.Before(r.cleanupAfter) {
   659  			splitIdx = i
   660  			break
   661  		}
   662  	}
   663  
   664  	return requests[:splitIdx], requests[splitIdx:]
   665  }
   666  
   667  func (t *Tracer) queueCgroupForRemoval(cgroupID cgroup.ID) {
   668  	t.cgroupCleanupMu.Lock()
   669  	t.requestedCgroupCleanups = append(t.requestedCgroupCleanups, cgroupCleanupRequest{
   670  		cgroupID:     cgroupID,
   671  		cleanupAfter: time.Now().Add(t.cgroupCleanupDelay),
   672  	})
   673  	t.cgroupCleanupMu.Unlock()
   674  }
   675  
   676  func (t *Tracer) removeCgroups(cgroupIDs []cgroup.ID) {
   677  	t.policyMu.Lock()
   678  	t.removedCgroupsMu.Lock()
   679  	for _, id := range cgroupIDs {
   680  		delete(t.cgroupEventPolicy, id)
   681  		t.removedCgroups[id] = struct{}{}
   682  	}
   683  	t.policyMu.Unlock()
   684  	t.removedCgroupsMu.Unlock()
   685  
   686  	for _, id := range cgroupIDs {
   687  		t.cfg.ContainerClient.CleanupCgroup(id)
   688  		t.cfg.CgroupClient.CleanupCgroup(id)
   689  	}
   690  }