github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/cmd/agent/daemon/state/controller.go (about)

     1  package state
     2  
     3  import (
     4  	"context"
     5  	"hash/maphash"
     6  	"net/netip"
     7  	"os"
     8  	"sync"
     9  	"time"
    10  
    11  	kubepb "github.com/castai/kvisor/api/v1/kube"
    12  	castpb "github.com/castai/kvisor/api/v1/runtime"
    13  	"github.com/castai/kvisor/cmd/agent/daemon/enrichment"
    14  	"github.com/castai/kvisor/cmd/agent/daemon/netstats"
    15  	"github.com/castai/kvisor/pkg/cgroup"
    16  	"github.com/castai/kvisor/pkg/containers"
    17  	"github.com/castai/kvisor/pkg/ebpftracer"
    18  	"github.com/castai/kvisor/pkg/ebpftracer/types"
    19  	"github.com/castai/kvisor/pkg/logging"
    20  	"github.com/cespare/xxhash/v2"
    21  	"github.com/elastic/go-freelru"
    22  	"golang.org/x/sync/errgroup"
    23  )
    24  
// Config carries the interval settings handed to NewController and stored on
// the Controller. Every field is tagged `validate:"required"`.
//
// NOTE(review): the code that consumes these intervals is outside this view;
// the per-field descriptions below are inferred from the field names and
// should be confirmed against the pipelines that read them.
type Config struct {
	// Presumably the period between container stats scrapes.
	ContainerStatsScrapeInterval time.Duration `validate:"required"`
	// Presumably the period between netflow exports.
	NetflowExportInterval        time.Duration `validate:"required"`
	// Presumably the period between netflow cleanup passes.
	NetflowCleanupInterval       time.Duration `validate:"required"`
}
    30  
// containersClient is the set of container and cgroup operations the
// Controller requires from its container-tracking dependency. It is declared
// here, at the consumer, so that any implementation with these methods can be
// passed to NewController.
type containersClient interface {
	ListContainers() []*containers.Container
	GetContainerForCgroup(ctx context.Context, cgroup uint64) (*containers.Container, error)
	LookupContainerForCgroupInCache(cgroup uint64) (*containers.Container, bool, error)
	CleanupCgroup(cgroup cgroup.ID)
	// GetCgroupsInNamespace is used by MuteNamespace and UnmuteNamespace to
	// obtain the cgroup IDs to pass to the tracer.
	GetCgroupsInNamespace(namespace string) []uint64
	// Run registers Controller.onNewContainer through this method.
	RegisterContainerCreatedListener(l containers.ContainerCreatedListener)
	// Run registers Controller.onDeleteContainer through this method.
	RegisterContainerDeletedListener(l containers.ContainerDeletedListener)
	GetCgroupCpuStats(c *containers.Container) (*cgroup.CPUStat, error)
	GetCgroupMemoryStats(c *containers.Container) (*cgroup.MemoryStat, error)
}
    42  
// netStatsReader is the network statistics source the Controller depends on.
type netStatsReader interface {
	// Read returns a slice of interface statistics for the given pid, or an error.
	Read(pid uint32) ([]netstats.InterfaceStats, error)
}
    46  
// ebpfTracer is the subset of the eBPF tracer the Controller depends on:
// two receive-only event channels, per-cgroup mute controls, and a syscall
// statistics reader.
type ebpfTracer interface {
	Events() <-chan *types.Event
	NetflowEvents() <-chan *types.Event
	// MuteEventsFromCgroup is called by onNewContainer for containers created
	// in a muted namespace.
	MuteEventsFromCgroup(cgroup uint64) error
	// MuteEventsFromCgroups is called by MuteNamespace with every cgroup of
	// the namespace.
	MuteEventsFromCgroups(cgroups []uint64) error
	UnmuteEventsFromCgroup(cgroup uint64) error
	// UnmuteEventsFromCgroups is called by UnmuteNamespace with every cgroup
	// of the namespace.
	UnmuteEventsFromCgroups(cgroups []uint64) error
	IsCgroupMuted(cgroup uint64) bool
	// ReadSyscallStats returns syscall statistics keyed by cgroup ID.
	ReadSyscallStats() (map[ebpftracer.SyscallStatsKeyCgroupID][]ebpftracer.SyscallStats, error)
}
    57  
// signatureEngine is the signature engine dependency of the Controller; the
// only thing required from it is a receive-only channel of runtime events.
type signatureEngine interface {
	Events() <-chan *castpb.Event
}
    61  
// enrichmentService is the enrichment dependency of the Controller: requests
// go in through Enqueue and events come out of the Events channel.
type enrichmentService interface {
	// Enqueue submits e and reports a bool result.
	// NOTE(review): presumably false means the request was not accepted —
	// confirm against the implementation.
	Enqueue(e *enrichment.EnrichRequest) bool
	Events() <-chan *castpb.Event
}
    66  
// conntrackClient is the connection-tracking dependency of the Controller.
type conntrackClient interface {
	// GetDestination takes a source/destination address-port pair and returns
	// an address-port plus a bool.
	// NOTE(review): presumably the bool reports whether a conntrack entry was
	// found and the result is the translated destination — confirm.
	GetDestination(src, dst netip.AddrPort) (netip.AddrPort, bool)
}
    70  
    71  func NewController(
    72  	log *logging.Logger,
    73  	cfg Config,
    74  	exporters *Exporters,
    75  	containersClient containersClient,
    76  	netStatsReader netStatsReader,
    77  	ct conntrackClient,
    78  	tracer ebpfTracer,
    79  	signatureEngine signatureEngine,
    80  	enrichmentService enrichmentService,
    81  	kubeClient kubepb.KubeAPIClient,
    82  ) *Controller {
    83  	dnsCache, err := freelru.NewSynced[uint64, *freelru.SyncedLRU[netip.Addr, string]](1024, func(k uint64) uint32 {
    84  		return uint32(k)
    85  	})
    86  	if err != nil {
    87  		panic(err)
    88  	}
    89  	ipInfoCache, err := freelru.NewSynced[netip.Addr, *kubepb.IPInfo](1024, func(k netip.Addr) uint32 {
    90  		return uint32(xxhash.Sum64(k.AsSlice()))
    91  	})
    92  	if err != nil {
    93  		panic(err)
    94  	}
    95  	podCache, err := freelru.NewSynced[string, *kubepb.Pod](1024, func(k string) uint32 {
    96  		return uint32(xxhash.Sum64String(k))
    97  	})
    98  	if err != nil {
    99  		panic(err)
   100  	}
   101  	return &Controller{
   102  		log:                        log.WithField("component", "ctrl"),
   103  		cfg:                        cfg,
   104  		exporters:                  exporters,
   105  		containersClient:           containersClient,
   106  		netStatsReader:             netStatsReader,
   107  		ct:                         ct,
   108  		tracer:                     tracer,
   109  		signatureEngine:            signatureEngine,
   110  		enrichmentService:          enrichmentService,
   111  		kubeClient:                 kubeClient,
   112  		nodeName:                   os.Getenv("NODE_NAME"),
   113  		resourcesStatsScrapePoints: map[uint64]*resourcesStatsScrapePoint{},
   114  		syscallScrapePoints:        map[uint64]*syscallScrapePoint{},
   115  		mutedNamespaces:            map[string]struct{}{},
   116  		netflows:                   make(map[uint64]*netflowVal),
   117  		dnsCache:                   dnsCache,
   118  		ipInfoCache:                ipInfoCache,
   119  		podCache:                   podCache,
   120  	}
   121  }
   122  
// Controller ties together the container client, the eBPF tracer, the
// signature engine, the enrichment service and the exporters. Run starts one
// pipeline per configured exporter kind, and MuteNamespace / UnmuteNamespace /
// IsMutedNamespace manage the set of muted namespaces.
//
// Use NewController to create one: it allocates the maps and LRU caches below.
// NewController does not set netflowKeyHash, netflowDestKeyHash or
// clusterInfo; they start at their zero values.
type Controller struct {
	log               *logging.Logger
	cfg               Config
	containersClient  containersClient
	netStatsReader    netStatsReader
	ct                conntrackClient
	tracer            ebpfTracer
	signatureEngine   signatureEngine
	enrichmentService enrichmentService
	exporters         *Exporters

	// nodeName is read from the NODE_NAME environment variable by NewController.
	nodeName string

	// Scrape points are used to calculate deltas between scrapes.
	// Both maps are keyed by cgroup ID (onDeleteContainer deletes entries by
	// container.CgroupID) and are modified under their respective mutex.
	resourcesStatsScrapePointsMu sync.RWMutex
	resourcesStatsScrapePoints   map[uint64]*resourcesStatsScrapePoint
	syscallScrapePointsMu        sync.RWMutex
	syscallScrapePoints          map[uint64]*syscallScrapePoint

	// mutedNamespaces is the set of muted namespace names; it is accessed
	// under mutedNamespacesMu.
	mutedNamespacesMu sync.RWMutex
	mutedNamespaces   map[string]struct{}

	// NOTE(review): netflowsMu presumably guards netflows and the two hashes;
	// the code using them is outside this view — confirm.
	netflowsMu         sync.Mutex
	netflows           map[uint64]*netflowVal
	netflowKeyHash     maphash.Hash
	netflowDestKeyHash maphash.Hash
	clusterInfo        *clusterInfo
	// dnsCache is keyed by cgroup ID (onDeleteContainer removes the entry for
	// container.CgroupID); each value is itself an LRU from address to string.
	dnsCache           *freelru.SyncedLRU[uint64, *freelru.SyncedLRU[netip.Addr, string]]
	kubeClient         kubepb.KubeAPIClient
	ipInfoCache        *freelru.SyncedLRU[netip.Addr, *kubepb.IPInfo]
	podCache           *freelru.SyncedLRU[string, *kubepb.Pod]
}
   155  
   156  func (c *Controller) Run(ctx context.Context) error {
   157  	c.log.Infof("running")
   158  	defer c.log.Infof("stopping")
   159  
   160  	c.containersClient.RegisterContainerCreatedListener(c.onNewContainer)
   161  	c.containersClient.RegisterContainerDeletedListener(c.onDeleteContainer)
   162  
   163  	errg, ctx := errgroup.WithContext(ctx)
   164  	if len(c.exporters.Events) > 0 {
   165  		errg.Go(func() error {
   166  			return c.runEventsPipeline(ctx)
   167  		})
   168  	}
   169  	if len(c.exporters.ContainerStats) > 0 {
   170  		errg.Go(func() error {
   171  			return c.runContainerStatsPipeline(ctx)
   172  		})
   173  	}
   174  	if len(c.exporters.Netflow) > 0 {
   175  		errg.Go(func() error {
   176  			return c.runNetflowPipeline(ctx)
   177  		})
   178  	}
   179  	return errg.Wait()
   180  }
   181  
   182  func (c *Controller) onNewContainer(container *containers.Container) {
   183  	if !c.IsMutedNamespace(container.PodNamespace) {
   184  		return
   185  	}
   186  
   187  	// We explicitly mute cgroups of new containers in muted namespaces, as otherwise
   188  	// there could be a timing issue, where we want to mute a namespace before the cgroup mkdir
   189  	// event has been handled.
   190  	err := c.tracer.MuteEventsFromCgroup(container.CgroupID)
   191  	if err != nil {
   192  		c.log.Warnf("cannot mute cgroup %d: %v", container.CgroupID, err)
   193  	}
   194  }
   195  
   196  func (c *Controller) onDeleteContainer(container *containers.Container) {
   197  	c.resourcesStatsScrapePointsMu.Lock()
   198  	delete(c.resourcesStatsScrapePoints, container.CgroupID)
   199  	c.resourcesStatsScrapePointsMu.Unlock()
   200  
   201  	c.syscallScrapePointsMu.Lock()
   202  	delete(c.syscallScrapePoints, container.CgroupID)
   203  	c.syscallScrapePointsMu.Unlock()
   204  
   205  	c.dnsCache.Remove(container.CgroupID)
   206  
   207  	c.log.Debugf("removed cgroup %d", container.CgroupID)
   208  }
   209  
// resourcesStatsScrapePoint bundles a timestamp with CPU, memory and network
// statistics. The Controller keeps one per cgroup ID in
// resourcesStatsScrapePoints.
// NOTE(review): presumably this is the previous scrape's snapshot used as the
// baseline for delta calculation — the scraping code is outside this view.
type resourcesStatsScrapePoint struct {
	ts       time.Time
	cpuStat  *cgroup.CPUStat
	memStats *cgroup.MemoryStat
	netStats *netstats.InterfaceStats
}
   216  
// syscallScrapePoint holds a per-syscall uint64 value keyed by syscall ID.
// The Controller keeps one per cgroup ID in syscallScrapePoints.
// NOTE(review): presumably the values are call counts from the previous
// scrape — confirm against the code that fills them.
type syscallScrapePoint struct {
	syscalls map[ebpftracer.SyscallID]uint64
}
   220  
   221  func (c *Controller) MuteNamespace(namespace string) error {
   222  	c.mutedNamespacesMu.Lock()
   223  	c.mutedNamespaces[namespace] = struct{}{}
   224  	c.mutedNamespacesMu.Unlock()
   225  
   226  	cgroups := c.containersClient.GetCgroupsInNamespace(namespace)
   227  
   228  	err := c.tracer.MuteEventsFromCgroups(cgroups)
   229  
   230  	if err != nil {
   231  		return err
   232  	}
   233  
   234  	return nil
   235  }
   236  
   237  func (c *Controller) UnmuteNamespace(namespace string) error {
   238  	c.mutedNamespacesMu.Lock()
   239  	delete(c.mutedNamespaces, namespace)
   240  	c.mutedNamespacesMu.Unlock()
   241  
   242  	cgroups := c.containersClient.GetCgroupsInNamespace(namespace)
   243  
   244  	err := c.tracer.UnmuteEventsFromCgroups(cgroups)
   245  	if err != nil {
   246  		return err
   247  	}
   248  
   249  	return nil
   250  }
   251  
   252  func (c *Controller) IsMutedNamespace(namespace string) bool {
   253  	c.mutedNamespacesMu.RLock()
   254  	defer c.mutedNamespacesMu.RUnlock()
   255  	_, found := c.mutedNamespaces[namespace]
   256  
   257  	return found
   258  }