github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/cmd/agent/daemon/enrichment/enrichers.go (about)

     1  package enrichment
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"io/fs"
     9  	"path/filepath"
    10  	"strconv"
    11  	"syscall"
    12  
    13  	castpb "github.com/castai/kvisor/api/v1/runtime"
    14  	"github.com/castai/kvisor/pkg/containers"
    15  	"github.com/castai/kvisor/pkg/ebpftracer/types"
    16  	"github.com/castai/kvisor/pkg/logging"
    17  	"github.com/castai/kvisor/pkg/proc"
    18  	"github.com/cespare/xxhash"
    19  	"github.com/elastic/go-freelru"
    20  	"github.com/minio/sha256-simd"
    21  )
    22  
    23  type fileHashCacheKey string
    24  type ContainerForCgroupGetter func(cgroup uint64) (*containers.Container, bool, error)
    25  type PIDsInNamespaceGetter func(ns uint32) []uint32
    26  
    27  // more hash function in https://github.com/elastic/go-freelru/blob/main/bench/hash.go
    28  func hashStringXXHASH(s fileHashCacheKey) uint32 {
    29  	return uint32(xxhash.Sum64String(string(s)))
    30  }
    31  
    32  func EnrichWithFileHash(log *logging.Logger, mountNamespacePIDStore *types.PIDsPerNamespace, procFS proc.ProcFS) EventEnricher {
    33  	cache, err := freelru.NewSynced[fileHashCacheKey, []byte](1024, hashStringXXHASH)
    34  	if err != nil {
    35  		// This case can never happen, as err is only thrown if cache size is <0, which it isn't.
    36  		panic(err)
    37  	}
    38  
    39  	return &fileHashEnricher{
    40  		log:                    log,
    41  		mountNamespacePIDStore: mountNamespacePIDStore,
    42  		procFS:                 procFS,
    43  		cache:                  cache,
    44  	}
    45  }
    46  
// fileHashEnricher is the enricher returned by EnrichWithFileHash. It sets the
// SHA-256 hash of the executed file on exec events.
//
// Fields:
//   - log: logger handed in by the constructor (not used by the methods visible
//     in this part of the file).
//   - mountNamespacePIDStore: queried in Enrich for the PIDs bucketed under the
//     event's mount namespace ID, which serve as fallback PIDs.
//   - procFS: filesystem that is stat'ed and opened with paths of the form
//     "<pid>" and "<pid>/root/<exec path>".
//     NOTE(review): presumably rooted at /proc — confirm at the call site.
//   - cache: previously computed hashes, looked up by fileHashCacheKey.
type fileHashEnricher struct {
	log                    *logging.Logger
	mountNamespacePIDStore *types.PIDsPerNamespace
	procFS                 fs.StatFS
	cache                  freelru.Cache[fileHashCacheKey, []byte]
}
    53  
    54  func (enricher *fileHashEnricher) EventTypes() []castpb.EventType {
    55  	return []castpb.EventType{
    56  		castpb.EventType_EVENT_EXEC,
    57  	}
    58  }
    59  
    60  func (enricher *fileHashEnricher) Enrich(ctx context.Context, req *EnrichRequest) {
    61  	e := req.Event
    62  	exec := e.GetExec()
    63  	if exec == nil || exec.Path == "" {
    64  		return
    65  	}
    66  
    67  	sha, err := enricher.calcFileHashForPID(req.EbpfEvent.Container, proc.PID(req.EbpfEvent.Context.NodeHostPid), exec.Path)
    68  	if err != nil {
    69  		if errors.Is(err, ErrFileDoesNotExist) {
    70  			return
    71  		}
    72  	} else {
    73  		setExecFileHash(exec, sha)
    74  		return
    75  	}
    76  
    77  	for _, pid := range enricher.mountNamespacePIDStore.GetBucket(proc.NamespaceID(req.EbpfEvent.Context.MntID)) {
    78  		if pid == proc.PID(req.EbpfEvent.Context.NodeHostPid) {
    79  			// We already tried that PID of the event, skipping.
    80  			continue
    81  		}
    82  
    83  		sha, err := enricher.calcFileHashForPID(req.EbpfEvent.Container, pid, exec.Path)
    84  		// We search for the first PID we can successfully calculate a filehash for.
    85  		if err != nil {
    86  			if errors.Is(err, ErrFileDoesNotExist) {
    87  				// If the wanted file does not exist in the PID mount namespace, it will also not exist in the mounts of the other.
    88  				// We can hence simply return, as we will not find the wanted file.
    89  				return
    90  			}
    91  
    92  			continue
    93  		}
    94  
    95  		setExecFileHash(exec, sha)
    96  		return
    97  	}
    98  }
    99  
   100  func setExecFileHash(exec *castpb.Exec, sha []byte) {
   101  	if exec.Meta == nil {
   102  		exec.Meta = &castpb.ExecMetadata{}
   103  	}
   104  
   105  	exec.Meta.HashSha256 = sha
   106  }
   107  
   108  func (enricher *fileHashEnricher) calcFileHashForPID(cont *containers.Container, pid proc.PID, execPath string) ([]byte, error) {
   109  	pidString := strconv.FormatInt(int64(pid), 10)
   110  
   111  	_, err := enricher.procFS.Stat(pidString)
   112  	if err != nil {
   113  		// If the /proc/<pid> folder doesn't exist, there is nothing we can do.
   114  		return nil, ErrProcFolderDoesNotExist
   115  	}
   116  
   117  	path := filepath.Join(pidString, "root", execPath)
   118  	info, err := enricher.procFS.Stat(path)
   119  	if err != nil {
   120  		// If the wanted file does not exist inside the mount namespace, there is also nothing we can do.
   121  		return nil, ErrFileDoesNotExist
   122  	}
   123  
   124  	key := enricher.buildCacheKey(cont, info)
   125  	hash, found := enricher.checkCache(key)
   126  	if found {
   127  		return hash, nil
   128  	}
   129  
   130  	f, err := enricher.procFS.Open(path)
   131  	if err != nil {
   132  		return nil, ErrFileDoesNotExist
   133  	}
   134  	defer f.Close()
   135  
   136  	h := sha256.New()
   137  	if _, err := io.Copy(h, f); err != nil {
   138  		return nil, err
   139  	}
   140  
   141  	hash = h.Sum(nil)
   142  	enricher.cacheHash(key, hash)
   143  
   144  	return hash, nil
   145  }
   146  
// Sentinel errors of the file hash enricher. Compare against them with errors.Is.
var (
	// ErrCannotGetInode signals that the inode for a path could not be obtained.
	// It is not returned by any code visible in this part of the file.
	ErrCannotGetInode         = errors.New("cannot get inode for path")
	// ErrProcFolderDoesNotExist is returned by calcFileHashForPID when the
	// "<pid>" folder cannot be stat'ed in the proc filesystem.
	ErrProcFolderDoesNotExist = errors.New("/proc/<pid> folder does not exist")
	// ErrFileDoesNotExist is returned by calcFileHashForPID when the wanted file
	// cannot be stat'ed or opened below "<pid>/root".
	ErrFileDoesNotExist       = errors.New("wanted file does not exist")
)
   152  
   153  func (enricher *fileHashEnricher) buildCacheKey(cont *containers.Container, info fs.FileInfo) fileHashCacheKey {
   154  	stat, ok := info.Sys().(*syscall.Stat_t)
   155  	if !ok {
   156  		return ""
   157  	}
   158  
   159  	return fileHashCacheKey(fmt.Sprintf("%s:%d", cont.Cgroup.ContainerID, stat.Ino))
   160  }
   161  
   162  func (enricher *fileHashEnricher) checkCache(key fileHashCacheKey) ([]byte, bool) {
   163  	if key == "" {
   164  		// An empty key indicates an error when calculating the hash key, hence we treat it as not cached.
   165  		return nil, false
   166  	}
   167  
   168  	return enricher.cache.Get(key)
   169  }
   170  
   171  func (enricher *fileHashEnricher) cacheHash(key fileHashCacheKey, hash []byte) {
   172  	if key == "" {
   173  		// An empty key indicates an error when calculating the hash key, hence nothing will be cached
   174  		return
   175  	}
   176  
   177  	enricher.cache.Add(key, hash)
   178  }