github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/cmd/agent/daemon/enrichment/enrichers.go (about) 1 package enrichment 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "io/fs" 9 "path/filepath" 10 "strconv" 11 "syscall" 12 13 castpb "github.com/castai/kvisor/api/v1/runtime" 14 "github.com/castai/kvisor/pkg/containers" 15 "github.com/castai/kvisor/pkg/ebpftracer/types" 16 "github.com/castai/kvisor/pkg/logging" 17 "github.com/castai/kvisor/pkg/proc" 18 "github.com/cespare/xxhash" 19 "github.com/elastic/go-freelru" 20 "github.com/minio/sha256-simd" 21 ) 22 23 type fileHashCacheKey string 24 type ContainerForCgroupGetter func(cgroup uint64) (*containers.Container, bool, error) 25 type PIDsInNamespaceGetter func(ns uint32) []uint32 26 27 // more hash function in https://github.com/elastic/go-freelru/blob/main/bench/hash.go 28 func hashStringXXHASH(s fileHashCacheKey) uint32 { 29 return uint32(xxhash.Sum64String(string(s))) 30 } 31 32 func EnrichWithFileHash(log *logging.Logger, mountNamespacePIDStore *types.PIDsPerNamespace, procFS proc.ProcFS) EventEnricher { 33 cache, err := freelru.NewSynced[fileHashCacheKey, []byte](1024, hashStringXXHASH) 34 if err != nil { 35 // This case can never happen, as err is only thrown if cache size is <0, which it isn't. 36 panic(err) 37 } 38 39 return &fileHashEnricher{ 40 log: log, 41 mountNamespacePIDStore: mountNamespacePIDStore, 42 procFS: procFS, 43 cache: cache, 44 } 45 } 46 47 type fileHashEnricher struct { 48 log *logging.Logger 49 mountNamespacePIDStore *types.PIDsPerNamespace 50 procFS fs.StatFS 51 cache freelru.Cache[fileHashCacheKey, []byte] 52 } 53 54 func (enricher *fileHashEnricher) EventTypes() []castpb.EventType { 55 return []castpb.EventType{ 56 castpb.EventType_EVENT_EXEC, 57 } 58 } 59 60 func (enricher *fileHashEnricher) Enrich(ctx context.Context, req *EnrichRequest) { 61 e := req.Event 62 exec := e.GetExec() 63 if exec == nil || exec.Path == "" { 64 return 65 } 66 67 sha, err := enricher.calcFileHashForPID(req.EbpfEvent.Container, proc.PID(req.EbpfEvent.Context.NodeHostPid), exec.Path) 68 if err != nil { 69 if errors.Is(err, ErrFileDoesNotExist) { 70 return 71 } 72 } else { 73 setExecFileHash(exec, sha) 74 return 75 } 76 77 for _, pid := range enricher.mountNamespacePIDStore.GetBucket(proc.NamespaceID(req.EbpfEvent.Context.MntID)) { 78 if pid == proc.PID(req.EbpfEvent.Context.NodeHostPid) { 79 // We already tried that PID of the event, skipping. 80 continue 81 } 82 83 sha, err := enricher.calcFileHashForPID(req.EbpfEvent.Container, pid, exec.Path) 84 // We search for the first PID we can successfully calculate a filehash for. 85 if err != nil { 86 if errors.Is(err, ErrFileDoesNotExist) { 87 // If the wanted file does not exist in the PID mount namespace, it will also not exist in the mounts of the other. 88 // We can hence simply return, as we will not find the wanted file. 89 return 90 } 91 92 continue 93 } 94 95 setExecFileHash(exec, sha) 96 return 97 } 98 } 99 100 func setExecFileHash(exec *castpb.Exec, sha []byte) { 101 if exec.Meta == nil { 102 exec.Meta = &castpb.ExecMetadata{} 103 } 104 105 exec.Meta.HashSha256 = sha 106 } 107 108 func (enricher *fileHashEnricher) calcFileHashForPID(cont *containers.Container, pid proc.PID, execPath string) ([]byte, error) { 109 pidString := strconv.FormatInt(int64(pid), 10) 110 111 _, err := enricher.procFS.Stat(pidString) 112 if err != nil { 113 // If the /proc/<pid> folder doesn't exist, there is nothing we can do. 114 return nil, ErrProcFolderDoesNotExist 115 } 116 117 path := filepath.Join(pidString, "root", execPath) 118 info, err := enricher.procFS.Stat(path) 119 if err != nil { 120 // If the wanted file does not exist inside the mount namespace, there is also nothing we can do. 121 return nil, ErrFileDoesNotExist 122 } 123 124 key := enricher.buildCacheKey(cont, info) 125 hash, found := enricher.checkCache(key) 126 if found { 127 return hash, nil 128 } 129 130 f, err := enricher.procFS.Open(path) 131 if err != nil { 132 return nil, ErrFileDoesNotExist 133 } 134 defer f.Close() 135 136 h := sha256.New() 137 if _, err := io.Copy(h, f); err != nil { 138 return nil, err 139 } 140 141 hash = h.Sum(nil) 142 enricher.cacheHash(key, hash) 143 144 return hash, nil 145 } 146 147 var ( 148 ErrCannotGetInode = errors.New("cannot get inode for path") 149 ErrProcFolderDoesNotExist = errors.New("/proc/<pid> folder does not exist") 150 ErrFileDoesNotExist = errors.New("wanted file does not exist") 151 ) 152 153 func (enricher *fileHashEnricher) buildCacheKey(cont *containers.Container, info fs.FileInfo) fileHashCacheKey { 154 stat, ok := info.Sys().(*syscall.Stat_t) 155 if !ok { 156 return "" 157 } 158 159 return fileHashCacheKey(fmt.Sprintf("%s:%d", cont.Cgroup.ContainerID, stat.Ino)) 160 } 161 162 func (enricher *fileHashEnricher) checkCache(key fileHashCacheKey) ([]byte, bool) { 163 if key == "" { 164 // An empty key indicates an error when calculating the hash key, hence we treat it as not cached. 165 return nil, false 166 } 167 168 return enricher.cache.Get(key) 169 } 170 171 func (enricher *fileHashEnricher) cacheHash(key fileHashCacheKey, hash []byte) { 172 if key == "" { 173 // An empty key indicates an error when calculating the hash key, hence nothing will be cached 174 return 175 } 176 177 enricher.cache.Add(key, hash) 178 }