github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/pkg/ebpftracer/tracer_decode.go (about) 1 package ebpftracer 2 3 import ( 4 "errors" 5 "fmt" 6 "runtime/debug" 7 "strconv" 8 9 "github.com/castai/kvisor/pkg/containers" 10 "github.com/castai/kvisor/pkg/ebpftracer/decoder" 11 "github.com/castai/kvisor/pkg/ebpftracer/events" 12 "github.com/castai/kvisor/pkg/ebpftracer/types" 13 "github.com/castai/kvisor/pkg/kernel" 14 "github.com/castai/kvisor/pkg/metrics" 15 "github.com/castai/kvisor/pkg/proc" 16 "github.com/cilium/ebpf" 17 "github.com/prometheus/client_golang/prometheus" 18 "golang.org/x/net/context" 19 ) 20 21 // Error indicating that the resulting error was caught from a panic 22 var ErrPanic = errors.New("encountered panic") 23 24 func (t *Tracer) decodeAndHandleSignal(_ context.Context, data []byte) (rerr error) { 25 defer func() { 26 if perr := recover(); perr != nil { 27 stack := string(debug.Stack()) 28 rerr = fmt.Errorf("decode %w: %v, stack=%s", ErrPanic, perr, stack) 29 } 30 }() 31 32 ebpfMsgDecoder := decoder.NewEventDecoder(t.log, data) 33 var signalCtx types.SignalContext 34 if err := ebpfMsgDecoder.DecodeSignalContext(&signalCtx); err != nil { 35 return err 36 } 37 parsedArgs, err := decoder.ParseArgs(ebpfMsgDecoder, signalCtx.EventID) 38 if err != nil { 39 return fmt.Errorf("cannot parse event type %d: %w", signalCtx.EventID, err) 40 } 41 42 switch args := parsedArgs.(type) { 43 case types.SignalCgroupMkdirArgs: 44 // We we only care about events from the default cgroup, as cgroup v1 does not have unified cgroups. 45 if !t.cfg.CgroupClient.IsDefaultHierarchy(args.HierarchyId) { 46 return nil 47 } 48 49 t.cfg.CgroupClient.LoadCgroup(args.CgroupId, args.CgroupPath) 50 51 case types.SignalCgroupRmdirArgs: 52 // We we only care about events from the default cgroup, as cgroup v1 does not have unified cgroups. 53 if !t.cfg.CgroupClient.IsDefaultHierarchy(args.HierarchyId) { 54 return nil 55 } 56 57 t.queueCgroupForRemoval(args.CgroupId) 58 err := t.UnmuteEventsFromCgroup(args.CgroupId) 59 if err != nil { 60 return fmt.Errorf("cannot remove cgroup %d from mute map: %w", args.CgroupId, err) 61 } 62 default: 63 t.log.Warnf("unhandled signal: %d", signalCtx.EventID) 64 } 65 66 return nil 67 } 68 69 func (t *Tracer) decodeAndExportEvent(ctx context.Context, data []byte) (rerr error) { 70 metrics.AgentPulledEventsBytesTotal.Add(float64(len(data))) 71 72 defer func() { 73 if perr := recover(); perr != nil { 74 stack := string(debug.Stack()) 75 rerr = fmt.Errorf("decode %w: %v, stack=%s", ErrPanic, perr, stack) 76 } 77 }() 78 79 ebpfMsgDecoder := decoder.NewEventDecoder(t.log, data) 80 var eventCtx types.EventContext 81 if err := ebpfMsgDecoder.DecodeContext(&eventCtx); err != nil { 82 return err 83 } 84 85 eventId := eventCtx.EventID 86 parsedArgs, err := decoder.ParseArgs(ebpfMsgDecoder, eventId) 87 if err != nil { 88 return fmt.Errorf("cannot parse event type %d: %w", eventId, err) 89 } 90 91 container, err := t.cfg.ContainerClient.GetContainerForCgroup(ctx, eventCtx.CgroupID) 92 if err != nil { 93 // We ignore any event not belonging to a container for now. 94 if errors.Is(err, containers.ErrContainerNotFound) { 95 err := t.MuteEventsFromCgroup(eventCtx.CgroupID) 96 if err != nil { 97 return fmt.Errorf("cannot mute events for cgroup %d: %w", eventCtx.CgroupID, err) 98 } 99 return nil 100 } 101 return fmt.Errorf("cannot get container for cgroup %d: %w", eventCtx.CgroupID, err) 102 } 103 104 eventCtx.Ts = t.bootTime + eventCtx.Ts 105 event := &types.Event{ 106 Context: &eventCtx, 107 Container: container, 108 Args: parsedArgs, 109 } 110 111 if _, found := t.signatureEventMap[eventId]; found { 112 t.cfg.SignatureEngine.QueueEvent(event) 113 } 114 115 // Do not parse event, if it is not registered. If there is no policy set, we treat is as to parse all the events 116 if _, found := t.eventPoliciesMap[eventId]; !found && t.policy != nil { 117 metrics.AgentSkippedEventsTotal.With(prometheus.Labels{metrics.EventIDLabel: strconv.Itoa(int(eventId))}).Inc() 118 return nil 119 } 120 121 // TODO: Move rate limit based policy to kernel side. 122 if err := t.allowedByPolicyPre(&eventCtx); err != nil { 123 metrics.AgentSkippedEventsTotal.With(prometheus.Labels{metrics.EventIDLabel: strconv.Itoa(int(eventId))}).Inc() 124 return nil 125 } 126 127 switch eventId { 128 case events.SchedProcessExec: 129 if eventCtx.Pid == 1 { 130 t.cfg.MountNamespacePIDStore.ForceAddToBucket(proc.NamespaceID(eventCtx.MntID), eventCtx.NodeHostPid) 131 } else { 132 t.cfg.MountNamespacePIDStore.AddToBucket(proc.NamespaceID(eventCtx.MntID), eventCtx.NodeHostPid) 133 } 134 } 135 136 if err := t.allowedByPolicy(eventId, eventCtx.CgroupID, event); err != nil { 137 metrics.AgentSkippedEventsTotal.With(prometheus.Labels{metrics.EventIDLabel: strconv.Itoa(int(eventCtx.EventID))}).Inc() 138 return nil 139 } 140 141 switch eventId { 142 case events.NetFlowBase: 143 select { 144 case t.netflowEventsChan <- event: 145 default: 146 def := t.eventsSet[eventCtx.EventID] 147 metrics.AgentDroppedEventsTotal.With(prometheus.Labels{metrics.EventTypeLabel: def.name}).Inc() 148 } 149 default: 150 select { 151 case t.eventsChan <- event: 152 default: 153 def := t.eventsSet[eventCtx.EventID] 154 metrics.AgentDroppedEventsTotal.With(prometheus.Labels{metrics.EventTypeLabel: def.name}).Inc() 155 } 156 } 157 158 return nil 159 } 160 161 func (t *Tracer) MuteEventsFromCgroup(cgroup uint64) error { 162 t.log.Infof("muting cgroup %d", cgroup) 163 return t.module.objects.IgnoredCgroupsMap.Put(cgroup, cgroup) 164 } 165 166 func (t *Tracer) MuteEventsFromCgroups(cgroups []uint64) error { 167 t.log.Infof("muting cgroups %v", cgroups) 168 169 kernelVersion, err := kernel.CurrentKernelVersion() 170 if err != nil { 171 return err 172 } 173 174 // The ebpf batch helpers are available since kernel version 5.6. 175 if kernelVersion.Major > 5 || (kernelVersion.Major == 5 && kernelVersion.Minor >= 6) { 176 _, err = t.module.objects.IgnoredCgroupsMap.BatchUpdate(cgroups, cgroups, &ebpf.BatchOptions{ 177 Flags: uint64(ebpf.UpdateAny), 178 }) 179 180 if err != nil { 181 t.log.Warnf("got error while trying to mute cgroups %v: %s", cgroups, err) 182 } 183 } else { 184 for _, cgroup := range cgroups { 185 err = t.module.objects.IgnoredCgroupsMap.Update(cgroup, cgroup, ebpf.UpdateAny) 186 187 if err != nil { 188 t.log.Warnf("got error while trying to delete cgroup %d from ignore map: %s", cgroup, err) 189 } 190 } 191 } 192 193 return nil 194 } 195 196 func (t *Tracer) UnmuteEventsFromCgroup(cgroup uint64) error { 197 t.log.Infof("unmuting cgroup %d", cgroup) 198 199 err := t.module.objects.IgnoredCgroupsMap.Delete(cgroup) 200 201 // We do not care if we try to remove a non existing cgroup. 202 if errors.Is(err, ebpf.ErrKeyNotExist) { 203 return nil 204 } 205 206 return err 207 } 208 209 func (t *Tracer) UnmuteEventsFromCgroups(cgroups []uint64) error { 210 t.log.Infof("unmuting cgroup %v", cgroups) 211 212 kernelVersion, err := kernel.CurrentKernelVersion() 213 if err != nil { 214 return err 215 } 216 217 // The ebpf batch helpers are available since kernel version 5.6. 218 if kernelVersion.Major > 5 || (kernelVersion.Major == 5 && kernelVersion.Minor >= 6) { 219 _, err = t.module.objects.IgnoredCgroupsMap.BatchDelete(cgroups, nil) 220 if !errors.Is(err, ebpf.ErrKeyNotExist) { 221 t.log.Warnf("got error while trying to delete cgroups %v from ignore map: %s", cgroups, err) 222 } 223 } else { 224 for _, cgroup := range cgroups { 225 err = t.module.objects.IgnoredCgroupsMap.Delete(cgroup) 226 if !errors.Is(err, ebpf.ErrKeyNotExist) { 227 t.log.Warnf("got error while trying to delete cgroup %d from ignore map: %s", cgroup, err) 228 } 229 } 230 } 231 232 return nil 233 } 234 235 func (t *Tracer) IsCgroupMuted(cgroup uint64) bool { 236 var value uint64 237 238 err := t.module.objects.IgnoredCgroupsMap.Lookup(cgroup, &value) 239 240 return !errors.Is(err, ebpf.ErrKeyNotExist) && value > 0 241 }