github.com/aporeto-inc/trireme-lib@v10.358.0+incompatible/monitor/internal/linux/processor.go

package linuxmonitor

import (
	"context"
	"fmt"
	"regexp"
	"strconv"
	"strings"
	"sync"

	"go.aporeto.io/enforcerd/trireme-lib/buildflags"
	"go.aporeto.io/enforcerd/trireme-lib/collector"
	"go.aporeto.io/enforcerd/trireme-lib/common"
	"go.aporeto.io/enforcerd/trireme-lib/monitor/config"
	"go.aporeto.io/enforcerd/trireme-lib/monitor/extractors"
	"go.aporeto.io/enforcerd/trireme-lib/policy"
	"go.aporeto.io/enforcerd/trireme-lib/utils/cgnetcls"
	"go.uber.org/zap"
)

// ignoreNames lists the standard control files present in every net_cls
// cgroup directory; they are skipped when scanning for PU cgroups.
var ignoreNames = map[string]struct{}{
	"cgroup.clone_children": {},
	"cgroup.procs":          {},
	"net_cls.classid":       {},
	"net_prio.ifpriomap":    {},
	"net_prio.prioidx":      {},
	"notify_on_release":     {},
	"tasks":                 {},
}

// linuxProcessor captures all the monitor processor information.
// It implements the EventProcessor interface of the RPC monitor.
type linuxProcessor struct {
	host              bool
	config            *config.ProcessorConfig
	metadataExtractor extractors.EventMetadataExtractor
	netcls            cgnetcls.Cgroupnetcls
	regStart          *regexp.Regexp
	regStop           *regexp.Regexp
	sync.Mutex
}

// baseName returns the component of name after the last occurrence of
// separator; if the separator is not found, it returns name unchanged.
func baseName(name, separator string) string {

	lastseparator := strings.LastIndex(name, separator)
	if len(name) <= lastseparator {
		return ""
	}
	return name[lastseparator+1:]
}

// Create handles create events. It should never be called for Linux
// processes, which are created and started in a single step.
func (l *linuxProcessor) Create(ctx context.Context, eventInfo *common.EventInfo) error {
	return fmt.Errorf("use start directly for Linux processes - create not supported")
}

// Start handles start events
func (l *linuxProcessor) Start(ctx context.Context, eventInfo *common.EventInfo) error {

	// Validate the PUID format. Additional validations are TODO.
	if !l.regStart.MatchString(eventInfo.PUID) {
		return fmt.Errorf("invalid pu id: %s", eventInfo.PUID)
	}

	// Normalize to a nativeID context. This will become the key for any
	// recoveries, and it is a one-way function.
	nativeID, err := l.generateContextID(eventInfo)
	if err != nil {
		return err
	}

	processes, err := l.netcls.ListCgroupProcesses(nativeID)
	if err == nil && len(processes) != 0 {
		// This PU already exists; we are getting a duplicate event.
		zap.L().Debug("Duplicate start event for the same PU", zap.String("PUID", nativeID))
		if err = l.netcls.AddProcess(nativeID, int(eventInfo.PID)); err != nil {
			if derr := l.netcls.DeleteCgroup(nativeID); derr != nil {
				zap.L().Warn("Failed to clean cgroup", zap.Error(derr))
			}
			return err
		}
		return nil
	}

	// Extract the metadata and create the runtime.
	runtime, err := l.metadataExtractor(eventInfo)
	if err != nil {
		return err
	}

	// We need to send a create event to the policy engine.
	if err = l.config.Policy.HandlePUEvent(ctx, nativeID, common.EventCreate, runtime); err != nil {
		return fmt.Errorf("unable to create PU: %s", err)
	}

	// We can now send a start event to the policy engine.
	if err = l.config.Policy.HandlePUEvent(ctx, nativeID, common.EventStart, runtime); err != nil {
		return fmt.Errorf("unable to start PU: %s", err)
	}

	l.Lock()
	// We can now program cgroups and everything else.
	if eventInfo.HostService {
		err = l.processHostServiceStart(eventInfo, runtime)
	} else {
		err = l.processLinuxServiceStart(nativeID, eventInfo, runtime)
	}
	l.Unlock()
	if err != nil {
		return fmt.Errorf("failed to program cgroups: %s", err)
	}

	// Send the event to the collector.
	l.config.Collector.CollectContainerEvent(&collector.ContainerRecord{
		ContextID: eventInfo.PUID,
		IPAddress: runtime.IPAddresses(),
		Tags:      runtime.Tags(),
		Event:     collector.ContainerStart,
	})

	return nil
}
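
// For illustration only: a minimal sketch of how a start event might be fed
// to this processor. The field values below (PUID, Cgroup, PID) are
// hypothetical; only the field names come from common.EventInfo as used in
// this file, and the processor itself is wired up by the monitor, not shown.
//
//	p := &linuxProcessor{ /* configured by the monitor */ }
//	event := &common.EventInfo{
//		PUID:   "/mysvc",
//		Cgroup: "/trireme/mysvc",
//		PID:    1234,
//		PUType: common.LinuxProcessPU,
//	}
//	if err := p.Start(context.Background(), event); err != nil {
//		zap.L().Error("start failed", zap.Error(err))
//	}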

// Stop handles a stop event
func (l *linuxProcessor) Stop(ctx context.Context, event *common.EventInfo) error {

	puID, err := l.generateContextID(event)
	if err != nil {
		return err
	}

	// A stop event while processes are still alive in the cgroup is bogus.
	processes, err := l.netcls.ListCgroupProcesses(puID)
	if err == nil && len(processes) != 0 {
		zap.L().Debug("Received bogus stop event", zap.Int("processes", len(processes)))
		return nil
	}

	if puID == "/trireme" {
		return nil
	}

	runtime := policy.NewPURuntimeWithDefaults()
	runtime.SetPUType(event.PUType)

	return l.config.Policy.HandlePUEvent(ctx, puID, common.EventStop, runtime)
}

// Destroy handles a destroy event
func (l *linuxProcessor) Destroy(ctx context.Context, eventInfo *common.EventInfo) error {

	puID, err := l.generateContextID(eventInfo)
	if err != nil {
		return err
	}

	if puID == "/trireme" {
		puID = strings.TrimLeft(puID, "/")
		l.netcls.Deletebasepath(puID)
		return nil
	}

	runtime := policy.NewPURuntimeWithDefaults()
	runtime.SetPUType(eventInfo.PUType)

	// Send the event upstream.
	if err := l.config.Policy.HandlePUEvent(ctx, puID, common.EventDestroy, runtime); err != nil {
		zap.L().Warn("Unable to clean trireme",
			zap.String("puID", puID),
			zap.Error(err),
		)
	}

	l.Lock()
	defer l.Unlock()

	if eventInfo.HostService {
		// For network-only PUs we do not program cgroups and hence should not
		// clean them. Cleaning here could remove the root cgroup that was
		// configured for a true host-mode PU.
		if eventInfo.NetworkOnlyTraffic {
			return nil
		}

		if err := l.netcls.AssignRootMark(0); err != nil {
			return fmt.Errorf("unable to write to net_cls.classid file for new cgroup: %s", err)
		}
	}

	puID = baseName(puID, "/")

	// Let us remove the cgroup files now.
	if err := l.netcls.DeleteCgroup(puID); err != nil {
		zap.L().Warn("Failed to clean netcls group",
			zap.String("puID", puID),
			zap.Error(err),
		)
	}

	return nil
}
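
// For illustration only: a sketch of the teardown sequence for the
// hypothetical PU started above, under the assumption that the caller drives
// Stop before Destroy. Stop tells the policy engine to stop enforcement;
// Destroy releases policy state and removes the net_cls cgroup.
//
//	if err := p.Stop(context.Background(), event); err != nil {
//		zap.L().Warn("stop failed", zap.Error(err))
//	}
//	if err := p.Destroy(context.Background(), event); err != nil {
//		zap.L().Warn("destroy failed", zap.Error(err))
//	}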

// Pause handles a pause event
func (l *linuxProcessor) Pause(ctx context.Context, eventInfo *common.EventInfo) error {

	puID, err := l.generateContextID(eventInfo)
	if err != nil {
		return fmt.Errorf("unable to generate context id: %s", err)
	}

	return l.config.Policy.HandlePUEvent(ctx, puID, common.EventPause, nil)
}

func (l *linuxProcessor) resyncHostService(ctx context.Context, e *common.EventInfo) error {

	runtime, err := l.metadataExtractor(e)
	if err != nil {
		return err
	}

	nativeID, err := l.generateContextID(e)
	if err != nil {
		return err
	}

	if err = l.config.Policy.HandlePUEvent(ctx, nativeID, common.EventStart, runtime); err != nil {
		return fmt.Errorf("unable to start PU: %s", err)
	}

	return l.processHostServiceStart(e, runtime)
}

// Resync resyncs with all the existing services that were there before we started
func (l *linuxProcessor) Resync(ctx context.Context, e *common.EventInfo) error {
	// This lock is not strictly necessary here.
	l.config.ResyncLock.RLock()
	defer l.config.ResyncLock.RUnlock()

	if e != nil {
		// If it's a host service, use the PU from eventInfo.
		// The code below assumes that the PU has already been created.
		if e.HostService {
			return l.resyncHostService(ctx, e)
		}
	}

	cgroups := l.netcls.ListAllCgroups("")
	for _, cgroup := range cgroups {

		if _, ok := ignoreNames[cgroup]; ok {
			continue
		}

		// List all the cgroup processes. If the cgroup is empty, we can remove it.
		procs, err := l.netcls.ListCgroupProcesses(cgroup)
		if err != nil {
			continue
		}

		// All processes in the cgroup have died. Let's clean up.
		if len(procs) == 0 {
			if err := l.netcls.DeleteCgroup(cgroup); err != nil {
				zap.L().Warn("Failed to delete cgroup",
					zap.String("cgroup", cgroup),
					zap.Error(err),
				)
			}
			continue
		}

		runtime := policy.NewPURuntimeWithDefaults()
		runtime.SetPUType(common.LinuxProcessPU)
		runtime.SetOptions(policy.OptionsType{
			CgroupMark: strconv.FormatUint(cgnetcls.MarkVal(), 10),
			CgroupName: cgroup,
		})

		// Processes are still alive. We should enforce policy.
		if err := l.config.Policy.HandlePUEvent(ctx, cgroup, common.EventStart, runtime); err != nil {
			zap.L().Error("Failed to restart cgroup control", zap.String("cgroup ID", cgroup), zap.Error(err))
		}

		if err := l.processLinuxServiceStart(cgroup, nil, runtime); err != nil {
			return err
		}
	}
	return nil
}
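
// For illustration only: a sketch of how Resync might be driven at monitor
// startup. Passing a nil event (handled above) walks all existing net_cls
// cgroups, reaps empty ones, and re-enforces policy on live ones; the actual
// invocation point in the monitor framework is an assumption here.
//
//	if err := p.Resync(context.Background(), nil); err != nil {
//		zap.L().Error("resync failed", zap.Error(err))
//	}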

// generateContextID creates the puID from the event information
func (l *linuxProcessor) generateContextID(eventInfo *common.EventInfo) (string, error) {

	puID := eventInfo.PUID
	if eventInfo.Cgroup == "" {
		return puID, nil
	}

	if !l.regStop.MatchString(eventInfo.Cgroup) {
		return "", fmt.Errorf("invalid pu id: %s", eventInfo.Cgroup)
	}

	return baseName(eventInfo.Cgroup, "/"), nil
}

func (l *linuxProcessor) processLinuxServiceStart(nativeID string, event *common.EventInfo, runtimeInfo *policy.PURuntime) error {

	// It is okay to launch this, so let us create a cgroup for it.
	if err := l.netcls.Creategroup(nativeID); err != nil {
		return err
	}

	markval := runtimeInfo.Options().CgroupMark
	if markval == "" {
		if derr := l.netcls.DeleteCgroup(nativeID); derr != nil {
			zap.L().Warn("Failed to clean cgroup", zap.Error(derr))
		}
		return fmt.Errorf("mark value not found")
	}

	mark, err := strconv.ParseUint(markval, 10, 32)
	if err != nil {
		if derr := l.netcls.DeleteCgroup(nativeID); derr != nil {
			zap.L().Warn("Failed to clean cgroup", zap.Error(derr))
		}
		return fmt.Errorf("invalid mark value %s: %s", markval, err)
	}

	if err := l.netcls.AssignMark(nativeID, mark); err != nil {
		if derr := l.netcls.DeleteCgroup(nativeID); derr != nil {
			zap.L().Warn("Failed to clean cgroup", zap.Error(derr))
		}
		return err
	}

	if event != nil {
		if err := l.netcls.AddProcess(nativeID, int(event.PID)); err != nil {
			if derr := l.netcls.DeleteCgroup(nativeID); derr != nil {
				zap.L().Warn("Failed to clean cgroup", zap.Error(derr))
			}
			return err
		}
	}

	return nil
}

func (l *linuxProcessor) processHostServiceStart(event *common.EventInfo, runtimeInfo *policy.PURuntime) error {

	if event.NetworkOnlyTraffic || buildflags.IsLegacyKernel() {
		return nil
	}

	markval := runtimeInfo.Options().CgroupMark
	mark, err := strconv.ParseUint(markval, 10, 32)
	if err != nil {
		return fmt.Errorf("invalid mark value %s: %s", markval, err)
	}

	return l.netcls.AssignRootMark(mark)
}
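
// For illustration only: the cgroup mark round-trips through the PURuntime
// options as a decimal string. A runtime configured the way Resync does it
// above would look like the sketch below; the CgroupName value is
// hypothetical, while MarkVal() is the same allocator used in this file.
//
//	runtime := policy.NewPURuntimeWithDefaults()
//	runtime.SetOptions(policy.OptionsType{
//		CgroupMark: strconv.FormatUint(cgnetcls.MarkVal(), 10),
//		CgroupName: "mysvc",
//	})
//	// processLinuxServiceStart parses CgroupMark back with
//	// strconv.ParseUint(markval, 10, 32) before calling AssignMark.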