github.com/aporeto-inc/trireme-lib@v10.358.0+incompatible/monitor/internal/linux/processor.go (about)

     1  package linuxmonitor
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"regexp"
     7  	"strconv"
     8  	"strings"
     9  	"sync"
    10  
    11  	"go.aporeto.io/enforcerd/trireme-lib/buildflags"
    12  	"go.aporeto.io/enforcerd/trireme-lib/collector"
    13  	"go.aporeto.io/enforcerd/trireme-lib/common"
    14  	"go.aporeto.io/enforcerd/trireme-lib/monitor/config"
    15  	"go.aporeto.io/enforcerd/trireme-lib/monitor/extractors"
    16  	"go.aporeto.io/enforcerd/trireme-lib/policy"
    17  	"go.aporeto.io/enforcerd/trireme-lib/utils/cgnetcls"
    18  	"go.uber.org/zap"
    19  )
    20  
// ignoreNames lists the entry names that Resync skips while walking the
// result of ListAllCgroups. Only key presence is checked; every value is nil.
// NOTE(review): these look like cgroup control files rather than PU cgroups —
// confirm against the ListAllCgroups implementation.
var ignoreNames = map[string]*struct{}{
	"cgroup.clone_children": nil,
	"cgroup.procs":          nil,
	"net_cls.classid":       nil,
	"net_prio.ifpriomap":    nil,
	"net_prio.prioidx":      nil,
	"notify_on_release":     nil,
	"tasks":                 nil,
}
    30  
    31  // linuxProcessor captures all the monitor processor information
    32  // It implements the EventProcessor interface of the rpc monitor
    33  type linuxProcessor struct {
    34  	host              bool
    35  	config            *config.ProcessorConfig
    36  	metadataExtractor extractors.EventMetadataExtractor
    37  	netcls            cgnetcls.Cgroupnetcls
    38  	regStart          *regexp.Regexp
    39  	regStop           *regexp.Regexp
    40  	sync.Mutex
    41  }
    42  
    43  func baseName(name, separator string) string {
    44  
    45  	lastseparator := strings.LastIndex(name, separator)
    46  	if len(name) <= lastseparator {
    47  		return ""
    48  	}
    49  	return name[lastseparator+1:]
    50  }
    51  
    52  // Create handles create events
    53  func (l *linuxProcessor) Create(ctx context.Context, eventInfo *common.EventInfo) error {
    54  	// This should never be called for Linux Processes
    55  	return fmt.Errorf("Use start directly for Linux processes. Create not supported")
    56  }
    57  
    58  // Start handles start events
    59  func (l *linuxProcessor) Start(ctx context.Context, eventInfo *common.EventInfo) error {
    60  
    61  	// Validate the PUID format. Additional validations TODO
    62  	if !l.regStart.Match([]byte(eventInfo.PUID)) {
    63  		return fmt.Errorf("invalid pu id: %s", eventInfo.PUID)
    64  	}
    65  
    66  	// Normalize to a nativeID context. This will become key for any recoveries
    67  	// and it's an one way function.
    68  	nativeID, err := l.generateContextID(eventInfo)
    69  	if err != nil {
    70  		return err
    71  	}
    72  
    73  	processes, err := l.netcls.ListCgroupProcesses(nativeID)
    74  	if err == nil && len(processes) != 0 {
    75  		//This PU already exists we are getting a duplicate event
    76  		zap.L().Debug("Duplicate start event for the same PU", zap.String("PUID", nativeID))
    77  		if err = l.netcls.AddProcess(nativeID, int(eventInfo.PID)); err != nil {
    78  			if derr := l.netcls.DeleteCgroup(nativeID); derr != nil {
    79  				zap.L().Warn("Failed to clean cgroup", zap.Error(derr))
    80  			}
    81  			return err
    82  		}
    83  		return nil
    84  	}
    85  
    86  	// Extract the metadata and create the runtime
    87  	runtime, err := l.metadataExtractor(eventInfo)
    88  	if err != nil {
    89  		return err
    90  	}
    91  
    92  	// We need to send a create event to the policy engine.
    93  	if err = l.config.Policy.HandlePUEvent(ctx, nativeID, common.EventCreate, runtime); err != nil {
    94  		return fmt.Errorf("Unable to create PU: %s", err)
    95  	}
    96  
    97  	// We can now send a start event to the policy engine
    98  	if err = l.config.Policy.HandlePUEvent(ctx, nativeID, common.EventStart, runtime); err != nil {
    99  		return fmt.Errorf("Unable to start PU: %s", err)
   100  	}
   101  
   102  	l.Lock()
   103  	// We can now program cgroups and everything else.
   104  	if eventInfo.HostService {
   105  		err = l.processHostServiceStart(eventInfo, runtime)
   106  	} else {
   107  		err = l.processLinuxServiceStart(nativeID, eventInfo, runtime)
   108  	}
   109  	l.Unlock()
   110  	if err != nil {
   111  		return fmt.Errorf("Failed to program cgroups: %s", err)
   112  	}
   113  
   114  	// Send the event to the collector.
   115  	l.config.Collector.CollectContainerEvent(&collector.ContainerRecord{
   116  		ContextID: eventInfo.PUID,
   117  		IPAddress: runtime.IPAddresses(),
   118  		Tags:      runtime.Tags(),
   119  		Event:     collector.ContainerStart,
   120  	})
   121  
   122  	return nil
   123  }
   124  
   125  // Stop handles a stop event
   126  func (l *linuxProcessor) Stop(ctx context.Context, event *common.EventInfo) error {
   127  
   128  	puID, err := l.generateContextID(event)
   129  	if err != nil {
   130  		return err
   131  	}
   132  
   133  	processes, err := l.netcls.ListCgroupProcesses(puID)
   134  	if err == nil && len(processes) != 0 {
   135  		zap.L().Debug("Received Bogus Stop", zap.Int("Num Processes", len(processes)), zap.Error(err))
   136  		return nil
   137  	}
   138  
   139  	if puID == "/trireme" {
   140  		return nil
   141  	}
   142  
   143  	runtime := policy.NewPURuntimeWithDefaults()
   144  	runtime.SetPUType(event.PUType)
   145  
   146  	return l.config.Policy.HandlePUEvent(ctx, puID, common.EventStop, runtime)
   147  }
   148  
   149  // Destroy handles a destroy event
   150  func (l *linuxProcessor) Destroy(ctx context.Context, eventInfo *common.EventInfo) error {
   151  
   152  	puID, err := l.generateContextID(eventInfo)
   153  	if err != nil {
   154  		return err
   155  	}
   156  
   157  	if puID == "/trireme" {
   158  		puID = strings.TrimLeft(puID, "/")
   159  		l.netcls.Deletebasepath(puID)
   160  		return nil
   161  	}
   162  
   163  	runtime := policy.NewPURuntimeWithDefaults()
   164  	runtime.SetPUType(eventInfo.PUType)
   165  
   166  	// Send the event upstream
   167  	if err := l.config.Policy.HandlePUEvent(ctx, puID, common.EventDestroy, runtime); err != nil {
   168  		zap.L().Warn("Unable to clean trireme ",
   169  			zap.String("puID", puID),
   170  			zap.Error(err),
   171  		)
   172  	}
   173  
   174  	l.Lock()
   175  	defer l.Unlock()
   176  
   177  	if eventInfo.HostService {
   178  		// For network only pus, we do not program cgroups and hence should not clean it.
   179  		// Cleaning this could result in removal of root cgroup that was configured for
   180  		// true host mode pu.
   181  		if eventInfo.NetworkOnlyTraffic {
   182  			return nil
   183  		}
   184  
   185  		if err := l.netcls.AssignRootMark(0); err != nil {
   186  			return fmt.Errorf("unable to write to net_cls.classid file for new cgroup: %s", err)
   187  		}
   188  	}
   189  
   190  	puID = baseName(puID, "/")
   191  
   192  	//let us remove the cgroup files now
   193  	if err := l.netcls.DeleteCgroup(puID); err != nil {
   194  		zap.L().Warn("Failed to clean netcls group",
   195  			zap.String("puID", puID),
   196  			zap.Error(err),
   197  		)
   198  	}
   199  
   200  	return nil
   201  }
   202  
   203  // Pause handles a pause event
   204  func (l *linuxProcessor) Pause(ctx context.Context, eventInfo *common.EventInfo) error {
   205  
   206  	puID, err := l.generateContextID(eventInfo)
   207  	if err != nil {
   208  		return fmt.Errorf("unable to generate context id: %s", err)
   209  	}
   210  
   211  	return l.config.Policy.HandlePUEvent(ctx, puID, common.EventPause, nil)
   212  }
   213  
   214  func (l *linuxProcessor) resyncHostService(ctx context.Context, e *common.EventInfo) error {
   215  
   216  	runtime, err := l.metadataExtractor(e)
   217  	if err != nil {
   218  		return err
   219  	}
   220  
   221  	nativeID, err := l.generateContextID(e)
   222  	if err != nil {
   223  		return err
   224  	}
   225  
   226  	if err = l.config.Policy.HandlePUEvent(ctx, nativeID, common.EventStart, runtime); err != nil {
   227  		return fmt.Errorf("Unable to start PU: %s", err)
   228  	}
   229  
   230  	return l.processHostServiceStart(e, runtime)
   231  }
   232  
   233  // Resync resyncs with all the existing services that were there before we start
   234  func (l *linuxProcessor) Resync(ctx context.Context, e *common.EventInfo) error {
   235  	// This lock is not complete necessary here
   236  	l.config.ResyncLock.RLock()
   237  	defer l.config.ResyncLock.RUnlock()
   238  	if e != nil {
   239  		// If its a host service then use pu from eventInfo
   240  		// The code block below assumes that pu is already created
   241  		if e.HostService {
   242  			return l.resyncHostService(ctx, e)
   243  		}
   244  	}
   245  
   246  	cgroups := l.netcls.ListAllCgroups("")
   247  	for _, cgroup := range cgroups {
   248  
   249  		if _, ok := ignoreNames[cgroup]; ok {
   250  			continue
   251  		}
   252  
   253  		// List all the cgroup processes. If its empty, we can remove it.
   254  		procs, err := l.netcls.ListCgroupProcesses(cgroup)
   255  		if err != nil {
   256  			continue
   257  		}
   258  
   259  		// All processes in cgroup have died. Let's clean up.
   260  		if len(procs) == 0 {
   261  			if err := l.netcls.DeleteCgroup(cgroup); err != nil {
   262  				zap.L().Warn("Failed to deleted cgroup",
   263  					zap.String("cgroup", cgroup),
   264  					zap.Error(err),
   265  				)
   266  			}
   267  			continue
   268  		}
   269  
   270  		runtime := policy.NewPURuntimeWithDefaults()
   271  		puType := common.LinuxProcessPU
   272  
   273  		runtime.SetPUType(puType)
   274  		runtime.SetOptions(policy.OptionsType{
   275  			CgroupMark: strconv.FormatUint(cgnetcls.MarkVal(), 10),
   276  			CgroupName: cgroup,
   277  		})
   278  
   279  		// Processes are still alive. We should enforce policy.
   280  		if err := l.config.Policy.HandlePUEvent(ctx, cgroup, common.EventStart, runtime); err != nil {
   281  			zap.L().Error("Failed to restart cgroup control", zap.String("cgroup ID", cgroup), zap.Error(err))
   282  		}
   283  
   284  		if err := l.processLinuxServiceStart(cgroup, nil, runtime); err != nil {
   285  			return err
   286  		}
   287  	}
   288  	return nil
   289  }
   290  
   291  // generateContextID creates the puID from the event information
   292  func (l *linuxProcessor) generateContextID(eventInfo *common.EventInfo) (string, error) {
   293  
   294  	puID := eventInfo.PUID
   295  	if eventInfo.Cgroup == "" {
   296  		return puID, nil
   297  	}
   298  
   299  	if !l.regStop.Match([]byte(eventInfo.Cgroup)) {
   300  		return "", fmt.Errorf("invalid pu id: %s", eventInfo.Cgroup)
   301  	}
   302  
   303  	puID = baseName(eventInfo.Cgroup, "/")
   304  
   305  	return puID, nil
   306  }
   307  
   308  func (l *linuxProcessor) processLinuxServiceStart(nativeID string, event *common.EventInfo, runtimeInfo *policy.PURuntime) error {
   309  
   310  	// It is okay to launch this so let us create a cgroup for it
   311  	if err := l.netcls.Creategroup(nativeID); err != nil {
   312  		return err
   313  	}
   314  
   315  	markval := runtimeInfo.Options().CgroupMark
   316  	if markval == "" {
   317  		if derr := l.netcls.DeleteCgroup(nativeID); derr != nil {
   318  			zap.L().Warn("Failed to clean cgroup", zap.Error(derr))
   319  		}
   320  		return fmt.Errorf("mark value %s not found", markval)
   321  	}
   322  
   323  	mark, _ := strconv.ParseUint(markval, 10, 32)
   324  	if err := l.netcls.AssignMark(nativeID, mark); err != nil {
   325  		if derr := l.netcls.DeleteCgroup(nativeID); derr != nil {
   326  			zap.L().Warn("Failed to clean cgroup", zap.Error(derr))
   327  		}
   328  		return err
   329  	}
   330  
   331  	if event != nil {
   332  		if err := l.netcls.AddProcess(nativeID, int(event.PID)); err != nil {
   333  			if derr := l.netcls.DeleteCgroup(nativeID); derr != nil {
   334  				zap.L().Warn("Failed to clean cgroup", zap.Error(derr))
   335  			}
   336  			return err
   337  		}
   338  	}
   339  
   340  	return nil
   341  }
   342  
   343  func (l *linuxProcessor) processHostServiceStart(event *common.EventInfo, runtimeInfo *policy.PURuntime) error {
   344  
   345  	if event.NetworkOnlyTraffic || buildflags.IsLegacyKernel() {
   346  		return nil
   347  	}
   348  
   349  	markval := runtimeInfo.Options().CgroupMark
   350  	mark, _ := strconv.ParseUint(markval, 10, 32)
   351  
   352  	return l.netcls.AssignRootMark(mark)
   353  }