github.com/aporeto-inc/trireme-lib@v10.358.0+incompatible/monitor/internal/uid/processor.go (about)

     1  package uidmonitor
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"regexp"
     7  	"strconv"
     8  	"strings"
     9  	"sync"
    10  
    11  	"go.aporeto.io/trireme-lib/collector"
    12  	"go.aporeto.io/trireme-lib/common"
    13  	"go.aporeto.io/trireme-lib/monitor/config"
    14  	"go.aporeto.io/trireme-lib/monitor/extractors"
    15  	"go.aporeto.io/trireme-lib/policy"
    16  	"go.aporeto.io/trireme-lib/utils/cache"
    17  	"go.aporeto.io/trireme-lib/utils/cgnetcls"
    18  	"go.uber.org/zap"
    19  )
    20  
// ignoreNames is the set of entry names that Resync skips when it walks the
// names returned by ListAllCgroups. Only the presence of a key is ever
// checked; the nil values are never read.
// NOTE(review): these look like the cgroup v1 net_cls/net_prio control files
// that sit next to the per-user and per-pid directories - confirm.
var ignoreNames = map[string]*struct{}{
	"cgroup.clone_children": nil,
	"cgroup.procs":          nil,
	"net_cls.classid":       nil,
	"net_prio.ifpriomap":    nil,
	"net_prio.prioidx":      nil,
	"notify_on_release":     nil,
	"tasks":                 nil,
}
    30  
// uidProcessor captures all the monitor processor information for a UIDLoginPU
// It implements the EventProcessor interface of the rpc monitor
type uidProcessor struct {
	// config gives access to the Policy that receives PU events and to the
	// Collector that receives container records.
	config *config.ProcessorConfig
	// metadataExtractor builds the PURuntime of a user from the first event
	// seen for that user.
	metadataExtractor extractors.EventMetadataExtractor
	// netcls performs the cgroup operations (create, delete, list, mark,
	// add process).
	netcls cgnetcls.Cgroupnetcls
	// regStart and regStop are not referenced by the methods in this part of
	// the file.
	// NOTE(review): presumably used by the monitor setup code - confirm.
	regStart *regexp.Regexp
	regStop  *regexp.Regexp
	// putoPidMap maps a user PUID to its *puToPidEntry.
	putoPidMap *cache.Cache
	// pidToPU maps a PID (int32) to the PUID of the user that owns it.
	pidToPU *cache.Cache
	// The embedded mutex is held by Stop and createAndStart while they
	// read or modify the two caches.
	sync.Mutex
}
    43  
const (
	// triremeBaseCgroup is the PUID that Stop treats as the base cgroup: for
	// this value Stop only calls Deletebasepath and leaves the PU caches
	// untouched.
	triremeBaseCgroup = "/trireme"
)
    47  
// puToPidEntry represents an entry of putoPidMap
type puToPidEntry struct {
	// pidlist is the set of PIDs registered for the user. createAndStart
	// adds to it and Stop removes from it.
	pidlist map[int32]bool
	// Info is the runtime produced by the metadata extractor when the user
	// was first seen. It is reused to mark the cgroup of every later PID.
	Info *policy.PURuntime
	// publishedContextID is the ID that was handed to the policy engine when
	// the entry was created. createAndStart currently sets it to the PUID.
	publishedContextID string
}
    54  
    55  // Start handles start events
    56  func (u *uidProcessor) Start(ctx context.Context, eventInfo *common.EventInfo) error {
    57  
    58  	return u.createAndStart(ctx, eventInfo, false)
    59  }
    60  
    61  // Stop handles a stop event and destroy as well. Destroy does nothing for the uid monitor
    62  func (u *uidProcessor) Stop(ctx context.Context, eventInfo *common.EventInfo) error {
    63  
    64  	puID := eventInfo.PUID
    65  
    66  	if puID == triremeBaseCgroup {
    67  		u.netcls.Deletebasepath(puID)
    68  		return nil
    69  	}
    70  
    71  	u.Lock()
    72  	defer u.Unlock()
    73  
    74  	// Take the PID part of the user/pid PUID
    75  	var pid string
    76  	userID := eventInfo.PUID
    77  	parts := strings.SplitN(puID, "/", 2)
    78  	if len(parts) == 2 {
    79  		userID = parts[0]
    80  		pid = parts[1]
    81  	}
    82  
    83  	if len(pid) > 0 {
    84  		// Delete the cgroup for that pid
    85  		if err := u.netcls.DeleteCgroup(puID); err != nil {
    86  			return err
    87  		}
    88  
    89  		if pidlist, err := u.putoPidMap.Get(userID); err == nil {
    90  			pidCxt := pidlist.(*puToPidEntry)
    91  
    92  			iPid, err := strconv.Atoi(pid)
    93  			if err != nil {
    94  				return err
    95  			}
    96  
    97  			// Clean pid from both caches
    98  			delete(pidCxt.pidlist, int32(iPid))
    99  
   100  			if err = u.pidToPU.Remove(int32(iPid)); err != nil {
   101  				zap.L().Warn("Failed to remove entry in the cache", zap.Error(err), zap.String("stopped pid", pid))
   102  			}
   103  		}
   104  		return nil
   105  	}
   106  
   107  	runtime := policy.NewPURuntimeWithDefaults()
   108  	runtime.SetPUType(common.UIDLoginPU)
   109  
   110  	// Since all the PIDs of the user are gone, we can delete the user context.
   111  	if err := u.config.Policy.HandlePUEvent(ctx, userID, common.EventStop, runtime); err != nil {
   112  		zap.L().Warn("Failed to stop trireme PU ",
   113  			zap.String("puID", puID),
   114  			zap.Error(err),
   115  		)
   116  	}
   117  
   118  	if err := u.config.Policy.HandlePUEvent(ctx, userID, common.EventDestroy, runtime); err != nil {
   119  		zap.L().Warn("Failed to Destroy clean trireme ",
   120  			zap.String("puID", puID),
   121  			zap.Error(err),
   122  		)
   123  	}
   124  
   125  	if err := u.putoPidMap.Remove(userID); err != nil {
   126  		zap.L().Warn("Failed to remove entry in the cache", zap.Error(err), zap.String("puID", puID))
   127  	}
   128  
   129  	return u.netcls.DeleteCgroup(strings.TrimRight(userID, "/"))
   130  }
   131  
// Create handles create events. It does nothing and always returns nil:
// createAndStart publishes the create event itself when Start is processed.
func (u *uidProcessor) Create(ctx context.Context, eventInfo *common.EventInfo) error {
	return nil
}
   136  
// Destroy handles a destroy event. It does nothing and always returns nil
// because Stop already sends the destroy event to the policy engine.
func (u *uidProcessor) Destroy(ctx context.Context, eventInfo *common.EventInfo) error {
	// Destroy is not used for the UIDMonitor since we will destroy when we get stop.
	// This is to try and save some time: Stop/Destroy would be two RPC calls.
	// We don't define pause on uid monitor so stop is always followed by destroy.
	return nil
}
   144  
   145  // Pause handles a pause event
   146  func (u *uidProcessor) Pause(ctx context.Context, eventInfo *common.EventInfo) error {
   147  
   148  	return u.config.Policy.HandlePUEvent(ctx, eventInfo.PUID, common.EventPause, nil)
   149  }
   150  
   151  // Resync resyncs with all the existing services that were there before we start
   152  func (u *uidProcessor) Resync(ctx context.Context, e *common.EventInfo) error {
   153  
   154  	uids := u.netcls.ListAllCgroups("")
   155  	for _, uid := range uids {
   156  
   157  		if _, ok := ignoreNames[uid]; ok {
   158  			continue
   159  		}
   160  
   161  		processesOfUID := u.netcls.ListAllCgroups(uid)
   162  		activePids := []int32{}
   163  
   164  		for _, pid := range processesOfUID {
   165  			if _, ok := ignoreNames[pid]; ok {
   166  				continue
   167  			}
   168  
   169  			cgroupPath := uid + "/" + pid
   170  			pidlist, _ := u.netcls.ListCgroupProcesses(cgroupPath)
   171  			if len(pidlist) == 0 {
   172  				if err := u.netcls.DeleteCgroup(cgroupPath); err != nil {
   173  					zap.L().Warn("Unable to delete cgroup",
   174  						zap.String("cgroup", cgroupPath),
   175  						zap.Error(err),
   176  					)
   177  				}
   178  				continue
   179  			}
   180  
   181  			iPid, _ := strconv.Atoi(pid)
   182  			activePids = append(activePids, int32(iPid))
   183  		}
   184  
   185  		if len(activePids) == 0 {
   186  			if err := u.netcls.DeleteCgroup(uid); err != nil {
   187  				zap.L().Warn("Unable to delete cgroup",
   188  					zap.String("cgroup", uid),
   189  					zap.Error(err),
   190  				)
   191  			}
   192  			continue
   193  		}
   194  
   195  		event := &common.EventInfo{
   196  			PID:    activePids[0],
   197  			PUID:   uid,
   198  			PUType: common.UIDLoginPU,
   199  		}
   200  
   201  		if err := u.createAndStart(ctx, event, true); err != nil {
   202  			zap.L().Error("Can not synchronize user", zap.String("user", uid))
   203  		}
   204  
   205  		for i := 1; i < len(activePids); i++ {
   206  			event := &common.EventInfo{
   207  				PID:    activePids[i],
   208  				PUID:   uid,
   209  				PUType: common.UIDLoginPU,
   210  			}
   211  			if err := u.createAndStart(ctx, event, true); err != nil {
   212  				zap.L().Error("Can not synchronize user", zap.String("user", uid))
   213  			}
   214  		}
   215  	}
   216  
   217  	return nil
   218  }
   219  
   220  func (u *uidProcessor) createAndStart(ctx context.Context, eventInfo *common.EventInfo, startOnly bool) error {
   221  
   222  	u.Lock()
   223  	defer u.Unlock()
   224  
   225  	if eventInfo.Name == "" {
   226  		eventInfo.Name = eventInfo.PUID
   227  	}
   228  
   229  	puID := eventInfo.PUID
   230  	pids, err := u.putoPidMap.Get(puID)
   231  	var runtimeInfo *policy.PURuntime
   232  	if err != nil {
   233  		runtimeInfo, err = u.metadataExtractor(eventInfo)
   234  		if err != nil {
   235  			return err
   236  		}
   237  
   238  		publishedContextID := puID
   239  		// Setup the run time
   240  		if !startOnly {
   241  			if perr := u.config.Policy.HandlePUEvent(ctx, publishedContextID, common.EventCreate, runtimeInfo); perr != nil {
   242  				zap.L().Error("Failed to create process", zap.Error(perr))
   243  				return perr
   244  			}
   245  		}
   246  
   247  		if perr := u.config.Policy.HandlePUEvent(ctx, publishedContextID, common.EventStart, runtimeInfo); perr != nil {
   248  			zap.L().Error("Failed to start process", zap.Error(perr))
   249  			return perr
   250  		}
   251  
   252  		if err = u.processLinuxServiceStart(puID, eventInfo, runtimeInfo); err != nil {
   253  			zap.L().Error("processLinuxServiceStart", zap.Error(err))
   254  			return err
   255  		}
   256  
   257  		u.config.Collector.CollectContainerEvent(&collector.ContainerRecord{
   258  			ContextID: puID,
   259  			IPAddress: runtimeInfo.IPAddresses(),
   260  			Tags:      runtimeInfo.Tags(),
   261  			Event:     collector.ContainerStart,
   262  		})
   263  
   264  		entry := &puToPidEntry{
   265  			Info:               runtimeInfo,
   266  			publishedContextID: publishedContextID,
   267  			pidlist:            map[int32]bool{},
   268  		}
   269  
   270  		if err := u.putoPidMap.Add(puID, entry); err != nil {
   271  			zap.L().Warn("Failed to add puID/PU in the cache",
   272  				zap.Error(err),
   273  				zap.String("puID", puID),
   274  			)
   275  		}
   276  
   277  		pids = entry
   278  	}
   279  
   280  	pids.(*puToPidEntry).pidlist[eventInfo.PID] = true
   281  	if err := u.pidToPU.Add(eventInfo.PID, eventInfo.PUID); err != nil {
   282  		zap.L().Warn("Failed to add eventInfoPID/eventInfoPUID in the cache",
   283  			zap.Error(err),
   284  			zap.Int32("eventInfo.PID", eventInfo.PID),
   285  			zap.String("eventInfo.PUID", eventInfo.PUID),
   286  		)
   287  	}
   288  
   289  	pidPath := puID + "/" + strconv.Itoa(int(eventInfo.PID))
   290  
   291  	return u.processLinuxServiceStart(pidPath, eventInfo, pids.(*puToPidEntry).Info)
   292  
   293  }
   294  
   295  func (u *uidProcessor) processLinuxServiceStart(pidName string, event *common.EventInfo, runtimeInfo *policy.PURuntime) error {
   296  
   297  	if err := u.netcls.Creategroup(pidName); err != nil {
   298  		zap.L().Error("Failed to create cgroup for the user", zap.String("user", pidName), zap.Error(err))
   299  		return err
   300  	}
   301  
   302  	markval := runtimeInfo.Options().CgroupMark
   303  	if markval == "" {
   304  		if derr := u.netcls.DeleteCgroup(pidName); derr != nil {
   305  			zap.L().Warn("Failed to clean cgroup", zap.Error(derr))
   306  		}
   307  		return errors.New("mark value not found")
   308  	}
   309  
   310  	mark, err := strconv.ParseUint(markval, 10, 32)
   311  	if err != nil {
   312  		return err
   313  	}
   314  
   315  	if err = u.netcls.AssignMark(pidName, mark); err != nil {
   316  		if derr := u.netcls.DeleteCgroup(pidName); derr != nil {
   317  			zap.L().Warn("Failed to clean cgroup", zap.Error(derr))
   318  		}
   319  		return err
   320  	}
   321  
   322  	if err := u.netcls.AddProcess(pidName, int(event.PID)); err != nil {
   323  		if derr := u.netcls.DeleteCgroup(pidName); derr != nil {
   324  			zap.L().Warn("Failed to clean cgroup", zap.Error(derr))
   325  		}
   326  		return err
   327  	}
   328  
   329  	return nil
   330  }