github.com/aporeto-inc/trireme-lib@v10.358.0+incompatible/monitor/internal/k8s/runtime_cache.go (about)

     1  package k8smonitor
     2  
     3  import (
     4  	"context"
     5  	"sync"
     6  	"time"
     7  
     8  	"go.aporeto.io/enforcerd/trireme-lib/policy"
     9  	"go.uber.org/zap"
    10  )
    11  
    12  var (
    13  	// defaultLoopWait defines how often we loop over the entries to discover dead containers
    14  	defaultLoopWait = time.Second * 5
    15  )
    16  
    17  type runtimeCacheInterface interface {
    18  	Delete(sandboxID string)
    19  	Get(sandboxID string) policy.RuntimeReader
    20  	Set(sandboxID string, runtime policy.RuntimeReader) error
    21  }
    22  
    23  var _ runtimeCacheInterface = &runtimeCache{}
    24  
    25  type runtimeCache struct {
    26  	sync.RWMutex
    27  	runtimes  map[string]runtimeCacheEntry
    28  	loopWait  time.Duration
    29  	stopEvent stopEventFunc
    30  }
    31  
    32  type runtimeCacheEntry struct {
    33  	runtime policy.RuntimeReader
    34  	running bool
    35  }
    36  
    37  func newRuntimeCache(ctx context.Context, stopEvent stopEventFunc) *runtimeCache {
    38  	c := &runtimeCache{
    39  		runtimes:  make(map[string]runtimeCacheEntry),
    40  		loopWait:  defaultLoopWait,
    41  		stopEvent: stopEvent,
    42  	}
    43  	if c.loopWait > 0 {
    44  		go c.loop(ctx)
    45  	}
    46  	return c
    47  }
    48  
    49  func makeSnapshot(m map[string]runtimeCacheEntry) map[string]policy.RuntimeReader {
    50  	snap := make(map[string]policy.RuntimeReader, len(m))
    51  	for k, v := range m {
    52  		if v.running {
    53  			snap[k] = v.runtime
    54  		}
    55  	}
    56  	return snap
    57  }
    58  
    59  // loop is very awkward: it implements a runtime poller that checks if all runtimes
    60  // are actually still running. If not, it sends a stop event.
    61  // NOTE: this must be deprecated once we have hooked into the OCI runtime hooks in the bundle!
    62  func (c *runtimeCache) loop(ctx context.Context) {
    63  loop:
    64  	for {
    65  		select {
    66  		case <-ctx.Done():
    67  			break loop
    68  		case <-time.After(c.loopWait):
    69  			c.RLock()
    70  			if len(c.runtimes) > 0 {
    71  				// take a snapshot
    72  				snap := makeSnapshot(c.runtimes)
    73  				c.RUnlock()
    74  				// and process them
    75  				c.processRuntimes(ctx, snap)
    76  			} else {
    77  				c.RUnlock()
    78  			}
    79  		}
    80  	}
    81  }
    82  
    83  // processRuntimes takes a snapshot of the runtimeCache, checks if the process is running, and sends a stop event if not
    84  func (c *runtimeCache) processRuntimes(ctx context.Context, snap map[string]policy.RuntimeReader) {
    85  	for id, runtime := range snap {
    86  		pid := runtime.Pid()
    87  		if pid > 0 {
    88  			if running, err := sandboxIsRunning(pid); !running {
    89  				// if there has been error checking, just continue
    90  				if err != nil {
    91  					zap.L().Error("K8sMonitor: runtime poller: failed to check if sandbox is still running",
    92  						zap.String("sandboxID", id),
    93  						zap.Int("sandboxPid", pid),
    94  						zap.Error(err),
    95  					)
    96  					continue
    97  				}
    98  				zap.L().Debug("K8sMonitor: runtime poller: sandbox container must have stopped", zap.String("sandboxID", id), zap.Int("sandboxPid", pid), zap.Error(err))
    99  
   100  				// update the entry
   101  				c.Lock()
   102  				if _, ok := c.runtimes[id]; ok {
   103  					c.runtimes[id] = runtimeCacheEntry{
   104  						runtime: runtime,
   105  						running: false,
   106  					}
   107  				}
   108  				c.Unlock()
   109  
   110  				// fire away a stop event to the policy engine
   111  				// log an error as every caller should do, but continue normally
   112  				// there is nothing we can do about the error
   113  				go func(ctx context.Context, sandboxID string) {
   114  					if err := c.stopEvent(ctx, sandboxID); err != nil {
   115  						zap.L().Error("K8sMonitor: runtime poller: failed to send stop event to policy engine", zap.String("sandboxID", sandboxID), zap.Error(err))
   116  					}
   117  				}(ctx, id)
   118  			}
   119  		}
   120  	}
   121  }
   122  
   123  func (c *runtimeCache) Get(sandboxID string) policy.RuntimeReader {
   124  	if c == nil {
   125  		return nil
   126  	}
   127  	c.RLock()
   128  	defer c.RUnlock()
   129  	if c.runtimes == nil {
   130  		return nil
   131  	}
   132  	r, ok := c.runtimes[sandboxID]
   133  	if !ok {
   134  		return nil
   135  	}
   136  	// TODO: should return a clone, not a pointer
   137  	return r.runtime
   138  }
   139  
   140  func (c *runtimeCache) Set(sandboxID string, runtime policy.RuntimeReader) error {
   141  	if c == nil {
   142  		return errCacheUninitialized
   143  	}
   144  	if sandboxID == "" {
   145  		return errSandboxEmpty
   146  	}
   147  	if runtime == nil {
   148  		return errRuntimeNil
   149  	}
   150  	c.Lock()
   151  	defer c.Unlock()
   152  	if c.runtimes == nil {
   153  		return errCacheUninitialized
   154  	}
   155  	c.runtimes[sandboxID] = runtimeCacheEntry{
   156  		runtime: runtime,
   157  		running: true,
   158  	}
   159  	return nil
   160  }
   161  
   162  func (c *runtimeCache) Delete(sandboxID string) {
   163  	if c == nil {
   164  		return
   165  	}
   166  	c.Lock()
   167  	defer c.Unlock()
   168  	delete(c.runtimes, sandboxID)
   169  }