github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/cmd/agent/daemon/process/process.go (about)

     1  package process
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"context"
     7  	"errors"
     8  	"fmt"
     9  	"os"
    10  	"strings"
    11  	"sync"
    12  	"sync/atomic"
    13  	"time"
    14  
    15  	"github.com/castai/kvisor/pkg/logging"
    16  )
    17  
    18  var (
    19  	ErrContainerNotFound  = errors.New("container not found")
    20  	searchKubeletKubepods = []byte("/kubepods")
    21  	searchDockerPids      = []byte("pids:/docker")
    22  	sep                   = []byte("/")
    23  )
    24  
    25  type containersCacheValue struct {
    26  	containerID string
    27  	ts          *atomic.Int64
    28  }
    29  
    30  func NewClient(log *logging.Logger, procDir string) *Client {
    31  	return &Client{
    32  		log:             log,
    33  		procDir:         procDir,
    34  		refreshDuration: 2 * time.Minute,
    35  		cache:           map[int]containersCacheValue{},
    36  	}
    37  }
    38  
    39  type Client struct {
    40  	log     *logging.Logger
    41  	procDir string
    42  
    43  	refreshDuration time.Duration
    44  	cache           map[int]containersCacheValue
    45  	mu              sync.RWMutex
    46  }
    47  
    48  func (c *Client) Start(ctx context.Context) {
    49  	c.runRefreshLoop(ctx)
    50  }
    51  
    52  func (c *Client) GetContainerID(pid int) (string, error) {
    53  	if pid == 0 {
    54  		return "", ErrContainerNotFound
    55  	}
    56  	// Fast path. Check cached
    57  	cacheKey := pid
    58  	c.mu.RLock()
    59  	cachedVal, found := c.cache[cacheKey]
    60  	c.mu.RUnlock()
    61  	if found {
    62  		cachedVal.ts.Store(time.Now().UTC().Unix())
    63  		return cachedVal.containerID, nil
    64  	}
    65  
    66  	// Slow path. Read from file system.
    67  	cid, err := c.getContainerIDFromFile(pid)
    68  	if err != nil {
    69  		return "", err
    70  	}
    71  
    72  	// Cache value.
    73  	c.addContainerToCache(pid, cid)
    74  
    75  	return cid, nil
    76  }
    77  
    78  func (c *Client) RemoveContainer(pid int) {
    79  	c.mu.Lock()
    80  	delete(c.cache, pid)
    81  	c.mu.Unlock()
    82  }
    83  
    84  func (c *Client) getContainerIDFromFile(pid int) (string, error) {
    85  	cgroupPath := fmt.Sprintf("%s/%d/cgroup", c.procDir, pid)
    86  	f, err := os.Open(cgroupPath)
    87  	if err != nil {
    88  		return "", fmt.Errorf("opening file %s: %w", cgroupPath, ErrContainerNotFound)
    89  	}
    90  	defer f.Close()
    91  
    92  	scanner := bufio.NewScanner(f)
    93  	for scanner.Scan() {
    94  		line := scanner.Bytes()
    95  		if bytes.Contains(line, searchKubeletKubepods) || bytes.Contains(line, searchDockerPids) {
    96  			lastSegment := bytes.LastIndex(line, sep)
    97  			containerID := string(line[lastSegment+1:])
    98  			if strings.Contains(containerID, "cri-containerd") {
    99  				containerID = strings.Replace(containerID, "cri-containerd-", "", 1)
   100  				containerID = strings.Replace(containerID, ".scope", "", 1)
   101  			}
   102  			return containerID, nil
   103  		}
   104  	}
   105  	return "", ErrContainerNotFound
   106  }
   107  
   108  func (c *Client) runRefreshLoop(ctx context.Context) {
   109  	ticker := time.NewTicker(c.refreshDuration)
   110  	defer ticker.Stop()
   111  
   112  	for {
   113  		select {
   114  		case <-ctx.Done():
   115  			return
   116  		case <-ticker.C:
   117  			c.refresh()
   118  		}
   119  	}
   120  }
   121  
   122  func (c *Client) refresh() {
   123  	var pidsToRefresh []int
   124  	var deletedPidsCount int
   125  	deletedOlderThan := time.Now().UTC().Add(-1 * time.Minute)
   126  	c.mu.Lock()
   127  	for pid, val := range c.cache {
   128  		if time.Unix(val.ts.Load(), 0).Before(deletedOlderThan) {
   129  			delete(c.cache, pid)
   130  			deletedPidsCount++
   131  		} else {
   132  			pidsToRefresh = append(pidsToRefresh, pid)
   133  		}
   134  	}
   135  	c.mu.Unlock()
   136  
   137  	var changedPidsCount int
   138  	for _, pid := range pidsToRefresh {
   139  		cid, err := c.getContainerIDFromFile(pid)
   140  		if err != nil && !errors.Is(err, ErrContainerNotFound) {
   141  			c.log.Errorf("refresh: getting container id by pid: %v", err)
   142  			continue
   143  		}
   144  		if cid != "" {
   145  			c.mu.RLock()
   146  			cachedVal, found := c.cache[pid]
   147  			c.mu.RUnlock()
   148  
   149  			if found && cachedVal.containerID != cid {
   150  				c.addContainerToCache(pid, cid)
   151  				changedPidsCount++
   152  			}
   153  		}
   154  	}
   155  
   156  	c.log.Debugf("process containers refresh done, deleted=%d, changed=%d", deletedPidsCount, changedPidsCount)
   157  }
   158  
   159  func (c *Client) addContainerToCache(pid int, cid string) {
   160  	c.mu.Lock()
   161  	ts := &atomic.Int64{}
   162  	ts.Store(time.Now().UTC().Unix())
   163  	c.cache[pid] = containersCacheValue{
   164  		containerID: cid,
   165  		ts:          ts,
   166  	}
   167  	c.mu.Unlock()
   168  }