github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/cmd/agent/daemon/process/process.go (about) 1 package process 2 3 import ( 4 "bufio" 5 "bytes" 6 "context" 7 "errors" 8 "fmt" 9 "os" 10 "strings" 11 "sync" 12 "sync/atomic" 13 "time" 14 15 "github.com/castai/kvisor/pkg/logging" 16 ) 17 18 var ( 19 ErrContainerNotFound = errors.New("container not found") 20 searchKubeletKubepods = []byte("/kubepods") 21 searchDockerPids = []byte("pids:/docker") 22 sep = []byte("/") 23 ) 24 25 type containersCacheValue struct { 26 containerID string 27 ts *atomic.Int64 28 } 29 30 func NewClient(log *logging.Logger, procDir string) *Client { 31 return &Client{ 32 log: log, 33 procDir: procDir, 34 refreshDuration: 2 * time.Minute, 35 cache: map[int]containersCacheValue{}, 36 } 37 } 38 39 type Client struct { 40 log *logging.Logger 41 procDir string 42 43 refreshDuration time.Duration 44 cache map[int]containersCacheValue 45 mu sync.RWMutex 46 } 47 48 func (c *Client) Start(ctx context.Context) { 49 c.runRefreshLoop(ctx) 50 } 51 52 func (c *Client) GetContainerID(pid int) (string, error) { 53 if pid == 0 { 54 return "", ErrContainerNotFound 55 } 56 // Fast path. Check cached 57 cacheKey := pid 58 c.mu.RLock() 59 cachedVal, found := c.cache[cacheKey] 60 c.mu.RUnlock() 61 if found { 62 cachedVal.ts.Store(time.Now().UTC().Unix()) 63 return cachedVal.containerID, nil 64 } 65 66 // Slow path. Read from file system. 67 cid, err := c.getContainerIDFromFile(pid) 68 if err != nil { 69 return "", err 70 } 71 72 // Cache value. 73 c.addContainerToCache(pid, cid) 74 75 return cid, nil 76 } 77 78 func (c *Client) RemoveContainer(pid int) { 79 c.mu.Lock() 80 delete(c.cache, pid) 81 c.mu.Unlock() 82 } 83 84 func (c *Client) getContainerIDFromFile(pid int) (string, error) { 85 cgroupPath := fmt.Sprintf("%s/%d/cgroup", c.procDir, pid) 86 f, err := os.Open(cgroupPath) 87 if err != nil { 88 return "", fmt.Errorf("opening file %s: %w", cgroupPath, ErrContainerNotFound) 89 } 90 defer f.Close() 91 92 scanner := bufio.NewScanner(f) 93 for scanner.Scan() { 94 line := scanner.Bytes() 95 if bytes.Contains(line, searchKubeletKubepods) || bytes.Contains(line, searchDockerPids) { 96 lastSegment := bytes.LastIndex(line, sep) 97 containerID := string(line[lastSegment+1:]) 98 if strings.Contains(containerID, "cri-containerd") { 99 containerID = strings.Replace(containerID, "cri-containerd-", "", 1) 100 containerID = strings.Replace(containerID, ".scope", "", 1) 101 } 102 return containerID, nil 103 } 104 } 105 return "", ErrContainerNotFound 106 } 107 108 func (c *Client) runRefreshLoop(ctx context.Context) { 109 ticker := time.NewTicker(c.refreshDuration) 110 defer ticker.Stop() 111 112 for { 113 select { 114 case <-ctx.Done(): 115 return 116 case <-ticker.C: 117 c.refresh() 118 } 119 } 120 } 121 122 func (c *Client) refresh() { 123 var pidsToRefresh []int 124 var deletedPidsCount int 125 deletedOlderThan := time.Now().UTC().Add(-1 * time.Minute) 126 c.mu.Lock() 127 for pid, val := range c.cache { 128 if time.Unix(val.ts.Load(), 0).Before(deletedOlderThan) { 129 delete(c.cache, pid) 130 deletedPidsCount++ 131 } else { 132 pidsToRefresh = append(pidsToRefresh, pid) 133 } 134 } 135 c.mu.Unlock() 136 137 var changedPidsCount int 138 for _, pid := range pidsToRefresh { 139 cid, err := c.getContainerIDFromFile(pid) 140 if err != nil && !errors.Is(err, ErrContainerNotFound) { 141 c.log.Errorf("refresh: getting container id by pid: %v", err) 142 continue 143 } 144 if cid != "" { 145 c.mu.RLock() 146 cachedVal, found := c.cache[pid] 147 c.mu.RUnlock() 148 149 if found && cachedVal.containerID != cid { 150 c.addContainerToCache(pid, cid) 151 changedPidsCount++ 152 } 153 } 154 } 155 156 c.log.Debugf("process containers refresh done, deleted=%d, changed=%d", deletedPidsCount, changedPidsCount) 157 } 158 159 func (c *Client) addContainerToCache(pid int, cid string) { 160 c.mu.Lock() 161 ts := &atomic.Int64{} 162 ts.Store(time.Now().UTC().Unix()) 163 c.cache[pid] = containersCacheValue{ 164 containerID: cid, 165 ts: ts, 166 } 167 c.mu.Unlock() 168 }