// Copyright 2021 iLogtail Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package containercenter

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"os"
	"path"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/containerd/containerd"
	containerdcriserver "github.com/containerd/containerd/pkg/cri/server"
	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/container"

	"github.com/alibaba/ilogtail/pkg/flags"
	"github.com/alibaba/ilogtail/pkg/logger"
)

const (
	// maxMsgSize is the maximum gRPC message size (16 MiB) allowed for the
	// CRI runtime-service client.
	maxMsgSize = 1024 * 1024 * 16
	// defaultContextTimeout bounds each individual CRI call.
	defaultContextTimeout = time.Second * 10
)

// criRuntimeWrapper is the package-level handle to the CRI wrapper.
// NOTE(review): it is declared but not assigned in this file — presumably
// populated by package init/startup code elsewhere; verify before relying on it.
var criRuntimeWrapper *CRIRuntimeWrapper

// innerContainerInfo is the wrapper's cached per-container snapshot, used by
// syncContainers to detect state/name/liveness changes between list calls.
type innerContainerInfo struct {
	State  CriContainerState
	Pid    int
	Name   string
	Status string
}

// CRIRuntimeWrapper adapts a CRI-compatible runtime (containerd) to the
// ContainerCenter's docker-style container model.
type CRIRuntimeWrapper struct {
	containerCenter *ContainerCenter
	// nativeClient is an optional direct containerd client, only created when
	// EnableContainerdUpperDirDetect is set; used to resolve overlay upperdirs.
	nativeClient *containerd.Client
	client       *RuntimeServiceClient
	runtimeInfo  CriVersionInfo

	// containersLock serializes fetchAll and syncContainers so neither acts on
	// a stale container list.
	containersLock sync.Mutex

	containers       map[string]*innerContainerInfo
	containerHistory map[string]bool

	// stopCh signals loopSyncContainers to exit.
	// NOTE(review): it is initialized as make(<-chan struct{}) — a receive-only
	// channel that nothing can send on or close, so the stop path can never
	// fire; confirm whether a writable chan struct{} plus a Stop method was intended.
	stopCh <-chan struct{}

	// rootfsLock guards rootfsCache.
	rootfsLock sync.RWMutex
	// rootfsCache maps an ID to a resolved rootfs/upperdir path.
	// NOTE(review): getContainerUpperDir inserts entries keyed by snapshot key
	// while sweepCache prunes by container ID — verify the keys actually match.
	rootfsCache            map[string]string
	listContainerStartTime int64 // in nanosecond
}

// NewCRIRuntimeWrapper validates the CRI endpoint, connects the runtime-service
// client, sanity-checks that the remote runtime reports at least one container,
// and optionally dials containerd directly for upperdir detection.
func NewCRIRuntimeWrapper(containerCenter *ContainerCenter) (*CRIRuntimeWrapper, error) {
	// The endpoint must exist and be a socket file, not a directory.
	if fi, err := os.Stat(containerdUnixSocket); err != nil || fi.IsDir() {
		return nil, fmt.Errorf("cri runtime endpoint %s is not valid", containerdUnixSocket)
	}

	client, err := NewRuntimeServiceClient(defaultContextTimeout, maxMsgSize)
	if err != nil {
		logger.Errorf(context.Background(), "CONNECT_CRI_RUNTIME_ALARM", "Connect remote cri-runtime failed: %v", err)
		return nil, err
	}

	ctx, cancel := getContextWithTimeout(defaultContextTimeout)
	defer cancel()

	// Probe the runtime: an empty container list is treated as an invalid
	// runtime (e.g. pointing at the wrong socket), not as a healthy idle node.
	containerResp, err := client.ListContainers(ctx)
	if err != nil {
		logger.Errorf(context.Background(), "CONNECT_CRI_RUNTIME_ALARM", "List containers from cri-runtime failed: %v", err)
		return nil, err
	} else if len(containerResp.Containers) == 0 {
		err = errors.New("remote cri-runtime has no container")
		logger.Errorf(context.Background(), "CONNECT_CRI_RUNTIME_ALARM", "Remote cri-runtime is invalid: %v", err)
		return nil, err
	}

	// Optional native containerd client (k8s.io namespace) for upperdir
	// detection; failures degrade gracefully to nil rather than aborting.
	var containerdClient *containerd.Client
	if *flags.EnableContainerdUpperDirDetect {
		containerdClient, err = containerd.New(containerdUnixSocket, containerd.WithDefaultNamespace("k8s.io"))
		if err == nil {
			_, err = containerdClient.Version(context.Background())
		}
		if err != nil {
			logger.Warning(context.Background(), "CONTAINERD_CLIENT_ALARM", "Connect containerd failed", err)
			containerdClient = nil
		}
	}

	return &CRIRuntimeWrapper{
		containerCenter:  containerCenter,
		client:           client,
		nativeClient:     containerdClient,
		runtimeInfo:      client.info,
		containers:       make(map[string]*innerContainerInfo),
		containerHistory: make(map[string]bool),
		stopCh:           make(<-chan struct{}),
		rootfsCache:      make(map[string]string),
		// Containers that exited before this timestamp are pruned early in
		// syncContainers.
		listContainerStartTime: time.Now().UnixNano(),
	}, nil
}

// createContainerInfo converts a CRI container status into a docker-style
// ContainerJSON so the existing docker-based history logic can be reused.
func (cw *CRIRuntimeWrapper) createContainerInfo(containerID string) (detail *DockerInfoDetail, sandboxID string, state CriContainerState, err error) {
	ctx, cancel := getContextWithTimeout(defaultContextTimeout)
	// verbose=true so status.Info carries the runtime's extra "info" JSON blob.
	status, err := cw.client.ContainerStatus(ctx, containerID, true)
	cancel()
	if err != nil {
		return nil, "", ContainerStateContainerUnknown, err
	}

	// The "info" entry holds the containerd CRI-server ContainerInfo (pid,
	// snapshotter, runtime spec, sandbox id) serialized as JSON.
	var ci containerdcriserver.ContainerInfo
	foundInfo := false
	if statusinfo := status.Info; statusinfo != nil {
		if info, ok := statusinfo["info"]; ok {
			foundInfo = true
			ci, err = parseContainerInfo(info)
			if err != nil {
				logger.Errorf(context.Background(), "CREATE_CONTAINERD_INFO_ALARM", "failed to parse container info, containerId: %s, data: %s, error: %v", containerID, info, err)
			}
		}
	}

	// Without the info blob we cannot build a usable detail; bail out.
	if !foundInfo {
		logger.Warningf(context.Background(), "CREATE_CONTAINERD_INFO_ALARM", "can not find container info from CRI::ContainerStatus, containerId: %s", containerID)
		return nil, "", ContainerStateContainerUnknown, fmt.Errorf("can not find container info from CRI::ContainerStatus, containerId: %s", containerID)
	}

	labels := status.Status.Labels
	if labels == nil {
		labels = map[string]string{}
	}

	// Prefer the human-readable image name; fall back to the image ref digest.
	var image string
	if status.Status.Image != nil && status.Status.Image.Image != "" {
		image = status.Status.Image.Image
	} else {
		image = status.Status.ImageRef
	}

	// Judge Container Liveness by Pid
	state = status.Status.State
	stateStatus := ContainerStatusExited
	if state == ContainerStateContainerRunning && ContainerProcessAlive(int(ci.Pid)) {
		stateStatus = ContainerStatusRunning
	}
	// Assemble a docker-API-shaped view of the CRI container so downstream
	// docker-oriented code paths work unchanged.
	dockerContainer := types.ContainerJSON{
		ContainerJSONBase: &types.ContainerJSONBase{
			ID:      containerID,
			Created: time.Unix(0, status.Status.CreatedAt).Format(time.RFC3339Nano),
			LogPath: status.Status.LogPath,
			State: &types.ContainerState{
				Status: stateStatus,
				Pid:    int(ci.Pid),
			},
			HostConfig: &container.HostConfig{
				VolumeDriver: ci.Snapshotter,
				Runtime:      cw.runtimeInfo.RuntimeName,
				LogConfig: container.LogConfig{
					Type: "json-file",
				},
			},
		},
		Config: &container.Config{
			Labels: labels,
			Image:  image,
		},
	}

	if status.Status.Metadata != nil {
		dockerContainer.Name = status.Status.Metadata.Name
	}

	// Env: prefer the resolved OCI runtime spec; otherwise rebuild KEY=VALUE
	// pairs from the CRI container config.
	if ci.RuntimeSpec != nil && ci.RuntimeSpec.Process != nil {
		dockerContainer.Config.Env = ci.RuntimeSpec.Process.Env
	} else {
		var envs []string
		for _, kv := range ci.Config.Envs {
			envs = append(envs, kv.Key+"="+kv.Value)
		}
		dockerContainer.Config.Env = envs
	}

	// Walk the OCI mounts to recover hosts/hostname paths and to expose all
	// mounts in docker MountPoint form.
	var hostsPath string
	var hostnamePath string
	if ci.RuntimeSpec != nil {
		for _, mount := range ci.RuntimeSpec.Mounts {
			if mount.Destination == "/etc/hosts" {
				hostsPath = mount.Source
			}
			if mount.Destination == "/etc/hostname" {
				hostnamePath = mount.Source
			}
			dockerContainer.Mounts = append(dockerContainer.Mounts, types.MountPoint{
				Source:      filepath.Clean(mount.Source),
				Destination: filepath.Clean(mount.Destination),
				Driver:      mount.Type,
			})
		}
	}
	// Best-effort overlay upperdir detection via the native containerd client.
	if ci.Snapshotter != "" && ci.SnapshotKey != "" {
		uppDir := cw.getContainerUpperDir(ci.SnapshotKey, ci.Snapshotter)
		if uppDir != "" {
			dockerContainer.GraphDriver.Data = map[string]string{
				"UpperDir": uppDir,
			}
		}
	}
	if len(hostnamePath) > 0 {
		// Read error deliberately ignored: hostname is informational only.
		hn, _ := os.ReadFile(GetMountedFilePath(hostnamePath))
		dockerContainer.Config.Hostname = strings.Trim(string(hn), "\t \n")
	}
	dockerContainer.HostnamePath = hostnamePath
	dockerContainer.HostsPath = hostsPath

	return cw.containerCenter.CreateInfoDetail(dockerContainer, envConfigPrefix, false), ci.SandboxID, state, nil
}

// fetchAll rebuilds the wrapper's container view from a full CRI list of
// containers and pod sandboxes, pushes the running set to the container
// center, and prunes dead containers and obsolete history entries.
func (cw *CRIRuntimeWrapper) fetchAll() error {
	// fetchAll and syncContainers must be isolated
	// if one procedure read container list then locked out
	// when it resumes, it may process on a staled list and make wrong decisions
	cw.containersLock.Lock()
	defer cw.containersLock.Unlock()
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()
	containersResp, err := cw.client.ListContainers(ctx)
	if err != nil {
		return err
	}
	sandboxResp, err := cw.client.ListPodSandbox(ctx)
	if err != nil {
		return err
	}
	// Index sandboxes by ID so each container can pick up its pod labels.
	sandboxMap := make(map[string]*CriPodSandbox, len(sandboxResp.Items))
	for _, item := range sandboxResp.Items {
		sandboxMap[item.ID] = item
	}

	allContainerMap := make(map[string]bool)           // all listable containers
	runningMap := make(map[string]bool)                // status running
	containerMap := make(map[string]*DockerInfoDetail) // pid exists
	for i, c := range containersResp.Containers {
		logger.Debugf(context.Background(), "CRIRuntime ListContainers [%v]: %+v", i, c)
		allContainerMap[c.ID] = true
		switch c.State {
		case ContainerStateContainerRunning:
			runningMap[c.ID] = true
		case ContainerStateContainerExited:
			runningMap[c.ID] = false
		default:
			// Created/unknown states are skipped entirely.
			continue
		}

		dockerContainer, _, _, err := cw.createContainerInfo(c.ID)
		if err != nil {
			logger.Errorf(context.Background(), "CREATE_CONTAINERD_INFO_ALARM", "Create container info from cri-runtime error, Container Info: %+v, err: %v", c, err)
			continue
		}
		if dockerContainer == nil || dockerContainer.ContainerInfo.ContainerJSONBase == nil {
			logger.Error(context.Background(), "CREATE_CONTAINERD_INFO_ALARM", "Create container info from cri-runtime error, Container Info:%+v", c)
			continue
		}
		// Only liveness-verified running containers (pid alive) are tracked.
		if dockerContainer.Status() != ContainerStatusRunning {
			continue
		}
		cw.containers[c.ID] = &innerContainerInfo{
			State:  c.State,
			Pid:    dockerContainer.ContainerInfo.State.Pid,
			Name:   dockerContainer.ContainerInfo.Name,
			Status: dockerContainer.Status(),
		}
		cw.containerHistory[c.ID] = true
		containerMap[c.ID] = dockerContainer

		// append the pod labels to the k8s info.
		if sandbox, ok := sandboxMap[c.PodSandboxID]; ok {
			cw.wrapperK8sInfoByLabels(sandbox.Labels, dockerContainer)
		}
		logger.Debugf(context.Background(), "Create container info, id:%v\tname:%v\tcreated:%v\tstatus:%v\tdetail:%+v",
			dockerContainer.IDPrefix(), c.Metadata.Name, dockerContainer.ContainerInfo.Created, dockerContainer.Status(), c)
	}
	cw.containerCenter.updateContainers(containerMap)

	// delete not running containers
	for k := range cw.containers {
		if running, ok := runningMap[k]; !ok || !running {
			cw.containerCenter.markRemove(k)
			delete(cw.containers, k)
		}
	}

	// delete obsolete history
	for k := range cw.containerHistory {
		if _, ok := allContainerMap[k]; !ok {
			delete(cw.containerHistory, k)
		}
	}

	return nil
}

// loopSyncContainers runs syncContainers on a fixed ticker until stopCh is
// signaled, then closes the CRI client and stops the ticker.
func (cw *CRIRuntimeWrapper) loopSyncContainers() {
	ticker := time.NewTicker(DefaultSyncContainersPeriod)
	for {
		select {
		case <-cw.stopCh:
			cw.client.Close()
			ticker.Stop()
			return
		case <-ticker.C:
			if err := cw.syncContainers(); err != nil {
				logger.Errorf(context.Background(), "SYNC_CONTAINERD_ALARM", "syncContainers error: %v", err)
			}
		}
	}
}

// syncContainers performs an incremental reconcile: it re-fetches details only
// for containers that are new or whose state/name/liveness changed, and
// removes containers that disappeared or exited.
func (cw *CRIRuntimeWrapper) syncContainers() error {
	cw.containersLock.Lock()
	defer cw.containersLock.Unlock()
	ctx, cancel := getContextWithTimeout(time.Second * 20)
	defer cancel()
	logger.Debug(context.Background(), "cri sync containers", "begin")
	containersResp, err := cw.client.ListContainers(ctx)
	if err != nil {
		return err
	}

	newContainers := map[string]*CriContainer{}
	for i, c := range containersResp.Containers {
		// https://github.com/containerd/containerd/blob/main/pkg/cri/store/container/status.go
		// We only care RUNNING and EXITED
		// This is only an early prune, accurate status must be detected by ContainerProcessAlive
		if c.State != ContainerStateContainerRunning &&
			(c.State != ContainerStateContainerExited || c.CreatedAt < cw.listContainerStartTime) {
			continue
		}
		id := containersResp.Containers[i].ID
		newContainers[id] = containersResp.Containers[i]
		oldInfo, ok := cw.containers[id]
		_, inHistory := cw.containerHistory[id]
		if ok {
			// Recompute liveness from the cached pid to catch processes that
			// died without a CRI state transition.
			status := ContainerStatusExited
			if oldInfo.Status == ContainerStatusRunning && ContainerProcessAlive(oldInfo.Pid) {
				status = ContainerStatusRunning
			}
			if oldInfo.State != ContainerStateContainerRunning || // not running
				(oldInfo.State == c.State && oldInfo.Name == c.Metadata.Name && oldInfo.Status == status) { // no state change
				continue
			}
		} else if inHistory {
			// Already processed and pruned before; don't re-fetch.
			continue
		}
		if err := cw.fetchOne(id); err != nil {
			logger.Errorf(context.Background(), "CREATE_CONTAINERD_INFO_ALARM", "failed to createContainerInfo, containerId: %s, error: %v", id, err)
		}
	}

	// delete container
	for oldID, c := range cw.containers {
		if _, ok := newContainers[oldID]; !ok || c.State == ContainerStateContainerExited {
			logger.Debug(context.Background(), "cri sync containers remove", oldID)
			cw.containerCenter.markRemove(oldID)
			delete(cw.containers, oldID)
		}
	}
	logger.Debug(context.Background(), "cri sync containers", "done")
	return nil
}

// fetchOne fetches a single container's detail, enriches it with k8s sandbox
// labels, and registers it with the container center and the local caches.
func (cw *CRIRuntimeWrapper) fetchOne(containerID string) error {
	logger.Debug(context.Background(), "trigger fetchOne")
	dockerContainer, sandboxID, status, err := cw.createContainerInfo(containerID)
	if err != nil {
		return err
	}

	cw.wrapperK8sInfoByID(sandboxID, dockerContainer)

	if logger.DebugFlag() {
		// bytes, _ := json.Marshal(dockerContainer)
		// logger.Debugf(context.Background(), "Create container info: %s", string(bytes))
		logger.Debugf(context.Background(), "Create container info, id:%v\tname:%v\tcreated:%v\tstatus:%v\tdetail=%+v",
			dockerContainer.IDPrefix(), dockerContainer.ContainerInfo.Name, dockerContainer.ContainerInfo.Created, dockerContainer.Status(), dockerContainer.ContainerInfo)
	}

	// In the CRI scenario the k8s info is attached first, then updateContainer
	// is called for this single container.
	cw.containerCenter.updateContainer(containerID, dockerContainer)
	cw.containerHistory[containerID] = true
	cw.containers[containerID] = &innerContainerInfo{
		status,
		dockerContainer.ContainerInfo.State.Pid,
		dockerContainer.ContainerInfo.Name,
		dockerContainer.Status(),
	}
	return nil
}

// wrapperK8sInfoByID looks up the pod sandbox status by ID and merges its
// labels into the detail's K8SInfo. Failures are logged at debug level only.
func (cw *CRIRuntimeWrapper) wrapperK8sInfoByID(sandboxID string, detail *DockerInfoDetail) {
	ctx, cancel := getContextWithTimeout(defaultContextTimeout)
	status, err := cw.client.PodSandboxStatus(ctx, sandboxID, true)
	cancel()
	if err != nil {
		logger.Debug(context.Background(), "fetchone cannot read k8s info from sandbox, sandboxID", sandboxID)
		return
	}
	cw.wrapperK8sInfoByLabels(status.Status.Labels, detail)
}

// wrapperK8sInfoByLabels copies sandbox labels into detail.K8SInfo.Labels,
// skipping kubernetes-internal label/annotation prefixes.
func (cw *CRIRuntimeWrapper) wrapperK8sInfoByLabels(sandboxLabels map[string]string, detail *DockerInfoDetail) {
	if detail.K8SInfo == nil || sandboxLabels == nil {
		return
	}
	if detail.K8SInfo.Labels == nil {
		detail.K8SInfo.Labels = make(map[string]string)
	}
	for k, v := range sandboxLabels {
		if strings.HasPrefix(k, k8sInnerLabelPrefix) || strings.HasPrefix(k, k8sInnerAnnotationPrefix) {
			continue
		}
		detail.K8SInfo.Labels[k] = v
	}
}

// sweepCache drops rootfsCache entries whose key no longer appears in the
// container center's container map.
// NOTE(review): getContainerUpperDir caches under the snapshot key, while the
// container map is keyed by container ID — if those differ, upperdir entries
// are swept on every pass; confirm key consistency.
func (cw *CRIRuntimeWrapper) sweepCache() {
	// clear unuseful cache
	usedCacheItem := make(map[string]bool)
	cw.containerCenter.lock.RLock()
	for key := range cw.containerCenter.containerMap {
		usedCacheItem[key] = true
	}
	cw.containerCenter.lock.RUnlock()

	cw.rootfsLock.Lock()
	for key := range cw.rootfsCache {
		if _, ok := usedCacheItem[key]; !ok {
			delete(cw.rootfsCache, key)
		}
	}
	cw.rootfsLock.Unlock()
}

// getContextWithTimeout returns a background context bounded by timeout.
func getContextWithTimeout(timeout time.Duration) (context.Context, context.CancelFunc) {
	return context.WithTimeout(context.Background(), timeout)
}

// parseContainerInfo decodes the CRI "info" JSON blob into the containerd
// CRI-server ContainerInfo structure.
func parseContainerInfo(data string) (containerdcriserver.ContainerInfo, error) {
	var ci containerdcriserver.ContainerInfo
	err := json.Unmarshal([]byte(data), &ci)
	return ci, err
}

// lookupRootfsCache returns the cached rootfs/upperdir path for containerID,
// if present, under a read lock.
func (cw *CRIRuntimeWrapper) lookupRootfsCache(containerID string) (string, bool) {
	cw.rootfsLock.RLock()
	defer cw.rootfsLock.RUnlock()
	dir, ok := cw.rootfsCache[containerID]
	return dir, ok
}

// lookupContainerRootfsAbsDir probes well-known containerd state-directory
// layouts for the container's rootfs directory and caches the first hit.
// Returns "" when no candidate path exists on disk.
func (cw *CRIRuntimeWrapper) lookupContainerRootfsAbsDir(info types.ContainerJSON) string {
	// For cri-runtime
	containerID := info.ID
	if dir, ok := cw.lookupRootfsCache(containerID); ok {
		return dir
	}

	// Example: /run/containerd/io.containerd.runtime.v1.linux/k8s.io/{ContainerID}/rootfs/

	// Candidate path components: state dir (a) / runtime dir (b) /
	// namespace (c) / container ID / rootfs dir (d).
	var aDirs []string
	customStateDir := os.Getenv("CONTAINERD_STATE_DIR")
	if len(customStateDir) > 0 {
		// /etc/containerd/config.toml
		//   state = "/home/containerd"
		// Example /home/containerd/io.containerd.runtime.v2.task/k8s.io/{ContainerID}/rootfs
		aDirs = []string{
			customStateDir,
			"/run/containerd",
			"/var/run/containerd",
		}
	} else {
		aDirs = []string{
			"/run/containerd",
			"/var/run/containerd",
		}
	}

	bDirs := []string{
		"io.containerd.runtime.v2.task",
		"io.containerd.runtime.v1.linux",
		"runc",
	}

	cDirs := []string{
		"k8s.io",
		"",
	}

	dDirs := []string{
		"rootfs",
		"root",
	}

	for _, a := range aDirs {
		for _, c := range cDirs {
			for _, d := range dDirs {
				for _, b := range bDirs {
					dir := path.Join(a, b, c, info.ID, d)
					if fi, err := os.Stat(dir); err == nil && fi.IsDir() {
						cw.rootfsLock.Lock()
						cw.rootfsCache[containerID] = dir
						cw.rootfsLock.Unlock()
						return dir
					}
				}
			}
		}
	}

	return ""
}
func (cw *CRIRuntimeWrapper) getContainerUpperDir(containerid, snapshotter string) string { 541 // For Containerd 542 543 if cw.nativeClient == nil { 544 return "" 545 } 546 547 if dir, ok := cw.lookupRootfsCache(containerid); ok { 548 return dir 549 } 550 551 si := cw.nativeClient.SnapshotService(snapshotter) 552 mounts, err := si.Mounts(context.Background(), containerid) 553 if err != nil { 554 logger.Warning(context.Background(), "CONTAINERD_CLIENT_ALARM", "cannot get snapshot info, containerID", containerid, "errInfo", err) 555 return "" 556 } 557 for _, m := range mounts { 558 if len(m.Options) != 0 { 559 for _, i := range m.Options { 560 s := strings.Split(i, "=") 561 if s[0] == "upperdir" { 562 cw.rootfsLock.Lock() 563 cw.rootfsCache[containerid] = s[1] 564 cw.rootfsLock.Unlock() 565 return s[1] 566 } 567 continue 568 } 569 } 570 } 571 return "" 572 } 573 574 func init() { 575 containerdSockPathStr := os.Getenv("CONTAINERD_SOCK_PATH") 576 if len(containerdSockPathStr) > 0 { 577 containerdUnixSocket = containerdSockPathStr 578 } 579 }