github.com/kubewharf/katalyst-core@v0.5.3/pkg/metaserver/external/cgroupid/manager_linux.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2022 The Katalyst Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package cgroupid
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"sync"
    26  	"syscall"
    27  	"time"
    28  
    29  	"golang.org/x/sys/unix"
    30  	v1 "k8s.io/api/core/v1"
    31  	"k8s.io/apimachinery/pkg/util/sets"
    32  	"k8s.io/apimachinery/pkg/util/wait"
    33  	"k8s.io/klog/v2"
    34  
    35  	"github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod"
    36  	"github.com/kubewharf/katalyst-core/pkg/util/cgroup/common"
    37  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    38  )
    39  
    40  const (
    41  	maxResidualTime = 5 * time.Minute
    42  )
    43  
    44  var (
    45  	initManagerOnce sync.Once
    46  	cgIDManager     *cgroupIDManagerImpl
    47  )
    48  
    49  type (
    50  	ContainerCache map[string]uint64         // Keyed by container id
    51  	PodCache       map[string]ContainerCache // Keyed by pod UID
    52  )
    53  
// cgroupIDManagerImpl keeps an in-memory cache of container cgroup ids and
// reconciles it periodically against the pod list of the embedded PodFetcher.
//
// Fields:
//   - RWMutex (embedded): held by the methods of this type while they read or
//     write podCgroupIDCache and residualHitMap.
//   - PodFetcher (embedded): source of the pod list and of container ids.
//   - reconcilePeriod: interval between two reconcile runs; also the unit by
//     which residual hit counts are converted into elapsed time.
//   - podCgroupIDCache: pod UID -> container id -> cgroup id.
//   - residualHitMap: pod UID -> number of consecutive reconcile runs in which
//     the pod was present in the cache but absent from the fetched pod list.
type cgroupIDManagerImpl struct {
	sync.RWMutex
	pod.PodFetcher

	reconcilePeriod  time.Duration
	podCgroupIDCache PodCache
	residualHitMap   map[string]int64
}
    62  
    63  // NewCgroupIDManager returns a CgroupIDManager
    64  func NewCgroupIDManager(podFetcher pod.PodFetcher) CgroupIDManager {
    65  	initManagerOnce.Do(func() {
    66  		cgIDManager = &cgroupIDManagerImpl{
    67  			PodFetcher:       podFetcher,
    68  			podCgroupIDCache: make(PodCache),
    69  			reconcilePeriod:  5 * time.Second,
    70  			residualHitMap:   make(map[string]int64),
    71  		}
    72  	})
    73  
    74  	return cgIDManager
    75  }
    76  
    77  // Run starts a cgroupIDManagerImpl
    78  func (m *cgroupIDManagerImpl) Run(ctx context.Context) {
    79  	wait.UntilWithContext(ctx, m.reconcileCgroupIDMap, m.reconcilePeriod)
    80  }
    81  
    82  // GetCgroupIDForContainer returns the cgroup id of a given container.
    83  func (m *cgroupIDManagerImpl) GetCgroupIDForContainer(podUID, containerID string) (uint64, error) {
    84  	if cgroupID, found := m.getCgroupIDFromCache(podUID, containerID); found {
    85  		return cgroupID, nil
    86  	}
    87  
    88  	cgroupID, err := m.getCgroupIDFromSystem(podUID, containerID)
    89  	if err != nil {
    90  		return 0, fmt.Errorf("getCgroupIDFromSystem failed, err: %v", err)
    91  	}
    92  
    93  	m.setCgroupID(podUID, containerID, cgroupID)
    94  
    95  	return cgroupID, nil
    96  }
    97  
    98  // ListCgroupIDsForPod returns the cgroup ids of a given pod.
    99  func (m *cgroupIDManagerImpl) ListCgroupIDsForPod(podUID string) ([]uint64, error) {
   100  	m.RLock()
   101  	defer m.RUnlock()
   102  
   103  	containerCgroupIDMap, ok := m.podCgroupIDCache[podUID]
   104  	if !ok {
   105  		return nil, general.ErrNotFound
   106  	}
   107  
   108  	var cgIDList []uint64
   109  	for _, cgID := range containerCgroupIDMap {
   110  		cgIDList = append(cgIDList, cgID)
   111  	}
   112  
   113  	return cgIDList, nil
   114  }
   115  
   116  func (m *cgroupIDManagerImpl) reconcileCgroupIDMap(ctx context.Context) {
   117  	podList, err := m.GetPodList(ctx, nil)
   118  	if err != nil {
   119  		klog.Errorf("[cgroupIDManagerImpl.reconcileCgroupIDMap] get pod list failed, err: %v", err)
   120  		return
   121  	}
   122  
   123  	m.clearResidualPodsInCache(podList)
   124  	m.addAbsentCgroupIDsToCache(m.getAbsentContainers(podList))
   125  }
   126  
   127  // addAbsentCgroupIDsToCache adds absent cgroup ids to cache.
   128  func (m *cgroupIDManagerImpl) addAbsentCgroupIDsToCache(absentContainers map[string]sets.String) {
   129  	klog.V(4).Infof("[cgroupIDManagerImpl] exec addAbsentCgroupIDsToCache")
   130  
   131  	for podUID, absentContainerSet := range absentContainers {
   132  		for {
   133  			containerID, found := absentContainerSet.PopAny()
   134  			if !found {
   135  				break
   136  			}
   137  
   138  			cgID, err := m.getCgroupIDFromSystem(podUID, containerID)
   139  			if err != nil {
   140  				klog.Errorf("[cgroupIDManagerImpl.addAbsentCgroupIDsToCache] get cgroup id failed, pod: %s, container: %s, err: %v",
   141  					podUID, containerID, err)
   142  				continue
   143  			}
   144  
   145  			klog.Infof("[cgroupIDManagerImpl.addAbsentCgroupIDsToCache] add absent cgroup id to cache, "+
   146  				"pod: %s, container: %s, cgroup id: %d", podUID, containerID, cgID)
   147  			m.setCgroupID(podUID, containerID, cgID)
   148  		}
   149  	}
   150  }
   151  
   152  func (m *cgroupIDManagerImpl) getAbsentContainers(podList []*v1.Pod) map[string]sets.String {
   153  	absentContainersMap := make(map[string]sets.String)
   154  
   155  	m.RLock()
   156  	defer m.RUnlock()
   157  
   158  	for _, pod := range podList {
   159  		podUID := string(pod.UID)
   160  		containerCache, ok := m.podCgroupIDCache[podUID]
   161  		if !ok {
   162  			containerCache = make(ContainerCache)
   163  		}
   164  		for _, container := range pod.Spec.Containers {
   165  			containerId, err := m.GetContainerID(podUID, container.Name)
   166  			if err != nil {
   167  				klog.Errorf("[cgroupIDManagerImpl.addNewCgroupIDsToCache] get container id failed, pod: %s, container: %s, err: %v",
   168  					podUID, container.Name, err)
   169  				continue
   170  			}
   171  			if _, ok := containerCache[containerId]; !ok {
   172  				if _, ok := absentContainersMap[podUID]; !ok {
   173  					absentContainersMap[podUID] = sets.NewString()
   174  				}
   175  				absentContainersMap[podUID].Insert(containerId)
   176  			}
   177  		}
   178  	}
   179  
   180  	return absentContainersMap
   181  }
   182  
   183  // clearResidualPodsInCache cleans residual pods in podCgroupIDCache.
   184  func (m *cgroupIDManagerImpl) clearResidualPodsInCache(podList []*v1.Pod) {
   185  	klog.V(4).Infof("[cgroupIDManagerImpl] exec clearResidualPodsInCache")
   186  	residualSet := make(map[string]bool)
   187  
   188  	podSet := sets.NewString()
   189  	for _, pod := range podList {
   190  		podSet.Insert(fmt.Sprintf("%v", pod.UID))
   191  	}
   192  
   193  	m.Lock()
   194  	defer m.Unlock()
   195  
   196  	for podUID := range m.podCgroupIDCache {
   197  		if !podSet.Has(podUID) && !residualSet[podUID] {
   198  			residualSet[podUID] = true
   199  			m.residualHitMap[podUID] += 1
   200  			klog.V(4).Infof("[cgroupIDManagerImpl.clearResidualPodsInCache] found pod: %s with cache but doesn't show up in pod watcher, hit count: %d", podUID, m.residualHitMap[podUID])
   201  		}
   202  	}
   203  
   204  	podsToDelete := sets.NewString()
   205  	for podUID, hitCount := range m.residualHitMap {
   206  		if !residualSet[podUID] {
   207  			klog.V(4).Infof("[cgroupIDManagerImpl.clearResidualPodsInCache] already found pod: %s in pod watcher or its cache is cleared, delete it from residualHitMap", podUID)
   208  			delete(m.residualHitMap, podUID)
   209  			continue
   210  		}
   211  
   212  		if time.Duration(hitCount)*m.reconcilePeriod >= maxResidualTime {
   213  			podsToDelete.Insert(podUID)
   214  		}
   215  	}
   216  
   217  	if podsToDelete.Len() > 0 {
   218  		for {
   219  			podUID, found := podsToDelete.PopAny()
   220  			if !found {
   221  				break
   222  			}
   223  
   224  			klog.Infof("[cgroupIDManagerImpl.clearResidualPodsInCache] clear residual pod: %s in cache", podUID)
   225  			delete(m.podCgroupIDCache, podUID)
   226  		}
   227  	}
   228  }
   229  
   230  func (m *cgroupIDManagerImpl) getCgroupIDFromCache(podUID, containerID string) (uint64, bool) {
   231  	m.RLock()
   232  	defer m.RUnlock()
   233  
   234  	containerCache, ok := m.podCgroupIDCache[podUID]
   235  	if !ok {
   236  		return 0, false
   237  	}
   238  	cgroupID, ok := containerCache[containerID]
   239  	if !ok {
   240  		return 0, false
   241  	}
   242  
   243  	return cgroupID, true
   244  }
   245  
   246  func (m *cgroupIDManagerImpl) getCgroupIDFromSystem(podUID, containerID string) (uint64, error) {
   247  	containerAbsCGPath, err := common.GetContainerAbsCgroupPath("", podUID, containerID)
   248  	if err != nil {
   249  		return 0, fmt.Errorf("GetContainerAbsCgroupPath failed, err: %v", err)
   250  	}
   251  
   252  	cgID, err := cgroupPathToID(containerAbsCGPath)
   253  	if err != nil {
   254  		return 0, fmt.Errorf("cgroupPathToID failed, err: %v", err)
   255  	}
   256  
   257  	return cgID, nil
   258  }
   259  
   260  func (m *cgroupIDManagerImpl) setCgroupID(podUID, containerID string, cgroupID uint64) {
   261  	m.Lock()
   262  	defer m.Unlock()
   263  
   264  	_, ok := m.podCgroupIDCache[podUID]
   265  	if !ok {
   266  		m.podCgroupIDCache[podUID] = make(ContainerCache)
   267  	}
   268  
   269  	m.podCgroupIDCache[podUID][containerID] = cgroupID
   270  }
   271  
   272  func cgroupPathToID(cgPath string) (uint64, error) {
   273  	var fstat syscall.Statfs_t
   274  	err := syscall.Statfs(cgPath, &fstat)
   275  	if err != nil {
   276  		return 0, fmt.Errorf("get file fstat failed, cgPath: %s, err: %v", cgPath, err)
   277  	}
   278  	if fstat.Type != unix.CGROUP2_SUPER_MAGIC && fstat.Type != unix.CGROUP_SUPER_MAGIC {
   279  		return 0, fmt.Errorf("get file fstat failed, cgPath: %s, invalid file type: %v", cgPath, fstat.Type)
   280  	}
   281  
   282  	handle, _, err := unix.NameToHandleAt(unix.AT_FDCWD, cgPath, 0)
   283  	if err != nil {
   284  		return 0, fmt.Errorf("call name_to_handle_at failed, cgPath: %s, err: %v", cgPath, err)
   285  	}
   286  	if handle.Size() != 8 {
   287  		return 0, fmt.Errorf("call name_to_handle_at failed, cgPath: %s, invalid size: %v", cgPath, handle.Size())
   288  	}
   289  
   290  	return general.NativeEndian.Uint64(handle.Bytes()), nil
   291  }