github.com/kubewharf/katalyst-core@v0.5.3/pkg/metaserver/external/cgroupid/manager_linux.go (about) 1 //go:build linux 2 // +build linux 3 4 /* 5 Copyright 2022 The Katalyst Authors. 6 7 Licensed under the Apache License, Version 2.0 (the "License"); 8 you may not use this file except in compliance with the License. 9 You may obtain a copy of the License at 10 11 http://www.apache.org/licenses/LICENSE-2.0 12 13 Unless required by applicable law or agreed to in writing, software 14 distributed under the License is distributed on an "AS IS" BASIS, 15 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 See the License for the specific language governing permissions and 17 limitations under the License. 18 */ 19 20 package cgroupid 21 22 import ( 23 "context" 24 "fmt" 25 "sync" 26 "syscall" 27 "time" 28 29 "golang.org/x/sys/unix" 30 v1 "k8s.io/api/core/v1" 31 "k8s.io/apimachinery/pkg/util/sets" 32 "k8s.io/apimachinery/pkg/util/wait" 33 "k8s.io/klog/v2" 34 35 "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" 36 "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" 37 "github.com/kubewharf/katalyst-core/pkg/util/general" 38 ) 39 40 const ( 41 maxResidualTime = 5 * time.Minute 42 ) 43 44 var ( 45 initManagerOnce sync.Once 46 cgIDManager *cgroupIDManagerImpl 47 ) 48 49 type ( 50 ContainerCache map[string]uint64 // Keyed by container id 51 PodCache map[string]ContainerCache // Keyed by pod UID 52 ) 53 54 type cgroupIDManagerImpl struct { 55 sync.RWMutex 56 pod.PodFetcher 57 58 reconcilePeriod time.Duration 59 podCgroupIDCache PodCache 60 residualHitMap map[string]int64 61 } 62 63 // NewCgroupIDManager returns a CgroupIDManager 64 func NewCgroupIDManager(podFetcher pod.PodFetcher) CgroupIDManager { 65 initManagerOnce.Do(func() { 66 cgIDManager = &cgroupIDManagerImpl{ 67 PodFetcher: podFetcher, 68 podCgroupIDCache: make(PodCache), 69 reconcilePeriod: 5 * time.Second, 70 residualHitMap: make(map[string]int64), 71 } 72 }) 73 74 return cgIDManager 75 } 76 77 // Run starts a cgroupIDManagerImpl 78 func (m *cgroupIDManagerImpl) Run(ctx context.Context) { 79 wait.UntilWithContext(ctx, m.reconcileCgroupIDMap, m.reconcilePeriod) 80 } 81 82 // GetCgroupIDForContainer returns the cgroup id of a given container. 83 func (m *cgroupIDManagerImpl) GetCgroupIDForContainer(podUID, containerID string) (uint64, error) { 84 if cgroupID, found := m.getCgroupIDFromCache(podUID, containerID); found { 85 return cgroupID, nil 86 } 87 88 cgroupID, err := m.getCgroupIDFromSystem(podUID, containerID) 89 if err != nil { 90 return 0, fmt.Errorf("getCgroupIDFromSystem failed, err: %v", err) 91 } 92 93 m.setCgroupID(podUID, containerID, cgroupID) 94 95 return cgroupID, nil 96 } 97 98 // ListCgroupIDsForPod returns the cgroup ids of a given pod. 99 func (m *cgroupIDManagerImpl) ListCgroupIDsForPod(podUID string) ([]uint64, error) { 100 m.RLock() 101 defer m.RUnlock() 102 103 containerCgroupIDMap, ok := m.podCgroupIDCache[podUID] 104 if !ok { 105 return nil, general.ErrNotFound 106 } 107 108 var cgIDList []uint64 109 for _, cgID := range containerCgroupIDMap { 110 cgIDList = append(cgIDList, cgID) 111 } 112 113 return cgIDList, nil 114 } 115 116 func (m *cgroupIDManagerImpl) reconcileCgroupIDMap(ctx context.Context) { 117 podList, err := m.GetPodList(ctx, nil) 118 if err != nil { 119 klog.Errorf("[cgroupIDManagerImpl.reconcileCgroupIDMap] get pod list failed, err: %v", err) 120 return 121 } 122 123 m.clearResidualPodsInCache(podList) 124 m.addAbsentCgroupIDsToCache(m.getAbsentContainers(podList)) 125 } 126 127 // addAbsentCgroupIDsToCache adds absent cgroup ids to cache. 128 func (m *cgroupIDManagerImpl) addAbsentCgroupIDsToCache(absentContainers map[string]sets.String) { 129 klog.V(4).Infof("[cgroupIDManagerImpl] exec addAbsentCgroupIDsToCache") 130 131 for podUID, absentContainerSet := range absentContainers { 132 for { 133 containerID, found := absentContainerSet.PopAny() 134 if !found { 135 break 136 } 137 138 cgID, err := m.getCgroupIDFromSystem(podUID, containerID) 139 if err != nil { 140 klog.Errorf("[cgroupIDManagerImpl.addAbsentCgroupIDsToCache] get cgroup id failed, pod: %s, container: %s, err: %v", 141 podUID, containerID, err) 142 continue 143 } 144 145 klog.Infof("[cgroupIDManagerImpl.addAbsentCgroupIDsToCache] add absent cgroup id to cache, "+ 146 "pod: %s, container: %s, cgroup id: %d", podUID, containerID, cgID) 147 m.setCgroupID(podUID, containerID, cgID) 148 } 149 } 150 } 151 152 func (m *cgroupIDManagerImpl) getAbsentContainers(podList []*v1.Pod) map[string]sets.String { 153 absentContainersMap := make(map[string]sets.String) 154 155 m.RLock() 156 defer m.RUnlock() 157 158 for _, pod := range podList { 159 podUID := string(pod.UID) 160 containerCache, ok := m.podCgroupIDCache[podUID] 161 if !ok { 162 containerCache = make(ContainerCache) 163 } 164 for _, container := range pod.Spec.Containers { 165 containerId, err := m.GetContainerID(podUID, container.Name) 166 if err != nil { 167 klog.Errorf("[cgroupIDManagerImpl.addNewCgroupIDsToCache] get container id failed, pod: %s, container: %s, err: %v", 168 podUID, container.Name, err) 169 continue 170 } 171 if _, ok := containerCache[containerId]; !ok { 172 if _, ok := absentContainersMap[podUID]; !ok { 173 absentContainersMap[podUID] = sets.NewString() 174 } 175 absentContainersMap[podUID].Insert(containerId) 176 } 177 } 178 } 179 180 return absentContainersMap 181 } 182 183 // clearResidualPodsInCache cleans residual pods in podCgroupIDCache. 184 func (m *cgroupIDManagerImpl) clearResidualPodsInCache(podList []*v1.Pod) { 185 klog.V(4).Infof("[cgroupIDManagerImpl] exec clearResidualPodsInCache") 186 residualSet := make(map[string]bool) 187 188 podSet := sets.NewString() 189 for _, pod := range podList { 190 podSet.Insert(fmt.Sprintf("%v", pod.UID)) 191 } 192 193 m.Lock() 194 defer m.Unlock() 195 196 for podUID := range m.podCgroupIDCache { 197 if !podSet.Has(podUID) && !residualSet[podUID] { 198 residualSet[podUID] = true 199 m.residualHitMap[podUID] += 1 200 klog.V(4).Infof("[cgroupIDManagerImpl.clearResidualPodsInCache] found pod: %s with cache but doesn't show up in pod watcher, hit count: %d", podUID, m.residualHitMap[podUID]) 201 } 202 } 203 204 podsToDelete := sets.NewString() 205 for podUID, hitCount := range m.residualHitMap { 206 if !residualSet[podUID] { 207 klog.V(4).Infof("[cgroupIDManagerImpl.clearResidualPodsInCache] already found pod: %s in pod watcher or its cache is cleared, delete it from residualHitMap", podUID) 208 delete(m.residualHitMap, podUID) 209 continue 210 } 211 212 if time.Duration(hitCount)*m.reconcilePeriod >= maxResidualTime { 213 podsToDelete.Insert(podUID) 214 } 215 } 216 217 if podsToDelete.Len() > 0 { 218 for { 219 podUID, found := podsToDelete.PopAny() 220 if !found { 221 break 222 } 223 224 klog.Infof("[cgroupIDManagerImpl.clearResidualPodsInCache] clear residual pod: %s in cache", podUID) 225 delete(m.podCgroupIDCache, podUID) 226 } 227 } 228 } 229 230 func (m *cgroupIDManagerImpl) getCgroupIDFromCache(podUID, containerID string) (uint64, bool) { 231 m.RLock() 232 defer m.RUnlock() 233 234 containerCache, ok := m.podCgroupIDCache[podUID] 235 if !ok { 236 return 0, false 237 } 238 cgroupID, ok := containerCache[containerID] 239 if !ok { 240 return 0, false 241 } 242 243 return cgroupID, true 244 } 245 246 func (m *cgroupIDManagerImpl) getCgroupIDFromSystem(podUID, containerID string) (uint64, error) { 247 containerAbsCGPath, err := common.GetContainerAbsCgroupPath("", podUID, containerID) 248 if err != nil { 249 return 0, fmt.Errorf("GetContainerAbsCgroupPath failed, err: %v", err) 250 } 251 252 cgID, err := cgroupPathToID(containerAbsCGPath) 253 if err != nil { 254 return 0, fmt.Errorf("cgroupPathToID failed, err: %v", err) 255 } 256 257 return cgID, nil 258 } 259 260 func (m *cgroupIDManagerImpl) setCgroupID(podUID, containerID string, cgroupID uint64) { 261 m.Lock() 262 defer m.Unlock() 263 264 _, ok := m.podCgroupIDCache[podUID] 265 if !ok { 266 m.podCgroupIDCache[podUID] = make(ContainerCache) 267 } 268 269 m.podCgroupIDCache[podUID][containerID] = cgroupID 270 } 271 272 func cgroupPathToID(cgPath string) (uint64, error) { 273 var fstat syscall.Statfs_t 274 err := syscall.Statfs(cgPath, &fstat) 275 if err != nil { 276 return 0, fmt.Errorf("get file fstat failed, cgPath: %s, err: %v", cgPath, err) 277 } 278 if fstat.Type != unix.CGROUP2_SUPER_MAGIC && fstat.Type != unix.CGROUP_SUPER_MAGIC { 279 return 0, fmt.Errorf("get file fstat failed, cgPath: %s, invalid file type: %v", cgPath, fstat.Type) 280 } 281 282 handle, _, err := unix.NameToHandleAt(unix.AT_FDCWD, cgPath, 0) 283 if err != nil { 284 return 0, fmt.Errorf("call name_to_handle_at failed, cgPath: %s, err: %v", cgPath, err) 285 } 286 if handle.Size() != 8 { 287 return 0, fmt.Errorf("call name_to_handle_at failed, cgPath: %s, invalid size: %v", cgPath, handle.Size()) 288 } 289 290 return general.NativeEndian.Uint64(handle.Bytes()), nil 291 }