github.com/google/cadvisor@v0.49.1/container/crio/handler.go (about) 1 // Copyright 2017 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Handler for CRI-O containers. 16 package crio 17 18 import ( 19 "fmt" 20 "path/filepath" 21 "strconv" 22 "strings" 23 24 "github.com/opencontainers/runc/libcontainer/cgroups" 25 26 "github.com/google/cadvisor/container" 27 "github.com/google/cadvisor/container/common" 28 containerlibcontainer "github.com/google/cadvisor/container/libcontainer" 29 "github.com/google/cadvisor/fs" 30 info "github.com/google/cadvisor/info/v1" 31 ) 32 33 type crioContainerHandler struct { 34 client CrioClient 35 name string 36 37 machineInfoFactory info.MachineInfoFactory 38 39 // Absolute path to the cgroup hierarchies of this container. 40 // (e.g.: "cpu" -> "/sys/fs/cgroup/cpu/test") 41 cgroupPaths map[string]string 42 43 // the CRI-O storage driver 44 storageDriver storageDriver 45 fsInfo fs.FsInfo 46 rootfsStorageDir string 47 48 // Metadata associated with the container. 49 envs map[string]string 50 labels map[string]string 51 52 // TODO 53 // crio version handling... 54 55 // Image name used for this container. 56 image string 57 58 // The network mode of the container 59 // TODO 60 61 // Filesystem handler. 62 fsHandler common.FsHandler 63 64 // The IP address of the container 65 ipAddress string 66 67 includedMetrics container.MetricSet 68 69 reference info.ContainerReference 70 71 libcontainerHandler *containerlibcontainer.Handler 72 cgroupManager cgroups.Manager 73 rootFs string 74 pidKnown bool 75 } 76 77 var _ container.ContainerHandler = &crioContainerHandler{} 78 79 // newCrioContainerHandler returns a new container.ContainerHandler 80 func newCrioContainerHandler( 81 client CrioClient, 82 name string, 83 machineInfoFactory info.MachineInfoFactory, 84 fsInfo fs.FsInfo, 85 storageDriver storageDriver, 86 storageDir string, 87 cgroupSubsystems map[string]string, 88 inHostNamespace bool, 89 metadataEnvAllowList []string, 90 includedMetrics container.MetricSet, 91 ) (container.ContainerHandler, error) { 92 // Create the cgroup paths. 93 cgroupPaths := common.MakeCgroupPaths(cgroupSubsystems, name) 94 95 // Generate the equivalent cgroup manager for this container. 96 cgroupManager, err := containerlibcontainer.NewCgroupManager(name, cgroupPaths) 97 if err != nil { 98 return nil, err 99 } 100 101 rootFs := "/" 102 if !inHostNamespace { 103 rootFs = "/rootfs" 104 } 105 106 id := ContainerNameToCrioId(name) 107 pidKnown := true 108 109 cInfo, err := client.ContainerInfo(id) 110 if err != nil { 111 return nil, err 112 } 113 if cInfo.Pid == 0 { 114 // If pid is not known yet, network related stats can not be retrieved by the 115 // libcontainer handler GetStats(). In this case, the crio handler GetStats() 116 // will reattempt to get the pid and, if now known, will construct the libcontainer 117 // handler. This libcontainer handler is then cached and reused without additional 118 // calls to crio. 119 pidKnown = false 120 } 121 122 // passed to fs handler below ... 123 // XXX: this is using the full container logpath, as constructed by the CRI 124 // /var/log/pods/<pod_uuid>/container_instance.log 125 // It's not actually a log dir, as the CRI doesn't have per-container dirs 126 // under /var/log/pods/<pod_uuid>/ 127 // We can't use /var/log/pods/<pod_uuid>/ to count per-container log usage. 128 // We use the container log file directly. 129 storageLogDir := cInfo.LogPath 130 131 // Determine the rootfs storage dir 132 rootfsStorageDir := cInfo.Root 133 // TODO(runcom): CRI-O doesn't strip /merged but we need to in order to 134 // get device ID from root, otherwise, it's going to error out as overlay 135 // mounts doesn't have fixed dev ids. 136 rootfsStorageDir = strings.TrimSuffix(rootfsStorageDir, "/merged") 137 switch storageDriver { 138 case overlayStorageDriver, overlay2StorageDriver: 139 // overlay and overlay2 driver are the same "overlay2" driver so treat 140 // them the same. 141 rootfsStorageDir = filepath.Join(rootfsStorageDir, "diff") 142 } 143 144 containerReference := info.ContainerReference{ 145 Id: id, 146 Name: name, 147 Aliases: []string{cInfo.Name, id}, 148 Namespace: CrioNamespace, 149 } 150 151 // Find out if we need network metrics reported for this container. 152 // Containers that don't have their own network -- this includes 153 // containers running in Kubernetes pods that use the network of the 154 // infrastructure container -- does not need their stats to be 155 // reported. This stops metrics being reported multiple times for each 156 // container in a pod. 157 metrics := common.RemoveNetMetrics(includedMetrics, cInfo.Labels["io.kubernetes.container.name"] != "POD") 158 159 libcontainerHandler := containerlibcontainer.NewHandler(cgroupManager, rootFs, cInfo.Pid, metrics) 160 161 // TODO: extract object mother method 162 handler := &crioContainerHandler{ 163 client: client, 164 name: name, 165 machineInfoFactory: machineInfoFactory, 166 cgroupPaths: cgroupPaths, 167 storageDriver: storageDriver, 168 fsInfo: fsInfo, 169 rootfsStorageDir: rootfsStorageDir, 170 envs: make(map[string]string), 171 labels: cInfo.Labels, 172 includedMetrics: metrics, 173 reference: containerReference, 174 libcontainerHandler: libcontainerHandler, 175 cgroupManager: cgroupManager, 176 rootFs: rootFs, 177 pidKnown: pidKnown, 178 } 179 180 handler.image = cInfo.Image 181 // TODO: we wantd to know graph driver DeviceId (dont think this is needed now) 182 183 // ignore err and get zero as default, this happens with sandboxes, not sure why... 184 // kube isn't sending restart count in labels for sandboxes. 185 restartCount, _ := strconv.Atoi(cInfo.Annotations["io.kubernetes.container.restartCount"]) 186 // Only adds restartcount label if it's greater than 0 187 if restartCount > 0 { 188 handler.labels["restartcount"] = strconv.Itoa(restartCount) 189 } 190 191 handler.ipAddress = cInfo.IP 192 193 // we optionally collect disk usage metrics 194 if includedMetrics.Has(container.DiskUsageMetrics) { 195 handler.fsHandler = common.NewFsHandler(common.DefaultPeriod, rootfsStorageDir, storageLogDir, fsInfo) 196 } 197 // TODO for env vars we wanted to show from container.Config.Env from whitelist 198 //for _, exposedEnv := range metadataEnvAllowList { 199 //klog.V(4).Infof("TODO env whitelist: %v", exposedEnv) 200 //} 201 202 return handler, nil 203 } 204 205 func (h *crioContainerHandler) Start() { 206 if h.fsHandler != nil { 207 h.fsHandler.Start() 208 } 209 } 210 211 func (h *crioContainerHandler) Cleanup() { 212 if h.fsHandler != nil { 213 h.fsHandler.Stop() 214 } 215 } 216 217 func (h *crioContainerHandler) ContainerReference() (info.ContainerReference, error) { 218 return h.reference, nil 219 } 220 221 func (h *crioContainerHandler) GetSpec() (info.ContainerSpec, error) { 222 hasFilesystem := h.includedMetrics.Has(container.DiskUsageMetrics) 223 hasNet := h.includedMetrics.Has(container.NetworkUsageMetrics) 224 spec, err := common.GetSpec(h.cgroupPaths, h.machineInfoFactory, hasNet, hasFilesystem) 225 226 spec.Labels = h.labels 227 spec.Envs = h.envs 228 spec.Image = h.image 229 230 return spec, err 231 } 232 233 func (h *crioContainerHandler) getFsStats(stats *info.ContainerStats) error { 234 mi, err := h.machineInfoFactory.GetMachineInfo() 235 if err != nil { 236 return err 237 } 238 239 if h.includedMetrics.Has(container.DiskIOMetrics) { 240 common.AssignDeviceNamesToDiskStats((*common.MachineInfoNamer)(mi), &stats.DiskIo) 241 } 242 243 if !h.includedMetrics.Has(container.DiskUsageMetrics) { 244 return nil 245 } 246 var device string 247 switch h.storageDriver { 248 case overlay2StorageDriver, overlayStorageDriver: 249 deviceInfo, err := h.fsInfo.GetDirFsDevice(h.rootfsStorageDir) 250 if err != nil { 251 return fmt.Errorf("unable to determine device info for dir: %v: %v", h.rootfsStorageDir, err) 252 } 253 device = deviceInfo.Device 254 default: 255 return nil 256 } 257 258 var ( 259 limit uint64 260 fsType string 261 ) 262 263 // crio does not impose any filesystem limits for containers. So use capacity as limit. 264 for _, fs := range mi.Filesystems { 265 if fs.Device == device { 266 limit = fs.Capacity 267 fsType = fs.Type 268 break 269 } 270 } 271 272 if fsType == "" { 273 return fmt.Errorf("unable to determine fs type for device: %v", device) 274 } 275 fsStat := info.FsStats{Device: device, Type: fsType, Limit: limit} 276 usage := h.fsHandler.Usage() 277 fsStat.BaseUsage = usage.BaseUsageBytes 278 fsStat.Usage = usage.TotalUsageBytes 279 fsStat.Inodes = usage.InodeUsage 280 281 stats.Filesystem = append(stats.Filesystem, fsStat) 282 283 return nil 284 } 285 286 func (h *crioContainerHandler) getLibcontainerHandler() *containerlibcontainer.Handler { 287 if h.pidKnown { 288 return h.libcontainerHandler 289 } 290 291 id := ContainerNameToCrioId(h.name) 292 293 cInfo, err := h.client.ContainerInfo(id) 294 if err != nil || cInfo.Pid == 0 { 295 return h.libcontainerHandler 296 } 297 298 h.pidKnown = true 299 h.libcontainerHandler = containerlibcontainer.NewHandler(h.cgroupManager, h.rootFs, cInfo.Pid, h.includedMetrics) 300 301 return h.libcontainerHandler 302 } 303 304 func (h *crioContainerHandler) GetStats() (*info.ContainerStats, error) { 305 libcontainerHandler := h.getLibcontainerHandler() 306 stats, err := libcontainerHandler.GetStats() 307 if err != nil { 308 return stats, err 309 } 310 311 if h.includedMetrics.Has(container.NetworkUsageMetrics) && len(stats.Network.Interfaces) == 0 { 312 // No network related information indicates that the pid of the 313 // container is not longer valid and we need to ask crio to 314 // provide the pid of another container from that pod 315 h.pidKnown = false 316 return stats, nil 317 } 318 // Get filesystem stats. 319 err = h.getFsStats(stats) 320 if err != nil { 321 return stats, err 322 } 323 324 return stats, nil 325 } 326 327 func (h *crioContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) { 328 // No-op for Docker driver. 329 return []info.ContainerReference{}, nil 330 } 331 332 func (h *crioContainerHandler) GetCgroupPath(resource string) (string, error) { 333 var res string 334 if !cgroups.IsCgroup2UnifiedMode() { 335 res = resource 336 } 337 path, ok := h.cgroupPaths[res] 338 if !ok { 339 return "", fmt.Errorf("could not find path for resource %q for container %q", resource, h.reference.Name) 340 } 341 return path, nil 342 } 343 344 func (h *crioContainerHandler) GetContainerLabels() map[string]string { 345 return h.labels 346 } 347 348 func (h *crioContainerHandler) GetContainerIPAddress() string { 349 return h.ipAddress 350 } 351 352 func (h *crioContainerHandler) ListProcesses(listType container.ListType) ([]int, error) { 353 return h.libcontainerHandler.GetProcesses() 354 } 355 356 func (h *crioContainerHandler) Exists() bool { 357 return common.CgroupExists(h.cgroupPaths) 358 } 359 360 func (h *crioContainerHandler) Type() container.ContainerType { 361 return container.ContainerTypeCrio 362 }