k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/volume/emptydir/empty_dir.go

/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package emptydir

import (
	"fmt"
	"os"
	"path/filepath"

	"k8s.io/klog/v2"
	"k8s.io/mount-utils"
	utilstrings "k8s.io/utils/strings"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/kubelet/cm"
	"k8s.io/kubernetes/pkg/volume"
	volumeutil "k8s.io/kubernetes/pkg/volume/util"
	"k8s.io/kubernetes/pkg/volume/util/fsquota"
)

// TODO: in the near future, this will be changed to be more restrictive
// and the group will be set to allow containers to use emptyDir volumes
// from the group attribute.
//
// https://issue.k8s.io/2630
const perm os.FileMode = 0777

// ProbeVolumePlugins is the primary entrypoint for volume plugins.
func ProbeVolumePlugins() []volume.VolumePlugin {
	return []volume.VolumePlugin{
		&emptyDirPlugin{nil},
	}
}

type emptyDirPlugin struct {
	host volume.VolumeHost
}

var _ volume.VolumePlugin = &emptyDirPlugin{}

const (
	emptyDirPluginName           = "kubernetes.io/empty-dir"
	hugePagesPageSizeMountOption = "pagesize"
)

func getPath(uid types.UID, volName string, host volume.VolumeHost) string {
	return host.GetPodVolumeDir(uid, utilstrings.EscapeQualifiedName(emptyDirPluginName), volName)
}

func (plugin *emptyDirPlugin) Init(host volume.VolumeHost) error {
	plugin.host = host

	return nil
}

func (plugin *emptyDirPlugin) GetPluginName() string {
	return emptyDirPluginName
}

func (plugin *emptyDirPlugin) GetVolumeName(spec *volume.Spec) (string, error) {
	volumeSource, _ := getVolumeSource(spec)
	if volumeSource == nil {
		return "", fmt.Errorf("Spec does not reference an EmptyDir volume type")
	}

	// Return the user-defined volume name, since this is an ephemeral volume type
	return spec.Name(), nil
}

func (plugin *emptyDirPlugin) CanSupport(spec *volume.Spec) bool {
	return spec.Volume != nil && spec.Volume.EmptyDir != nil
}

func (plugin *emptyDirPlugin) RequiresRemount(spec *volume.Spec) bool {
	return false
}

func (plugin *emptyDirPlugin) SupportsMountOption() bool {
	return false
}

func (plugin *emptyDirPlugin) SupportsSELinuxContextMount(spec *volume.Spec) (bool, error) {
	return false, nil
}

func (plugin *emptyDirPlugin) NewMounter(spec *volume.Spec, pod *v1.Pod, opts volume.VolumeOptions) (volume.Mounter, error) {
	return plugin.newMounterInternal(spec, pod, plugin.host.GetMounter(plugin.GetPluginName()), &realMountDetector{plugin.host.GetMounter(plugin.GetPluginName())}, opts)
}
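
// Illustrative walkthrough (not part of the upstream file): the computed size
// limit below is effectively min(node allocatable memory, pod memory limit,
// volume sizeLimit), considering only bounds that are positive and no larger
// than the current value. For example, assuming 8Gi node allocatable, a 2Gi
// pod memory limit, and a 1Gi emptyDir sizeLimit:
//
//	sizeLimit = 8Gi // node allocatable
//	sizeLimit = 2Gi // pod limit: 0 < 2Gi <= 8Gi
//	sizeLimit = 1Gi // volume limit: 0 < 1Gi <= 2Gi
//
// With no pod memory limit and no volume sizeLimit, the memory-backed volume
// is sized to the node's allocatable memory.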

func calculateEmptyDirMemorySize(nodeAllocatableMemory *resource.Quantity, spec *volume.Spec, pod *v1.Pod) *resource.Quantity {
	// if the feature is disabled, fall back to the Linux host default behavior (no explicit size)
	sizeLimit := &resource.Quantity{}
	if !utilfeature.DefaultFeatureGate.Enabled(features.SizeMemoryBackedVolumes) {
		return sizeLimit
	}

	// the size limit defaults to node allocatable (a pod can't consume more memory than the node has allocatable)
	sizeLimit = nodeAllocatableMemory
	zero := resource.MustParse("0")

	// determine pod resource allocation
	// we use the same function for pod cgroup assignment to maintain consistent behavior
	// NOTE: this could be nil on systems that do not support pod memory containment (e.g. Windows)
	podResourceConfig := cm.ResourceConfigForPod(pod, false, uint64(100000), false)
	if podResourceConfig != nil && podResourceConfig.Memory != nil {
		podMemoryLimit := resource.NewQuantity(*(podResourceConfig.Memory), resource.BinarySI)
		// ensure 0 < value < size
		if podMemoryLimit.Cmp(zero) > 0 && podMemoryLimit.Cmp(*sizeLimit) < 1 {
			sizeLimit = podMemoryLimit
		}
	}

	// the volume's own size limit is used if and only if it is less than what the pod could consume
	if spec.Volume.EmptyDir.SizeLimit != nil {
		volumeSizeLimit := spec.Volume.EmptyDir.SizeLimit
		// ensure 0 < value < size
		if volumeSizeLimit.Cmp(zero) > 0 && volumeSizeLimit.Cmp(*sizeLimit) < 1 {
			sizeLimit = volumeSizeLimit
		}
	}
	return sizeLimit
}

func (plugin *emptyDirPlugin) newMounterInternal(spec *volume.Spec, pod *v1.Pod, mounter mount.Interface, mountDetector mountDetector, opts volume.VolumeOptions) (volume.Mounter, error) {
	medium := v1.StorageMediumDefault
	sizeLimit := &resource.Quantity{}
	if spec.Volume.EmptyDir != nil { // Support a non-specified source as EmptyDir.
		medium = spec.Volume.EmptyDir.Medium
		if medium == v1.StorageMediumMemory {
			nodeAllocatable, err := plugin.host.GetNodeAllocatable()
			if err != nil {
				return nil, err
			}
			sizeLimit = calculateEmptyDirMemorySize(nodeAllocatable.Memory(), spec, pod)
		}
	}
	return &emptyDir{
		pod:             pod,
		volName:         spec.Name(),
		medium:          medium,
		sizeLimit:       sizeLimit,
		mounter:         mounter,
		mountDetector:   mountDetector,
		plugin:          plugin,
		MetricsProvider: volume.NewMetricsDu(getPath(pod.UID, spec.Name(), plugin.host)),
	}, nil
}

func (plugin *emptyDirPlugin) NewUnmounter(volName string, podUID types.UID) (volume.Unmounter, error) {
	// Inject real implementations here, test through the internal function.
	return plugin.newUnmounterInternal(volName, podUID, plugin.host.GetMounter(plugin.GetPluginName()), &realMountDetector{plugin.host.GetMounter(plugin.GetPluginName())})
}

func (plugin *emptyDirPlugin) newUnmounterInternal(volName string, podUID types.UID, mounter mount.Interface, mountDetector mountDetector) (volume.Unmounter, error) {
	ed := &emptyDir{
		pod:             &v1.Pod{ObjectMeta: metav1.ObjectMeta{UID: podUID}},
		volName:         volName,
		medium:          v1.StorageMediumDefault, // might be changed later
		mounter:         mounter,
		mountDetector:   mountDetector,
		plugin:          plugin,
		MetricsProvider: volume.NewMetricsDu(getPath(podUID, volName, plugin.host)),
	}
	return ed, nil
}

func (plugin *emptyDirPlugin) ConstructVolumeSpec(volName, mountPath string) (volume.ReconstructedVolume, error) {
	emptyDirVolume := &v1.Volume{
		Name: volName,
		VolumeSource: v1.VolumeSource{
			EmptyDir: &v1.EmptyDirVolumeSource{},
		},
	}
	return volume.ReconstructedVolume{
		Spec: volume.NewSpecFromVolume(emptyDirVolume),
	}, nil
}

// mountDetector abstracts how to find what kind of mount a path is backed by.
type mountDetector interface {
	// GetMountMedium determines what type of medium a given path is backed
	// by and whether that path is a mount point. For example, if this
	// returns (v1.StorageMediumMemory, false, nil, nil), the caller knows that
	// the path is on a memory FS (tmpfs on Linux) but is not the root
	// mountpoint of that tmpfs.
	GetMountMedium(path string, requestedMedium v1.StorageMedium) (v1.StorageMedium, bool, *resource.Quantity, error)
}
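
// Sketch of a hypothetical test double (illustrative, not part of the upstream
// file): a fake only needs to return a canned medium/mount-point answer, which
// is enough to exercise the idempotency checks in setupTmpfs and setupHugepages.
//
//	type fakeMountDetector struct {
//		medium  v1.StorageMedium
//		isMount bool
//	}
//
//	func (fake *fakeMountDetector) GetMountMedium(path string, requestedMedium v1.StorageMedium) (v1.StorageMedium, bool, *resource.Quantity, error) {
//		return fake.medium, fake.isMount, nil, nil
//	}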

// EmptyDir volumes are temporary directories exposed to the pod.
// These do not persist beyond the lifetime of a pod.
type emptyDir struct {
	pod           *v1.Pod
	volName       string
	sizeLimit     *resource.Quantity
	medium        v1.StorageMedium
	mounter       mount.Interface
	mountDetector mountDetector
	plugin        *emptyDirPlugin
	volume.MetricsProvider
}

func (ed *emptyDir) GetAttributes() volume.Attributes {
	return volume.Attributes{
		ReadOnly:       false,
		Managed:        true,
		SELinuxRelabel: true,
	}
}

// SetUp creates new directory.
func (ed *emptyDir) SetUp(mounterArgs volume.MounterArgs) error {
	return ed.SetUpAt(ed.GetPath(), mounterArgs)
}

// SetUpAt creates new directory.
func (ed *emptyDir) SetUpAt(dir string, mounterArgs volume.MounterArgs) error {
	notMnt, err := ed.mounter.IsLikelyNotMountPoint(dir)
	// Getting an os.IsNotExist err here is expected; the directory
	// may not exist yet, in which case setup should run.
	if err != nil && !os.IsNotExist(err) {
		return err
	}

	// If the plugin readiness file is present for this volume, and the
	// storage medium is the default, then the volume is ready. If the
	// medium is memory, and a mountpoint is present, then the volume is
	// ready.
	readyDir := ed.getMetaDir()
	if volumeutil.IsReady(readyDir) {
		if ed.medium == v1.StorageMediumMemory && !notMnt {
			return nil
		} else if ed.medium == v1.StorageMediumDefault {
			// Further check that the dir exists
			if _, err := os.Stat(dir); err == nil {
				klog.V(6).InfoS("Dir exists, so check and assign quota if the underlying medium supports quotas", "dir", dir)
				err = ed.assignQuota(dir, mounterArgs.DesiredSize)
				return err
			}
			// This situation should not happen unless the user manually deletes the volume dir.
			// In that case, delete the ready file and print a warning for it.
			klog.Warningf("volume ready file dir %s exist, but volume dir %s does not. Remove ready dir", readyDir, dir)
			if err := os.RemoveAll(readyDir); err != nil && !os.IsNotExist(err) {
				klog.Warningf("failed to remove ready dir [%s]: %v", readyDir, err)
			}
		}
	}

	switch {
	case ed.medium == v1.StorageMediumDefault:
		err = ed.setupDir(dir)
	case ed.medium == v1.StorageMediumMemory:
		err = ed.setupTmpfs(dir)
	case v1helper.IsHugePageMedium(ed.medium):
		err = ed.setupHugepages(dir)
	default:
		err = fmt.Errorf("unknown storage medium %q", ed.medium)
	}

	volume.SetVolumeOwnership(ed, dir, mounterArgs.FsGroup, nil /*fsGroupChangePolicy*/, volumeutil.FSGroupCompleteHook(ed.plugin, nil))

	// If setting up the quota fails, just log a message but don't actually error out.
	// We'll use the old du mechanism in this case, at least until we support
	// enforcement.
	if err == nil {
		volumeutil.SetReady(ed.getMetaDir())
		err = ed.assignQuota(dir, mounterArgs.DesiredSize)
	}
	return err
}
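
// Usage sketch (illustrative, not part of the upstream file): the kubelet
// drives this plugin roughly as follows; host, spec, pod, and fsGroup are
// placeholders supplied by the caller.
//
//	plugin := ProbeVolumePlugins()[0]
//	_ = plugin.Init(host) // host is the kubelet's volume.VolumeHost
//	mounter, err := plugin.NewMounter(spec, pod, volume.VolumeOptions{})
//	if err != nil {
//		return err
//	}
//	// Creates the directory (and, for medium "Memory", mounts a tmpfs sized
//	// by calculateEmptyDirMemorySize) under the pod's volume directory.
//	err = mounter.SetUp(volume.MounterArgs{FsGroup: fsGroup})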

// assignQuota checks if the underlying medium supports quotas and, if so,
// assigns one to the directory.
func (ed *emptyDir) assignQuota(dir string, mounterSize *resource.Quantity) error {
	if mounterSize != nil {
		// Deliberately shadow the outer use of err as noted
		// above.
		hasQuotas, err := fsquota.SupportsQuotas(ed.mounter, dir)
		if err != nil {
			klog.V(3).Infof("Unable to check for quota support on %s: %s", dir, err.Error())
		} else if hasQuotas {
			klog.V(4).Infof("emptydir trying to assign quota %v on %s", mounterSize, dir)
			if err := fsquota.AssignQuota(ed.mounter, dir, ed.pod.UID, mounterSize); err != nil {
				klog.V(3).Infof("Set quota on %s failed %s", dir, err.Error())
				return err
			}
			return nil
		}
	}
	return nil
}

// setupTmpfs creates a tmpfs mount at the specified directory.
func (ed *emptyDir) setupTmpfs(dir string) error {
	if ed.mounter == nil {
		return fmt.Errorf("memory storage requested, but mounter is nil")
	}
	if err := ed.setupDir(dir); err != nil {
		return err
	}
	// Make SetUp idempotent.
	medium, isMnt, _, err := ed.mountDetector.GetMountMedium(dir, ed.medium)
	if err != nil {
		return err
	}
	// If the directory is a mountpoint with medium memory, there is no
	// work to do since we are already in the desired state.
	if isMnt && medium == v1.StorageMediumMemory {
		return nil
	}

	var options []string
	// Linux system default is 50% of capacity.
	if ed.sizeLimit != nil && ed.sizeLimit.Value() > 0 {
		options = []string{fmt.Sprintf("size=%d", ed.sizeLimit.Value())}
	}

	klog.V(3).Infof("pod %v: mounting tmpfs for volume %v", ed.pod.UID, ed.volName)
	return ed.mounter.MountSensitiveWithoutSystemd("tmpfs", dir, "tmpfs", options, nil)
}
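
// For example (illustrative, not part of the upstream file): with a computed
// sizeLimit of 1Gi, the mount options become
//
//	options = []string{"size=1073741824"}
//
// which is equivalent to "mount -t tmpfs -o size=1073741824 tmpfs <dir>". With
// no positive size limit, no size option is passed and the tmpfs falls back to
// the kernel default of 50% of physical memory.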

// setupHugepages creates a hugepage mount at the specified directory.
func (ed *emptyDir) setupHugepages(dir string) error {
	if ed.mounter == nil {
		return fmt.Errorf("memory storage requested, but mounter is nil")
	}
	if err := ed.setupDir(dir); err != nil {
		return err
	}
	// Make SetUp idempotent.
	medium, isMnt, mountPageSize, err := ed.mountDetector.GetMountMedium(dir, ed.medium)
	klog.V(3).Infof("pod %v: setupHugepages: medium: %s, isMnt: %v, dir: %s, err: %v", ed.pod.UID, medium, isMnt, dir, err)
	if err != nil {
		return err
	}
	// If the directory is a mountpoint with medium hugepages of the same page size,
	// there is no work to do since we are already in the desired state.
	if isMnt && v1helper.IsHugePageMedium(medium) {
		// Medium is: Hugepages
		if ed.medium == v1.StorageMediumHugePages {
			return nil
		}
		if mountPageSize == nil {
			return fmt.Errorf("pod %v: mounted dir %s pagesize is not determined", ed.pod.UID, dir)
		}
		// Medium is: Hugepages-<size>
		// Mounted page size and medium size must be equal
		mediumSize, err := v1helper.HugePageSizeFromMedium(ed.medium)
		if err != nil {
			return err
		}
		if mountPageSize == nil || mediumSize.Cmp(*mountPageSize) != 0 {
			return fmt.Errorf("pod %v: mounted dir %s pagesize '%s' and requested medium size '%s' differ", ed.pod.UID, dir, mountPageSize.String(), mediumSize.String())
		}
		return nil
	}

	pageSizeMountOption, err := getPageSizeMountOption(ed.medium, ed.pod)
	if err != nil {
		return err
	}

	klog.V(3).Infof("pod %v: mounting hugepages for volume %v", ed.pod.UID, ed.volName)
	return ed.mounter.MountSensitiveWithoutSystemd("nodev", dir, "hugetlbfs", []string{pageSizeMountOption}, nil)
}

// getPageSizeMountOption retrieves the pageSize mount option from the Pod's
// resources and medium, and validates the pageSize options across all
// containers of the given Pod.
func getPageSizeMountOption(medium v1.StorageMedium, pod *v1.Pod) (string, error) {
	pageSizeFound := false
	pageSize := resource.Quantity{}

	var mediumPageSize resource.Quantity
	if medium != v1.StorageMediumHugePages {
		// medium is: Hugepages-<size>
		var err error
		mediumPageSize, err = v1helper.HugePageSizeFromMedium(medium)
		if err != nil {
			return "", err
		}
	}

	// In some rare cases init containers can also consume huge pages
	for _, container := range append(pod.Spec.Containers, pod.Spec.InitContainers...) {
		// We can use the request here because limits and requests must match for huge pages.
		for requestName := range container.Resources.Requests {
			if !v1helper.IsHugePageResourceName(requestName) {
				continue
			}
			currentPageSize, err := v1helper.HugePageSizeFromResourceName(requestName)
			if err != nil {
				return "", err
			}
			if medium == v1.StorageMediumHugePages { // medium is: Hugepages, size is not specified
				// PageSize for all volumes in a POD must be equal if medium is "Hugepages"
				if pageSizeFound && pageSize.Cmp(currentPageSize) != 0 {
					return "", fmt.Errorf("medium: %s can't be used if container requests multiple huge page sizes", medium)
				}

				pageSizeFound = true
				pageSize = currentPageSize
			} else { // medium is: Hugepages-<size>
				if currentPageSize.Cmp(mediumPageSize) == 0 {
					pageSizeFound = true
					pageSize = currentPageSize
				}
			}
		}
	}

	if !pageSizeFound {
		return "", fmt.Errorf("medium %s: hugePages storage requested, but there is no resource request for huge pages", medium)
	}

	return fmt.Sprintf("%s=%s", hugePagesPageSizeMountOption, pageSize.String()), nil
}
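
// Example (illustrative, not part of the upstream file): a container requesting
// 2Mi huge pages drives the hugetlbfs mount option as follows.
//
//	container.Resources.Requests = v1.ResourceList{
//		v1.ResourceName("hugepages-2Mi"): resource.MustParse("100Mi"),
//	}
//	// For medium "HugePages" or "HugePages-2Mi", getPageSizeMountOption
//	// returns "pagesize=2Mi".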

// setupDir creates the directory with the default permissions specified by the perm constant.
func (ed *emptyDir) setupDir(dir string) error {
	// Create the directory if it doesn't already exist.
	if err := os.MkdirAll(dir, perm); err != nil {
		return err
	}

	// stat the directory to read permission bits
	fileinfo, err := os.Lstat(dir)
	if err != nil {
		return err
	}

	if fileinfo.Mode().Perm() != perm.Perm() {
		// If the permissions on the created directory are wrong, the
		// kubelet is probably running with a umask set. In order to
		// avoid clearing the umask for the entire process or locking
		// the thread, clearing the umask, creating the dir, restoring
		// the umask, and unlocking the thread, we do a chmod to set
		// the specific bits we need.
		err := os.Chmod(dir, perm)
		if err != nil {
			return err
		}

		fileinfo, err = os.Lstat(dir)
		if err != nil {
			return err
		}

		if fileinfo.Mode().Perm() != perm.Perm() {
			klog.Errorf("Expected directory %q permissions to be: %s; got: %s", dir, perm.Perm(), fileinfo.Mode().Perm())
		}
	}

	return nil
}
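
// Worked example (illustrative, not part of the upstream file): with a typical
// kubelet umask of 022, os.MkdirAll(dir, 0777) yields a 0755 directory, so the
// Perm() comparison above fails and the explicit os.Chmod(dir, perm) restores
// the intended 0777 mode.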

func (ed *emptyDir) GetPath() string {
	return getPath(ed.pod.UID, ed.volName, ed.plugin.host)
}

// TearDown simply discards everything in the directory.
func (ed *emptyDir) TearDown() error {
	return ed.TearDownAt(ed.GetPath())
}

// TearDownAt simply discards everything in the directory.
func (ed *emptyDir) TearDownAt(dir string) error {
	// First remove the ready dir which was created in the SetUp func
	readyDir := ed.getMetaDir()
	if removeErr := os.RemoveAll(readyDir); removeErr != nil && !os.IsNotExist(removeErr) {
		return fmt.Errorf("failed to remove ready dir [%s]: %v", readyDir, removeErr)
	}

	if pathExists, pathErr := mount.PathExists(dir); pathErr != nil {
		return fmt.Errorf("error checking if path exists: %w", pathErr)
	} else if !pathExists {
		klog.Warningf("Warning: Unmount skipped because path does not exist: %v", dir)
		return nil
	}

	// Figure out the medium.
	medium, isMnt, _, err := ed.mountDetector.GetMountMedium(dir, ed.medium)
	if err != nil {
		return err
	}
	if isMnt {
		if medium == v1.StorageMediumMemory {
			ed.medium = v1.StorageMediumMemory
			return ed.teardownTmpfsOrHugetlbfs(dir)
		} else if medium == v1.StorageMediumHugePages {
			ed.medium = v1.StorageMediumHugePages
			return ed.teardownTmpfsOrHugetlbfs(dir)
		}
	}
	// assume StorageMediumDefault
	return ed.teardownDefault(dir)
}

func (ed *emptyDir) teardownDefault(dir string) error {
	if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolationFSQuotaMonitoring) {
		// Remove any quota
		err := fsquota.ClearQuota(ed.mounter, dir)
		if err != nil {
			klog.Warningf("Warning: Failed to clear quota on %s: %v", dir, err)
		}
	}
	// Renaming the directory is not required anymore because the operation executor
	// now handles duplicate operations on the same volume
	return os.RemoveAll(dir)
}

func (ed *emptyDir) teardownTmpfsOrHugetlbfs(dir string) error {
	if ed.mounter == nil {
		return fmt.Errorf("memory storage requested, but mounter is nil")
	}
	if err := ed.mounter.Unmount(dir); err != nil {
		return err
	}
	if err := os.RemoveAll(dir); err != nil {
		return err
	}
	return nil
}

func (ed *emptyDir) getMetaDir() string {
	return filepath.Join(ed.plugin.host.GetPodPluginDir(ed.pod.UID, utilstrings.EscapeQualifiedName(emptyDirPluginName)), ed.volName)
}

func getVolumeSource(spec *volume.Spec) (*v1.EmptyDirVolumeSource, bool) {
	var readOnly bool
	var volumeSource *v1.EmptyDirVolumeSource

	if spec.Volume != nil && spec.Volume.EmptyDir != nil {
		volumeSource = spec.Volume.EmptyDir
		readOnly = spec.ReadOnly
	}

	return volumeSource, readOnly
}