k8s.io/kubernetes@v1.29.3/pkg/volume/emptydir/empty_dir.go (about) 1 /* 2 Copyright 2014 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package emptydir 18 19 import ( 20 "fmt" 21 "os" 22 "path/filepath" 23 24 "k8s.io/klog/v2" 25 "k8s.io/mount-utils" 26 utilstrings "k8s.io/utils/strings" 27 28 v1 "k8s.io/api/core/v1" 29 "k8s.io/apimachinery/pkg/api/resource" 30 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 "k8s.io/apimachinery/pkg/types" 32 utilfeature "k8s.io/apiserver/pkg/util/feature" 33 v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" 34 "k8s.io/kubernetes/pkg/features" 35 "k8s.io/kubernetes/pkg/kubelet/cm" 36 "k8s.io/kubernetes/pkg/volume" 37 volumeutil "k8s.io/kubernetes/pkg/volume/util" 38 "k8s.io/kubernetes/pkg/volume/util/fsquota" 39 ) 40 41 // TODO: in the near future, this will be changed to be more restrictive 42 // and the group will be set to allow containers to use emptyDir volumes 43 // from the group attribute. 44 // 45 // https://issue.k8s.io/2630 46 const perm os.FileMode = 0777 47 48 // ProbeVolumePlugins is the primary entrypoint for volume plugins. 49 func ProbeVolumePlugins() []volume.VolumePlugin { 50 return []volume.VolumePlugin{ 51 &emptyDirPlugin{nil}, 52 } 53 } 54 55 type emptyDirPlugin struct { 56 host volume.VolumeHost 57 } 58 59 var _ volume.VolumePlugin = &emptyDirPlugin{} 60 61 const ( 62 emptyDirPluginName = "kubernetes.io/empty-dir" 63 hugePagesPageSizeMountOption = "pagesize" 64 ) 65 66 func getPath(uid types.UID, volName string, host volume.VolumeHost) string { 67 return host.GetPodVolumeDir(uid, utilstrings.EscapeQualifiedName(emptyDirPluginName), volName) 68 } 69 70 func (plugin *emptyDirPlugin) Init(host volume.VolumeHost) error { 71 plugin.host = host 72 73 return nil 74 } 75 76 func (plugin *emptyDirPlugin) GetPluginName() string { 77 return emptyDirPluginName 78 } 79 80 func (plugin *emptyDirPlugin) GetVolumeName(spec *volume.Spec) (string, error) { 81 volumeSource, _ := getVolumeSource(spec) 82 if volumeSource == nil { 83 return "", fmt.Errorf("Spec does not reference an EmptyDir volume type") 84 } 85 86 // Return user defined volume name, since this is an ephemeral volume type 87 return spec.Name(), nil 88 } 89 90 func (plugin *emptyDirPlugin) CanSupport(spec *volume.Spec) bool { 91 return spec.Volume != nil && spec.Volume.EmptyDir != nil 92 } 93 94 func (plugin *emptyDirPlugin) RequiresRemount(spec *volume.Spec) bool { 95 return false 96 } 97 98 func (plugin *emptyDirPlugin) SupportsMountOption() bool { 99 return false 100 } 101 102 func (plugin *emptyDirPlugin) SupportsBulkVolumeVerification() bool { 103 return false 104 } 105 106 func (plugin *emptyDirPlugin) SupportsSELinuxContextMount(spec *volume.Spec) (bool, error) { 107 return false, nil 108 } 109 110 func (plugin *emptyDirPlugin) NewMounter(spec *volume.Spec, pod *v1.Pod, opts volume.VolumeOptions) (volume.Mounter, error) { 111 return plugin.newMounterInternal(spec, pod, plugin.host.GetMounter(plugin.GetPluginName()), &realMountDetector{plugin.host.GetMounter(plugin.GetPluginName())}, opts) 112 } 113 114 func calculateEmptyDirMemorySize(nodeAllocatableMemory *resource.Quantity, spec *volume.Spec, pod *v1.Pod) *resource.Quantity { 115 // if feature is disabled, continue the default behavior of linux host default 116 sizeLimit := &resource.Quantity{} 117 if !utilfeature.DefaultFeatureGate.Enabled(features.SizeMemoryBackedVolumes) { 118 return sizeLimit 119 } 120 121 // size limit defaults to node allocatable (pods can't consume more memory than all pods) 122 sizeLimit = nodeAllocatableMemory 123 zero := resource.MustParse("0") 124 125 // determine pod resource allocation 126 // we use the same function for pod cgroup assignment to maintain consistent behavior 127 // NOTE: this could be nil on systems that do not support pod memory containment (i.e. windows) 128 podResourceConfig := cm.ResourceConfigForPod(pod, false, uint64(100000), false) 129 if podResourceConfig != nil && podResourceConfig.Memory != nil { 130 podMemoryLimit := resource.NewQuantity(*(podResourceConfig.Memory), resource.BinarySI) 131 // ensure 0 < value < size 132 if podMemoryLimit.Cmp(zero) > 0 && podMemoryLimit.Cmp(*sizeLimit) < 1 { 133 sizeLimit = podMemoryLimit 134 } 135 } 136 137 // volume local size is used if and only if less than what pod could consume 138 if spec.Volume.EmptyDir.SizeLimit != nil { 139 volumeSizeLimit := spec.Volume.EmptyDir.SizeLimit 140 // ensure 0 < value < size 141 if volumeSizeLimit.Cmp(zero) > 0 && volumeSizeLimit.Cmp(*sizeLimit) < 1 { 142 sizeLimit = volumeSizeLimit 143 } 144 } 145 return sizeLimit 146 } 147 148 func (plugin *emptyDirPlugin) newMounterInternal(spec *volume.Spec, pod *v1.Pod, mounter mount.Interface, mountDetector mountDetector, opts volume.VolumeOptions) (volume.Mounter, error) { 149 medium := v1.StorageMediumDefault 150 sizeLimit := &resource.Quantity{} 151 if spec.Volume.EmptyDir != nil { // Support a non-specified source as EmptyDir. 152 medium = spec.Volume.EmptyDir.Medium 153 if medium == v1.StorageMediumMemory { 154 nodeAllocatable, err := plugin.host.GetNodeAllocatable() 155 if err != nil { 156 return nil, err 157 } 158 sizeLimit = calculateEmptyDirMemorySize(nodeAllocatable.Memory(), spec, pod) 159 } 160 } 161 return &emptyDir{ 162 pod: pod, 163 volName: spec.Name(), 164 medium: medium, 165 sizeLimit: sizeLimit, 166 mounter: mounter, 167 mountDetector: mountDetector, 168 plugin: plugin, 169 MetricsProvider: volume.NewMetricsDu(getPath(pod.UID, spec.Name(), plugin.host)), 170 }, nil 171 } 172 173 func (plugin *emptyDirPlugin) NewUnmounter(volName string, podUID types.UID) (volume.Unmounter, error) { 174 // Inject real implementations here, test through the internal function. 175 return plugin.newUnmounterInternal(volName, podUID, plugin.host.GetMounter(plugin.GetPluginName()), &realMountDetector{plugin.host.GetMounter(plugin.GetPluginName())}) 176 } 177 178 func (plugin *emptyDirPlugin) newUnmounterInternal(volName string, podUID types.UID, mounter mount.Interface, mountDetector mountDetector) (volume.Unmounter, error) { 179 ed := &emptyDir{ 180 pod: &v1.Pod{ObjectMeta: metav1.ObjectMeta{UID: podUID}}, 181 volName: volName, 182 medium: v1.StorageMediumDefault, // might be changed later 183 mounter: mounter, 184 mountDetector: mountDetector, 185 plugin: plugin, 186 MetricsProvider: volume.NewMetricsDu(getPath(podUID, volName, plugin.host)), 187 } 188 return ed, nil 189 } 190 191 func (plugin *emptyDirPlugin) ConstructVolumeSpec(volName, mountPath string) (volume.ReconstructedVolume, error) { 192 emptyDirVolume := &v1.Volume{ 193 Name: volName, 194 VolumeSource: v1.VolumeSource{ 195 EmptyDir: &v1.EmptyDirVolumeSource{}, 196 }, 197 } 198 return volume.ReconstructedVolume{ 199 Spec: volume.NewSpecFromVolume(emptyDirVolume), 200 }, nil 201 } 202 203 // mountDetector abstracts how to find what kind of mount a path is backed by. 204 type mountDetector interface { 205 // GetMountMedium determines what type of medium a given path is backed 206 // by and whether that path is a mount point. For example, if this 207 // returns (v1.StorageMediumMemory, false, nil), the caller knows that the path is 208 // on a memory FS (tmpfs on Linux) but is not the root mountpoint of 209 // that tmpfs. 210 GetMountMedium(path string, requestedMedium v1.StorageMedium) (v1.StorageMedium, bool, *resource.Quantity, error) 211 } 212 213 // EmptyDir volumes are temporary directories exposed to the pod. 214 // These do not persist beyond the lifetime of a pod. 215 type emptyDir struct { 216 pod *v1.Pod 217 volName string 218 sizeLimit *resource.Quantity 219 medium v1.StorageMedium 220 mounter mount.Interface 221 mountDetector mountDetector 222 plugin *emptyDirPlugin 223 volume.MetricsProvider 224 } 225 226 func (ed *emptyDir) GetAttributes() volume.Attributes { 227 return volume.Attributes{ 228 ReadOnly: false, 229 Managed: true, 230 SELinuxRelabel: true, 231 } 232 } 233 234 // SetUp creates new directory. 235 func (ed *emptyDir) SetUp(mounterArgs volume.MounterArgs) error { 236 return ed.SetUpAt(ed.GetPath(), mounterArgs) 237 } 238 239 // SetUpAt creates new directory. 240 func (ed *emptyDir) SetUpAt(dir string, mounterArgs volume.MounterArgs) error { 241 notMnt, err := ed.mounter.IsLikelyNotMountPoint(dir) 242 // Getting an os.IsNotExist err from is a contingency; the directory 243 // may not exist yet, in which case, setup should run. 244 if err != nil && !os.IsNotExist(err) { 245 return err 246 } 247 248 // If the plugin readiness file is present for this volume, and the 249 // storage medium is the default, then the volume is ready. If the 250 // medium is memory, and a mountpoint is present, then the volume is 251 // ready. 252 readyDir := ed.getMetaDir() 253 if volumeutil.IsReady(readyDir) { 254 if ed.medium == v1.StorageMediumMemory && !notMnt { 255 return nil 256 } else if ed.medium == v1.StorageMediumDefault { 257 // Further check dir exists 258 if _, err := os.Stat(dir); err == nil { 259 klog.V(6).InfoS("Dir exists, so check and assign quota if the underlying medium supports quotas", "dir", dir) 260 err = ed.assignQuota(dir, mounterArgs.DesiredSize) 261 return err 262 } 263 // This situation should not happen unless user manually delete volume dir. 264 // In this case, delete ready file and print a warning for it. 265 klog.Warningf("volume ready file dir %s exist, but volume dir %s does not. Remove ready dir", readyDir, dir) 266 if err := os.RemoveAll(readyDir); err != nil && !os.IsNotExist(err) { 267 klog.Warningf("failed to remove ready dir [%s]: %v", readyDir, err) 268 } 269 } 270 } 271 272 switch { 273 case ed.medium == v1.StorageMediumDefault: 274 err = ed.setupDir(dir) 275 case ed.medium == v1.StorageMediumMemory: 276 err = ed.setupTmpfs(dir) 277 case v1helper.IsHugePageMedium(ed.medium): 278 err = ed.setupHugepages(dir) 279 default: 280 err = fmt.Errorf("unknown storage medium %q", ed.medium) 281 } 282 283 volume.SetVolumeOwnership(ed, dir, mounterArgs.FsGroup, nil /*fsGroupChangePolicy*/, volumeutil.FSGroupCompleteHook(ed.plugin, nil)) 284 285 // If setting up the quota fails, just log a message but don't actually error out. 286 // We'll use the old du mechanism in this case, at least until we support 287 // enforcement. 288 if err == nil { 289 volumeutil.SetReady(ed.getMetaDir()) 290 err = ed.assignQuota(dir, mounterArgs.DesiredSize) 291 } 292 return err 293 } 294 295 // assignQuota checks if the underlying medium supports quotas and if so, sets 296 func (ed *emptyDir) assignQuota(dir string, mounterSize *resource.Quantity) error { 297 if mounterSize != nil { 298 // Deliberately shadow the outer use of err as noted 299 // above. 300 hasQuotas, err := fsquota.SupportsQuotas(ed.mounter, dir) 301 if err != nil { 302 klog.V(3).Infof("Unable to check for quota support on %s: %s", dir, err.Error()) 303 } else if hasQuotas { 304 klog.V(4).Infof("emptydir trying to assign quota %v on %s", mounterSize, dir) 305 if err := fsquota.AssignQuota(ed.mounter, dir, ed.pod.UID, mounterSize); err != nil { 306 klog.V(3).Infof("Set quota on %s failed %s", dir, err.Error()) 307 return err 308 } 309 return nil 310 } 311 } 312 return nil 313 } 314 315 // setupTmpfs creates a tmpfs mount at the specified directory. 316 func (ed *emptyDir) setupTmpfs(dir string) error { 317 if ed.mounter == nil { 318 return fmt.Errorf("memory storage requested, but mounter is nil") 319 } 320 if err := ed.setupDir(dir); err != nil { 321 return err 322 } 323 // Make SetUp idempotent. 324 medium, isMnt, _, err := ed.mountDetector.GetMountMedium(dir, ed.medium) 325 if err != nil { 326 return err 327 } 328 // If the directory is a mountpoint with medium memory, there is no 329 // work to do since we are already in the desired state. 330 if isMnt && medium == v1.StorageMediumMemory { 331 return nil 332 } 333 334 var options []string 335 // Linux system default is 50% of capacity. 336 if ed.sizeLimit != nil && ed.sizeLimit.Value() > 0 { 337 options = []string{fmt.Sprintf("size=%d", ed.sizeLimit.Value())} 338 } 339 340 klog.V(3).Infof("pod %v: mounting tmpfs for volume %v", ed.pod.UID, ed.volName) 341 return ed.mounter.MountSensitiveWithoutSystemd("tmpfs", dir, "tmpfs", options, nil) 342 } 343 344 // setupHugepages creates a hugepage mount at the specified directory. 345 func (ed *emptyDir) setupHugepages(dir string) error { 346 if ed.mounter == nil { 347 return fmt.Errorf("memory storage requested, but mounter is nil") 348 } 349 if err := ed.setupDir(dir); err != nil { 350 return err 351 } 352 // Make SetUp idempotent. 353 medium, isMnt, mountPageSize, err := ed.mountDetector.GetMountMedium(dir, ed.medium) 354 klog.V(3).Infof("pod %v: setupHugepages: medium: %s, isMnt: %v, dir: %s, err: %v", ed.pod.UID, medium, isMnt, dir, err) 355 if err != nil { 356 return err 357 } 358 // If the directory is a mountpoint with medium hugepages of the same page size, 359 // there is no work to do since we are already in the desired state. 360 if isMnt && v1helper.IsHugePageMedium(medium) { 361 // Medium is: Hugepages 362 if ed.medium == v1.StorageMediumHugePages { 363 return nil 364 } 365 if mountPageSize == nil { 366 return fmt.Errorf("pod %v: mounted dir %s pagesize is not determined", ed.pod.UID, dir) 367 } 368 // Medium is: Hugepages-<size> 369 // Mounted page size and medium size must be equal 370 mediumSize, err := v1helper.HugePageSizeFromMedium(ed.medium) 371 if err != nil { 372 return err 373 } 374 if mountPageSize == nil || mediumSize.Cmp(*mountPageSize) != 0 { 375 return fmt.Errorf("pod %v: mounted dir %s pagesize '%s' and requested medium size '%s' differ", ed.pod.UID, dir, mountPageSize.String(), mediumSize.String()) 376 } 377 return nil 378 } 379 380 pageSizeMountOption, err := getPageSizeMountOption(ed.medium, ed.pod) 381 if err != nil { 382 return err 383 } 384 385 klog.V(3).Infof("pod %v: mounting hugepages for volume %v", ed.pod.UID, ed.volName) 386 return ed.mounter.MountSensitiveWithoutSystemd("nodev", dir, "hugetlbfs", []string{pageSizeMountOption}, nil) 387 } 388 389 // getPageSizeMountOption retrieves pageSize mount option from Pod's resources 390 // and medium and validates pageSize options in all containers of given Pod. 391 func getPageSizeMountOption(medium v1.StorageMedium, pod *v1.Pod) (string, error) { 392 pageSizeFound := false 393 pageSize := resource.Quantity{} 394 395 var mediumPageSize resource.Quantity 396 if medium != v1.StorageMediumHugePages { 397 // medium is: Hugepages-<size> 398 var err error 399 mediumPageSize, err = v1helper.HugePageSizeFromMedium(medium) 400 if err != nil { 401 return "", err 402 } 403 } 404 405 // In some rare cases init containers can also consume Huge pages 406 for _, container := range append(pod.Spec.Containers, pod.Spec.InitContainers...) { 407 // We can take request because limit and requests must match. 408 for requestName := range container.Resources.Requests { 409 if !v1helper.IsHugePageResourceName(requestName) { 410 continue 411 } 412 currentPageSize, err := v1helper.HugePageSizeFromResourceName(requestName) 413 if err != nil { 414 return "", err 415 } 416 if medium == v1.StorageMediumHugePages { // medium is: Hugepages, size is not specified 417 // PageSize for all volumes in a POD must be equal if medium is "Hugepages" 418 if pageSizeFound && pageSize.Cmp(currentPageSize) != 0 { 419 return "", fmt.Errorf("medium: %s can't be used if container requests multiple huge page sizes", medium) 420 } 421 422 pageSizeFound = true 423 pageSize = currentPageSize 424 } else { // medium is: Hugepages-<size> 425 if currentPageSize.Cmp(mediumPageSize) == 0 { 426 pageSizeFound = true 427 pageSize = currentPageSize 428 } 429 } 430 } 431 } 432 433 if !pageSizeFound { 434 return "", fmt.Errorf("medium %s: hugePages storage requested, but there is no resource request for huge pages", medium) 435 } 436 437 return fmt.Sprintf("%s=%s", hugePagesPageSizeMountOption, pageSize.String()), nil 438 439 } 440 441 // setupDir creates the directory with the default permissions specified by the perm constant. 442 func (ed *emptyDir) setupDir(dir string) error { 443 // Create the directory if it doesn't already exist. 444 if err := os.MkdirAll(dir, perm); err != nil { 445 return err 446 } 447 448 // stat the directory to read permission bits 449 fileinfo, err := os.Lstat(dir) 450 if err != nil { 451 return err 452 } 453 454 if fileinfo.Mode().Perm() != perm.Perm() { 455 // If the permissions on the created directory are wrong, the 456 // kubelet is probably running with a umask set. In order to 457 // avoid clearing the umask for the entire process or locking 458 // the thread, clearing the umask, creating the dir, restoring 459 // the umask, and unlocking the thread, we do a chmod to set 460 // the specific bits we need. 461 err := os.Chmod(dir, perm) 462 if err != nil { 463 return err 464 } 465 466 fileinfo, err = os.Lstat(dir) 467 if err != nil { 468 return err 469 } 470 471 if fileinfo.Mode().Perm() != perm.Perm() { 472 klog.Errorf("Expected directory %q permissions to be: %s; got: %s", dir, perm.Perm(), fileinfo.Mode().Perm()) 473 } 474 } 475 476 return nil 477 } 478 479 func (ed *emptyDir) GetPath() string { 480 return getPath(ed.pod.UID, ed.volName, ed.plugin.host) 481 } 482 483 // TearDown simply discards everything in the directory. 484 func (ed *emptyDir) TearDown() error { 485 return ed.TearDownAt(ed.GetPath()) 486 } 487 488 // TearDownAt simply discards everything in the directory. 489 func (ed *emptyDir) TearDownAt(dir string) error { 490 // First remove ready dir which created in SetUp func 491 readyDir := ed.getMetaDir() 492 if removeErr := os.RemoveAll(readyDir); removeErr != nil && !os.IsNotExist(removeErr) { 493 return fmt.Errorf("failed to remove ready dir [%s]: %v", readyDir, removeErr) 494 } 495 496 if pathExists, pathErr := mount.PathExists(dir); pathErr != nil { 497 return fmt.Errorf("error checking if path exists: %w", pathErr) 498 } else if !pathExists { 499 klog.Warningf("Warning: Unmount skipped because path does not exist: %v", dir) 500 return nil 501 } 502 503 // Figure out the medium. 504 medium, isMnt, _, err := ed.mountDetector.GetMountMedium(dir, ed.medium) 505 if err != nil { 506 return err 507 } 508 if isMnt { 509 if medium == v1.StorageMediumMemory { 510 ed.medium = v1.StorageMediumMemory 511 return ed.teardownTmpfsOrHugetlbfs(dir) 512 } else if medium == v1.StorageMediumHugePages { 513 ed.medium = v1.StorageMediumHugePages 514 return ed.teardownTmpfsOrHugetlbfs(dir) 515 } 516 } 517 // assume StorageMediumDefault 518 return ed.teardownDefault(dir) 519 } 520 521 func (ed *emptyDir) teardownDefault(dir string) error { 522 if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolationFSQuotaMonitoring) { 523 // Remove any quota 524 err := fsquota.ClearQuota(ed.mounter, dir) 525 if err != nil { 526 klog.Warningf("Warning: Failed to clear quota on %s: %v", dir, err) 527 } 528 } 529 // Renaming the directory is not required anymore because the operation executor 530 // now handles duplicate operations on the same volume 531 return os.RemoveAll(dir) 532 } 533 534 func (ed *emptyDir) teardownTmpfsOrHugetlbfs(dir string) error { 535 if ed.mounter == nil { 536 return fmt.Errorf("memory storage requested, but mounter is nil") 537 } 538 if err := ed.mounter.Unmount(dir); err != nil { 539 return err 540 } 541 if err := os.RemoveAll(dir); err != nil { 542 return err 543 } 544 return nil 545 } 546 547 func (ed *emptyDir) getMetaDir() string { 548 return filepath.Join(ed.plugin.host.GetPodPluginDir(ed.pod.UID, utilstrings.EscapeQualifiedName(emptyDirPluginName)), ed.volName) 549 } 550 551 func getVolumeSource(spec *volume.Spec) (*v1.EmptyDirVolumeSource, bool) { 552 var readOnly bool 553 var volumeSource *v1.EmptyDirVolumeSource 554 555 if spec.Volume != nil && spec.Volume.EmptyDir != nil { 556 volumeSource = spec.Volume.EmptyDir 557 readOnly = spec.ReadOnly 558 } 559 560 return volumeSource, readOnly 561 }