k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/kubelet/eviction/eviction_manager_test.go

     1  /*
     2  Copyright 2016 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package eviction
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"testing"
    23  	"time"
    24  
    25  	"github.com/google/go-cmp/cmp"
    26  	"github.com/google/go-cmp/cmp/cmpopts"
    27  	gomock "go.uber.org/mock/gomock"
    28  	v1 "k8s.io/api/core/v1"
    29  	"k8s.io/apimachinery/pkg/api/resource"
    30  	"k8s.io/apimachinery/pkg/types"
    31  	utilfeature "k8s.io/apiserver/pkg/util/feature"
    32  	"k8s.io/client-go/tools/record"
    33  	featuregatetesting "k8s.io/component-base/featuregate/testing"
    34  	statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
    35  	kubeapi "k8s.io/kubernetes/pkg/apis/core"
    36  	"k8s.io/kubernetes/pkg/apis/scheduling"
    37  	"k8s.io/kubernetes/pkg/features"
    38  	evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
    39  	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
    40  	kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
    41  	testingclock "k8s.io/utils/clock/testing"
    42  	"k8s.io/utils/ptr"
    43  )
    44  
    45  const (
    46  	lowPriority     = -1
    47  	defaultPriority = 0
    48  	highPriority    = 1
    49  )
    50  
    51  // mockPodKiller is used to test which pod is killed
    52  type mockPodKiller struct {
    53  	pod                 *v1.Pod
    54  	evict               bool
    55  	statusFn            func(*v1.PodStatus)
    56  	gracePeriodOverride *int64
    57  }
    58  
    59  // killPodNow records the pod that was killed
    60  func (m *mockPodKiller) killPodNow(pod *v1.Pod, evict bool, gracePeriodOverride *int64, statusFn func(*v1.PodStatus)) error {
    61  	m.pod = pod
    62  	m.statusFn = statusFn
    63  	m.evict = evict
    64  	m.gracePeriodOverride = gracePeriodOverride
    65  	return nil
    66  }
    67  
    68  // mockDiskInfoProvider is used to simulate disk info for testing.
    69  type mockDiskInfoProvider struct {
    70  	dedicatedImageFs *bool
    71  }
    72  
    73  // HasDedicatedImageFs returns the mocked value
    74  func (m *mockDiskInfoProvider) HasDedicatedImageFs(_ context.Context) (bool, error) {
    75  	return ptr.Deref(m.dedicatedImageFs, false), nil
    76  }
    77  
    78  // mockDiskGC is used to simulate invoking image and container garbage collection.
    79  type mockDiskGC struct {
    80  	err                  error
    81  	imageGCInvoked       bool
    82  	containerGCInvoked   bool
    83  	readAndWriteSeparate bool
    84  	fakeSummaryProvider  *fakeSummaryProvider
    85  	summaryAfterGC       *statsapi.Summary
    86  }
    87  
    88  // DeleteUnusedImages returns the mocked value.
    89  func (m *mockDiskGC) DeleteUnusedImages(_ context.Context) error {
    90  	m.imageGCInvoked = true
    91  	if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
    92  		m.fakeSummaryProvider.result = m.summaryAfterGC
    93  	}
    94  	return m.err
    95  }
    96  
    97  // DeleteAllUnusedContainers returns the mocked value
    98  func (m *mockDiskGC) DeleteAllUnusedContainers(_ context.Context) error {
    99  	m.containerGCInvoked = true
   100  	if m.summaryAfterGC != nil && m.fakeSummaryProvider != nil {
   101  		m.fakeSummaryProvider.result = m.summaryAfterGC
   102  	}
   103  	return m.err
   104  }
   105  
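        // IsContainerFsSeparateFromImageFs returns the mocked value indicating whether the
        // writeable (container) filesystem is separate from the read-only (image) filesystem.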
   106  func (m *mockDiskGC) IsContainerFsSeparateFromImageFs(_ context.Context) bool {
   107  	return m.readAndWriteSeparate
   108  }
   109  
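        // makePodWithMemoryStats returns a single-container pod and matching pod stats
        // reporting the given memory working set.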
   110  func makePodWithMemoryStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) {
   111  	pod := newPod(name, priority, []v1.Container{
   112  		newContainer(name, requests, limits),
   113  	}, nil)
   114  	podStats := newPodMemoryStats(pod, resource.MustParse(memoryWorkingSet))
   115  	return pod, podStats
   116  }
   117  
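        // makePodWithPIDStats returns a single-container pod and matching pod stats
        // reporting the given process count.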
   118  func makePodWithPIDStats(name string, priority int32, processCount uint64) (*v1.Pod, statsapi.PodStats) {
   119  	pod := newPod(name, priority, []v1.Container{
   120  		newContainer(name, nil, nil),
   121  	}, nil)
   122  	podStats := newPodProcessStats(pod, processCount)
   123  	return pod, podStats
   124  }
   125  
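        // makePodWithDiskStats returns a single-container pod (with optional volumes) and matching
        // pod stats reporting the given root fs, logs, and per-local-volume usage.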
   126  func makePodWithDiskStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootFsUsed, logsUsed, perLocalVolumeUsed string, volumes []v1.Volume) (*v1.Pod, statsapi.PodStats) {
   127  	pod := newPod(name, priority, []v1.Container{
   128  		newContainer(name, requests, limits),
   129  	}, volumes)
   130  	podStats := newPodDiskStats(pod, parseQuantity(rootFsUsed), parseQuantity(logsUsed), parseQuantity(perLocalVolumeUsed))
   131  	return pod, podStats
   132  }
   133  
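        // makePodWithLocalStorageCapacityIsolationOpen returns a pod with an emptyDir volume sized to
        // its memory request; the reported stats vary by pod name so callers can exercise emptyDir,
        // container, and pod ephemeral-storage limits.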
   134  func makePodWithLocalStorageCapacityIsolationOpen(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, memoryWorkingSet string) (*v1.Pod, statsapi.PodStats) {
   135  	vol := newVolume("local-volume", v1.VolumeSource{
   136  		EmptyDir: &v1.EmptyDirVolumeSource{
   137  			SizeLimit: resource.NewQuantity(requests.Memory().Value(), resource.BinarySI),
   138  		},
   139  	})
   140  	var vols []v1.Volume
   141  	vols = append(vols, vol)
   142  	pod := newPod(name, priority, []v1.Container{
   143  		newContainer(name, requests, limits),
   144  	}, vols)
   145  
   146  	var podStats statsapi.PodStats
   147  	switch name {
   148  	case "empty-dir":
   149  		podStats = newPodMemoryStats(pod, *resource.NewQuantity(requests.Memory().Value()*2, resource.BinarySI))
   150  	case "container-ephemeral-storage-limit":
   151  		podStats = newPodMemoryStats(pod, *resource.NewQuantity(limits.StorageEphemeral().Value(), resource.BinarySI))
   152  	case "pod-ephemeral-storage-limit":
   153  		podStats = newPodMemoryStats(pod, *resource.NewQuantity(limits.StorageEphemeral().Value()*2, resource.BinarySI))
   154  	default:
   155  		podStats = newPodMemoryStats(pod, resource.MustParse(memoryWorkingSet))
   156  	}
   157  	return pod, podStats
   158  }
   159  
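        // makePIDStats builds a stats summary from the node PID capacity, the number of running
        // processes, and the supplied per-pod stats.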
   160  func makePIDStats(nodeAvailablePIDs string, numberOfRunningProcesses string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary {
   161  	val := resource.MustParse(nodeAvailablePIDs)
   162  	availablePIDs := int64(val.Value())
   163  
   164  	parsed := resource.MustParse(numberOfRunningProcesses)
   165  	runningProcesses := int64(parsed.Value())
   166  	result := &statsapi.Summary{
   167  		Node: statsapi.NodeStats{
   168  			Rlimit: &statsapi.RlimitStats{
   169  				MaxPID:                &availablePIDs,
   170  				NumOfRunningProcesses: &runningProcesses,
   171  			},
   172  		},
   173  		Pods: []statsapi.PodStats{},
   174  	}
   175  	for _, podStat := range podStats {
   176  		result.Pods = append(result.Pods, podStat)
   177  	}
   178  	return result
   179  }
   180  
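        // makeMemoryStats builds a stats summary in which node memory and the pods system container
        // both report the given available bytes, plus the supplied per-pod stats.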
   181  func makeMemoryStats(nodeAvailableBytes string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary {
   182  	val := resource.MustParse(nodeAvailableBytes)
   183  	availableBytes := uint64(val.Value())
   184  	workingSetBytes := uint64(val.Value())
   185  	result := &statsapi.Summary{
   186  		Node: statsapi.NodeStats{
   187  			Memory: &statsapi.MemoryStats{
   188  				AvailableBytes:  &availableBytes,
   189  				WorkingSetBytes: &workingSetBytes,
   190  			},
   191  			SystemContainers: []statsapi.ContainerStats{
   192  				{
   193  					Name: statsapi.SystemContainerPods,
   194  					Memory: &statsapi.MemoryStats{
   195  						AvailableBytes:  &availableBytes,
   196  						WorkingSetBytes: &workingSetBytes,
   197  					},
   198  				},
   199  			},
   200  		},
   201  		Pods: []statsapi.PodStats{},
   202  	}
   203  	for _, podStat := range podStats {
   204  		result.Pods = append(result.Pods, podStat)
   205  	}
   206  	return result
   207  }
   208  
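        // diskStats describes the node-level rootfs, imagefs, and (optionally) containerfs
        // availability used to build a stats summary.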
   209  type diskStats struct {
   210  	rootFsAvailableBytes  string
   211  	imageFsAvailableBytes string
   212  	// optional fs
   213  	// if not specified, containerfs is assumed to equal imagefs
   214  	containerFsAvailableBytes string
   215  	podStats                  map[*v1.Pod]statsapi.PodStats
   216  }
   217  
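        // makeDiskStats builds a stats summary from the given filesystem availability;
        // capacity is reported as twice the available bytes.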
   218  func makeDiskStats(diskStats diskStats) *statsapi.Summary {
   219  	rootFsVal := resource.MustParse(diskStats.rootFsAvailableBytes)
   220  	rootFsBytes := uint64(rootFsVal.Value())
   221  	rootFsCapacityBytes := uint64(rootFsVal.Value() * 2)
   222  	imageFsVal := resource.MustParse(diskStats.imageFsAvailableBytes)
   223  	imageFsBytes := uint64(imageFsVal.Value())
   224  	imageFsCapacityBytes := uint64(imageFsVal.Value() * 2)
   225  	if diskStats.containerFsAvailableBytes == "" {
   226  		diskStats.containerFsAvailableBytes = diskStats.imageFsAvailableBytes
   227  	}
   228  	containerFsVal := resource.MustParse(diskStats.containerFsAvailableBytes)
   229  	containerFsBytes := uint64(containerFsVal.Value())
   230  	containerFsCapacityBytes := uint64(containerFsVal.Value() * 2)
   231  	result := &statsapi.Summary{
   232  		Node: statsapi.NodeStats{
   233  			Fs: &statsapi.FsStats{
   234  				AvailableBytes: &rootFsBytes,
   235  				CapacityBytes:  &rootFsCapacityBytes,
   236  			},
   237  			Runtime: &statsapi.RuntimeStats{
   238  				ImageFs: &statsapi.FsStats{
   239  					AvailableBytes: &imageFsBytes,
   240  					CapacityBytes:  &imageFsCapacityBytes,
   241  				},
   242  				ContainerFs: &statsapi.FsStats{
   243  					AvailableBytes: &containerFsBytes,
   244  					CapacityBytes:  &containerFsCapacityBytes,
   245  				},
   246  			},
   247  		},
   248  		Pods: []statsapi.PodStats{},
   249  	}
   250  	for _, podStat := range diskStats.podStats {
   251  		result.Pods = append(result.Pods, podStat)
   252  	}
   253  	return result
   254  }
   255  
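        // podToMake describes a pod fixture: its name, priority, resource requests and limits,
        // and the usage to report in its stats.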
   256  type podToMake struct {
   257  	name                     string
   258  	priority                 int32
   259  	requests                 v1.ResourceList
   260  	limits                   v1.ResourceList
   261  	memoryWorkingSet         string
   262  	pidUsage                 uint64
   263  	rootFsUsed               string
   264  	logsFsUsed               string
   265  	logsFsInodesUsed         string
   266  	rootFsInodesUsed         string
   267  	perLocalVolumeUsed       string
   268  	perLocalVolumeInodesUsed string
   269  }
   270  
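        // TestMemoryPressure_VerifyPodStatus verifies the status recorded on a pod evicted for
        // memory pressure, with and without the PodDisruptionConditions feature gate.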
   271  func TestMemoryPressure_VerifyPodStatus(t *testing.T) {
   272  	testCases := map[string]struct {
   273  		wantPodStatus v1.PodStatus
   274  	}{
   275  		"eviction due to memory pressure; no image fs": {
   276  			wantPodStatus: v1.PodStatus{
   277  				Phase:   v1.PodFailed,
   278  				Reason:  "Evicted",
   279  				Message: "The node was low on resource: memory. Threshold quantity: 2Gi, available: 1500Mi. ",
   280  			},
   281  		},
   282  		"eviction due to memory pressure; image fs": {
   283  			wantPodStatus: v1.PodStatus{
   284  				Phase:   v1.PodFailed,
   285  				Reason:  "Evicted",
   286  				Message: "The node was low on resource: memory. Threshold quantity: 2Gi, available: 1500Mi. ",
   287  			},
   288  		},
   289  	}
   290  	for name, tc := range testCases {
   291  		for _, enablePodDisruptionConditions := range []bool{false, true} {
   292  			t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
   293  				featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)
   294  
   295  				podMaker := makePodWithMemoryStats
   296  				summaryStatsMaker := makeMemoryStats
   297  				podsToMake := []podToMake{
   298  					{name: "below-requests", requests: newResourceList("", "1Gi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "900Mi"},
   299  					{name: "above-requests", requests: newResourceList("", "100Mi", ""), limits: newResourceList("", "1Gi", ""), memoryWorkingSet: "700Mi"},
   300  				}
   301  				pods := []*v1.Pod{}
   302  				podStats := map[*v1.Pod]statsapi.PodStats{}
   303  				for _, podToMake := range podsToMake {
   304  					pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
   305  					pods = append(pods, pod)
   306  					podStats[pod] = podStat
   307  				}
   308  				activePodsFunc := func() []*v1.Pod {
   309  					return pods
   310  				}
   311  
   312  				fakeClock := testingclock.NewFakeClock(time.Now())
   313  				podKiller := &mockPodKiller{}
   314  				diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
   315  				diskGC := &mockDiskGC{err: nil}
   316  				nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
   317  
   318  				config := Config{
   319  					PressureTransitionPeriod: time.Minute * 5,
   320  					Thresholds: []evictionapi.Threshold{
   321  						{
   322  							Signal:   evictionapi.SignalMemoryAvailable,
   323  							Operator: evictionapi.OpLessThan,
   324  							Value: evictionapi.ThresholdValue{
   325  								Quantity: quantityMustParse("2Gi"),
   326  							},
   327  						},
   328  					},
   329  				}
   330  				summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1500Mi", podStats)}
   331  				manager := &managerImpl{
   332  					clock:                        fakeClock,
   333  					killPodFunc:                  podKiller.killPodNow,
   334  					imageGC:                      diskGC,
   335  					containerGC:                  diskGC,
   336  					config:                       config,
   337  					recorder:                     &record.FakeRecorder{},
   338  					summaryProvider:              summaryProvider,
   339  					nodeRef:                      nodeRef,
   340  					nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
   341  					thresholdsFirstObservedAt:    thresholdsObservedAt{},
   342  				}
   343  
   344  				// synchronize to detect the memory pressure
   345  				_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
   346  
   347  				if err != nil {
   348  					t.Fatalf("Manager expects no error but got %v", err)
   349  				}
   350  				// verify memory pressure is detected
   351  				if !manager.IsUnderMemoryPressure() {
   352  					t.Fatalf("Manager should have detected memory pressure")
   353  				}
   354  
   355  				// verify a pod is selected for eviction
   356  				if podKiller.pod == nil {
   357  					t.Fatalf("Manager should have selected a pod for eviction")
   358  				}
   359  
   360  				wantPodStatus := tc.wantPodStatus.DeepCopy()
   361  				if enablePodDisruptionConditions {
   362  					wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
   363  						Type:    "DisruptionTarget",
   364  						Status:  "True",
   365  						Reason:  "TerminationByKubelet",
   366  						Message: "The node was low on resource: memory. Threshold quantity: 2Gi, available: 1500Mi. ",
   367  					})
   368  				}
   369  
   370  				// verify the pod status after applying the status update function
   371  				podKiller.statusFn(&podKiller.pod.Status)
   372  				if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
   373  					t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
   374  				}
   375  			})
   376  		}
   377  	}
   378  }
   379  
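        // TestPIDPressure_VerifyPodStatus verifies the status recorded on a pod evicted for
        // PID pressure, with and without the PodDisruptionConditions feature gate.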
   380  func TestPIDPressure_VerifyPodStatus(t *testing.T) {
   381  	testCases := map[string]struct {
   382  		wantPodStatus v1.PodStatus
   383  	}{
   384  		"eviction due to pid pressure": {
   385  			wantPodStatus: v1.PodStatus{
   386  				Phase:   v1.PodFailed,
   387  				Reason:  "Evicted",
   388  				Message: "The node was low on resource: pids. Threshold quantity: 1200, available: 500. ",
   389  			},
   390  		},
   391  	}
   392  	for name, tc := range testCases {
   393  		for _, enablePodDisruptionConditions := range []bool{true, false} {
   394  			t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
   395  				featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)
   396  
   397  				podMaker := makePodWithPIDStats
   398  				summaryStatsMaker := makePIDStats
   399  				podsToMake := []podToMake{
   400  					{name: "pod1", priority: lowPriority, pidUsage: 500},
   401  					{name: "pod2", priority: defaultPriority, pidUsage: 500},
   402  				}
   403  				pods := []*v1.Pod{}
   404  				podStats := map[*v1.Pod]statsapi.PodStats{}
   405  				for _, podToMake := range podsToMake {
   406  					pod, podStat := podMaker(podToMake.name, podToMake.priority, 2)
   407  					pods = append(pods, pod)
   408  					podStats[pod] = podStat
   409  				}
   410  				activePodsFunc := func() []*v1.Pod {
   411  					return pods
   412  				}
   413  
   414  				fakeClock := testingclock.NewFakeClock(time.Now())
   415  				podKiller := &mockPodKiller{}
   416  				diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
   417  				diskGC := &mockDiskGC{err: nil}
   418  				nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
   419  
   420  				config := Config{
   421  					PressureTransitionPeriod: time.Minute * 5,
   422  					Thresholds: []evictionapi.Threshold{
   423  						{
   424  							Signal:   evictionapi.SignalPIDAvailable,
   425  							Operator: evictionapi.OpLessThan,
   426  							Value: evictionapi.ThresholdValue{
   427  								Quantity: quantityMustParse("1200"),
   428  							},
   429  						},
   430  					},
   431  				}
   432  				summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("1500", "1000", podStats)}
   433  				manager := &managerImpl{
   434  					clock:                        fakeClock,
   435  					killPodFunc:                  podKiller.killPodNow,
   436  					imageGC:                      diskGC,
   437  					containerGC:                  diskGC,
   438  					config:                       config,
   439  					recorder:                     &record.FakeRecorder{},
   440  					summaryProvider:              summaryProvider,
   441  					nodeRef:                      nodeRef,
   442  					nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
   443  					thresholdsFirstObservedAt:    thresholdsObservedAt{},
   444  				}
   445  
   446  				// synchronize to detect the PID pressure
   447  				_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
   448  
   449  				if err != nil {
   450  					t.Fatalf("Manager expects no error but got %v", err)
   451  				}
   452  
   453  				// verify PID pressure is detected
   454  				if !manager.IsUnderPIDPressure() {
   455  					t.Fatalf("Manager should have detected PID pressure")
   456  				}
   457  
   458  				// verify a pod is selected for eviction
   459  				if podKiller.pod == nil {
   460  					t.Fatalf("Manager should have selected a pod for eviction")
   461  				}
   462  
   463  				wantPodStatus := tc.wantPodStatus.DeepCopy()
   464  				if enablePodDisruptionConditions {
   465  					wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
   466  						Type:    "DisruptionTarget",
   467  						Status:  "True",
   468  						Reason:  "TerminationByKubelet",
   469  						Message: "The node was low on resource: pids. Threshold quantity: 1200, available: 500. ",
   470  					})
   471  				}
   472  
   473  				// verify the pod status after applying the status update function
   474  				podKiller.statusFn(&podKiller.pod.Status)
   475  				if diff := cmp.Diff(*wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
   476  					t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
   477  				}
   478  			})
   479  		}
   480  	}
   481  }
   482  
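        // TestDiskPressureNodeFs_VerifyPodStatus verifies the status recorded on a pod evicted for
        // disk pressure across nodefs, imagefs, and split containerfs configurations.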
   483  func TestDiskPressureNodeFs_VerifyPodStatus(t *testing.T) {
   484  	testCases := map[string]struct {
   485  		nodeFsStats                   string
   486  		imageFsStats                  string
   487  		containerFsStats              string
   488  		evictionMessage               string
   489  		kubeletSeparateDiskFeature    bool
   490  		writeableSeparateFromReadOnly bool
   491  		thresholdToMonitor            evictionapi.Threshold
   492  		podToMakes                    []podToMake
   493  		dedicatedImageFs              *bool
   494  		expectErr                     string
   495  	}{
   496  		"eviction due to disk pressure; no image fs": {
   497  			dedicatedImageFs: ptr.To(false),
   498  			nodeFsStats:      "1.5Gi",
   499  			imageFsStats:     "10Gi",
   500  			containerFsStats: "10Gi",
   501  			thresholdToMonitor: evictionapi.Threshold{
   502  				Signal:   evictionapi.SignalNodeFsAvailable,
   503  				Operator: evictionapi.OpLessThan,
   504  				Value: evictionapi.ThresholdValue{
   505  					Quantity: quantityMustParse("2Gi"),
   506  				},
   507  			},
   508  			evictionMessage: "The node was low on resource: ephemeral-storage. Threshold quantity: 2Gi, available: 1536Mi. Container above-requests was using 700Mi, request is 100Mi, has larger consumption of ephemeral-storage. ",
   509  			podToMakes: []podToMake{
   510  				{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
   511  				{name: "above-requests", requests: newResourceList("", "", "100Mi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "700Mi"},
   512  			},
   513  		},
   514  		"eviction due to image disk pressure; image fs": {
   515  			dedicatedImageFs: ptr.To(true),
   516  			nodeFsStats:      "1Gi",
   517  			imageFsStats:     "10Gi",
   518  			containerFsStats: "10Gi",
   519  			evictionMessage:  "The node was low on resource: ephemeral-storage. Threshold quantity: 50Gi, available: 10Gi. Container above-requests was using 80Gi, request is 50Gi, has larger consumption of ephemeral-storage. ",
   520  			thresholdToMonitor: evictionapi.Threshold{
   521  				Signal:   evictionapi.SignalImageFsAvailable,
   522  				Operator: evictionapi.OpLessThan,
   523  				Value: evictionapi.ThresholdValue{
   524  					Quantity: quantityMustParse("50Gi"),
   525  				},
   526  			},
   527  			podToMakes: []podToMake{
   528  				{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
   529  				{name: "above-requests", requests: newResourceList("", "", "50Gi"), limits: newResourceList("", "", "50Gi"), rootFsUsed: "80Gi"},
   530  			},
   531  		},
   532  		"eviction due to container disk pressure; feature off; error; container fs": {
   533  			dedicatedImageFs:              ptr.To(true),
   534  			kubeletSeparateDiskFeature:    false,
   535  			writeableSeparateFromReadOnly: true,
   536  			expectErr:                     "KubeletSeparateDiskGC is turned off but we still have a split filesystem",
   537  			nodeFsStats:                   "1Gi",
   538  			imageFsStats:                  "100Gi",
   539  			containerFsStats:              "10Gi",
   540  			evictionMessage:               "The node was low on resource: ephemeral-storage. Threshold quantity: 50Gi, available: 10Gi. Container above-requests was using 80Gi, request is 50Gi, has larger consumption of ephemeral-storage. ",
   541  			thresholdToMonitor: evictionapi.Threshold{
   542  				Signal:   evictionapi.SignalContainerFsAvailable,
   543  				Operator: evictionapi.OpLessThan,
   544  				Value: evictionapi.ThresholdValue{
   545  					Quantity: quantityMustParse("50Gi"),
   546  				},
   547  			},
   548  			podToMakes: []podToMake{
   549  				{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
   550  				{name: "above-requests", requests: newResourceList("", "", "50Gi"), limits: newResourceList("", "", "50Gi"), rootFsUsed: "80Gi"},
   551  			},
   552  		},
   553  		"eviction due to container disk pressure; container fs": {
   554  			dedicatedImageFs:              ptr.To(true),
   555  			kubeletSeparateDiskFeature:    true,
   556  			writeableSeparateFromReadOnly: true,
   557  			nodeFsStats:                   "10Gi",
   558  			imageFsStats:                  "100Gi",
   559  			containerFsStats:              "10Gi",
   560  			evictionMessage:               "The node was low on resource: ephemeral-storage. Threshold quantity: 50Gi, available: 10Gi. Container above-requests was using 80Gi, request is 50Gi, has larger consumption of ephemeral-storage. ",
   561  			thresholdToMonitor: evictionapi.Threshold{
   562  				Signal:   evictionapi.SignalNodeFsAvailable,
   563  				Operator: evictionapi.OpLessThan,
   564  				Value: evictionapi.ThresholdValue{
   565  					Quantity: quantityMustParse("50Gi"),
   566  				},
   567  			},
   568  			podToMakes: []podToMake{
   569  				{name: "below-requests", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "900Mi"},
   570  				{name: "above-requests", requests: newResourceList("", "", "50Gi"), limits: newResourceList("", "", "50Gi"), rootFsUsed: "80Gi"},
   571  			},
   572  		},
   573  	}
   574  	for name, tc := range testCases {
   575  		for _, enablePodDisruptionConditions := range []bool{false, true} {
   576  			t.Run(fmt.Sprintf("%s;PodDisruptionConditions=%v", name, enablePodDisruptionConditions), func(t *testing.T) {
   577  				featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, tc.kubeletSeparateDiskFeature)
   578  				featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.PodDisruptionConditions, enablePodDisruptionConditions)
   579  
   580  				podMaker := makePodWithDiskStats
   581  				summaryStatsMaker := makeDiskStats
   582  				podsToMake := tc.podToMakes
   583  				wantPodStatus := v1.PodStatus{
   584  					Phase:   v1.PodFailed,
   585  					Reason:  "Evicted",
   586  					Message: tc.evictionMessage,
   587  				}
   588  				pods := []*v1.Pod{}
   589  				podStats := map[*v1.Pod]statsapi.PodStats{}
   590  				for _, podToMake := range podsToMake {
   591  					pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed, nil)
   592  					pods = append(pods, pod)
   593  					podStats[pod] = podStat
   594  				}
   595  				activePodsFunc := func() []*v1.Pod {
   596  					return pods
   597  				}
   598  
   599  				fakeClock := testingclock.NewFakeClock(time.Now())
   600  				podKiller := &mockPodKiller{}
   601  				diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
   602  				diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
   603  				nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
   604  
   605  				config := Config{
   606  					PressureTransitionPeriod: time.Minute * 5,
   607  					Thresholds:               []evictionapi.Threshold{tc.thresholdToMonitor},
   608  				}
   609  				diskStat := diskStats{
   610  					rootFsAvailableBytes:      tc.nodeFsStats,
   611  					imageFsAvailableBytes:     tc.imageFsStats,
   612  					containerFsAvailableBytes: tc.containerFsStats,
   613  					podStats:                  podStats,
   614  				}
   615  				summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStat)}
   616  				manager := &managerImpl{
   617  					clock:                        fakeClock,
   618  					killPodFunc:                  podKiller.killPodNow,
   619  					imageGC:                      diskGC,
   620  					containerGC:                  diskGC,
   621  					config:                       config,
   622  					recorder:                     &record.FakeRecorder{},
   623  					summaryProvider:              summaryProvider,
   624  					nodeRef:                      nodeRef,
   625  					nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
   626  					thresholdsFirstObservedAt:    thresholdsObservedAt{},
   627  				}
   628  
   629  				// synchronize
   630  				pods, synchErr := manager.synchronize(diskInfoProvider, activePodsFunc)
   631  
   632  				if synchErr == nil && tc.expectErr != "" {
   633  					t.Fatalf("Manager should report error but did not")
   634  				} else if tc.expectErr != "" && synchErr != nil {
   635  					if diff := cmp.Diff(tc.expectErr, synchErr.Error()); diff != "" {
   636  						t.Errorf("Unexpected error (-want,+got):\n%s", diff)
   637  					}
   638  				} else {
   639  					// verify manager detected disk pressure
   640  					if !manager.IsUnderDiskPressure() {
   641  						t.Fatalf("Manager should report disk pressure")
   642  					}
   643  
   644  					// verify a pod is selected for eviction
   645  					if podKiller.pod == nil {
   646  						t.Fatalf("Manager should have selected a pod for eviction")
   647  					}
   648  
   649  					if enablePodDisruptionConditions {
   650  						wantPodStatus.Conditions = append(wantPodStatus.Conditions, v1.PodCondition{
   651  							Type:    "DisruptionTarget",
   652  							Status:  "True",
   653  							Reason:  "TerminationByKubelet",
   654  							Message: tc.evictionMessage,
   655  						})
   656  					}
   657  
   658  					// verify the pod status after applying the status update function
   659  					podKiller.statusFn(&podKiller.pod.Status)
   660  					if diff := cmp.Diff(wantPodStatus, podKiller.pod.Status, cmpopts.IgnoreFields(v1.PodCondition{}, "LastProbeTime", "LastTransitionTime")); diff != "" {
   661  						t.Errorf("Unexpected pod status of the evicted pod (-want,+got):\n%s", diff)
   662  					}
   663  				}
   664  			})
   665  		}
   666  	}
   667  }
   668  
   669  // TestMemoryPressure verifies that the eviction manager detects memory pressure, honors the
   669  // soft-threshold grace period, evicts the expected pod, and gates pod admission until the
   669  // pressure transition period has passed.
   670  func TestMemoryPressure(t *testing.T) {
   671  	podMaker := makePodWithMemoryStats
   672  	summaryStatsMaker := makeMemoryStats
   673  	podsToMake := []podToMake{
   674  		{name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
   675  		{name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
   676  		{name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"},
   677  		{name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"},
   678  		{name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"},
   679  	}
   680  	pods := []*v1.Pod{}
   681  	podStats := map[*v1.Pod]statsapi.PodStats{}
   682  	for _, podToMake := range podsToMake {
   683  		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
   684  		pods = append(pods, pod)
   685  		podStats[pod] = podStat
   686  	}
   687  	podToEvict := pods[4]
   688  	activePodsFunc := func() []*v1.Pod {
   689  		return pods
   690  	}
   691  
   692  	fakeClock := testingclock.NewFakeClock(time.Now())
   693  	podKiller := &mockPodKiller{}
   694  	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
   695  	diskGC := &mockDiskGC{err: nil}
   696  	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
   697  
   698  	config := Config{
   699  		MaxPodGracePeriodSeconds: 5,
   700  		PressureTransitionPeriod: time.Minute * 5,
   701  		Thresholds: []evictionapi.Threshold{
   702  			{
   703  				Signal:   evictionapi.SignalMemoryAvailable,
   704  				Operator: evictionapi.OpLessThan,
   705  				Value: evictionapi.ThresholdValue{
   706  					Quantity: quantityMustParse("1Gi"),
   707  				},
   708  			},
   709  			{
   710  				Signal:   evictionapi.SignalMemoryAvailable,
   711  				Operator: evictionapi.OpLessThan,
   712  				Value: evictionapi.ThresholdValue{
   713  					Quantity: quantityMustParse("2Gi"),
   714  				},
   715  				GracePeriod: time.Minute * 2,
   716  			},
   717  		},
   718  	}
   719  	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
   720  	manager := &managerImpl{
   721  		clock:                        fakeClock,
   722  		killPodFunc:                  podKiller.killPodNow,
   723  		imageGC:                      diskGC,
   724  		containerGC:                  diskGC,
   725  		config:                       config,
   726  		recorder:                     &record.FakeRecorder{},
   727  		summaryProvider:              summaryProvider,
   728  		nodeRef:                      nodeRef,
   729  		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
   730  		thresholdsFirstObservedAt:    thresholdsObservedAt{},
   731  	}
   732  
   733  	// create a best effort pod to test admission
   734  	bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi")
   735  	burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi")
   736  
   737  	// synchronize
   738  	_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
   739  
   740  	if err != nil {
   741  		t.Fatalf("Manager expects no error but got %v", err)
   742  	}
   743  
   744  	// we should not have memory pressure
   745  	if manager.IsUnderMemoryPressure() {
   746  		t.Errorf("Manager should not report memory pressure")
   747  	}
   748  
   749  	// try to admit our pods (they should succeed)
   750  	expected := []bool{true, true}
   751  	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
   752  		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
   753  			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
   754  		}
   755  	}
   756  
   757  	// induce soft threshold
   758  	fakeClock.Step(1 * time.Minute)
   759  	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
   760  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
   761  
   762  	if err != nil {
   763  		t.Fatalf("Manager expects no error but got %v", err)
   764  	}
   765  
   766  	// we should have memory pressure
   767  	if !manager.IsUnderMemoryPressure() {
   768  		t.Errorf("Manager should report memory pressure since soft threshold was met")
   769  	}
   770  
   771  	// verify no pod was killed yet because not enough time has passed.
   772  	if podKiller.pod != nil {
   773  		t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
   774  	}
   775  
   776  	// step forward in time past the grace period
   777  	fakeClock.Step(3 * time.Minute)
   778  	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
   779  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
   780  
   781  	if err != nil {
   782  		t.Fatalf("Manager expects no error but got %v", err)
   783  	}
   784  
   785  	// we should have memory pressure
   786  	if !manager.IsUnderMemoryPressure() {
   787  		t.Errorf("Manager should report memory pressure since soft threshold was met")
   788  	}
   789  
   790  	// verify the right pod was killed with the right grace period.
   791  	if podKiller.pod != podToEvict {
   792  		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
   793  	}
   794  	if podKiller.gracePeriodOverride == nil {
   795  		t.Errorf("Manager chose to kill pod but should have had a grace period override.")
   796  	}
   797  	observedGracePeriod := *podKiller.gracePeriodOverride
   798  	if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
   799  		t.Errorf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
   800  	}
   801  	// reset state
   802  	podKiller.pod = nil
   803  	podKiller.gracePeriodOverride = nil
   804  
   805  	// remove memory pressure
   806  	fakeClock.Step(20 * time.Minute)
   807  	summaryProvider.result = summaryStatsMaker("3Gi", podStats)
   808  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
   809  
   810  	if err != nil {
   811  		t.Fatalf("Manager expects no error but got %v", err)
   812  	}
   813  
   814  	// we should not have memory pressure
   815  	if manager.IsUnderMemoryPressure() {
   816  		t.Errorf("Manager should not report memory pressure")
   817  	}
   818  
   819  	// induce memory pressure!
   820  	fakeClock.Step(1 * time.Minute)
   821  	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
   822  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
   823  
   824  	if err != nil {
   825  		t.Fatalf("Manager expects no error but got %v", err)
   826  	}
   827  
   828  	// we should have memory pressure
   829  	if !manager.IsUnderMemoryPressure() {
   830  		t.Errorf("Manager should report memory pressure")
   831  	}
   832  
   833  	// check the right pod was killed
   834  	if podKiller.pod != podToEvict {
   835  		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
   836  	}
   837  	observedGracePeriod = *podKiller.gracePeriodOverride
   838  	if observedGracePeriod != int64(1) {
   839  		t.Errorf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", 1, observedGracePeriod)
   840  	}
   841  
   842  	// the best-effort pod should not be admitted, but the burstable pod should
   843  	expected = []bool{false, true}
   844  	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
   845  		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
   846  			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
   847  		}
   848  	}
   849  
   850  	// reduce memory pressure
   851  	fakeClock.Step(1 * time.Minute)
   852  	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
   853  	podKiller.pod = nil // reset state
   854  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
   855  
   856  	if err != nil {
   857  		t.Fatalf("Manager expects no error but got %v", err)
   858  	}
   859  
   860  	// we should have memory pressure (because transition period not yet met)
   861  	if !manager.IsUnderMemoryPressure() {
   862  		t.Errorf("Manager should report memory pressure")
   863  	}
   864  
   865  	// no pod should have been killed
   866  	if podKiller.pod != nil {
   867  		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
   868  	}
   869  
   870  	// the best-effort pod should not be admitted, but the burstable pod should
   871  	expected = []bool{false, true}
   872  	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
   873  		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
   874  			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
   875  		}
   876  	}
   877  
   878  	// move the clock past transition period to ensure that we stop reporting pressure
   879  	fakeClock.Step(5 * time.Minute)
   880  	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
   881  	podKiller.pod = nil // reset state
   882  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
   883  
   884  	if err != nil {
   885  		t.Fatalf("Manager expects no error but got %v", err)
   886  	}
   887  
   888  	// we should not have memory pressure (because transition period met)
   889  	if manager.IsUnderMemoryPressure() {
   890  		t.Errorf("Manager should not report memory pressure")
   891  	}
   892  
   893  	// no pod should have been killed
   894  	if podKiller.pod != nil {
   895  		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
   896  	}
   897  
   898  	// all pods should be admitted now
   899  	expected = []bool{true, true}
   900  	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
   901  		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
   902  			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
   903  		}
   904  	}
   905  }
   906  
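        // makeContainersByQOS returns a single container whose requests and limits yield the given QoS class.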
   907  func makeContainersByQOS(class v1.PodQOSClass) []v1.Container {
   908  	resources := newResourceList("100m", "1Gi", "")
   909  	switch class {
   910  	case v1.PodQOSGuaranteed:
   911  		return []v1.Container{newContainer("guaranteed-container", resources, resources)}
   912  	case v1.PodQOSBurstable:
   913  		return []v1.Container{newContainer("burstable-container", resources, nil)}
   914  	case v1.PodQOSBestEffort:
   915  		fallthrough
   916  	default:
   917  		return []v1.Container{newContainer("best-effort-container", nil, nil)}
   918  	}
   919  }
   920  
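        // TestPIDPressure verifies that the eviction manager detects PID pressure, honors the
        // soft-threshold grace period, evicts the expected pod, and gates pod admission until the
        // pressure transition period has passed.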
   921  func TestPIDPressure(t *testing.T) {
   922  	testCases := []struct {
   923  		name                               string
   924  		podsToMake                         []podToMake
   925  		evictPodIndex                      int
   926  		noPressurePIDUsage                 string
   927  		pressurePIDUsageWithGracePeriod    string
   928  		pressurePIDUsageWithoutGracePeriod string
   929  		totalPID                           string
   930  	}{
   931  		{
   932  			name: "eviction due to pid pressure",
   933  			podsToMake: []podToMake{
   934  				{name: "high-priority-high-usage", priority: highPriority, pidUsage: 900},
   935  				{name: "default-priority-low-usage", priority: defaultPriority, pidUsage: 100},
   936  				{name: "default-priority-medium-usage", priority: defaultPriority, pidUsage: 400},
   937  				{name: "low-priority-high-usage", priority: lowPriority, pidUsage: 600},
   938  				{name: "low-priority-low-usage", priority: lowPriority, pidUsage: 50},
   939  			},
   940  			evictPodIndex:                      3, // we expect the low-priority-high-usage pod to be evicted
   941  			noPressurePIDUsage:                 "300",
   942  			pressurePIDUsageWithGracePeriod:    "700",
   943  			pressurePIDUsageWithoutGracePeriod: "1200",
   944  			totalPID:                           "2000",
   945  		},
   946  	}
   947  
   948  	for _, tc := range testCases {
   949  		t.Run(tc.name, func(t *testing.T) {
   950  			podMaker := makePodWithPIDStats
   951  			summaryStatsMaker := makePIDStats
   952  			pods := []*v1.Pod{}
   953  			podStats := map[*v1.Pod]statsapi.PodStats{}
   954  			for _, podToMake := range tc.podsToMake {
   955  				pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.pidUsage)
   956  				pods = append(pods, pod)
   957  				podStats[pod] = podStat
   958  			}
   959  			podToEvict := pods[tc.evictPodIndex]
   960  			activePodsFunc := func() []*v1.Pod { return pods }
   961  
   962  			fakeClock := testingclock.NewFakeClock(time.Now())
   963  			podKiller := &mockPodKiller{}
   964  			diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
   965  			diskGC := &mockDiskGC{err: nil}
   966  			nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
   967  
   968  			config := Config{
   969  				MaxPodGracePeriodSeconds: 5,
   970  				PressureTransitionPeriod: time.Minute * 5,
   971  				Thresholds: []evictionapi.Threshold{
   972  					{
   973  						Signal:   evictionapi.SignalPIDAvailable,
   974  						Operator: evictionapi.OpLessThan,
   975  						Value: evictionapi.ThresholdValue{
   976  							Quantity: quantityMustParse("1200"),
   977  						},
   978  					},
   979  					{
   980  						Signal:   evictionapi.SignalPIDAvailable,
   981  						Operator: evictionapi.OpLessThan,
   982  						Value: evictionapi.ThresholdValue{
   983  							Quantity: quantityMustParse("1500"),
   984  						},
   985  						GracePeriod: time.Minute * 2,
   986  					},
   987  				},
   988  			}
   989  
   990  			summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats)}
   991  			manager := &managerImpl{
   992  				clock:                        fakeClock,
   993  				killPodFunc:                  podKiller.killPodNow,
   994  				imageGC:                      diskGC,
   995  				containerGC:                  diskGC,
   996  				config:                       config,
   997  				recorder:                     &record.FakeRecorder{},
   998  				summaryProvider:              summaryProvider,
   999  				nodeRef:                      nodeRef,
  1000  				nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
  1001  				thresholdsFirstObservedAt:    thresholdsObservedAt{},
  1002  			}
  1003  
  1004  			// create a pod to test admission
  1005  			podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, 50)
  1006  
  1007  			// synchronize
  1008  			_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
  1009  
  1010  			if err != nil {
  1011  				t.Fatalf("Manager expects no error but got %v", err)
  1012  			}
  1013  
  1014  			// we should not have PID pressure
  1015  			if manager.IsUnderPIDPressure() {
  1016  				t.Fatalf("Manager should not report PID pressure")
  1017  			}
  1018  
  1019  			// try to admit our pod (should succeed)
  1020  			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
  1021  				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
  1022  			}
  1023  
  1024  			// induce soft threshold for PID pressure
  1025  			fakeClock.Step(1 * time.Minute)
  1026  			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.pressurePIDUsageWithGracePeriod, podStats)
  1027  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1028  
  1029  			if err != nil {
  1030  				t.Fatalf("Manager expects no error but got %v", err)
  1031  			}
  1032  
  1033  			// now, we should have PID pressure
  1034  			if !manager.IsUnderPIDPressure() {
  1035  				t.Errorf("Manager should report PID pressure since soft threshold was met")
  1036  			}
  1037  
  1038  			// verify no pod was killed yet because not enough time has passed
  1039  			if podKiller.pod != nil {
  1040  				t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
  1041  			}
  1042  
  1043  			// step forward in time past the grace period
  1044  			fakeClock.Step(3 * time.Minute)
  1045  			// no change in PID stats to simulate continued pressure
  1046  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1047  
  1048  			if err != nil {
  1049  				t.Fatalf("Manager expects no error but got %v", err)
  1050  			}
  1051  
  1052  			// verify PID pressure is still reported
  1053  			if !manager.IsUnderPIDPressure() {
  1054  				t.Errorf("Manager should still report PID pressure")
  1055  			}
  1056  
  1057  			// verify the right pod was killed with the right grace period.
  1058  			if podKiller.pod != podToEvict {
  1059  				t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  1060  			}
  1061  			if podKiller.gracePeriodOverride == nil {
  1062  				t.Errorf("Manager chose to kill pod but should have had a grace period override.")
  1063  			}
  1064  			observedGracePeriod := *podKiller.gracePeriodOverride
  1065  			if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
  1066  				t.Errorf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
  1067  			}
  1068  
  1069  			// reset state
  1070  			podKiller.pod = nil
  1071  			podKiller.gracePeriodOverride = nil
  1072  
  1073  			// remove PID pressure by simulating increased PID availability
  1074  			fakeClock.Step(20 * time.Minute)
  1075  			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats) // Simulate increased PID availability
  1076  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1077  
  1078  			if err != nil {
  1079  				t.Fatalf("Manager expects no error but got %v", err)
  1080  			}
  1081  
  1082  			// verify PID pressure is resolved
  1083  			if manager.IsUnderPIDPressure() {
  1084  				t.Errorf("Manager should not report PID pressure")
  1085  			}
  1086  
  1087  			// re-induce PID pressure
  1088  			fakeClock.Step(1 * time.Minute)
  1089  			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.pressurePIDUsageWithoutGracePeriod, podStats)
  1090  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1091  
  1092  			if err != nil {
  1093  				t.Fatalf("Manager expects no error but got %v", err)
  1094  			}
  1095  
  1096  			// verify PID pressure is reported again
  1097  			if !manager.IsUnderPIDPressure() {
  1098  				t.Errorf("Manager should report PID pressure")
  1099  			}
  1100  
  1101  			// verify the right pod was killed with the right grace period.
  1102  			if podKiller.pod != podToEvict {
  1103  				t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  1104  			}
  1105  			if podKiller.gracePeriodOverride == nil {
  1106  				t.Errorf("Manager chose to kill pod but should have had a grace period override.")
  1107  			}
  1108  			observedGracePeriod = *podKiller.gracePeriodOverride
  1109  			if observedGracePeriod != int64(1) {
  1110  				t.Errorf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", 1, observedGracePeriod)
  1111  			}
  1112  
  1113  			// try to admit our pod (should fail)
  1114  			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
  1115  				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
  1116  			}
  1117  
  1118  			// reduce PID pressure
  1119  			fakeClock.Step(1 * time.Minute)
  1120  			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats)
  1121  			podKiller.pod = nil // reset state
  1122  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1123  
  1124  			if err != nil {
  1125  				t.Fatalf("Manager expects no error but got %v", err)
  1126  			}
  1127  
  1128  			// we should have PID pressure (because transition period not yet met)
  1129  			if !manager.IsUnderPIDPressure() {
  1130  				t.Errorf("Manager should report PID pressure")
  1131  			}
  1132  
  1133  			// no pod should have been killed
  1134  			if podKiller.pod != nil {
  1135  				t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  1136  			}
  1137  
  1138  			// try to admit our pod (should fail)
  1139  			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
  1140  				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
  1141  			}
  1142  
  1143  			// move the clock past the transition period
  1144  			fakeClock.Step(5 * time.Minute)
  1145  			summaryProvider.result = summaryStatsMaker(tc.totalPID, tc.noPressurePIDUsage, podStats)
  1146  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1147  
  1148  			if err != nil {
  1149  				t.Fatalf("Manager expects no error but got %v", err)
  1150  			}
  1151  
  1152  			// we should not have PID pressure (because transition period met)
  1153  			if manager.IsUnderPIDPressure() {
  1154  				t.Errorf("Manager should not report PID pressure")
  1155  			}
  1156  
  1157  			// no pod should have been killed
  1158  			if podKiller.pod != nil {
  1159  				t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  1160  			}
  1161  
  1162  			// try to admit our pod (should succeed)
  1163  			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
  1164  				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
  1165  			}
  1166  		})
  1167  	}
  1168  }
  1169  
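        // TestAdmitUnderNodeConditions verifies pod admission per QoS class as node conditions are added:
        // best-effort pods are rejected under memory pressure, and all pods are rejected once disk
        // pressure is also present.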
  1170  func TestAdmitUnderNodeConditions(t *testing.T) {
  1171  	manager := &managerImpl{}
  1172  	pods := []*v1.Pod{
  1173  		newPod("guaranteed-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, makeContainersByQOS(v1.PodQOSGuaranteed), nil),
  1174  		newPod("burstable-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, makeContainersByQOS(v1.PodQOSBurstable), nil),
  1175  		newPod("best-effort-pod", scheduling.DefaultPriorityWhenNoDefaultClassExists, makeContainersByQOS(v1.PodQOSBestEffort), nil),
  1176  	}
  1177  
  1178  	expected := []bool{true, true, true}
  1179  	for i, pod := range pods {
  1180  		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  1181  			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  1182  		}
  1183  	}
  1184  
  1185  	manager.nodeConditions = []v1.NodeConditionType{v1.NodeMemoryPressure}
  1186  	expected = []bool{true, true, false}
  1187  	for i, pod := range pods {
  1188  		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  1189  			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  1190  		}
  1191  	}
  1192  
  1193  	manager.nodeConditions = []v1.NodeConditionType{v1.NodeMemoryPressure, v1.NodeDiskPressure}
  1194  	expected = []bool{false, false, false}
  1195  	for i, pod := range pods {
  1196  		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  1197  			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  1198  		}
  1199  	}
  1200  }
  1201  
  1202  // parseQuantity parses the specified value if provided; otherwise it returns a zero quantity
  1203  func parseQuantity(value string) resource.Quantity {
  1204  	if len(value) == 0 {
  1205  		return resource.MustParse("0")
  1206  	}
  1207  	return resource.MustParse(value)
  1208  }
  1209  
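        // TestDiskPressureNodeFs verifies disk-pressure detection and eviction for nodefs and imagefs
        // configurations, including soft thresholds with grace periods.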
  1210  func TestDiskPressureNodeFs(t *testing.T) {
  1211  
  1212  	testCases := map[string]struct {
  1213  		nodeFsStats                   string
  1214  		imageFsStats                  string
  1215  		containerFsStats              string
  1216  		kubeletSeparateDiskFeature    bool
  1217  		writeableSeparateFromReadOnly bool
  1218  		thresholdToMonitor            []evictionapi.Threshold
  1219  		podToMakes                    []podToMake
  1220  		dedicatedImageFs              *bool
  1221  		expectErr                     string
  1222  		inducePressureOnWhichFs       string
  1223  		softDiskPressure              string
  1224  		hardDiskPressure              string
  1225  	}{
  1226  		"eviction due to disk pressure; no image fs": {
  1227  			dedicatedImageFs:        ptr.To(false),
  1228  			nodeFsStats:             "16Gi",
  1229  			imageFsStats:            "16Gi",
  1230  			containerFsStats:        "16Gi",
  1231  			inducePressureOnWhichFs: "nodefs",
  1232  			softDiskPressure:        "1.5Gi",
  1233  			hardDiskPressure:        "750Mi",
  1234  			thresholdToMonitor: []evictionapi.Threshold{
  1235  				{
  1236  					Signal:   evictionapi.SignalNodeFsAvailable,
  1237  					Operator: evictionapi.OpLessThan,
  1238  					Value: evictionapi.ThresholdValue{
  1239  						Quantity: quantityMustParse("1Gi"),
  1240  					},
  1241  				},
  1242  				{
  1243  					Signal:   evictionapi.SignalNodeFsAvailable,
  1244  					Operator: evictionapi.OpLessThan,
  1245  					Value: evictionapi.ThresholdValue{
  1246  						Quantity: quantityMustParse("2Gi"),
  1247  					},
  1248  					GracePeriod: time.Minute * 2,
  1249  				},
  1250  			},
  1251  			podToMakes: []podToMake{
  1252  				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
  1253  				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
  1254  				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
  1255  				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
  1256  				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
  1257  			},
  1258  		},
  1259  		"eviction due to image disk pressure; image fs": {
  1260  			dedicatedImageFs:        ptr.To(true),
  1261  			nodeFsStats:             "16Gi",
  1262  			imageFsStats:            "16Gi",
  1263  			containerFsStats:        "16Gi",
  1264  			softDiskPressure:        "1.5Gi",
  1265  			hardDiskPressure:        "750Mi",
  1266  			inducePressureOnWhichFs: "imagefs",
  1267  			thresholdToMonitor: []evictionapi.Threshold{
  1268  				{
  1269  					Signal:   evictionapi.SignalImageFsAvailable,
  1270  					Operator: evictionapi.OpLessThan,
  1271  					Value: evictionapi.ThresholdValue{
  1272  						Quantity: quantityMustParse("1Gi"),
  1273  					},
  1274  				},
  1275  				{
  1276  					Signal:   evictionapi.SignalImageFsAvailable,
  1277  					Operator: evictionapi.OpLessThan,
  1278  					Value: evictionapi.ThresholdValue{
  1279  						Quantity: quantityMustParse("2Gi"),
  1280  					},
  1281  					GracePeriod: time.Minute * 2,
  1282  				},
  1283  			},
  1284  			podToMakes: []podToMake{
  1285  				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
  1286  				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
  1287  				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
  1288  				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
  1289  				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
  1290  			},
  1291  		},
  1292  		"eviction due to container disk pressure; container fs": {
  1293  			dedicatedImageFs:              ptr.To(true),
  1294  			kubeletSeparateDiskFeature:    true,
  1295  			writeableSeparateFromReadOnly: true,
  1296  			nodeFsStats:                   "16Gi",
  1297  			imageFsStats:                  "16Gi",
  1298  			containerFsStats:              "16Gi",
  1299  			softDiskPressure:              "1.5Gi",
  1300  			hardDiskPressure:              "750Mi",
  1301  			inducePressureOnWhichFs:       "containerfs",
  1302  			thresholdToMonitor: []evictionapi.Threshold{
  1303  				{
  1304  					Signal:   evictionapi.SignalNodeFsAvailable,
  1305  					Operator: evictionapi.OpLessThan,
  1306  					Value: evictionapi.ThresholdValue{
  1307  						Quantity: quantityMustParse("1Gi"),
  1308  					},
  1309  				},
  1310  				{
  1311  					Signal:   evictionapi.SignalNodeFsAvailable,
  1312  					Operator: evictionapi.OpLessThan,
  1313  					Value: evictionapi.ThresholdValue{
  1314  						Quantity: quantityMustParse("2Gi"),
  1315  					},
  1316  					GracePeriod: time.Minute * 2,
  1317  				},
  1318  			},
  1319  			podToMakes: []podToMake{
  1320  				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
  1321  				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
  1322  				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
  1323  				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
  1324  				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
  1325  			},
  1326  		},
  1327  	}
  1328  
  1329  	for name, tc := range testCases {
  1330  		t.Run(name, func(t *testing.T) {
  1331  			featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, tc.kubeletSeparateDiskFeature)
  1332  
  1333  			podMaker := makePodWithDiskStats
  1334  			summaryStatsMaker := makeDiskStats
  1335  			podsToMake := tc.podToMakes
  1336  			pods := []*v1.Pod{}
  1337  			podStats := map[*v1.Pod]statsapi.PodStats{}
  1338  			for _, podToMake := range podsToMake {
  1339  				pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed, nil)
  1340  				pods = append(pods, pod)
  1341  				podStats[pod] = podStat
  1342  			}
  1343  			podToEvict := pods[0]
  1344  			activePodsFunc := func() []*v1.Pod {
  1345  				return pods
  1346  			}
  1347  
  1348  			fakeClock := testingclock.NewFakeClock(time.Now())
  1349  			podKiller := &mockPodKiller{}
  1350  			diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
  1351  			diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
  1352  			nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
  1353  
  1354  			config := Config{
  1355  				MaxPodGracePeriodSeconds: 5,
  1356  				PressureTransitionPeriod: time.Minute * 5,
  1357  				Thresholds:               tc.thresholdToMonitor,
  1358  			}
  1359  
  1360  			diskStatStart := diskStats{
  1361  				rootFsAvailableBytes:      tc.nodeFsStats,
  1362  				imageFsAvailableBytes:     tc.imageFsStats,
  1363  				containerFsAvailableBytes: tc.containerFsStats,
  1364  				podStats:                  podStats,
  1365  			}
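        			// diskStatConst keeps the starting, pressure-free disk stats so they can be restored later to clear pressure.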
  1366  			diskStatConst := diskStatStart
  1367  			summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStatStart)}
  1368  			manager := &managerImpl{
  1369  				clock:                        fakeClock,
  1370  				killPodFunc:                  podKiller.killPodNow,
  1371  				imageGC:                      diskGC,
  1372  				containerGC:                  diskGC,
  1373  				config:                       config,
  1374  				recorder:                     &record.FakeRecorder{},
  1375  				summaryProvider:              summaryProvider,
  1376  				nodeRef:                      nodeRef,
  1377  				nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
  1378  				thresholdsFirstObservedAt:    thresholdsObservedAt{},
  1379  			}
  1380  
  1381  			// create a best effort pod to test admission
  1382  			podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi", "0Gi", "0Gi", nil)
  1383  
  1384  			// synchronize
  1385  			_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
  1386  
  1387  			if err != nil {
  1388  				t.Fatalf("Manager expected no error but got %v", err)
  1389  			}
  1390  
  1391  			// we should not have disk pressure
  1392  			if manager.IsUnderDiskPressure() {
  1393  				t.Fatalf("Manager should not report disk pressure")
  1394  			}
  1395  
  1396  			// try to admit our pod (should succeed)
  1397  			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
  1398  				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
  1399  			}
  1400  
  1401  			// induce soft threshold
  1402  			fakeClock.Step(1 * time.Minute)
  1403  
  1404  			if tc.inducePressureOnWhichFs == "nodefs" {
  1405  				diskStatStart.rootFsAvailableBytes = tc.softDiskPressure
  1406  			} else if tc.inducePressureOnWhichFs == "imagefs" {
  1407  				diskStatStart.imageFsAvailableBytes = tc.softDiskPressure
  1408  			} else if tc.inducePressureOnWhichFs == "containerfs" {
  1409  				diskStatStart.containerFsAvailableBytes = tc.softDiskPressure
  1410  			}
  1411  			summaryProvider.result = summaryStatsMaker(diskStatStart)
  1412  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1413  
  1414  			if err != nil {
  1415  				t.Fatalf("Manager expected no error but got %v", err)
  1416  			}
  1417  
  1418  			// we should have disk pressure
  1419  			if !manager.IsUnderDiskPressure() {
  1420  				t.Fatalf("Manager should report disk pressure since soft threshold was met")
  1421  			}
  1422  
  1423  			// verify no pod was killed yet because not enough time has passed.
  1424  			if podKiller.pod != nil {
  1425  				t.Fatalf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
  1426  			}
  1427  
  1428  			// step forward in time past the grace period
  1429  			fakeClock.Step(3 * time.Minute)
  1430  			summaryProvider.result = summaryStatsMaker(diskStatStart)
  1431  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1432  
  1433  			if err != nil {
  1434  				t.Fatalf("Manager expected no error but got %v", err)
  1435  			}
  1436  
  1437  			// we should have disk pressure
  1438  			if !manager.IsUnderDiskPressure() {
  1439  				t.Fatalf("Manager should report disk pressure since soft threshold was met")
  1440  			}
  1441  
  1442  			// verify the right pod was killed with the right grace period.
  1443  			if podKiller.pod != podToEvict {
  1444  				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  1445  			}
  1446  			if podKiller.gracePeriodOverride == nil {
  1447  				t.Fatalf("Manager chose to kill pod but should have had a grace period override.")
  1448  			}
  1449  			observedGracePeriod := *podKiller.gracePeriodOverride
  1450  			if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
  1451  				t.Fatalf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
  1452  			}
  1453  			// reset state
  1454  			podKiller.pod = nil
  1455  			podKiller.gracePeriodOverride = nil
  1456  
  1457  			// remove disk pressure
  1458  			fakeClock.Step(20 * time.Minute)
  1459  			summaryProvider.result = summaryStatsMaker(diskStatConst)
  1460  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1461  
  1462  			if err != nil {
  1463  				t.Fatalf("Manager expected no error but got %v", err)
  1464  			}
  1465  
  1466  			// we should not have disk pressure
  1467  			if manager.IsUnderDiskPressure() {
  1468  				t.Fatalf("Manager should not report disk pressure")
  1469  			}
  1470  
  1471  			// induce disk pressure!
  1472  			fakeClock.Step(1 * time.Minute)
  1473  			if tc.inducePressureOnWhichFs == "nodefs" {
  1474  				diskStatStart.rootFsAvailableBytes = tc.hardDiskPressure
  1475  			} else if tc.inducePressureOnWhichFs == "imagefs" {
  1476  				diskStatStart.imageFsAvailableBytes = tc.hardDiskPressure
  1477  			} else if tc.inducePressureOnWhichFs == "containerfs" {
  1478  				diskStatStart.containerFsAvailableBytes = tc.hardDiskPressure
  1479  			}
  1480  			summaryProvider.result = summaryStatsMaker(diskStatStart)
  1481  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1482  
  1483  			if err != nil {
  1484  				t.Fatalf("Manager expected no error but got %v", err)
  1485  			}
  1486  
  1487  			// we should have disk pressure
  1488  			if !manager.IsUnderDiskPressure() {
  1489  				t.Fatalf("Manager should report disk pressure")
  1490  			}
  1491  
  1492  			// check the right pod was killed
  1493  			if podKiller.pod != podToEvict {
  1494  				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  1495  			}
  1496  			observedGracePeriod = *podKiller.gracePeriodOverride
  1497  			if observedGracePeriod != int64(1) {
  1498  				t.Fatalf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", 1, observedGracePeriod)
  1499  			}
  1500  
  1501  			// try to admit our pod (should fail)
  1502  			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
  1503  				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
  1504  			}
  1505  
  1506  			// reduce disk pressure
  1507  			fakeClock.Step(1 * time.Minute)
  1508  
  1509  			summaryProvider.result = summaryStatsMaker(diskStatConst)
  1510  			podKiller.pod = nil // reset state
  1511  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1512  
  1513  			if err != nil {
  1514  				t.Fatalf("Manager should not have an error %v", err)
  1515  			}
  1516  			// we should have disk pressure (because transition period not yet met)
  1517  			if !manager.IsUnderDiskPressure() {
  1518  				t.Fatalf("Manager should report disk pressure")
  1519  			}
  1520  
  1521  			// no pod should have been killed
  1522  			if podKiller.pod != nil {
  1523  				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  1524  			}
  1525  
  1526  			// try to admit our pod (should fail)
  1527  			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
  1528  				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
  1529  			}
  1530  
  1531  			// move the clock past transition period to ensure that we stop reporting pressure
  1532  			fakeClock.Step(5 * time.Minute)
  1533  			summaryProvider.result = summaryStatsMaker(diskStatConst)
  1534  			podKiller.pod = nil // reset state
  1535  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1536  
  1537  			if err != nil {
  1538  				t.Fatalf("Manager should not have an error %v", err)
  1539  			}
  1540  
  1541  			// we should not have disk pressure (because transition period met)
  1542  			if manager.IsUnderDiskPressure() {
  1543  				t.Fatalf("Manager should not report disk pressure")
  1544  			}
  1545  
  1546  			// no pod should have been killed
  1547  			if podKiller.pod != nil {
  1548  				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  1549  			}
  1550  
  1551  			// try to admit our pod (should succeed)
  1552  			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
  1553  				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
  1554  			}
  1555  		})
  1556  	}
  1557  }
  1558  
  1559  // TestMinReclaim verifies that min-reclaim works as desired.
  1560  func TestMinReclaim(t *testing.T) {
  1561  	podMaker := makePodWithMemoryStats
  1562  	summaryStatsMaker := makeMemoryStats
  1563  	podsToMake := []podToMake{
  1564  		{name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
  1565  		{name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
  1566  		{name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"},
  1567  		{name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"},
  1568  		{name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"},
  1569  	}
  1570  	pods := []*v1.Pod{}
  1571  	podStats := map[*v1.Pod]statsapi.PodStats{}
  1572  	for _, podToMake := range podsToMake {
  1573  		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
  1574  		pods = append(pods, pod)
  1575  		podStats[pod] = podStat
  1576  	}
  1577  	podToEvict := pods[4]
  1578  	activePodsFunc := func() []*v1.Pod {
  1579  		return pods
  1580  	}
  1581  
  1582  	fakeClock := testingclock.NewFakeClock(time.Now())
  1583  	podKiller := &mockPodKiller{}
  1584  	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
  1585  	diskGC := &mockDiskGC{err: nil}
  1586  	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
  1587  
  1588  	config := Config{
  1589  		MaxPodGracePeriodSeconds: 5,
  1590  		PressureTransitionPeriod: time.Minute * 5,
  1591  		Thresholds: []evictionapi.Threshold{
  1592  			{
  1593  				Signal:   evictionapi.SignalMemoryAvailable,
  1594  				Operator: evictionapi.OpLessThan,
  1595  				Value: evictionapi.ThresholdValue{
  1596  					Quantity: quantityMustParse("1Gi"),
  1597  				},
  1598  				MinReclaim: &evictionapi.ThresholdValue{
  1599  					Quantity: quantityMustParse("500Mi"),
  1600  				},
  1601  			},
  1602  		},
  1603  	}
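        	// start with 2Gi of available memory, comfortably above the 1Gi eviction threshold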
  1604  	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
  1605  	manager := &managerImpl{
  1606  		clock:                        fakeClock,
  1607  		killPodFunc:                  podKiller.killPodNow,
  1608  		imageGC:                      diskGC,
  1609  		containerGC:                  diskGC,
  1610  		config:                       config,
  1611  		recorder:                     &record.FakeRecorder{},
  1612  		summaryProvider:              summaryProvider,
  1613  		nodeRef:                      nodeRef,
  1614  		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
  1615  		thresholdsFirstObservedAt:    thresholdsObservedAt{},
  1616  	}
  1617  
  1618  	// synchronize
  1619  	_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
  1620  	if err != nil {
  1621  		t.Errorf("Manager should not report any errors, but got: %v", err)
  1622  	}
  1623  	// we should not have memory pressure
  1624  	if manager.IsUnderMemoryPressure() {
  1625  		t.Errorf("Manager should not report memory pressure")
  1626  	}
  1627  
  1628  	// induce memory pressure!
  1629  	fakeClock.Step(1 * time.Minute)
  1630  	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
  1631  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1632  
  1633  	if err != nil {
  1634  		t.Fatalf("Manager should not have an error %v", err)
  1635  	}
  1636  
  1637  	// we should have memory pressure
  1638  	if !manager.IsUnderMemoryPressure() {
  1639  		t.Errorf("Manager should report memory pressure")
  1640  	}
  1641  
  1642  	// check the right pod was killed
  1643  	if podKiller.pod != podToEvict {
  1644  		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  1645  	}
  1646  	observedGracePeriod := *podKiller.gracePeriodOverride
  1647  	if observedGracePeriod != int64(1) {
  1648  		t.Errorf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", 1, observedGracePeriod)
  1649  	}
  1650  
  1651  	// reduce memory pressure, but not below the min-reclaim amount
  1652  	fakeClock.Step(1 * time.Minute)
  1653  	summaryProvider.result = summaryStatsMaker("1.2Gi", podStats)
  1654  	podKiller.pod = nil // reset state
  1655  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1656  
  1657  	if err != nil {
  1658  		t.Fatalf("Manager should not have an error %v", err)
  1659  	}
  1660  
  1661  	// we should have memory pressure (because transition period not yet met)
  1662  	if !manager.IsUnderMemoryPressure() {
  1663  		t.Errorf("Manager should report memory pressure")
  1664  	}
  1665  
  1666  	// check the right pod was killed
  1667  	if podKiller.pod != podToEvict {
  1668  		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  1669  	}
  1670  	observedGracePeriod = *podKiller.gracePeriodOverride
  1671  	if observedGracePeriod != int64(1) {
  1672  		t.Errorf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", 1, observedGracePeriod)
  1673  	}
  1674  
  1675  	// reduce memory pressure and ensure the min-reclaim amount
  1676  	fakeClock.Step(1 * time.Minute)
  1677  	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
  1678  	podKiller.pod = nil // reset state
  1679  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1680  
  1681  	if err != nil {
  1682  		t.Fatalf("Manager should not have an error %v", err)
  1683  	}
  1684  
  1685  	// we should have memory pressure (because transition period not yet met)
  1686  	if !manager.IsUnderMemoryPressure() {
  1687  		t.Errorf("Manager should report memory pressure")
  1688  	}
  1689  
  1690  	// no pod should have been killed
  1691  	if podKiller.pod != nil {
  1692  		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  1693  	}
  1694  
  1695  	// move the clock past transition period to ensure that we stop reporting pressure
  1696  	fakeClock.Step(5 * time.Minute)
  1697  	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
  1698  	podKiller.pod = nil // reset state
  1699  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1700  
  1701  	if err != nil {
  1702  		t.Fatalf("Manager should not have an error %v", err)
  1703  	}
  1704  
  1705  	// we should not have memory pressure (because transition period met)
  1706  	if manager.IsUnderMemoryPressure() {
  1707  		t.Errorf("Manager should not report memory pressure")
  1708  	}
  1709  
  1710  	// no pod should have been killed
  1711  	if podKiller.pod != nil {
  1712  		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  1713  	}
  1714  }
  1715  
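        // TestNodeReclaimFuncs verifies that node-level reclaim (image and/or container garbage
        // collection) is attempted before evicting pods, and that a pod is evicted only when
        // reclaim cannot satisfy the threshold and its min-reclaim.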
  1716  func TestNodeReclaimFuncs(t *testing.T) {
  1717  	testCases := map[string]struct {
  1718  		nodeFsStats                   string
  1719  		imageFsStats                  string
  1720  		containerFsStats              string
  1721  		kubeletSeparateDiskFeature    bool
  1722  		writeableSeparateFromReadOnly bool
  1723  		expectContainerGcCall         bool
  1724  		expectImageGcCall             bool
  1725  		thresholdToMonitor            evictionapi.Threshold
  1726  		podToMakes                    []podToMake
  1727  		dedicatedImageFs              *bool
  1728  		expectErr                     string
  1729  		inducePressureOnWhichFs       string
  1730  		softDiskPressure              string
  1731  		hardDiskPressure              string
  1732  	}{
  1733  		"eviction due to disk pressure; no image fs": {
  1734  			dedicatedImageFs:        ptr.To(false),
  1735  			nodeFsStats:             "16Gi",
  1736  			imageFsStats:            "16Gi",
  1737  			containerFsStats:        "16Gi",
  1738  			inducePressureOnWhichFs: "nodefs",
  1739  			softDiskPressure:        "1.5Gi",
  1740  			hardDiskPressure:        "750Mi",
  1741  			expectContainerGcCall:   true,
  1742  			expectImageGcCall:       true,
  1743  			thresholdToMonitor: evictionapi.Threshold{
  1744  				Signal:   evictionapi.SignalNodeFsAvailable,
  1745  				Operator: evictionapi.OpLessThan,
  1746  				Value: evictionapi.ThresholdValue{
  1747  					Quantity: quantityMustParse("1Gi"),
  1748  				},
  1749  				MinReclaim: &evictionapi.ThresholdValue{
  1750  					Quantity: quantityMustParse("500Mi"),
  1751  				},
  1752  			},
  1753  			podToMakes: []podToMake{
  1754  				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
  1755  				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
  1756  				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
  1757  				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
  1758  				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
  1759  			},
  1760  		},
  1761  		"eviction due to image disk pressure; image fs": {
  1762  			dedicatedImageFs:        ptr.To(true),
  1763  			nodeFsStats:             "16Gi",
  1764  			imageFsStats:            "16Gi",
  1765  			containerFsStats:        "16Gi",
  1766  			softDiskPressure:        "1.5Gi",
  1767  			hardDiskPressure:        "750Mi",
  1768  			inducePressureOnWhichFs: "imagefs",
  1769  			expectContainerGcCall:   true,
  1770  			expectImageGcCall:       true,
  1771  			thresholdToMonitor: evictionapi.Threshold{
  1772  				Signal:   evictionapi.SignalImageFsAvailable,
  1773  				Operator: evictionapi.OpLessThan,
  1774  				Value: evictionapi.ThresholdValue{
  1775  					Quantity: quantityMustParse("1Gi"),
  1776  				},
  1777  				MinReclaim: &evictionapi.ThresholdValue{
  1778  					Quantity: quantityMustParse("500Mi"),
  1779  				},
  1780  			},
  1781  			podToMakes: []podToMake{
  1782  				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
  1783  				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
  1784  				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
  1785  				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
  1786  				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
  1787  			},
  1788  		},
  1789  		"eviction due to container disk pressure; container fs": {
  1790  			dedicatedImageFs:              ptr.To(true),
  1791  			kubeletSeparateDiskFeature:    true,
  1792  			writeableSeparateFromReadOnly: true,
  1793  			nodeFsStats:                   "16Gi",
  1794  			imageFsStats:                  "16Gi",
  1795  			containerFsStats:              "16Gi",
  1796  			softDiskPressure:              "1.5Gi",
  1797  			hardDiskPressure:              "750Mi",
  1798  			inducePressureOnWhichFs:       "nodefs",
  1799  			expectContainerGcCall:         true,
  1800  			expectImageGcCall:             false,
  1801  			thresholdToMonitor: evictionapi.Threshold{
  1802  				Signal:   evictionapi.SignalNodeFsAvailable,
  1803  				Operator: evictionapi.OpLessThan,
  1804  				Value: evictionapi.ThresholdValue{
  1805  					Quantity: quantityMustParse("1Gi"),
  1806  				},
  1807  				MinReclaim: &evictionapi.ThresholdValue{
  1808  					Quantity: quantityMustParse("500Mi"),
  1809  				},
  1810  			},
  1811  			podToMakes: []podToMake{
  1812  				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
  1813  				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
  1814  				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
  1815  				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
  1816  				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
  1817  			},
  1818  		},
  1819  		"eviction due to image disk pressure; container fs": {
  1820  			dedicatedImageFs:              ptr.To(true),
  1821  			kubeletSeparateDiskFeature:    true,
  1822  			writeableSeparateFromReadOnly: true,
  1823  			nodeFsStats:                   "16Gi",
  1824  			imageFsStats:                  "16Gi",
  1825  			containerFsStats:              "16Gi",
  1826  			softDiskPressure:              "1.5Gi",
  1827  			hardDiskPressure:              "750Mi",
  1828  			inducePressureOnWhichFs:       "imagefs",
  1829  			expectContainerGcCall:         false,
  1830  			expectImageGcCall:             true,
  1831  			thresholdToMonitor: evictionapi.Threshold{
  1832  				Signal:   evictionapi.SignalImageFsAvailable,
  1833  				Operator: evictionapi.OpLessThan,
  1834  				Value: evictionapi.ThresholdValue{
  1835  					Quantity: quantityMustParse("1Gi"),
  1836  				},
  1837  				MinReclaim: &evictionapi.ThresholdValue{
  1838  					Quantity: quantityMustParse("500Mi"),
  1839  				},
  1840  			},
  1841  			podToMakes: []podToMake{
  1842  				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsUsed: "900Mi"},
  1843  				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), logsFsUsed: "50Mi"},
  1844  				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsUsed: "400Mi"},
  1845  				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "400Mi"},
  1846  				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsUsed: "100Mi"},
  1847  			},
  1848  		},
  1849  	}
  1850  
  1851  	for name, tc := range testCases {
  1852  		t.Run(name, func(t *testing.T) {
  1853  			featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, tc.kubeletSeparateDiskFeature)
  1854  
  1855  			podMaker := makePodWithDiskStats
  1856  			summaryStatsMaker := makeDiskStats
  1857  			podsToMake := tc.podToMakes
  1858  			pods := []*v1.Pod{}
  1859  			podStats := map[*v1.Pod]statsapi.PodStats{}
  1860  			for _, podToMake := range podsToMake {
  1861  				pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed, nil)
  1862  				pods = append(pods, pod)
  1863  				podStats[pod] = podStat
  1864  			}
  1865  			podToEvict := pods[0]
  1866  			activePodsFunc := func() []*v1.Pod {
  1867  				return pods
  1868  			}
  1869  
  1870  			fakeClock := testingclock.NewFakeClock(time.Now())
  1871  			podKiller := &mockPodKiller{}
  1872  			diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
  1873  			nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
  1874  
  1875  			config := Config{
  1876  				MaxPodGracePeriodSeconds: 5,
  1877  				PressureTransitionPeriod: time.Minute * 5,
  1878  				Thresholds:               []evictionapi.Threshold{tc.thresholdToMonitor},
  1879  			}
  1880  			diskStatStart := diskStats{
  1881  				rootFsAvailableBytes:      tc.nodeFsStats,
  1882  				imageFsAvailableBytes:     tc.imageFsStats,
  1883  				containerFsAvailableBytes: tc.containerFsStats,
  1884  				podStats:                  podStats,
  1885  			}
  1886  			// diskStatConst preserves the starting, pressure-free disk stats so later steps can verify that pressure clears. Don't mutate it!
  1887  			diskStatConst := diskStatStart
  1888  			summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStatStart)}
  1889  			diskGC := &mockDiskGC{fakeSummaryProvider: summaryProvider, err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
  1890  			manager := &managerImpl{
  1891  				clock:                        fakeClock,
  1892  				killPodFunc:                  podKiller.killPodNow,
  1893  				imageGC:                      diskGC,
  1894  				containerGC:                  diskGC,
  1895  				config:                       config,
  1896  				recorder:                     &record.FakeRecorder{},
  1897  				summaryProvider:              summaryProvider,
  1898  				nodeRef:                      nodeRef,
  1899  				nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
  1900  				thresholdsFirstObservedAt:    thresholdsObservedAt{},
  1901  			}
  1902  
  1903  			// synchronize
  1904  			_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
  1905  
  1906  			if err != nil {
  1907  				t.Fatalf("Manager should not have an error %v", err)
  1908  			}
  1909  
  1910  			// we should not have disk pressure
  1911  			if manager.IsUnderDiskPressure() {
  1912  				t.Errorf("Manager should not report disk pressure")
  1913  			}
  1914  
  1915  			// induce hard threshold
  1916  			fakeClock.Step(1 * time.Minute)
  1917  
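        			// setDiskStatsBasedOnFs returns a copy of diskStat with the chosen filesystem's available bytes lowered to diskPressure.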
  1918  			setDiskStatsBasedOnFs := func(whichFs string, diskPressure string, diskStat diskStats) diskStats {
  1919  				if whichFs == "nodefs" {
  1920  					diskStat.rootFsAvailableBytes = diskPressure
  1921  				} else if whichFs == "imagefs" {
  1922  					diskStat.imageFsAvailableBytes = diskPressure
  1923  				} else if whichFs == "containerfs" {
  1924  					diskStat.containerFsAvailableBytes = diskPressure
  1925  				}
  1926  				return diskStat
  1927  			}
  1928  			newDiskAfterHardEviction := setDiskStatsBasedOnFs(tc.inducePressureOnWhichFs, tc.hardDiskPressure, diskStatStart)
  1929  			summaryProvider.result = summaryStatsMaker(newDiskAfterHardEviction)
  1930  			// make GC successfully return disk usage to previous levels
  1931  			diskGC.summaryAfterGC = summaryStatsMaker(diskStatConst)
  1932  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1933  
  1934  			if err != nil {
  1935  				t.Fatalf("Manager should not have an error %v", err)
  1936  			}
  1937  
  1938  			// we should have disk pressure
  1939  			if !manager.IsUnderDiskPressure() {
  1940  				t.Fatalf("Manager should report disk pressure since soft threshold was met")
  1941  			}
  1942  
  1943  			// verify image, container or both gc were called.
  1944  			// split filesystem can have container gc called without image.
  1945  			// same filesystem should have both.
  1946  			if diskGC.imageGCInvoked != tc.expectImageGcCall || diskGC.containerGCInvoked != tc.expectContainerGcCall {
  1947  				t.Fatalf("Unexpected gc calls: image gc invoked: %t (expected %t), container gc invoked: %t (expected %t)", diskGC.imageGCInvoked, tc.expectImageGcCall, diskGC.containerGCInvoked, tc.expectContainerGcCall)
  1948  			}
  1949  
  1950  			// verify no pod was killed because garbage collection was sufficient
  1951  			if podKiller.pod != nil {
  1952  				t.Fatalf("Manager should not have killed a pod, but killed: %v", podKiller.pod.Name)
  1953  			}
  1954  
  1955  			// reset state
  1956  			diskGC.imageGCInvoked = false
  1957  			diskGC.containerGCInvoked = false
  1958  
  1959  			// remove disk pressure
  1960  			fakeClock.Step(20 * time.Minute)
  1961  			summaryProvider.result = summaryStatsMaker(diskStatConst)
  1962  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1963  
  1964  			if err != nil {
  1965  				t.Fatalf("Manager should not have an error %v", err)
  1966  			}
  1967  
  1968  			// we should not have disk pressure
  1969  			if manager.IsUnderDiskPressure() {
  1970  				t.Fatalf("Manager should not report disk pressure")
  1971  			}
  1972  
  1973  			// synchronize
  1974  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1975  
  1976  			if err != nil {
  1977  				t.Fatalf("Manager should not have an error %v", err)
  1978  			}
  1979  
  1980  			// we should not have disk pressure
  1981  			if manager.IsUnderDiskPressure() {
  1982  				t.Fatalf("Manager should not report disk pressure")
  1983  			}
  1984  
  1985  			// induce hard threshold
  1986  			fakeClock.Step(1 * time.Minute)
  1987  			newDiskAfterHardEviction = setDiskStatsBasedOnFs(tc.inducePressureOnWhichFs, tc.hardDiskPressure, diskStatStart)
  1988  			summaryProvider.result = summaryStatsMaker(newDiskAfterHardEviction)
  1989  			// make GC return disk usage below the threshold, but not satisfying minReclaim
  1990  			gcBelowThreshold := setDiskStatsBasedOnFs(tc.inducePressureOnWhichFs, "1.1G", newDiskAfterHardEviction)
  1991  			diskGC.summaryAfterGC = summaryStatsMaker(gcBelowThreshold)
  1992  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  1993  
  1994  			if err != nil {
  1995  				t.Fatalf("Manager should not have an error %v", err)
  1996  			}
  1997  
  1998  			// we should have disk pressure
  1999  			if !manager.IsUnderDiskPressure() {
  2000  				t.Fatalf("Manager should report disk pressure since soft threshold was met")
  2001  			}
  2002  
  2003  			// verify image, container or both gc were called.
  2004  			// split filesystem can have container gc called without image.
  2005  			// same filesystem should have both.
  2006  			if diskGC.imageGCInvoked != tc.expectImageGcCall || diskGC.containerGCInvoked != tc.expectContainerGcCall {
  2007  				t.Fatalf("Unexpected gc calls: image gc invoked: %t (expected %t), container gc invoked: %t (expected %t)", diskGC.imageGCInvoked, tc.expectImageGcCall, diskGC.containerGCInvoked, tc.expectContainerGcCall)
  2008  			}
  2009  
  2010  			// verify a pod was killed because garbage collection was not enough to satisfy minReclaim
  2011  			if podKiller.pod == nil {
  2012  				t.Fatalf("Manager should have killed a pod, but didn't")
  2013  			}
  2014  
  2015  			// reset state
  2016  			diskGC.imageGCInvoked = false
  2017  			diskGC.containerGCInvoked = false
  2018  			podKiller.pod = nil
  2019  
  2020  			// remove disk pressure
  2021  			fakeClock.Step(20 * time.Minute)
  2022  			summaryProvider.result = summaryStatsMaker(diskStatConst)
  2023  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2024  
  2025  			if err != nil {
  2026  				t.Fatalf("Manager should not have an error %v", err)
  2027  			}
  2028  
  2029  			// we should not have disk pressure
  2030  			if manager.IsUnderDiskPressure() {
  2031  				t.Fatalf("Manager should not report disk pressure")
  2032  			}
  2033  
  2034  			// induce disk pressure!
  2035  			fakeClock.Step(1 * time.Minute)
  2036  			hardDiskPressureStats := setDiskStatsBasedOnFs(tc.inducePressureOnWhichFs, tc.hardDiskPressure, diskStatStart)
  2037  			summaryProvider.result = summaryStatsMaker(hardDiskPressureStats)
  2038  			// Don't reclaim any disk
  2039  			diskGC.summaryAfterGC = summaryStatsMaker(hardDiskPressureStats)
  2040  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2041  
  2042  			if err != nil {
  2043  				t.Fatalf("Manager should not have an error %v", err)
  2044  			}
  2045  
  2046  			// we should have disk pressure
  2047  			if !manager.IsUnderDiskPressure() {
  2048  				t.Fatalf("Manager should report disk pressure")
  2049  			}
  2050  
  2051  			// verify image, container or both gc were called.
  2052  			// split filesystem can have container gc called without image.
  2053  			// same filesystem should have both.
  2054  			if diskGC.imageGCInvoked != tc.expectImageGcCall || diskGC.containerGCInvoked != tc.expectContainerGcCall {
  2055  				t.Fatalf("Unexpected gc calls: image gc invoked: %t (expected %t), container gc invoked: %t (expected %t)", diskGC.imageGCInvoked, tc.expectImageGcCall, diskGC.containerGCInvoked, tc.expectContainerGcCall)
  2056  			}
  2057  
  2058  			// check the right pod was killed
  2059  			if podKiller.pod != podToEvict {
  2060  				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  2061  			}
  2062  			observedGracePeriod := *podKiller.gracePeriodOverride
  2063  			if observedGracePeriod != int64(1) {
  2064  				t.Fatalf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", 1, observedGracePeriod)
  2065  			}
  2066  
  2067  			// reduce disk pressure
  2068  			fakeClock.Step(1 * time.Minute)
  2069  			summaryProvider.result = summaryStatsMaker(diskStatConst)
  2070  			diskGC.imageGCInvoked = false     // reset state
  2071  			diskGC.containerGCInvoked = false // reset state
  2072  			podKiller.pod = nil               // reset state
  2073  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2074  
  2075  			if err != nil {
  2076  				t.Fatalf("Manager should not have an error %v", err)
  2077  			}
  2078  
  2079  			// we should have disk pressure (because transition period not yet met)
  2080  			if !manager.IsUnderDiskPressure() {
  2081  				t.Fatalf("Manager should report disk pressure")
  2082  			}
  2083  
  2084  			if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
  2085  				t.Errorf("Manager chose to perform gc when it was not needed")
  2086  			}
  2087  
  2088  			// no pod should have been killed
  2089  			if podKiller.pod != nil {
  2090  				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  2091  			}
  2092  
  2093  			// move the clock past transition period to ensure that we stop reporting pressure
  2094  			fakeClock.Step(5 * time.Minute)
  2095  			summaryProvider.result = summaryStatsMaker(diskStatConst)
  2096  			diskGC.imageGCInvoked = false     // reset state
  2097  			diskGC.containerGCInvoked = false // reset state
  2098  			podKiller.pod = nil               // reset state
  2099  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2100  
  2101  			if err != nil {
  2102  				t.Fatalf("Manager should not have an error %v", err)
  2103  			}
  2104  
  2105  			// we should not have disk pressure (because transition period met)
  2106  			if manager.IsUnderDiskPressure() {
  2107  				t.Fatalf("Manager should not report disk pressure")
  2108  			}
  2109  
  2110  			if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
  2111  				t.Errorf("Manager chose to perform gc when it was not needed")
  2112  			}
  2113  
  2114  			// no pod should have been killed
  2115  			if podKiller.pod != nil {
  2116  				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  2117  			}
  2118  		})
  2119  	}
  2120  }
  2121  
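        // TestInodePressureFsInodes verifies eviction and admission behavior when free inodes on
        // the nodefs, imagefs, or containerfs filesystem fall below soft and hard thresholds.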
  2122  func TestInodePressureFsInodes(t *testing.T) {
  2123  	podMaker := func(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootInodes, logInodes, volumeInodes string) (*v1.Pod, statsapi.PodStats) {
  2124  		pod := newPod(name, priority, []v1.Container{
  2125  			newContainer(name, requests, limits),
  2126  		}, nil)
  2127  		podStats := newPodInodeStats(pod, parseQuantity(rootInodes), parseQuantity(logInodes), parseQuantity(volumeInodes))
  2128  		return pod, podStats
  2129  	}
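        	// summaryStatsMaker builds a node summary reporting the given free/total inode counts for
        	// the root, image, and container filesystems, along with the provided pod stats.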
  2130  	summaryStatsMaker := func(rootFsInodesFree, rootFsInodes, imageFsInodesFree, imageFsInodes, containerFsInodesFree, containerFsInodes string, podStats map[*v1.Pod]statsapi.PodStats) *statsapi.Summary {
  2131  		rootFsInodesFreeVal := resource.MustParse(rootFsInodesFree)
  2132  		internalRootFsInodesFree := uint64(rootFsInodesFreeVal.Value())
  2133  		rootFsInodesVal := resource.MustParse(rootFsInodes)
  2134  		internalRootFsInodes := uint64(rootFsInodesVal.Value())
  2135  
  2136  		imageFsInodesFreeVal := resource.MustParse(imageFsInodesFree)
  2137  		internalImageFsInodesFree := uint64(imageFsInodesFreeVal.Value())
  2138  		imageFsInodesVal := resource.MustParse(imageFsInodes)
  2139  		internalImageFsInodes := uint64(imageFsInodesVal.Value())
  2140  
  2141  		containerFsInodesFreeVal := resource.MustParse(containerFsInodesFree)
  2142  		internalContainerFsInodesFree := uint64(containerFsInodesFreeVal.Value())
  2143  		containerFsInodesVal := resource.MustParse(containerFsInodes)
  2144  		internalContainerFsInodes := uint64(containerFsInodesVal.Value())
  2145  
  2146  		result := &statsapi.Summary{
  2147  			Node: statsapi.NodeStats{
  2148  				Fs: &statsapi.FsStats{
  2149  					InodesFree: &internalRootFsInodesFree,
  2150  					Inodes:     &internalRootFsInodes,
  2151  				},
  2152  				Runtime: &statsapi.RuntimeStats{
  2153  					ImageFs: &statsapi.FsStats{
  2154  						InodesFree: &internalImageFsInodesFree,
  2155  						Inodes:     &internalImageFsInodes,
  2156  					},
  2157  					ContainerFs: &statsapi.FsStats{
  2158  						InodesFree: &internalContainerFsInodesFree,
  2159  						Inodes:     &internalContainerFsInodes,
  2160  					},
  2161  				},
  2162  			},
  2163  			Pods: []statsapi.PodStats{},
  2164  		}
  2165  		for _, podStat := range podStats {
  2166  			result.Pods = append(result.Pods, podStat)
  2167  		}
  2168  		return result
  2169  	}
  2170  
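        	// setINodesFreeBasedOnFs mutates the given summary so the chosen filesystem reports the specified number of free inodes.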
  2171  	setINodesFreeBasedOnFs := func(whichFs string, inodesFree string, diskStat *statsapi.Summary) *statsapi.Summary {
  2172  		inodesFreeVal := resource.MustParse(inodesFree)
  2173  		internalFsInodesFree := uint64(inodesFreeVal.Value())
  2174  
  2175  		if whichFs == "nodefs" {
  2176  			diskStat.Node.Fs.InodesFree = &internalFsInodesFree
  2177  		} else if whichFs == "imagefs" {
  2178  			diskStat.Node.Runtime.ImageFs.InodesFree = &internalFsInodesFree
  2179  		} else if whichFs == "containerfs" {
  2180  			diskStat.Node.Runtime.ContainerFs.InodesFree = &internalFsInodesFree
  2181  		}
  2182  		return diskStat
  2183  	}
  2184  
  2185  	testCases := map[string]struct {
  2186  		nodeFsInodesFree              string
  2187  		nodeFsInodes                  string
  2188  		imageFsInodesFree             string
  2189  		imageFsInodes                 string
  2190  		containerFsInodesFree         string
  2191  		containerFsInodes             string
  2192  		kubeletSeparateDiskFeature    bool
  2193  		writeableSeparateFromReadOnly bool
  2194  		thresholdToMonitor            []evictionapi.Threshold
  2195  		podToMakes                    []podToMake
  2196  		dedicatedImageFs              *bool
  2197  		expectErr                     string
  2198  		inducePressureOnWhichFs       string
  2199  		softINodePressure             string
  2200  		hardINodePressure             string
  2201  	}{
  2202  		"eviction due to disk pressure; no image fs": {
  2203  			dedicatedImageFs:        ptr.To(false),
  2204  			nodeFsInodesFree:        "3Mi",
  2205  			nodeFsInodes:            "4Mi",
  2206  			imageFsInodesFree:       "3Mi",
  2207  			imageFsInodes:           "4Mi",
  2208  			containerFsInodesFree:   "3Mi",
  2209  			containerFsInodes:       "4Mi",
  2210  			inducePressureOnWhichFs: "nodefs",
  2211  			softINodePressure:       "1.5Mi",
  2212  			hardINodePressure:       "0.5Mi",
  2213  			thresholdToMonitor: []evictionapi.Threshold{
  2214  				{
  2215  					Signal:   evictionapi.SignalNodeFsInodesFree,
  2216  					Operator: evictionapi.OpLessThan,
  2217  					Value: evictionapi.ThresholdValue{
  2218  						Quantity: quantityMustParse("1Mi"),
  2219  					},
  2220  				},
  2221  				{
  2222  					Signal:   evictionapi.SignalNodeFsInodesFree,
  2223  					Operator: evictionapi.OpLessThan,
  2224  					Value: evictionapi.ThresholdValue{
  2225  						Quantity: quantityMustParse("2Mi"),
  2226  					},
  2227  					GracePeriod: time.Minute * 2,
  2228  				},
  2229  			},
  2230  			podToMakes: []podToMake{
  2231  				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsInodesUsed: "900Mi"},
  2232  				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "50Mi"},
  2233  				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "400Mi"},
  2234  				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "400Mi"},
  2235  				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "100Mi"},
  2236  			},
  2237  		},
  2238  		"eviction due to image disk pressure; image fs": {
  2239  			dedicatedImageFs:        ptr.To(true),
  2240  			nodeFsInodesFree:        "3Mi",
  2241  			nodeFsInodes:            "4Mi",
  2242  			imageFsInodesFree:       "3Mi",
  2243  			imageFsInodes:           "4Mi",
  2244  			containerFsInodesFree:   "3Mi",
  2245  			containerFsInodes:       "4Mi",
  2246  			softINodePressure:       "1.5Mi",
  2247  			hardINodePressure:       "0.5Mi",
  2248  			inducePressureOnWhichFs: "imagefs",
  2249  			thresholdToMonitor: []evictionapi.Threshold{
  2250  				{
  2251  					Signal:   evictionapi.SignalImageFsInodesFree,
  2252  					Operator: evictionapi.OpLessThan,
  2253  					Value: evictionapi.ThresholdValue{
  2254  						Quantity: quantityMustParse("1Mi"),
  2255  					},
  2256  				},
  2257  				{
  2258  					Signal:   evictionapi.SignalImageFsInodesFree,
  2259  					Operator: evictionapi.OpLessThan,
  2260  					Value: evictionapi.ThresholdValue{
  2261  						Quantity: quantityMustParse("2Mi"),
  2262  					},
  2263  					GracePeriod: time.Minute * 2,
  2264  				},
  2265  			},
  2266  			podToMakes: []podToMake{
  2267  				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsInodesUsed: "900Mi"},
  2268  				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "50Mi"},
  2269  				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "400Mi"},
  2270  				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "400Mi"},
  2271  				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "100Mi"},
  2272  			},
  2273  		},
  2274  		"eviction due to container disk pressure; container fs": {
  2275  			dedicatedImageFs:              ptr.To(true),
  2276  			kubeletSeparateDiskFeature:    true,
  2277  			writeableSeparateFromReadOnly: true,
  2278  			nodeFsInodesFree:              "3Mi",
  2279  			nodeFsInodes:                  "4Mi",
  2280  			imageFsInodesFree:             "3Mi",
  2281  			imageFsInodes:                 "4Mi",
  2282  			containerFsInodesFree:         "3Mi",
  2283  			containerFsInodes:             "4Mi",
  2284  			softINodePressure:             "1.5Mi",
  2285  			hardINodePressure:             "0.5Mi",
  2286  			inducePressureOnWhichFs:       "nodefs",
  2287  			thresholdToMonitor: []evictionapi.Threshold{
  2288  				{
  2289  					Signal:   evictionapi.SignalNodeFsInodesFree,
  2290  					Operator: evictionapi.OpLessThan,
  2291  					Value: evictionapi.ThresholdValue{
  2292  						Quantity: quantityMustParse("1Mi"),
  2293  					},
  2294  				},
  2295  				{
  2296  					Signal:   evictionapi.SignalNodeFsInodesFree,
  2297  					Operator: evictionapi.OpLessThan,
  2298  					Value: evictionapi.ThresholdValue{
  2299  						Quantity: quantityMustParse("2Mi"),
  2300  					},
  2301  					GracePeriod: time.Minute * 2,
  2302  				},
  2303  			},
  2304  			podToMakes: []podToMake{
  2305  				{name: "low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), rootFsInodesUsed: "900Mi"},
  2306  				{name: "below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "50Mi"},
  2307  				{name: "above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), rootFsInodesUsed: "400Mi"},
  2308  				{name: "high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "400Mi"},
  2309  				{name: "low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), rootFsInodesUsed: "100Mi"},
  2310  			},
  2311  		},
  2312  	}
  2313  
  2314  	for name, tc := range testCases {
  2315  		t.Run(name, func(t *testing.T) {
  2316  			featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.KubeletSeparateDiskGC, tc.kubeletSeparateDiskFeature)
  2317  
  2320  			podsToMake := tc.podToMakes
  2321  			pods := []*v1.Pod{}
  2322  			podStats := map[*v1.Pod]statsapi.PodStats{}
  2323  			for _, podToMake := range podsToMake {
  2324  				pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsInodesUsed, podToMake.logsFsInodesUsed, podToMake.perLocalVolumeInodesUsed)
  2325  				pods = append(pods, pod)
  2326  				podStats[pod] = podStat
  2327  			}
  2328  			podToEvict := pods[0]
  2329  			activePodsFunc := func() []*v1.Pod {
  2330  				return pods
  2331  			}
  2332  
  2333  			fakeClock := testingclock.NewFakeClock(time.Now())
  2334  			podKiller := &mockPodKiller{}
  2335  			diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: tc.dedicatedImageFs}
  2336  			diskGC := &mockDiskGC{err: nil, readAndWriteSeparate: tc.writeableSeparateFromReadOnly}
  2337  			nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
  2338  
  2339  			config := Config{
  2340  				MaxPodGracePeriodSeconds: 5,
  2341  				PressureTransitionPeriod: time.Minute * 5,
  2342  				Thresholds:               tc.thresholdToMonitor,
  2343  			}
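        			// two independent copies of the starting stats: startingStatsConst preserves the
        			// pressure-free baseline, while startingStatsModified is mutated to induce inode pressure.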
  2344  			startingStatsConst := summaryStatsMaker(tc.nodeFsInodesFree, tc.nodeFsInodes, tc.imageFsInodesFree, tc.imageFsInodes, tc.containerFsInodesFree, tc.containerFsInodes, podStats)
  2345  			startingStatsModified := summaryStatsMaker(tc.nodeFsInodesFree, tc.nodeFsInodes, tc.imageFsInodesFree, tc.imageFsInodes, tc.containerFsInodesFree, tc.containerFsInodes, podStats)
  2346  			summaryProvider := &fakeSummaryProvider{result: startingStatsModified}
  2347  			manager := &managerImpl{
  2348  				clock:                        fakeClock,
  2349  				killPodFunc:                  podKiller.killPodNow,
  2350  				imageGC:                      diskGC,
  2351  				containerGC:                  diskGC,
  2352  				config:                       config,
  2353  				recorder:                     &record.FakeRecorder{},
  2354  				summaryProvider:              summaryProvider,
  2355  				nodeRef:                      nodeRef,
  2356  				nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
  2357  				thresholdsFirstObservedAt:    thresholdsObservedAt{},
  2358  			}
  2359  
  2360  			// create a best effort pod to test admission
  2361  			podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0", "0", "0")
  2362  
  2363  			// synchronize
  2364  			_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
  2365  
  2366  			if err != nil {
  2367  				t.Fatalf("Manager should not have an error %v", err)
  2368  			}
  2369  
  2370  			// we should not have disk pressure
  2371  			if manager.IsUnderDiskPressure() {
  2372  				t.Fatalf("Manager should not report inode pressure")
  2373  			}
  2374  
  2375  			// try to admit our pod (should succeed)
  2376  			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
  2377  				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
  2378  			}
  2379  
  2380  			// induce soft threshold
  2381  			fakeClock.Step(1 * time.Minute)
  2382  			summaryProvider.result = setINodesFreeBasedOnFs(tc.inducePressureOnWhichFs, tc.softINodePressure, startingStatsModified)
  2383  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2384  
  2385  			if err != nil {
  2386  				t.Fatalf("Manager should not have an error %v", err)
  2387  			}
  2388  
  2389  			// we should have disk pressure
  2390  			if !manager.IsUnderDiskPressure() {
  2391  				t.Fatalf("Manager should report inode pressure since soft threshold was met")
  2392  			}
  2393  
  2394  			// verify no pod was killed yet because enough time has not yet passed.
  2395  			if podKiller.pod != nil {
  2396  				t.Fatalf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
  2397  			}
  2398  
  2399  			// step forward in time past the grace period
  2400  			fakeClock.Step(3 * time.Minute)
  2401  			summaryProvider.result = setINodesFreeBasedOnFs(tc.inducePressureOnWhichFs, tc.softINodePressure, startingStatsModified)
  2402  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2403  
  2404  			if err != nil {
  2405  				t.Fatalf("Manager should not have an error %v", err)
  2406  			}
  2407  
  2408  			// we should have disk pressure
  2409  			if !manager.IsUnderDiskPressure() {
  2410  				t.Fatalf("Manager should report inode pressure since soft threshold was met")
  2411  			}
  2412  
  2413  			// verify the right pod was killed with the right grace period.
  2414  			if podKiller.pod != podToEvict {
  2415  				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  2416  			}
  2417  			if podKiller.gracePeriodOverride == nil {
  2418  				t.Fatalf("Manager chose to kill pod but should have had a grace period override.")
  2419  			}
  2420  			observedGracePeriod := *podKiller.gracePeriodOverride
  2421  			if observedGracePeriod != manager.config.MaxPodGracePeriodSeconds {
  2422  				t.Fatalf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", manager.config.MaxPodGracePeriodSeconds, observedGracePeriod)
  2423  			}
  2424  			// reset state
  2425  			podKiller.pod = nil
  2426  			podKiller.gracePeriodOverride = nil
  2427  
  2428  			// remove inode pressure
  2429  			fakeClock.Step(20 * time.Minute)
  2430  			summaryProvider.result = startingStatsConst
  2431  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2432  
  2433  			if err != nil {
  2434  				t.Fatalf("Manager should not have an error %v", err)
  2435  			}
  2436  
  2437  			// we should not have disk pressure
  2438  			if manager.IsUnderDiskPressure() {
  2439  				t.Fatalf("Manager should not report inode pressure")
  2440  			}
  2441  
  2442  			// induce inode pressure!
  2443  			fakeClock.Step(1 * time.Minute)
  2444  			summaryProvider.result = setINodesFreeBasedOnFs(tc.inducePressureOnWhichFs, tc.hardINodePressure, startingStatsModified)
  2445  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2446  
  2447  			if err != nil {
  2448  				t.Fatalf("Manager should not have an error %v", err)
  2449  			}
  2450  
  2451  			// we should have disk pressure
  2452  			if !manager.IsUnderDiskPressure() {
  2453  				t.Fatalf("Manager should report inode pressure")
  2454  			}
  2455  
  2456  			// check the right pod was killed
  2457  			if podKiller.pod != podToEvict {
  2458  				t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  2459  			}
  2460  			observedGracePeriod = *podKiller.gracePeriodOverride
  2461  			if observedGracePeriod != int64(1) {
  2462  				t.Fatalf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", 1, observedGracePeriod)
  2463  			}
  2464  
  2465  			// try to admit our pod (should fail)
  2466  			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
  2467  				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
  2468  			}
  2469  
  2470  			// reduce inode pressure
  2471  			fakeClock.Step(1 * time.Minute)
  2472  			summaryProvider.result = startingStatsConst
  2473  			podKiller.pod = nil // reset state
  2474  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2475  
  2476  			if err != nil {
  2477  				t.Fatalf("Manager should not have an error %v", err)
  2478  			}
  2479  
  2480  			// we should have disk pressure (because transition period not yet met)
  2481  			if !manager.IsUnderDiskPressure() {
  2482  				t.Fatalf("Manager should report inode pressure")
  2483  			}
  2484  
  2485  			// no pod should have been killed
  2486  			if podKiller.pod != nil {
  2487  				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  2488  			}
  2489  
  2490  			// try to admit our pod (should fail)
  2491  			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); result.Admit {
  2492  				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, false, result.Admit)
  2493  			}
  2494  
  2495  			// move the clock past transition period to ensure that we stop reporting pressure
  2496  			fakeClock.Step(5 * time.Minute)
  2497  			summaryProvider.result = startingStatsConst
  2498  			podKiller.pod = nil // reset state
  2499  			_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2500  
  2501  			if err != nil {
  2502  				t.Fatalf("Manager should not have an error %v", err)
  2503  			}
  2504  
  2505  			// we should not have disk pressure (because transition period met)
  2506  			if manager.IsUnderDiskPressure() {
  2507  				t.Fatalf("Manager should not report inode pressure")
  2508  			}
  2509  
  2510  			// no pod should have been killed
  2511  			if podKiller.pod != nil {
  2512  				t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  2513  			}
  2514  
  2515  			// try to admit our pod (should succeed)
  2516  			if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: podToAdmit}); !result.Admit {
  2517  				t.Fatalf("Admit pod: %v, expected: %v, actual: %v", podToAdmit, true, result.Admit)
  2518  			}
  2519  		})
  2520  	}
  2521  }
  2522  
  2523  // TestStaticCriticalPodsAreNotEvicted verifies that a static pod with critical priority is not selected for eviction under memory pressure.
  2524  func TestStaticCriticalPodsAreNotEvicted(t *testing.T) {
  2525  	podMaker := makePodWithMemoryStats
  2526  	summaryStatsMaker := makeMemoryStats
  2527  	podsToMake := []podToMake{
  2528  		{name: "critical", priority: scheduling.SystemCriticalPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "800Mi"},
  2529  	}
  2530  	pods := []*v1.Pod{}
  2531  	podStats := map[*v1.Pod]statsapi.PodStats{}
  2532  	for _, podToMake := range podsToMake {
  2533  		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
  2534  		pods = append(pods, pod)
  2535  		podStats[pod] = podStat
  2536  	}
  2537  
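        	// mark the pod as a static pod sourced from a file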
  2538  	pods[0].Annotations = map[string]string{
  2539  		kubelettypes.ConfigSourceAnnotationKey: kubelettypes.FileSource,
  2540  	}
  2541  	// Mark the pod as critical
  2542  	podPriority := scheduling.SystemCriticalPriority
  2543  	pods[0].Spec.Priority = &podPriority
  2544  	pods[0].Namespace = kubeapi.NamespaceSystem
  2545  
  2546  	podToEvict := pods[0]
  2547  	activePodsFunc := func() []*v1.Pod {
  2548  		return pods
  2549  	}
  2550  
  2551  	fakeClock := testingclock.NewFakeClock(time.Now())
  2552  	podKiller := &mockPodKiller{}
  2553  	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
  2554  	diskGC := &mockDiskGC{err: nil}
  2555  	nodeRef := &v1.ObjectReference{
  2556  		Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
  2557  	}
  2558  
  2559  	config := Config{
  2560  		MaxPodGracePeriodSeconds: 5,
  2561  		PressureTransitionPeriod: time.Minute * 5,
  2562  		Thresholds: []evictionapi.Threshold{
  2563  			{
  2564  				Signal:   evictionapi.SignalMemoryAvailable,
  2565  				Operator: evictionapi.OpLessThan,
  2566  				Value: evictionapi.ThresholdValue{
  2567  					Quantity: quantityMustParse("1Gi"),
  2568  				},
  2569  			},
  2570  			{
  2571  				Signal:   evictionapi.SignalMemoryAvailable,
  2572  				Operator: evictionapi.OpLessThan,
  2573  				Value: evictionapi.ThresholdValue{
  2574  					Quantity: quantityMustParse("2Gi"),
  2575  				},
  2576  				GracePeriod: time.Minute * 2,
  2577  			},
  2578  		},
  2579  	}
  2580  	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
  2581  	manager := &managerImpl{
  2582  		clock:                        fakeClock,
  2583  		killPodFunc:                  podKiller.killPodNow,
  2584  		imageGC:                      diskGC,
  2585  		containerGC:                  diskGC,
  2586  		config:                       config,
  2587  		recorder:                     &record.FakeRecorder{},
  2588  		summaryProvider:              summaryProvider,
  2589  		nodeRef:                      nodeRef,
  2590  		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
  2591  		thresholdsFirstObservedAt:    thresholdsObservedAt{},
  2592  	}
  2593  
  2594  	fakeClock.Step(1 * time.Minute)
  2595  	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
  2596  	_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
  2597  
  2598  	if err != nil {
  2599  		t.Fatalf("Manager should not have an error %v", err)
  2600  	}
  2601  
  2602  	// we should have memory pressure
  2603  	if !manager.IsUnderMemoryPressure() {
  2604  		t.Errorf("Manager should report memory pressure since soft threshold was met")
  2605  	}
  2606  
  2607  	// verify no pod was killed yet because enough time has not yet passed.
  2608  	if podKiller.pod != nil {
  2609  		t.Errorf("Manager should not have killed a pod yet, but killed: %v", podKiller.pod.Name)
  2610  	}
  2611  
  2612  	// step forward in time past the grace period
  2613  	fakeClock.Step(3 * time.Minute)
  2614  	summaryProvider.result = summaryStatsMaker("1500Mi", podStats)
  2615  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2616  
  2617  	if err != nil {
  2618  		t.Fatalf("Manager should not have an error %v", err)
  2619  	}
  2620  
  2621  	// we should have memory pressure
  2622  	if !manager.IsUnderMemoryPressure() {
  2623  		t.Errorf("Manager should report memory pressure since soft threshold was met")
  2624  	}
  2625  
  2626  	// verify the critical static pod was not killed even though the grace period elapsed.
  2627  	if podKiller.pod == podToEvict {
  2628  		t.Errorf("Manager chose to kill critical pod: %v, but should have ignored it", podKiller.pod.Name)
  2629  	}
  2630  	// reset state
  2631  	podKiller.pod = nil
  2632  	podKiller.gracePeriodOverride = nil
  2633  
  2634  	// remove memory pressure
  2635  	fakeClock.Step(20 * time.Minute)
  2636  	summaryProvider.result = summaryStatsMaker("3Gi", podStats)
  2637  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2638  
  2639  	if err != nil {
  2640  		t.Fatalf("Manager should not have an error %v", err)
  2641  	}
  2642  
  2643  	// we should not have memory pressure
  2644  	if manager.IsUnderMemoryPressure() {
  2645  		t.Errorf("Manager should not report memory pressure")
  2646  	}
  2647  
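        	// keep the pod static (file source) but drop its critical priority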
  2648  	pods[0].Annotations = map[string]string{
  2649  		kubelettypes.ConfigSourceAnnotationKey: kubelettypes.FileSource,
  2650  	}
  2651  	pods[0].Spec.Priority = nil
  2652  	pods[0].Namespace = kubeapi.NamespaceSystem
  2653  
  2654  	// induce memory pressure!
  2655  	fakeClock.Step(1 * time.Minute)
  2656  	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
  2657  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2658  
  2659  	if err != nil {
  2660  		t.Fatalf("Manager should not have an error %v", err)
  2661  	}
  2662  
  2663  	// we should have memory pressure
  2664  	if !manager.IsUnderMemoryPressure() {
  2665  		t.Errorf("Manager should report memory pressure")
  2666  	}
  2667  }
  2668  
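        // TestStorageLimitEvictions verifies that a pod which exceeds its local storage limit
        // (container rootfs, logs, or an emptyDir sizeLimit) is evicted with a gracePeriodOverride
        // of 1 when localStorageCapacityIsolation is enabled.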
  2669  func TestStorageLimitEvictions(t *testing.T) {
  2670  	volumeSizeLimit := resource.MustParse("1Gi")
  2671  
  2672  	testCases := map[string]struct {
  2673  		pod     podToMake
  2674  		volumes []v1.Volume
  2675  	}{
  2676  		"eviction due to rootfs above limit": {
  2677  			pod: podToMake{name: "rootfs-above-limits", priority: defaultPriority, requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "2Gi"},
  2678  		},
  2679  		"eviction due to logsfs above limit": {
  2680  			pod: podToMake{name: "logsfs-above-limits", priority: defaultPriority, requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), logsFsUsed: "2Gi"},
  2681  		},
  2682  		"eviction due to local volume above limit": {
  2683  			pod: podToMake{name: "localvolume-above-limits", priority: defaultPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "2Gi"},
  2684  			volumes: []v1.Volume{{
  2685  				Name: "emptyDirVolume",
  2686  				VolumeSource: v1.VolumeSource{
  2687  					EmptyDir: &v1.EmptyDirVolumeSource{
  2688  						SizeLimit: &volumeSizeLimit,
  2689  					},
  2690  				},
  2691  			}},
  2692  		},
  2693  	}
  2694  	for name, tc := range testCases {
  2695  		t.Run(name, func(t *testing.T) {
  2696  			podMaker := makePodWithDiskStats
  2697  			summaryStatsMaker := makeDiskStats
  2698  			podsToMake := []podToMake{
  2699  				tc.pod,
  2700  			}
  2701  			pods := []*v1.Pod{}
  2702  			podStats := map[*v1.Pod]statsapi.PodStats{}
  2703  			for _, podToMake := range podsToMake {
  2704  				pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed, tc.volumes)
  2705  				pods = append(pods, pod)
  2706  				podStats[pod] = podStat
  2707  			}
  2708  
  2709  			podToEvict := pods[0]
  2710  			activePodsFunc := func() []*v1.Pod {
  2711  				return pods
  2712  			}
  2713  
  2714  			fakeClock := testingclock.NewFakeClock(time.Now())
  2715  			podKiller := &mockPodKiller{}
  2716  			diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
  2717  			diskGC := &mockDiskGC{err: nil}
  2718  			nodeRef := &v1.ObjectReference{
  2719  				Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
  2720  			}
  2721  
  2722  			config := Config{
  2723  				MaxPodGracePeriodSeconds: 5,
  2724  				PressureTransitionPeriod: time.Minute * 5,
  2725  				Thresholds: []evictionapi.Threshold{
  2726  					{
  2727  						Signal:   evictionapi.SignalNodeFsAvailable,
  2728  						Operator: evictionapi.OpLessThan,
  2729  						Value: evictionapi.ThresholdValue{
  2730  							Quantity: quantityMustParse("1Gi"),
  2731  						},
  2732  					},
  2733  				},
  2734  			}
  2735  
  2736  			diskStat := diskStats{
  2737  				rootFsAvailableBytes:  "200Mi",
  2738  				imageFsAvailableBytes: "200Mi",
  2739  				podStats:              podStats,
  2740  			}
  2741  			summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStat)}
  2742  			manager := &managerImpl{
  2743  				clock:                         fakeClock,
  2744  				killPodFunc:                   podKiller.killPodNow,
  2745  				imageGC:                       diskGC,
  2746  				containerGC:                   diskGC,
  2747  				config:                        config,
  2748  				recorder:                      &record.FakeRecorder{},
  2749  				summaryProvider:               summaryProvider,
  2750  				nodeRef:                       nodeRef,
  2751  				nodeConditionsLastObservedAt:  nodeConditionsObservedAt{},
  2752  				thresholdsFirstObservedAt:     thresholdsObservedAt{},
  2753  				localStorageCapacityIsolation: true,
  2754  			}
  2755  
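        			// a single synchronize pass should select the over-limit pod for eviction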
  2756  			_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
  2757  			if err != nil {
  2758  				t.Fatalf("Manager expects no error but got %v", err)
  2759  			}
  2760  
  2761  			if podKiller.pod == nil {
  2762  				t.Fatalf("Manager should have selected a pod for eviction")
  2763  			}
  2764  			if podKiller.pod != podToEvict {
  2765  				t.Errorf("Manager should have killed pod: %v, but instead killed: %v", podToEvict.Name, podKiller.pod.Name)
  2766  			}
  2767  			if *podKiller.gracePeriodOverride != 1 {
  2768  				t.Errorf("Manager should have evicted with gracePeriodOverride of 1, but used: %v", *podKiller.gracePeriodOverride)
  2769  			}
  2770  		})
  2771  	}
  2772  }
  2773  
  2774  // TestAllocatableMemoryPressure verifies eviction and admission behavior when the allocatable memory threshold is crossed.
  2775  func TestAllocatableMemoryPressure(t *testing.T) {
  2776  	podMaker := makePodWithMemoryStats
  2777  	summaryStatsMaker := makeMemoryStats
  2778  	podsToMake := []podToMake{
  2779  		{name: "guaranteed-low-priority-high-usage", priority: lowPriority, requests: newResourceList("100m", "1Gi", ""), limits: newResourceList("100m", "1Gi", ""), memoryWorkingSet: "900Mi"},
  2780  		{name: "burstable-below-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "50Mi"},
  2781  		{name: "burstable-above-requests", priority: defaultPriority, requests: newResourceList("100m", "100Mi", ""), limits: newResourceList("200m", "1Gi", ""), memoryWorkingSet: "400Mi"},
  2782  		{name: "best-effort-high-priority-high-usage", priority: highPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "400Mi"},
  2783  		{name: "best-effort-low-priority-low-usage", priority: lowPriority, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), memoryWorkingSet: "100Mi"},
  2784  	}
  2785  	pods := []*v1.Pod{}
  2786  	podStats := map[*v1.Pod]statsapi.PodStats{}
  2787  	for _, podToMake := range podsToMake {
  2788  		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
  2789  		pods = append(pods, pod)
  2790  		podStats[pod] = podStat
  2791  	}
  2792  	podToEvict := pods[4]
  2793  	activePodsFunc := func() []*v1.Pod {
  2794  		return pods
  2795  	}
  2796  
  2797  	fakeClock := testingclock.NewFakeClock(time.Now())
  2798  	podKiller := &mockPodKiller{}
  2799  	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
  2800  	diskGC := &mockDiskGC{err: nil}
  2801  	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
  2802  
  2803  	config := Config{
  2804  		MaxPodGracePeriodSeconds: 5,
  2805  		PressureTransitionPeriod: time.Minute * 5,
  2806  		Thresholds: []evictionapi.Threshold{
  2807  			{
  2808  				Signal:   evictionapi.SignalAllocatableMemoryAvailable,
  2809  				Operator: evictionapi.OpLessThan,
  2810  				Value: evictionapi.ThresholdValue{
  2811  					Quantity: quantityMustParse("1Gi"),
  2812  				},
  2813  			},
  2814  		},
  2815  	}
  2816  	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("4Gi", podStats)}
  2817  	manager := &managerImpl{
  2818  		clock:                        fakeClock,
  2819  		killPodFunc:                  podKiller.killPodNow,
  2820  		imageGC:                      diskGC,
  2821  		containerGC:                  diskGC,
  2822  		config:                       config,
  2823  		recorder:                     &record.FakeRecorder{},
  2824  		summaryProvider:              summaryProvider,
  2825  		nodeRef:                      nodeRef,
  2826  		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
  2827  		thresholdsFirstObservedAt:    thresholdsObservedAt{},
  2828  	}
  2829  
  2830  	// create a best effort pod and a burstable pod to test admission
  2831  	bestEffortPodToAdmit, _ := podMaker("best-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi")
  2832  	burstablePodToAdmit, _ := podMaker("burst-admit", defaultPriority, newResourceList("100m", "100Mi", ""), newResourceList("200m", "200Mi", ""), "0Gi")
  2833  
  2834  	// synchronize
  2835  	_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
  2836  
  2837  	if err != nil {
  2838  		t.Fatalf("Manager should not have an error %v", err)
  2839  	}
  2840  
  2841  	// we should not have memory pressure
  2842  	if manager.IsUnderMemoryPressure() {
  2843  		t.Errorf("Manager should not report memory pressure")
  2844  	}
  2845  
  2846  	// try to admit our pods (they should succeed)
  2847  	expected := []bool{true, true}
  2848  	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
  2849  		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  2850  			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  2851  		}
  2852  	}
  2853  
  2854  	// induce memory pressure!
  2855  	fakeClock.Step(1 * time.Minute)
  2856  	pod, podStat := podMaker("guaranteed-high-2", defaultPriority, newResourceList("100m", "1Gi", ""), newResourceList("100m", "1Gi", ""), "1Gi")
  2857  	podStats[pod] = podStat
  2858  	summaryProvider.result = summaryStatsMaker("500Mi", podStats)
  2859  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2860  
  2861  	if err != nil {
  2862  		t.Fatalf("Manager should not have an error %v", err)
  2863  	}
  2864  
  2865  	// we should have memory pressure
  2866  	if !manager.IsUnderMemoryPressure() {
  2867  		t.Errorf("Manager should report memory pressure")
  2868  	}
  2869  
  2870  	// check the right pod was killed
  2871  	if podKiller.pod != podToEvict {
  2872  		t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
  2873  	}
  2874  	observedGracePeriod := *podKiller.gracePeriodOverride
  2875  	if observedGracePeriod != int64(1) {
  2876  		t.Errorf("Manager chose to kill pod with incorrect grace period.  Expected: %d, actual: %d", 1, observedGracePeriod)
  2877  	}
  2878  	// reset state
  2879  	podKiller.pod = nil
  2880  	podKiller.gracePeriodOverride = nil
  2881  
  2882  	// the best-effort pod should not admit, burstable should
  2883  	expected = []bool{false, true}
  2884  	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
  2885  		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  2886  			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  2887  		}
  2888  	}
  2889  
  2890  	// reduce memory pressure
  2891  	fakeClock.Step(1 * time.Minute)
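        	// remove the stats for the pod that induced pressure before rebuilding the summary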
  2892  	for pod := range podStats {
  2893  		if pod.Name == "guaranteed-high-2" {
  2894  			delete(podStats, pod)
  2895  		}
  2896  	}
  2897  	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
  2898  	podKiller.pod = nil // reset state
  2899  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2900  
  2901  	if err != nil {
  2902  		t.Fatalf("Manager should not have an error %v", err)
  2903  	}
  2904  
  2905  	// we should have memory pressure (because transition period not yet met)
  2906  	if !manager.IsUnderMemoryPressure() {
  2907  		t.Errorf("Manager should report memory pressure")
  2908  	}
  2909  
  2910  	// no pod should have been killed
  2911  	if podKiller.pod != nil {
  2912  		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  2913  	}
  2914  
  2915  	// the best-effort pod should not admit, burstable should
  2916  	expected = []bool{false, true}
  2917  	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
  2918  		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  2919  			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  2920  		}
  2921  	}
  2922  
  2923  	// move the clock past transition period to ensure that we stop reporting pressure
  2924  	fakeClock.Step(5 * time.Minute)
  2925  	summaryProvider.result = summaryStatsMaker("2Gi", podStats)
  2926  	podKiller.pod = nil // reset state
  2927  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  2928  
  2929  	if err != nil {
  2930  		t.Fatalf("Manager should not have an error %v", err)
  2931  	}
  2932  
  2933  	// we should not have memory pressure (because transition period met)
  2934  	if manager.IsUnderMemoryPressure() {
  2935  		t.Errorf("Manager should not report memory pressure")
  2936  	}
  2937  
  2938  	// no pod should have been killed
  2939  	if podKiller.pod != nil {
  2940  		t.Errorf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
  2941  	}
  2942  
  2943  	// all pods should admit now
  2944  	expected = []bool{true, true}
  2945  	for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
  2946  		if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
  2947  			t.Errorf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
  2948  		}
  2949  	}
  2950  }
  2951  
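        // TestUpdateMemcgThreshold verifies that memcg threshold notifiers are refreshed at most once
        // per notifierRefreshInterval, and that an UpdateThreshold error does not fail synchronize.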
  2952  func TestUpdateMemcgThreshold(t *testing.T) {
  2953  	activePodsFunc := func() []*v1.Pod {
  2954  		return []*v1.Pod{}
  2955  	}
  2956  
  2957  	fakeClock := testingclock.NewFakeClock(time.Now())
  2958  	podKiller := &mockPodKiller{}
  2959  	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
  2960  	diskGC := &mockDiskGC{err: nil}
  2961  	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
  2962  
  2963  	config := Config{
  2964  		MaxPodGracePeriodSeconds: 5,
  2965  		PressureTransitionPeriod: time.Minute * 5,
  2966  		Thresholds: []evictionapi.Threshold{
  2967  			{
  2968  				Signal:   evictionapi.SignalMemoryAvailable,
  2969  				Operator: evictionapi.OpLessThan,
  2970  				Value: evictionapi.ThresholdValue{
  2971  					Quantity: quantityMustParse("1Gi"),
  2972  				},
  2973  			},
  2974  		},
  2975  		PodCgroupRoot: "kubepods",
  2976  	}
  2977  	summaryProvider := &fakeSummaryProvider{result: makeMemoryStats("2Gi", map[*v1.Pod]statsapi.PodStats{})}
  2978  
  2979  	mockCtrl := gomock.NewController(t)
  2980  	defer mockCtrl.Finish()
  2981  
  2982  	thresholdNotifier := NewMockThresholdNotifier(mockCtrl)
  2983  	thresholdNotifier.EXPECT().UpdateThreshold(summaryProvider.result).Return(nil).Times(2)
  2984  
  2985  	manager := &managerImpl{
  2986  		clock:                        fakeClock,
  2987  		killPodFunc:                  podKiller.killPodNow,
  2988  		imageGC:                      diskGC,
  2989  		containerGC:                  diskGC,
  2990  		config:                       config,
  2991  		recorder:                     &record.FakeRecorder{},
  2992  		summaryProvider:              summaryProvider,
  2993  		nodeRef:                      nodeRef,
  2994  		nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
  2995  		thresholdsFirstObservedAt:    thresholdsObservedAt{},
  2996  		thresholdNotifiers:           []ThresholdNotifier{thresholdNotifier},
  2997  	}
  2998  
  2999  	// The UpdateThreshold method should have been called once, since this is the first run.
  3000  	_, err := manager.synchronize(diskInfoProvider, activePodsFunc)
  3001  
  3002  	if err != nil {
  3003  		t.Fatalf("Manager should not have an error %v", err)
  3004  	}
  3005  
  3006  	// The UpdateThreshold method should not have been called again, since not enough time has passed
  3007  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  3008  
  3009  	if err != nil {
  3010  		t.Fatalf("Manager should not have an error %v", err)
  3011  	}
  3012  
  3013  	// The UpdateThreshold method should be called again since enough time has passed
  3014  	fakeClock.Step(2 * notifierRefreshInterval)
  3015  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  3016  
  3017  	if err != nil {
  3018  		t.Fatalf("Manager should not have an error %v", err)
  3019  	}
  3020  
  3021  	// new memory threshold notifier that returns an error
  3022  	thresholdNotifier = NewMockThresholdNotifier(mockCtrl)
  3023  	thresholdNotifier.EXPECT().UpdateThreshold(summaryProvider.result).Return(fmt.Errorf("error updating threshold")).Times(1)
  3024  	thresholdNotifier.EXPECT().Description().Return("mock thresholdNotifier").Times(1)
  3025  	manager.thresholdNotifiers = []ThresholdNotifier{thresholdNotifier}
  3026  
  3027  	// The UpdateThreshold method should be called because at least notifierRefreshInterval time has passed.
  3028  	// The Description method should be called because UpdateThreshold returned an error
  3029  	fakeClock.Step(2 * notifierRefreshInterval)
  3030  	_, err = manager.synchronize(diskInfoProvider, activePodsFunc)
  3031  
  3032  	if err != nil {
  3033  		t.Fatalf("Manager should not have an error %v", err)
  3034  	}
  3035  }
  3036  
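        // TestManagerWithLocalStorageCapacityIsolationOpen verifies that, with localStorageCapacityIsolation
        // enabled, pods that exceed their local storage limits are evicted and returned by synchronize.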
  3037  func TestManagerWithLocalStorageCapacityIsolationOpen(t *testing.T) {
  3038  	podMaker := makePodWithLocalStorageCapacityIsolationOpen
  3039  	summaryStatsMaker := makeDiskStats
  3040  	podsToMake := []podToMake{
  3041  		{name: "empty-dir", requests: newResourceList("", "900Mi", ""), limits: newResourceList("", "1Gi", "")},
  3042  		{name: "container-ephemeral-storage-limit", requests: newResourceList("", "", "900Mi"), limits: newResourceList("", "", "800Mi")},
  3043  		{name: "pod-ephemeral-storage-limit", requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "800Mi")},
  3044  	}
  3045  
  3046  	pods := []*v1.Pod{}
  3047  	podStats := map[*v1.Pod]statsapi.PodStats{}
  3048  	for _, podToMake := range podsToMake {
  3049  		pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.memoryWorkingSet)
  3050  		pods = append(pods, pod)
  3051  		podStats[pod] = podStat
  3052  	}
  3053  
  3054  	diskStat := diskStats{
  3055  		rootFsAvailableBytes:  "1Gi",
  3056  		imageFsAvailableBytes: "200Mi",
  3057  		podStats:              podStats,
  3058  	}
  3059  	summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker(diskStat)}
  3060  
  3061  	config := Config{
  3062  		MaxPodGracePeriodSeconds: 5,
  3063  		PressureTransitionPeriod: time.Minute * 5,
  3064  		Thresholds: []evictionapi.Threshold{
  3065  			{
  3066  				Signal:   evictionapi.SignalAllocatableMemoryAvailable,
  3067  				Operator: evictionapi.OpLessThan,
  3068  				Value: evictionapi.ThresholdValue{
  3069  					Quantity: quantityMustParse("1Gi"),
  3070  				},
  3071  			},
  3072  		},
  3073  	}
  3074  
  3075  	podKiller := &mockPodKiller{}
  3076  	diskGC := &mockDiskGC{err: nil}
  3077  	nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}
  3078  	fakeClock := testingclock.NewFakeClock(time.Now())
  3079  	diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: ptr.To(false)}
  3080  
  3081  	mgr := &managerImpl{
  3082  		clock:                         fakeClock,
  3083  		killPodFunc:                   podKiller.killPodNow,
  3084  		imageGC:                       diskGC,
  3085  		containerGC:                   diskGC,
  3086  		config:                        config,
  3087  		recorder:                      &record.FakeRecorder{},
  3088  		summaryProvider:               summaryProvider,
  3089  		nodeRef:                       nodeRef,
  3090  		localStorageCapacityIsolation: true,
  3091  		dedicatedImageFs:              diskInfoProvider.dedicatedImageFs,
  3092  	}
  3093  
  3094  	activePodsFunc := func() []*v1.Pod {
  3095  		return pods
  3096  	}
  3097  
  3098  	evictedPods, err := mgr.synchronize(diskInfoProvider, activePodsFunc)
  3099  
  3100  	if err != nil {
  3101  		t.Fatalf("Manager should not have error but got %v", err)
  3102  	}
  3103  	if podKiller.pod == nil {
  3104  		t.Fatalf("Manager should have selected a pod for eviction")
  3105  	}
  3106  
  3107  	if diff := cmp.Diff(pods, evictedPods); diff != "" {
  3108  		t.Fatalf("Unexpected evicted pod (-want,+got):\n%s", diff)
  3109  	}
  3110  }