k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/controller/volume/attachdetach/reconciler/reconciler_test.go

/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package reconciler

import (
	"context"
	"fmt"
	"sync"
	"testing"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	k8stypes "k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/tools/record"
	"k8s.io/component-base/metrics/legacyregistry"
	metricstestutil "k8s.io/component-base/metrics/testutil"
	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
	"k8s.io/kubernetes/pkg/controller"
	"k8s.io/kubernetes/pkg/controller/volume/attachdetach/cache"
	"k8s.io/kubernetes/pkg/controller/volume/attachdetach/metrics"
	"k8s.io/kubernetes/pkg/controller/volume/attachdetach/statusupdater"
	controllervolumetesting "k8s.io/kubernetes/pkg/controller/volume/attachdetach/testing"
	volumetesting "k8s.io/kubernetes/pkg/volume/testing"
	"k8s.io/kubernetes/pkg/volume/util/operationexecutor"
	"k8s.io/kubernetes/pkg/volume/util/types"
	utilstrings "k8s.io/utils/strings"
)

const (
	reconcilerLoopPeriod          = 10 * time.Millisecond
	syncLoopPeriod                = 100 * time.Minute
	maxWaitForUnmountDuration     = 50 * time.Millisecond
	maxLongWaitForUnmountDuration = 4200 * time.Second
	volumeAttachedCheckTimeout    = 5 * time.Second
)

var registerMetrics sync.Once

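// Note on the timing constants above (descriptive summary of how the tests below use them):
// reconcilerLoopPeriod and syncLoopPeriod are passed to NewReconciler as the reconciler's loop and
// sync intervals; maxWaitForUnmountDuration is how long a still-mounted volume is kept attached
// after its pod is deleted before a detach is forced; maxLongWaitForUnmountDuration is deliberately
// huge so the tests that use it never hit that timeout; volumeAttachedCheckTimeout bounds the
// polling done by the verifyVolumeReportedAsAttachedToNode helper.
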
// Calls Run()
// Verifies there are no calls to attach or detach.
func Test_Run_Positive_DoNothing(t *testing.T) {
	// Arrange
	volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)

	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nsu := statusupdater.NewNodeStatusUpdater(
		fakeKubeClient, informerFactory.Core().V1().Nodes().Lister(), asw)
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)

	// Act
	_, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go reconciler.Run(ctx)

	// Assert
	waitForNewAttacherCallCount(t, 0 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, true /* expectZeroNewAttacherCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 0 /* expectedAttachCallCount */, fakePlugin)
	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
}

// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
// Calls Run()
// Verifies there is one attach call and no detach calls.
func Test_Run_Positive_OneDesiredVolumeAttach(t *testing.T) {
	// Arrange
	volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
	podName := "pod-uid"
	volumeName := v1.UniqueVolumeName("volume-name")
	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
	nodeName := k8stypes.NodeName("node-name")
	dsw.AddNode(nodeName)
	volumeExists := dsw.VolumeExists(volumeName, nodeName)
	if volumeExists {
		t.Fatalf(
			"Volume %q/node %q should not exist, but it does.",
			volumeName,
			nodeName)
	}

	_, podErr := dsw.AddPod(types.UniquePodName(podName), controllervolumetesting.NewPod(podName, podName), volumeSpec, nodeName)
	if podErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podErr)
	}

	// Act
	_, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go reconciler.Run(ctx)

	// Assert
	waitForNewAttacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
}

// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
// Calls Run()
// Verifies there is one attach call and no detach calls.
// Marks the node/volume as unmounted.
// Deletes the node/volume/pod tuple from desiredStateOfWorld cache.
// Verifies there is one detach call and no (new) attach calls.
func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithUnmountedVolume(t *testing.T) {
	// Arrange
	volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
	podName := "pod-uid"
	volumeName := v1.UniqueVolumeName("volume-name")
	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
	nodeName := k8stypes.NodeName("node-name")
	dsw.AddNode(nodeName)
	volumeExists := dsw.VolumeExists(volumeName, nodeName)
	if volumeExists {
		t.Fatalf(
			"Volume %q/node %q should not exist, but it does.",
			volumeName,
			nodeName)
	}

	generatedVolumeName, podAddErr := dsw.AddPod(types.UniquePodName(podName), controllervolumetesting.NewPod(podName, podName), volumeSpec, nodeName)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}

	// Act
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go reconciler.Run(ctx)

	// Assert
	waitForNewAttacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)

	// Act
	dsw.DeletePod(types.UniquePodName(podName), generatedVolumeName, nodeName)
	volumeExists = dsw.VolumeExists(generatedVolumeName, nodeName)
	if volumeExists {
		t.Fatalf(
			"Deleted pod %q from volume %q/node %q. Volume should also be deleted but it still exists.",
			podName,
			generatedVolumeName,
			nodeName)
	}
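	// The actual state of world tracks the volume as mounted by the node until the node status
	// says otherwise. Reporting it as mounted and then unmounted below simulates the kubelet
	// updating node.Status.VolumesInUse, which lets the reconciler detach without waiting for
	// maxWaitForUnmountDuration.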
	asw.SetVolumesMountedByNode(logger, []v1.UniqueVolumeName{generatedVolumeName}, nodeName)
	asw.SetVolumesMountedByNode(logger, nil, nodeName)

	// Assert
	waitForNewDetacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)
}

// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
// Calls Run()
// Verifies there is one attach call and no detach calls.
// Deletes the node/volume/pod tuple from desiredStateOfWorld cache without first marking the node/volume as unmounted.
// Verifies there is one detach call and no (new) attach calls.
func Test_Run_Positive_OneDesiredVolumeAttachThenDetachWithMountedVolume(t *testing.T) {
	registerMetrics.Do(func() {
		legacyregistry.MustRegister(metrics.ForceDetachMetricCounter)
	})
	// Arrange
	volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
	podName := "pod-uid"
	volumeName := v1.UniqueVolumeName("volume-name")
	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
	nodeName := k8stypes.NodeName("node-name")
	dsw.AddNode(nodeName)

	volumeExists := dsw.VolumeExists(volumeName, nodeName)
	if volumeExists {
		t.Fatalf(
			"Volume %q/node %q should not exist, but it does.",
			volumeName,
			nodeName)
	}

	generatedVolumeName, podAddErr := dsw.AddPod(types.UniquePodName(podName), controllervolumetesting.NewPod(podName, podName), volumeSpec, nodeName)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}

	// Act
	_, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go reconciler.Run(ctx)

	// Assert
	waitForNewAttacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)

	// Act
	dsw.DeletePod(types.UniquePodName(podName), generatedVolumeName, nodeName)
	volumeExists = dsw.VolumeExists(generatedVolumeName, nodeName)
	if volumeExists {
		t.Fatalf(
			"Deleted pod %q from volume %q/node %q. Volume should also be deleted but it still exists.",
			podName,
			generatedVolumeName,
			nodeName)
	}

	// Assert -- Timer will trigger detach
	waitForNewDetacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)

	// Force detach metric due to timeout
	testForceDetachMetric(t, 1, metrics.ForceDetachReasonTimeout)
}

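// ForceDetachMetricCounter is a package-global counter, so it is registered once via
// registerMetrics and its value accumulates across the tests in this file; see the NOTE in
// Test_Run_OneVolumeDetachOnUnhealthyNodeWithForceDetachOnUnmountDisabled, which reads the
// current value before asserting on it.
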
// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
// The node status update fails.
// Calls Run()
// Verifies there is one attach call and no detach calls.
// Marks the node/volume as unmounted.
// Deletes the node/volume/pod tuple from desiredStateOfWorld cache.
// Verifies there are NO detach calls and no (new) attach calls.
func Test_Run_Negative_OneDesiredVolumeAttachThenDetachWithUnmountedVolumeUpdateStatusFail(t *testing.T) {
	// Arrange
	volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	nsu := statusupdater.NewFakeNodeStatusUpdater(true /* returnError */)
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
	podName := "pod-uid"
	volumeName := v1.UniqueVolumeName("volume-name")
	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
	nodeName := k8stypes.NodeName("node-name")
	dsw.AddNode(nodeName)
	volumeExists := dsw.VolumeExists(volumeName, nodeName)
	if volumeExists {
		t.Fatalf(
			"Volume %q/node %q should not exist, but it does.",
			volumeName,
			nodeName)
	}

	generatedVolumeName, podAddErr := dsw.AddPod(types.UniquePodName(podName), controllervolumetesting.NewPod(podName, podName), volumeSpec, nodeName)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}

	// Act
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go reconciler.Run(ctx)

	// Assert
	waitForNewAttacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)

	// Act
	dsw.DeletePod(types.UniquePodName(podName), generatedVolumeName, nodeName)
	volumeExists = dsw.VolumeExists(generatedVolumeName, nodeName)
	if volumeExists {
		t.Fatalf(
			"Deleted pod %q from volume %q/node %q. Volume should also be deleted but it still exists.",
			podName,
			generatedVolumeName,
			nodeName)
	}
	asw.SetVolumesMountedByNode(logger, []v1.UniqueVolumeName{generatedVolumeName}, nodeName)
	asw.SetVolumesMountedByNode(logger, nil, nodeName)

	// Assert
	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
}

// Creates a volume with accessMode ReadWriteMany
// Populates desiredStateOfWorld cache with two node/volume/pod tuples pointing to the created volume
// Calls Run()
// Verifies there are two attach calls and no detach calls.
// Deletes the first node/volume/pod tuple from desiredStateOfWorld cache without first marking the node/volume as unmounted.
// Verifies there is one detach call and no (new) attach calls.
// Deletes the second node/volume/pod tuple from desiredStateOfWorld cache without first marking the node/volume as unmounted.
// Verifies there are two detach calls and no (new) attach calls.
func Test_Run_OneVolumeAttachAndDetachMultipleNodesWithReadWriteMany(t *testing.T) {
	// Arrange
	volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
	podName1 := "pod-uid1"
	podName2 := "pod-uid2"
	volumeName := v1.UniqueVolumeName("volume-name")
	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
	volumeSpec.PersistentVolume.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteMany}
	nodeName1 := k8stypes.NodeName("node-name1")
	nodeName2 := k8stypes.NodeName(volumetesting.MultiAttachNode)
	dsw.AddNode(nodeName1)
	dsw.AddNode(nodeName2)

	generatedVolumeName, podAddErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1, podName1), volumeSpec, nodeName1)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}

	_, podAddErr = dsw.AddPod(types.UniquePodName(podName2), controllervolumetesting.NewPod(podName2, podName2), volumeSpec, nodeName2)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}

	// Act
	_, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go reconciler.Run(ctx)

	// Assert
	waitForNewAttacherCallCount(t, 2 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForTotalAttachCallCount(t, 2 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
	waitForAttachedToNodesCount(t, 2 /* expectedNodeCount */, generatedVolumeName, asw)

	// Act
	dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
	volumeExists := dsw.VolumeExists(generatedVolumeName, nodeName1)
	if volumeExists {
		t.Fatalf(
			"Deleted pod %q from volume %q/node %q. Volume should also be deleted but it still exists.",
			podName1,
			generatedVolumeName,
			nodeName1)
	}

	// Assert -- Timer will trigger detach
	waitForNewDetacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForTotalAttachCallCount(t, 2 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForTotalDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)

	// Act
	dsw.DeletePod(types.UniquePodName(podName2), generatedVolumeName, nodeName2)
	volumeExists = dsw.VolumeExists(generatedVolumeName, nodeName2)
	if volumeExists {
		t.Fatalf(
			"Deleted pod %q from volume %q/node %q. Volume should also be deleted but it still exists.",
			podName2,
			generatedVolumeName,
			nodeName2)
	}

	// Assert -- Timer will trigger detach
	waitForNewDetacherCallCount(t, 2 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForTotalAttachCallCount(t, 2 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForTotalDetachCallCount(t, 2 /* expectedDetachCallCount */, fakePlugin)
}

// Creates a volume with accessMode ReadWriteOnce
// Populates desiredStateOfWorld cache with two node/volume/pod tuples pointing to the created volume
// Calls Run()
// Verifies there is one attach call and no detach calls.
// Deletes the node/volume/pod tuple from desiredStateOfWorld which succeeded in attaching
// Verifies there are two attach calls and one detach call.
func Test_Run_OneVolumeAttachAndDetachMultipleNodesWithReadWriteOnce(t *testing.T) {
	// Arrange
	volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
	podName1 := "pod-uid1"
	podName2 := "pod-uid2"
	volumeName := v1.UniqueVolumeName("volume-name")
	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
	volumeSpec.PersistentVolume.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
	nodeName1 := k8stypes.NodeName("node-name1")
	nodeName2 := k8stypes.NodeName("node-name2")
	dsw.AddNode(nodeName1)
	dsw.AddNode(nodeName2)

	// Add both pods at the same time to provoke a potential race condition in the reconciler
	generatedVolumeName, podAddErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1, podName1), volumeSpec, nodeName1)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}
	_, podAddErr = dsw.AddPod(types.UniquePodName(podName2), controllervolumetesting.NewPod(podName2, podName2), volumeSpec, nodeName2)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}

	// Act
	_, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go reconciler.Run(ctx)

	// Assert
	waitForNewAttacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForTotalAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
	waitForAttachedToNodesCount(t, 1 /* expectedNodeCount */, generatedVolumeName, asw)

	nodesForVolume := asw.GetNodesForAttachedVolume(generatedVolumeName)

	// check if multiattach is marked
	// at least one volume+node should be marked with multiattach error
	nodeAttachedTo := nodesForVolume[0]
	waitForMultiAttachErrorOnNode(t, nodeAttachedTo, dsw)

	// Act
	podToDelete := ""
	if nodesForVolume[0] == nodeName1 {
		podToDelete = podName1
	} else if nodesForVolume[0] == nodeName2 {
		podToDelete = podName2
	} else {
		t.Fatal("Volume attached to unexpected node")
	}

	dsw.DeletePod(types.UniquePodName(podToDelete), generatedVolumeName, nodesForVolume[0])
	volumeExists := dsw.VolumeExists(generatedVolumeName, nodesForVolume[0])
	if volumeExists {
		t.Fatalf(
			"Deleted pod %q from volume %q/node %q. Volume should also be deleted but it still exists.",
			podToDelete,
			generatedVolumeName,
			nodesForVolume[0])
	}

	// Assert
	waitForNewDetacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForTotalDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)
	waitForNewAttacherCallCount(t, 2 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForTotalAttachCallCount(t, 2 /* expectedAttachCallCount */, fakePlugin)
}

// Creates a volume with accessMode ReadWriteOnce
// First create a pod which will try to attach the volume to a node named "uncertain-node". The attach call for this node will
// fail with a timeout, but the volume will actually be attached to the node after the call.
// Secondly, delete this pod.
// Lastly, create a pod scheduled to a normal node which will trigger attach volume to the node. The attach should return successfully.
func Test_Run_OneVolumeAttachAndDetachUncertainNodesWithReadWriteOnce(t *testing.T) {
	// Arrange
	volumePluginMgr, _ := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
	podName1 := "pod-uid1"
	podName2 := "pod-uid2"
	volumeName := v1.UniqueVolumeName("volume-name")
	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
	volumeSpec.PersistentVolume.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
	nodeName1 := k8stypes.NodeName(volumetesting.UncertainAttachNode)
	nodeName2 := k8stypes.NodeName("node-name2")
	dsw.AddNode(nodeName1)
	dsw.AddNode(nodeName2)

	// Act
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go reconciler.Run(ctx)

	// Add the pod in which the volume is attached to the uncertain node
	generatedVolumeName, podAddErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1, podName1), volumeSpec, nodeName1)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}

	time.Sleep(1 * time.Second)
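	// volumetesting.UncertainAttachNode makes the initial attach call return an error even though
	// the volume actually gets attached (see the test doc comment above); the sleep gives the
	// reconciler's 10ms loop time to retry the attach.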
	// Volume is added to asw. The first attach attempt fails, but a retry succeeds, so the volume
	// should end up attached and reported as attached to the node.
	waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
	verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
	verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)

	// When a volume is added to the node, it is set to mounted by default. Then the status is updated by checking node status VolumesInUse.
	// Without this, the delete operation would be delayed due to the mounted status.
	asw.SetVolumesMountedByNode(logger, nil, nodeName1)

	dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)

	waitForVolumeRemovedFromNode(t, generatedVolumeName, nodeName1, asw)

	// Add a second pod which tries to attach the volume to a different node.
	generatedVolumeName, podAddErr = dsw.AddPod(types.UniquePodName(podName2), controllervolumetesting.NewPod(podName2, podName2), volumeSpec, nodeName2)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}
	waitForVolumeAttachStateToNode(t, generatedVolumeName, nodeName2, cache.AttachStateAttached, asw)
}

func Test_Run_UpdateNodeStatusFailBeforeOneVolumeDetachNodeWithReadWriteOnce(t *testing.T) {
	// Arrange
	volumePluginMgr, _ := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	rc := NewReconciler(
		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
	reconciliationLoopFunc := rc.(*reconciler).reconciliationLoopFunc(ctx)
	podName1 := "pod-uid1"
	volumeName := v1.UniqueVolumeName("volume-name")
	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
	volumeSpec.PersistentVolume.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
	nodeName1 := k8stypes.NodeName("node-name1")
	dsw.AddNode(nodeName1)

	// Add a pod whose volume should be attached to nodeName1
	generatedVolumeName, podAddErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1, podName1), volumeSpec, nodeName1)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}

	// Act
	reconciliationLoopFunc(ctx)

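	// This test drives the reconciler by invoking reconciliationLoopFunc directly instead of
	// running Run() in a goroutine, so each reconciliation pass happens exactly when the test
	// calls for it.
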
	// Volume is added to asw, volume should be reported as attached to the node.
	waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
	verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
	verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)

	// Delete the pod
	dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)

	// Mock NodeStatusUpdate fail
	rc.(*reconciler).nodeStatusUpdater = statusupdater.NewFakeNodeStatusUpdater(true /* returnError */)
	reconciliationLoopFunc(ctx)
	// The first detach will be triggered after at least 50ms (maxWaitForUnmountDuration in test).
	time.Sleep(100 * time.Millisecond)
	reconciliationLoopFunc(ctx)
	// Right before the detach operation is performed, the volume is first removed from being reported
	// as attached in the node status (RemoveVolumeFromReportAsAttached). After the UpdateNodeStatus
	// operation, which is expected to fail here, the controller adds the volume back as attached.
	// verifyVolumeReportedAsAttachedToNode checks that the volume is in the list of attached volumes
	// that need to be updated in the node status. Calling that function (GetVolumesToReportAttached)
	// marks the node status as updated, so the volume will not need to be updated again until new
	// changes are applied (detach is triggered again).
	verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
	verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)

}

func Test_Run_OneVolumeDetachFailNodeWithReadWriteOnce(t *testing.T) {
	// Arrange
	volumePluginMgr, _ := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
	podName1 := "pod-uid1"
	podName2 := "pod-uid2"
	podName3 := "pod-uid3"
	volumeName := v1.UniqueVolumeName("volume-name")
	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
	volumeSpec.PersistentVolume.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
	nodeName1 := k8stypes.NodeName(volumetesting.FailDetachNode)
	nodeName2 := k8stypes.NodeName("node-name2")
	dsw.AddNode(nodeName1)
	dsw.AddNode(nodeName2)

	// Act
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go reconciler.Run(ctx)

	// Add the pod in which the volume is attached to the FailDetachNode
	generatedVolumeName, podAddErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1, podName1), volumeSpec, nodeName1)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}
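	// volumetesting.FailDetachNode makes detach calls against this node fail, so deleting the pod
	// below leaves the volume attached (in an uncertain state) rather than detached.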

	// Volume is added to asw, volume should be reported as attached to the node.
	waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
	verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
	verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)

	// Delete the pod, but detach will fail
	dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)

	waitForVolumeAttachStateToNode(t, generatedVolumeName, nodeName1, cache.AttachStateUncertain, asw)
	verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, false, asw, volumeAttachedCheckTimeout)

	// Add a second pod which tries to attach the volume to the same node.
	// After the pod is added to the same node, detach is no longer triggered; the volume gets
	// attached and reported as attached to the node.
	generatedVolumeName, podAddErr = dsw.AddPod(types.UniquePodName(podName2), controllervolumetesting.NewPod(podName2, podName2), volumeSpec, nodeName1)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}
	// Sleep 1s to verify that no detach is triggered after the second pod is added.
	time.Sleep(1000 * time.Millisecond)
	verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
	verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, true, asw, volumeAttachedCheckTimeout)
	// verifyVolumeNoStatusUpdateNeeded(t, logger, generatedVolumeName, nodeName1, asw)

	// Add a third pod which tries to attach the volume to a different node.
	// At this point, the volume is still attached to the first node. There is no status update for either node.
	generatedVolumeName, podAddErr = dsw.AddPod(types.UniquePodName(podName3), controllervolumetesting.NewPod(podName3, podName3), volumeSpec, nodeName2)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}
	verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateAttached, asw)
	verifyVolumeNoStatusUpdateNeeded(t, logger, generatedVolumeName, nodeName1, asw)
	verifyVolumeNoStatusUpdateNeeded(t, logger, generatedVolumeName, nodeName2, asw)
}

// Creates a volume with accessMode ReadWriteOnce
// First create a pod which will try to attach the volume to a node named "timeout-node". The attach call for this node will
// fail with a timeout, but the volume will actually be attached to the node after the call.
// Secondly, delete this pod.
// Lastly, create a pod scheduled to a normal node which will trigger attach volume to the node. The attach should return successfully.
func Test_Run_OneVolumeAttachAndDetachTimeoutNodesWithReadWriteOnce(t *testing.T) {
	// Arrange
	volumePluginMgr, _ := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder)
	podName1 := "pod-uid1"
	podName2 := "pod-uid2"
	volumeName := v1.UniqueVolumeName("volume-name")
	volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName)
	volumeSpec.PersistentVolume.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce}
	nodeName1 := k8stypes.NodeName(volumetesting.TimeoutAttachNode)
	nodeName2 := k8stypes.NodeName("node-name2")
	dsw.AddNode(nodeName1)
	dsw.AddNode(nodeName2)

	// Act
	logger, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go reconciler.Run(ctx)

	// Add the pod in which the volume is attached to the timeout node
	generatedVolumeName, podAddErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1, podName1), volumeSpec, nodeName1)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}

	// Volume is added to asw. Because attach operation fails, volume should not be reported as attached to the node.
	waitForVolumeAddedToNode(t, generatedVolumeName, nodeName1, asw)
	verifyVolumeAttachedToNode(t, generatedVolumeName, nodeName1, cache.AttachStateUncertain, asw)
	verifyVolumeReportedAsAttachedToNode(t, logger, generatedVolumeName, nodeName1, false, asw, volumeAttachedCheckTimeout)

	// When a volume is added to the node, it is set to mounted by default. Then the status is updated by checking node status VolumesInUse.
	// Without this, the delete operation would be delayed due to the mounted status.
	asw.SetVolumesMountedByNode(logger, nil, nodeName1)

	dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)

	waitForVolumeRemovedFromNode(t, generatedVolumeName, nodeName1, asw)

	// Add a second pod which tries to attach the volume to a different node.
	generatedVolumeName, podAddErr = dsw.AddPod(types.UniquePodName(podName2), controllervolumetesting.NewPod(podName2, podName2), volumeSpec, nodeName2)
	if podAddErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podAddErr)
	}
	waitForVolumeAttachStateToNode(t, generatedVolumeName, nodeName2, cache.AttachStateAttached, asw)
}

// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
// The node has node.kubernetes.io/out-of-service taint present.
//
// The maxWaitForUnmountDuration is longer (in this case it is 4200 * time.Second) so that detach does not happen
// immediately due to timeout.
//
// Calls Run()
// Verifies there is one attach call and no detach calls.
// Deletes the pod from desiredStateOfWorld cache without first marking the node/volume as unmounted.
// Verifies there is one detach call and no (new) attach calls.
func Test_Run_OneVolumeDetachOnOutOfServiceTaintedNode(t *testing.T) {
	registerMetrics.Do(func() {
		legacyregistry.MustRegister(metrics.ForceDetachMetricCounter)
	})
	// Arrange
	volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxLongWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad,
		nsu, nodeLister, fakeRecorder)
	podName1 := "pod-uid1"
	volumeName1 := v1.UniqueVolumeName("volume-name1")
	volumeSpec1 := controllervolumetesting.GetTestVolumeSpec(string(volumeName1), volumeName1)
	nodeName1 := k8stypes.NodeName("worker-0")
	node1 := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: string(nodeName1)},
		Spec: v1.NodeSpec{
			Taints: []v1.Taint{{Key: v1.TaintNodeOutOfService, Effect: v1.TaintEffectNoExecute}},
		},
	}
	informerFactory.Core().V1().Nodes().Informer().GetStore().Add(node1)
	dsw.AddNode(nodeName1)
	volumeExists := dsw.VolumeExists(volumeName1, nodeName1)
	if volumeExists {
		t.Fatalf(
			"Volume %q/node %q should not exist, but it does.",
			volumeName1,
			nodeName1)
	}

	generatedVolumeName, podErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1,
		podName1), volumeSpec1, nodeName1)
	if podErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podErr)
	}

	// Act
	_, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go reconciler.Run(ctx)

	// Assert
	waitForNewAttacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)

	// Delete the pod. Because the volume is not unmounted, detach would normally happen only after
	// maxLongWaitForUnmountDuration expires (the long duration is used here to mimic a node that is
	// out of service). But since the node has the node.kubernetes.io/out-of-service taint, the
	// reconciler does not wait for maxLongWaitForUnmountDuration and progresses to detach immediately.
	dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
	// Assert -- Detach will be triggered if node has out of service taint
	waitForNewDetacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)

	// Force detach metric due to out-of-service taint
	testForceDetachMetric(t, 1, metrics.ForceDetachReasonOutOfService)
}

// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
// The node does not have the node.kubernetes.io/out-of-service taint present.
//
// The maxWaitForUnmountDuration is longer (in this case it is 4200 * time.Second) so that detach does not happen
// immediately due to timeout.
//
// Calls Run()
// Verifies there is one attach call and no detach calls.
// Deletes the pod from desiredStateOfWorld cache without first marking the node/volume as unmounted.
// Verifies there is no detach call and no (new) attach calls.
func Test_Run_OneVolumeDetachOnNoOutOfServiceTaintedNode(t *testing.T) {
	// Arrange
	volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxLongWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad,
		nsu, nodeLister, fakeRecorder)
	podName1 := "pod-uid1"
	volumeName1 := v1.UniqueVolumeName("volume-name1")
	volumeSpec1 := controllervolumetesting.GetTestVolumeSpec(string(volumeName1), volumeName1)
	nodeName1 := k8stypes.NodeName("worker-0")
	node1 := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: string(nodeName1)},
	}
	informerFactory.Core().V1().Nodes().Informer().GetStore().Add(node1)
	dsw.AddNode(nodeName1)
	volumeExists := dsw.VolumeExists(volumeName1, nodeName1)
	if volumeExists {
		t.Fatalf(
			"Volume %q/node %q should not exist, but it does.",
			volumeName1,
			nodeName1)
	}

	generatedVolumeName, podErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1,
		podName1), volumeSpec1, nodeName1)
	if podErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podErr)
	}

	// Act
	_, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go reconciler.Run(ctx)

	// Assert
	waitForNewAttacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)

	// Delete the pod. Because the volume is not unmounted, detach happens only after
	// maxLongWaitForUnmountDuration expires (the long duration is used here to mimic a node that is
	// out of service). Since the node does not have the node.kubernetes.io/out-of-service taint,
	// the reconciler waits for maxLongWaitForUnmountDuration and does not detach immediately.
	dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
	// Assert -- Detach will be triggered only after maxLongWaitForUnmountDuration expires
	waitForNewDetacherCallCount(t, 0 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
}

// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
// The node starts as healthy.
//
// Calls Run()
// Verifies there is one attach call and no detach calls.
// Deletes the pod from desiredStateOfWorld cache without first marking the node/volume as unmounted.
// Verifies that the volume is NOT detached after maxWaitForUnmountDuration.
// Marks the node as unhealthy.
// Verifies that the volume is detached after maxWaitForUnmountDuration.
func Test_Run_OneVolumeDetachOnUnhealthyNode(t *testing.T) {
	// Arrange
	volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad,
		nsu, nodeLister, fakeRecorder)
	podName1 := "pod-uid1"
	volumeName1 := v1.UniqueVolumeName("volume-name1")
	volumeSpec1 := controllervolumetesting.GetTestVolumeSpec(string(volumeName1), volumeName1)
	nodeName1 := k8stypes.NodeName("worker-0")
	node1 := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: string(nodeName1)},
		Status: v1.NodeStatus{
			Conditions: []v1.NodeCondition{
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
		},
	}
	informerFactory.Core().V1().Nodes().Informer().GetStore().Add(node1)
	dsw.AddNode(nodeName1)
	volumeExists := dsw.VolumeExists(volumeName1, nodeName1)
	if volumeExists {
		t.Fatalf(
			"Volume %q/node %q should not exist, but it does.",
			volumeName1,
			nodeName1)
	}

	generatedVolumeName, podErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1,
		podName1), volumeSpec1, nodeName1)
	if podErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podErr)
	}

	// Act
	_, ctx := ktesting.NewTestContext(t)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	go reconciler.Run(ctx)

	// Assert
	waitForNewAttacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)

	// Act
	// Delete the pod. The volume will NOT be detached even after maxWaitForUnmountDuration expires,
	// because the volume is not unmounted and the node is healthy.
	dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
	time.Sleep(maxWaitForUnmountDuration * 5)
	// Assert
	waitForNewDetacherCallCount(t, 0 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)

	// Act
	// Mark the node unhealthy
	node2 := node1.DeepCopy()
	node2.Status.Conditions[0].Status = v1.ConditionFalse
	informerFactory.Core().V1().Nodes().Informer().GetStore().Update(node2)
	// Assert -- Detach was triggered after maxWaitForUnmountDuration
	waitForNewDetacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
	verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
	waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
	verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin)
	waitForDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin)
}

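// The test above exercises the default behavior: once the node is unhealthy, the reconciler force
// detaches after maxWaitForUnmountDuration. The test below repeats the scenario with force detach
// on unmount disabled (see its doc comment), so only the node.kubernetes.io/out-of-service taint
// ends up triggering the detach.
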
// Populates desiredStateOfWorld cache with one node/volume/pod tuple.
// The node starts as healthy.
//
// Calls Run()
// Verifies there is one attach call and no detach calls.
// Deletes the pod from desiredStateOfWorld cache without first marking the node/volume as unmounted.
// Verifies that the volume is NOT detached after maxWaitForUnmountDuration.
// Marks the node as unhealthy.
// Sets forceDetachOnUnmountDisabled to true.
// Verifies that the volume is not detached after maxWaitForUnmountDuration.
//
// Then applies the node.kubernetes.io/out-of-service taint.
// Verifies that there is still just one attach call.
// Verifies there is now one detach call.
func Test_Run_OneVolumeDetachOnUnhealthyNodeWithForceDetachOnUnmountDisabled(t *testing.T) {
	registerMetrics.Do(func() {
		legacyregistry.MustRegister(metrics.ForceDetachMetricCounter)
	})
	// NOTE: This value is being pulled from a global variable, so it won't necessarily be 0 at the start of the test
	// For example, if Test_Run_OneVolumeDetachOnOutOfServiceTaintedNode runs before this test, then it will be 1
	initialForceDetachCount, err := metricstestutil.GetCounterMetricValue(metrics.ForceDetachMetricCounter.WithLabelValues(metrics.ForceDetachReasonOutOfService))
	if err != nil {
		t.Errorf("Error getting initialForceDetachCount")
	}

	// Arrange
	volumePluginMgr, fakePlugin := volumetesting.GetTestVolumePluginMgr(t)
	dsw := cache.NewDesiredStateOfWorld(volumePluginMgr)
	asw := cache.NewActualStateOfWorld(volumePluginMgr)
	fakeKubeClient := controllervolumetesting.CreateTestClient()
	fakeRecorder := &record.FakeRecorder{}
	fakeHandler := volumetesting.NewBlockVolumePathHandler()
	ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator(
		fakeKubeClient,
		volumePluginMgr,
		fakeRecorder,
		fakeHandler))
	informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc())
	nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */)
	nodeLister := informerFactory.Core().V1().Nodes().Lister()
	reconciler := NewReconciler(
		reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, true, dsw, asw, ad,
		nsu, nodeLister, fakeRecorder)
	podName1 := "pod-uid1"
	volumeName1 := v1.UniqueVolumeName("volume-name1")
	volumeSpec1 := controllervolumetesting.GetTestVolumeSpec(string(volumeName1), volumeName1)
	nodeName1 := k8stypes.NodeName("worker-0")
	node1 := &v1.Node{
		ObjectMeta: metav1.ObjectMeta{Name: string(nodeName1)},
		Status: v1.NodeStatus{
			Conditions: []v1.NodeCondition{
				{
					Type:   v1.NodeReady,
					Status: v1.ConditionTrue,
				},
			},
		},
	}
	addErr := informerFactory.Core().V1().Nodes().Informer().GetStore().Add(node1)
	if addErr != nil {
		t.Fatalf("Add node failed. Expected: <no error> Actual: <%v>", addErr)
	}
	dsw.AddNode(nodeName1)
	volumeExists := dsw.VolumeExists(volumeName1, nodeName1)
	if volumeExists {
		t.Fatalf(
			"Volume %q/node %q should not exist, but it does.",
			volumeName1,
			nodeName1)
	}

	generatedVolumeName, podErr := dsw.AddPod(types.UniquePodName(podName1), controllervolumetesting.NewPod(podName1,
		podName1), volumeSpec1, nodeName1)
	if podErr != nil {
		t.Fatalf("AddPod failed. Expected: <no error> Actual: <%v>", podErr)
Expected: <no error> Actual: <%v>", podErr)
1178 }
1179 
1180 // Act
1181 _, ctx := ktesting.NewTestContext(t)
1182 ctx, cancel := context.WithCancel(ctx)
1183 defer cancel()
1184 go reconciler.Run(ctx)
1185 
1186 // Assert
1187 waitForNewAttacherCallCount(t, 1 /* expectedCallCount */, fakePlugin)
1188 verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
1189 waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
1190 verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
1191 waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
1192 
1193 // Act
1194 // Delete the pod. The volume will not be detached even after maxWaitForUnmountDuration expires, because the
1195 // volume is not unmounted and the node is healthy.
1196 dsw.DeletePod(types.UniquePodName(podName1), generatedVolumeName, nodeName1)
1197 time.Sleep(maxWaitForUnmountDuration * 5)
1198 // Assert
1199 waitForNewDetacherCallCount(t, 0 /* expectedCallCount */, fakePlugin)
1200 verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
1201 waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
1202 verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
1203 waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
1204 
1205 // Act
1206 // Mark the node unhealthy
1207 node2 := node1.DeepCopy()
1208 node2.Status.Conditions[0].Status = v1.ConditionFalse
1209 updateErr := informerFactory.Core().V1().Nodes().Informer().GetStore().Update(node2)
1210 if updateErr != nil {
1211 t.Fatalf("Update node failed. Expected: <no error> Actual: <%v>", updateErr)
1212 }
1213 // Assert -- Detach was not triggered after maxWaitForUnmountDuration because force detach on unmount timeout is disabled
1214 waitForNewDetacherCallCount(t, 0 /* expectedCallCount */, fakePlugin)
1215 verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin)
1216 waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin)
1217 verifyNewDetacherCallCount(t, true /* expectZeroNewDetacherCallCount */, fakePlugin)
1218 waitForDetachCallCount(t, 0 /* expectedDetachCallCount */, fakePlugin)
1219 
1220 // Force detach metric due to out-of-service taint
1221 // We shouldn't see any additional force detaches, so only consider the initial count
1222 testForceDetachMetric(t, int(initialForceDetachCount), metrics.ForceDetachReasonOutOfService)
1223 
1224 // Act
1225 // Taint the node
1226 node3 := node2.DeepCopy()
1227 node3.Spec.Taints = append(node3.Spec.Taints, v1.Taint{Key: v1.TaintNodeOutOfService, Effect: v1.TaintEffectNoExecute})
1228 updateErr = informerFactory.Core().V1().Nodes().Informer().GetStore().Update(node3)
1229 if updateErr != nil {
1230 t.Fatalf("Update node failed. 
Expected: <no error> Actual: <%v>", updateErr) 1231 } 1232 // Assert -- Detach was triggered after maxWaitForUnmountDuration 1233 waitForNewDetacherCallCount(t, 1 /* expectedCallCount */, fakePlugin) 1234 verifyNewAttacherCallCount(t, false /* expectZeroNewAttacherCallCount */, fakePlugin) 1235 waitForAttachCallCount(t, 1 /* expectedAttachCallCount */, fakePlugin) 1236 verifyNewDetacherCallCount(t, false /* expectZeroNewDetacherCallCount */, fakePlugin) 1237 waitForDetachCallCount(t, 1 /* expectedDetachCallCount */, fakePlugin) 1238 1239 // Force detach metric due to out-of-service taint 1240 // We should see one more force detach, so consider the initial count + 1 1241 testForceDetachMetric(t, int(initialForceDetachCount)+1, metrics.ForceDetachReasonOutOfService) 1242 } 1243 1244 func Test_ReportMultiAttachError(t *testing.T) { 1245 type nodeWithPods struct { 1246 name k8stypes.NodeName 1247 podNames []string 1248 } 1249 tests := []struct { 1250 name string 1251 nodes []nodeWithPods 1252 expectedEvents []string 1253 }{ 1254 { 1255 "no pods use the volume", 1256 []nodeWithPods{ 1257 {"node1", []string{"ns1/pod1"}}, 1258 }, 1259 []string{"Warning FailedAttachVolume Multi-Attach error for volume \"volume-name\" Volume is already exclusively attached to one node and can't be attached to another"}, 1260 }, 1261 { 1262 "pods in the same namespace use the volume", 1263 []nodeWithPods{ 1264 {"node1", []string{"ns1/pod1"}}, 1265 {"node2", []string{"ns1/pod2"}}, 1266 }, 1267 []string{"Warning FailedAttachVolume Multi-Attach error for volume \"volume-name\" Volume is already used by pod(s) pod2"}, 1268 }, 1269 { 1270 "pods in another namespace use the volume", 1271 []nodeWithPods{ 1272 {"node1", []string{"ns1/pod1"}}, 1273 {"node2", []string{"ns2/pod2"}}, 1274 }, 1275 []string{"Warning FailedAttachVolume Multi-Attach error for volume \"volume-name\" Volume is already used by 1 pod(s) in different namespaces"}, 1276 }, 1277 { 1278 "pods both in the same and another namespace use the volume", 1279 []nodeWithPods{ 1280 {"node1", []string{"ns1/pod1"}}, 1281 {"node2", []string{"ns2/pod2"}}, 1282 {"node3", []string{"ns1/pod3"}}, 1283 }, 1284 []string{"Warning FailedAttachVolume Multi-Attach error for volume \"volume-name\" Volume is already used by pod(s) pod3 and 1 pod(s) in different namespaces"}, 1285 }, 1286 } 1287 1288 for _, test := range tests { 1289 // Arrange 1290 t.Logf("Test %q starting", test.name) 1291 volumePluginMgr, _ := volumetesting.GetTestVolumePluginMgr(t) 1292 dsw := cache.NewDesiredStateOfWorld(volumePluginMgr) 1293 asw := cache.NewActualStateOfWorld(volumePluginMgr) 1294 fakeKubeClient := controllervolumetesting.CreateTestClient() 1295 fakeRecorder := record.NewFakeRecorder(100) 1296 fakeHandler := volumetesting.NewBlockVolumePathHandler() 1297 ad := operationexecutor.NewOperationExecutor(operationexecutor.NewOperationGenerator( 1298 fakeKubeClient, 1299 volumePluginMgr, 1300 fakeRecorder, 1301 fakeHandler)) 1302 informerFactory := informers.NewSharedInformerFactory(fakeKubeClient, controller.NoResyncPeriodFunc()) 1303 nodeLister := informerFactory.Core().V1().Nodes().Lister() 1304 nsu := statusupdater.NewFakeNodeStatusUpdater(false /* returnError */) 1305 rc := NewReconciler( 1306 reconcilerLoopPeriod, maxWaitForUnmountDuration, syncLoopPeriod, false, false, dsw, asw, ad, nsu, nodeLister, fakeRecorder) 1307 1308 nodes := []k8stypes.NodeName{} 1309 for _, n := range test.nodes { 1310 dsw.AddNode(n.name) 1311 nodes = append(nodes, n.name) 1312 for _, podName := range n.podNames { 
1313 volumeName := v1.UniqueVolumeName("volume-name") 1314 volumeSpec := controllervolumetesting.GetTestVolumeSpec(string(volumeName), volumeName) 1315 volumeSpec.PersistentVolume.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteOnce} 1316 uid := string(n.name) + "-" + podName // unique UID 1317 namespace, name := utilstrings.SplitQualifiedName(podName) 1318 pod := controllervolumetesting.NewPod(uid, name) 1319 pod.Namespace = namespace 1320 _, err := dsw.AddPod(types.UniquePodName(uid), pod, volumeSpec, n.name) 1321 if err != nil { 1322 t.Fatalf("Error adding pod %s to DSW: %s", podName, err) 1323 } 1324 } 1325 } 1326 // Act 1327 logger, _ := ktesting.NewTestContext(t) 1328 volumes := dsw.GetVolumesToAttach() 1329 for _, vol := range volumes { 1330 if vol.NodeName == "node1" { 1331 rc.(*reconciler).reportMultiAttachError(logger, vol, nodes) 1332 } 1333 } 1334 1335 // Assert 1336 close(fakeRecorder.Events) 1337 index := 0 1338 for event := range fakeRecorder.Events { 1339 if len(test.expectedEvents) < index { 1340 t.Errorf("Test %q: unexpected event received: %s", test.name, event) 1341 } else { 1342 expectedEvent := test.expectedEvents[index] 1343 if expectedEvent != event { 1344 t.Errorf("Test %q: event %d: expected %q, got %q", test.name, index, expectedEvent, event) 1345 } 1346 } 1347 index++ 1348 } 1349 for i := index; i < len(test.expectedEvents); i++ { 1350 t.Errorf("Test %q: event %d: expected %q, got none", test.name, i, test.expectedEvents[i]) 1351 } 1352 } 1353 } 1354 1355 func waitForMultiAttachErrorOnNode( 1356 t *testing.T, 1357 attachedNode k8stypes.NodeName, 1358 dsow cache.DesiredStateOfWorld) { 1359 multAttachCheckFunc := func() (bool, error) { 1360 for _, volumeToAttach := range dsow.GetVolumesToAttach() { 1361 if volumeToAttach.NodeName != attachedNode { 1362 if volumeToAttach.MultiAttachErrorReported { 1363 return true, nil 1364 } 1365 } 1366 } 1367 t.Logf("Warning: MultiAttach error not yet set on Node. Will retry.") 1368 return false, nil 1369 } 1370 1371 err := retryWithExponentialBackOff(100*time.Millisecond, multAttachCheckFunc) 1372 if err != nil { 1373 t.Fatalf("Timed out waiting for MultiAttach Error to be set on non-attached node") 1374 } 1375 } 1376 1377 func waitForNewAttacherCallCount( 1378 t *testing.T, 1379 expectedCallCount int, 1380 fakePlugin *volumetesting.FakeVolumePlugin) { 1381 err := retryWithExponentialBackOff( 1382 time.Duration(5*time.Millisecond), 1383 func() (bool, error) { 1384 actualCallCount := fakePlugin.GetNewAttacherCallCount() 1385 if actualCallCount >= expectedCallCount { 1386 return true, nil 1387 } 1388 t.Logf( 1389 "Warning: Wrong NewAttacherCallCount. Expected: <%v> Actual: <%v>. Will retry.", 1390 expectedCallCount, 1391 actualCallCount) 1392 return false, nil 1393 }, 1394 ) 1395 1396 if err != nil { 1397 t.Fatalf( 1398 "Timed out waiting for NewAttacherCallCount. Expected: <%v> Actual: <%v>", 1399 expectedCallCount, 1400 fakePlugin.GetNewAttacherCallCount()) 1401 } 1402 } 1403 1404 func waitForNewDetacherCallCount( 1405 t *testing.T, 1406 expectedCallCount int, 1407 fakePlugin *volumetesting.FakeVolumePlugin) { 1408 err := retryWithExponentialBackOff( 1409 time.Duration(5*time.Millisecond), 1410 func() (bool, error) { 1411 actualCallCount := fakePlugin.GetNewDetacherCallCount() 1412 if actualCallCount >= expectedCallCount { 1413 return true, nil 1414 } 1415 t.Logf( 1416 "Warning: Wrong NewDetacherCallCount. Expected: <%v> Actual: <%v>. 
Will retry.", 1417 expectedCallCount, 1418 actualCallCount) 1419 return false, nil 1420 }, 1421 ) 1422 1423 if err != nil { 1424 t.Fatalf( 1425 "Timed out waiting for NewDetacherCallCount. Expected: <%v> Actual: <%v>", 1426 expectedCallCount, 1427 fakePlugin.GetNewDetacherCallCount()) 1428 } 1429 } 1430 1431 func waitForAttachCallCount( 1432 t *testing.T, 1433 expectedAttachCallCount int, 1434 fakePlugin *volumetesting.FakeVolumePlugin) { 1435 if len(fakePlugin.GetAttachers()) == 0 && expectedAttachCallCount == 0 { 1436 return 1437 } 1438 1439 err := retryWithExponentialBackOff( 1440 time.Duration(5*time.Millisecond), 1441 func() (bool, error) { 1442 for i, attacher := range fakePlugin.GetAttachers() { 1443 actualCallCount := attacher.GetAttachCallCount() 1444 if actualCallCount == expectedAttachCallCount { 1445 return true, nil 1446 } 1447 t.Logf( 1448 "Warning: Wrong attacher[%v].GetAttachCallCount(). Expected: <%v> Actual: <%v>. Will try next attacher.", 1449 i, 1450 expectedAttachCallCount, 1451 actualCallCount) 1452 } 1453 1454 t.Logf( 1455 "Warning: No attachers have expected AttachCallCount. Expected: <%v>. Will retry.", 1456 expectedAttachCallCount) 1457 return false, nil 1458 }, 1459 ) 1460 1461 if err != nil { 1462 t.Fatalf( 1463 "No attachers have expected AttachCallCount. Expected: <%v>", 1464 expectedAttachCallCount) 1465 } 1466 } 1467 1468 func waitForTotalAttachCallCount( 1469 t *testing.T, 1470 expectedAttachCallCount int, 1471 fakePlugin *volumetesting.FakeVolumePlugin) { 1472 if len(fakePlugin.GetAttachers()) == 0 && expectedAttachCallCount == 0 { 1473 return 1474 } 1475 1476 err := retryWithExponentialBackOff( 1477 time.Duration(5*time.Millisecond), 1478 func() (bool, error) { 1479 totalCount := 0 1480 for _, attacher := range fakePlugin.GetAttachers() { 1481 totalCount += attacher.GetAttachCallCount() 1482 } 1483 if totalCount == expectedAttachCallCount { 1484 return true, nil 1485 } 1486 t.Logf( 1487 "Warning: Wrong total GetAttachCallCount(). Expected: <%v> Actual: <%v>. Will retry.", 1488 expectedAttachCallCount, 1489 totalCount) 1490 1491 return false, nil 1492 }, 1493 ) 1494 1495 if err != nil { 1496 t.Fatalf( 1497 "Total AttachCallCount does not match expected value. Expected: <%v>", 1498 expectedAttachCallCount) 1499 } 1500 } 1501 1502 func waitForDetachCallCount( 1503 t *testing.T, 1504 expectedDetachCallCount int, 1505 fakePlugin *volumetesting.FakeVolumePlugin) { 1506 if len(fakePlugin.GetDetachers()) == 0 && expectedDetachCallCount == 0 { 1507 return 1508 } 1509 1510 err := retryWithExponentialBackOff( 1511 time.Duration(5*time.Millisecond), 1512 func() (bool, error) { 1513 for i, detacher := range fakePlugin.GetDetachers() { 1514 actualCallCount := detacher.GetDetachCallCount() 1515 if actualCallCount == expectedDetachCallCount { 1516 return true, nil 1517 } 1518 t.Logf( 1519 "Wrong detacher[%v].GetDetachCallCount(). Expected: <%v> Actual: <%v>. Will try next detacher.", 1520 i, 1521 expectedDetachCallCount, 1522 actualCallCount) 1523 } 1524 1525 t.Logf( 1526 "Warning: No detachers have expected DetachCallCount. Expected: <%v>. Will retry.", 1527 expectedDetachCallCount) 1528 return false, nil 1529 }, 1530 ) 1531 1532 if err != nil { 1533 t.Fatalf( 1534 "No detachers have expected DetachCallCount. 
Expected: <%v>", 1535 expectedDetachCallCount) 1536 } 1537 } 1538 1539 func waitForTotalDetachCallCount( 1540 t *testing.T, 1541 expectedDetachCallCount int, 1542 fakePlugin *volumetesting.FakeVolumePlugin) { 1543 if len(fakePlugin.GetDetachers()) == 0 && expectedDetachCallCount == 0 { 1544 return 1545 } 1546 1547 err := retryWithExponentialBackOff( 1548 time.Duration(5*time.Millisecond), 1549 func() (bool, error) { 1550 totalCount := 0 1551 for _, detacher := range fakePlugin.GetDetachers() { 1552 totalCount += detacher.GetDetachCallCount() 1553 } 1554 if totalCount == expectedDetachCallCount { 1555 return true, nil 1556 } 1557 t.Logf( 1558 "Warning: Wrong total GetDetachCallCount(). Expected: <%v> Actual: <%v>. Will retry.", 1559 expectedDetachCallCount, 1560 totalCount) 1561 1562 return false, nil 1563 }, 1564 ) 1565 1566 if err != nil { 1567 t.Fatalf( 1568 "Total DetachCallCount does not match expected value. Expected: <%v>", 1569 expectedDetachCallCount) 1570 } 1571 } 1572 1573 func waitForAttachedToNodesCount( 1574 t *testing.T, 1575 expectedNodeCount int, 1576 volumeName v1.UniqueVolumeName, 1577 asw cache.ActualStateOfWorld) { 1578 1579 err := retryWithExponentialBackOff( 1580 time.Duration(5*time.Millisecond), 1581 func() (bool, error) { 1582 count := len(asw.GetNodesForAttachedVolume(volumeName)) 1583 if count == expectedNodeCount { 1584 return true, nil 1585 } 1586 t.Logf( 1587 "Warning: Wrong number of nodes having <%v> attached. Expected: <%v> Actual: <%v>. Will retry.", 1588 volumeName, 1589 expectedNodeCount, 1590 count) 1591 1592 return false, nil 1593 }, 1594 ) 1595 1596 if err != nil { 1597 count := len(asw.GetNodesForAttachedVolume(volumeName)) 1598 t.Fatalf( 1599 "Wrong number of nodes having <%v> attached. Expected: <%v> Actual: <%v>", 1600 volumeName, 1601 expectedNodeCount, 1602 count) 1603 } 1604 } 1605 1606 func verifyNewAttacherCallCount( 1607 t *testing.T, 1608 expectZeroNewAttacherCallCount bool, 1609 fakePlugin *volumetesting.FakeVolumePlugin) { 1610 1611 if expectZeroNewAttacherCallCount && 1612 fakePlugin.GetNewAttacherCallCount() != 0 { 1613 t.Fatalf( 1614 "Wrong NewAttacherCallCount. Expected: <0> Actual: <%v>", 1615 fakePlugin.GetNewAttacherCallCount()) 1616 } 1617 } 1618 1619 func waitForVolumeAttachStateToNode( 1620 t *testing.T, 1621 volumeName v1.UniqueVolumeName, 1622 nodeName k8stypes.NodeName, 1623 expectedAttachState cache.AttachState, 1624 asw cache.ActualStateOfWorld) { 1625 1626 err := retryWithExponentialBackOff( 1627 time.Duration(500*time.Millisecond), 1628 func() (bool, error) { 1629 attachState := asw.GetAttachState(volumeName, nodeName) 1630 if attachState == expectedAttachState { 1631 return true, nil 1632 } 1633 t.Logf("Warning: expected attach state: %v, actual attach state: %v. 
Will retry.", 1634 expectedAttachState, attachState) 1635 return false, nil 1636 }, 1637 ) 1638 1639 attachState := asw.GetAttachState(volumeName, nodeName) 1640 if err != nil && attachState != expectedAttachState { 1641 t.Fatalf("Volume <%v> is not in expected attach state: %v, actual attach state: %v", 1642 volumeName, expectedAttachState, attachState) 1643 } 1644 1645 t.Logf("Volume <%v> is attached to node <%v>: %v", volumeName, nodeName, expectedAttachState) 1646 } 1647 1648 func waitForVolumeAddedToNode( 1649 t *testing.T, 1650 volumeName v1.UniqueVolumeName, 1651 nodeName k8stypes.NodeName, 1652 asw cache.ActualStateOfWorld) { 1653 1654 err := retryWithExponentialBackOff( 1655 time.Duration(500*time.Millisecond), 1656 func() (bool, error) { 1657 volumes := asw.GetAttachedVolumes() 1658 for _, volume := range volumes { 1659 if volume.VolumeName == volumeName && volume.NodeName == nodeName { 1660 return true, nil 1661 } 1662 } 1663 t.Logf( 1664 "Warning: Volume <%v> is not added to node <%v> yet. Will retry.", 1665 volumeName, 1666 nodeName) 1667 1668 return false, nil 1669 }, 1670 ) 1671 1672 if err != nil { 1673 t.Fatalf( 1674 "Volume <%v> is not added to node <%v>. %v", 1675 volumeName, 1676 nodeName, err) 1677 } 1678 } 1679 1680 func waitForVolumeRemovedFromNode( 1681 t *testing.T, 1682 volumeName v1.UniqueVolumeName, 1683 nodeName k8stypes.NodeName, 1684 asw cache.ActualStateOfWorld) { 1685 1686 err := retryWithExponentialBackOff( 1687 time.Duration(500*time.Millisecond), 1688 func() (bool, error) { 1689 volumes := asw.GetAttachedVolumes() 1690 exist := false 1691 for _, volume := range volumes { 1692 if volume.VolumeName == volumeName && volume.NodeName == nodeName { 1693 exist = true 1694 } 1695 } 1696 if exist { 1697 t.Logf( 1698 "Warning: Volume <%v> is not removed from the node <%v> yet. Will retry.", 1699 volumeName, 1700 nodeName) 1701 1702 return false, nil 1703 } 1704 return true, nil 1705 1706 }, 1707 ) 1708 1709 if err != nil { 1710 t.Fatalf( 1711 "Volume <%v> is not removed from node <%v>. 
%v", 1712 volumeName, 1713 nodeName, err) 1714 } 1715 } 1716 1717 func verifyVolumeAttachedToNode( 1718 t *testing.T, 1719 volumeName v1.UniqueVolumeName, 1720 nodeName k8stypes.NodeName, 1721 expectedAttachState cache.AttachState, 1722 asw cache.ActualStateOfWorld, 1723 ) { 1724 attachState := asw.GetAttachState(volumeName, nodeName) 1725 if attachState != expectedAttachState { 1726 t.Fatalf("Check volume <%v> is attached to node <%v>, got %v, expected %v", 1727 volumeName, 1728 nodeName, 1729 attachState, 1730 expectedAttachState) 1731 } 1732 t.Logf("Volume <%v> is attached to node <%v>: %v", volumeName, nodeName, attachState) 1733 } 1734 1735 func verifyVolumeReportedAsAttachedToNode( 1736 t *testing.T, 1737 logger klog.Logger, 1738 volumeName v1.UniqueVolumeName, 1739 nodeName k8stypes.NodeName, 1740 isAttached bool, 1741 asw cache.ActualStateOfWorld, 1742 timeout time.Duration, 1743 ) { 1744 var result bool 1745 var lastErr error 1746 err := wait.PollUntilContextTimeout(context.TODO(), 50*time.Millisecond, timeout, false, func(context.Context) (done bool, err error) { 1747 volumes := asw.GetVolumesToReportAttached(logger) 1748 for _, volume := range volumes[nodeName] { 1749 if volume.Name == volumeName { 1750 result = true 1751 } 1752 } 1753 1754 if result == isAttached { 1755 t.Logf("Volume <%v> is reported as attached to node <%v>: %v", volumeName, nodeName, result) 1756 return true, nil 1757 } 1758 lastErr = fmt.Errorf("Check volume <%v> is reported as attached to node <%v>, got %v, expected %v", 1759 volumeName, 1760 nodeName, 1761 result, 1762 isAttached) 1763 return false, nil 1764 }) 1765 if err != nil { 1766 t.Fatalf("last error: %q, wait timeout: %q", lastErr, err.Error()) 1767 } 1768 1769 } 1770 1771 func verifyVolumeNoStatusUpdateNeeded( 1772 t *testing.T, 1773 logger klog.Logger, 1774 volumeName v1.UniqueVolumeName, 1775 nodeName k8stypes.NodeName, 1776 asw cache.ActualStateOfWorld, 1777 ) { 1778 volumes := asw.GetVolumesToReportAttached(logger) 1779 for _, volume := range volumes[nodeName] { 1780 if volume.Name == volumeName { 1781 t.Fatalf("Check volume <%v> is reported as need to update status on node <%v>, expected false", 1782 volumeName, 1783 nodeName) 1784 } 1785 } 1786 t.Logf("Volume <%v> is not reported as need to update status on node <%v>", volumeName, nodeName) 1787 } 1788 1789 func verifyNewDetacherCallCount( 1790 t *testing.T, 1791 expectZeroNewDetacherCallCount bool, 1792 fakePlugin *volumetesting.FakeVolumePlugin) { 1793 1794 if expectZeroNewDetacherCallCount && 1795 fakePlugin.GetNewDetacherCallCount() != 0 { 1796 t.Fatalf("Wrong NewDetacherCallCount. 
Expected: <0> Actual: <%v>",
1797 fakePlugin.GetNewDetacherCallCount())
1798 }
1799 }
1800 
1801 func retryWithExponentialBackOff(initialDuration time.Duration, fn wait.ConditionFunc) error {
1802 backoff := wait.Backoff{
1803 Duration: initialDuration,
1804 Factor: 3,
1805 Jitter: 0,
1806 Steps: 6,
1807 }
1808 return wait.ExponentialBackoff(backoff, fn)
1809 }
1810 
1811 // testForceDetachMetric verifies that the force detach metric for the given reason has the expected value.
1812 func testForceDetachMetric(t *testing.T, inputForceDetachMetricCounter int, reason string) {
1813 t.Helper()
1814 
1815 actualForceDetachMetricCounter, err := metricstestutil.GetCounterMetricValue(metrics.ForceDetachMetricCounter.WithLabelValues(reason))
1816 if err != nil {
1817 t.Errorf("Error getting actualForceDetachMetricCounter: %v", err)
1818 }
1819 if actualForceDetachMetricCounter != float64(inputForceDetachMetricCounter) {
1820 t.Errorf("Expected force detach metric counter for reason %q to be %d, got %v", reason, inputForceDetachMetricCounter, actualForceDetachMetricCounter)
1821 }
1822 }
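// The wait* helpers above all follow the same shape: poll a counter exposed by the fake
// plugin via retryWithExponentialBackOff until it reaches the expected value, logging a
// warning on each miss and failing the test on timeout. A generic variant along these lines
// could consolidate them; the sketch below is illustrative only (the helper name and
// signature are hypothetical, and nothing in this file calls it).
func waitForExpectedCallCount(t *testing.T, expectedCallCount int, what string, getActual func() int) {
	t.Helper()
	err := retryWithExponentialBackOff(
		5*time.Millisecond,
		func() (bool, error) {
			actualCallCount := getActual()
			if actualCallCount >= expectedCallCount {
				return true, nil
			}
			t.Logf(
				"Warning: Wrong %s. Expected: <%v> Actual: <%v>. Will retry.",
				what, expectedCallCount, actualCallCount)
			return false, nil
		},
	)

	if err != nil {
		t.Fatalf(
			"Timed out waiting for %s. Expected: <%v> Actual: <%v>",
			what, expectedCallCount, getActual())
	}
}

// For example, waitForNewAttacherCallCount(t, 1, fakePlugin) would then be expressed as
// waitForExpectedCallCount(t, 1, "NewAttacherCallCount", fakePlugin.GetNewAttacherCallCount).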