k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e/apps/daemon_set.go 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package apps 18 19 import ( 20 "bytes" 21 "context" 22 "encoding/json" 23 "fmt" 24 "math/rand" 25 "reflect" 26 "sort" 27 "strings" 28 "text/tabwriter" 29 "time" 30 31 "k8s.io/client-go/tools/cache" 32 33 "github.com/onsi/ginkgo/v2" 34 "github.com/onsi/gomega" 35 appsv1 "k8s.io/api/apps/v1" 36 v1 "k8s.io/api/core/v1" 37 apierrors "k8s.io/apimachinery/pkg/api/errors" 38 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 39 "k8s.io/apimachinery/pkg/labels" 40 "k8s.io/apimachinery/pkg/runtime" 41 "k8s.io/apimachinery/pkg/runtime/schema" 42 "k8s.io/apimachinery/pkg/selection" 43 "k8s.io/apimachinery/pkg/types" 44 "k8s.io/apimachinery/pkg/util/intstr" 45 "k8s.io/apimachinery/pkg/util/sets" 46 "k8s.io/apimachinery/pkg/util/wait" 47 watch "k8s.io/apimachinery/pkg/watch" 48 clientset "k8s.io/client-go/kubernetes" 49 "k8s.io/client-go/kubernetes/scheme" 50 watchtools "k8s.io/client-go/tools/watch" 51 "k8s.io/client-go/util/retry" 52 podutil "k8s.io/kubernetes/pkg/api/v1/pod" 53 extensionsinternal "k8s.io/kubernetes/pkg/apis/extensions" 54 "k8s.io/kubernetes/pkg/controller/daemon" 55 "k8s.io/kubernetes/test/e2e/framework" 56 e2edaemonset "k8s.io/kubernetes/test/e2e/framework/daemonset" 57 e2ekubectl "k8s.io/kubernetes/test/e2e/framework/kubectl" 58 e2enode "k8s.io/kubernetes/test/e2e/framework/node" 59 e2eresource "k8s.io/kubernetes/test/e2e/framework/resource" 60 admissionapi "k8s.io/pod-security-admission/api" 61 ) 62 63 const ( 64 // this should not be a multiple of 5, because node status updates 65 // every 5 seconds. See https://github.com/kubernetes/kubernetes/pull/14915. 66 dsRetryPeriod = 1 * time.Second 67 dsRetryTimeout = 5 * time.Minute 68 69 daemonsetLabelPrefix = "daemonset-" 70 daemonsetNameLabel = daemonsetLabelPrefix + "name" 71 daemonsetColorLabel = daemonsetLabelPrefix + "color" 72 ) 73 74 // NamespaceNodeSelectors holds the annotation key scheduler.alpha.kubernetes.io/node-selector, which is used for assigning 75 // node selector labels to namespaces 76 var NamespaceNodeSelectors = []string{"scheduler.alpha.kubernetes.io/node-selector"} 77 78 var nonTerminalPhaseSelector = func() labels.Selector { 79 var reqs []labels.Requirement 80 for _, phase := range []v1.PodPhase{v1.PodFailed, v1.PodSucceeded} { 81 req, _ := labels.NewRequirement("status.phase", selection.NotEquals, []string{string(phase)}) 82 reqs = append(reqs, *req) 83 } 84 selector := labels.NewSelector() 85 return selector.Add(reqs...) 86 }() 87 88 type updateDSFunc func(*appsv1.DaemonSet) 89 90 // updateDaemonSetWithRetries updates daemonsets with the given applyUpdate func 91 // until it succeeds or a timeout expires.
92 func updateDaemonSetWithRetries(ctx context.Context, c clientset.Interface, namespace, name string, applyUpdate updateDSFunc) (ds *appsv1.DaemonSet, err error) { 93 daemonsets := c.AppsV1().DaemonSets(namespace) 94 var updateErr error 95 pollErr := wait.PollUntilContextTimeout(ctx, 10*time.Millisecond, 1*time.Minute, true, func(ctx context.Context) (bool, error) { 96 if ds, err = daemonsets.Get(ctx, name, metav1.GetOptions{}); err != nil { 97 return false, err 98 } 99 // Apply the update, then attempt to push it to the apiserver. 100 applyUpdate(ds) 101 if ds, err = daemonsets.Update(ctx, ds, metav1.UpdateOptions{}); err == nil { 102 framework.Logf("Updating DaemonSet %s", name) 103 return true, nil 104 } 105 updateErr = err 106 return false, nil 107 }) 108 if wait.Interrupted(pollErr) { 109 pollErr = fmt.Errorf("couldn't apply the provided update to DaemonSet %q: %v", name, updateErr) 110 } 111 return ds, pollErr 112 } 113 114 // This test must be run in serial because it assumes the Daemon Set pods will 115 // always get scheduled. If we run other tests in parallel, this may not 116 // happen. In the future, running in parallel may work if we have an eviction 117 // model which lets the DS controller kick out other pods to make room. 118 // See https://issues.k8s.io/21767 for more details 119 var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() { 120 var f *framework.Framework 121 122 ginkgo.AfterEach(func(ctx context.Context) { 123 // Clean up 124 daemonsets, err := f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).List(ctx, metav1.ListOptions{}) 125 framework.ExpectNoError(err, "unable to dump DaemonSets") 126 if daemonsets != nil && len(daemonsets.Items) > 0 { 127 for _, ds := range daemonsets.Items { 128 ginkgo.By(fmt.Sprintf("Deleting DaemonSet %q", ds.Name)) 129 framework.ExpectNoError(e2eresource.DeleteResourceAndWaitForGC(ctx, f.ClientSet, extensionsinternal.Kind("DaemonSet"), f.Namespace.Name, ds.Name)) 130 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnNoNodes(f, &ds)) 131 framework.ExpectNoError(err, "error waiting for daemon pod to be reaped") 132 } 133 } 134 if daemonsets, err := f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).List(ctx, metav1.ListOptions{}); err == nil { 135 framework.Logf("daemonset: %s", runtime.EncodeOrDie(scheme.Codecs.LegacyCodec(scheme.Scheme.PrioritizedVersionsAllGroups()...), daemonsets)) 136 } else { 137 framework.Logf("unable to dump daemonsets: %v", err) 138 } 139 if pods, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(ctx, metav1.ListOptions{}); err == nil { 140 framework.Logf("pods: %s", runtime.EncodeOrDie(scheme.Codecs.LegacyCodec(scheme.Scheme.PrioritizedVersionsAllGroups()...), pods)) 141 } else { 142 framework.Logf("unable to dump pods: %v", err) 143 } 144 err = clearDaemonSetNodeLabels(ctx, f.ClientSet) 145 framework.ExpectNoError(err) 146 }) 147 148 f = framework.NewDefaultFramework("daemonsets") 149 f.NamespacePodSecurityLevel = admissionapi.LevelBaseline 150 151 image := WebserverImage 152 dsName := "daemon-set" 153 154 var ns string 155 var c clientset.Interface 156 157 ginkgo.BeforeEach(func(ctx context.Context) { 158 ns = f.Namespace.Name 159 160 c = f.ClientSet 161 162 updatedNS, err := patchNamespaceAnnotations(ctx, c, ns) 163 framework.ExpectNoError(err) 164 165 ns = updatedNS.Name 166 167 err = clearDaemonSetNodeLabels(ctx, c) 168 framework.ExpectNoError(err) 169 }) 170 171 /* 172 Release: v1.10 173 Testname: DaemonSet-Creation 174 Description: A conformant
Kubernetes distribution MUST support the creation of DaemonSets. When a DaemonSet 175 Pod is deleted, the DaemonSet controller MUST create a replacement Pod. 176 */ 177 framework.ConformanceIt("should run and stop simple daemon", func(ctx context.Context) { 178 label := map[string]string{daemonsetNameLabel: dsName} 179 180 ginkgo.By(fmt.Sprintf("Creating simple DaemonSet %q", dsName)) 181 ds, err := c.AppsV1().DaemonSets(ns).Create(ctx, newDaemonSet(dsName, image, label), metav1.CreateOptions{}) 182 framework.ExpectNoError(err) 183 184 ginkgo.By("Check that daemon pods launch on every node of the cluster.") 185 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds)) 186 framework.ExpectNoError(err, "error waiting for daemon pod to start") 187 err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName) 188 framework.ExpectNoError(err) 189 190 ginkgo.By("Stop a daemon pod, check that the daemon pod is revived.") 191 podList := listDaemonPods(ctx, c, ns, label) 192 pod := podList.Items[0] 193 err = c.CoreV1().Pods(ns).Delete(ctx, pod.Name, metav1.DeleteOptions{}) 194 framework.ExpectNoError(err) 195 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds)) 196 framework.ExpectNoError(err, "error waiting for daemon pod to revive") 197 }) 198 199 /* 200 Release: v1.10 201 Testname: DaemonSet-NodeSelection 202 Description: A conformant Kubernetes distribution MUST support DaemonSet Pod node selection via label 203 selectors. 204 */ 205 framework.ConformanceIt("should run and stop complex daemon", func(ctx context.Context) { 206 complexLabel := map[string]string{daemonsetNameLabel: dsName} 207 nodeSelector := map[string]string{daemonsetColorLabel: "blue"} 208 framework.Logf("Creating daemon %q with a node selector", dsName) 209 ds := newDaemonSet(dsName, image, complexLabel) 210 ds.Spec.Template.Spec.NodeSelector = nodeSelector 211 ds, err := c.AppsV1().DaemonSets(ns).Create(ctx, ds, metav1.CreateOptions{}) 212 framework.ExpectNoError(err) 213 214 ginkgo.By("Initially, daemon pods should not be running on any nodes.") 215 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnNoNodes(f, ds)) 216 framework.ExpectNoError(err, "error waiting for daemon pods to be running on no nodes") 217 218 ginkgo.By("Change node label to blue, check that daemon pod is launched.") 219 node, err := e2enode.GetRandomReadySchedulableNode(ctx, f.ClientSet) 220 framework.ExpectNoError(err) 221 newNode, err := setDaemonSetNodeLabels(ctx, c, node.Name, nodeSelector) 222 framework.ExpectNoError(err, "error setting labels on node") 223 daemonSetLabels, _ := separateDaemonSetNodeLabels(newNode.Labels) 224 gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1)) 225 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{newNode.Name})) 226 framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes") 227 err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName) 228 framework.ExpectNoError(err) 229 230 ginkgo.By("Update the node label to green, and wait for daemons to be unscheduled") 231 nodeSelector[daemonsetColorLabel] = "green" 232 greenNode, err := setDaemonSetNodeLabels(ctx, c, node.Name, nodeSelector) 233 framework.ExpectNoError(err, "error removing labels on node") 234 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnNoNodes(f, ds)) 235 
framework.ExpectNoError(err, "error waiting for daemon pod to not be running on nodes") 236 237 ginkgo.By("Update DaemonSet node selector to green, and change its update strategy to RollingUpdate") 238 patch := fmt.Sprintf(`{"spec":{"template":{"spec":{"nodeSelector":{"%s":"%s"}}},"updateStrategy":{"type":"RollingUpdate"}}}`, 239 daemonsetColorLabel, greenNode.Labels[daemonsetColorLabel]) 240 ds, err = c.AppsV1().DaemonSets(ns).Patch(ctx, dsName, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{}) 241 framework.ExpectNoError(err, "error patching daemon set") 242 daemonSetLabels, _ = separateDaemonSetNodeLabels(greenNode.Labels) 243 gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1)) 244 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{greenNode.Name})) 245 framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes") 246 err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName) 247 framework.ExpectNoError(err) 248 }) 249 250 // We defer adding this test to conformance pending the disposition of moving DaemonSet scheduling logic to the 251 // default scheduler. 252 ginkgo.It("should run and stop complex daemon with node affinity", func(ctx context.Context) { 253 complexLabel := map[string]string{daemonsetNameLabel: dsName} 254 nodeSelector := map[string]string{daemonsetColorLabel: "blue"} 255 framework.Logf("Creating daemon %q with a node affinity", dsName) 256 ds := newDaemonSet(dsName, image, complexLabel) 257 ds.Spec.Template.Spec.Affinity = &v1.Affinity{ 258 NodeAffinity: &v1.NodeAffinity{ 259 RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ 260 NodeSelectorTerms: []v1.NodeSelectorTerm{ 261 { 262 MatchExpressions: []v1.NodeSelectorRequirement{ 263 { 264 Key: daemonsetColorLabel, 265 Operator: v1.NodeSelectorOpIn, 266 Values: []string{nodeSelector[daemonsetColorLabel]}, 267 }, 268 }, 269 }, 270 }, 271 }, 272 }, 273 } 274 ds, err := c.AppsV1().DaemonSets(ns).Create(ctx, ds, metav1.CreateOptions{}) 275 framework.ExpectNoError(err) 276 277 ginkgo.By("Initially, daemon pods should not be running on any nodes.") 278 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnNoNodes(f, ds)) 279 framework.ExpectNoError(err, "error waiting for daemon pods to be running on no nodes") 280 281 ginkgo.By("Change node label to blue, check that daemon pod is launched.") 282 node, err := e2enode.GetRandomReadySchedulableNode(ctx, f.ClientSet) 283 framework.ExpectNoError(err) 284 newNode, err := setDaemonSetNodeLabels(ctx, c, node.Name, nodeSelector) 285 framework.ExpectNoError(err, "error setting labels on node") 286 daemonSetLabels, _ := separateDaemonSetNodeLabels(newNode.Labels) 287 gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1)) 288 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{newNode.Name})) 289 framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes") 290 err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName) 291 framework.ExpectNoError(err) 292 293 ginkgo.By("Remove the node label and wait for daemons to be unscheduled") 294 _, err = setDaemonSetNodeLabels(ctx, c, node.Name, map[string]string{}) 295 framework.ExpectNoError(err, "error removing labels on node") 296 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnNoNodes(f, ds)) 297 framework.ExpectNoError(err, "error waiting 
for daemon pod to not be running on nodes") 298 }) 299 300 /* 301 Release: v1.10 302 Testname: DaemonSet-FailedPodCreation 303 Description: A conformant Kubernetes distribution MUST create new DaemonSet Pods when they fail. 304 */ 305 framework.ConformanceIt("should retry creating failed daemon pods", func(ctx context.Context) { 306 label := map[string]string{daemonsetNameLabel: dsName} 307 308 ginkgo.By(fmt.Sprintf("Creating a simple DaemonSet %q", dsName)) 309 ds, err := c.AppsV1().DaemonSets(ns).Create(ctx, newDaemonSet(dsName, image, label), metav1.CreateOptions{}) 310 framework.ExpectNoError(err) 311 312 ginkgo.By("Check that daemon pods launch on every node of the cluster.") 313 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds)) 314 framework.ExpectNoError(err, "error waiting for daemon pod to start") 315 err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName) 316 framework.ExpectNoError(err) 317 318 ginkgo.By("Set a daemon pod's phase to 'Failed', check that the daemon pod is revived.") 319 podList := listDaemonPods(ctx, c, ns, label) 320 pod := podList.Items[0] 321 pod.ResourceVersion = "" 322 pod.Status.Phase = v1.PodFailed 323 _, err = c.CoreV1().Pods(ns).UpdateStatus(ctx, &pod, metav1.UpdateOptions{}) 324 framework.ExpectNoError(err, "error failing a daemon pod") 325 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds)) 326 framework.ExpectNoError(err, "error waiting for daemon pod to revive") 327 328 ginkgo.By("Wait for the failed daemon pod to be completely deleted.") 329 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, waitFailedDaemonPodDeleted(c, &pod)) 330 framework.ExpectNoError(err, "error waiting for the failed daemon pod to be completely deleted") 331 }) 332 333 // This test should not be added to conformance. We will consider deprecating OnDelete when the 334 // extensions/v1beta1 and apps/v1beta1 are removed. 
335 ginkgo.It("should not update pod when spec was updated and update strategy is OnDelete", func(ctx context.Context) { 336 label := map[string]string{daemonsetNameLabel: dsName} 337 338 framework.Logf("Creating simple daemon set %s", dsName) 339 ds := newDaemonSet(dsName, image, label) 340 ds.Spec.UpdateStrategy = appsv1.DaemonSetUpdateStrategy{Type: appsv1.OnDeleteDaemonSetStrategyType} 341 ds, err := c.AppsV1().DaemonSets(ns).Create(ctx, ds, metav1.CreateOptions{}) 342 framework.ExpectNoError(err) 343 344 ginkgo.By("Check that daemon pods launch on every node of the cluster.") 345 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds)) 346 framework.ExpectNoError(err, "error waiting for daemon pod to start") 347 348 // Check history and labels 349 ds, err = c.AppsV1().DaemonSets(ns).Get(ctx, ds.Name, metav1.GetOptions{}) 350 framework.ExpectNoError(err) 351 waitForHistoryCreated(ctx, c, ns, label, 1) 352 first := curHistory(listDaemonHistories(ctx, c, ns, label), ds) 353 firstHash := first.Labels[appsv1.DefaultDaemonSetUniqueLabelKey] 354 gomega.Expect(first.Revision).To(gomega.Equal(int64(1))) 355 checkDaemonSetPodsLabels(listDaemonPods(ctx, c, ns, label), firstHash) 356 357 ginkgo.By("Update daemon pods image.") 358 patch := getDaemonSetImagePatch(ds.Spec.Template.Spec.Containers[0].Name, AgnhostImage) 359 ds, err = c.AppsV1().DaemonSets(ns).Patch(ctx, dsName, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{}) 360 framework.ExpectNoError(err) 361 362 ginkgo.By("Check that daemon pods images aren't updated.") 363 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkDaemonPodsImageAndAvailability(c, ds, image, 0)) 364 framework.ExpectNoError(err) 365 366 ginkgo.By("Check that daemon pods are still running on every node of the cluster.") 367 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds)) 368 framework.ExpectNoError(err, "error waiting for daemon pod to start") 369 370 // Check history and labels 371 ds, err = c.AppsV1().DaemonSets(ns).Get(ctx, ds.Name, metav1.GetOptions{}) 372 framework.ExpectNoError(err) 373 waitForHistoryCreated(ctx, c, ns, label, 2) 374 cur := curHistory(listDaemonHistories(ctx, c, ns, label), ds) 375 gomega.Expect(cur.Revision).To(gomega.Equal(int64(2))) 376 gomega.Expect(cur.Labels).NotTo(gomega.HaveKeyWithValue(appsv1.DefaultDaemonSetUniqueLabelKey, firstHash)) 377 checkDaemonSetPodsLabels(listDaemonPods(ctx, c, ns, label), firstHash) 378 }) 379 380 /* 381 Release: v1.10 382 Testname: DaemonSet-RollingUpdate 383 Description: A conformant Kubernetes distribution MUST support DaemonSet RollingUpdates. 
384 */ 385 framework.ConformanceIt("should update pod when spec was updated and update strategy is RollingUpdate", func(ctx context.Context) { 386 label := map[string]string{daemonsetNameLabel: dsName} 387 388 framework.Logf("Creating simple daemon set %s", dsName) 389 ds := newDaemonSet(dsName, image, label) 390 ds.Spec.UpdateStrategy = appsv1.DaemonSetUpdateStrategy{Type: appsv1.RollingUpdateDaemonSetStrategyType} 391 ds, err := c.AppsV1().DaemonSets(ns).Create(ctx, ds, metav1.CreateOptions{}) 392 framework.ExpectNoError(err) 393 394 ginkgo.By("Check that daemon pods launch on every node of the cluster.") 395 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds)) 396 framework.ExpectNoError(err, "error waiting for daemon pod to start") 397 398 // Check history and labels 399 ds, err = c.AppsV1().DaemonSets(ns).Get(ctx, ds.Name, metav1.GetOptions{}) 400 framework.ExpectNoError(err) 401 waitForHistoryCreated(ctx, c, ns, label, 1) 402 cur := curHistory(listDaemonHistories(ctx, c, ns, label), ds) 403 hash := cur.Labels[appsv1.DefaultDaemonSetUniqueLabelKey] 404 gomega.Expect(cur.Revision).To(gomega.Equal(int64(1))) 405 checkDaemonSetPodsLabels(listDaemonPods(ctx, c, ns, label), hash) 406 407 ginkgo.By("Update daemon pods image.") 408 patch := getDaemonSetImagePatch(ds.Spec.Template.Spec.Containers[0].Name, AgnhostImage) 409 ds, err = c.AppsV1().DaemonSets(ns).Patch(ctx, dsName, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{}) 410 framework.ExpectNoError(err) 411 412 // Time to complete the rolling upgrade is proportional to the number of nodes in the cluster. 413 // Get the number of nodes, and set the timeout appropriately. 414 nodes, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) 415 framework.ExpectNoError(err) 416 nodeCount := len(nodes.Items) 417 retryTimeout := dsRetryTimeout + time.Duration(nodeCount*30)*time.Second 418 419 ginkgo.By("Check that daemon pods images are updated.") 420 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, retryTimeout, true, checkDaemonPodsImageAndAvailability(c, ds, AgnhostImage, 1)) 421 framework.ExpectNoError(err) 422 423 ginkgo.By("Check that daemon pods are still running on every node of the cluster.") 424 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds)) 425 framework.ExpectNoError(err, "error waiting for daemon pod to start") 426 427 // Check history and labels 428 ds, err = c.AppsV1().DaemonSets(ns).Get(ctx, ds.Name, metav1.GetOptions{}) 429 framework.ExpectNoError(err) 430 waitForHistoryCreated(ctx, c, ns, label, 2) 431 cur = curHistory(listDaemonHistories(ctx, c, ns, label), ds) 432 hash = cur.Labels[appsv1.DefaultDaemonSetUniqueLabelKey] 433 gomega.Expect(cur.Revision).To(gomega.Equal(int64(2))) 434 checkDaemonSetPodsLabels(listDaemonPods(ctx, c, ns, label), hash) 435 }) 436 437 /* 438 Release: v1.10 439 Testname: DaemonSet-Rollback 440 Description: A conformant Kubernetes distribution MUST support automated, minimally disruptive 441 rollback of updates to a DaemonSet. 
442 */ 443 framework.ConformanceIt("should rollback without unnecessary restarts", func(ctx context.Context) { 444 schedulableNodes, err := e2enode.GetReadySchedulableNodes(ctx, c) 445 framework.ExpectNoError(err) 446 gomega.Expect(len(schedulableNodes.Items)).To(gomega.BeNumerically(">", 1), "Conformance test suite needs a cluster with at least 2 nodes.") 447 framework.Logf("Create a RollingUpdate DaemonSet") 448 label := map[string]string{daemonsetNameLabel: dsName} 449 ds := newDaemonSet(dsName, image, label) 450 ds.Spec.UpdateStrategy = appsv1.DaemonSetUpdateStrategy{Type: appsv1.RollingUpdateDaemonSetStrategyType} 451 ds, err = c.AppsV1().DaemonSets(ns).Create(ctx, ds, metav1.CreateOptions{}) 452 framework.ExpectNoError(err) 453 454 framework.Logf("Check that daemon pods launch on every node of the cluster") 455 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds)) 456 framework.ExpectNoError(err, "error waiting for daemon pod to start") 457 458 framework.Logf("Update the DaemonSet to trigger a rollout") 459 // We use a nonexistent image here, so that we make sure it won't finish 460 newImage := "foo:non-existent" 461 newDS, err := updateDaemonSetWithRetries(ctx, c, ns, ds.Name, func(update *appsv1.DaemonSet) { 462 update.Spec.Template.Spec.Containers[0].Image = newImage 463 }) 464 framework.ExpectNoError(err) 465 466 // Make sure we're in the middle of a rollout 467 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkAtLeastOneNewPod(c, ns, label, newImage)) 468 framework.ExpectNoError(err) 469 470 pods := listDaemonPods(ctx, c, ns, label) 471 var existingPods, newPods []*v1.Pod 472 for i := range pods.Items { 473 pod := pods.Items[i] 474 image := pod.Spec.Containers[0].Image 475 switch image { 476 case ds.Spec.Template.Spec.Containers[0].Image: 477 existingPods = append(existingPods, &pod) 478 case newDS.Spec.Template.Spec.Containers[0].Image: 479 newPods = append(newPods, &pod) 480 default: 481 framework.Failf("unexpected pod found, image = %s", image) 482 } 483 } 484 schedulableNodes, err = e2enode.GetReadySchedulableNodes(ctx, c) 485 framework.ExpectNoError(err) 486 if len(schedulableNodes.Items) < 2 { 487 gomega.Expect(existingPods).To(gomega.BeEmpty()) 488 } else { 489 gomega.Expect(existingPods).NotTo(gomega.BeEmpty()) 490 } 491 gomega.Expect(newPods).NotTo(gomega.BeEmpty()) 492 493 framework.Logf("Roll back the DaemonSet before rollout is complete") 494 rollbackDS, err := updateDaemonSetWithRetries(ctx, c, ns, ds.Name, func(update *appsv1.DaemonSet) { 495 update.Spec.Template.Spec.Containers[0].Image = image 496 }) 497 framework.ExpectNoError(err) 498 499 framework.Logf("Make sure DaemonSet rollback is complete") 500 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkDaemonPodsImageAndAvailability(c, rollbackDS, image, 1)) 501 framework.ExpectNoError(err) 502 503 // After rollback is done, compare current pods with previous old pods during rollout, to make sure they're not restarted 504 pods = listDaemonPods(ctx, c, ns, label) 505 rollbackPods := map[string]bool{} 506 for _, pod := range pods.Items { 507 rollbackPods[pod.Name] = true 508 } 509 for _, pod := range existingPods { 510 if !rollbackPods[pod.Name] { 511 framework.Failf("unexpected pod %s be restarted", pod.Name) 512 } 513 } 514 }) 515 516 // TODO: This test is expected to be promoted to conformance after the feature is promoted 517 ginkgo.It("should surge pods onto nodes when spec was updated and 
update strategy is RollingUpdate", func(ctx context.Context) { 518 label := map[string]string{daemonsetNameLabel: dsName} 519 520 framework.Logf("Creating surge daemon set %s", dsName) 521 maxSurgeOverlap := 60 * time.Second 522 maxSurge := 1 523 surgePercent := intstr.FromString("20%") 524 zero := intstr.FromInt32(0) 525 oldVersion := "1" 526 ds := newDaemonSet(dsName, image, label) 527 ds.Spec.Template.Spec.Containers[0].Env = []v1.EnvVar{ 528 {Name: "VERSION", Value: oldVersion}, 529 } 530 // delay shutdown by 15s to allow containers to overlap in time 531 ds.Spec.Template.Spec.Containers[0].Lifecycle = &v1.Lifecycle{ 532 PreStop: &v1.LifecycleHandler{ 533 Exec: &v1.ExecAction{ 534 Command: []string{"/bin/sh", "-c", "sleep 15"}, 535 }, 536 }, 537 } 538 // use a readiness probe that can be forced to fail (by changing the contents of /var/tmp/ready) 539 ds.Spec.Template.Spec.Containers[0].ReadinessProbe = &v1.Probe{ 540 ProbeHandler: v1.ProbeHandler{ 541 Exec: &v1.ExecAction{ 542 Command: []string{"/bin/sh", "-ec", `touch /var/tmp/ready; [[ "$( cat /var/tmp/ready )" == "" ]]`}, 543 }, 544 }, 545 InitialDelaySeconds: 7, 546 PeriodSeconds: 3, 547 SuccessThreshold: 1, 548 FailureThreshold: 1, 549 } 550 // use a simple surge strategy 551 ds.Spec.UpdateStrategy = appsv1.DaemonSetUpdateStrategy{ 552 Type: appsv1.RollingUpdateDaemonSetStrategyType, 553 RollingUpdate: &appsv1.RollingUpdateDaemonSet{ 554 MaxUnavailable: &zero, 555 MaxSurge: &surgePercent, 556 }, 557 } 558 // The pod must be ready for at least 10s before we delete the old pod 559 ds.Spec.MinReadySeconds = 10 560 561 ds, err := c.AppsV1().DaemonSets(ns).Create(ctx, ds, metav1.CreateOptions{}) 562 framework.ExpectNoError(err) 563 564 ginkgo.By("Check that daemon pods launch on every node of the cluster.") 565 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds)) 566 framework.ExpectNoError(err, "error waiting for daemon pod to start") 567 568 // Check history and labels 569 ds, err = c.AppsV1().DaemonSets(ns).Get(ctx, ds.Name, metav1.GetOptions{}) 570 framework.ExpectNoError(err) 571 waitForHistoryCreated(ctx, c, ns, label, 1) 572 cur := curHistory(listDaemonHistories(ctx, c, ns, label), ds) 573 hash := cur.Labels[appsv1.DefaultDaemonSetUniqueLabelKey] 574 gomega.Expect(cur.Revision).To(gomega.Equal(int64(1))) 575 checkDaemonSetPodsLabels(listDaemonPods(ctx, c, ns, label), hash) 576 577 newVersion := "2" 578 ginkgo.By("Update daemon pods environment var") 579 patch := fmt.Sprintf(`{"spec":{"template":{"spec":{"containers":[{"name":"%s","env":[{"name":"VERSION","value":"%s"}]}]}}}}`, ds.Spec.Template.Spec.Containers[0].Name, newVersion) 580 ds, err = c.AppsV1().DaemonSets(ns).Patch(ctx, dsName, types.StrategicMergePatchType, []byte(patch), metav1.PatchOptions{}) 581 framework.ExpectNoError(err) 582 583 // Time to complete the rolling upgrade is proportional to the number of nodes in the cluster. 584 // Get the number of nodes, and set the timeout appropriately. 585 nodes, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) 586 framework.ExpectNoError(err) 587 nodeCount := len(nodes.Items) 588 // We disturb daemonset progress by randomly terminating pods. 
589 randomPodTerminationTimeout := 5 * time.Minute 590 retryTimeout := dsRetryTimeout + randomPodTerminationTimeout + time.Duration(nodeCount*30)*time.Second 591 592 ginkgo.By("Check that daemon pods surge and invariants are preserved during that rollout") 593 nodeToAgeOfOldPod := make(map[string]map[string]time.Time) 594 deliberatelyDeletedPods := sets.NewString() 595 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, retryTimeout, true, func(ctx context.Context) (bool, error) { 596 podList, err := c.CoreV1().Pods(ds.Namespace).List(ctx, metav1.ListOptions{}) 597 if err != nil { 598 return false, err 599 } 600 pods := podList.Items 601 602 var buf bytes.Buffer 603 pw := tabwriter.NewWriter(&buf, 1, 1, 1, ' ', 0) 604 fmt.Fprint(pw, "Node\tVersion\tName\tUID\tDeleted\tReady\n") 605 606 now := time.Now() 607 podUIDs := sets.NewString() 608 deletedPodUIDs := sets.NewString() 609 nodes := sets.NewString() 610 versions := sets.NewString() 611 nodesToVersions := make(map[string]map[string]int) 612 nodesToDeletedVersions := make(map[string]map[string]int) 613 var surgeCount, newUnavailableCount, newDeliberatelyDeletedCount, oldUnavailableCount, nodesWithoutOldVersion int 614 for _, pod := range pods { 615 if !metav1.IsControlledBy(&pod, ds) { 616 continue 617 } 618 nodeName := pod.Spec.NodeName 619 nodes.Insert(nodeName) 620 podVersion := pod.Spec.Containers[0].Env[0].Value 621 if pod.DeletionTimestamp != nil { 622 if !deliberatelyDeletedPods.Has(string(pod.UID)) { 623 versions := nodesToDeletedVersions[nodeName] 624 if versions == nil { 625 versions = make(map[string]int) 626 nodesToDeletedVersions[nodeName] = versions 627 } 628 versions[podVersion]++ 629 } 630 } else { 631 versions := nodesToVersions[nodeName] 632 if versions == nil { 633 versions = make(map[string]int) 634 nodesToVersions[nodeName] = versions 635 } 636 versions[podVersion]++ 637 } 638 639 ready := podutil.IsPodAvailable(&pod, ds.Spec.MinReadySeconds, metav1.Now()) 640 if podVersion == newVersion { 641 surgeCount++ 642 if !ready || pod.DeletionTimestamp != nil { 643 if deliberatelyDeletedPods.Has(string(pod.UID)) { 644 newDeliberatelyDeletedCount++ 645 } 646 newUnavailableCount++ 647 } 648 } else { 649 if !ready || pod.DeletionTimestamp != nil { 650 oldUnavailableCount++ 651 } 652 } 653 fmt.Fprintf(pw, "%s\t%s\t%s\t%s\t%t\t%t\n", pod.Spec.NodeName, podVersion, pod.Name, pod.UID, pod.DeletionTimestamp != nil, ready) 654 } 655 656 // print a stable sorted list of pods by node for debugging 657 pw.Flush() 658 lines := strings.Split(buf.String(), "\n") 659 lines = lines[:len(lines)-1] 660 sort.Strings(lines[1:]) 661 for _, line := range lines { 662 framework.Logf("%s", line) 663 } 664 665 // if there is an old and new pod at the same time, record a timestamp 666 deletedPerNode := make(map[string]int) 667 for _, pod := range pods { 668 if !metav1.IsControlledBy(&pod, ds) { 669 continue 670 } 671 // ignore deleted pods 672 if pod.DeletionTimestamp != nil { 673 deletedPodUIDs.Insert(string(pod.UID)) 674 if !deliberatelyDeletedPods.Has(string(pod.UID)) { 675 deletedPerNode[pod.Spec.NodeName]++ 676 } 677 continue 678 } 679 podUIDs.Insert(string(pod.UID)) 680 podVersion := pod.Spec.Containers[0].Env[0].Value 681 if podVersion == newVersion { 682 continue 683 } 684 // if this is a pod in an older version AND there is a new version of this pod, record when 685 // we started seeing this, otherwise delete the record (perhaps the node was drained) 686 if nodesToVersions[pod.Spec.NodeName][newVersion] > 0 { 687 if _, ok := 
nodeToAgeOfOldPod[pod.Spec.NodeName][string(pod.UID)]; !ok { 688 if _, ok := nodeToAgeOfOldPod[pod.Spec.NodeName]; !ok { 689 nodeToAgeOfOldPod[pod.Spec.NodeName] = make(map[string]time.Time) 690 } 691 nodeToAgeOfOldPod[pod.Spec.NodeName][string(pod.UID)] = now 692 } 693 } else { 694 delete(nodeToAgeOfOldPod, pod.Spec.NodeName) 695 } 696 } 697 // purge the old pods list of any deleted pods 698 for node, uidToTime := range nodeToAgeOfOldPod { 699 for uid := range uidToTime { 700 if !podUIDs.Has(uid) { 701 delete(uidToTime, uid) 702 } 703 } 704 if len(uidToTime) == 0 { 705 delete(nodeToAgeOfOldPod, node) 706 } 707 } 708 deliberatelyDeletedPods = deliberatelyDeletedPods.Intersection(deletedPodUIDs) 709 710 for _, versions := range nodesToVersions { 711 if versions[oldVersion] == 0 { 712 nodesWithoutOldVersion++ 713 } 714 } 715 716 var errs []string 717 718 // invariant: we should not see more than 1 deleted pod per node unless a severe node problem is occurring or the controller is misbehaving 719 for node, count := range deletedPerNode { 720 if count > 1 { 721 errs = append(errs, fmt.Sprintf("Node %s has %d deleted pods, which may indicate a problem on the node or a controller race condition", node, count)) 722 } 723 } 724 725 // invariant: the controller must react to the new pod becoming ready within a reasonable timeframe (2x grace period) 726 for node, uidToTime := range nodeToAgeOfOldPod { 727 for uid, firstSeenSinceNewVersionPod := range uidToTime { 728 if now.Sub(firstSeenSinceNewVersionPod) > maxSurgeOverlap { 729 errs = append(errs, fmt.Sprintf("An old pod with UID %s on node %s has been running alongside a newer version for longer than %s", uid, node, maxSurgeOverlap)) 730 } 731 } 732 } 733 734 // invariant: we should never have more than maxSurge + oldUnavailableCount instances of the new version unready unless a flake in the infrastructure happens, or 735 // if we deliberately deleted one of the new pods 736 if newUnavailableCount > (maxSurge + oldUnavailableCount + newDeliberatelyDeletedCount + nodesWithoutOldVersion) { 737 errs = append(errs, fmt.Sprintf("observed %d new unavailable pods greater than (surge count %d + old unavailable count %d + deliberately deleted new count %d + nodes without old version %d), may be infrastructure flake", newUnavailableCount, maxSurge, oldUnavailableCount, newDeliberatelyDeletedCount, nodesWithoutOldVersion)) 738 } 739 // invariant: the total number of versions created should be 2 740 if versions.Len() > 2 { 741 errs = append(errs, fmt.Sprintf("observed %d versions running simultaneously, must have max 2", versions.Len())) 742 } 743 for _, node := range nodes.List() { 744 // ignore pods that haven't been scheduled yet 745 if len(node) == 0 { 746 continue 747 } 748 versionCount := make(map[string]int) 749 // invariant: surge should never have more than one instance of a pod per node running 750 for version, count := range nodesToVersions[node] { 751 if count > 1 { 752 errs = append(errs, fmt.Sprintf("node %s has %d instances of version %s running simultaneously, must have max 1", node, count, version)) 753 } 754 versionCount[version] += count 755 } 756 // invariant: when surging, the maximum number of pods we should allow to be deleted is 2 (if we are getting evicted) 757 for version, count := range nodesToDeletedVersions[node] { 758 if count > 2 { 759 errs = append(errs, fmt.Sprintf("node %s has %d deleted instances of version %s running simultaneously, must have max 2", node, count, version)) 760 } 761 versionCount[version] += count 762 } 763
// invariant: on any node, we should never have more than two instances of a version (if we are getting evicted) 764 for version, count := range versionCount { 765 if count > 2 { 766 errs = append(errs, fmt.Sprintf("node %s has %d total instances of version %s running simultaneously, must have max 2 (one deleted and one running)", node, count, version)) 767 } 768 } 769 } 770 771 if len(errs) > 0 { 772 sort.Strings(errs) 773 return false, fmt.Errorf("invariants were violated during daemonset update:\n%s", strings.Join(errs, "\n")) 774 } 775 776 // Make sure every daemon pod on the node has been updated 777 nodeNames := e2edaemonset.SchedulableNodes(ctx, c, ds) 778 for _, node := range nodeNames { 779 switch { 780 case 781 // if we don't have the new version yet 782 nodesToVersions[node][newVersion] == 0, 783 // if there is more than one version on a node 784 len(nodesToVersions[node]) > 1, 785 // if there are still any deleted pods 786 len(nodesToDeletedVersions[node]) > 0, 787 // if any of the new pods are unavailable 788 newUnavailableCount > 0: 789 790 // inject a failure randomly to ensure the controller recovers 791 switch rand.Intn(25) { 792 // cause a random old pod to go unready 793 case 0: 794 // select a not-deleted pod of the old version 795 if pod := randomPod(pods, func(pod *v1.Pod) bool { 796 return pod.DeletionTimestamp == nil && oldVersion == pod.Spec.Containers[0].Env[0].Value 797 }); pod != nil { 798 // write a non-empty value to /var/tmp/ready, which will cause the readiness probe to fail 799 if _, err := e2ekubectl.RunKubectl(pod.Namespace, "exec", "-c", pod.Spec.Containers[0].Name, pod.Name, "--", "/bin/sh", "-ec", "echo 0 > /var/tmp/ready"); err != nil { 800 framework.Logf("Failed to mark pod %s as unready via exec: %v", pod.Name, err) 801 } else { 802 framework.Logf("Marked old pod %s as unready", pod.Name) 803 } 804 } 805 case 1: 806 // delete a random pod 807 if pod := randomPod(pods, func(pod *v1.Pod) bool { 808 return pod.DeletionTimestamp == nil 809 }); pod != nil { 810 if err := c.CoreV1().Pods(ds.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{}); err != nil { 811 framework.Logf("Failed to delete pod %s early: %v", pod.Name, err) 812 } else { 813 framework.Logf("Deleted pod %s prematurely", pod.Name) 814 deliberatelyDeletedPods.Insert(string(pod.UID)) 815 // If it is an old version, we do not need to measure the controller reaction because we performed the deletion ourselves. 816 // If it is a new version, we have to reset the timer so that we again measure how long the replacement pod takes to reach readiness.
817 delete(nodeToAgeOfOldPod, pod.Spec.NodeName) 818 } 819 } 820 } 821 822 // then wait 823 return false, nil 824 } 825 } 826 return true, nil 827 }) 828 framework.ExpectNoError(err) 829 830 ginkgo.By("Check that daemon pods are still running on every node of the cluster.") 831 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds)) 832 framework.ExpectNoError(err, "error waiting for daemon pod to start") 833 834 // Check history and labels 835 ds, err = c.AppsV1().DaemonSets(ns).Get(ctx, ds.Name, metav1.GetOptions{}) 836 framework.ExpectNoError(err) 837 waitForHistoryCreated(ctx, c, ns, label, 2) 838 cur = curHistory(listDaemonHistories(ctx, c, ns, label), ds) 839 hash = cur.Labels[appsv1.DefaultDaemonSetUniqueLabelKey] 840 gomega.Expect(cur.Revision).To(gomega.Equal(int64(2))) 841 checkDaemonSetPodsLabels(listDaemonPods(ctx, c, ns, label), hash) 842 }) 843 844 /* 845 Release: v1.22 846 Testname: DaemonSet, list and delete a collection of DaemonSets 847 Description: When a DaemonSet is created it MUST succeed. It 848 MUST succeed when listing DaemonSets via a label selector. It 849 MUST succeed when deleting the DaemonSet via deleteCollection. 850 */ 851 framework.ConformanceIt("should list and delete a collection of DaemonSets", func(ctx context.Context) { 852 label := map[string]string{daemonsetNameLabel: dsName} 853 labelSelector := labels.SelectorFromSet(label).String() 854 855 dsClient := f.ClientSet.AppsV1().DaemonSets(ns) 856 cs := f.ClientSet 857 one := int64(1) 858 859 ginkgo.By(fmt.Sprintf("Creating simple DaemonSet %q", dsName)) 860 testDaemonset, err := c.AppsV1().DaemonSets(ns).Create(ctx, newDaemonSetWithLabel(dsName, image, label), metav1.CreateOptions{}) 861 framework.ExpectNoError(err) 862 863 ginkgo.By("Check that daemon pods launch on every node of the cluster.") 864 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset)) 865 framework.ExpectNoError(err, "error waiting for daemon pod to start") 866 err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName) 867 framework.ExpectNoError(err) 868 869 ginkgo.By("listing all DaemonSets") 870 dsList, err := cs.AppsV1().DaemonSets("").List(ctx, metav1.ListOptions{LabelSelector: labelSelector}) 871 framework.ExpectNoError(err, "failed to list Daemon Sets") 872 gomega.Expect(dsList.Items).To(gomega.HaveLen(1), "filtered list wasn't found") 873 874 ginkgo.By("DeleteCollection of the DaemonSets") 875 err = dsClient.DeleteCollection(ctx, metav1.DeleteOptions{GracePeriodSeconds: &one}, metav1.ListOptions{LabelSelector: labelSelector}) 876 framework.ExpectNoError(err, "failed to delete DaemonSets") 877 878 ginkgo.By("Verify that DaemonSets have been deleted") 879 dsList, err = c.AppsV1().DaemonSets("").List(ctx, metav1.ListOptions{LabelSelector: labelSelector}) 880 framework.ExpectNoError(err, "failed to list DaemonSets") 881 gomega.Expect(dsList.Items).To(gomega.BeEmpty(), "filtered list should have no daemonset") 882 }) 883 884 /* Release: v1.22 885 Testname: DaemonSet, status sub-resource 886 Description: When a DaemonSet is created it MUST succeed. 887 Attempt to read, update and patch its status sub-resource; all 888 mutating sub-resource operations MUST be visible to subsequent reads.
889 */ 890 framework.ConformanceIt("should verify changes to a daemon set status", func(ctx context.Context) { 891 label := map[string]string{daemonsetNameLabel: dsName} 892 labelSelector := labels.SelectorFromSet(label).String() 893 894 dsClient := f.ClientSet.AppsV1().DaemonSets(ns) 895 cs := f.ClientSet 896 897 w := &cache.ListWatch{ 898 WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { 899 options.LabelSelector = labelSelector 900 return dsClient.Watch(ctx, options) 901 }, 902 } 903 904 dsList, err := cs.AppsV1().DaemonSets("").List(ctx, metav1.ListOptions{LabelSelector: labelSelector}) 905 framework.ExpectNoError(err, "failed to list Daemon Sets") 906 907 ginkgo.By(fmt.Sprintf("Creating simple DaemonSet %q", dsName)) 908 testDaemonset, err := c.AppsV1().DaemonSets(ns).Create(ctx, newDaemonSetWithLabel(dsName, image, label), metav1.CreateOptions{}) 909 framework.ExpectNoError(err) 910 911 ginkgo.By("Check that daemon pods launch on every node of the cluster.") 912 err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset)) 913 framework.ExpectNoError(err, "error waiting for daemon pod to start") 914 err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName) 915 framework.ExpectNoError(err) 916 917 ginkgo.By("Getting /status") 918 dsResource := schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "daemonsets"} 919 dsStatusUnstructured, err := f.DynamicClient.Resource(dsResource).Namespace(ns).Get(ctx, dsName, metav1.GetOptions{}, "status") 920 framework.ExpectNoError(err, "Failed to fetch the status of daemon set %s in namespace %s", dsName, ns) 921 dsStatusBytes, err := json.Marshal(dsStatusUnstructured) 922 framework.ExpectNoError(err, "Failed to marshal unstructured response. %v", err) 923 924 var dsStatus appsv1.DaemonSet 925 err = json.Unmarshal(dsStatusBytes, &dsStatus) 926 framework.ExpectNoError(err, "Failed to unmarshal JSON bytes to a daemon set object type") 927 framework.Logf("Daemon Set %s has Conditions: %v", dsName, dsStatus.Status.Conditions) 928 929 ginkgo.By("updating the DaemonSet Status") 930 var statusToUpdate, updatedStatus *appsv1.DaemonSet 931 932 err = retry.RetryOnConflict(retry.DefaultRetry, func() error { 933 statusToUpdate, err = dsClient.Get(ctx, dsName, metav1.GetOptions{}) 934 framework.ExpectNoError(err, "Unable to retrieve daemon set %s", dsName) 935 936 statusToUpdate.Status.Conditions = append(statusToUpdate.Status.Conditions, appsv1.DaemonSetCondition{ 937 Type: "StatusUpdate", 938 Status: "True", 939 Reason: "E2E", 940 Message: "Set from e2e test", 941 }) 942 943 updatedStatus, err = dsClient.UpdateStatus(ctx, statusToUpdate, metav1.UpdateOptions{}) 944 return err 945 }) 946 framework.ExpectNoError(err, "Failed to update status. 
%v", err) 947 framework.Logf("updatedStatus.Conditions: %#v", updatedStatus.Status.Conditions) 948 949 ginkgo.By("watching for the daemon set status to be updated") 950 ctxUntil, cancel := context.WithTimeout(ctx, dsRetryTimeout) 951 defer cancel() 952 _, err = watchtools.Until(ctxUntil, dsList.ResourceVersion, w, func(event watch.Event) (bool, error) { 953 if ds, ok := event.Object.(*appsv1.DaemonSet); ok { 954 found := ds.ObjectMeta.Name == testDaemonset.ObjectMeta.Name && 955 ds.ObjectMeta.Namespace == testDaemonset.ObjectMeta.Namespace && 956 ds.Labels[daemonsetNameLabel] == dsName 957 if !found { 958 framework.Logf("Observed daemon set %v in namespace %v with annotations: %v & Conditions: %v", ds.ObjectMeta.Name, ds.ObjectMeta.Namespace, ds.Annotations, ds.Status.Conditions) 959 return false, nil 960 } 961 for _, cond := range ds.Status.Conditions { 962 if cond.Type == "StatusUpdate" && 963 cond.Reason == "E2E" && 964 cond.Message == "Set from e2e test" { 965 framework.Logf("Found daemon set %v in namespace %v with labels: %v annotations: %v & Conditions: %v", ds.ObjectMeta.Name, ds.ObjectMeta.Namespace, ds.ObjectMeta.Labels, ds.Annotations, ds.Status.Conditions) 966 return found, nil 967 } 968 framework.Logf("Observed daemon set %v in namespace %v with annotations: %v & Conditions: %v", ds.ObjectMeta.Name, ds.ObjectMeta.Namespace, ds.Annotations, ds.Status.Conditions) 969 } 970 } 971 object := strings.Split(fmt.Sprintf("%v", event.Object), "{")[0] 972 framework.Logf("Observed %v event: %+v", object, event.Type) 973 return false, nil 974 }) 975 framework.ExpectNoError(err, "failed to locate daemon set %v in namespace %v", testDaemonset.ObjectMeta.Name, ns) 976 framework.Logf("Daemon set %s has an updated status", dsName) 977 978 ginkgo.By("patching the DaemonSet Status") 979 daemonSetStatusPatch := appsv1.DaemonSet{ 980 Status: appsv1.DaemonSetStatus{ 981 Conditions: []appsv1.DaemonSetCondition{ 982 { 983 Type: "StatusPatched", 984 Status: "True", 985 }, 986 }, 987 }, 988 } 989 990 payload, err := json.Marshal(daemonSetStatusPatch) 991 framework.ExpectNoError(err, "Failed to marshal JSON. 
%v", err) 992 _, err = dsClient.Patch(ctx, dsName, types.MergePatchType, payload, metav1.PatchOptions{}, "status") 993 framework.ExpectNoError(err, "Failed to patch daemon set status", err) 994 995 ginkgo.By("watching for the daemon set status to be patched") 996 ctxUntil, cancel = context.WithTimeout(ctx, dsRetryTimeout) 997 defer cancel() 998 _, err = watchtools.Until(ctxUntil, dsList.ResourceVersion, w, func(event watch.Event) (bool, error) { 999 if ds, ok := event.Object.(*appsv1.DaemonSet); ok { 1000 found := ds.ObjectMeta.Name == testDaemonset.ObjectMeta.Name && 1001 ds.ObjectMeta.Namespace == testDaemonset.ObjectMeta.Namespace && 1002 ds.Labels[daemonsetNameLabel] == dsName 1003 if !found { 1004 framework.Logf("Observed daemon set %v in namespace %v with annotations: %v & Conditions: %v", ds.ObjectMeta.Name, ds.ObjectMeta.Namespace, ds.Annotations, ds.Status.Conditions) 1005 return false, nil 1006 } 1007 for _, cond := range ds.Status.Conditions { 1008 if cond.Type == "StatusPatched" { 1009 framework.Logf("Found daemon set %v in namespace %v with labels: %v annotations: %v & Conditions: %v", ds.ObjectMeta.Name, ds.ObjectMeta.Namespace, ds.ObjectMeta.Labels, ds.Annotations, ds.Status.Conditions) 1010 return found, nil 1011 } 1012 framework.Logf("Observed daemon set %v in namespace %v with annotations: %v & Conditions: %v", ds.ObjectMeta.Name, ds.ObjectMeta.Namespace, ds.Annotations, ds.Status.Conditions) 1013 } 1014 } 1015 object := strings.Split(fmt.Sprintf("%v", event.Object), "{")[0] 1016 framework.Logf("Observed %v event: %v", object, event.Type) 1017 return false, nil 1018 }) 1019 framework.ExpectNoError(err, "failed to locate daemon set %v in namespace %v", testDaemonset.ObjectMeta.Name, ns) 1020 framework.Logf("Daemon set %s has a patched status", dsName) 1021 }) 1022 }) 1023 1024 // randomPod selects a random pod within pods that causes fn to return true, or nil 1025 // if no pod can be found matching the criteria. 
1026 func randomPod(pods []v1.Pod, fn func(p *v1.Pod) bool) *v1.Pod { 1027 podCount := len(pods) 1028 for offset, i := rand.Intn(podCount), 0; i < podCount; i++ { 1029 pod := &pods[(offset+i)%podCount] 1030 if fn(pod) { 1031 return pod 1032 } 1033 } 1034 return nil 1035 } 1036 1037 // getDaemonSetImagePatch generates a patch for updating a DaemonSet's container image 1038 func getDaemonSetImagePatch(containerName, containerImage string) string { 1039 return fmt.Sprintf(`{"spec":{"template":{"spec":{"containers":[{"name":"%s","image":"%s"}]}}}}`, containerName, containerImage) 1040 } 1041 1042 func newDaemonSet(dsName, image string, label map[string]string) *appsv1.DaemonSet { 1043 ds := newDaemonSetWithLabel(dsName, image, label) 1044 ds.ObjectMeta.Labels = nil 1045 return ds 1046 } 1047 1048 func newDaemonSetWithLabel(dsName, image string, label map[string]string) *appsv1.DaemonSet { 1049 return e2edaemonset.NewDaemonSet(dsName, image, label, nil, nil, []v1.ContainerPort{{ContainerPort: 9376}}) 1050 } 1051 1052 func listDaemonPods(ctx context.Context, c clientset.Interface, ns string, label map[string]string) *v1.PodList { 1053 selector := labels.Set(label).AsSelector() 1054 options := metav1.ListOptions{ 1055 LabelSelector: selector.String(), 1056 FieldSelector: nonTerminalPhaseSelector.String(), 1057 } 1058 podList, err := c.CoreV1().Pods(ns).List(ctx, options) 1059 framework.ExpectNoError(err) 1060 gomega.Expect(podList.Items).ToNot(gomega.BeEmpty()) 1061 return podList 1062 } 1063 1064 func separateDaemonSetNodeLabels(labels map[string]string) (map[string]string, map[string]string) { 1065 daemonSetLabels := map[string]string{} 1066 otherLabels := map[string]string{} 1067 for k, v := range labels { 1068 if strings.HasPrefix(k, daemonsetLabelPrefix) { 1069 daemonSetLabels[k] = v 1070 } else { 1071 otherLabels[k] = v 1072 } 1073 } 1074 return daemonSetLabels, otherLabels 1075 } 1076 1077 func clearDaemonSetNodeLabels(ctx context.Context, c clientset.Interface) error { 1078 nodeList, err := e2enode.GetReadySchedulableNodes(ctx, c) 1079 if err != nil { 1080 return err 1081 } 1082 for _, node := range nodeList.Items { 1083 _, err := setDaemonSetNodeLabels(ctx, c, node.Name, map[string]string{}) 1084 if err != nil { 1085 return err 1086 } 1087 } 1088 return nil 1089 } 1090 1091 // patchNamespaceAnnotations sets node-selector-related annotations on test namespaces to empty 1092 func patchNamespaceAnnotations(ctx context.Context, c clientset.Interface, nsName string) (*v1.Namespace, error) { 1093 nsClient := c.CoreV1().Namespaces() 1094 1095 annotations := make(map[string]string) 1096 for _, n := range NamespaceNodeSelectors { 1097 annotations[n] = "" 1098 } 1099 nsPatch, err := json.Marshal(map[string]interface{}{ 1100 "metadata": map[string]interface{}{ 1101 "annotations": annotations, 1102 }, 1103 }) 1104 if err != nil { 1105 return nil, err 1106 } 1107 1108 return nsClient.Patch(ctx, nsName, types.StrategicMergePatchType, nsPatch, metav1.PatchOptions{}) 1109 } 1110 1111 func setDaemonSetNodeLabels(ctx context.Context, c clientset.Interface, nodeName string, labels map[string]string) (*v1.Node, error) { 1112 nodeClient := c.CoreV1().Nodes() 1113 var newNode *v1.Node 1114 var newLabels map[string]string 1115 err := wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, func(ctx context.Context) (bool, error) { 1116 node, err := nodeClient.Get(ctx, nodeName, metav1.GetOptions{}) 1117 if err != nil { 1118 return false, err 1119 } 1120 1121 // remove all labels this test
is creating 1122 daemonSetLabels, otherLabels := separateDaemonSetNodeLabels(node.Labels) 1123 if reflect.DeepEqual(daemonSetLabels, labels) { 1124 newNode = node 1125 return true, nil 1126 } 1127 node.Labels = otherLabels 1128 for k, v := range labels { 1129 node.Labels[k] = v 1130 } 1131 newNode, err = nodeClient.Update(ctx, node, metav1.UpdateOptions{}) 1132 if err == nil { 1133 newLabels, _ = separateDaemonSetNodeLabels(newNode.Labels) 1134 return true, err 1135 } 1136 if se, ok := err.(*apierrors.StatusError); ok && se.ErrStatus.Reason == metav1.StatusReasonConflict { 1137 framework.Logf("failed to update node due to resource version conflict") 1138 return false, nil 1139 } 1140 return false, err 1141 }) 1142 if err != nil { 1143 return nil, err 1144 } else if len(newLabels) != len(labels) { 1145 return nil, fmt.Errorf("could not set daemon set test labels as expected") 1146 } 1147 1148 return newNode, nil 1149 } 1150 1151 func checkRunningOnAllNodes(f *framework.Framework, ds *appsv1.DaemonSet) func(ctx context.Context) (bool, error) { 1152 return func(ctx context.Context) (bool, error) { 1153 return e2edaemonset.CheckRunningOnAllNodes(ctx, f, ds) 1154 } 1155 } 1156 1157 func checkAtLeastOneNewPod(c clientset.Interface, ns string, label map[string]string, newImage string) func(ctx context.Context) (bool, error) { 1158 return func(ctx context.Context) (bool, error) { 1159 pods := listDaemonPods(ctx, c, ns, label) 1160 for _, pod := range pods.Items { 1161 if pod.Spec.Containers[0].Image == newImage { 1162 return true, nil 1163 } 1164 } 1165 return false, nil 1166 } 1167 } 1168 1169 func checkRunningOnNoNodes(f *framework.Framework, ds *appsv1.DaemonSet) func(ctx context.Context) (bool, error) { 1170 return e2edaemonset.CheckDaemonPodOnNodes(f, ds, make([]string, 0)) 1171 } 1172 1173 func checkDaemonPodsImageAndAvailability(c clientset.Interface, ds *appsv1.DaemonSet, image string, maxUnavailable int) func(ctx context.Context) (bool, error) { 1174 return func(ctx context.Context) (bool, error) { 1175 podList, err := c.CoreV1().Pods(ds.Namespace).List(ctx, metav1.ListOptions{}) 1176 if err != nil { 1177 return false, err 1178 } 1179 pods := podList.Items 1180 1181 unavailablePods := 0 1182 nodesToUpdatedPodCount := make(map[string]int) 1183 for _, pod := range pods { 1184 // Ignore the pod on the node that is supposed to be deleted 1185 if pod.DeletionTimestamp != nil { 1186 continue 1187 } 1188 if !metav1.IsControlledBy(&pod, ds) { 1189 continue 1190 } 1191 podImage := pod.Spec.Containers[0].Image 1192 if podImage != image { 1193 framework.Logf("Wrong image for pod: %s. 
Expected: %s, got: %s.", pod.Name, image, podImage) 1194 } else { 1195 nodesToUpdatedPodCount[pod.Spec.NodeName]++ 1196 } 1197 if !podutil.IsPodAvailable(&pod, ds.Spec.MinReadySeconds, metav1.Now()) { 1198 framework.Logf("Pod %s is not available", pod.Name) 1199 unavailablePods++ 1200 } 1201 } 1202 if unavailablePods > maxUnavailable { 1203 return false, fmt.Errorf("number of unavailable pods: %d is greater than maxUnavailable: %d", unavailablePods, maxUnavailable) 1204 } 1205 // Make sure every daemon pod on the node has been updated 1206 nodeNames := e2edaemonset.SchedulableNodes(ctx, c, ds) 1207 for _, node := range nodeNames { 1208 if nodesToUpdatedPodCount[node] == 0 { 1209 return false, nil 1210 } 1211 } 1212 return true, nil 1213 } 1214 } 1215 1216 func checkDaemonSetPodsLabels(podList *v1.PodList, hash string) { 1217 for _, pod := range podList.Items { 1218 // Ignore all the DS pods that will be deleted 1219 if pod.DeletionTimestamp != nil { 1220 continue 1221 } 1222 podHash := pod.Labels[appsv1.DefaultDaemonSetUniqueLabelKey] 1223 gomega.Expect(podHash).ToNot(gomega.BeEmpty()) 1224 if len(hash) > 0 { 1225 gomega.Expect(podHash).To(gomega.Equal(hash), "unexpected hash for pod %s", pod.Name) 1226 } 1227 } 1228 } 1229 1230 func waitForHistoryCreated(ctx context.Context, c clientset.Interface, ns string, label map[string]string, numHistory int) { 1231 listHistoryFn := func(ctx context.Context) (bool, error) { 1232 selector := labels.Set(label).AsSelector() 1233 options := metav1.ListOptions{LabelSelector: selector.String()} 1234 historyList, err := c.AppsV1().ControllerRevisions(ns).List(ctx, options) 1235 if err != nil { 1236 return false, err 1237 } 1238 if len(historyList.Items) == numHistory { 1239 return true, nil 1240 } 1241 framework.Logf("%d/%d controllerrevisions created.", len(historyList.Items), numHistory) 1242 return false, nil 1243 } 1244 err := wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, listHistoryFn) 1245 framework.ExpectNoError(err, "error waiting for controllerrevisions to be created") 1246 } 1247 1248 func listDaemonHistories(ctx context.Context, c clientset.Interface, ns string, label map[string]string) *appsv1.ControllerRevisionList { 1249 selector := labels.Set(label).AsSelector() 1250 options := metav1.ListOptions{LabelSelector: selector.String()} 1251 historyList, err := c.AppsV1().ControllerRevisions(ns).List(ctx, options) 1252 framework.ExpectNoError(err) 1253 gomega.Expect(historyList.Items).ToNot(gomega.BeEmpty()) 1254 return historyList 1255 } 1256 1257 func curHistory(historyList *appsv1.ControllerRevisionList, ds *appsv1.DaemonSet) *appsv1.ControllerRevision { 1258 var curHistory *appsv1.ControllerRevision 1259 foundCurHistories := 0 1260 for i := range historyList.Items { 1261 history := &historyList.Items[i] 1262 // Every history should have the hash label 1263 gomega.Expect(history.Labels[appsv1.DefaultDaemonSetUniqueLabelKey]).ToNot(gomega.BeEmpty()) 1264 match, err := daemon.Match(ds, history) 1265 framework.ExpectNoError(err) 1266 if match { 1267 curHistory = history 1268 foundCurHistories++ 1269 } 1270 } 1271 gomega.Expect(foundCurHistories).To(gomega.Equal(1)) 1272 gomega.Expect(curHistory).NotTo(gomega.BeNil()) 1273 return curHistory 1274 } 1275 1276 func waitFailedDaemonPodDeleted(c clientset.Interface, pod *v1.Pod) func(ctx context.Context) (bool, error) { 1277 return func(ctx context.Context) (bool, error) { 1278 if _, err := c.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{}); err != nil { 1279 
if apierrors.IsNotFound(err) { 1280 return true, nil 1281 } 1282 return false, fmt.Errorf("failed to get failed daemon pod %q: %w", pod.Name, err) 1283 } 1284 return false, nil 1285 } 1286 }