istio.io/istio@v0.0.0-20240520182934-d79c90f27776/operator/pkg/helmreconciler/wait.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package helmreconciler 16 17 import ( 18 "context" 19 "fmt" 20 "sort" 21 "strings" 22 "time" 23 24 appsv1 "k8s.io/api/apps/v1" 25 corev1 "k8s.io/api/core/v1" 26 apiextensions "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" 27 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 "k8s.io/apimachinery/pkg/labels" 29 "k8s.io/apimachinery/pkg/util/wait" 30 "k8s.io/client-go/kubernetes" 31 kctldeployment "k8s.io/kubectl/pkg/util/deployment" 32 33 "istio.io/istio/operator/pkg/name" 34 "istio.io/istio/operator/pkg/object" 35 "istio.io/istio/operator/pkg/util/progress" 36 "istio.io/istio/pkg/kube" 37 ) 38 39 const ( 40 // defaultWaitResourceTimeout is the maximum wait time for all resources(namespace/deployment/pod) to be created. 41 defaultWaitResourceTimeout = 300 * time.Second 42 // cRDPollInterval is how often the state of CRDs is polled when waiting for their creation. 43 cRDPollInterval = 500 * time.Millisecond 44 // cRDPollTimeout is the maximum wait time for all CRDs to be created. 45 cRDPollTimeout = 60 * time.Second 46 ) 47 48 // deployment holds associated replicaSets for a deployment 49 type deployment struct { 50 replicaSets *appsv1.ReplicaSet 51 deployment *appsv1.Deployment 52 } 53 54 // WaitForResources polls to get the current status of all pods, PVCs, and Services 55 // until all are ready or a timeout is reached 56 func WaitForResources(objects object.K8sObjects, client kube.Client, 57 waitTimeout time.Duration, dryRun bool, l *progress.ManifestLog, 58 ) error { 59 if dryRun || TestMode { 60 return nil 61 } 62 63 if err := waitForCRDs(objects, client); err != nil { 64 return err 65 } 66 67 var notReady []string 68 var debugInfo map[string]string 69 70 // Check if we are ready immediately, to avoid the 2s delay below when we are already ready 71 if ready, _, _, err := waitForResources(objects, client.Kube(), l); err == nil && ready { 72 return nil 73 } 74 75 errPoll := wait.PollUntilContextTimeout(context.Background(), 2*time.Second, waitTimeout, false, func(context.Context) (bool, error) { 76 isReady, notReadyObjects, debugInfoObjects, err := waitForResources(objects, client.Kube(), l) 77 notReady = notReadyObjects 78 debugInfo = debugInfoObjects 79 return isReady, err 80 }) 81 82 messages := []string{} 83 for _, id := range notReady { 84 debug, f := debugInfo[id] 85 if f { 86 messages = append(messages, fmt.Sprintf(" %s (%s)", id, debug)) 87 } else { 88 messages = append(messages, fmt.Sprintf(" %s", debug)) 89 } 90 } 91 if errPoll != nil { 92 msg := fmt.Sprintf("resources not ready after %v: %v\n%s", waitTimeout, errPoll, strings.Join(messages, "\n")) 93 return fmt.Errorf(msg) 94 } 95 return nil 96 } 97 98 func waitForResources(objects object.K8sObjects, cs kubernetes.Interface, l *progress.ManifestLog) (bool, []string, map[string]string, error) { 99 pods := []corev1.Pod{} 100 deployments := []deployment{} 101 daemonsets := []*appsv1.DaemonSet{} 102 statefulsets := []*appsv1.StatefulSet{} 103 namespaces := []corev1.Namespace{} 104 105 for _, o := range objects { 106 kind := o.GroupVersionKind().Kind 107 switch kind { 108 case name.NamespaceStr: 109 namespace, err := cs.CoreV1().Namespaces().Get(context.TODO(), o.Name, metav1.GetOptions{}) 110 if err != nil { 111 return false, nil, nil, err 112 } 113 namespaces = append(namespaces, *namespace) 114 case name.DeploymentStr: 115 currentDeployment, err := cs.AppsV1().Deployments(o.Namespace).Get(context.TODO(), o.Name, metav1.GetOptions{}) 116 if err != nil { 117 return false, nil, nil, err 118 } 119 _, _, newReplicaSet, err := kctldeployment.GetAllReplicaSets(currentDeployment, cs.AppsV1()) 120 if err != nil || newReplicaSet == nil { 121 return false, nil, nil, err 122 } 123 newDeployment := deployment{ 124 newReplicaSet, 125 currentDeployment, 126 } 127 deployments = append(deployments, newDeployment) 128 case name.DaemonSetStr: 129 ds, err := cs.AppsV1().DaemonSets(o.Namespace).Get(context.TODO(), o.Name, metav1.GetOptions{}) 130 if err != nil { 131 return false, nil, nil, err 132 } 133 daemonsets = append(daemonsets, ds) 134 case name.StatefulSetStr: 135 sts, err := cs.AppsV1().StatefulSets(o.Namespace).Get(context.TODO(), o.Name, metav1.GetOptions{}) 136 if err != nil { 137 return false, nil, nil, err 138 } 139 statefulsets = append(statefulsets, sts) 140 } 141 } 142 143 resourceDebugInfo := map[string]string{} 144 dr, dnr := deploymentsReady(cs, deployments, resourceDebugInfo) 145 dsr, dsnr := daemonsetsReady(daemonsets) 146 stsr, stsnr := statefulsetsReady(statefulsets) 147 nsr, nnr := namespacesReady(namespaces) 148 pr, pnr := podsReady(pods) 149 isReady := dr && nsr && dsr && stsr && pr 150 notReady := append(append(append(append(nnr, dnr...), pnr...), dsnr...), stsnr...) 151 if !isReady { 152 l.ReportWaiting(notReady) 153 } 154 return isReady, notReady, resourceDebugInfo, nil 155 } 156 157 func waitForCRDs(objects object.K8sObjects, client kube.Client) error { 158 var crdNames []string 159 for _, o := range object.KindObjects(objects, name.CRDStr) { 160 crdNames = append(crdNames, o.Name) 161 } 162 if len(crdNames) == 0 { 163 return nil 164 } 165 166 errPoll := wait.PollUntilContextTimeout(context.Background(), cRDPollInterval, cRDPollTimeout, false, func(context.Context) (bool, error) { 167 descriptor: 168 for _, crdName := range crdNames { 169 crd, errGet := client.Ext().ApiextensionsV1().CustomResourceDefinitions().Get(context.TODO(), crdName, metav1.GetOptions{}) 170 if errGet != nil { 171 return false, errGet 172 } 173 for _, cond := range crd.Status.Conditions { 174 switch cond.Type { 175 case apiextensions.Established: 176 if cond.Status == apiextensions.ConditionTrue { 177 scope.Infof("established CRD %s", crdName) 178 continue descriptor 179 } 180 case apiextensions.NamesAccepted: 181 if cond.Status == apiextensions.ConditionFalse { 182 scope.Warnf("name conflict for %v: %v", crdName, cond.Reason) 183 } 184 } 185 } 186 scope.Infof("missing status condition for %q", crdName) 187 return false, nil 188 } 189 return true, nil 190 }) 191 192 if errPoll != nil { 193 scope.Errorf("failed to verify CRD creation; %s", errPoll) 194 return fmt.Errorf("failed to verify CRD creation: %s", errPoll) 195 } 196 197 scope.Info("Finished applying CRDs.") 198 return nil 199 } 200 201 func getPods(client kubernetes.Interface, namespace string, selector labels.Selector) ([]corev1.Pod, error) { 202 list, err := client.CoreV1().Pods(namespace).List(context.TODO(), metav1.ListOptions{ 203 LabelSelector: selector.String(), 204 }) 205 return list.Items, err 206 } 207 208 func namespacesReady(namespaces []corev1.Namespace) (bool, []string) { 209 var notReady []string 210 for _, namespace := range namespaces { 211 if namespace.Status.Phase != corev1.NamespaceActive { 212 notReady = append(notReady, "Namespace/"+namespace.Name) 213 } 214 } 215 return len(notReady) == 0, notReady 216 } 217 218 func podsReady(pods []corev1.Pod) (bool, []string) { 219 var notReady []string 220 for _, pod := range pods { 221 if !isPodReady(&pod) { 222 notReady = append(notReady, "Pod/"+pod.Namespace+"/"+pod.Name) 223 } 224 } 225 return len(notReady) == 0, notReady 226 } 227 228 func isPodReady(pod *corev1.Pod) bool { 229 if len(pod.Status.Conditions) > 0 { 230 for _, condition := range pod.Status.Conditions { 231 if condition.Type == corev1.PodReady && 232 condition.Status == corev1.ConditionTrue { 233 return true 234 } 235 } 236 } 237 return false 238 } 239 240 func deploymentsReady(cs kubernetes.Interface, deployments []deployment, info map[string]string) (bool, []string) { 241 var notReady []string 242 for _, v := range deployments { 243 if v.replicaSets.Status.ReadyReplicas >= *v.deployment.Spec.Replicas { 244 // Ready 245 continue 246 } 247 id := "Deployment/" + v.deployment.Namespace + "/" + v.deployment.Name 248 notReady = append(notReady, id) 249 failure := extractPodFailureReason(cs, v.deployment.Namespace, v.deployment.Spec.Selector) 250 if failure != "" { 251 info[id] = failure 252 } 253 } 254 return len(notReady) == 0, notReady 255 } 256 257 func extractPodFailureReason(client kubernetes.Interface, namespace string, selector *metav1.LabelSelector) string { 258 sel, err := metav1.LabelSelectorAsSelector(selector) 259 if err != nil { 260 return fmt.Sprintf("failed to get label selector: %v", err) 261 } 262 pods, err := getPods(client, namespace, sel) 263 if err != nil { 264 return fmt.Sprintf("failed to fetch pods: %v", err) 265 } 266 sort.Slice(pods, func(i, j int) bool { 267 return pods[i].CreationTimestamp.After(pods[j].CreationTimestamp.Time) 268 }) 269 for _, pod := range pods { 270 for _, cs := range pod.Status.ContainerStatuses { 271 if cs.State.Waiting != nil { 272 return fmt.Sprintf("container failed to start: %v: %v", cs.State.Waiting.Reason, cs.State.Waiting.Message) 273 } 274 } 275 if c := getCondition(pod.Status.Conditions, corev1.PodReady); c != nil && c.Status == corev1.ConditionFalse { 276 return fmt.Sprintf(c.Message) 277 } 278 } 279 return "" 280 } 281 282 func getCondition(conditions []corev1.PodCondition, condition corev1.PodConditionType) *corev1.PodCondition { 283 for _, cond := range conditions { 284 if cond.Type == condition { 285 return &cond 286 } 287 } 288 return nil 289 } 290 291 func daemonsetsReady(daemonsets []*appsv1.DaemonSet) (bool, []string) { 292 var notReady []string 293 for _, ds := range daemonsets { 294 // Check if the wanting generation is same as the observed generation 295 // Only when the observed generation is the same as the generation, 296 // other checks will make sense. If not the same, daemon set is not 297 // ready 298 if ds.Status.ObservedGeneration != ds.Generation { 299 scope.Infof("DaemonSet is not ready: %s/%s. Observed generation: %d expected generation: %d", 300 ds.Namespace, ds.Name, ds.Status.ObservedGeneration, ds.Generation) 301 notReady = append(notReady, "DaemonSet/"+ds.Namespace+"/"+ds.Name) 302 } else { 303 // Make sure all the updated pods have been scheduled 304 if ds.Spec.UpdateStrategy.Type == appsv1.OnDeleteDaemonSetStrategyType && 305 ds.Status.UpdatedNumberScheduled != ds.Status.DesiredNumberScheduled { 306 scope.Infof("DaemonSet is not ready: %s/%s. %d out of %d expected pods have been scheduled", 307 ds.Namespace, ds.Name, ds.Status.UpdatedNumberScheduled, ds.Status.DesiredNumberScheduled) 308 notReady = append(notReady, "DaemonSet/"+ds.Namespace+"/"+ds.Name) 309 } 310 if ds.Spec.UpdateStrategy.Type == appsv1.RollingUpdateDaemonSetStrategyType { 311 if ds.Status.DesiredNumberScheduled <= 0 { 312 // If DesiredNumberScheduled less then or equal 0, there some cases: 313 // 1) daemonset is just created 314 // 2) daemonset desired no pod 315 // 3) somebody changed it manually 316 // All the case is not a ready signal 317 scope.Infof("DaemonSet is not ready: %s/%s. Initializing, no pods is running", 318 ds.Namespace, ds.Name) 319 notReady = append(notReady, "DaemonSet/"+ds.Namespace+"/"+ds.Name) 320 } else if ds.Status.NumberReady < ds.Status.DesiredNumberScheduled { 321 // Make sure every node has a ready pod 322 scope.Infof("DaemonSet is not ready: %s/%s. %d out of %d expected pods are ready", 323 ds.Namespace, ds.Name, ds.Status.NumberReady, ds.Status.UpdatedNumberScheduled) 324 notReady = append(notReady, "DaemonSet/"+ds.Namespace+"/"+ds.Name) 325 } else if ds.Status.UpdatedNumberScheduled != ds.Status.DesiredNumberScheduled { 326 // Make sure all the updated pods have been scheduled 327 scope.Infof("DaemonSet is not ready: %s/%s. %d out of %d expected pods have been scheduled", 328 ds.Namespace, ds.Name, ds.Status.UpdatedNumberScheduled, ds.Status.DesiredNumberScheduled) 329 notReady = append(notReady, "DaemonSet/"+ds.Namespace+"/"+ds.Name) 330 } 331 } 332 } 333 } 334 return len(notReady) == 0, notReady 335 } 336 337 func statefulsetsReady(statefulsets []*appsv1.StatefulSet) (bool, []string) { 338 var notReady []string 339 for _, sts := range statefulsets { 340 // Make sure all the updated pods have been scheduled 341 if sts.Spec.UpdateStrategy.Type == appsv1.OnDeleteStatefulSetStrategyType && 342 sts.Status.UpdatedReplicas != sts.Status.Replicas { 343 scope.Infof("StatefulSet is not ready: %s/%s. %d out of %d expected pods have been scheduled", 344 sts.Namespace, sts.Name, sts.Status.UpdatedReplicas, sts.Status.Replicas) 345 notReady = append(notReady, "StatefulSet/"+sts.Namespace+"/"+sts.Name) 346 } 347 if sts.Spec.UpdateStrategy.Type == appsv1.RollingUpdateStatefulSetStrategyType { 348 // Dereference all the pointers because StatefulSets like them 349 var partition int 350 // default replicas for sts is 1 351 replicas := 1 352 // the rollingUpdate field can be nil even if the update strategy is a rolling update. 353 if sts.Spec.UpdateStrategy.RollingUpdate != nil && 354 sts.Spec.UpdateStrategy.RollingUpdate.Partition != nil { 355 partition = int(*sts.Spec.UpdateStrategy.RollingUpdate.Partition) 356 } 357 if sts.Spec.Replicas != nil { 358 replicas = int(*sts.Spec.Replicas) 359 } 360 expectedReplicas := replicas - partition 361 // Make sure all the updated pods have been scheduled 362 if int(sts.Status.UpdatedReplicas) != expectedReplicas { 363 scope.Infof("StatefulSet is not ready: %s/%s. %d out of %d expected pods have been scheduled", 364 sts.Namespace, sts.Name, sts.Status.UpdatedReplicas, expectedReplicas) 365 notReady = append(notReady, "StatefulSet/"+sts.Namespace+"/"+sts.Name) 366 continue 367 } 368 } 369 } 370 return len(notReady) == 0, notReady 371 }