k8s.io/kubernetes@v1.29.3/pkg/controller/job/pod_failure_policy.go (about) 1 /* 2 Copyright 2021 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package job 18 19 import ( 20 "fmt" 21 22 batch "k8s.io/api/batch/v1" 23 v1 "k8s.io/api/core/v1" 24 "k8s.io/apiserver/pkg/util/feature" 25 "k8s.io/kubernetes/pkg/features" 26 ) 27 28 // matchPodFailurePolicy returns information about matching a given failed pod 29 // against the pod failure policy rules. The information is represented as an 30 // - optional job failure message (present in case the pod matched a 'FailJob' rule), 31 // - a boolean indicating if the failure should be counted towards backoffLimit 32 // (and backoffLimitPerIndex if specified). It should not be counted 33 // if the pod matched an 'Ignore' rule, 34 // - a pointer to the matched pod failure policy action. 35 func matchPodFailurePolicy(podFailurePolicy *batch.PodFailurePolicy, failedPod *v1.Pod) (*string, bool, *batch.PodFailurePolicyAction) { 36 if podFailurePolicy == nil { 37 return nil, true, nil 38 } 39 ignore := batch.PodFailurePolicyActionIgnore 40 failJob := batch.PodFailurePolicyActionFailJob 41 failIndex := batch.PodFailurePolicyActionFailIndex 42 count := batch.PodFailurePolicyActionCount 43 for index, podFailurePolicyRule := range podFailurePolicy.Rules { 44 if podFailurePolicyRule.OnExitCodes != nil { 45 if containerStatus := matchOnExitCodes(&failedPod.Status, podFailurePolicyRule.OnExitCodes); containerStatus != nil { 46 switch podFailurePolicyRule.Action { 47 case batch.PodFailurePolicyActionIgnore: 48 return nil, false, &ignore 49 case batch.PodFailurePolicyActionFailIndex: 50 if feature.DefaultFeatureGate.Enabled(features.JobBackoffLimitPerIndex) { 51 return nil, true, &failIndex 52 } 53 case batch.PodFailurePolicyActionCount: 54 return nil, true, &count 55 case batch.PodFailurePolicyActionFailJob: 56 msg := fmt.Sprintf("Container %s for pod %s/%s failed with exit code %v matching %v rule at index %d", 57 containerStatus.Name, failedPod.Namespace, failedPod.Name, containerStatus.State.Terminated.ExitCode, podFailurePolicyRule.Action, index) 58 return &msg, true, &failJob 59 } 60 } 61 } else if podFailurePolicyRule.OnPodConditions != nil { 62 if podCondition := matchOnPodConditions(&failedPod.Status, podFailurePolicyRule.OnPodConditions); podCondition != nil { 63 switch podFailurePolicyRule.Action { 64 case batch.PodFailurePolicyActionIgnore: 65 return nil, false, &ignore 66 case batch.PodFailurePolicyActionFailIndex: 67 if feature.DefaultFeatureGate.Enabled(features.JobBackoffLimitPerIndex) { 68 return nil, true, &failIndex 69 } 70 case batch.PodFailurePolicyActionCount: 71 return nil, true, &count 72 case batch.PodFailurePolicyActionFailJob: 73 msg := fmt.Sprintf("Pod %s/%s has condition %v matching %v rule at index %d", 74 failedPod.Namespace, failedPod.Name, podCondition.Type, podFailurePolicyRule.Action, index) 75 return &msg, true, &failJob 76 } 77 } 78 } 79 } 80 return nil, true, nil 81 } 82 83 // matchOnExitCodes returns a terminated container status that matches the error code requirement, if any exists. 84 // If the returned status is non-nil, it has a non-nil Terminated field. 85 func matchOnExitCodes(podStatus *v1.PodStatus, requirement *batch.PodFailurePolicyOnExitCodesRequirement) *v1.ContainerStatus { 86 if containerStatus := getMatchingContainerFromList(podStatus.ContainerStatuses, requirement); containerStatus != nil { 87 return containerStatus 88 } 89 return getMatchingContainerFromList(podStatus.InitContainerStatuses, requirement) 90 } 91 92 func matchOnPodConditions(podStatus *v1.PodStatus, requirement []batch.PodFailurePolicyOnPodConditionsPattern) *v1.PodCondition { 93 for _, podCondition := range podStatus.Conditions { 94 for _, pattern := range requirement { 95 if podCondition.Type == pattern.Type && podCondition.Status == pattern.Status { 96 return &podCondition 97 } 98 } 99 } 100 return nil 101 } 102 103 // getMatchingContainerFromList returns the first terminated container status in the list that matches the error code requirement, or nil if none match. 104 // If the returned status is non-nil, it has a non-nil Terminated field 105 func getMatchingContainerFromList(containerStatuses []v1.ContainerStatus, requirement *batch.PodFailurePolicyOnExitCodesRequirement) *v1.ContainerStatus { 106 for _, containerStatus := range containerStatuses { 107 if containerStatus.State.Terminated == nil { 108 // This container is still be terminating. There is no exit code to match. 109 continue 110 } 111 if requirement.ContainerName == nil || *requirement.ContainerName == containerStatus.Name { 112 if containerStatus.State.Terminated.ExitCode != 0 { 113 if isOnExitCodesOperatorMatching(containerStatus.State.Terminated.ExitCode, requirement) { 114 return &containerStatus 115 } 116 } 117 } 118 } 119 return nil 120 } 121 122 func isOnExitCodesOperatorMatching(exitCode int32, requirement *batch.PodFailurePolicyOnExitCodesRequirement) bool { 123 switch requirement.Operator { 124 case batch.PodFailurePolicyOnExitCodesOpIn: 125 for _, value := range requirement.Values { 126 if value == exitCode { 127 return true 128 } 129 } 130 return false 131 case batch.PodFailurePolicyOnExitCodesOpNotIn: 132 for _, value := range requirement.Values { 133 if value == exitCode { 134 return false 135 } 136 } 137 return true 138 default: 139 return false 140 } 141 }