github.com/IBM-Blockchain/fabric-operator@v1.0.4/pkg/manager/resources/job/job.go (about) 1 /* 2 * Copyright contributors to the Hyperledger Fabric Operator project 3 * 4 * SPDX-License-Identifier: Apache-2.0 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at: 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 package job 20 21 import ( 22 "context" 23 "crypto/rand" 24 "fmt" 25 "math/big" 26 "time" 27 28 "github.com/pkg/errors" 29 30 controller "github.com/IBM-Blockchain/fabric-operator/pkg/k8s/controllerclient" 31 "github.com/IBM-Blockchain/fabric-operator/pkg/manager/resources/container" 32 "github.com/IBM-Blockchain/fabric-operator/pkg/util" 33 34 v1 "k8s.io/api/batch/v1" 35 corev1 "k8s.io/api/core/v1" 36 "k8s.io/apimachinery/pkg/labels" 37 "k8s.io/apimachinery/pkg/types" 38 "k8s.io/apimachinery/pkg/util/wait" 39 40 k8sclient "sigs.k8s.io/controller-runtime/pkg/client" 41 logf "sigs.k8s.io/controller-runtime/pkg/log" 42 ) 43 44 type Status string 45 46 const ( 47 FAILED Status = "failed" 48 COMPLETED Status = "completed" 49 UNKNOWN Status = "unknown" 50 ) 51 52 var log = logf.Log.WithName("job_resource") 53 54 type Timeouts struct { 55 WaitUntilActive, WaitUntilFinished time.Duration 56 } 57 58 func jobIDGenerator() string { 59 charset := "0123456789abcdefghijklmnopqrstuvwxyz" 60 61 randString1 := make([]byte, 10) 62 for i := range randString1 { 63 num, _ := rand.Int(rand.Reader, big.NewInt(int64(len(charset)))) 64 randString1[i] = charset[num.Int64()] 65 } 66 67 randString2 := make([]byte, 5) 68 for i := range randString2 { 69 num, _ := rand.Int(rand.Reader, big.NewInt(int64(len(charset)))) 70 randString2[i] = charset[num.Int64()] 71 } 72 73 return string(randString1) + "-" + string(randString2) 74 } 75 76 func New(job *v1.Job, timeouts *Timeouts) *Job { 77 if job != nil { 78 job.Name = fmt.Sprintf("%s-%s", job.GetName(), jobIDGenerator()) 79 } 80 81 return &Job{ 82 Job: job, 83 Timeouts: timeouts, 84 } 85 } 86 87 func NewWithDefaults(job *v1.Job) *Job { 88 if job != nil { 89 job.Name = fmt.Sprintf("%s-%s", job.GetName(), jobIDGenerator()) 90 } 91 92 return &Job{ 93 Job: job, 94 Timeouts: &Timeouts{ 95 WaitUntilActive: 60 * time.Second, 96 WaitUntilFinished: 60 * time.Second, 97 }, 98 } 99 } 100 101 func NewWithDefaultsUseExistingName(job *v1.Job) *Job { 102 return &Job{ 103 Job: job, 104 Timeouts: &Timeouts{ 105 WaitUntilActive: 60 * time.Second, 106 WaitUntilFinished: 60 * time.Second, 107 }, 108 } 109 } 110 111 type Job struct { 112 *v1.Job 113 114 Timeouts *Timeouts 115 } 116 117 func (j *Job) MustGetContainer(name string) container.Container { 118 cont, _ := j.GetContainer(name) 119 return cont 120 } 121 122 func (j *Job) GetContainer(name string) (cont container.Container, err error) { 123 for i, c := range j.Spec.Template.Spec.Containers { 124 if c.Name == name { 125 cont = container.Container{Container: &j.Spec.Template.Spec.Containers[i]} 126 return 127 } 128 } 129 for i, c := range j.Spec.Template.Spec.InitContainers { 130 if c.Name == name { 131 cont = container.Container{Container: &j.Spec.Template.Spec.InitContainers[i]} 132 return 133 } 134 } 135 return cont, fmt.Errorf("container '%s' not found", name) 136 } 137 138 func (j *Job) AddContainer(add container.Container) { 139 j.Spec.Template.Spec.Containers = util.AppendContainerIfMissing(j.Spec.Template.Spec.Containers, *add.Container) 140 } 141 142 func (j *Job) AddInitContainer(add container.Container) { 143 j.Spec.Template.Spec.InitContainers = util.AppendContainerIfMissing(j.Spec.Template.Spec.InitContainers, *add.Container) 144 } 145 146 func (j *Job) AppendVolumeIfMissing(volume corev1.Volume) { 147 j.Spec.Template.Spec.Volumes = util.AppendVolumeIfMissing(j.Spec.Template.Spec.Volumes, volume) 148 } 149 150 func (j *Job) AppendPullSecret(imagePullSecret corev1.LocalObjectReference) { 151 j.Spec.Template.Spec.ImagePullSecrets = util.AppendImagePullSecretIfMissing(j.Spec.Template.Spec.ImagePullSecrets, imagePullSecret) 152 } 153 154 // UpdateSecurityContextForAllContainers updates the security context for all containers defined 155 // in the job 156 func (j *Job) UpdateSecurityContextForAllContainers(sc container.SecurityContext) { 157 for i := range j.Spec.Template.Spec.InitContainers { 158 container.UpdateSecurityContext(&j.Spec.Template.Spec.InitContainers[i], sc) 159 } 160 161 for i := range j.Spec.Template.Spec.Containers { 162 container.UpdateSecurityContext(&j.Spec.Template.Spec.Containers[i], sc) 163 } 164 } 165 166 func (j *Job) Delete(client controller.Client) error { 167 if err := client.Delete(context.TODO(), j.Job); err != nil { 168 return errors.Wrap(err, "failed to delete") 169 } 170 171 // TODO: Need to investigate why job is not adding controller reference to job pod, 172 // this manual cleanup should not be required after deleting job 173 podList := &corev1.PodList{} 174 if err := client.List(context.TODO(), podList, k8sclient.MatchingLabels{"job-name": j.GetName()}); err != nil { 175 return errors.Wrap(err, "failed to list job pods") 176 } 177 178 for _, pod := range podList.Items { 179 podListItem := pod 180 if err := client.Delete(context.TODO(), &podListItem); err != nil { 181 return errors.Wrapf(err, "failed to delete pod '%s'", podListItem.Name) 182 } 183 } 184 185 return nil 186 } 187 188 func (j *Job) Status(client controller.Client) (Status, error) { 189 k8sJob, err := j.get(client) 190 if err != nil { 191 return UNKNOWN, err 192 } 193 194 if k8sJob.Status.Failed >= int32(1) { 195 return FAILED, nil 196 } 197 198 pods, err := j.getPods(client) 199 if err != nil { 200 return UNKNOWN, err 201 } 202 203 for _, pod := range pods.Items { 204 if pod.Status.Phase != corev1.PodSucceeded { 205 return FAILED, nil 206 } 207 } 208 209 return COMPLETED, nil 210 } 211 212 func (j *Job) ContainerStatus(client controller.Client, contName string) (Status, error) { 213 pods, err := j.getPods(client) 214 if err != nil { 215 return UNKNOWN, err 216 } 217 218 for _, pod := range pods.Items { 219 for _, containerStatus := range pod.Status.ContainerStatuses { 220 if containerStatus.Name == contName { 221 if containerStatus.State.Terminated != nil { 222 if containerStatus.State.Terminated.ExitCode == int32(0) { 223 return COMPLETED, nil 224 } 225 return FAILED, nil 226 } 227 } 228 } 229 } 230 231 return UNKNOWN, nil 232 } 233 234 func (j *Job) WaitUntilActive(client controller.Client) error { 235 err := wait.Poll(500*time.Millisecond, j.Timeouts.WaitUntilActive, func() (bool, error) { 236 log.Info(fmt.Sprintf("Waiting for job '%s' to start in namespace '%s'", j.GetName(), j.GetNamespace())) 237 238 k8sJob, err := j.get(client) 239 if err != nil { 240 return false, err 241 } 242 243 if k8sJob.Status.Active >= int32(1) || k8sJob.Status.Succeeded >= int32(1) { 244 return true, nil 245 } 246 247 return false, nil 248 }) 249 if err != nil { 250 return errors.Wrap(err, "job failed to start") 251 } 252 return nil 253 } 254 255 func (j *Job) WaitUntilFinished(client controller.Client) error { 256 var err error 257 258 err = wait.Poll(2*time.Second, j.Timeouts.WaitUntilFinished, func() (bool, error) { 259 log.Info(fmt.Sprintf("Waiting for job pod '%s' to finish", j.GetName())) 260 261 pods, err := j.getPods(client) 262 if err != nil { 263 log.Info(fmt.Sprintf("get job pod err: %s", err)) 264 return false, nil 265 } 266 267 if len(pods.Items) == 0 { 268 return false, nil 269 } 270 271 return j.podsTerminated(pods), nil 272 }) 273 if err != nil { 274 return errors.Wrapf(err, "pod for job '%s' failed to finish", j.GetName()) 275 } 276 277 return nil 278 } 279 280 func (j *Job) podsTerminated(pods *corev1.PodList) bool { 281 for _, pod := range pods.Items { 282 for _, containerStatus := range pod.Status.ContainerStatuses { 283 if containerStatus.State.Terminated == nil { 284 return false 285 } 286 } 287 } 288 289 return true 290 } 291 292 func (j *Job) WaitUntilContainerFinished(client controller.Client, contName string) error { 293 var err error 294 295 err = wait.Poll(2*time.Second, j.Timeouts.WaitUntilFinished, func() (bool, error) { 296 log.Info(fmt.Sprintf("Waiting for job pod '%s' to finish", j.GetName())) 297 298 pods, err := j.getPods(client) 299 if err != nil { 300 log.Info(fmt.Sprintf("get job pod err: %s", err)) 301 return false, nil 302 } 303 304 if len(pods.Items) == 0 { 305 return false, nil 306 } 307 308 return j.containerTerminated(pods, contName), nil 309 }) 310 if err != nil { 311 return errors.Wrapf(err, "pod for job '%s' failed to finish", j.GetName()) 312 } 313 314 return nil 315 } 316 317 func (j *Job) ContainerFinished(client controller.Client, contName string) (bool, error) { 318 pods, err := j.getPods(client) 319 if err != nil { 320 return false, err 321 } 322 323 return j.containerTerminated(pods, contName), nil 324 } 325 326 func (j *Job) containerTerminated(pods *corev1.PodList, contName string) bool { 327 for _, pod := range pods.Items { 328 for _, containerStatus := range pod.Status.ContainerStatuses { 329 if containerStatus.Name == contName { 330 if containerStatus.State.Terminated == nil { 331 return false 332 } 333 } 334 } 335 } 336 337 return true 338 } 339 340 func (j *Job) getPods(client controller.Client) (*corev1.PodList, error) { 341 labelSelector, err := labels.Parse(fmt.Sprintf("job-name=%s", j.GetName())) 342 if err != nil { 343 return nil, err 344 } 345 346 opts := &k8sclient.ListOptions{ 347 LabelSelector: labelSelector, 348 } 349 350 pods := &corev1.PodList{} 351 if err := client.List(context.TODO(), pods, opts); err != nil { 352 return nil, err 353 } 354 355 return pods, nil 356 } 357 358 func (j *Job) get(client controller.Client) (*v1.Job, error) { 359 k8sJob := &v1.Job{} 360 err := client.Get(context.TODO(), types.NamespacedName{Name: j.GetName(), Namespace: j.GetNamespace()}, k8sJob) 361 if err != nil { 362 return nil, err 363 } 364 365 return k8sJob, nil 366 }