github.com/IBM-Blockchain/fabric-operator@v1.0.4/pkg/manager/resources/job/job.go (about)

     1  /*
     2   * Copyright contributors to the Hyperledger Fabric Operator project
     3   *
     4   * SPDX-License-Identifier: Apache-2.0
     5   *
     6   * Licensed under the Apache License, Version 2.0 (the "License");
     7   * you may not use this file except in compliance with the License.
     8   * You may obtain a copy of the License at:
     9   *
    10   * 	  http://www.apache.org/licenses/LICENSE-2.0
    11   *
    12   * Unless required by applicable law or agreed to in writing, software
    13   * distributed under the License is distributed on an "AS IS" BASIS,
    14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15   * See the License for the specific language governing permissions and
    16   * limitations under the License.
    17   */
    18  
    19  package job
    20  
    21  import (
    22  	"context"
    23  	"crypto/rand"
    24  	"fmt"
    25  	"math/big"
    26  	"time"
    27  
    28  	"github.com/pkg/errors"
    29  
    30  	controller "github.com/IBM-Blockchain/fabric-operator/pkg/k8s/controllerclient"
    31  	"github.com/IBM-Blockchain/fabric-operator/pkg/manager/resources/container"
    32  	"github.com/IBM-Blockchain/fabric-operator/pkg/util"
    33  
    34  	v1 "k8s.io/api/batch/v1"
    35  	corev1 "k8s.io/api/core/v1"
    36  	"k8s.io/apimachinery/pkg/labels"
    37  	"k8s.io/apimachinery/pkg/types"
    38  	"k8s.io/apimachinery/pkg/util/wait"
    39  
    40  	k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
    41  	logf "sigs.k8s.io/controller-runtime/pkg/log"
    42  )
    43  
// Status is the coarse outcome reported by Job.Status and Job.ContainerStatus.
type Status string

// Possible Status values:
//   - FAILED: the job recorded a failed pod, a job pod is not in the Succeeded
//     phase, or the inspected container terminated with a non-zero exit code.
//   - COMPLETED: no failure was observed, or the inspected container
//     terminated with exit code 0.
//   - UNKNOWN: the outcome could not be determined (lookup error, or the
//     container has not terminated yet).
const (
	FAILED    Status = "failed"
	COMPLETED Status = "completed"
	UNKNOWN   Status = "unknown"
)
    51  
// log is the package-level logger, registered under the name "job_resource".
var log = logf.Log.WithName("job_resource")
    53  
    54  type Timeouts struct {
    55  	WaitUntilActive, WaitUntilFinished time.Duration
    56  }
    57  
    58  func jobIDGenerator() string {
    59  	charset := "0123456789abcdefghijklmnopqrstuvwxyz"
    60  
    61  	randString1 := make([]byte, 10)
    62  	for i := range randString1 {
    63  		num, _ := rand.Int(rand.Reader, big.NewInt(int64(len(charset))))
    64  		randString1[i] = charset[num.Int64()]
    65  	}
    66  
    67  	randString2 := make([]byte, 5)
    68  	for i := range randString2 {
    69  		num, _ := rand.Int(rand.Reader, big.NewInt(int64(len(charset))))
    70  		randString2[i] = charset[num.Int64()]
    71  	}
    72  
    73  	return string(randString1) + "-" + string(randString2)
    74  }
    75  
    76  func New(job *v1.Job, timeouts *Timeouts) *Job {
    77  	if job != nil {
    78  		job.Name = fmt.Sprintf("%s-%s", job.GetName(), jobIDGenerator())
    79  	}
    80  
    81  	return &Job{
    82  		Job:      job,
    83  		Timeouts: timeouts,
    84  	}
    85  }
    86  
    87  func NewWithDefaults(job *v1.Job) *Job {
    88  	if job != nil {
    89  		job.Name = fmt.Sprintf("%s-%s", job.GetName(), jobIDGenerator())
    90  	}
    91  
    92  	return &Job{
    93  		Job: job,
    94  		Timeouts: &Timeouts{
    95  			WaitUntilActive:   60 * time.Second,
    96  			WaitUntilFinished: 60 * time.Second,
    97  		},
    98  	}
    99  }
   100  
   101  func NewWithDefaultsUseExistingName(job *v1.Job) *Job {
   102  	return &Job{
   103  		Job: job,
   104  		Timeouts: &Timeouts{
   105  			WaitUntilActive:   60 * time.Second,
   106  			WaitUntilFinished: 60 * time.Second,
   107  		},
   108  	}
   109  }
   110  
// Job embeds a Kubernetes batch/v1 Job and pairs it with the timeouts used by
// the Wait* helpers defined on it.
type Job struct {
	// Embedded so the Kubernetes object's fields and accessors (GetName,
	// GetNamespace, Spec, ...) are available directly on Job.
	*v1.Job

	// Timeouts is dereferenced by the Wait* helpers; it must be non-nil
	// before they are called.
	Timeouts *Timeouts
}
   116  
   117  func (j *Job) MustGetContainer(name string) container.Container {
   118  	cont, _ := j.GetContainer(name)
   119  	return cont
   120  }
   121  
   122  func (j *Job) GetContainer(name string) (cont container.Container, err error) {
   123  	for i, c := range j.Spec.Template.Spec.Containers {
   124  		if c.Name == name {
   125  			cont = container.Container{Container: &j.Spec.Template.Spec.Containers[i]}
   126  			return
   127  		}
   128  	}
   129  	for i, c := range j.Spec.Template.Spec.InitContainers {
   130  		if c.Name == name {
   131  			cont = container.Container{Container: &j.Spec.Template.Spec.InitContainers[i]}
   132  			return
   133  		}
   134  	}
   135  	return cont, fmt.Errorf("container '%s' not found", name)
   136  }
   137  
   138  func (j *Job) AddContainer(add container.Container) {
   139  	j.Spec.Template.Spec.Containers = util.AppendContainerIfMissing(j.Spec.Template.Spec.Containers, *add.Container)
   140  }
   141  
   142  func (j *Job) AddInitContainer(add container.Container) {
   143  	j.Spec.Template.Spec.InitContainers = util.AppendContainerIfMissing(j.Spec.Template.Spec.InitContainers, *add.Container)
   144  }
   145  
   146  func (j *Job) AppendVolumeIfMissing(volume corev1.Volume) {
   147  	j.Spec.Template.Spec.Volumes = util.AppendVolumeIfMissing(j.Spec.Template.Spec.Volumes, volume)
   148  }
   149  
   150  func (j *Job) AppendPullSecret(imagePullSecret corev1.LocalObjectReference) {
   151  	j.Spec.Template.Spec.ImagePullSecrets = util.AppendImagePullSecretIfMissing(j.Spec.Template.Spec.ImagePullSecrets, imagePullSecret)
   152  }
   153  
   154  // UpdateSecurityContextForAllContainers updates the security context for all containers defined
   155  // in the job
   156  func (j *Job) UpdateSecurityContextForAllContainers(sc container.SecurityContext) {
   157  	for i := range j.Spec.Template.Spec.InitContainers {
   158  		container.UpdateSecurityContext(&j.Spec.Template.Spec.InitContainers[i], sc)
   159  	}
   160  
   161  	for i := range j.Spec.Template.Spec.Containers {
   162  		container.UpdateSecurityContext(&j.Spec.Template.Spec.Containers[i], sc)
   163  	}
   164  }
   165  
   166  func (j *Job) Delete(client controller.Client) error {
   167  	if err := client.Delete(context.TODO(), j.Job); err != nil {
   168  		return errors.Wrap(err, "failed to delete")
   169  	}
   170  
   171  	// TODO: Need to investigate why job is not adding controller reference to job pod,
   172  	// this manual cleanup should not be required after deleting job
   173  	podList := &corev1.PodList{}
   174  	if err := client.List(context.TODO(), podList, k8sclient.MatchingLabels{"job-name": j.GetName()}); err != nil {
   175  		return errors.Wrap(err, "failed to list job pods")
   176  	}
   177  
   178  	for _, pod := range podList.Items {
   179  		podListItem := pod
   180  		if err := client.Delete(context.TODO(), &podListItem); err != nil {
   181  			return errors.Wrapf(err, "failed to delete pod '%s'", podListItem.Name)
   182  		}
   183  	}
   184  
   185  	return nil
   186  }
   187  
   188  func (j *Job) Status(client controller.Client) (Status, error) {
   189  	k8sJob, err := j.get(client)
   190  	if err != nil {
   191  		return UNKNOWN, err
   192  	}
   193  
   194  	if k8sJob.Status.Failed >= int32(1) {
   195  		return FAILED, nil
   196  	}
   197  
   198  	pods, err := j.getPods(client)
   199  	if err != nil {
   200  		return UNKNOWN, err
   201  	}
   202  
   203  	for _, pod := range pods.Items {
   204  		if pod.Status.Phase != corev1.PodSucceeded {
   205  			return FAILED, nil
   206  		}
   207  	}
   208  
   209  	return COMPLETED, nil
   210  }
   211  
   212  func (j *Job) ContainerStatus(client controller.Client, contName string) (Status, error) {
   213  	pods, err := j.getPods(client)
   214  	if err != nil {
   215  		return UNKNOWN, err
   216  	}
   217  
   218  	for _, pod := range pods.Items {
   219  		for _, containerStatus := range pod.Status.ContainerStatuses {
   220  			if containerStatus.Name == contName {
   221  				if containerStatus.State.Terminated != nil {
   222  					if containerStatus.State.Terminated.ExitCode == int32(0) {
   223  						return COMPLETED, nil
   224  					}
   225  					return FAILED, nil
   226  				}
   227  			}
   228  		}
   229  	}
   230  
   231  	return UNKNOWN, nil
   232  }
   233  
   234  func (j *Job) WaitUntilActive(client controller.Client) error {
   235  	err := wait.Poll(500*time.Millisecond, j.Timeouts.WaitUntilActive, func() (bool, error) {
   236  		log.Info(fmt.Sprintf("Waiting for job '%s' to start in namespace '%s'", j.GetName(), j.GetNamespace()))
   237  
   238  		k8sJob, err := j.get(client)
   239  		if err != nil {
   240  			return false, err
   241  		}
   242  
   243  		if k8sJob.Status.Active >= int32(1) || k8sJob.Status.Succeeded >= int32(1) {
   244  			return true, nil
   245  		}
   246  
   247  		return false, nil
   248  	})
   249  	if err != nil {
   250  		return errors.Wrap(err, "job failed to start")
   251  	}
   252  	return nil
   253  }
   254  
   255  func (j *Job) WaitUntilFinished(client controller.Client) error {
   256  	var err error
   257  
   258  	err = wait.Poll(2*time.Second, j.Timeouts.WaitUntilFinished, func() (bool, error) {
   259  		log.Info(fmt.Sprintf("Waiting for job pod '%s' to finish", j.GetName()))
   260  
   261  		pods, err := j.getPods(client)
   262  		if err != nil {
   263  			log.Info(fmt.Sprintf("get job pod err: %s", err))
   264  			return false, nil
   265  		}
   266  
   267  		if len(pods.Items) == 0 {
   268  			return false, nil
   269  		}
   270  
   271  		return j.podsTerminated(pods), nil
   272  	})
   273  	if err != nil {
   274  		return errors.Wrapf(err, "pod for job '%s' failed to finish", j.GetName())
   275  	}
   276  
   277  	return nil
   278  }
   279  
   280  func (j *Job) podsTerminated(pods *corev1.PodList) bool {
   281  	for _, pod := range pods.Items {
   282  		for _, containerStatus := range pod.Status.ContainerStatuses {
   283  			if containerStatus.State.Terminated == nil {
   284  				return false
   285  			}
   286  		}
   287  	}
   288  
   289  	return true
   290  }
   291  
   292  func (j *Job) WaitUntilContainerFinished(client controller.Client, contName string) error {
   293  	var err error
   294  
   295  	err = wait.Poll(2*time.Second, j.Timeouts.WaitUntilFinished, func() (bool, error) {
   296  		log.Info(fmt.Sprintf("Waiting for job pod '%s' to finish", j.GetName()))
   297  
   298  		pods, err := j.getPods(client)
   299  		if err != nil {
   300  			log.Info(fmt.Sprintf("get job pod err: %s", err))
   301  			return false, nil
   302  		}
   303  
   304  		if len(pods.Items) == 0 {
   305  			return false, nil
   306  		}
   307  
   308  		return j.containerTerminated(pods, contName), nil
   309  	})
   310  	if err != nil {
   311  		return errors.Wrapf(err, "pod for job '%s' failed to finish", j.GetName())
   312  	}
   313  
   314  	return nil
   315  }
   316  
   317  func (j *Job) ContainerFinished(client controller.Client, contName string) (bool, error) {
   318  	pods, err := j.getPods(client)
   319  	if err != nil {
   320  		return false, err
   321  	}
   322  
   323  	return j.containerTerminated(pods, contName), nil
   324  }
   325  
   326  func (j *Job) containerTerminated(pods *corev1.PodList, contName string) bool {
   327  	for _, pod := range pods.Items {
   328  		for _, containerStatus := range pod.Status.ContainerStatuses {
   329  			if containerStatus.Name == contName {
   330  				if containerStatus.State.Terminated == nil {
   331  					return false
   332  				}
   333  			}
   334  		}
   335  	}
   336  
   337  	return true
   338  }
   339  
   340  func (j *Job) getPods(client controller.Client) (*corev1.PodList, error) {
   341  	labelSelector, err := labels.Parse(fmt.Sprintf("job-name=%s", j.GetName()))
   342  	if err != nil {
   343  		return nil, err
   344  	}
   345  
   346  	opts := &k8sclient.ListOptions{
   347  		LabelSelector: labelSelector,
   348  	}
   349  
   350  	pods := &corev1.PodList{}
   351  	if err := client.List(context.TODO(), pods, opts); err != nil {
   352  		return nil, err
   353  	}
   354  
   355  	return pods, nil
   356  }
   357  
   358  func (j *Job) get(client controller.Client) (*v1.Job, error) {
   359  	k8sJob := &v1.Job{}
   360  	err := client.Get(context.TODO(), types.NamespacedName{Name: j.GetName(), Namespace: j.GetNamespace()}, k8sJob)
   361  	if err != nil {
   362  		return nil, err
   363  	}
   364  
   365  	return k8sJob, nil
   366  }