github.com/buildkite/agent-stack-k8s@v0.4.0/scheduler/completions.go (about)

     1  package scheduler
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  
     7  	"github.com/buildkite/agent-stack-k8s/api"
     8  	"go.uber.org/zap"
     9  	v1 "k8s.io/api/core/v1"
    10  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    11  	"k8s.io/client-go/informers"
    12  	"k8s.io/client-go/kubernetes"
    13  	_ "k8s.io/client-go/tools/cache"
    14  	"k8s.io/client-go/util/retry"
    15  	"k8s.io/utils/pointer"
    16  )
    17  
    18  type completionsWatcher struct {
    19  	logger *zap.Logger
    20  	k8s    kubernetes.Interface
    21  }
    22  
    23  func NewPodCompletionWatcher(logger *zap.Logger, k8s kubernetes.Interface) *completionsWatcher {
    24  	watcher := &completionsWatcher{
    25  		logger: logger,
    26  		k8s:    k8s,
    27  	}
    28  	return watcher
    29  }
    30  
    31  // Creates a Pods informer and registers the handler on it
    32  func (w *completionsWatcher) RegisterInformer(ctx context.Context, factory informers.SharedInformerFactory) error {
    33  	informer := factory.Core().V1().Pods().Informer()
    34  	if _, err := informer.AddEventHandler(w); err != nil {
    35  		return fmt.Errorf("failed to register pod event handler: %w", err)
    36  	}
    37  	go factory.Start(ctx.Done())
    38  	return nil
    39  }
    40  
    41  // ignored
    42  func (w *completionsWatcher) OnDelete(obj interface{}) {}
    43  
    44  // handle pods completed while the controller wasn't running
    45  func (w *completionsWatcher) OnAdd(obj interface{}) {
    46  	pod := obj.(*v1.Pod)
    47  	w.cleanupSidecars(pod)
    48  }
    49  
    50  func (w *completionsWatcher) OnUpdate(old interface{}, new interface{}) {
    51  	oldPod := old.(*v1.Pod)
    52  	if terminated := getTermination(oldPod); terminated != nil {
    53  		// skip subsequent reconciles after we've already handled termination
    54  		return
    55  	}
    56  
    57  	newPod := new.(*v1.Pod)
    58  	w.cleanupSidecars(newPod)
    59  }
    60  
    61  func (w *completionsWatcher) cleanupSidecars(pod *v1.Pod) {
    62  	if terminated := getTermination(pod); terminated != nil {
    63  		if err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
    64  			job, err := w.k8s.BatchV1().Jobs(pod.Namespace).Get(context.TODO(), pod.Labels["job-name"], metav1.GetOptions{})
    65  			if err != nil {
    66  				return err
    67  			} else {
    68  				job.Spec.ActiveDeadlineSeconds = pointer.Int64(1)
    69  				_, err = w.k8s.BatchV1().Jobs(pod.Namespace).Update(context.TODO(), job, metav1.UpdateOptions{})
    70  				return err
    71  			}
    72  		}); err != nil {
    73  			w.logger.Error("failed to update job", zap.Error(err))
    74  		}
    75  		w.logger.Debug("agent finished", zap.String("uuid", pod.Labels[api.UUIDLabel]), zap.Int32("exit code", terminated.ExitCode))
    76  	}
    77  }
    78  
    79  func getTermination(pod *v1.Pod) *v1.ContainerStateTerminated {
    80  	for _, container := range pod.Status.ContainerStatuses {
    81  		if container.Name == AgentContainerName {
    82  			if container.State.Terminated != nil {
    83  				// oldPod is not terminated, but newPod is
    84  				return container.State.Terminated
    85  			}
    86  		}
    87  	}
    88  	return nil
    89  }