github.com/buildkite/agent-stack-k8s@v0.4.0/scheduler/completions.go (about) 1 package scheduler 2 3 import ( 4 "context" 5 "fmt" 6 7 "github.com/buildkite/agent-stack-k8s/api" 8 "go.uber.org/zap" 9 v1 "k8s.io/api/core/v1" 10 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 11 "k8s.io/client-go/informers" 12 "k8s.io/client-go/kubernetes" 13 _ "k8s.io/client-go/tools/cache" 14 "k8s.io/client-go/util/retry" 15 "k8s.io/utils/pointer" 16 ) 17 18 type completionsWatcher struct { 19 logger *zap.Logger 20 k8s kubernetes.Interface 21 } 22 23 func NewPodCompletionWatcher(logger *zap.Logger, k8s kubernetes.Interface) *completionsWatcher { 24 watcher := &completionsWatcher{ 25 logger: logger, 26 k8s: k8s, 27 } 28 return watcher 29 } 30 31 // Creates a Pods informer and registers the handler on it 32 func (w *completionsWatcher) RegisterInformer(ctx context.Context, factory informers.SharedInformerFactory) error { 33 informer := factory.Core().V1().Pods().Informer() 34 if _, err := informer.AddEventHandler(w); err != nil { 35 return fmt.Errorf("failed to register pod event handler: %w", err) 36 } 37 go factory.Start(ctx.Done()) 38 return nil 39 } 40 41 // ignored 42 func (w *completionsWatcher) OnDelete(obj interface{}) {} 43 44 // handle pods completed while the controller wasn't running 45 func (w *completionsWatcher) OnAdd(obj interface{}) { 46 pod := obj.(*v1.Pod) 47 w.cleanupSidecars(pod) 48 } 49 50 func (w *completionsWatcher) OnUpdate(old interface{}, new interface{}) { 51 oldPod := old.(*v1.Pod) 52 if terminated := getTermination(oldPod); terminated != nil { 53 // skip subsequent reconciles after we've already handled termination 54 return 55 } 56 57 newPod := new.(*v1.Pod) 58 w.cleanupSidecars(newPod) 59 } 60 61 func (w *completionsWatcher) cleanupSidecars(pod *v1.Pod) { 62 if terminated := getTermination(pod); terminated != nil { 63 if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { 64 job, err := w.k8s.BatchV1().Jobs(pod.Namespace).Get(context.TODO(), pod.Labels["job-name"], metav1.GetOptions{}) 65 if err != nil { 66 return err 67 } else { 68 job.Spec.ActiveDeadlineSeconds = pointer.Int64(1) 69 _, err = w.k8s.BatchV1().Jobs(pod.Namespace).Update(context.TODO(), job, metav1.UpdateOptions{}) 70 return err 71 } 72 }); err != nil { 73 w.logger.Error("failed to update job", zap.Error(err)) 74 } 75 w.logger.Debug("agent finished", zap.String("uuid", pod.Labels[api.UUIDLabel]), zap.Int32("exit code", terminated.ExitCode)) 76 } 77 } 78 79 func getTermination(pod *v1.Pod) *v1.ContainerStateTerminated { 80 for _, container := range pod.Status.ContainerStatuses { 81 if container.Name == AgentContainerName { 82 if container.State.Terminated != nil { 83 // oldPod is not terminated, but newPod is 84 return container.State.Terminated 85 } 86 } 87 } 88 return nil 89 }