github.com/castai/kvisor@v1.7.1-0.20240516114728-b3572a2607b5/cmd/controller/state/jobs_cleanup_controller.go (about)

     1  package state
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"time"
     8  
     9  	"github.com/castai/kvisor/pkg/logging"
    10  	"github.com/samber/lo"
    11  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    12  	"k8s.io/apimachinery/pkg/labels"
    13  	"k8s.io/client-go/kubernetes"
    14  )
    15  
    16  type JobsCleanupConfig struct {
    17  	CleanupInterval time.Duration `validate:"required"`
    18  	CleanupJobAge   time.Duration `validate:"required"`
    19  	Namespace       string        `validate:"required"`
    20  }
    21  
    22  func NewJobsCleanupController(log *logging.Logger, clientset kubernetes.Interface, cfg JobsCleanupConfig) *JobsCleanupController {
    23  	if cfg.CleanupInterval == 0 {
    24  		cfg.CleanupInterval = 10 * time.Minute
    25  	}
    26  	if cfg.CleanupJobAge == 0 {
    27  		cfg.CleanupJobAge = 10 * time.Minute
    28  	}
    29  	return &JobsCleanupController{
    30  		log:       log.WithField("component", "jobs_cleanup"),
    31  		clientset: clientset,
    32  		cfg:       cfg,
    33  	}
    34  }
    35  
    36  type JobsCleanupController struct {
    37  	log       *logging.Logger
    38  	clientset kubernetes.Interface
    39  	cfg       JobsCleanupConfig
    40  }
    41  
    42  func (c *JobsCleanupController) Run(ctx context.Context) error {
    43  	c.log.Info("running")
    44  	defer c.log.Infof("stopping")
    45  
    46  	for {
    47  		select {
    48  		case <-ctx.Done():
    49  			return ctx.Err()
    50  		case <-time.After(c.cfg.CleanupInterval):
    51  			func() {
    52  				ctx, cancel := context.WithTimeout(ctx, 3*time.Minute)
    53  				defer cancel()
    54  				if err := c.cleanupJobs(ctx); err != nil && !errors.Is(err, context.Canceled) {
    55  					c.log.Errorf("jobs cleanup: %v", err)
    56  				}
    57  			}()
    58  		}
    59  	}
    60  }
    61  
    62  func (c *JobsCleanupController) cleanupJobs(ctx context.Context) error {
    63  	selector := labels.Set{"app.kubernetes.io/managed-by": "castai"}.String()
    64  	jobs, err := c.clientset.BatchV1().Jobs(c.cfg.Namespace).List(ctx, metav1.ListOptions{
    65  		LabelSelector: selector,
    66  	})
    67  	if err != nil {
    68  		return fmt.Errorf("list jobs for cleanup: %w", err)
    69  	}
    70  
    71  	cleanupOlderThan := time.Now().UTC().Add(-c.cfg.CleanupJobAge)
    72  
    73  	for _, job := range jobs.Items {
    74  		if job.CreationTimestamp.Time.UTC().Before(cleanupOlderThan) {
    75  			if err := c.clientset.BatchV1().Jobs(c.cfg.Namespace).Delete(ctx, job.Name, metav1.DeleteOptions{
    76  				GracePeriodSeconds: lo.ToPtr(int64(0)),
    77  				PropagationPolicy:  lo.ToPtr(metav1.DeletePropagationBackground),
    78  			}); err != nil {
    79  				return fmt.Errorf("deleting old job: %w", err)
    80  			}
    81  			c.log.Infof("deleted old job %q", job.Name)
    82  		}
    83  	}
    84  	return nil
    85  }