github.com/kyma-incubator/compass/components/director@v0.0.0-20230623144113-d764f56ff805/pkg/cronjob/cronjob.go (about)

     1  package cronjob
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  
     7  	"github.com/kyma-incubator/compass/components/director/pkg/kubernetes"
     8  	"github.com/kyma-incubator/compass/components/director/pkg/log"
     9  
    10  	"context"
    11  	"time"
    12  
    13  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    14  	"k8s.io/client-go/tools/leaderelection"
    15  	"k8s.io/client-go/tools/leaderelection/resourcelock"
    16  )
    17  
// hostnameEnvVar is the name of the environment variable whose value is used
// as this instance's identity in the leader election.
const hostnameEnvVar = "HOSTNAME"
    19  
// ElectionConfig configuration for k8s leader election with lease.
//
// LeaseLockName and LeaseLockNamespace are the name and namespace of the Lease
// object used as the lock. LeaseDuration, RenewDeadline and RetryPeriod are
// passed through unchanged to the client-go leader elector; the envconfig tags
// declare defaults of 60s, 15s and 5s for them. ElectionEnabled (declared
// default: true) decides whether RunCronJob uses leader election at all.
// From ClientConfig, the PollInterval, PollTimeout and Timeout values are used
// to build the Kubernetes client set.
type ElectionConfig struct {
	LeaseLockName      string        `envconfig:"APP_ELECTION_LEASE_LOCK_NAME"`
	LeaseLockNamespace string        `envconfig:"APP_ELECTION_LEASE_LOCK_NAMESPACE"`
	LeaseDuration      time.Duration `envconfig:"optional,default=60s,APP_ELECTION_LEASE_DURATION"`
	RenewDeadline      time.Duration `envconfig:"optional,default=15s,APP_ELECTION_RENEW_DEADLINE"`
	RetryPeriod        time.Duration `envconfig:"optional,default=5s,APP_ELECTION_RETRY_PERIOD"`
	ElectionEnabled    bool          `envconfig:"optional,default=true,APP_ELECTION_ENABLED"`
	ClientConfig       kubernetes.Config
}
    30  
// CronJob represents a job that executes Fn on every SchedulePeriod.
//
// Name identifies the job in the log messages written by this file. Fn is
// called with a context that is cancelled when the runner is stopped or when
// the context the runner was started with is done. SchedulePeriod is the
// target interval between the starts of two consecutive executions: after Fn
// returns, the runner waits only for the part of the period that Fn did not
// use, and starts the next execution immediately when Fn took a full period
// or longer.
type CronJob struct {
	Name           string
	Fn             func(ctx context.Context)
	SchedulePeriod time.Duration
}
    37  
// cronJobRunner executes a CronJob in a loop until it is stopped.
//
// CronJob is the job to execute. stop cancels the context created by Start;
// it is nil until Start has been called.
type cronJobRunner struct {
	CronJob CronJob
	stop    context.CancelFunc
}
    42  
    43  func (r *cronJobRunner) Start(ctx context.Context) {
    44  	newCtx, stop := context.WithCancel(ctx)
    45  	r.stop = stop
    46  	defer r.Stop()
    47  
    48  	for {
    49  		start := time.Now()
    50  		log.C(ctx).Infof("Starting CronJob %s execution", r.CronJob.Name)
    51  		r.CronJob.Fn(newCtx)
    52  		if newCtx.Err() != nil {
    53  			log.C(ctx).Infof("CronJob %s stopped due to context done", r.CronJob.Name)
    54  			return
    55  		}
    56  		jobTime := time.Since(start)
    57  		log.C(ctx).Infof("CronJob %s executed for %s", r.CronJob.Name, jobTime.String())
    58  		if jobTime < r.CronJob.SchedulePeriod {
    59  			waitPeriod := r.CronJob.SchedulePeriod - jobTime
    60  			log.C(ctx).Infof("Scheduling CronJob %s to run after %s", r.CronJob.Name, waitPeriod.String())
    61  
    62  			select {
    63  			case <-newCtx.Done():
    64  				log.C(ctx).Infof("Context of CronJob %s is done. Exiting CronJob loop...", r.CronJob.Name)
    65  				return
    66  			case <-time.After(waitPeriod):
    67  				log.C(ctx).Infof("Waited %s to run next iteration of CronJob %s",
    68  					waitPeriod.String(), r.CronJob.Name)
    69  			}
    70  		}
    71  	}
    72  }
    73  
    74  func (r *cronJobRunner) Stop() {
    75  	if r.stop != nil {
    76  		r.stop()
    77  	}
    78  }
    79  
    80  func runLeaderLeaseLoop(ctx context.Context, electionConfig ElectionConfig, job CronJob) error {
    81  	k8sConfig := electionConfig.ClientConfig
    82  	client, err := kubernetes.NewKubernetesClientSet(
    83  		ctx, k8sConfig.PollInterval, k8sConfig.PollTimeout, k8sConfig.Timeout)
    84  	if err != nil {
    85  		return err
    86  	}
    87  	electionID := os.Getenv(hostnameEnvVar)
    88  	if electionID == "" {
    89  		return fmt.Errorf("not running in k8s pod. Env variable %s not set", hostnameEnvVar)
    90  	}
    91  
    92  	runner := cronJobRunner{
    93  		CronJob: job,
    94  	}
    95  
    96  	lock := &resourcelock.LeaseLock{
    97  		LeaseMeta: metav1.ObjectMeta{
    98  			Name:      electionConfig.LeaseLockName,
    99  			Namespace: electionConfig.LeaseLockNamespace,
   100  		},
   101  		Client: client.CoordinationV1(),
   102  		LockConfig: resourcelock.ResourceLockConfig{
   103  			Identity: electionID,
   104  		},
   105  	}
   106  
   107  	leaderElectionConfig := leaderelection.LeaderElectionConfig{
   108  		Lock:            lock,
   109  		ReleaseOnCancel: true,
   110  		LeaseDuration:   electionConfig.LeaseDuration,
   111  		RenewDeadline:   electionConfig.RenewDeadline,
   112  		RetryPeriod:     electionConfig.RetryPeriod,
   113  		Callbacks: leaderelection.LeaderCallbacks{
   114  			OnStartedLeading: func(ctx context.Context) {
   115  				log.C(ctx).Infof("Starting CronJob executor on %s", electionID)
   116  				runner.Start(ctx)
   117  				log.C(ctx).Infof("CronJob executor on %s exited", electionID)
   118  			},
   119  			OnStoppedLeading: func() {
   120  				log.C(ctx).Errorf("Instance %s is no longer leader. Stopping CronJob executor", electionID)
   121  				runner.Stop()
   122  			},
   123  			OnNewLeader: func(identity string) {
   124  				log.C(ctx).Debugf("Instance %s elected as leader", identity)
   125  			},
   126  		},
   127  	}
   128  
   129  	leaderElection, err := leaderelection.NewLeaderElector(leaderElectionConfig)
   130  	if err != nil {
   131  		return err
   132  	}
   133  
   134  	leaderElection.Run(ctx)
   135  	return nil
   136  }
   137  
   138  func runCronJobWithElection(ctx context.Context, cfg ElectionConfig, job CronJob) error {
   139  	for {
   140  		if err := runLeaderLeaseLoop(ctx, cfg, job); err != nil {
   141  			return err
   142  		}
   143  		select {
   144  		case <-ctx.Done():
   145  			log.C(ctx).Info("Leader lease loop context is done, exiting leader lease loop...")
   146  			return nil
   147  		default:
   148  			log.C(ctx).Error("Leader lease loop ended, re-running...")
   149  		}
   150  	}
   151  }
   152  
   153  // RunCronJob runs a CronJob and blocks.
   154  // If cfg.LeaseEnabled is true then only one pod (if application is scaled) will run the cron job.
   155  // This is done using leader election from k8s with leases.
   156  // Returns error in case of bad configuration or bad connection to k8s cluster
   157  func RunCronJob(ctx context.Context, cfg ElectionConfig, job CronJob) error {
   158  	if cfg.ElectionEnabled {
   159  		return runCronJobWithElection(ctx, cfg, job)
   160  	}
   161  
   162  	runner := cronJobRunner{
   163  		CronJob: job,
   164  	}
   165  	runner.Start(ctx)
   166  	return nil
   167  }