github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/pkg/dataprotection/backup/scheduler.go (about)

     1  /*
     2  Copyright (C) 2022-2023 ApeCloud Co., Ltd
     3  
     4  This file is part of KubeBlocks project
     5  
     6  This program is free software: you can redistribute it and/or modify
     7  it under the terms of the GNU Affero General Public License as published by
     8  the Free Software Foundation, either version 3 of the License, or
     9  (at your option) any later version.
    10  
    11  This program is distributed in the hope that it will be useful
    12  but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  GNU Affero General Public License for more details.
    15  
    16  You should have received a copy of the GNU Affero General Public License
    17  along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18  */
    19  
    20  package backup
    21  
    22  import (
    23  	"fmt"
    24  	"sort"
    25  
    26  	batchv1 "k8s.io/api/batch/v1"
    27  	corev1 "k8s.io/api/core/v1"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	k8sruntime "k8s.io/apimachinery/pkg/runtime"
    30  	"k8s.io/apimachinery/pkg/util/json"
    31  	"sigs.k8s.io/controller-runtime/pkg/client"
    32  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    33  
    34  	appsv1alpha1 "github.com/1aal/kubeblocks/apis/apps/v1alpha1"
    35  	dpv1alpha1 "github.com/1aal/kubeblocks/apis/dataprotection/v1alpha1"
    36  	"github.com/1aal/kubeblocks/pkg/constant"
    37  	intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil"
    38  	dperrors "github.com/1aal/kubeblocks/pkg/dataprotection/errors"
    39  	dptypes "github.com/1aal/kubeblocks/pkg/dataprotection/types"
    40  	dputils "github.com/1aal/kubeblocks/pkg/dataprotection/utils"
    41  	"github.com/1aal/kubeblocks/pkg/dataprotection/utils/boolptr"
    42  	viper "github.com/1aal/kubeblocks/pkg/viperx"
    43  )
    44  
    45  type Scheduler struct {
    46  	intctrlutil.RequestCtx
    47  	Client         client.Client
    48  	Scheme         *k8sruntime.Scheme
    49  	BackupSchedule *dpv1alpha1.BackupSchedule
    50  	BackupPolicy   *dpv1alpha1.BackupPolicy
    51  }
    52  
    53  func (s *Scheduler) Schedule() error {
    54  	if err := s.validate(); err != nil {
    55  		return err
    56  	}
    57  
    58  	for i := range s.BackupSchedule.Spec.Schedules {
    59  		if err := s.handleSchedulePolicy(i); err != nil {
    60  			return err
    61  		}
    62  	}
    63  	return nil
    64  }
    65  
    66  // validate validates the backup schedule.
    67  func (s *Scheduler) validate() error {
    68  	methodInBackupPolicy := func(name string) bool {
    69  		for _, method := range s.BackupPolicy.Spec.BackupMethods {
    70  			if method.Name == name {
    71  				return true
    72  			}
    73  		}
    74  		return false
    75  	}
    76  
    77  	for _, sp := range s.BackupSchedule.Spec.Schedules {
    78  		if methodInBackupPolicy(sp.BackupMethod) {
    79  			continue
    80  		}
    81  		// backup method name is not in backup policy
    82  		return fmt.Errorf("backup method %s is not in backup policy %s/%s",
    83  			sp.BackupMethod, s.BackupPolicy.Namespace, s.BackupPolicy.Name)
    84  	}
    85  	return nil
    86  }
    87  
    88  func (s *Scheduler) handleSchedulePolicy(index int) error {
    89  	schedulePolicy := &s.BackupSchedule.Spec.Schedules[index]
    90  	// TODO(ldm): better to remove this dependency in the future
    91  	if err := s.reconfigure(schedulePolicy); err != nil {
    92  		return err
    93  	}
    94  
    95  	// create/delete/patch cronjob workload
    96  	return s.reconcileCronJob(schedulePolicy)
    97  }
    98  
    99  type backupReconfigureRef struct {
   100  	Name    string         `json:"name"`
   101  	Key     string         `json:"key"`
   102  	Enable  parameterPairs `json:"enable,omitempty"`
   103  	Disable parameterPairs `json:"disable,omitempty"`
   104  }
   105  
   106  type parameterPairs map[string][]appsv1alpha1.ParameterPair
   107  
   108  func (s *Scheduler) reconfigure(schedulePolicy *dpv1alpha1.SchedulePolicy) error {
   109  	reCfgRef := s.BackupSchedule.Annotations[dptypes.ReconfigureRefAnnotationKey]
   110  	if reCfgRef == "" {
   111  		return nil
   112  	}
   113  	configRef := backupReconfigureRef{}
   114  	if err := json.Unmarshal([]byte(reCfgRef), &configRef); err != nil {
   115  		return err
   116  	}
   117  
   118  	enable := boolptr.IsSetToTrue(schedulePolicy.Enabled)
   119  	if s.BackupSchedule.Annotations[constant.LastAppliedConfigAnnotationKey] == "" && !enable {
   120  		// disable in the first policy created, no need reconfigure because default configs had been set.
   121  		return nil
   122  	}
   123  	configParameters := configRef.Disable
   124  	if enable {
   125  		configParameters = configRef.Enable
   126  	}
   127  	if configParameters == nil {
   128  		return nil
   129  	}
   130  	parameters := configParameters[schedulePolicy.BackupMethod]
   131  	if len(parameters) == 0 {
   132  		// skip reconfigure if not found parameters.
   133  		return nil
   134  	}
   135  	updateParameterPairsBytes, _ := json.Marshal(parameters)
   136  	updateParameterPairs := string(updateParameterPairsBytes)
   137  	if updateParameterPairs == s.BackupSchedule.Annotations[constant.LastAppliedConfigAnnotationKey] {
   138  		// reconcile the config job if finished
   139  		return s.reconcileReconfigure()
   140  	}
   141  
   142  	targetPodSelector := s.BackupPolicy.Spec.Target.PodSelector
   143  	ops := appsv1alpha1.OpsRequest{
   144  		ObjectMeta: metav1.ObjectMeta{
   145  			GenerateName: s.BackupSchedule.Name + "-",
   146  			Namespace:    s.BackupSchedule.Namespace,
   147  			Labels: map[string]string{
   148  				dptypes.BackupScheduleLabelKey: s.BackupSchedule.Name,
   149  			},
   150  		},
   151  		Spec: appsv1alpha1.OpsRequestSpec{
   152  			Type:       appsv1alpha1.ReconfiguringType,
   153  			ClusterRef: targetPodSelector.MatchLabels[constant.AppInstanceLabelKey],
   154  			Reconfigure: &appsv1alpha1.Reconfigure{
   155  				ComponentOps: appsv1alpha1.ComponentOps{
   156  					ComponentName: targetPodSelector.MatchLabels[constant.KBAppComponentLabelKey],
   157  				},
   158  				Configurations: []appsv1alpha1.ConfigurationItem{
   159  					{
   160  						Name: configRef.Name,
   161  						Keys: []appsv1alpha1.ParameterConfig{
   162  							{
   163  								Key:        configRef.Key,
   164  								Parameters: parameters,
   165  							},
   166  						},
   167  					},
   168  				},
   169  			},
   170  		},
   171  	}
   172  	if err := s.Client.Create(s.Ctx, &ops); err != nil {
   173  		return err
   174  	}
   175  	s.Recorder.Eventf(s.BackupSchedule, corev1.EventTypeNormal, "Reconfiguring", "update config %s", updateParameterPairs)
   176  	patch := client.MergeFrom(s.BackupSchedule.DeepCopy())
   177  	if s.BackupSchedule.Annotations == nil {
   178  		s.BackupSchedule.Annotations = map[string]string{}
   179  	}
   180  	s.BackupSchedule.Annotations[constant.LastAppliedConfigAnnotationKey] = updateParameterPairs
   181  	if err := s.Client.Patch(s.Ctx, s.BackupSchedule, patch); err != nil {
   182  		return err
   183  	}
   184  	return intctrlutil.NewErrorf(intctrlutil.ErrorTypeRequeue, "requeue to waiting for ops %s finished.", ops.Name)
   185  }
   186  
   187  func (s *Scheduler) reconcileReconfigure() error {
   188  	opsList := appsv1alpha1.OpsRequestList{}
   189  	if err := s.Client.List(s.Ctx, &opsList,
   190  		client.InNamespace(s.BackupSchedule.Namespace),
   191  		client.MatchingLabels{dptypes.BackupScheduleLabelKey: s.BackupPolicy.Name}); err != nil {
   192  		return err
   193  	}
   194  	if len(opsList.Items) > 0 {
   195  		sort.Slice(opsList.Items, func(i, j int) bool {
   196  			return opsList.Items[j].CreationTimestamp.Before(&opsList.Items[i].CreationTimestamp)
   197  		})
   198  		latestOps := opsList.Items[0]
   199  		if latestOps.Status.Phase == appsv1alpha1.OpsFailedPhase {
   200  			return intctrlutil.NewErrorf(dperrors.ErrorTypeReconfigureFailed, "ops failed %s", latestOps.Name)
   201  		} else if latestOps.Status.Phase != appsv1alpha1.OpsSucceedPhase {
   202  			return intctrlutil.NewErrorf(intctrlutil.ErrorTypeRequeue, "waiting for ops %s finished.", latestOps.Name)
   203  		}
   204  	}
   205  	return nil
   206  }
   207  
   208  // buildCronJob builds cronjob from backup schedule.
   209  func (s *Scheduler) buildCronJob(
   210  	schedulePolicy *dpv1alpha1.SchedulePolicy,
   211  	cronJobName string) (*batchv1.CronJob, error) {
   212  	var (
   213  		successfulJobsHistoryLimit int32 = 0
   214  		failedJobsHistoryLimit     int32 = 1
   215  	)
   216  
   217  	if cronJobName == "" {
   218  		cronJobName = GenerateCRNameByBackupSchedule(s.BackupSchedule, schedulePolicy.BackupMethod)
   219  	}
   220  
   221  	podSpec, err := s.buildPodSpec(schedulePolicy)
   222  	if err != nil {
   223  		return nil, err
   224  	}
   225  
   226  	cronjob := &batchv1.CronJob{
   227  		ObjectMeta: metav1.ObjectMeta{
   228  			Name:      cronJobName,
   229  			Namespace: s.BackupSchedule.Namespace,
   230  			Labels: map[string]string{
   231  				constant.AppManagedByLabelKey: constant.AppName,
   232  			},
   233  		},
   234  		Spec: batchv1.CronJobSpec{
   235  			Schedule:                   schedulePolicy.CronExpression,
   236  			SuccessfulJobsHistoryLimit: &successfulJobsHistoryLimit,
   237  			FailedJobsHistoryLimit:     &failedJobsHistoryLimit,
   238  			ConcurrencyPolicy:          batchv1.ForbidConcurrent,
   239  			JobTemplate: batchv1.JobTemplateSpec{
   240  				Spec: batchv1.JobSpec{
   241  					BackoffLimit: s.BackupPolicy.Spec.BackoffLimit,
   242  					Template: corev1.PodTemplateSpec{
   243  						Spec: *podSpec,
   244  					},
   245  				},
   246  			},
   247  		},
   248  	}
   249  
   250  	controllerutil.AddFinalizer(cronjob, dptypes.DataProtectionFinalizerName)
   251  	// set labels
   252  	for k, v := range s.BackupSchedule.Labels {
   253  		if cronjob.Labels == nil {
   254  			cronjob.SetLabels(map[string]string{})
   255  		}
   256  		cronjob.Labels[k] = v
   257  	}
   258  	cronjob.Labels[dptypes.BackupScheduleLabelKey] = s.BackupSchedule.Name
   259  	cronjob.Labels[dptypes.BackupMethodLabelKey] = schedulePolicy.BackupMethod
   260  	return cronjob, nil
   261  }
   262  
   263  func (s *Scheduler) buildPodSpec(schedulePolicy *dpv1alpha1.SchedulePolicy) (*corev1.PodSpec, error) {
   264  	// TODO(ldm): add backup deletionPolicy
   265  	createBackupCmd := fmt.Sprintf(`
   266  kubectl create -f - <<EOF
   267  apiVersion: dataprotection.kubeblocks.io/v1alpha1
   268  kind: Backup
   269  metadata:
   270    labels:
   271      dataprotection.kubeblocks.io/autobackup: "true"
   272      dataprotection.kubeblocks.io/backup-schedule: "%s"
   273    name: %s
   274    namespace: %s
   275  spec:
   276    backupPolicyName: %s
   277    backupMethod: %s
   278    retentionPeriod: %s
   279  EOF
   280  `, s.BackupSchedule.Name, s.generateBackupName(), s.BackupSchedule.Namespace,
   281  		s.BackupPolicy.Name, schedulePolicy.BackupMethod,
   282  		schedulePolicy.RetentionPeriod)
   283  
   284  	container := corev1.Container{
   285  		Name:            "backup-schedule",
   286  		Image:           viper.GetString(constant.KBToolsImage),
   287  		ImagePullPolicy: corev1.PullPolicy(viper.GetString(constant.KBImagePullPolicy)),
   288  		Command:         []string{"sh", "-c"},
   289  		Args:            []string{createBackupCmd},
   290  	}
   291  	intctrlutil.InjectZeroResourcesLimitsIfEmpty(&container)
   292  
   293  	podSpec := &corev1.PodSpec{
   294  		ServiceAccountName: s.BackupPolicy.Spec.Target.ServiceAccountName,
   295  		RestartPolicy:      corev1.RestartPolicyNever,
   296  		Containers:         []corev1.Container{container},
   297  	}
   298  	if err := dputils.AddTolerations(podSpec); err != nil {
   299  		return nil, err
   300  	}
   301  	return podSpec, nil
   302  }
   303  
   304  // reconcileCronJob will create/delete/patch cronjob according to cronExpression and policy changes.
   305  func (s *Scheduler) reconcileCronJob(schedulePolicy *dpv1alpha1.SchedulePolicy) error {
   306  	// get cronjob from labels
   307  	cronJob := &batchv1.CronJob{}
   308  	cronJobList := &batchv1.CronJobList{}
   309  	if err := s.Client.List(s.Ctx, cronJobList,
   310  		client.InNamespace(s.BackupSchedule.Namespace),
   311  		client.MatchingLabels{
   312  			dptypes.BackupScheduleLabelKey: s.BackupSchedule.Name,
   313  			dptypes.BackupMethodLabelKey:   schedulePolicy.BackupMethod,
   314  			constant.AppManagedByLabelKey:  constant.AppName,
   315  		},
   316  	); err != nil {
   317  		return err
   318  	} else if len(cronJobList.Items) > 0 {
   319  		cronJob = &cronJobList.Items[0]
   320  	}
   321  
   322  	// schedule is disabled, delete cronjob if exists
   323  	if !boolptr.IsSetToTrue(schedulePolicy.Enabled) {
   324  		if len(cronJob.Name) != 0 {
   325  			// delete the old cronjob.
   326  			if err := dputils.RemoveDataProtectionFinalizer(s.Ctx, s.Client, cronJob); err != nil {
   327  				return err
   328  			}
   329  			return s.Client.Delete(s.Ctx, cronJob)
   330  		}
   331  		// if no cron expression, return
   332  		return nil
   333  	}
   334  
   335  	cronjobProto, err := s.buildCronJob(schedulePolicy, cronJob.Name)
   336  	if err != nil {
   337  		return err
   338  	}
   339  
   340  	if s.BackupSchedule.Spec.StartingDeadlineMinutes != nil {
   341  		startingDeadlineSeconds := *s.BackupSchedule.Spec.StartingDeadlineMinutes * 60
   342  		cronjobProto.Spec.StartingDeadlineSeconds = &startingDeadlineSeconds
   343  	}
   344  
   345  	if len(cronJob.Name) == 0 {
   346  		// if no cronjob, create it.
   347  		return s.Client.Create(s.Ctx, cronjobProto)
   348  	}
   349  
   350  	// sync the cronjob with the current backup policy configuration.
   351  	patch := client.MergeFrom(cronJob.DeepCopy())
   352  	cronJob.Spec.StartingDeadlineSeconds = cronjobProto.Spec.StartingDeadlineSeconds
   353  	cronJob.Spec.JobTemplate.Spec.BackoffLimit = s.BackupPolicy.Spec.BackoffLimit
   354  	cronJob.Spec.JobTemplate.Spec.Template = cronjobProto.Spec.JobTemplate.Spec.Template
   355  	cronJob.Spec.Schedule = schedulePolicy.CronExpression
   356  	return s.Client.Patch(s.Ctx, cronJob, patch)
   357  }
   358  
   359  func (s *Scheduler) generateBackupName() string {
   360  	target := s.BackupPolicy.Spec.Target
   361  
   362  	// if cluster name can be found in target labels, use it as backup name prefix
   363  	backupNamePrefix := target.PodSelector.MatchLabels[constant.AppInstanceLabelKey]
   364  
   365  	// if cluster name can not be found, use backup schedule name as backup name prefix
   366  	if backupNamePrefix == "" {
   367  		backupNamePrefix = s.BackupSchedule.Name
   368  	}
   369  	return backupNamePrefix + "-$(date -u +'%Y%m%d%H%M%S')"
   370  }