github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/controllers/dataprotection/restore_controller.go (about)

     1  /*
     2  Copyright (C) 2022-2023 ApeCloud Co., Ltd
     3  
     4  This file is part of KubeBlocks project
     5  
     6  This program is free software: you can redistribute it and/or modify
     7  it under the terms of the GNU Affero General Public License as published by
     8  the Free Software Foundation, either version 3 of the License, or
     9  (at your option) any later version.
    10  
    11  This program is distributed in the hope that it will be useful
    12  but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  GNU Affero General Public License for more details.
    15  
    16  You should have received a copy of the GNU Affero General Public License
    17  along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18  */
    19  
    20  package dataprotection
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"reflect"
    26  	"time"
    27  
    28  	batchv1 "k8s.io/api/batch/v1"
    29  	corev1 "k8s.io/api/core/v1"
    30  	"k8s.io/apimachinery/pkg/api/meta"
    31  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    32  	"k8s.io/apimachinery/pkg/runtime"
    33  	"k8s.io/client-go/tools/record"
    34  	"k8s.io/klog/v2"
    35  	ctrl "sigs.k8s.io/controller-runtime"
    36  	"sigs.k8s.io/controller-runtime/pkg/client"
    37  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    38  	"sigs.k8s.io/controller-runtime/pkg/log"
    39  
    40  	dpv1alpha1 "github.com/1aal/kubeblocks/apis/dataprotection/v1alpha1"
    41  	"github.com/1aal/kubeblocks/pkg/constant"
    42  	intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil"
    43  	dprestore "github.com/1aal/kubeblocks/pkg/dataprotection/restore"
    44  	dptypes "github.com/1aal/kubeblocks/pkg/dataprotection/types"
    45  )
    46  
// RestoreReconciler reconciles a Restore object.
//
// The embedded client.Client is used to read, list and patch the Restore and
// the Jobs labelled for it. Scheme is handed to the restore manager created
// for every in-progress Restore, and Recorder emits the Kubernetes events
// published while a Restore moves through its phases.
type RestoreReconciler struct {
	client.Client
	Scheme   *runtime.Scheme
	Recorder record.EventRecorder
}
    53  
    54  // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=restores,verbs=get;list;watch;create;update;patch;delete
    55  // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=restores/status,verbs=get;update;patch
    56  // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=restores/finalizers,verbs=update
    57  // +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete
    58  
    59  // Reconcile is part of the main kubernetes reconciliation loop which aims to
    60  // move the current state of the cluster closer to the desired state.
    61  //
    62  // For more details, check Reconcile and its Result here:
    63  // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.11.0/pkg/reconcile
    64  func (r *RestoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
    65  	reqCtx := intctrlutil.RequestCtx{
    66  		Ctx:      ctx,
    67  		Req:      req,
    68  		Log:      log.FromContext(ctx).WithValues("backup", req.NamespacedName),
    69  		Recorder: r.Recorder,
    70  	}
    71  
    72  	// Get restore CR
    73  	restore := &dpv1alpha1.Restore{}
    74  	if err := r.Client.Get(reqCtx.Ctx, reqCtx.Req.NamespacedName, restore); err != nil {
    75  		return intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")
    76  	}
    77  
    78  	// handle finalizer
    79  	res, err := intctrlutil.HandleCRDeletion(reqCtx, r, restore, dptypes.DataProtectionFinalizerName, func() (*ctrl.Result, error) {
    80  		return nil, r.deleteExternalResources(reqCtx, restore)
    81  	})
    82  	if res != nil {
    83  		return *res, err
    84  	}
    85  
    86  	switch restore.Status.Phase {
    87  	case "":
    88  		return r.newAction(reqCtx, restore)
    89  	case dpv1alpha1.RestorePhaseRunning:
    90  		return r.inProgressAction(reqCtx, restore)
    91  	}
    92  	return intctrlutil.Reconciled()
    93  }
    94  
    95  // SetupWithManager sets up the controller with the Manager.
    96  func (r *RestoreReconciler) SetupWithManager(mgr ctrl.Manager) error {
    97  	return ctrl.NewControllerManagedBy(mgr).
    98  		For(&dpv1alpha1.Restore{}).
    99  		Owns(&batchv1.Job{}).
   100  		Complete(r)
   101  }
   102  
   103  func (r *RestoreReconciler) deleteExternalResources(reqCtx intctrlutil.RequestCtx, restore *dpv1alpha1.Restore) error {
   104  	jobs := &batchv1.JobList{}
   105  	if err := r.Client.List(reqCtx.Ctx, jobs,
   106  		client.InNamespace(restore.Namespace),
   107  		client.MatchingLabels(dprestore.BuildRestoreLabels(restore.Name))); err != nil {
   108  		return client.IgnoreNotFound(err)
   109  	}
   110  	for i := range jobs.Items {
   111  		job := &jobs.Items[i]
   112  		if controllerutil.ContainsFinalizer(job, dptypes.DataProtectionFinalizerName) {
   113  			patch := client.MergeFrom(job.DeepCopy())
   114  			controllerutil.RemoveFinalizer(job, dptypes.DataProtectionFinalizerName)
   115  			if err := r.Patch(reqCtx.Ctx, job, patch); err != nil {
   116  				return err
   117  			}
   118  		}
   119  	}
   120  	return nil
   121  }
   122  
   123  func (r *RestoreReconciler) newAction(reqCtx intctrlutil.RequestCtx, restore *dpv1alpha1.Restore) (ctrl.Result, error) {
   124  	oldRestore := restore.DeepCopy()
   125  	patch := client.MergeFrom(oldRestore)
   126  	// patch metaObject
   127  	if restore.Labels == nil {
   128  		restore.Labels = map[string]string{}
   129  	}
   130  	restore.Labels[constant.AppManagedByLabelKey] = constant.AppName
   131  	if !reflect.DeepEqual(restore.ObjectMeta, oldRestore.ObjectMeta) {
   132  		if err := r.Client.Patch(reqCtx.Ctx, restore, patch); err != nil {
   133  			return intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")
   134  		}
   135  		return intctrlutil.Reconciled()
   136  	}
   137  	if restore.Spec.PrepareDataConfig != nil && restore.Spec.PrepareDataConfig.DataSourceRef != nil {
   138  		restore.Status.Phase = dpv1alpha1.RestorePhaseAsDataSource
   139  	} else {
   140  		// patch status
   141  		restore.Status.StartTimestamp = &metav1.Time{Time: time.Now()}
   142  		restore.Status.Phase = dpv1alpha1.RestorePhaseRunning
   143  		r.Recorder.Event(restore, corev1.EventTypeNormal, dprestore.ReasonRestoreStarting, "start to restore")
   144  	}
   145  	if err := r.Client.Status().Patch(reqCtx.Ctx, restore, patch); err != nil {
   146  		return intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")
   147  	}
   148  	return intctrlutil.Reconciled()
   149  }
   150  
   151  func (r *RestoreReconciler) inProgressAction(reqCtx intctrlutil.RequestCtx, restore *dpv1alpha1.Restore) (ctrl.Result, error) {
   152  	restoreMgr := dprestore.NewRestoreManager(restore, r.Recorder, r.Scheme)
   153  	// handle restore actions
   154  	err := r.handleRestoreActions(reqCtx, restoreMgr)
   155  	if intctrlutil.IsTargetError(err, intctrlutil.ErrorTypeFatal) {
   156  		// set restore phase to failed if the error is fatal.
   157  		restoreMgr.Restore.Status.Phase = dpv1alpha1.RestorePhaseFailed
   158  		restoreMgr.Restore.Status.CompletionTimestamp = &metav1.Time{Time: time.Now()}
   159  		restoreMgr.Restore.Status.Duration = dprestore.GetRestoreDuration(restoreMgr.Restore.Status)
   160  		r.Recorder.Event(restore, corev1.EventTypeWarning, dprestore.ReasonRestoreFailed, err.Error())
   161  		err = nil
   162  	}
   163  	// patch restore status if changes occur
   164  	if !reflect.DeepEqual(restoreMgr.OriginalRestore.Status, restoreMgr.Restore.Status) {
   165  		err = r.Client.Status().Patch(reqCtx.Ctx, restoreMgr.Restore, client.MergeFrom(restoreMgr.OriginalRestore))
   166  	}
   167  	if err != nil {
   168  		r.Recorder.Event(restore, corev1.EventTypeWarning, corev1.EventTypeWarning, err.Error())
   169  		return intctrlutil.RequeueWithError(err, reqCtx.Log, "")
   170  	}
   171  	return intctrlutil.Reconciled()
   172  }
   173  
   174  func (r *RestoreReconciler) handleRestoreActions(reqCtx intctrlutil.RequestCtx, restoreMgr *dprestore.RestoreManager) error {
   175  	// 1. validate if the restore.spec is valid and build restore manager.
   176  	if err := r.validateAndBuildMGR(reqCtx, restoreMgr); err != nil {
   177  		return err
   178  	}
   179  
   180  	// 2. handle the prepareData stage.
   181  	isCompleted, err := r.prepareData(reqCtx, restoreMgr)
   182  	if err != nil {
   183  		return err
   184  	}
   185  	// if prepareData is not completed, return
   186  	if !isCompleted {
   187  		return nil
   188  	}
   189  	// 3. handle the postReady stage.
   190  	isCompleted, err = r.postReady(reqCtx, restoreMgr)
   191  	if err != nil {
   192  		return err
   193  	}
   194  	if isCompleted {
   195  		restoreMgr.Restore.Status.Phase = dpv1alpha1.RestorePhaseCompleted
   196  		restoreMgr.Restore.Status.CompletionTimestamp = &metav1.Time{Time: time.Now()}
   197  		restoreMgr.Restore.Status.Duration = dprestore.GetRestoreDuration(restoreMgr.Restore.Status)
   198  		r.Recorder.Event(restoreMgr.Restore, corev1.EventTypeNormal, dprestore.ReasonRestoreCompleted, "restore completed.")
   199  	}
   200  	return nil
   201  }
   202  
   203  // validateAndBuildMGR validates the spec is valid to restore. if ok, build a manager for restoring.
   204  func (r *RestoreReconciler) validateAndBuildMGR(reqCtx intctrlutil.RequestCtx, restoreMgr *dprestore.RestoreManager) (err error) {
   205  	defer func() {
   206  		if err == nil {
   207  			dprestore.SetRestoreValidationCondition(restoreMgr.Restore, dprestore.ReasonValidateSuccessfully, "validate restore spec successfully")
   208  		} else if intctrlutil.IsTargetError(err, intctrlutil.ErrorTypeFatal) {
   209  			dprestore.SetRestoreValidationCondition(restoreMgr.Restore, dprestore.ReasonValidateFailed, err.Error())
   210  			r.Recorder.Event(restoreMgr.Restore, corev1.EventTypeWarning, dprestore.ReasonValidateFailed, err.Error())
   211  		}
   212  	}()
   213  
   214  	err = dprestore.ValidateAndInitRestoreMGR(reqCtx, r.Client, r.Recorder, restoreMgr)
   215  	return err
   216  }
   217  
   218  // prepareData handles the prepareData stage of the backups.
   219  func (r *RestoreReconciler) prepareData(reqCtx intctrlutil.RequestCtx, restoreMgr *dprestore.RestoreManager) (bool, error) {
   220  	if len(restoreMgr.PrepareDataBackupSets) == 0 {
   221  		return true, nil
   222  	}
   223  	prepareDataConfig := restoreMgr.Restore.Spec.PrepareDataConfig
   224  	if prepareDataConfig == nil || (prepareDataConfig.RestoreVolumeClaimsTemplate == nil && len(prepareDataConfig.RestoreVolumeClaims) == 0) {
   225  		return true, nil
   226  	}
   227  	if meta.IsStatusConditionTrue(restoreMgr.Restore.Status.Conditions, dprestore.ConditionTypeRestorePreparedData) {
   228  		return true, nil
   229  	}
   230  	var (
   231  		err         error
   232  		isCompleted bool
   233  	)
   234  	defer func() {
   235  		r.handleRestoreStageError(restoreMgr.Restore, dpv1alpha1.PrepareData, err)
   236  	}()
   237  	// set processing prepare data condition
   238  	dprestore.SetRestoreStageCondition(restoreMgr.Restore, dpv1alpha1.PrepareData, dprestore.ReasonProcessing, "processing prepareData stage.")
   239  	for i, v := range restoreMgr.PrepareDataBackupSets {
   240  		isCompleted, err = r.handleBackupActionSet(reqCtx, restoreMgr, v, dpv1alpha1.PrepareData, i)
   241  		if err != nil {
   242  			return false, err
   243  		}
   244  		// waiting for restore jobs finished.
   245  		if !isCompleted {
   246  			return false, nil
   247  		}
   248  	}
   249  	// set prepare data successfully condition
   250  	dprestore.SetRestoreStageCondition(restoreMgr.Restore, dpv1alpha1.PrepareData, dprestore.ReasonSucceed, "prepare data successfully")
   251  	return true, nil
   252  }
   253  
   254  func (r *RestoreReconciler) postReady(reqCtx intctrlutil.RequestCtx, restoreMgr *dprestore.RestoreManager) (bool, error) {
   255  	readyConfig := restoreMgr.Restore.Spec.ReadyConfig
   256  	if len(restoreMgr.PostReadyBackupSets) == 0 || readyConfig == nil {
   257  		return true, nil
   258  	}
   259  	if meta.IsStatusConditionTrue(restoreMgr.Restore.Status.Conditions, dprestore.ConditionTypeRestorePostReady) {
   260  		return true, nil
   261  	}
   262  	dprestore.SetRestoreStageCondition(restoreMgr.Restore, dpv1alpha1.PostReady, dprestore.ReasonProcessing, "processing postReady stage")
   263  	var (
   264  		err         error
   265  		isCompleted bool
   266  	)
   267  	defer func() {
   268  		r.handleRestoreStageError(restoreMgr.Restore, dpv1alpha1.PrepareData, err)
   269  	}()
   270  	if readyConfig.ReadinessProbe != nil && !meta.IsStatusConditionTrue(restoreMgr.Restore.Status.Conditions, dprestore.ConditionTypeReadinessProbe) {
   271  		// TODO: check readiness probe, use a job and kubectl exec?
   272  		_ = klog.TODO()
   273  	}
   274  	for _, v := range restoreMgr.PostReadyBackupSets {
   275  		// handle postReady actions
   276  		for i := range v.ActionSet.Spec.Restore.PostReady {
   277  			isCompleted, err = r.handleBackupActionSet(reqCtx, restoreMgr, v, dpv1alpha1.PostReady, i)
   278  			if err != nil {
   279  				return false, err
   280  			}
   281  			// waiting for restore jobs finished.
   282  			if !isCompleted {
   283  				return false, nil
   284  			}
   285  		}
   286  	}
   287  	dprestore.SetRestoreStageCondition(restoreMgr.Restore, dpv1alpha1.PostReady, dprestore.ReasonSucceed, "processing postReady stage successfully")
   288  	return true, nil
   289  }
   290  
   291  func (r *RestoreReconciler) handleBackupActionSet(reqCtx intctrlutil.RequestCtx,
   292  	restoreMgr *dprestore.RestoreManager,
   293  	backupSet dprestore.BackupActionSet,
   294  	stage dpv1alpha1.RestoreStage,
   295  	step int) (bool, error) {
   296  	handleFailed := func(restore *dpv1alpha1.Restore, backupName string) error {
   297  		errorMsg := fmt.Sprintf(`restore failed for backup "%s", more information can be found in status.actions.%s`, backupName, stage)
   298  		dprestore.SetRestoreStageCondition(restore, stage, dprestore.ReasonFailed, errorMsg)
   299  		return intctrlutil.NewFatalError(errorMsg)
   300  	}
   301  
   302  	checkIsCompleted := func(allActionsFinished, existFailedAction bool) (bool, error) {
   303  		if !allActionsFinished {
   304  			return false, nil
   305  		}
   306  		if existFailedAction {
   307  			return true, handleFailed(restoreMgr.Restore, backupSet.Backup.Name)
   308  		}
   309  		return true, nil
   310  	}
   311  
   312  	actionName := fmt.Sprintf("%s-%d", stage, step)
   313  	// 1. check if the restore actions are completed from status.actions firstly.
   314  	allActionsFinished, existFailedAction := restoreMgr.AnalysisRestoreActionsWithBackup(stage, backupSet.Backup.Name, actionName)
   315  	isCompleted, err := checkIsCompleted(allActionsFinished, existFailedAction)
   316  	if isCompleted || err != nil {
   317  		return isCompleted, err
   318  	}
   319  
   320  	var jobs []*batchv1.Job
   321  	switch stage {
   322  	case dpv1alpha1.PrepareData:
   323  		if backupSet.UseVolumeSnapshot {
   324  			if err = restoreMgr.RestorePVCFromSnapshot(reqCtx, r.Client, backupSet); err != nil {
   325  				return false, nil
   326  			}
   327  		}
   328  		jobs, err = restoreMgr.BuildPrepareDataJobs(reqCtx, r.Client, backupSet, actionName)
   329  	case dpv1alpha1.PostReady:
   330  		// 2. build jobs for postReady action
   331  		jobs, err = restoreMgr.BuildPostReadyActionJobs(reqCtx, r.Client, backupSet, step)
   332  	}
   333  	if err != nil {
   334  		return false, err
   335  	}
   336  	if len(jobs) == 0 {
   337  		return true, nil
   338  	}
   339  	// 3. create jobs
   340  	jobs, err = restoreMgr.CreateJobsIfNotExist(reqCtx, r.Client, restoreMgr.Restore, jobs)
   341  	if err != nil {
   342  		return false, err
   343  	}
   344  
   345  	// 4. check if jobs are finished.
   346  	allActionsFinished, existFailedAction = restoreMgr.CheckJobsDone(stage, actionName, backupSet, jobs)
   347  	if stage == dpv1alpha1.PrepareData {
   348  		// recalculation whether all actions have been completed.
   349  		restoreMgr.Recalculation(backupSet.Backup.Name, actionName, &allActionsFinished, &existFailedAction)
   350  	}
   351  	return checkIsCompleted(allActionsFinished, existFailedAction)
   352  }
   353  
   354  func (r *RestoreReconciler) handleRestoreStageError(restore *dpv1alpha1.Restore, stage dpv1alpha1.RestoreStage, err error) {
   355  	if intctrlutil.IsTargetError(err, intctrlutil.ErrorTypeFatal) {
   356  		condition := meta.FindStatusCondition(restore.Status.Conditions, dprestore.ConditionTypeRestorePreparedData)
   357  		if condition != nil && condition.Reason != dprestore.ReasonFailed {
   358  			dprestore.SetRestoreStageCondition(restore, stage, dprestore.ReasonFailed, err.Error())
   359  		}
   360  	}
   361  }