github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/controllers/dataprotection/volumepopulator_controller.go (about)

     1  /*
     2  Copyright (C) 2022-2023 ApeCloud Co., Ltd
     3  
     4  This file is part of KubeBlocks project
     5  
     6  This program is free software: you can redistribute it and/or modify
     7  it under the terms of the GNU Affero General Public License as published by
     8  the Free Software Foundation, either version 3 of the License, or
     9  (at your option) any later version.
    10  
    11  This program is distributed in the hope that it will be useful
    12  but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  GNU Affero General Public License for more details.
    15  
    16  You should have received a copy of the GNU Affero General Public License
    17  along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18  */
    19  
    20  package dataprotection
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"strings"
    26  
    27  	"golang.org/x/exp/slices"
    28  	batchv1 "k8s.io/api/batch/v1"
    29  	corev1 "k8s.io/api/core/v1"
    30  	storagev1 "k8s.io/api/storage/v1"
    31  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    32  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    33  	"k8s.io/apimachinery/pkg/runtime"
    34  	"k8s.io/apimachinery/pkg/types"
    35  	"k8s.io/client-go/tools/record"
    36  	"k8s.io/component-helpers/storage/volume"
    37  	ctrl "sigs.k8s.io/controller-runtime"
    38  	"sigs.k8s.io/controller-runtime/pkg/client"
    39  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    40  	"sigs.k8s.io/controller-runtime/pkg/log"
    41  
    42  	dpv1alpha1 "github.com/1aal/kubeblocks/apis/dataprotection/v1alpha1"
    43  	"github.com/1aal/kubeblocks/pkg/constant"
    44  	intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil"
    45  	dprestore "github.com/1aal/kubeblocks/pkg/dataprotection/restore"
    46  	dptypes "github.com/1aal/kubeblocks/pkg/dataprotection/types"
    47  	"github.com/1aal/kubeblocks/pkg/dataprotection/utils"
    48  )
    49  
// VolumePopulatorReconciler reconciles PersistentVolumeClaim objects whose
// dataSourceRef points at a dataprotection Restore. Unbound claims are
// populated through restore jobs; bound claims have the populate leftovers
// (jobs, helper PVC, finalizer) cleaned up.
type VolumePopulatorReconciler struct {
	client.Client
	// Scheme is handed to the restore manager created while populating.
	Scheme   *runtime.Scheme
	// Recorder emits events on the PVCs handled by this reconciler.
	Recorder record.EventRecorder
}
    56  
    57  // +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete
    58  // +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims/status,verbs=get;update;patch
    59  // +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims/finalizers,verbs=update
    60  // +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete
    61  
    62  // Reconcile is part of the main kubernetes reconciliation loop which aims to
    63  // move the current state of the cluster closer to the desired state.
    64  //
    65  // For more details, check Reconcile and its Result here:
    66  // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.11.0/pkg/reconcile
    67  func (r *VolumePopulatorReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
    68  	reqCtx := intctrlutil.RequestCtx{
    69  		Ctx:      ctx,
    70  		Req:      req,
    71  		Log:      log.FromContext(ctx).WithValues("volume-populator", req.NamespacedName),
    72  		Recorder: r.Recorder,
    73  	}
    74  
    75  	// Get pvc
    76  	pvc := &corev1.PersistentVolumeClaim{}
    77  	if err := r.Client.Get(reqCtx.Ctx, reqCtx.Req.NamespacedName, pvc); err != nil {
    78  		return intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")
    79  	}
    80  
    81  	if err := r.syncPVC(reqCtx, pvc); err != nil {
    82  		if intctrlutil.IsTargetError(err, intctrlutil.ErrorTypeFatal) {
    83  			r.Recorder.Event(pvc, corev1.EventTypeWarning, reasonVolumePopulateFailed, err.Error())
    84  			if patchErr := r.updatePVCConditions(reqCtx, pvc, reasonPopulatingFailed, err.Error()); patchErr != nil {
    85  				return intctrlutil.RequeueWithError(patchErr, reqCtx.Log, "")
    86  			}
    87  			return intctrlutil.Reconciled()
    88  		}
    89  		return intctrlutil.RequeueWithError(err, reqCtx.Log, "")
    90  	}
    91  	return intctrlutil.Reconciled()
    92  }
    93  
    94  // SetupWithManager sets up the controller with the Manager.
    95  func (r *VolumePopulatorReconciler) SetupWithManager(mgr ctrl.Manager) error {
    96  	return ctrl.NewControllerManagedBy(mgr).
    97  		For(&corev1.PersistentVolumeClaim{}).
    98  		Owns(&batchv1.Job{}).
    99  		Complete(r)
   100  }
   101  
   102  func (r *VolumePopulatorReconciler) matchToPopulate(pvc *corev1.PersistentVolumeClaim) (bool, error) {
   103  	dataSourceRef := pvc.Spec.DataSourceRef
   104  	if dataSourceRef == nil {
   105  		// Ignore PVCs without a datasource
   106  		return false, nil
   107  	}
   108  	apiGroup := ""
   109  	if dataSourceRef.APIGroup != nil {
   110  		apiGroup = *dataSourceRef.APIGroup
   111  	}
   112  	if apiGroup != dptypes.DataprotectionAPIGroup || dataSourceRef.Kind != dptypes.RestoreKind || dataSourceRef.Name == "" {
   113  		// Ignore PVCs that aren't for this populator to handle
   114  		return false, nil
   115  	}
   116  	if dataSourceRef.Namespace != nil && *dataSourceRef.Namespace != pvc.Namespace {
   117  		message := fmt.Sprintf(`custom resource of restore "%s" should be in the same namespace as the persistentVolumeClaim's namespace.`, *dataSourceRef.Namespace)
   118  		return false, intctrlutil.NewFatalError(message)
   119  	}
   120  	return true, nil
   121  }
   122  
   123  func (r *VolumePopulatorReconciler) syncPVC(reqCtx intctrlutil.RequestCtx, pvc *corev1.PersistentVolumeClaim) error {
   124  	matched, err := r.matchToPopulate(pvc)
   125  	if err != nil {
   126  		return err
   127  	}
   128  	if !matched {
   129  		return nil
   130  	}
   131  	// if pvc has not bound pv, populate it.
   132  	if pvc.Spec.VolumeName == "" {
   133  		return r.populate(reqCtx, pvc)
   134  	}
   135  	return r.cleanup(reqCtx, pvc)
   136  }
   137  
   138  func (r *VolumePopulatorReconciler) populate(reqCtx intctrlutil.RequestCtx, pvc *corev1.PersistentVolumeClaim) error {
   139  	wait, nodeName, err := r.waitForPVCSelectedNode(reqCtx, pvc)
   140  	if err != nil || wait {
   141  		return err
   142  	}
   143  	// Make sure the PVC finalizer is present
   144  	if !slices.Contains(pvc.Finalizers, dptypes.DataProtectionFinalizerName) {
   145  		pvcPatch := client.MergeFrom(pvc.DeepCopy())
   146  		controllerutil.AddFinalizer(pvc, dptypes.DataProtectionFinalizerName)
   147  		if err = r.Client.Patch(reqCtx.Ctx, pvc, pvcPatch); err != nil {
   148  			return err
   149  		}
   150  	}
   151  	if err = r.updatePVCConditions(reqCtx, pvc, reasonPopulatingProcessing, "Populator started"); err != nil {
   152  		return err
   153  	}
   154  
   155  	restore, err := r.getRestoreCR(reqCtx, pvc, nodeName)
   156  	if err != nil {
   157  		return err
   158  	}
   159  
   160  	restoreMgr := dprestore.NewRestoreManager(restore, r.Recorder, r.Scheme)
   161  	if err = dprestore.ValidateAndInitRestoreMGR(reqCtx, r.Client, r.Recorder, restoreMgr); err != nil {
   162  		return err
   163  	}
   164  
   165  	var populatePVC *corev1.PersistentVolumeClaim
   166  	for i, v := range restoreMgr.PrepareDataBackupSets {
   167  		if populatePVC == nil {
   168  			populatePVC, err = r.getPopulatePVC(reqCtx, pvc, v,
   169  				restore.Spec.PrepareDataConfig.DataSourceRef.VolumeSource, nodeName)
   170  			if err != nil {
   171  				return err
   172  			}
   173  		}
   174  
   175  		// 1. build populate job
   176  		job, err := restoreMgr.BuildVolumePopulateJob(reqCtx, r.Client, v, populatePVC, i)
   177  		if err != nil {
   178  			return err
   179  		}
   180  		if job == nil {
   181  			continue
   182  		}
   183  
   184  		// 2. create job
   185  		jobs, err := restoreMgr.CreateJobsIfNotExist(reqCtx, r.Client, pvc, []*batchv1.Job{job})
   186  		if err != nil {
   187  			return err
   188  		}
   189  
   190  		// 3. check if jobs are finished.
   191  		isCompleted, _, errMsg := utils.IsJobFinished(jobs[0])
   192  		if !isCompleted {
   193  			return nil
   194  		}
   195  		if errMsg != "" {
   196  			return intctrlutil.NewFatalError(errMsg)
   197  		}
   198  	}
   199  	// 4. if jobs are succeed, rebind the pvc and pv
   200  	if err = r.rebindPVCAndPV(reqCtx, populatePVC, pvc); err != nil {
   201  		return err
   202  	}
   203  	if err = r.updatePVCConditions(reqCtx, pvc, reasonPopulatingSucceed, "Populator finished"); err != nil {
   204  		return err
   205  	}
   206  	return nil
   207  }
   208  
   209  func (r *VolumePopulatorReconciler) cleanup(reqCtx intctrlutil.RequestCtx, pvc *corev1.PersistentVolumeClaim) error {
   210  	if slices.Contains(pvc.Finalizers, dptypes.DataProtectionFinalizerName) {
   211  		pvcPatch := client.MergeFrom(pvc.DeepCopy())
   212  		controllerutil.RemoveFinalizer(pvc, dptypes.DataProtectionFinalizerName)
   213  		if err := r.Client.Patch(reqCtx.Ctx, pvc, pvcPatch); err != nil {
   214  			return err
   215  		}
   216  	}
   217  
   218  	jobs := &batchv1.JobList{}
   219  	if err := r.Client.List(reqCtx.Ctx, jobs,
   220  		client.InNamespace(pvc.Namespace), client.MatchingLabels(map[string]string{
   221  			dprestore.DataProtectionLabelPopulatePVCKey: getPopulatePVCName(pvc.UID),
   222  		})); err != nil {
   223  		return err
   224  	}
   225  
   226  	for i := range jobs.Items {
   227  		job := &jobs.Items[i]
   228  		if controllerutil.ContainsFinalizer(job, dptypes.DataProtectionFinalizerName) {
   229  			patch := client.MergeFrom(job.DeepCopy())
   230  			controllerutil.RemoveFinalizer(job, dptypes.DataProtectionFinalizerName)
   231  			if err := r.Patch(reqCtx.Ctx, job, patch); err != nil {
   232  				return err
   233  			}
   234  		}
   235  		if !job.DeletionTimestamp.IsZero() {
   236  			continue
   237  		}
   238  		if err := intctrlutil.BackgroundDeleteObject(r.Client, reqCtx.Ctx, job); err != nil {
   239  			return err
   240  		}
   241  	}
   242  
   243  	populatePVC := &corev1.PersistentVolumeClaim{}
   244  	if err := r.Client.Get(reqCtx.Ctx, types.NamespacedName{Name: getPopulatePVCName(pvc.UID),
   245  		Namespace: pvc.Namespace}, populatePVC); err != nil {
   246  		return client.IgnoreNotFound(err)
   247  	}
   248  	return r.Client.Delete(reqCtx.Ctx, populatePVC)
   249  }
   250  
   251  func (r *VolumePopulatorReconciler) checkIntreeStorageClass(pvc *corev1.PersistentVolumeClaim, sc *storagev1.StorageClass) error {
   252  	if !strings.HasPrefix(sc.Provisioner, "kubernetes.io/") {
   253  		// This is not an in-tree StorageClass
   254  		return nil
   255  	}
   256  
   257  	if pvc.Annotations != nil {
   258  		if migrated := pvc.Annotations[volume.AnnMigratedTo]; migrated != "" {
   259  			// The PVC is migrated to CSI
   260  			return nil
   261  		}
   262  	}
   263  	// The SC is in-tree & PVC is not migrated
   264  	return intctrlutil.NewFatalError(fmt.Sprintf("in-tree volume volume plugin %q cannot use volume populator", sc.Provisioner))
   265  }
   266  
   267  func (r *VolumePopulatorReconciler) waitForPVCSelectedNode(reqCtx intctrlutil.RequestCtx, pvc *corev1.PersistentVolumeClaim) (bool, string, error) {
   268  	var nodeName string
   269  	if pvc.Spec.StorageClassName != nil {
   270  		storageClassName := *pvc.Spec.StorageClassName
   271  		storageClass := &storagev1.StorageClass{}
   272  		if err := r.Client.Get(reqCtx.Ctx, types.NamespacedName{Name: storageClassName}, storageClass); err != nil {
   273  			return false, nodeName, err
   274  		}
   275  
   276  		if err := r.checkIntreeStorageClass(pvc, storageClass); err != nil {
   277  			return false, nodeName, err
   278  		}
   279  		if storageClass.VolumeBindingMode != nil && storagev1.VolumeBindingWaitForFirstConsumer == *storageClass.VolumeBindingMode {
   280  			nodeName = pvc.Annotations[annSelectedNode]
   281  			if nodeName == "" {
   282  				// Wait for the PVC to get a node name before continuing
   283  				return true, nodeName, nil
   284  			}
   285  		}
   286  	}
   287  	return false, nodeName, nil
   288  }
   289  
   290  func (r *VolumePopulatorReconciler) getPopulatePVC(reqCtx intctrlutil.RequestCtx,
   291  	pvc *corev1.PersistentVolumeClaim,
   292  	backupSet dprestore.BackupActionSet,
   293  	volumeSource,
   294  	nodeName string) (*corev1.PersistentVolumeClaim, error) {
   295  	populatePVCName := getPopulatePVCName(pvc.UID)
   296  	populatePVC := &corev1.PersistentVolumeClaim{}
   297  	if err := r.Client.Get(reqCtx.Ctx, types.NamespacedName{Name: populatePVCName,
   298  		Namespace: pvc.Namespace}, populatePVC); err != nil {
   299  		if !apierrors.IsNotFound(err) {
   300  			return nil, err
   301  		}
   302  		// create populate pvc
   303  		populatePVC = &corev1.PersistentVolumeClaim{
   304  			ObjectMeta: metav1.ObjectMeta{
   305  				Name:      populatePVCName,
   306  				Namespace: pvc.Namespace,
   307  			},
   308  			Spec: corev1.PersistentVolumeClaimSpec{
   309  				AccessModes:      pvc.Spec.AccessModes,
   310  				Resources:        pvc.Spec.Resources,
   311  				StorageClassName: pvc.Spec.StorageClassName,
   312  				VolumeMode:       pvc.Spec.VolumeMode,
   313  			},
   314  		}
   315  		if nodeName != "" {
   316  			populatePVC.Annotations = map[string]string{
   317  				annSelectedNode: pvc.Annotations[annSelectedNode],
   318  			}
   319  		}
   320  		if backupSet.UseVolumeSnapshot {
   321  			// restore from volume snapshot.
   322  			populatePVC.Spec.DataSourceRef = &corev1.TypedObjectReference{
   323  				Name:     utils.GetBackupVolumeSnapshotName(backupSet.Backup.Name, volumeSource),
   324  				Kind:     constant.VolumeSnapshotKind,
   325  				APIGroup: &dprestore.VolumeSnapshotGroup,
   326  			}
   327  		}
   328  		if err = r.Client.Create(reqCtx.Ctx, populatePVC); err != nil && !apierrors.IsAlreadyExists(err) {
   329  			return nil, err
   330  		}
   331  	}
   332  	return populatePVC, nil
   333  }
   334  
   335  func (r *VolumePopulatorReconciler) getRestoreCR(reqCtx intctrlutil.RequestCtx, pvc *corev1.PersistentVolumeClaim, nodeName string) (*dpv1alpha1.Restore, error) {
   336  	restore := &dpv1alpha1.Restore{}
   337  	if err := r.Client.Get(reqCtx.Ctx, types.NamespacedName{Name: pvc.Spec.DataSourceRef.Name,
   338  		Namespace: pvc.Namespace}, restore); err != nil {
   339  		return nil, err
   340  	}
   341  	if restore.Spec.PrepareDataConfig == nil || restore.Spec.PrepareDataConfig.DataSourceRef == nil {
   342  		return nil, intctrlutil.NewFatalError(fmt.Sprintf(`spec.prepareDataConfig.datasourceRef of restore "%s" can not be empty`, restore.Name))
   343  	}
   344  	restore.Spec.PrepareDataConfig.SchedulingSpec = dpv1alpha1.SchedulingSpec{
   345  		Tolerations: []corev1.Toleration{
   346  			{Operator: corev1.TolerationOpExists},
   347  		},
   348  	}
   349  	if nodeName != "" {
   350  		restore.Spec.PrepareDataConfig.SchedulingSpec.NodeSelector = map[string]string{
   351  			corev1.LabelHostname: nodeName,
   352  		}
   353  	}
   354  	return restore, nil
   355  }
   356  
   357  func (r *VolumePopulatorReconciler) rebindPVCAndPV(reqCtx intctrlutil.RequestCtx, populatePVC, pvc *corev1.PersistentVolumeClaim) error {
   358  	pv := &corev1.PersistentVolume{}
   359  	if err := r.Client.Get(reqCtx.Ctx, types.NamespacedName{Name: populatePVC.Spec.VolumeName, Namespace: pvc.Namespace}, pv); err != nil {
   360  		if !apierrors.IsNotFound(err) {
   361  			return err
   362  		}
   363  		// We'll get called again later when the PV exists
   364  		return nil
   365  	}
   366  	// Examine the claimref for the PV and see if it's bound to the correct PVC
   367  	claimRef := pv.Spec.ClaimRef
   368  	if claimRef.Name == pvc.Name && claimRef.Namespace == pvc.Namespace && claimRef.UID == pvc.UID {
   369  		return nil
   370  	}
   371  	// Make new PV with strategic patch values to perform the PV rebind
   372  	patchPV := client.MergeFrom(pv.DeepCopy())
   373  	pv.Spec.ClaimRef = &corev1.ObjectReference{
   374  		Namespace:       pvc.Namespace,
   375  		Name:            pvc.Name,
   376  		UID:             pvc.UID,
   377  		ResourceVersion: pvc.ResourceVersion,
   378  	}
   379  	if pv.Annotations == nil {
   380  		pv.Annotations = map[string]string{}
   381  	}
   382  	pv.Annotations[annPopulateFrom] = pvc.Spec.DataSourceRef.Name
   383  	return r.Client.Patch(reqCtx.Ctx, pv, patchPV)
   384  }
   385  
   386  func (r *VolumePopulatorReconciler) updatePVCConditions(reqCtx intctrlutil.RequestCtx, pvc *corev1.PersistentVolumeClaim, reason, message string) error {
   387  	progressCondition := corev1.PersistentVolumeClaimCondition{
   388  		Type:               PersistentVolumeClaimPopulating,
   389  		Status:             corev1.ConditionTrue,
   390  		LastTransitionTime: metav1.Now(),
   391  		Reason:             reason,
   392  		Message:            message,
   393  	}
   394  	pvcPatch := client.MergeFrom(pvc.DeepCopy())
   395  	var existPopulating bool
   396  	for i, v := range pvc.Status.Conditions {
   397  		if v.Type != PersistentVolumeClaimPopulating {
   398  			continue
   399  		}
   400  		if reason == v.Reason {
   401  			return nil
   402  		}
   403  		existPopulating = true
   404  		pvc.Status.Conditions[i] = progressCondition
   405  	}
   406  	if !existPopulating {
   407  		pvc.Status.Conditions = append(pvc.Status.Conditions, progressCondition)
   408  	}
   409  	switch reason {
   410  	case reasonPopulatingProcessing:
   411  		r.Recorder.Event(pvc, corev1.EventTypeNormal, reasonStartToVolumePopulate, message)
   412  	case reasonPopulatingSucceed:
   413  		r.Recorder.Event(pvc, corev1.EventTypeNormal, reasonVolumePopulateSucceed, message)
   414  	}
   415  	return r.Client.Status().Patch(reqCtx.Ctx, pvc, pvcPatch)
   416  }