github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/controllers/dataprotection/backuprepo_controller.go (about)

     1  /*
     2  Copyright (C) 2022-2023 ApeCloud Co., Ltd
     3  
     4  This file is part of KubeBlocks project
     5  
     6  This program is free software: you can redistribute it and/or modify
     7  it under the terms of the GNU Affero General Public License as published by
     8  the Free Software Foundation, either version 3 of the License, or
     9  (at your option) any later version.
    10  
    11  This program is distributed in the hope that it will be useful
    12  but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  GNU Affero General Public License for more details.
    15  
    16  You should have received a copy of the GNU Affero General Public License
    17  along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18  */
    19  
    20  package dataprotection
    21  
    22  import (
    23  	"bytes"
    24  	"context"
    25  	"crypto/md5"
    26  	"encoding/hex"
    27  	"errors"
    28  	"fmt"
    29  	"io"
    30  	"reflect"
    31  	"slices"
    32  	"sort"
    33  	"strings"
    34  	"text/template"
    35  	"time"
    36  
    37  	"github.com/Masterminds/sprig/v3"
    38  	"github.com/go-logr/logr"
    39  	batchv1 "k8s.io/api/batch/v1"
    40  	corev1 "k8s.io/api/core/v1"
    41  	storagev1 "k8s.io/api/storage/v1"
    42  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    43  	"k8s.io/apimachinery/pkg/api/meta"
    44  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    45  	"k8s.io/apimachinery/pkg/runtime"
    46  	"k8s.io/apimachinery/pkg/types"
    47  	"k8s.io/apimachinery/pkg/util/rand"
    48  	"k8s.io/apimachinery/pkg/util/yaml"
    49  	corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
    50  	"k8s.io/client-go/rest"
    51  	"k8s.io/client-go/tools/record"
    52  	"k8s.io/utils/clock"
    53  	"k8s.io/utils/pointer"
    54  	ctrl "sigs.k8s.io/controller-runtime"
    55  	"sigs.k8s.io/controller-runtime/pkg/client"
    56  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    57  	"sigs.k8s.io/controller-runtime/pkg/handler"
    58  	"sigs.k8s.io/controller-runtime/pkg/log"
    59  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    60  
    61  	dpv1alpha1 "github.com/1aal/kubeblocks/apis/dataprotection/v1alpha1"
    62  	storagev1alpha1 "github.com/1aal/kubeblocks/apis/storage/v1alpha1"
    63  	"github.com/1aal/kubeblocks/pkg/constant"
    64  	intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil"
    65  	dptypes "github.com/1aal/kubeblocks/pkg/dataprotection/types"
    66  	"github.com/1aal/kubeblocks/pkg/dataprotection/utils"
    67  	"github.com/1aal/kubeblocks/pkg/generics"
    68  	viper "github.com/1aal/kubeblocks/pkg/viperx"
    69  )
    70  
    71  const (
    72  	// TODO: make it configurable
    73  	defaultPreCheckTimeout = 15 * time.Minute
    74  	defaultCheckInterval   = 1 * time.Minute
    75  
    76  	preCheckContainerName = "pre-check"
    77  )
    78  
    79  var (
    80  	// for testing
    81  	wallClock clock.Clock = &clock.RealClock{}
    82  )
    83  
    84  type reconcileContext struct {
    85  	intctrlutil.RequestCtx
    86  	repo       *dpv1alpha1.BackupRepo
    87  	provider   *storagev1alpha1.StorageProvider
    88  	Parameters map[string]string
    89  	renderCtx  renderContext
    90  	digest     string
    91  }
    92  
    93  func (r *reconcileContext) getDigest() string {
    94  	if r.digest != "" {
    95  		return r.digest
    96  	}
    97  	content := ""
    98  	content += stableSerializeMap(r.Parameters)
    99  	content += r.provider.Spec.StorageClassTemplate
   100  	content += r.provider.Spec.PersistentVolumeClaimTemplate
   101  	content += r.provider.Spec.CSIDriverSecretTemplate
   102  	content += r.provider.Spec.DatasafedConfigTemplate
   103  	r.digest = md5Digest(content)
   104  	return r.digest
   105  }
   106  
   107  func (r *reconcileContext) digestChanged() bool {
   108  	return !r.hasSameDigest(r.repo)
   109  }
   110  
   111  func (r *reconcileContext) preCheckFinished() bool {
   112  	cond := meta.FindStatusCondition(r.repo.Status.Conditions, ConditionTypePreCheckPassed)
   113  	return cond != nil && cond.Status != metav1.ConditionUnknown
   114  }
   115  
   116  func (r *reconcileContext) hasSameDigest(obj client.Object) bool {
   117  	return obj.GetAnnotations()[dataProtectionBackupRepoDigestAnnotationKey] == r.getDigest()
   118  }
   119  
   120  func (r *reconcileContext) preCheckResourceName() string {
   121  	return cutName(fmt.Sprintf("pre-check-%s-%s", r.repo.UID[:8], r.repo.Name))
   122  }
   123  
   124  // BackupRepoReconciler reconciles a BackupRepo object
   125  type BackupRepoReconciler struct {
   126  	client.Client
   127  	Scheme     *runtime.Scheme
   128  	Recorder   record.EventRecorder
   129  	RestConfig *rest.Config
   130  
   131  	secretRefMapper   refObjectMapper
   132  	providerRefMapper refObjectMapper
   133  }
   134  
   135  // full access on BackupRepos
   136  // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=backuprepos,verbs=get;list;watch;create;update;patch;delete
   137  // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=backuprepos/status,verbs=get;update;patch
   138  // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=backuprepos/finalizers,verbs=update
   139  
   140  // watch StorageProviders
   141  // +kubebuilder:rbac:groups=storage.kubeblocks.io,resources=storageproviders,verbs=get;list;watch
   142  
   143  // watch or update Backups
   144  // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=backups,verbs=get;list;watch;update;patch
   145  
   146  // create or delete StorageClasses
   147  // +kubebuilder:rbac:groups=storage.k8s.io,resources=storageclasses,verbs=get;list;watch;create;delete
   148  
   149  // create or delete PVCs
   150  // +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete
   151  
   152  // create or delete Secrets
   153  // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete
   154  
   155  // create or delete Jobs
   156  // +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete
   157  
   158  // Reconcile is part of the main kubernetes reconciliation loop which aims to
   159  // move the current state of the cluster closer to the desired state.
   160  //
   161  // For more details, check Reconcile and its Result here:
   162  // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.14.1/pkg/reconcile
   163  func (r *BackupRepoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
   164  	logger := log.FromContext(ctx).WithValues("backuprepo", req.NamespacedName)
   165  	reqCtx := intctrlutil.RequestCtx{
   166  		Ctx:      ctx,
   167  		Req:      req,
   168  		Log:      logger,
   169  		Recorder: r.Recorder,
   170  	}
   171  
   172  	// TODO: better event recording
   173  
   174  	// get repo object
   175  	repo := &dpv1alpha1.BackupRepo{}
   176  	if err := r.Get(ctx, req.NamespacedName, repo); err != nil {
   177  		return checkedRequeueWithError(err, reqCtx.Log, "failed to get BackupRepo")
   178  	}
   179  
   180  	// handle finalizer
   181  	res, err := intctrlutil.HandleCRDeletion(reqCtx, r, repo, dptypes.DataProtectionFinalizerName, func() (*ctrl.Result, error) {
   182  		return nil, r.deleteExternalResources(reqCtx, repo)
   183  	})
   184  	if res != nil {
   185  		return *res, err
   186  	}
   187  
   188  	// add references
   189  	if repo.Spec.Credential != nil {
   190  		r.secretRefMapper.setRef(repo, types.NamespacedName{
   191  			Name:      repo.Spec.Credential.Name,
   192  			Namespace: repo.Spec.Credential.Namespace,
   193  		})
   194  	}
   195  	r.providerRefMapper.setRef(repo, types.NamespacedName{Name: repo.Spec.StorageProviderRef})
   196  
   197  	// check storage provider
   198  	provider, err := r.checkStorageProvider(reqCtx, repo)
   199  	if err != nil {
   200  		_ = r.updateStatus(reqCtx, repo)
   201  		return checkedRequeueWithError(err, reqCtx.Log, "check storage provider status failed")
   202  	}
   203  
   204  	// check parameters for rendering templates
   205  	parameters, err := r.checkParameters(reqCtx, repo)
   206  	if err != nil {
   207  		_ = r.updateStatus(reqCtx, repo)
   208  		return checkedRequeueWithError(err, reqCtx.Log, "check parameters failed")
   209  	}
   210  
   211  	reconCtx := &reconcileContext{
   212  		RequestCtx: reqCtx,
   213  		repo:       repo,
   214  		provider:   provider,
   215  		Parameters: parameters,
   216  		renderCtx: renderContext{
   217  			Parameters: parameters,
   218  		},
   219  	}
   220  
   221  	// create StorageClass and Secret for the CSI driver
   222  	err = r.createStorageClassAndSecret(reconCtx)
   223  	if err != nil {
   224  		_ = r.updateStatus(reqCtx, repo)
   225  		return checkedRequeueWithError(err, reqCtx.Log,
   226  			"failed to create storage class and secret")
   227  	}
   228  
   229  	// check PVC template
   230  	err = r.checkPVCTemplate(reconCtx)
   231  	if err != nil {
   232  		_ = r.updateStatus(reqCtx, repo)
   233  		return checkedRequeueWithError(err, reqCtx.Log,
   234  			"failed to check PVC template")
   235  	}
   236  
   237  	// pre-check the repo by running a real job
   238  	if repo.Status.Phase != dpv1alpha1.BackupRepoDeleting {
   239  		err = r.preCheckRepo(reconCtx)
   240  		if err != nil {
   241  			_ = r.updateStatus(reqCtx, repo)
   242  			return checkedRequeueWithError(err, reqCtx.Log, "failed to pre-check")
   243  		}
   244  	}
   245  
   246  	// update status phase to ready if all conditions are met
   247  	if err = r.updateStatus(reqCtx, repo); err != nil {
   248  		return checkedRequeueWithError(err, reqCtx.Log,
   249  			"failed to update BackupRepo status")
   250  	}
   251  
   252  	if reconCtx.preCheckFinished() {
   253  		// clear pre-check resources
   254  		if err := r.removePreCheckResources(reconCtx); err != nil {
   255  			return checkedRequeueWithError(err, reqCtx.Log,
   256  				"failed to remove pre-check resources")
   257  		}
   258  	}
   259  
   260  	if repo.Status.Phase == dpv1alpha1.BackupRepoReady {
   261  		// update tool config if needed
   262  		err = r.updateToolConfigSecrets(reconCtx)
   263  		if err != nil {
   264  			return checkedRequeueWithError(err, reqCtx.Log,
   265  				"failed to update tool config secrets")
   266  		}
   267  
   268  		// check associated backups, to create PVC in their namespaces
   269  		if err = r.prepareForAssociatedBackups(reconCtx); err != nil {
   270  			return checkedRequeueWithError(err, reqCtx.Log,
   271  				"check associated backups failed")
   272  		}
   273  	}
   274  
   275  	return ctrl.Result{}, nil
   276  }
   277  
   278  func (r *BackupRepoReconciler) updateStatus(reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) error {
   279  	old := repo.DeepCopy()
   280  	// not allow to transit to other phase if it is deleting
   281  	if repo.Status.Phase != dpv1alpha1.BackupRepoDeleting {
   282  		phase := dpv1alpha1.BackupRepoFailed
   283  		basicCheckingPassed := meta.IsStatusConditionTrue(repo.Status.Conditions, ConditionTypeStorageProviderReady) &&
   284  			meta.IsStatusConditionTrue(repo.Status.Conditions, ConditionTypeParametersChecked) &&
   285  			meta.IsStatusConditionTrue(repo.Status.Conditions, ConditionTypeStorageClassCreated) &&
   286  			meta.IsStatusConditionTrue(repo.Status.Conditions, ConditionTypePVCTemplateChecked)
   287  		if basicCheckingPassed {
   288  			cond := meta.FindStatusCondition(repo.Status.Conditions, ConditionTypePreCheckPassed)
   289  			if cond != nil && cond.Status == metav1.ConditionTrue {
   290  				phase = dpv1alpha1.BackupRepoReady
   291  			} else if cond != nil && cond.Status == metav1.ConditionUnknown {
   292  				phase = dpv1alpha1.BackupRepoPreChecking
   293  			}
   294  		}
   295  		repo.Status.Phase = phase
   296  	}
   297  	repo.Status.IsDefault = repo.Annotations[dptypes.DefaultBackupRepoAnnotationKey] == trueVal
   298  
   299  	// update other fields
   300  	if repo.Status.BackupPVCName == "" {
   301  		repo.Status.BackupPVCName = randomNameForDerivedObject(repo, "pvc")
   302  	}
   303  	if repo.Status.ToolConfigSecretName == "" {
   304  		repo.Status.ToolConfigSecretName = randomNameForDerivedObject(repo, "tool-config")
   305  	}
   306  	if repo.Status.ObservedGeneration != repo.Generation {
   307  		repo.Status.ObservedGeneration = repo.Generation
   308  	}
   309  
   310  	if !reflect.DeepEqual(old.Status, repo.Status) {
   311  		if err := r.Client.Status().Patch(reqCtx.Ctx, repo, client.MergeFrom(old)); err != nil {
   312  			return fmt.Errorf("updateStatus failed: %w", err)
   313  		}
   314  	}
   315  	return nil
   316  }
   317  
   318  func (r *BackupRepoReconciler) updateConditionInDefer(ctx context.Context, repo *dpv1alpha1.BackupRepo,
   319  	condType string, reason string, statusPtr *metav1.ConditionStatus, messagePtr *string, err *error) {
   320  	status := metav1.ConditionTrue
   321  	message := ""
   322  	if *err != nil {
   323  		status = metav1.ConditionFalse
   324  		message = (*err).Error()
   325  	}
   326  	if statusPtr != nil {
   327  		status = *statusPtr
   328  	}
   329  	if messagePtr != nil {
   330  		message = *messagePtr
   331  	}
   332  	updateErr := updateCondition(ctx, r.Client, repo, condType, status, reason, message)
   333  	if *err == nil {
   334  		*err = updateErr
   335  	}
   336  }
   337  
   338  func (r *BackupRepoReconciler) checkStorageProvider(
   339  	reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) (provider *storagev1alpha1.StorageProvider, err error) {
   340  	reason := ReasonUnknownError
   341  	defer func() {
   342  		r.updateConditionInDefer(reqCtx.Ctx, repo, ConditionTypeStorageProviderReady, reason, nil, nil, &err)
   343  	}()
   344  
   345  	// get storage provider object
   346  	providerKey := client.ObjectKey{Name: repo.Spec.StorageProviderRef}
   347  	provider = &storagev1alpha1.StorageProvider{}
   348  	err = r.Client.Get(reqCtx.Ctx, providerKey, provider)
   349  	if err != nil {
   350  		if apierrors.IsNotFound(err) {
   351  			reason = ReasonStorageProviderNotFound
   352  		}
   353  		return nil, err
   354  	}
   355  
   356  	// check its spec
   357  	switch {
   358  	case repo.AccessByMount():
   359  		if provider.Spec.StorageClassTemplate == "" &&
   360  			provider.Spec.PersistentVolumeClaimTemplate == "" {
   361  			// both StorageClassTemplate and PersistentVolumeClaimTemplate are empty.
   362  			// in this case, we are unable to create a backup PVC.
   363  			reason = ReasonInvalidStorageProvider
   364  			return provider, newDependencyError("both StorageClassTemplate and PersistentVolumeClaimTemplate are empty")
   365  		}
   366  		csiInstalledCond := meta.FindStatusCondition(provider.Status.Conditions, storagev1alpha1.ConditionTypeCSIDriverInstalled)
   367  		if csiInstalledCond == nil || csiInstalledCond.Status != metav1.ConditionTrue {
   368  			reason = ReasonStorageProviderNotReady
   369  			return provider, newDependencyError("CSI driver is not installed")
   370  		}
   371  	case repo.AccessByTool():
   372  		if provider.Spec.DatasafedConfigTemplate == "" {
   373  			reason = ReasonInvalidStorageProvider
   374  			return provider, newDependencyError("DatasafedConfigTemplate is empty")
   375  		}
   376  	}
   377  
   378  	// check its status
   379  	reason = ReasonStorageProviderReady
   380  	return provider, nil
   381  }
   382  
   383  func (r *BackupRepoReconciler) checkParameters(reqCtx intctrlutil.RequestCtx,
   384  	repo *dpv1alpha1.BackupRepo) (parameters map[string]string, err error) {
   385  	reason := ReasonUnknownError
   386  	defer func() {
   387  		r.updateConditionInDefer(reqCtx.Ctx, repo, ConditionTypeParametersChecked, reason, nil, nil, &err)
   388  	}()
   389  
   390  	// collect parameters for rendering templates
   391  	parameters, err = r.collectParameters(reqCtx, repo)
   392  	if err != nil {
   393  		if apierrors.IsNotFound(err) {
   394  			reason = ReasonCredentialSecretNotFound
   395  		}
   396  		return nil, err
   397  	}
   398  	// TODO: verify parameters
   399  	reason = ReasonParametersChecked
   400  	return parameters, nil
   401  }
   402  
   403  func (r *BackupRepoReconciler) createStorageClassAndSecret(reconCtx *reconcileContext) (err error) {
   404  
   405  	reason := ReasonUnknownError
   406  	defer func() {
   407  		r.updateConditionInDefer(reconCtx.Ctx, reconCtx.repo, ConditionTypeStorageClassCreated, reason, nil, nil, &err)
   408  	}()
   409  
   410  	oldRepo := reconCtx.repo.DeepCopy()
   411  
   412  	// create secret for the CSI driver if it's not exist,
   413  	// or update the secret if the template or values are updated
   414  	if reconCtx.provider.Spec.CSIDriverSecretTemplate != "" {
   415  		if reconCtx.repo.Status.GeneratedCSIDriverSecret == nil {
   416  			reconCtx.repo.Status.GeneratedCSIDriverSecret = &corev1.SecretReference{
   417  				Name:      randomNameForDerivedObject(reconCtx.repo, "secret"),
   418  				Namespace: viper.GetString(constant.CfgKeyCtrlrMgrNS),
   419  			}
   420  		}
   421  		reconCtx.renderCtx.CSIDriverSecretRef = *reconCtx.repo.Status.GeneratedCSIDriverSecret
   422  		// create or update the secret for CSI
   423  		if _, err = r.createOrUpdateSecretForCSIDriver(reconCtx); err != nil {
   424  			reason = ReasonPrepareCSISecretFailed
   425  			return err
   426  		}
   427  	}
   428  
   429  	if reconCtx.provider.Spec.StorageClassTemplate != "" {
   430  		// create storage class if it's not exist
   431  		if reconCtx.repo.Status.GeneratedStorageClassName == "" {
   432  			reconCtx.repo.Status.GeneratedStorageClassName = randomNameForDerivedObject(reconCtx.repo, "sc")
   433  		}
   434  		if _, err = r.createStorageClass(reconCtx); err != nil {
   435  			reason = ReasonPrepareStorageClassFailed
   436  			return err
   437  		}
   438  	}
   439  
   440  	if !meta.IsStatusConditionTrue(reconCtx.repo.Status.Conditions, ConditionTypeStorageClassCreated) {
   441  		setCondition(reconCtx.repo, ConditionTypeStorageClassCreated,
   442  			metav1.ConditionTrue, ReasonStorageClassCreated, "")
   443  	}
   444  
   445  	if !reflect.DeepEqual(oldRepo.Status, reconCtx.repo.Status) {
   446  		err := r.Client.Status().Patch(reconCtx.Ctx, reconCtx.repo, client.MergeFrom(oldRepo))
   447  		if err != nil {
   448  			return fmt.Errorf("failed to patch backup repo: %w", err)
   449  		}
   450  	}
   451  	reason = ReasonStorageClassCreated
   452  	return nil
   453  }
   454  
   455  func (r *BackupRepoReconciler) createOrUpdateSecretForCSIDriver(
   456  	reconCtx *reconcileContext) (created bool, err error) {
   457  
   458  	secret := &corev1.Secret{}
   459  	secret.Name = reconCtx.repo.Status.GeneratedCSIDriverSecret.Name
   460  	secret.Namespace = reconCtx.repo.Status.GeneratedCSIDriverSecret.Namespace
   461  
   462  	shouldUpdateFunc := func() bool {
   463  		oldDigest := secret.Annotations[dataProtectionBackupRepoDigestAnnotationKey]
   464  		return oldDigest != reconCtx.getDigest()
   465  	}
   466  
   467  	return createOrUpdateObject(reconCtx.Ctx, r.Client, secret, func() error {
   468  		// render secret template
   469  		content, err := renderTemplate("secret", reconCtx.provider.Spec.CSIDriverSecretTemplate, reconCtx.renderCtx)
   470  		if err != nil {
   471  			return fmt.Errorf("failed to render secret template: %w", err)
   472  		}
   473  		secretStringData := map[string]string{}
   474  		if err = yaml.Unmarshal([]byte(content), &secretStringData); err != nil {
   475  			return fmt.Errorf("failed to unmarshal secret content: %w", err)
   476  		}
   477  		secretData := make(map[string][]byte, len(secretStringData))
   478  		for k, v := range secretStringData {
   479  			secretData[k] = []byte(v)
   480  		}
   481  		secret.Data = secretData
   482  
   483  		// set labels and annotations
   484  		if secret.Labels == nil {
   485  			secret.Labels = make(map[string]string)
   486  		}
   487  		secret.Labels[dataProtectionBackupRepoKey] = reconCtx.repo.Name
   488  
   489  		if secret.Annotations == nil {
   490  			secret.Annotations = make(map[string]string)
   491  		}
   492  		secret.Annotations[dataProtectionBackupRepoDigestAnnotationKey] = reconCtx.getDigest()
   493  
   494  		if err := controllerutil.SetControllerReference(reconCtx.repo, secret, r.Scheme); err != nil {
   495  			return fmt.Errorf("failed to set controller reference: %w", err)
   496  		}
   497  		return nil
   498  	}, shouldUpdateFunc)
   499  }
   500  
   501  func (r *BackupRepoReconciler) createStorageClass(
   502  	reconCtx *reconcileContext) (created bool, err error) {
   503  
   504  	storageClass := &storagev1.StorageClass{}
   505  	storageClass.Name = reconCtx.repo.Status.GeneratedStorageClassName
   506  	return createObjectIfNotExist(reconCtx.Ctx, r.Client, storageClass,
   507  		func() error {
   508  			// render storage class template
   509  			content, err := renderTemplate("sc", reconCtx.provider.Spec.StorageClassTemplate, reconCtx.renderCtx)
   510  			if err != nil {
   511  				return fmt.Errorf("failed to render storage class template: %w", err)
   512  			}
   513  			if err = yaml.Unmarshal([]byte(content), storageClass); err != nil {
   514  				return fmt.Errorf("failed to unmarshal storage class: %w", err)
   515  			}
   516  
   517  			// create storage class object
   518  			storageClass.Labels = map[string]string{
   519  				dataProtectionBackupRepoKey: reconCtx.repo.Name,
   520  			}
   521  			bindingMode := storagev1.VolumeBindingImmediate
   522  			storageClass.VolumeBindingMode = &bindingMode
   523  			if reconCtx.repo.Spec.PVReclaimPolicy != "" {
   524  				storageClass.ReclaimPolicy = &reconCtx.repo.Spec.PVReclaimPolicy
   525  			}
   526  			if err := controllerutil.SetControllerReference(reconCtx.repo, storageClass, r.Scheme); err != nil {
   527  				return fmt.Errorf("failed to set owner reference: %w", err)
   528  			}
   529  			return nil
   530  		})
   531  }
   532  
   533  func (r *BackupRepoReconciler) checkPVCTemplate(reconCtx *reconcileContext) (err error) {
   534  	reason := ReasonUnknownError
   535  	defer func() {
   536  		r.updateConditionInDefer(reconCtx.Ctx, reconCtx.repo, ConditionTypePVCTemplateChecked, reason, nil, nil, &err)
   537  	}()
   538  
   539  	if !reconCtx.repo.AccessByMount() || reconCtx.provider.Spec.PersistentVolumeClaimTemplate == "" {
   540  		reason = ReasonSkipped
   541  		return nil
   542  	}
   543  	if reconCtx.digestChanged() {
   544  		pvc := &corev1.PersistentVolumeClaim{}
   545  		err := r.constructPVCByTemplate(reconCtx, pvc, reconCtx.provider.Spec.PersistentVolumeClaimTemplate)
   546  		if err != nil {
   547  			reason = ReasonBadPVCTemplate
   548  			return err
   549  		}
   550  	}
   551  	reason = ReasonPVCTemplateChecked
   552  	return nil
   553  }
   554  
   555  func (r *BackupRepoReconciler) updateToolConfigSecrets(reconCtx *reconcileContext) (err error) {
   556  	if !reconCtx.repo.AccessByTool() {
   557  		return nil
   558  	}
   559  	if reconCtx.repo.Annotations[dataProtectionNeedUpdateToolConfigAnnotationKey] != trueVal {
   560  		return nil
   561  	}
   562  	// render tool config template
   563  	content, err := renderTemplate("tool-config", reconCtx.provider.Spec.DatasafedConfigTemplate, reconCtx.renderCtx)
   564  	if err != nil {
   565  		return err
   566  	}
   567  	// update existing tool config secrets
   568  	secretList := &corev1.SecretList{}
   569  	err = r.Client.List(reconCtx.Ctx, secretList, client.MatchingLabels{
   570  		dataProtectionBackupRepoKey:   reconCtx.repo.Name,
   571  		dataProtectionIsToolConfigKey: trueVal,
   572  	})
   573  	if err != nil {
   574  		return err
   575  	}
   576  	for idx := range secretList.Items {
   577  		secret := &secretList.Items[idx]
   578  		oldDigest := secret.Annotations[dataProtectionBackupRepoDigestAnnotationKey]
   579  		if oldDigest == reconCtx.getDigest() {
   580  			continue
   581  		}
   582  		patch := client.MergeFrom(secret.DeepCopy())
   583  		constructToolConfigSecret(secret, content)
   584  		if secret.Annotations == nil {
   585  			secret.Annotations = make(map[string]string)
   586  		}
   587  		secret.Annotations[dataProtectionBackupRepoDigestAnnotationKey] = reconCtx.getDigest()
   588  		if err = r.Client.Patch(reconCtx.Ctx, secret, patch); err != nil {
   589  			return err
   590  		}
   591  	}
   592  
   593  	return updateAnnotations(reconCtx.Ctx, r.Client, reconCtx.repo, map[string]string{
   594  		dataProtectionNeedUpdateToolConfigAnnotationKey: "false",
   595  	})
   596  }
   597  
   598  func (r *BackupRepoReconciler) preCheckRepo(reconCtx *reconcileContext) (err error) {
   599  	if reconCtx.digestChanged() {
   600  		// invalidate the old status. reconCtx.preCheckFinished() depends on this value
   601  		err := updateCondition(reconCtx.Ctx, r.Client, reconCtx.repo, ConditionTypePreCheckPassed,
   602  			metav1.ConditionUnknown, ReasonDigestChanged, "")
   603  		if err != nil {
   604  			return err
   605  		}
   606  
   607  		err = updateAnnotations(reconCtx.Ctx, r.Client, reconCtx.repo, map[string]string{
   608  			dataProtectionBackupRepoDigestAnnotationKey:     reconCtx.getDigest(),
   609  			dataProtectionNeedUpdateToolConfigAnnotationKey: trueVal,
   610  		})
   611  		if err != nil {
   612  			return err
   613  		}
   614  	}
   615  	if reconCtx.preCheckFinished() {
   616  		return nil
   617  	}
   618  
   619  	status := metav1.ConditionUnknown
   620  	reason := ReasonUnknownError
   621  	message := ""
   622  	defer func() {
   623  		if message == "" && err != nil {
   624  			message = err.Error()
   625  		}
   626  		r.updateConditionInDefer(reconCtx.Ctx, reconCtx.repo, ConditionTypePreCheckPassed, reason, &status, &message, &err)
   627  	}()
   628  	var job *batchv1.Job
   629  	var pvc *corev1.PersistentVolumeClaim
   630  	switch {
   631  	case reconCtx.repo.AccessByMount():
   632  		job, pvc, err = r.runPreCheckJobForMounting(reconCtx)
   633  	case reconCtx.repo.AccessByTool():
   634  		job, err = r.runPreCheckJobForTool(reconCtx)
   635  	default:
   636  		err = fmt.Errorf("unknown access method: %s", reconCtx.repo.Spec.AccessMethod)
   637  	}
   638  	if err != nil {
   639  		return err
   640  	}
   641  
   642  	finished, jobStatus, failureReason := utils.IsJobFinished(job)
   643  	if !finished {
   644  		duration := wallClock.Since(job.CreationTimestamp.Time)
   645  		if duration > defaultPreCheckTimeout {
   646  			// HACK: mark as failure
   647  			jobStatus = batchv1.JobFailed
   648  			failureReason = "timeout"
   649  		} else {
   650  			// Job and Pod both have activeDeadlineSeconds, but neither of them is suitable for our scenario.
   651  			// If job.spec.activeDeadlineSeconds is set, when the run times out, the job controller will delete
   652  			// the running pods directly to stop them; since the pods are deleted, we may not have time to collect
   653  			// the error logs.
   654  			// In the meantime, pod.spec.activeDeadlineSeconds may fail in some cases. When the configuration
   655  			// of a PVC based backup repository is wrong, the PVC provisioning will fail, which makes the pod
   656  			// get stuck in the "Pending" state, but activeDeadlineSeconds seems to start counting from the
   657  			// "Running" state, so the pod will not fail due to timeout.
   658  			return intctrlutil.NewRequeueError(defaultCheckInterval, "wait job to finish")
   659  		}
   660  	}
   661  
   662  	if jobStatus == batchv1.JobFailed {
   663  		status = metav1.ConditionFalse
   664  		reason = ReasonPreCheckFailed
   665  
   666  		// collect logs and events from these objects
   667  		info, err := r.collectPreCheckFailureMessage(reconCtx, job, pvc)
   668  		if err != nil {
   669  			return fmt.Errorf("failed to collectPreCheckFailureMessage, err: %w", err)
   670  		}
   671  		message = "Pre-check job failed, information collected for diagnosis.\n\n"
   672  		message += fmt.Sprintf("Job failure message: %s\n\n", failureReason)
   673  		message += info
   674  		// max length of metav1.Condition.Message is 32K
   675  		const messageLimit = 32 * 1024
   676  		if len(message) > messageLimit {
   677  			message = message[:messageLimit]
   678  		}
   679  	} else {
   680  		status = metav1.ConditionTrue
   681  		reason = ReasonPreCheckPassed
   682  	}
   683  	return nil
   684  }
   685  
   686  func (r *BackupRepoReconciler) removePreCheckResources(reconCtx *reconcileContext) error {
   687  	objects := []client.Object{
   688  		&batchv1.Job{},
   689  		&corev1.PersistentVolumeClaim{},
   690  		&corev1.Secret{},
   691  	}
   692  	name := reconCtx.preCheckResourceName()
   693  	namespace := viper.GetString(constant.CfgKeyCtrlrMgrNS)
   694  	objKey := client.ObjectKey{Name: name, Namespace: namespace}
   695  	for _, obj := range objects {
   696  		err := r.Client.Get(reconCtx.Ctx, objKey, obj)
   697  		if err == nil {
   698  			err = intctrlutil.BackgroundDeleteObject(r.Client, reconCtx.Ctx, obj)
   699  		}
   700  		if err == nil || apierrors.IsNotFound(err) {
   701  			continue
   702  		}
   703  		return err
   704  	}
   705  	return nil
   706  }
   707  
   708  func (r *BackupRepoReconciler) runPreCheckJobForMounting(reconCtx *reconcileContext) (job *batchv1.Job, pvc *corev1.PersistentVolumeClaim, err error) {
   709  	namespace := viper.GetString(constant.CfgKeyCtrlrMgrNS)
   710  	// create PVC
   711  	pvcName := reconCtx.preCheckResourceName()
   712  	pvc, err = r.createRepoPVC(reconCtx, pvcName, namespace, map[string]string{
   713  		dataProtectionBackupRepoDigestAnnotationKey: reconCtx.getDigest(),
   714  	})
   715  	if err != nil {
   716  		return nil, nil, err
   717  	}
   718  	// run pre-check job
   719  	job = &batchv1.Job{}
   720  	job.Name = reconCtx.preCheckResourceName()
   721  	job.Namespace = namespace
   722  	_, err = createObjectIfNotExist(reconCtx.Ctx, r.Client, job, func() error {
   723  		job.Spec = batchv1.JobSpec{
   724  			Template: corev1.PodTemplateSpec{
   725  				Spec: corev1.PodSpec{
   726  					RestartPolicy: corev1.RestartPolicyNever,
   727  					Containers: []corev1.Container{{
   728  						Name:            preCheckContainerName,
   729  						Image:           viper.GetString(constant.KBToolsImage),
   730  						ImagePullPolicy: corev1.PullPolicy(viper.GetString(constant.KBImagePullPolicy)),
   731  						Command: []string{
   732  							"sh", "-c", `set -ex; echo "pre-check" > /backup/precheck.txt; sync`,
   733  						},
   734  						VolumeMounts: []corev1.VolumeMount{{
   735  							Name:      "backup-pvc",
   736  							MountPath: "/backup",
   737  						}},
   738  					}},
   739  					Volumes: []corev1.Volume{{
   740  						Name: "backup-pvc",
   741  						VolumeSource: corev1.VolumeSource{
   742  							PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
   743  								ClaimName: pvcName,
   744  							},
   745  						},
   746  					}},
   747  				},
   748  			},
   749  			BackoffLimit: pointer.Int32(2),
   750  		}
   751  		job.Labels = map[string]string{
   752  			dataProtectionBackupRepoKey: reconCtx.repo.Name,
   753  		}
   754  		job.Annotations = map[string]string{
   755  			dataProtectionBackupRepoDigestAnnotationKey: reconCtx.getDigest(),
   756  		}
   757  		return controllerutil.SetControllerReference(reconCtx.repo, job, r.Scheme)
   758  	})
   759  	if err != nil {
   760  		return nil, nil, err
   761  	}
   762  
   763  	// these resources were created for the old generation of the backupRepo,
   764  	// so remove them and then retry.
   765  	if !reconCtx.hasSameDigest(pvc) || !reconCtx.hasSameDigest(job) {
   766  		err = r.removePreCheckResources(reconCtx)
   767  		if err != nil {
   768  			return nil, nil, err
   769  		}
   770  		return nil, nil, fmt.Errorf("pre-check job or PVC digest not match, try again")
   771  	}
   772  	return job, pvc, nil
   773  }
   774  
   775  func (r *BackupRepoReconciler) runPreCheckJobForTool(reconCtx *reconcileContext) (job *batchv1.Job, err error) {
   776  	namespace := viper.GetString(constant.CfgKeyCtrlrMgrNS)
   777  	// create tool config
   778  	secretName := reconCtx.preCheckResourceName()
   779  	secret, err := r.createToolConfigSecret(reconCtx, secretName, namespace, map[string]string{
   780  		dataProtectionBackupRepoDigestAnnotationKey: reconCtx.getDigest(),
   781  	})
   782  	if err != nil {
   783  		return nil, err
   784  	}
   785  	// run pre-check job
   786  	job = &batchv1.Job{}
   787  	job.Name = reconCtx.preCheckResourceName()
   788  	job.Namespace = namespace
   789  	_, err = createObjectIfNotExist(reconCtx.Ctx, r.Client, job, func() error {
   790  		job.Spec = batchv1.JobSpec{
   791  			Template: corev1.PodTemplateSpec{
   792  				Spec: corev1.PodSpec{
   793  					RestartPolicy: corev1.RestartPolicyNever,
   794  					Containers: []corev1.Container{{
   795  						Name:            preCheckContainerName,
   796  						Image:           viper.GetString(constant.KBToolsImage),
   797  						ImagePullPolicy: corev1.PullPolicy(viper.GetString(constant.KBImagePullPolicy)),
   798  						Command: []string{
   799  							"sh", "-c",
   800  							`
   801  set -ex
   802  export PATH="$PATH:$DP_DATASAFED_BIN_PATH"
   803  echo "pre-check" | datasafed push - /precheck.txt`,
   804  						},
   805  					}},
   806  				},
   807  			},
   808  			BackoffLimit: pointer.Int32(2),
   809  		}
   810  		job.Labels = map[string]string{
   811  			dataProtectionBackupRepoKey: reconCtx.repo.Name,
   812  		}
   813  		job.Annotations = map[string]string{
   814  			dataProtectionBackupRepoDigestAnnotationKey: reconCtx.getDigest(),
   815  		}
   816  		utils.InjectDatasafedWithConfig(&job.Spec.Template.Spec, secretName, "")
   817  		return controllerutil.SetControllerReference(reconCtx.repo, job, r.Scheme)
   818  	})
   819  	if err != nil {
   820  		return nil, err
   821  	}
   822  
   823  	// these resources were created for the old generation of the backupRepo,
   824  	// so remove them and then retry.
   825  	if !reconCtx.hasSameDigest(secret) || !reconCtx.hasSameDigest(job) {
   826  		err = r.removePreCheckResources(reconCtx)
   827  		if err != nil {
   828  			return nil, err
   829  		}
   830  		return nil, fmt.Errorf("pre-check job or tool config secret digest not match, try again")
   831  	}
   832  	return job, nil
   833  }
   834  
   835  func (r *BackupRepoReconciler) collectPreCheckFailureMessage(reconCtx *reconcileContext, job *batchv1.Job, pvc *corev1.PersistentVolumeClaim) (string, error) {
   836  	podList, err := utils.GetAssociatedPodsOfJob(reconCtx.Ctx, r.Client, job.Namespace, job.Name)
   837  	if err != nil {
   838  		return "", err
   839  	}
   840  	// sort pod with latest creation place front
   841  	slices.SortFunc(podList.Items, func(a, b corev1.Pod) int {
   842  		if a.CreationTimestamp.Equal(&(b.CreationTimestamp)) {
   843  			return 0
   844  		}
   845  		if a.CreationTimestamp.Before(&(b.CreationTimestamp)) {
   846  			return 1
   847  		}
   848  		return -1
   849  	})
   850  
   851  	prependSpaces := func(content string, spaces int) string {
   852  		prefix := ""
   853  		for i := 0; i < spaces; i++ {
   854  			prefix += " "
   855  		}
   856  		r := bytes.NewBufferString(content)
   857  		w := bytes.NewBuffer(nil)
   858  		w.Grow(r.Len())
   859  		for {
   860  			line, err := r.ReadString('\n')
   861  			if len(line) > 0 {
   862  				w.WriteString(prefix)
   863  				w.WriteString(line)
   864  			}
   865  			if err != nil {
   866  				break
   867  			}
   868  		}
   869  		return w.String()
   870  	}
   871  
   872  	var message string
   873  
   874  	// collect failure logs from the pod
   875  	const contentLimit = 4 * 1024
   876  	failureLogs, err := r.collectFailedPodLogs(reconCtx.Ctx, podList, preCheckContainerName, contentLimit)
   877  	if err != nil {
   878  		return "", err
   879  	}
   880  	if failureLogs == "" {
   881  		message += "No logs are available.\n\n"
   882  	} else {
   883  		message += fmt.Sprintf("Logs from the pre-check job:\n%s\n", prependSpaces(failureLogs, 2))
   884  	}
   885  
   886  	collectEvents := func(object client.Object) error {
   887  		gvk, err := r.Client.GroupVersionKindFor(object)
   888  		if err != nil {
   889  			return err
   890  		}
   891  		events, err := fetchObjectEvents(reconCtx.Ctx, r.Client, object)
   892  		if err != nil {
   893  			return err
   894  		}
   895  		// kind := object.GetObjectKind().GroupVersionKind().Kind
   896  		kind := gvk.Kind
   897  		if len(events.Items) == 0 {
   898  			message += fmt.Sprintf("No events are available for %s/%s.\n\n", kind, client.ObjectKeyFromObject(object))
   899  		} else {
   900  			content := utils.EventsToString(events)
   901  			if len(content) > contentLimit {
   902  				content = content[:contentLimit] + "[truncated]"
   903  			}
   904  			message += fmt.Sprintf("Events from %s/%s:\n%s\n", kind, client.ObjectKeyFromObject(object), content)
   905  		}
   906  		return nil
   907  	}
   908  
   909  	// collect events from the latest pod
   910  	if len(podList.Items) > 0 {
   911  		if err := collectEvents(&podList.Items[0]); err != nil {
   912  			return "", err
   913  		}
   914  	}
   915  	// collect events from the pvc
   916  	if pvc != nil {
   917  		if err := collectEvents(pvc); err != nil {
   918  			return "", err
   919  		}
   920  	}
   921  	// collect events from the job
   922  	if err := collectEvents(job); err != nil {
   923  		return "", err
   924  	}
   925  	return message, nil
   926  }
   927  
   928  func (r *BackupRepoReconciler) collectFailedPodLogs(ctx context.Context,
   929  	podList *corev1.PodList, containerName string, limit int64) (string, error) {
   930  	typedCli, err := corev1client.NewForConfig(r.RestConfig)
   931  	if err != nil {
   932  		return "", err
   933  	}
   934  	for _, pod := range podList.Items {
   935  		if pod.Status.Phase == corev1.PodFailed {
   936  			currOpts := &corev1.PodLogOptions{
   937  				Container: containerName,
   938  			}
   939  			req := typedCli.Pods(pod.Namespace).GetLogs(pod.Name, currOpts)
   940  			stream, err := req.Stream(ctx)
   941  			if err != nil {
   942  				return "", err
   943  			}
   944  			limited := io.LimitReader(stream, limit)
   945  			data, _ := io.ReadAll(limited)
   946  			return string(data), nil
   947  		}
   948  	}
   949  	return "", nil
   950  }
   951  
   952  func (r *BackupRepoReconciler) constructPVCByTemplate(
   953  	reconCtx *reconcileContext, pvc *corev1.PersistentVolumeClaim, tmpl string) error {
   954  	// fill render values
   955  	reconCtx.renderCtx.GeneratedStorageClassName = reconCtx.repo.Status.GeneratedStorageClassName
   956  
   957  	content, err := renderTemplate("pvc", tmpl, reconCtx.renderCtx)
   958  	if err != nil {
   959  		return fmt.Errorf("failed to render PVC template: %w", err)
   960  	}
   961  	if err = yaml.Unmarshal([]byte(content), pvc); err != nil {
   962  		return fmt.Errorf("failed to unmarshal PVC object: %w", err)
   963  	}
   964  	return nil
   965  }
   966  
   967  func (r *BackupRepoReconciler) listAssociatedBackups(
   968  	ctx context.Context, repo *dpv1alpha1.BackupRepo, extraSelector map[string]string) ([]*dpv1alpha1.Backup, error) {
   969  	// list backups associated with the repo
   970  	backupList := &dpv1alpha1.BackupList{}
   971  	selectors := client.MatchingLabels{
   972  		dataProtectionBackupRepoKey: repo.Name,
   973  	}
   974  	for k, v := range extraSelector {
   975  		selectors[k] = v
   976  	}
   977  	err := r.Client.List(ctx, backupList, selectors)
   978  	var filtered []*dpv1alpha1.Backup
   979  	for idx := range backupList.Items {
   980  		backup := &backupList.Items[idx]
   981  		if backup.Status.Phase == dpv1alpha1.BackupPhaseFailed {
   982  			continue
   983  		}
   984  		filtered = append(filtered, backup)
   985  	}
   986  	return filtered, err
   987  }
   988  
   989  func (r *BackupRepoReconciler) prepareForAssociatedBackups(reconCtx *reconcileContext) error {
   990  	backups, err := r.listAssociatedBackups(reconCtx.Ctx, reconCtx.repo, map[string]string{
   991  		dataProtectionWaitRepoPreparationKey: trueVal,
   992  	})
   993  	if err != nil {
   994  		return err
   995  	}
   996  	// return any error to reconcile the repo
   997  	var retErr error
   998  	for _, backup := range backups {
   999  		switch {
  1000  		case reconCtx.repo.AccessByMount():
  1001  			if _, err := r.createRepoPVC(reconCtx, reconCtx.repo.Status.BackupPVCName, backup.Namespace, nil); err != nil {
  1002  				reconCtx.Log.Error(err, "failed to check or create PVC", "namespace", backup.Namespace)
  1003  				retErr = err
  1004  				continue
  1005  			}
  1006  		case reconCtx.repo.AccessByTool():
  1007  			if _, err := r.createToolConfigSecret(reconCtx, reconCtx.repo.Status.ToolConfigSecretName, backup.Namespace, nil); err != nil {
  1008  				reconCtx.Log.Error(err, "failed to check or create tool config secret", "namespace", backup.Namespace)
  1009  				retErr = err
  1010  				continue
  1011  			}
  1012  		default:
  1013  			retErr = fmt.Errorf("unknown access method: %s", reconCtx.repo.Spec.AccessMethod)
  1014  		}
  1015  
  1016  		if backup.Labels[dataProtectionWaitRepoPreparationKey] != "" {
  1017  			patch := client.MergeFrom(backup.DeepCopy())
  1018  			delete(backup.Labels, dataProtectionWaitRepoPreparationKey)
  1019  			if err = r.Client.Patch(reconCtx.Ctx, backup, patch); err != nil {
  1020  				reconCtx.Log.Error(err, "failed to patch backup",
  1021  					"backup", client.ObjectKeyFromObject(backup))
  1022  				retErr = err
  1023  				continue
  1024  			}
  1025  		}
  1026  	}
  1027  	return retErr
  1028  }
  1029  
  1030  func (r *BackupRepoReconciler) createRepoPVC(reconCtx *reconcileContext,
  1031  	name, namespace string, extraAnnos map[string]string) (*corev1.PersistentVolumeClaim, error) {
  1032  
  1033  	pvc := &corev1.PersistentVolumeClaim{}
  1034  	pvc.Name = name
  1035  	pvc.Namespace = namespace
  1036  	_, err := createObjectIfNotExist(reconCtx.Ctx, r.Client, pvc,
  1037  		func() error {
  1038  			if reconCtx.provider.Spec.PersistentVolumeClaimTemplate != "" {
  1039  				// construct the PVC object by rendering the template
  1040  				err := r.constructPVCByTemplate(reconCtx, pvc, reconCtx.provider.Spec.PersistentVolumeClaimTemplate)
  1041  				if err != nil {
  1042  					return err
  1043  				}
  1044  				// overwrite PVC name and namespace
  1045  				pvc.Name = name
  1046  				pvc.Namespace = namespace
  1047  			} else {
  1048  				// set storage class name to PVC, other fields will be set with default value later
  1049  				storageClassName := reconCtx.repo.Status.GeneratedStorageClassName
  1050  				pvc.Spec = corev1.PersistentVolumeClaimSpec{
  1051  					StorageClassName: &storageClassName,
  1052  				}
  1053  			}
  1054  			// add a referencing label
  1055  			if pvc.Labels == nil {
  1056  				pvc.Labels = make(map[string]string)
  1057  			}
  1058  			pvc.Labels[dataProtectionBackupRepoKey] = reconCtx.repo.Name
  1059  			// extra annotations
  1060  			if pvc.Annotations == nil {
  1061  				pvc.Annotations = make(map[string]string)
  1062  			}
  1063  			for k, v := range extraAnnos {
  1064  				pvc.Annotations[k] = v
  1065  			}
  1066  			// set default values if not set
  1067  			if len(pvc.Spec.AccessModes) == 0 {
  1068  				pvc.Spec.AccessModes = []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany}
  1069  			}
  1070  			if pvc.Spec.VolumeMode == nil {
  1071  				volumeMode := corev1.PersistentVolumeFilesystem
  1072  				pvc.Spec.VolumeMode = &volumeMode
  1073  			}
  1074  			if pvc.Spec.Resources.Requests == nil {
  1075  				pvc.Spec.Resources.Requests = corev1.ResourceList{}
  1076  			}
  1077  			// note: pvc.Spec.Resources.Requests.Storage() never returns nil
  1078  			if pvc.Spec.Resources.Requests.Storage().IsZero() {
  1079  				pvc.Spec.Resources.Requests[corev1.ResourceStorage] = reconCtx.repo.Spec.VolumeCapacity
  1080  			}
  1081  			if err := controllerutil.SetControllerReference(reconCtx.repo, pvc, r.Scheme); err != nil {
  1082  				return fmt.Errorf("failed to set owner reference: %w", err)
  1083  			}
  1084  			return nil
  1085  		})
  1086  
  1087  	return pvc, err
  1088  }
  1089  
  1090  func constructToolConfigSecret(secret *corev1.Secret, content string) {
  1091  	secret.Data = map[string][]byte{
  1092  		"datasafed.conf": []byte(content),
  1093  	}
  1094  }
  1095  
  1096  func (r *BackupRepoReconciler) createToolConfigSecret(reconCtx *reconcileContext,
  1097  	name, namespace string, extraAnnos map[string]string) (*corev1.Secret, error) {
  1098  
  1099  	secret := &corev1.Secret{}
  1100  	secret.Name = name
  1101  	secret.Namespace = namespace
  1102  	_, err := createObjectIfNotExist(reconCtx.Ctx, r.Client, secret,
  1103  		func() error {
  1104  			content, err := renderTemplate("tool-config", reconCtx.provider.Spec.DatasafedConfigTemplate, reconCtx.renderCtx)
  1105  			if err != nil {
  1106  				return fmt.Errorf("failed to render tool config template: %w", err)
  1107  			}
  1108  			constructToolConfigSecret(secret, content)
  1109  
  1110  			// add a referencing label
  1111  			secret.Labels = map[string]string{
  1112  				dataProtectionBackupRepoKey:   reconCtx.repo.Name,
  1113  				dataProtectionIsToolConfigKey: trueVal,
  1114  			}
  1115  			secret.Annotations = map[string]string{
  1116  				dataProtectionBackupRepoDigestAnnotationKey: reconCtx.getDigest(),
  1117  			}
  1118  			for k, v := range extraAnnos {
  1119  				secret.Annotations[k] = v
  1120  			}
  1121  			if err := controllerutil.SetControllerReference(reconCtx.repo, secret, r.Scheme); err != nil {
  1122  				return fmt.Errorf("failed to set owner reference: %w", err)
  1123  			}
  1124  			return nil
  1125  		})
  1126  
  1127  	return secret, err
  1128  }
  1129  
  1130  func (r *BackupRepoReconciler) collectParameters(
  1131  	reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) (map[string]string, error) {
  1132  	values := make(map[string]string)
  1133  	for k, v := range repo.Spec.Config {
  1134  		values[k] = v
  1135  	}
  1136  	// merge with secret values
  1137  	if repo.Spec.Credential != nil {
  1138  		secretObj := &corev1.Secret{}
  1139  		err := r.Client.Get(reqCtx.Ctx, client.ObjectKey{
  1140  			Namespace: repo.Spec.Credential.Namespace,
  1141  			Name:      repo.Spec.Credential.Name,
  1142  		}, secretObj)
  1143  		if err != nil {
  1144  			return nil, fmt.Errorf("failed to get secret: %w", err)
  1145  		}
  1146  		for k, v := range secretObj.Data {
  1147  			values[k] = string(v)
  1148  		}
  1149  	}
  1150  	return values, nil
  1151  }
  1152  
  1153  func (r *BackupRepoReconciler) deleteExternalResources(
  1154  	reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) error {
  1155  	// set phase to deleting, so no new Backup can reference to this repo
  1156  	if repo.Status.Phase != dpv1alpha1.BackupRepoDeleting {
  1157  		patch := client.MergeFrom(repo.DeepCopy())
  1158  		repo.Status.Phase = dpv1alpha1.BackupRepoDeleting
  1159  		if err := r.Client.Status().Patch(reqCtx.Ctx, repo, patch); err != nil {
  1160  			return err
  1161  		}
  1162  	}
  1163  
  1164  	// TODO: block deletion if any BackupPolicy is referencing to this repo
  1165  
  1166  	// check if the repo is still being used by any backup
  1167  	if backups, err := r.listAssociatedBackups(reqCtx.Ctx, repo, nil); err != nil {
  1168  		return err
  1169  	} else if len(backups) > 0 {
  1170  		_ = updateCondition(reqCtx.Ctx, r.Client, repo, ConditionTypeDerivedObjectsDeleted,
  1171  			metav1.ConditionFalse, ReasonHaveAssociatedBackups,
  1172  			"some backups still refer to this repo")
  1173  		return fmt.Errorf("some backups still refer to this repo")
  1174  	}
  1175  
  1176  	// delete pre-check jobs
  1177  	if err := r.deleteJobs(reqCtx, repo); err != nil {
  1178  		return err
  1179  	}
  1180  
  1181  	// delete PVCs
  1182  	if cleared, err := r.deletePVCs(reqCtx, repo); err != nil {
  1183  		return err
  1184  	} else if !cleared {
  1185  		_ = updateCondition(reqCtx.Ctx, r.Client, repo, ConditionTypeDerivedObjectsDeleted,
  1186  			metav1.ConditionFalse, ReasonHaveResidualPVCs,
  1187  			"maybe the derived PVCs are still in use")
  1188  		return fmt.Errorf("derived PVCs are still in use")
  1189  	}
  1190  
  1191  	// delete derived storage classes
  1192  	if err := r.deleteStorageClasses(reqCtx, repo); err != nil {
  1193  		return err
  1194  	}
  1195  
  1196  	// delete derived secrets (secret for CSI and tool configs)
  1197  	if err := r.deleteSecrets(reqCtx, repo); err != nil {
  1198  		return err
  1199  	}
  1200  
  1201  	// update condition status
  1202  	err := updateCondition(reqCtx.Ctx, r.Client, repo, ConditionTypeDerivedObjectsDeleted,
  1203  		metav1.ConditionTrue, ReasonDerivedObjectsDeleted, "")
  1204  	if err != nil {
  1205  		return fmt.Errorf("failed to update condition: %w", err)
  1206  	}
  1207  
  1208  	// maintain mappers
  1209  	r.secretRefMapper.removeRef(repo)
  1210  	r.providerRefMapper.removeRef(repo)
  1211  
  1212  	return nil
  1213  }
  1214  
  1215  func (r *BackupRepoReconciler) deleteJobs(reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) error {
  1216  	jobList := &batchv1.JobList{}
  1217  	if err := r.Client.List(reqCtx.Ctx, jobList,
  1218  		client.MatchingLabels(map[string]string{
  1219  			dataProtectionBackupRepoKey: repo.Name,
  1220  		})); err != nil {
  1221  		return fmt.Errorf("failed to list Jobs: %w", err)
  1222  	}
  1223  
  1224  	for _, job := range jobList.Items {
  1225  		if !isOwned(repo, &job) {
  1226  			continue
  1227  		}
  1228  		reqCtx.Log.Info("deleting job", "name", job.Name, "namespace", job.Namespace)
  1229  		if err := intctrlutil.BackgroundDeleteObject(r.Client, reqCtx.Ctx, &job); err != nil {
  1230  			return err
  1231  		}
  1232  	}
  1233  	return nil
  1234  }
  1235  
  1236  func (r *BackupRepoReconciler) deletePVCs(reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) (cleared bool, err error) {
  1237  	pvcList := &corev1.PersistentVolumeClaimList{}
  1238  	if err := r.Client.List(reqCtx.Ctx, pvcList,
  1239  		client.MatchingLabels(map[string]string{
  1240  			dataProtectionBackupRepoKey: repo.Name,
  1241  		})); err != nil {
  1242  		return false, fmt.Errorf("failed to list PVCs: %w", err)
  1243  	}
  1244  
  1245  	for _, pvc := range pvcList.Items {
  1246  		if !isOwned(repo, &pvc) {
  1247  			continue
  1248  		}
  1249  		reqCtx.Log.Info("deleting PVC", "name", pvc.Name, "namespace", pvc.Namespace)
  1250  		if err := intctrlutil.BackgroundDeleteObject(r.Client, reqCtx.Ctx, &pvc); err != nil {
  1251  			return false, err
  1252  		}
  1253  	}
  1254  	// make sure all derived PVCs are deleted
  1255  	cleared = true
  1256  	for _, pvc := range pvcList.Items {
  1257  		if !isOwned(repo, &pvc) {
  1258  			continue
  1259  		}
  1260  		err = r.Client.Get(reqCtx.Ctx, client.ObjectKeyFromObject(&pvc), &corev1.PersistentVolumeClaim{})
  1261  		if !apierrors.IsNotFound(err) {
  1262  			cleared = false
  1263  			break
  1264  		}
  1265  	}
  1266  	return cleared, nil
  1267  }
  1268  
  1269  func (r *BackupRepoReconciler) deleteStorageClasses(reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) error {
  1270  	scList := &storagev1.StorageClassList{}
  1271  	if err := r.Client.List(reqCtx.Ctx, scList,
  1272  		client.MatchingLabels(map[string]string{
  1273  			dataProtectionBackupRepoKey: repo.Name,
  1274  		})); err != nil {
  1275  		return fmt.Errorf("failed to list StorageClasses: %w", err)
  1276  	}
  1277  
  1278  	for _, sc := range scList.Items {
  1279  		if !isOwned(repo, &sc) {
  1280  			continue
  1281  		}
  1282  		reqCtx.Log.Info("deleting StorageClass", "storageclass", sc.Name)
  1283  		if err := intctrlutil.BackgroundDeleteObject(r.Client, reqCtx.Ctx, &sc); err != nil {
  1284  			return err
  1285  		}
  1286  	}
  1287  	return nil
  1288  }
  1289  
  1290  func (r *BackupRepoReconciler) deleteSecrets(reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) error {
  1291  	secretList := &corev1.SecretList{}
  1292  	if err := r.Client.List(reqCtx.Ctx, secretList,
  1293  		client.MatchingLabels(map[string]string{
  1294  			dataProtectionBackupRepoKey: repo.Name,
  1295  		})); err != nil {
  1296  		return fmt.Errorf("failed to list Secret: %w", err)
  1297  	}
  1298  
  1299  	for _, secret := range secretList.Items {
  1300  		if !isOwned(repo, &secret) {
  1301  			continue
  1302  		}
  1303  		reqCtx.Log.Info("deleting Secret", "secret", client.ObjectKeyFromObject(&secret))
  1304  		if err := intctrlutil.BackgroundDeleteObject(r.Client, reqCtx.Ctx, &secret); err != nil {
  1305  			return err
  1306  		}
  1307  	}
  1308  	return nil
  1309  }
  1310  
  1311  func (r *BackupRepoReconciler) mapBackupToRepo(ctx context.Context, obj client.Object) []ctrl.Request {
  1312  	backup := obj.(*dpv1alpha1.Backup)
  1313  	repoName, ok := backup.Labels[dataProtectionBackupRepoKey]
  1314  	if !ok {
  1315  		return nil
  1316  	}
  1317  	// ignore failed backups
  1318  	if backup.Status.Phase == dpv1alpha1.BackupPhaseFailed {
  1319  		return nil
  1320  	}
  1321  	// we should reconcile the BackupRepo when:
  1322  	//   1. the Backup needs to use the BackupRepo, but it's not ready for the namespace.
  1323  	//   2. the Backup is being deleted, because it may block the deletion of the BackupRepo.
  1324  	shouldReconcileRepo := backup.Labels[dataProtectionWaitRepoPreparationKey] == trueVal ||
  1325  		!backup.DeletionTimestamp.IsZero()
  1326  	if shouldReconcileRepo {
  1327  		return []ctrl.Request{{
  1328  			NamespacedName: client.ObjectKey{Name: repoName},
  1329  		}}
  1330  	}
  1331  	return nil
  1332  }
  1333  
  1334  func (r *BackupRepoReconciler) mapProviderToRepos(ctx context.Context, obj client.Object) []ctrl.Request {
  1335  	return r.providerRefMapper.mapToRequests(obj)
  1336  }
  1337  
  1338  func (r *BackupRepoReconciler) mapSecretToRepos(ctx context.Context, obj client.Object) []ctrl.Request {
  1339  	// check if the secret is created by this controller
  1340  	owner := metav1.GetControllerOf(obj)
  1341  	if owner != nil {
  1342  		apiGVStr := dpv1alpha1.GroupVersion.String()
  1343  		if owner.APIVersion == apiGVStr && owner.Kind == "BackupRepo" {
  1344  			return []ctrl.Request{{
  1345  				NamespacedName: types.NamespacedName{
  1346  					Name:      owner.Name,
  1347  					Namespace: obj.GetNamespace(),
  1348  				},
  1349  			}}
  1350  		}
  1351  	}
  1352  
  1353  	// get repos which is referencing this secret
  1354  	return r.secretRefMapper.mapToRequests(obj)
  1355  }
  1356  
  1357  // SetupWithManager sets up the controller with the Manager.
  1358  func (r *BackupRepoReconciler) SetupWithManager(mgr ctrl.Manager) error {
  1359  	if err := mgr.GetFieldIndexer().IndexField(context.Background(), &corev1.Event{}, "involvedObject.uid", func(rawObj client.Object) []string {
  1360  		event := rawObj.(*corev1.Event)
  1361  		return []string{string(event.InvolvedObject.UID)}
  1362  	}); err != nil {
  1363  		return err
  1364  	}
  1365  	return ctrl.NewControllerManagedBy(mgr).
  1366  		For(&dpv1alpha1.BackupRepo{}).
  1367  		Watches(&storagev1alpha1.StorageProvider{}, handler.EnqueueRequestsFromMapFunc(r.mapProviderToRepos)).
  1368  		Watches(&dpv1alpha1.Backup{}, handler.EnqueueRequestsFromMapFunc(r.mapBackupToRepo)).
  1369  		Watches(&corev1.Secret{}, handler.EnqueueRequestsFromMapFunc(r.mapSecretToRepos)).
  1370  		Owns(&storagev1.StorageClass{}).
  1371  		Owns(&corev1.PersistentVolumeClaim{}).
  1372  		Owns(&batchv1.Job{}).
  1373  		Complete(r)
  1374  }
  1375  
  1376  // ============================================================================
  1377  // helper functions
  1378  // ============================================================================
  1379  
  1380  // dependencyError indicates that the error itself cannot be resolved
  1381  // unless the dependent object is updated.
  1382  type dependencyError struct {
  1383  	msg string
  1384  }
  1385  
  1386  func (e *dependencyError) Error() string {
  1387  	return e.msg
  1388  }
  1389  
  1390  func newDependencyError(msg string) error {
  1391  	return &dependencyError{msg: msg}
  1392  }
  1393  
  1394  func isDependencyError(err error) bool {
  1395  	de, ok := err.(*dependencyError)
  1396  	return ok || errors.As(err, &de)
  1397  }
  1398  
  1399  func checkedRequeueWithError(err error, logger logr.Logger, msg string, keysAndValues ...interface{}) (reconcile.Result, error) {
  1400  	if re, ok := err.(intctrlutil.RequeueError); ok {
  1401  		return intctrlutil.RequeueAfter(re.RequeueAfter(), logger, re.Reason())
  1402  	}
  1403  	if apierrors.IsNotFound(err) || isDependencyError(err) {
  1404  		return intctrlutil.Reconciled()
  1405  	}
  1406  	return intctrlutil.RequeueWithError(err, logger, msg, keysAndValues...)
  1407  }
  1408  
  1409  type renderContext struct {
  1410  	Parameters                map[string]string
  1411  	CSIDriverSecretRef        corev1.SecretReference
  1412  	GeneratedStorageClassName string
  1413  }
  1414  
  1415  func renderTemplate(name, tpl string, rCtx renderContext) (string, error) {
  1416  	fmap := sprig.TxtFuncMap()
  1417  	t, err := template.New(name).Funcs(fmap).Parse(tpl)
  1418  	if err != nil {
  1419  		return "", err
  1420  	}
  1421  	var b bytes.Buffer
  1422  	err = t.Execute(&b, rCtx)
  1423  	return b.String(), err
  1424  }
  1425  
  1426  func createOrUpdateObject[T any, PT generics.PObject[T]](
  1427  	ctx context.Context,
  1428  	c client.Client,
  1429  	obj PT,
  1430  	mutateFunc func() error,
  1431  	shouldUpdate func() bool) (created bool, err error) {
  1432  	key := client.ObjectKeyFromObject(obj)
  1433  	err = c.Get(ctx, key, obj)
  1434  	if err != nil && !apierrors.IsNotFound(err) {
  1435  		return false, fmt.Errorf("failed to check existence of object %s: %w", key, err)
  1436  	}
  1437  	var patch client.Patch
  1438  	if err == nil {
  1439  		// object already exists, check if it needs to be updated
  1440  		if !shouldUpdate() {
  1441  			return false, nil
  1442  		}
  1443  		patch = client.MergeFrom(PT(obj.DeepCopy()))
  1444  	}
  1445  	if mutateFunc != nil {
  1446  		err := mutateFunc()
  1447  		if err != nil {
  1448  			return false, err
  1449  		}
  1450  	}
  1451  	if patch != nil {
  1452  		err = c.Patch(ctx, obj, patch)
  1453  		if err != nil {
  1454  			err = fmt.Errorf("failed to patch object %s: %w", key, err)
  1455  		}
  1456  		return false, err
  1457  	} else {
  1458  		err = c.Create(ctx, obj)
  1459  		if err != nil {
  1460  			return false, fmt.Errorf("failed to create object %s: %w", key, err)
  1461  		}
  1462  		return true, nil
  1463  	}
  1464  }
  1465  
  1466  func createObjectIfNotExist[T any, PT generics.PObject[T]](
  1467  	ctx context.Context,
  1468  	c client.Client,
  1469  	obj PT,
  1470  	mutateFunc func() error) (created bool, err error) {
  1471  	noUpdate := func() bool { return false }
  1472  	return createOrUpdateObject(ctx, c, obj, mutateFunc, noUpdate)
  1473  }
  1474  
  1475  func setCondition(
  1476  	repo *dpv1alpha1.BackupRepo, condType string, status metav1.ConditionStatus,
  1477  	reason string, message string) {
  1478  	cond := metav1.Condition{
  1479  		Type:               condType,
  1480  		Status:             status,
  1481  		ObservedGeneration: repo.Generation,
  1482  		LastTransitionTime: metav1.Now(),
  1483  		Reason:             reason,
  1484  		Message:            message,
  1485  	}
  1486  	meta.SetStatusCondition(&repo.Status.Conditions, cond)
  1487  }
  1488  
  1489  func updateCondition(
  1490  	ctx context.Context, c client.Client, repo *dpv1alpha1.BackupRepo,
  1491  	condType string, status metav1.ConditionStatus, reason string, message string) error {
  1492  	cond := meta.FindStatusCondition(repo.Status.Conditions, condType)
  1493  	if cond != nil {
  1494  		// skip
  1495  		if cond.Status == status && cond.Reason == reason && cond.Message == message {
  1496  			return nil
  1497  		}
  1498  	}
  1499  	patch := client.MergeFrom(repo.DeepCopy())
  1500  	setCondition(repo, condType, status, reason, message)
  1501  	return c.Status().Patch(ctx, repo, patch)
  1502  }
  1503  
  1504  func updateAnnotations(ctx context.Context, c client.Client,
  1505  	repo *dpv1alpha1.BackupRepo, annotations map[string]string) error {
  1506  	patch := client.MergeFrom(repo.DeepCopy())
  1507  	if repo.Annotations == nil {
  1508  		repo.Annotations = make(map[string]string)
  1509  	}
  1510  	updated := false
  1511  	for k, v := range annotations {
  1512  		if curr, ok := repo.Annotations[k]; !ok || curr != v {
  1513  			repo.Annotations[k] = v
  1514  			updated = true
  1515  		}
  1516  	}
  1517  	if !updated {
  1518  		return nil
  1519  	}
  1520  	return c.Patch(ctx, repo, patch)
  1521  }
  1522  
  1523  func md5Digest(s string) string {
  1524  	h := md5.New()
  1525  	h.Write([]byte(s))
  1526  	return hex.EncodeToString(h.Sum(nil))
  1527  }
  1528  
  1529  func stableSerializeMap(m map[string]string) string {
  1530  	keys := make([]string, 0, len(m))
  1531  	for k := range m {
  1532  		keys = append(keys, k)
  1533  	}
  1534  	sort.Strings(keys)
  1535  	sb := strings.Builder{}
  1536  	for _, k := range keys {
  1537  		sb.WriteString(k)
  1538  		sb.WriteByte('=')
  1539  		sb.WriteString(m[k])
  1540  		sb.WriteByte(';')
  1541  	}
  1542  	return sb.String()
  1543  }
  1544  
  1545  func isOwned(owner client.Object, dependent client.Object) bool {
  1546  	ownerUID := owner.GetUID()
  1547  	for _, ref := range dependent.GetOwnerReferences() {
  1548  		if ref.UID == ownerUID {
  1549  			return true
  1550  		}
  1551  	}
  1552  	return false
  1553  }
  1554  
  1555  func randomNameForDerivedObject(repo *dpv1alpha1.BackupRepo, prefix string) string {
  1556  	// the final name should not exceed 63 characters
  1557  	const maxBaseNameLength = 56
  1558  	baseName := fmt.Sprintf("%s-%s", prefix, repo.Name)
  1559  	if len(baseName) > maxBaseNameLength {
  1560  		baseName = baseName[:maxBaseNameLength]
  1561  	}
  1562  	return baseName + "-" + rand.String(6)
  1563  }
  1564  
  1565  func cutName(name string) string {
  1566  	if len(name) > 63 {
  1567  		return name[:63]
  1568  	}
  1569  	return name
  1570  }
  1571  
  1572  // this method requires the corresponding field index to be added to the Manager
  1573  func fetchObjectEvents(ctx context.Context, cli client.Client, object client.Object) (*corev1.EventList, error) {
  1574  	eventList := &corev1.EventList{}
  1575  	err := cli.List(ctx, eventList, client.MatchingFields{
  1576  		"involvedObject.uid": string(object.GetUID()),
  1577  	})
  1578  	if err != nil {
  1579  		return nil, err
  1580  	}
  1581  	return eventList, nil
  1582  }