github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/controllers/dataprotection/backuprepo_controller.go (about) 1 /* 2 Copyright (C) 2022-2023 ApeCloud Co., Ltd 3 4 This file is part of KubeBlocks project 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU Affero General Public License as published by 8 the Free Software Foundation, either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU Affero General Public License for more details. 15 16 You should have received a copy of the GNU Affero General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 package dataprotection 21 22 import ( 23 "bytes" 24 "context" 25 "crypto/md5" 26 "encoding/hex" 27 "errors" 28 "fmt" 29 "io" 30 "reflect" 31 "slices" 32 "sort" 33 "strings" 34 "text/template" 35 "time" 36 37 "github.com/Masterminds/sprig/v3" 38 "github.com/go-logr/logr" 39 batchv1 "k8s.io/api/batch/v1" 40 corev1 "k8s.io/api/core/v1" 41 storagev1 "k8s.io/api/storage/v1" 42 apierrors "k8s.io/apimachinery/pkg/api/errors" 43 "k8s.io/apimachinery/pkg/api/meta" 44 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 45 "k8s.io/apimachinery/pkg/runtime" 46 "k8s.io/apimachinery/pkg/types" 47 "k8s.io/apimachinery/pkg/util/rand" 48 "k8s.io/apimachinery/pkg/util/yaml" 49 corev1client "k8s.io/client-go/kubernetes/typed/core/v1" 50 "k8s.io/client-go/rest" 51 "k8s.io/client-go/tools/record" 52 "k8s.io/utils/clock" 53 "k8s.io/utils/pointer" 54 ctrl "sigs.k8s.io/controller-runtime" 55 "sigs.k8s.io/controller-runtime/pkg/client" 56 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 57 "sigs.k8s.io/controller-runtime/pkg/handler" 58 "sigs.k8s.io/controller-runtime/pkg/log" 59 "sigs.k8s.io/controller-runtime/pkg/reconcile" 60 61 dpv1alpha1 "github.com/1aal/kubeblocks/apis/dataprotection/v1alpha1" 62 storagev1alpha1 "github.com/1aal/kubeblocks/apis/storage/v1alpha1" 63 "github.com/1aal/kubeblocks/pkg/constant" 64 intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil" 65 dptypes "github.com/1aal/kubeblocks/pkg/dataprotection/types" 66 "github.com/1aal/kubeblocks/pkg/dataprotection/utils" 67 "github.com/1aal/kubeblocks/pkg/generics" 68 viper "github.com/1aal/kubeblocks/pkg/viperx" 69 ) 70 71 const ( 72 // TODO: make it configurable 73 defaultPreCheckTimeout = 15 * time.Minute 74 defaultCheckInterval = 1 * time.Minute 75 76 preCheckContainerName = "pre-check" 77 ) 78 79 var ( 80 // for testing 81 wallClock clock.Clock = &clock.RealClock{} 82 ) 83 84 type reconcileContext struct { 85 intctrlutil.RequestCtx 86 repo *dpv1alpha1.BackupRepo 87 provider *storagev1alpha1.StorageProvider 88 Parameters map[string]string 89 renderCtx renderContext 90 digest string 91 } 92 93 func (r *reconcileContext) getDigest() string { 94 if r.digest != "" { 95 return r.digest 96 } 97 content := "" 98 content += stableSerializeMap(r.Parameters) 99 content += r.provider.Spec.StorageClassTemplate 100 content += r.provider.Spec.PersistentVolumeClaimTemplate 101 content += r.provider.Spec.CSIDriverSecretTemplate 102 content += r.provider.Spec.DatasafedConfigTemplate 103 r.digest = md5Digest(content) 104 return r.digest 105 } 106 107 func (r *reconcileContext) digestChanged() bool { 108 return !r.hasSameDigest(r.repo) 109 } 110 111 func (r *reconcileContext) preCheckFinished() bool { 112 cond := meta.FindStatusCondition(r.repo.Status.Conditions, ConditionTypePreCheckPassed) 113 return cond != nil && cond.Status != metav1.ConditionUnknown 114 } 115 116 func (r *reconcileContext) hasSameDigest(obj client.Object) bool { 117 return obj.GetAnnotations()[dataProtectionBackupRepoDigestAnnotationKey] == r.getDigest() 118 } 119 120 func (r *reconcileContext) preCheckResourceName() string { 121 return cutName(fmt.Sprintf("pre-check-%s-%s", r.repo.UID[:8], r.repo.Name)) 122 } 123 124 // BackupRepoReconciler reconciles a BackupRepo object 125 type BackupRepoReconciler struct { 126 client.Client 127 Scheme *runtime.Scheme 128 Recorder record.EventRecorder 129 RestConfig *rest.Config 130 131 secretRefMapper refObjectMapper 132 providerRefMapper refObjectMapper 133 } 134 135 // full access on BackupRepos 136 // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=backuprepos,verbs=get;list;watch;create;update;patch;delete 137 // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=backuprepos/status,verbs=get;update;patch 138 // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=backuprepos/finalizers,verbs=update 139 140 // watch StorageProviders 141 // +kubebuilder:rbac:groups=storage.kubeblocks.io,resources=storageproviders,verbs=get;list;watch 142 143 // watch or update Backups 144 // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=backups,verbs=get;list;watch;update;patch 145 146 // create or delete StorageClasses 147 // +kubebuilder:rbac:groups=storage.k8s.io,resources=storageclasses,verbs=get;list;watch;create;delete 148 149 // create or delete PVCs 150 // +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete 151 152 // create or delete Secrets 153 // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete 154 155 // create or delete Jobs 156 // +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete 157 158 // Reconcile is part of the main kubernetes reconciliation loop which aims to 159 // move the current state of the cluster closer to the desired state. 160 // 161 // For more details, check Reconcile and its Result here: 162 // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.14.1/pkg/reconcile 163 func (r *BackupRepoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 164 logger := log.FromContext(ctx).WithValues("backuprepo", req.NamespacedName) 165 reqCtx := intctrlutil.RequestCtx{ 166 Ctx: ctx, 167 Req: req, 168 Log: logger, 169 Recorder: r.Recorder, 170 } 171 172 // TODO: better event recording 173 174 // get repo object 175 repo := &dpv1alpha1.BackupRepo{} 176 if err := r.Get(ctx, req.NamespacedName, repo); err != nil { 177 return checkedRequeueWithError(err, reqCtx.Log, "failed to get BackupRepo") 178 } 179 180 // handle finalizer 181 res, err := intctrlutil.HandleCRDeletion(reqCtx, r, repo, dptypes.DataProtectionFinalizerName, func() (*ctrl.Result, error) { 182 return nil, r.deleteExternalResources(reqCtx, repo) 183 }) 184 if res != nil { 185 return *res, err 186 } 187 188 // add references 189 if repo.Spec.Credential != nil { 190 r.secretRefMapper.setRef(repo, types.NamespacedName{ 191 Name: repo.Spec.Credential.Name, 192 Namespace: repo.Spec.Credential.Namespace, 193 }) 194 } 195 r.providerRefMapper.setRef(repo, types.NamespacedName{Name: repo.Spec.StorageProviderRef}) 196 197 // check storage provider 198 provider, err := r.checkStorageProvider(reqCtx, repo) 199 if err != nil { 200 _ = r.updateStatus(reqCtx, repo) 201 return checkedRequeueWithError(err, reqCtx.Log, "check storage provider status failed") 202 } 203 204 // check parameters for rendering templates 205 parameters, err := r.checkParameters(reqCtx, repo) 206 if err != nil { 207 _ = r.updateStatus(reqCtx, repo) 208 return checkedRequeueWithError(err, reqCtx.Log, "check parameters failed") 209 } 210 211 reconCtx := &reconcileContext{ 212 RequestCtx: reqCtx, 213 repo: repo, 214 provider: provider, 215 Parameters: parameters, 216 renderCtx: renderContext{ 217 Parameters: parameters, 218 }, 219 } 220 221 // create StorageClass and Secret for the CSI driver 222 err = r.createStorageClassAndSecret(reconCtx) 223 if err != nil { 224 _ = r.updateStatus(reqCtx, repo) 225 return checkedRequeueWithError(err, reqCtx.Log, 226 "failed to create storage class and secret") 227 } 228 229 // check PVC template 230 err = r.checkPVCTemplate(reconCtx) 231 if err != nil { 232 _ = r.updateStatus(reqCtx, repo) 233 return checkedRequeueWithError(err, reqCtx.Log, 234 "failed to check PVC template") 235 } 236 237 // pre-check the repo by running a real job 238 if repo.Status.Phase != dpv1alpha1.BackupRepoDeleting { 239 err = r.preCheckRepo(reconCtx) 240 if err != nil { 241 _ = r.updateStatus(reqCtx, repo) 242 return checkedRequeueWithError(err, reqCtx.Log, "failed to pre-check") 243 } 244 } 245 246 // update status phase to ready if all conditions are met 247 if err = r.updateStatus(reqCtx, repo); err != nil { 248 return checkedRequeueWithError(err, reqCtx.Log, 249 "failed to update BackupRepo status") 250 } 251 252 if reconCtx.preCheckFinished() { 253 // clear pre-check resources 254 if err := r.removePreCheckResources(reconCtx); err != nil { 255 return checkedRequeueWithError(err, reqCtx.Log, 256 "failed to remove pre-check resources") 257 } 258 } 259 260 if repo.Status.Phase == dpv1alpha1.BackupRepoReady { 261 // update tool config if needed 262 err = r.updateToolConfigSecrets(reconCtx) 263 if err != nil { 264 return checkedRequeueWithError(err, reqCtx.Log, 265 "failed to update tool config secrets") 266 } 267 268 // check associated backups, to create PVC in their namespaces 269 if err = r.prepareForAssociatedBackups(reconCtx); err != nil { 270 return checkedRequeueWithError(err, reqCtx.Log, 271 "check associated backups failed") 272 } 273 } 274 275 return ctrl.Result{}, nil 276 } 277 278 func (r *BackupRepoReconciler) updateStatus(reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) error { 279 old := repo.DeepCopy() 280 // not allow to transit to other phase if it is deleting 281 if repo.Status.Phase != dpv1alpha1.BackupRepoDeleting { 282 phase := dpv1alpha1.BackupRepoFailed 283 basicCheckingPassed := meta.IsStatusConditionTrue(repo.Status.Conditions, ConditionTypeStorageProviderReady) && 284 meta.IsStatusConditionTrue(repo.Status.Conditions, ConditionTypeParametersChecked) && 285 meta.IsStatusConditionTrue(repo.Status.Conditions, ConditionTypeStorageClassCreated) && 286 meta.IsStatusConditionTrue(repo.Status.Conditions, ConditionTypePVCTemplateChecked) 287 if basicCheckingPassed { 288 cond := meta.FindStatusCondition(repo.Status.Conditions, ConditionTypePreCheckPassed) 289 if cond != nil && cond.Status == metav1.ConditionTrue { 290 phase = dpv1alpha1.BackupRepoReady 291 } else if cond != nil && cond.Status == metav1.ConditionUnknown { 292 phase = dpv1alpha1.BackupRepoPreChecking 293 } 294 } 295 repo.Status.Phase = phase 296 } 297 repo.Status.IsDefault = repo.Annotations[dptypes.DefaultBackupRepoAnnotationKey] == trueVal 298 299 // update other fields 300 if repo.Status.BackupPVCName == "" { 301 repo.Status.BackupPVCName = randomNameForDerivedObject(repo, "pvc") 302 } 303 if repo.Status.ToolConfigSecretName == "" { 304 repo.Status.ToolConfigSecretName = randomNameForDerivedObject(repo, "tool-config") 305 } 306 if repo.Status.ObservedGeneration != repo.Generation { 307 repo.Status.ObservedGeneration = repo.Generation 308 } 309 310 if !reflect.DeepEqual(old.Status, repo.Status) { 311 if err := r.Client.Status().Patch(reqCtx.Ctx, repo, client.MergeFrom(old)); err != nil { 312 return fmt.Errorf("updateStatus failed: %w", err) 313 } 314 } 315 return nil 316 } 317 318 func (r *BackupRepoReconciler) updateConditionInDefer(ctx context.Context, repo *dpv1alpha1.BackupRepo, 319 condType string, reason string, statusPtr *metav1.ConditionStatus, messagePtr *string, err *error) { 320 status := metav1.ConditionTrue 321 message := "" 322 if *err != nil { 323 status = metav1.ConditionFalse 324 message = (*err).Error() 325 } 326 if statusPtr != nil { 327 status = *statusPtr 328 } 329 if messagePtr != nil { 330 message = *messagePtr 331 } 332 updateErr := updateCondition(ctx, r.Client, repo, condType, status, reason, message) 333 if *err == nil { 334 *err = updateErr 335 } 336 } 337 338 func (r *BackupRepoReconciler) checkStorageProvider( 339 reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) (provider *storagev1alpha1.StorageProvider, err error) { 340 reason := ReasonUnknownError 341 defer func() { 342 r.updateConditionInDefer(reqCtx.Ctx, repo, ConditionTypeStorageProviderReady, reason, nil, nil, &err) 343 }() 344 345 // get storage provider object 346 providerKey := client.ObjectKey{Name: repo.Spec.StorageProviderRef} 347 provider = &storagev1alpha1.StorageProvider{} 348 err = r.Client.Get(reqCtx.Ctx, providerKey, provider) 349 if err != nil { 350 if apierrors.IsNotFound(err) { 351 reason = ReasonStorageProviderNotFound 352 } 353 return nil, err 354 } 355 356 // check its spec 357 switch { 358 case repo.AccessByMount(): 359 if provider.Spec.StorageClassTemplate == "" && 360 provider.Spec.PersistentVolumeClaimTemplate == "" { 361 // both StorageClassTemplate and PersistentVolumeClaimTemplate are empty. 362 // in this case, we are unable to create a backup PVC. 363 reason = ReasonInvalidStorageProvider 364 return provider, newDependencyError("both StorageClassTemplate and PersistentVolumeClaimTemplate are empty") 365 } 366 csiInstalledCond := meta.FindStatusCondition(provider.Status.Conditions, storagev1alpha1.ConditionTypeCSIDriverInstalled) 367 if csiInstalledCond == nil || csiInstalledCond.Status != metav1.ConditionTrue { 368 reason = ReasonStorageProviderNotReady 369 return provider, newDependencyError("CSI driver is not installed") 370 } 371 case repo.AccessByTool(): 372 if provider.Spec.DatasafedConfigTemplate == "" { 373 reason = ReasonInvalidStorageProvider 374 return provider, newDependencyError("DatasafedConfigTemplate is empty") 375 } 376 } 377 378 // check its status 379 reason = ReasonStorageProviderReady 380 return provider, nil 381 } 382 383 func (r *BackupRepoReconciler) checkParameters(reqCtx intctrlutil.RequestCtx, 384 repo *dpv1alpha1.BackupRepo) (parameters map[string]string, err error) { 385 reason := ReasonUnknownError 386 defer func() { 387 r.updateConditionInDefer(reqCtx.Ctx, repo, ConditionTypeParametersChecked, reason, nil, nil, &err) 388 }() 389 390 // collect parameters for rendering templates 391 parameters, err = r.collectParameters(reqCtx, repo) 392 if err != nil { 393 if apierrors.IsNotFound(err) { 394 reason = ReasonCredentialSecretNotFound 395 } 396 return nil, err 397 } 398 // TODO: verify parameters 399 reason = ReasonParametersChecked 400 return parameters, nil 401 } 402 403 func (r *BackupRepoReconciler) createStorageClassAndSecret(reconCtx *reconcileContext) (err error) { 404 405 reason := ReasonUnknownError 406 defer func() { 407 r.updateConditionInDefer(reconCtx.Ctx, reconCtx.repo, ConditionTypeStorageClassCreated, reason, nil, nil, &err) 408 }() 409 410 oldRepo := reconCtx.repo.DeepCopy() 411 412 // create secret for the CSI driver if it's not exist, 413 // or update the secret if the template or values are updated 414 if reconCtx.provider.Spec.CSIDriverSecretTemplate != "" { 415 if reconCtx.repo.Status.GeneratedCSIDriverSecret == nil { 416 reconCtx.repo.Status.GeneratedCSIDriverSecret = &corev1.SecretReference{ 417 Name: randomNameForDerivedObject(reconCtx.repo, "secret"), 418 Namespace: viper.GetString(constant.CfgKeyCtrlrMgrNS), 419 } 420 } 421 reconCtx.renderCtx.CSIDriverSecretRef = *reconCtx.repo.Status.GeneratedCSIDriverSecret 422 // create or update the secret for CSI 423 if _, err = r.createOrUpdateSecretForCSIDriver(reconCtx); err != nil { 424 reason = ReasonPrepareCSISecretFailed 425 return err 426 } 427 } 428 429 if reconCtx.provider.Spec.StorageClassTemplate != "" { 430 // create storage class if it's not exist 431 if reconCtx.repo.Status.GeneratedStorageClassName == "" { 432 reconCtx.repo.Status.GeneratedStorageClassName = randomNameForDerivedObject(reconCtx.repo, "sc") 433 } 434 if _, err = r.createStorageClass(reconCtx); err != nil { 435 reason = ReasonPrepareStorageClassFailed 436 return err 437 } 438 } 439 440 if !meta.IsStatusConditionTrue(reconCtx.repo.Status.Conditions, ConditionTypeStorageClassCreated) { 441 setCondition(reconCtx.repo, ConditionTypeStorageClassCreated, 442 metav1.ConditionTrue, ReasonStorageClassCreated, "") 443 } 444 445 if !reflect.DeepEqual(oldRepo.Status, reconCtx.repo.Status) { 446 err := r.Client.Status().Patch(reconCtx.Ctx, reconCtx.repo, client.MergeFrom(oldRepo)) 447 if err != nil { 448 return fmt.Errorf("failed to patch backup repo: %w", err) 449 } 450 } 451 reason = ReasonStorageClassCreated 452 return nil 453 } 454 455 func (r *BackupRepoReconciler) createOrUpdateSecretForCSIDriver( 456 reconCtx *reconcileContext) (created bool, err error) { 457 458 secret := &corev1.Secret{} 459 secret.Name = reconCtx.repo.Status.GeneratedCSIDriverSecret.Name 460 secret.Namespace = reconCtx.repo.Status.GeneratedCSIDriverSecret.Namespace 461 462 shouldUpdateFunc := func() bool { 463 oldDigest := secret.Annotations[dataProtectionBackupRepoDigestAnnotationKey] 464 return oldDigest != reconCtx.getDigest() 465 } 466 467 return createOrUpdateObject(reconCtx.Ctx, r.Client, secret, func() error { 468 // render secret template 469 content, err := renderTemplate("secret", reconCtx.provider.Spec.CSIDriverSecretTemplate, reconCtx.renderCtx) 470 if err != nil { 471 return fmt.Errorf("failed to render secret template: %w", err) 472 } 473 secretStringData := map[string]string{} 474 if err = yaml.Unmarshal([]byte(content), &secretStringData); err != nil { 475 return fmt.Errorf("failed to unmarshal secret content: %w", err) 476 } 477 secretData := make(map[string][]byte, len(secretStringData)) 478 for k, v := range secretStringData { 479 secretData[k] = []byte(v) 480 } 481 secret.Data = secretData 482 483 // set labels and annotations 484 if secret.Labels == nil { 485 secret.Labels = make(map[string]string) 486 } 487 secret.Labels[dataProtectionBackupRepoKey] = reconCtx.repo.Name 488 489 if secret.Annotations == nil { 490 secret.Annotations = make(map[string]string) 491 } 492 secret.Annotations[dataProtectionBackupRepoDigestAnnotationKey] = reconCtx.getDigest() 493 494 if err := controllerutil.SetControllerReference(reconCtx.repo, secret, r.Scheme); err != nil { 495 return fmt.Errorf("failed to set controller reference: %w", err) 496 } 497 return nil 498 }, shouldUpdateFunc) 499 } 500 501 func (r *BackupRepoReconciler) createStorageClass( 502 reconCtx *reconcileContext) (created bool, err error) { 503 504 storageClass := &storagev1.StorageClass{} 505 storageClass.Name = reconCtx.repo.Status.GeneratedStorageClassName 506 return createObjectIfNotExist(reconCtx.Ctx, r.Client, storageClass, 507 func() error { 508 // render storage class template 509 content, err := renderTemplate("sc", reconCtx.provider.Spec.StorageClassTemplate, reconCtx.renderCtx) 510 if err != nil { 511 return fmt.Errorf("failed to render storage class template: %w", err) 512 } 513 if err = yaml.Unmarshal([]byte(content), storageClass); err != nil { 514 return fmt.Errorf("failed to unmarshal storage class: %w", err) 515 } 516 517 // create storage class object 518 storageClass.Labels = map[string]string{ 519 dataProtectionBackupRepoKey: reconCtx.repo.Name, 520 } 521 bindingMode := storagev1.VolumeBindingImmediate 522 storageClass.VolumeBindingMode = &bindingMode 523 if reconCtx.repo.Spec.PVReclaimPolicy != "" { 524 storageClass.ReclaimPolicy = &reconCtx.repo.Spec.PVReclaimPolicy 525 } 526 if err := controllerutil.SetControllerReference(reconCtx.repo, storageClass, r.Scheme); err != nil { 527 return fmt.Errorf("failed to set owner reference: %w", err) 528 } 529 return nil 530 }) 531 } 532 533 func (r *BackupRepoReconciler) checkPVCTemplate(reconCtx *reconcileContext) (err error) { 534 reason := ReasonUnknownError 535 defer func() { 536 r.updateConditionInDefer(reconCtx.Ctx, reconCtx.repo, ConditionTypePVCTemplateChecked, reason, nil, nil, &err) 537 }() 538 539 if !reconCtx.repo.AccessByMount() || reconCtx.provider.Spec.PersistentVolumeClaimTemplate == "" { 540 reason = ReasonSkipped 541 return nil 542 } 543 if reconCtx.digestChanged() { 544 pvc := &corev1.PersistentVolumeClaim{} 545 err := r.constructPVCByTemplate(reconCtx, pvc, reconCtx.provider.Spec.PersistentVolumeClaimTemplate) 546 if err != nil { 547 reason = ReasonBadPVCTemplate 548 return err 549 } 550 } 551 reason = ReasonPVCTemplateChecked 552 return nil 553 } 554 555 func (r *BackupRepoReconciler) updateToolConfigSecrets(reconCtx *reconcileContext) (err error) { 556 if !reconCtx.repo.AccessByTool() { 557 return nil 558 } 559 if reconCtx.repo.Annotations[dataProtectionNeedUpdateToolConfigAnnotationKey] != trueVal { 560 return nil 561 } 562 // render tool config template 563 content, err := renderTemplate("tool-config", reconCtx.provider.Spec.DatasafedConfigTemplate, reconCtx.renderCtx) 564 if err != nil { 565 return err 566 } 567 // update existing tool config secrets 568 secretList := &corev1.SecretList{} 569 err = r.Client.List(reconCtx.Ctx, secretList, client.MatchingLabels{ 570 dataProtectionBackupRepoKey: reconCtx.repo.Name, 571 dataProtectionIsToolConfigKey: trueVal, 572 }) 573 if err != nil { 574 return err 575 } 576 for idx := range secretList.Items { 577 secret := &secretList.Items[idx] 578 oldDigest := secret.Annotations[dataProtectionBackupRepoDigestAnnotationKey] 579 if oldDigest == reconCtx.getDigest() { 580 continue 581 } 582 patch := client.MergeFrom(secret.DeepCopy()) 583 constructToolConfigSecret(secret, content) 584 if secret.Annotations == nil { 585 secret.Annotations = make(map[string]string) 586 } 587 secret.Annotations[dataProtectionBackupRepoDigestAnnotationKey] = reconCtx.getDigest() 588 if err = r.Client.Patch(reconCtx.Ctx, secret, patch); err != nil { 589 return err 590 } 591 } 592 593 return updateAnnotations(reconCtx.Ctx, r.Client, reconCtx.repo, map[string]string{ 594 dataProtectionNeedUpdateToolConfigAnnotationKey: "false", 595 }) 596 } 597 598 func (r *BackupRepoReconciler) preCheckRepo(reconCtx *reconcileContext) (err error) { 599 if reconCtx.digestChanged() { 600 // invalidate the old status. reconCtx.preCheckFinished() depends on this value 601 err := updateCondition(reconCtx.Ctx, r.Client, reconCtx.repo, ConditionTypePreCheckPassed, 602 metav1.ConditionUnknown, ReasonDigestChanged, "") 603 if err != nil { 604 return err 605 } 606 607 err = updateAnnotations(reconCtx.Ctx, r.Client, reconCtx.repo, map[string]string{ 608 dataProtectionBackupRepoDigestAnnotationKey: reconCtx.getDigest(), 609 dataProtectionNeedUpdateToolConfigAnnotationKey: trueVal, 610 }) 611 if err != nil { 612 return err 613 } 614 } 615 if reconCtx.preCheckFinished() { 616 return nil 617 } 618 619 status := metav1.ConditionUnknown 620 reason := ReasonUnknownError 621 message := "" 622 defer func() { 623 if message == "" && err != nil { 624 message = err.Error() 625 } 626 r.updateConditionInDefer(reconCtx.Ctx, reconCtx.repo, ConditionTypePreCheckPassed, reason, &status, &message, &err) 627 }() 628 var job *batchv1.Job 629 var pvc *corev1.PersistentVolumeClaim 630 switch { 631 case reconCtx.repo.AccessByMount(): 632 job, pvc, err = r.runPreCheckJobForMounting(reconCtx) 633 case reconCtx.repo.AccessByTool(): 634 job, err = r.runPreCheckJobForTool(reconCtx) 635 default: 636 err = fmt.Errorf("unknown access method: %s", reconCtx.repo.Spec.AccessMethod) 637 } 638 if err != nil { 639 return err 640 } 641 642 finished, jobStatus, failureReason := utils.IsJobFinished(job) 643 if !finished { 644 duration := wallClock.Since(job.CreationTimestamp.Time) 645 if duration > defaultPreCheckTimeout { 646 // HACK: mark as failure 647 jobStatus = batchv1.JobFailed 648 failureReason = "timeout" 649 } else { 650 // Job and Pod both have activeDeadlineSeconds, but neither of them is suitable for our scenario. 651 // If job.spec.activeDeadlineSeconds is set, when the run times out, the job controller will delete 652 // the running pods directly to stop them; since the pods are deleted, we may not have time to collect 653 // the error logs. 654 // In the meantime, pod.spec.activeDeadlineSeconds may fail in some cases. When the configuration 655 // of a PVC based backup repository is wrong, the PVC provisioning will fail, which makes the pod 656 // get stuck in the "Pending" state, but activeDeadlineSeconds seems to start counting from the 657 // "Running" state, so the pod will not fail due to timeout. 658 return intctrlutil.NewRequeueError(defaultCheckInterval, "wait job to finish") 659 } 660 } 661 662 if jobStatus == batchv1.JobFailed { 663 status = metav1.ConditionFalse 664 reason = ReasonPreCheckFailed 665 666 // collect logs and events from these objects 667 info, err := r.collectPreCheckFailureMessage(reconCtx, job, pvc) 668 if err != nil { 669 return fmt.Errorf("failed to collectPreCheckFailureMessage, err: %w", err) 670 } 671 message = "Pre-check job failed, information collected for diagnosis.\n\n" 672 message += fmt.Sprintf("Job failure message: %s\n\n", failureReason) 673 message += info 674 // max length of metav1.Condition.Message is 32K 675 const messageLimit = 32 * 1024 676 if len(message) > messageLimit { 677 message = message[:messageLimit] 678 } 679 } else { 680 status = metav1.ConditionTrue 681 reason = ReasonPreCheckPassed 682 } 683 return nil 684 } 685 686 func (r *BackupRepoReconciler) removePreCheckResources(reconCtx *reconcileContext) error { 687 objects := []client.Object{ 688 &batchv1.Job{}, 689 &corev1.PersistentVolumeClaim{}, 690 &corev1.Secret{}, 691 } 692 name := reconCtx.preCheckResourceName() 693 namespace := viper.GetString(constant.CfgKeyCtrlrMgrNS) 694 objKey := client.ObjectKey{Name: name, Namespace: namespace} 695 for _, obj := range objects { 696 err := r.Client.Get(reconCtx.Ctx, objKey, obj) 697 if err == nil { 698 err = intctrlutil.BackgroundDeleteObject(r.Client, reconCtx.Ctx, obj) 699 } 700 if err == nil || apierrors.IsNotFound(err) { 701 continue 702 } 703 return err 704 } 705 return nil 706 } 707 708 func (r *BackupRepoReconciler) runPreCheckJobForMounting(reconCtx *reconcileContext) (job *batchv1.Job, pvc *corev1.PersistentVolumeClaim, err error) { 709 namespace := viper.GetString(constant.CfgKeyCtrlrMgrNS) 710 // create PVC 711 pvcName := reconCtx.preCheckResourceName() 712 pvc, err = r.createRepoPVC(reconCtx, pvcName, namespace, map[string]string{ 713 dataProtectionBackupRepoDigestAnnotationKey: reconCtx.getDigest(), 714 }) 715 if err != nil { 716 return nil, nil, err 717 } 718 // run pre-check job 719 job = &batchv1.Job{} 720 job.Name = reconCtx.preCheckResourceName() 721 job.Namespace = namespace 722 _, err = createObjectIfNotExist(reconCtx.Ctx, r.Client, job, func() error { 723 job.Spec = batchv1.JobSpec{ 724 Template: corev1.PodTemplateSpec{ 725 Spec: corev1.PodSpec{ 726 RestartPolicy: corev1.RestartPolicyNever, 727 Containers: []corev1.Container{{ 728 Name: preCheckContainerName, 729 Image: viper.GetString(constant.KBToolsImage), 730 ImagePullPolicy: corev1.PullPolicy(viper.GetString(constant.KBImagePullPolicy)), 731 Command: []string{ 732 "sh", "-c", `set -ex; echo "pre-check" > /backup/precheck.txt; sync`, 733 }, 734 VolumeMounts: []corev1.VolumeMount{{ 735 Name: "backup-pvc", 736 MountPath: "/backup", 737 }}, 738 }}, 739 Volumes: []corev1.Volume{{ 740 Name: "backup-pvc", 741 VolumeSource: corev1.VolumeSource{ 742 PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ 743 ClaimName: pvcName, 744 }, 745 }, 746 }}, 747 }, 748 }, 749 BackoffLimit: pointer.Int32(2), 750 } 751 job.Labels = map[string]string{ 752 dataProtectionBackupRepoKey: reconCtx.repo.Name, 753 } 754 job.Annotations = map[string]string{ 755 dataProtectionBackupRepoDigestAnnotationKey: reconCtx.getDigest(), 756 } 757 return controllerutil.SetControllerReference(reconCtx.repo, job, r.Scheme) 758 }) 759 if err != nil { 760 return nil, nil, err 761 } 762 763 // these resources were created for the old generation of the backupRepo, 764 // so remove them and then retry. 765 if !reconCtx.hasSameDigest(pvc) || !reconCtx.hasSameDigest(job) { 766 err = r.removePreCheckResources(reconCtx) 767 if err != nil { 768 return nil, nil, err 769 } 770 return nil, nil, fmt.Errorf("pre-check job or PVC digest not match, try again") 771 } 772 return job, pvc, nil 773 } 774 775 func (r *BackupRepoReconciler) runPreCheckJobForTool(reconCtx *reconcileContext) (job *batchv1.Job, err error) { 776 namespace := viper.GetString(constant.CfgKeyCtrlrMgrNS) 777 // create tool config 778 secretName := reconCtx.preCheckResourceName() 779 secret, err := r.createToolConfigSecret(reconCtx, secretName, namespace, map[string]string{ 780 dataProtectionBackupRepoDigestAnnotationKey: reconCtx.getDigest(), 781 }) 782 if err != nil { 783 return nil, err 784 } 785 // run pre-check job 786 job = &batchv1.Job{} 787 job.Name = reconCtx.preCheckResourceName() 788 job.Namespace = namespace 789 _, err = createObjectIfNotExist(reconCtx.Ctx, r.Client, job, func() error { 790 job.Spec = batchv1.JobSpec{ 791 Template: corev1.PodTemplateSpec{ 792 Spec: corev1.PodSpec{ 793 RestartPolicy: corev1.RestartPolicyNever, 794 Containers: []corev1.Container{{ 795 Name: preCheckContainerName, 796 Image: viper.GetString(constant.KBToolsImage), 797 ImagePullPolicy: corev1.PullPolicy(viper.GetString(constant.KBImagePullPolicy)), 798 Command: []string{ 799 "sh", "-c", 800 ` 801 set -ex 802 export PATH="$PATH:$DP_DATASAFED_BIN_PATH" 803 echo "pre-check" | datasafed push - /precheck.txt`, 804 }, 805 }}, 806 }, 807 }, 808 BackoffLimit: pointer.Int32(2), 809 } 810 job.Labels = map[string]string{ 811 dataProtectionBackupRepoKey: reconCtx.repo.Name, 812 } 813 job.Annotations = map[string]string{ 814 dataProtectionBackupRepoDigestAnnotationKey: reconCtx.getDigest(), 815 } 816 utils.InjectDatasafedWithConfig(&job.Spec.Template.Spec, secretName, "") 817 return controllerutil.SetControllerReference(reconCtx.repo, job, r.Scheme) 818 }) 819 if err != nil { 820 return nil, err 821 } 822 823 // these resources were created for the old generation of the backupRepo, 824 // so remove them and then retry. 825 if !reconCtx.hasSameDigest(secret) || !reconCtx.hasSameDigest(job) { 826 err = r.removePreCheckResources(reconCtx) 827 if err != nil { 828 return nil, err 829 } 830 return nil, fmt.Errorf("pre-check job or tool config secret digest not match, try again") 831 } 832 return job, nil 833 } 834 835 func (r *BackupRepoReconciler) collectPreCheckFailureMessage(reconCtx *reconcileContext, job *batchv1.Job, pvc *corev1.PersistentVolumeClaim) (string, error) { 836 podList, err := utils.GetAssociatedPodsOfJob(reconCtx.Ctx, r.Client, job.Namespace, job.Name) 837 if err != nil { 838 return "", err 839 } 840 // sort pod with latest creation place front 841 slices.SortFunc(podList.Items, func(a, b corev1.Pod) int { 842 if a.CreationTimestamp.Equal(&(b.CreationTimestamp)) { 843 return 0 844 } 845 if a.CreationTimestamp.Before(&(b.CreationTimestamp)) { 846 return 1 847 } 848 return -1 849 }) 850 851 prependSpaces := func(content string, spaces int) string { 852 prefix := "" 853 for i := 0; i < spaces; i++ { 854 prefix += " " 855 } 856 r := bytes.NewBufferString(content) 857 w := bytes.NewBuffer(nil) 858 w.Grow(r.Len()) 859 for { 860 line, err := r.ReadString('\n') 861 if len(line) > 0 { 862 w.WriteString(prefix) 863 w.WriteString(line) 864 } 865 if err != nil { 866 break 867 } 868 } 869 return w.String() 870 } 871 872 var message string 873 874 // collect failure logs from the pod 875 const contentLimit = 4 * 1024 876 failureLogs, err := r.collectFailedPodLogs(reconCtx.Ctx, podList, preCheckContainerName, contentLimit) 877 if err != nil { 878 return "", err 879 } 880 if failureLogs == "" { 881 message += "No logs are available.\n\n" 882 } else { 883 message += fmt.Sprintf("Logs from the pre-check job:\n%s\n", prependSpaces(failureLogs, 2)) 884 } 885 886 collectEvents := func(object client.Object) error { 887 gvk, err := r.Client.GroupVersionKindFor(object) 888 if err != nil { 889 return err 890 } 891 events, err := fetchObjectEvents(reconCtx.Ctx, r.Client, object) 892 if err != nil { 893 return err 894 } 895 // kind := object.GetObjectKind().GroupVersionKind().Kind 896 kind := gvk.Kind 897 if len(events.Items) == 0 { 898 message += fmt.Sprintf("No events are available for %s/%s.\n\n", kind, client.ObjectKeyFromObject(object)) 899 } else { 900 content := utils.EventsToString(events) 901 if len(content) > contentLimit { 902 content = content[:contentLimit] + "[truncated]" 903 } 904 message += fmt.Sprintf("Events from %s/%s:\n%s\n", kind, client.ObjectKeyFromObject(object), content) 905 } 906 return nil 907 } 908 909 // collect events from the latest pod 910 if len(podList.Items) > 0 { 911 if err := collectEvents(&podList.Items[0]); err != nil { 912 return "", err 913 } 914 } 915 // collect events from the pvc 916 if pvc != nil { 917 if err := collectEvents(pvc); err != nil { 918 return "", err 919 } 920 } 921 // collect events from the job 922 if err := collectEvents(job); err != nil { 923 return "", err 924 } 925 return message, nil 926 } 927 928 func (r *BackupRepoReconciler) collectFailedPodLogs(ctx context.Context, 929 podList *corev1.PodList, containerName string, limit int64) (string, error) { 930 typedCli, err := corev1client.NewForConfig(r.RestConfig) 931 if err != nil { 932 return "", err 933 } 934 for _, pod := range podList.Items { 935 if pod.Status.Phase == corev1.PodFailed { 936 currOpts := &corev1.PodLogOptions{ 937 Container: containerName, 938 } 939 req := typedCli.Pods(pod.Namespace).GetLogs(pod.Name, currOpts) 940 stream, err := req.Stream(ctx) 941 if err != nil { 942 return "", err 943 } 944 limited := io.LimitReader(stream, limit) 945 data, _ := io.ReadAll(limited) 946 return string(data), nil 947 } 948 } 949 return "", nil 950 } 951 952 func (r *BackupRepoReconciler) constructPVCByTemplate( 953 reconCtx *reconcileContext, pvc *corev1.PersistentVolumeClaim, tmpl string) error { 954 // fill render values 955 reconCtx.renderCtx.GeneratedStorageClassName = reconCtx.repo.Status.GeneratedStorageClassName 956 957 content, err := renderTemplate("pvc", tmpl, reconCtx.renderCtx) 958 if err != nil { 959 return fmt.Errorf("failed to render PVC template: %w", err) 960 } 961 if err = yaml.Unmarshal([]byte(content), pvc); err != nil { 962 return fmt.Errorf("failed to unmarshal PVC object: %w", err) 963 } 964 return nil 965 } 966 967 func (r *BackupRepoReconciler) listAssociatedBackups( 968 ctx context.Context, repo *dpv1alpha1.BackupRepo, extraSelector map[string]string) ([]*dpv1alpha1.Backup, error) { 969 // list backups associated with the repo 970 backupList := &dpv1alpha1.BackupList{} 971 selectors := client.MatchingLabels{ 972 dataProtectionBackupRepoKey: repo.Name, 973 } 974 for k, v := range extraSelector { 975 selectors[k] = v 976 } 977 err := r.Client.List(ctx, backupList, selectors) 978 var filtered []*dpv1alpha1.Backup 979 for idx := range backupList.Items { 980 backup := &backupList.Items[idx] 981 if backup.Status.Phase == dpv1alpha1.BackupPhaseFailed { 982 continue 983 } 984 filtered = append(filtered, backup) 985 } 986 return filtered, err 987 } 988 989 func (r *BackupRepoReconciler) prepareForAssociatedBackups(reconCtx *reconcileContext) error { 990 backups, err := r.listAssociatedBackups(reconCtx.Ctx, reconCtx.repo, map[string]string{ 991 dataProtectionWaitRepoPreparationKey: trueVal, 992 }) 993 if err != nil { 994 return err 995 } 996 // return any error to reconcile the repo 997 var retErr error 998 for _, backup := range backups { 999 switch { 1000 case reconCtx.repo.AccessByMount(): 1001 if _, err := r.createRepoPVC(reconCtx, reconCtx.repo.Status.BackupPVCName, backup.Namespace, nil); err != nil { 1002 reconCtx.Log.Error(err, "failed to check or create PVC", "namespace", backup.Namespace) 1003 retErr = err 1004 continue 1005 } 1006 case reconCtx.repo.AccessByTool(): 1007 if _, err := r.createToolConfigSecret(reconCtx, reconCtx.repo.Status.ToolConfigSecretName, backup.Namespace, nil); err != nil { 1008 reconCtx.Log.Error(err, "failed to check or create tool config secret", "namespace", backup.Namespace) 1009 retErr = err 1010 continue 1011 } 1012 default: 1013 retErr = fmt.Errorf("unknown access method: %s", reconCtx.repo.Spec.AccessMethod) 1014 } 1015 1016 if backup.Labels[dataProtectionWaitRepoPreparationKey] != "" { 1017 patch := client.MergeFrom(backup.DeepCopy()) 1018 delete(backup.Labels, dataProtectionWaitRepoPreparationKey) 1019 if err = r.Client.Patch(reconCtx.Ctx, backup, patch); err != nil { 1020 reconCtx.Log.Error(err, "failed to patch backup", 1021 "backup", client.ObjectKeyFromObject(backup)) 1022 retErr = err 1023 continue 1024 } 1025 } 1026 } 1027 return retErr 1028 } 1029 1030 func (r *BackupRepoReconciler) createRepoPVC(reconCtx *reconcileContext, 1031 name, namespace string, extraAnnos map[string]string) (*corev1.PersistentVolumeClaim, error) { 1032 1033 pvc := &corev1.PersistentVolumeClaim{} 1034 pvc.Name = name 1035 pvc.Namespace = namespace 1036 _, err := createObjectIfNotExist(reconCtx.Ctx, r.Client, pvc, 1037 func() error { 1038 if reconCtx.provider.Spec.PersistentVolumeClaimTemplate != "" { 1039 // construct the PVC object by rendering the template 1040 err := r.constructPVCByTemplate(reconCtx, pvc, reconCtx.provider.Spec.PersistentVolumeClaimTemplate) 1041 if err != nil { 1042 return err 1043 } 1044 // overwrite PVC name and namespace 1045 pvc.Name = name 1046 pvc.Namespace = namespace 1047 } else { 1048 // set storage class name to PVC, other fields will be set with default value later 1049 storageClassName := reconCtx.repo.Status.GeneratedStorageClassName 1050 pvc.Spec = corev1.PersistentVolumeClaimSpec{ 1051 StorageClassName: &storageClassName, 1052 } 1053 } 1054 // add a referencing label 1055 if pvc.Labels == nil { 1056 pvc.Labels = make(map[string]string) 1057 } 1058 pvc.Labels[dataProtectionBackupRepoKey] = reconCtx.repo.Name 1059 // extra annotations 1060 if pvc.Annotations == nil { 1061 pvc.Annotations = make(map[string]string) 1062 } 1063 for k, v := range extraAnnos { 1064 pvc.Annotations[k] = v 1065 } 1066 // set default values if not set 1067 if len(pvc.Spec.AccessModes) == 0 { 1068 pvc.Spec.AccessModes = []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany} 1069 } 1070 if pvc.Spec.VolumeMode == nil { 1071 volumeMode := corev1.PersistentVolumeFilesystem 1072 pvc.Spec.VolumeMode = &volumeMode 1073 } 1074 if pvc.Spec.Resources.Requests == nil { 1075 pvc.Spec.Resources.Requests = corev1.ResourceList{} 1076 } 1077 // note: pvc.Spec.Resources.Requests.Storage() never returns nil 1078 if pvc.Spec.Resources.Requests.Storage().IsZero() { 1079 pvc.Spec.Resources.Requests[corev1.ResourceStorage] = reconCtx.repo.Spec.VolumeCapacity 1080 } 1081 if err := controllerutil.SetControllerReference(reconCtx.repo, pvc, r.Scheme); err != nil { 1082 return fmt.Errorf("failed to set owner reference: %w", err) 1083 } 1084 return nil 1085 }) 1086 1087 return pvc, err 1088 } 1089 1090 func constructToolConfigSecret(secret *corev1.Secret, content string) { 1091 secret.Data = map[string][]byte{ 1092 "datasafed.conf": []byte(content), 1093 } 1094 } 1095 1096 func (r *BackupRepoReconciler) createToolConfigSecret(reconCtx *reconcileContext, 1097 name, namespace string, extraAnnos map[string]string) (*corev1.Secret, error) { 1098 1099 secret := &corev1.Secret{} 1100 secret.Name = name 1101 secret.Namespace = namespace 1102 _, err := createObjectIfNotExist(reconCtx.Ctx, r.Client, secret, 1103 func() error { 1104 content, err := renderTemplate("tool-config", reconCtx.provider.Spec.DatasafedConfigTemplate, reconCtx.renderCtx) 1105 if err != nil { 1106 return fmt.Errorf("failed to render tool config template: %w", err) 1107 } 1108 constructToolConfigSecret(secret, content) 1109 1110 // add a referencing label 1111 secret.Labels = map[string]string{ 1112 dataProtectionBackupRepoKey: reconCtx.repo.Name, 1113 dataProtectionIsToolConfigKey: trueVal, 1114 } 1115 secret.Annotations = map[string]string{ 1116 dataProtectionBackupRepoDigestAnnotationKey: reconCtx.getDigest(), 1117 } 1118 for k, v := range extraAnnos { 1119 secret.Annotations[k] = v 1120 } 1121 if err := controllerutil.SetControllerReference(reconCtx.repo, secret, r.Scheme); err != nil { 1122 return fmt.Errorf("failed to set owner reference: %w", err) 1123 } 1124 return nil 1125 }) 1126 1127 return secret, err 1128 } 1129 1130 func (r *BackupRepoReconciler) collectParameters( 1131 reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) (map[string]string, error) { 1132 values := make(map[string]string) 1133 for k, v := range repo.Spec.Config { 1134 values[k] = v 1135 } 1136 // merge with secret values 1137 if repo.Spec.Credential != nil { 1138 secretObj := &corev1.Secret{} 1139 err := r.Client.Get(reqCtx.Ctx, client.ObjectKey{ 1140 Namespace: repo.Spec.Credential.Namespace, 1141 Name: repo.Spec.Credential.Name, 1142 }, secretObj) 1143 if err != nil { 1144 return nil, fmt.Errorf("failed to get secret: %w", err) 1145 } 1146 for k, v := range secretObj.Data { 1147 values[k] = string(v) 1148 } 1149 } 1150 return values, nil 1151 } 1152 1153 func (r *BackupRepoReconciler) deleteExternalResources( 1154 reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) error { 1155 // set phase to deleting, so no new Backup can reference to this repo 1156 if repo.Status.Phase != dpv1alpha1.BackupRepoDeleting { 1157 patch := client.MergeFrom(repo.DeepCopy()) 1158 repo.Status.Phase = dpv1alpha1.BackupRepoDeleting 1159 if err := r.Client.Status().Patch(reqCtx.Ctx, repo, patch); err != nil { 1160 return err 1161 } 1162 } 1163 1164 // TODO: block deletion if any BackupPolicy is referencing to this repo 1165 1166 // check if the repo is still being used by any backup 1167 if backups, err := r.listAssociatedBackups(reqCtx.Ctx, repo, nil); err != nil { 1168 return err 1169 } else if len(backups) > 0 { 1170 _ = updateCondition(reqCtx.Ctx, r.Client, repo, ConditionTypeDerivedObjectsDeleted, 1171 metav1.ConditionFalse, ReasonHaveAssociatedBackups, 1172 "some backups still refer to this repo") 1173 return fmt.Errorf("some backups still refer to this repo") 1174 } 1175 1176 // delete pre-check jobs 1177 if err := r.deleteJobs(reqCtx, repo); err != nil { 1178 return err 1179 } 1180 1181 // delete PVCs 1182 if cleared, err := r.deletePVCs(reqCtx, repo); err != nil { 1183 return err 1184 } else if !cleared { 1185 _ = updateCondition(reqCtx.Ctx, r.Client, repo, ConditionTypeDerivedObjectsDeleted, 1186 metav1.ConditionFalse, ReasonHaveResidualPVCs, 1187 "maybe the derived PVCs are still in use") 1188 return fmt.Errorf("derived PVCs are still in use") 1189 } 1190 1191 // delete derived storage classes 1192 if err := r.deleteStorageClasses(reqCtx, repo); err != nil { 1193 return err 1194 } 1195 1196 // delete derived secrets (secret for CSI and tool configs) 1197 if err := r.deleteSecrets(reqCtx, repo); err != nil { 1198 return err 1199 } 1200 1201 // update condition status 1202 err := updateCondition(reqCtx.Ctx, r.Client, repo, ConditionTypeDerivedObjectsDeleted, 1203 metav1.ConditionTrue, ReasonDerivedObjectsDeleted, "") 1204 if err != nil { 1205 return fmt.Errorf("failed to update condition: %w", err) 1206 } 1207 1208 // maintain mappers 1209 r.secretRefMapper.removeRef(repo) 1210 r.providerRefMapper.removeRef(repo) 1211 1212 return nil 1213 } 1214 1215 func (r *BackupRepoReconciler) deleteJobs(reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) error { 1216 jobList := &batchv1.JobList{} 1217 if err := r.Client.List(reqCtx.Ctx, jobList, 1218 client.MatchingLabels(map[string]string{ 1219 dataProtectionBackupRepoKey: repo.Name, 1220 })); err != nil { 1221 return fmt.Errorf("failed to list Jobs: %w", err) 1222 } 1223 1224 for _, job := range jobList.Items { 1225 if !isOwned(repo, &job) { 1226 continue 1227 } 1228 reqCtx.Log.Info("deleting job", "name", job.Name, "namespace", job.Namespace) 1229 if err := intctrlutil.BackgroundDeleteObject(r.Client, reqCtx.Ctx, &job); err != nil { 1230 return err 1231 } 1232 } 1233 return nil 1234 } 1235 1236 func (r *BackupRepoReconciler) deletePVCs(reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) (cleared bool, err error) { 1237 pvcList := &corev1.PersistentVolumeClaimList{} 1238 if err := r.Client.List(reqCtx.Ctx, pvcList, 1239 client.MatchingLabels(map[string]string{ 1240 dataProtectionBackupRepoKey: repo.Name, 1241 })); err != nil { 1242 return false, fmt.Errorf("failed to list PVCs: %w", err) 1243 } 1244 1245 for _, pvc := range pvcList.Items { 1246 if !isOwned(repo, &pvc) { 1247 continue 1248 } 1249 reqCtx.Log.Info("deleting PVC", "name", pvc.Name, "namespace", pvc.Namespace) 1250 if err := intctrlutil.BackgroundDeleteObject(r.Client, reqCtx.Ctx, &pvc); err != nil { 1251 return false, err 1252 } 1253 } 1254 // make sure all derived PVCs are deleted 1255 cleared = true 1256 for _, pvc := range pvcList.Items { 1257 if !isOwned(repo, &pvc) { 1258 continue 1259 } 1260 err = r.Client.Get(reqCtx.Ctx, client.ObjectKeyFromObject(&pvc), &corev1.PersistentVolumeClaim{}) 1261 if !apierrors.IsNotFound(err) { 1262 cleared = false 1263 break 1264 } 1265 } 1266 return cleared, nil 1267 } 1268 1269 func (r *BackupRepoReconciler) deleteStorageClasses(reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) error { 1270 scList := &storagev1.StorageClassList{} 1271 if err := r.Client.List(reqCtx.Ctx, scList, 1272 client.MatchingLabels(map[string]string{ 1273 dataProtectionBackupRepoKey: repo.Name, 1274 })); err != nil { 1275 return fmt.Errorf("failed to list StorageClasses: %w", err) 1276 } 1277 1278 for _, sc := range scList.Items { 1279 if !isOwned(repo, &sc) { 1280 continue 1281 } 1282 reqCtx.Log.Info("deleting StorageClass", "storageclass", sc.Name) 1283 if err := intctrlutil.BackgroundDeleteObject(r.Client, reqCtx.Ctx, &sc); err != nil { 1284 return err 1285 } 1286 } 1287 return nil 1288 } 1289 1290 func (r *BackupRepoReconciler) deleteSecrets(reqCtx intctrlutil.RequestCtx, repo *dpv1alpha1.BackupRepo) error { 1291 secretList := &corev1.SecretList{} 1292 if err := r.Client.List(reqCtx.Ctx, secretList, 1293 client.MatchingLabels(map[string]string{ 1294 dataProtectionBackupRepoKey: repo.Name, 1295 })); err != nil { 1296 return fmt.Errorf("failed to list Secret: %w", err) 1297 } 1298 1299 for _, secret := range secretList.Items { 1300 if !isOwned(repo, &secret) { 1301 continue 1302 } 1303 reqCtx.Log.Info("deleting Secret", "secret", client.ObjectKeyFromObject(&secret)) 1304 if err := intctrlutil.BackgroundDeleteObject(r.Client, reqCtx.Ctx, &secret); err != nil { 1305 return err 1306 } 1307 } 1308 return nil 1309 } 1310 1311 func (r *BackupRepoReconciler) mapBackupToRepo(ctx context.Context, obj client.Object) []ctrl.Request { 1312 backup := obj.(*dpv1alpha1.Backup) 1313 repoName, ok := backup.Labels[dataProtectionBackupRepoKey] 1314 if !ok { 1315 return nil 1316 } 1317 // ignore failed backups 1318 if backup.Status.Phase == dpv1alpha1.BackupPhaseFailed { 1319 return nil 1320 } 1321 // we should reconcile the BackupRepo when: 1322 // 1. the Backup needs to use the BackupRepo, but it's not ready for the namespace. 1323 // 2. the Backup is being deleted, because it may block the deletion of the BackupRepo. 1324 shouldReconcileRepo := backup.Labels[dataProtectionWaitRepoPreparationKey] == trueVal || 1325 !backup.DeletionTimestamp.IsZero() 1326 if shouldReconcileRepo { 1327 return []ctrl.Request{{ 1328 NamespacedName: client.ObjectKey{Name: repoName}, 1329 }} 1330 } 1331 return nil 1332 } 1333 1334 func (r *BackupRepoReconciler) mapProviderToRepos(ctx context.Context, obj client.Object) []ctrl.Request { 1335 return r.providerRefMapper.mapToRequests(obj) 1336 } 1337 1338 func (r *BackupRepoReconciler) mapSecretToRepos(ctx context.Context, obj client.Object) []ctrl.Request { 1339 // check if the secret is created by this controller 1340 owner := metav1.GetControllerOf(obj) 1341 if owner != nil { 1342 apiGVStr := dpv1alpha1.GroupVersion.String() 1343 if owner.APIVersion == apiGVStr && owner.Kind == "BackupRepo" { 1344 return []ctrl.Request{{ 1345 NamespacedName: types.NamespacedName{ 1346 Name: owner.Name, 1347 Namespace: obj.GetNamespace(), 1348 }, 1349 }} 1350 } 1351 } 1352 1353 // get repos which is referencing this secret 1354 return r.secretRefMapper.mapToRequests(obj) 1355 } 1356 1357 // SetupWithManager sets up the controller with the Manager. 1358 func (r *BackupRepoReconciler) SetupWithManager(mgr ctrl.Manager) error { 1359 if err := mgr.GetFieldIndexer().IndexField(context.Background(), &corev1.Event{}, "involvedObject.uid", func(rawObj client.Object) []string { 1360 event := rawObj.(*corev1.Event) 1361 return []string{string(event.InvolvedObject.UID)} 1362 }); err != nil { 1363 return err 1364 } 1365 return ctrl.NewControllerManagedBy(mgr). 1366 For(&dpv1alpha1.BackupRepo{}). 1367 Watches(&storagev1alpha1.StorageProvider{}, handler.EnqueueRequestsFromMapFunc(r.mapProviderToRepos)). 1368 Watches(&dpv1alpha1.Backup{}, handler.EnqueueRequestsFromMapFunc(r.mapBackupToRepo)). 1369 Watches(&corev1.Secret{}, handler.EnqueueRequestsFromMapFunc(r.mapSecretToRepos)). 1370 Owns(&storagev1.StorageClass{}). 1371 Owns(&corev1.PersistentVolumeClaim{}). 1372 Owns(&batchv1.Job{}). 1373 Complete(r) 1374 } 1375 1376 // ============================================================================ 1377 // helper functions 1378 // ============================================================================ 1379 1380 // dependencyError indicates that the error itself cannot be resolved 1381 // unless the dependent object is updated. 1382 type dependencyError struct { 1383 msg string 1384 } 1385 1386 func (e *dependencyError) Error() string { 1387 return e.msg 1388 } 1389 1390 func newDependencyError(msg string) error { 1391 return &dependencyError{msg: msg} 1392 } 1393 1394 func isDependencyError(err error) bool { 1395 de, ok := err.(*dependencyError) 1396 return ok || errors.As(err, &de) 1397 } 1398 1399 func checkedRequeueWithError(err error, logger logr.Logger, msg string, keysAndValues ...interface{}) (reconcile.Result, error) { 1400 if re, ok := err.(intctrlutil.RequeueError); ok { 1401 return intctrlutil.RequeueAfter(re.RequeueAfter(), logger, re.Reason()) 1402 } 1403 if apierrors.IsNotFound(err) || isDependencyError(err) { 1404 return intctrlutil.Reconciled() 1405 } 1406 return intctrlutil.RequeueWithError(err, logger, msg, keysAndValues...) 1407 } 1408 1409 type renderContext struct { 1410 Parameters map[string]string 1411 CSIDriverSecretRef corev1.SecretReference 1412 GeneratedStorageClassName string 1413 } 1414 1415 func renderTemplate(name, tpl string, rCtx renderContext) (string, error) { 1416 fmap := sprig.TxtFuncMap() 1417 t, err := template.New(name).Funcs(fmap).Parse(tpl) 1418 if err != nil { 1419 return "", err 1420 } 1421 var b bytes.Buffer 1422 err = t.Execute(&b, rCtx) 1423 return b.String(), err 1424 } 1425 1426 func createOrUpdateObject[T any, PT generics.PObject[T]]( 1427 ctx context.Context, 1428 c client.Client, 1429 obj PT, 1430 mutateFunc func() error, 1431 shouldUpdate func() bool) (created bool, err error) { 1432 key := client.ObjectKeyFromObject(obj) 1433 err = c.Get(ctx, key, obj) 1434 if err != nil && !apierrors.IsNotFound(err) { 1435 return false, fmt.Errorf("failed to check existence of object %s: %w", key, err) 1436 } 1437 var patch client.Patch 1438 if err == nil { 1439 // object already exists, check if it needs to be updated 1440 if !shouldUpdate() { 1441 return false, nil 1442 } 1443 patch = client.MergeFrom(PT(obj.DeepCopy())) 1444 } 1445 if mutateFunc != nil { 1446 err := mutateFunc() 1447 if err != nil { 1448 return false, err 1449 } 1450 } 1451 if patch != nil { 1452 err = c.Patch(ctx, obj, patch) 1453 if err != nil { 1454 err = fmt.Errorf("failed to patch object %s: %w", key, err) 1455 } 1456 return false, err 1457 } else { 1458 err = c.Create(ctx, obj) 1459 if err != nil { 1460 return false, fmt.Errorf("failed to create object %s: %w", key, err) 1461 } 1462 return true, nil 1463 } 1464 } 1465 1466 func createObjectIfNotExist[T any, PT generics.PObject[T]]( 1467 ctx context.Context, 1468 c client.Client, 1469 obj PT, 1470 mutateFunc func() error) (created bool, err error) { 1471 noUpdate := func() bool { return false } 1472 return createOrUpdateObject(ctx, c, obj, mutateFunc, noUpdate) 1473 } 1474 1475 func setCondition( 1476 repo *dpv1alpha1.BackupRepo, condType string, status metav1.ConditionStatus, 1477 reason string, message string) { 1478 cond := metav1.Condition{ 1479 Type: condType, 1480 Status: status, 1481 ObservedGeneration: repo.Generation, 1482 LastTransitionTime: metav1.Now(), 1483 Reason: reason, 1484 Message: message, 1485 } 1486 meta.SetStatusCondition(&repo.Status.Conditions, cond) 1487 } 1488 1489 func updateCondition( 1490 ctx context.Context, c client.Client, repo *dpv1alpha1.BackupRepo, 1491 condType string, status metav1.ConditionStatus, reason string, message string) error { 1492 cond := meta.FindStatusCondition(repo.Status.Conditions, condType) 1493 if cond != nil { 1494 // skip 1495 if cond.Status == status && cond.Reason == reason && cond.Message == message { 1496 return nil 1497 } 1498 } 1499 patch := client.MergeFrom(repo.DeepCopy()) 1500 setCondition(repo, condType, status, reason, message) 1501 return c.Status().Patch(ctx, repo, patch) 1502 } 1503 1504 func updateAnnotations(ctx context.Context, c client.Client, 1505 repo *dpv1alpha1.BackupRepo, annotations map[string]string) error { 1506 patch := client.MergeFrom(repo.DeepCopy()) 1507 if repo.Annotations == nil { 1508 repo.Annotations = make(map[string]string) 1509 } 1510 updated := false 1511 for k, v := range annotations { 1512 if curr, ok := repo.Annotations[k]; !ok || curr != v { 1513 repo.Annotations[k] = v 1514 updated = true 1515 } 1516 } 1517 if !updated { 1518 return nil 1519 } 1520 return c.Patch(ctx, repo, patch) 1521 } 1522 1523 func md5Digest(s string) string { 1524 h := md5.New() 1525 h.Write([]byte(s)) 1526 return hex.EncodeToString(h.Sum(nil)) 1527 } 1528 1529 func stableSerializeMap(m map[string]string) string { 1530 keys := make([]string, 0, len(m)) 1531 for k := range m { 1532 keys = append(keys, k) 1533 } 1534 sort.Strings(keys) 1535 sb := strings.Builder{} 1536 for _, k := range keys { 1537 sb.WriteString(k) 1538 sb.WriteByte('=') 1539 sb.WriteString(m[k]) 1540 sb.WriteByte(';') 1541 } 1542 return sb.String() 1543 } 1544 1545 func isOwned(owner client.Object, dependent client.Object) bool { 1546 ownerUID := owner.GetUID() 1547 for _, ref := range dependent.GetOwnerReferences() { 1548 if ref.UID == ownerUID { 1549 return true 1550 } 1551 } 1552 return false 1553 } 1554 1555 func randomNameForDerivedObject(repo *dpv1alpha1.BackupRepo, prefix string) string { 1556 // the final name should not exceed 63 characters 1557 const maxBaseNameLength = 56 1558 baseName := fmt.Sprintf("%s-%s", prefix, repo.Name) 1559 if len(baseName) > maxBaseNameLength { 1560 baseName = baseName[:maxBaseNameLength] 1561 } 1562 return baseName + "-" + rand.String(6) 1563 } 1564 1565 func cutName(name string) string { 1566 if len(name) > 63 { 1567 return name[:63] 1568 } 1569 return name 1570 } 1571 1572 // this method requires the corresponding field index to be added to the Manager 1573 func fetchObjectEvents(ctx context.Context, cli client.Client, object client.Object) (*corev1.EventList, error) { 1574 eventList := &corev1.EventList{} 1575 err := cli.List(ctx, eventList, client.MatchingFields{ 1576 "involvedObject.uid": string(object.GetUID()), 1577 }) 1578 if err != nil { 1579 return nil, err 1580 } 1581 return eventList, nil 1582 }