github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/controllers/dataprotection/restore_controller.go (about) 1 /* 2 Copyright (C) 2022-2023 ApeCloud Co., Ltd 3 4 This file is part of KubeBlocks project 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU Affero General Public License as published by 8 the Free Software Foundation, either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU Affero General Public License for more details. 15 16 You should have received a copy of the GNU Affero General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 package dataprotection 21 22 import ( 23 "context" 24 "fmt" 25 "reflect" 26 "time" 27 28 batchv1 "k8s.io/api/batch/v1" 29 corev1 "k8s.io/api/core/v1" 30 "k8s.io/apimachinery/pkg/api/meta" 31 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 32 "k8s.io/apimachinery/pkg/runtime" 33 "k8s.io/client-go/tools/record" 34 "k8s.io/klog/v2" 35 ctrl "sigs.k8s.io/controller-runtime" 36 "sigs.k8s.io/controller-runtime/pkg/client" 37 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 38 "sigs.k8s.io/controller-runtime/pkg/log" 39 40 dpv1alpha1 "github.com/1aal/kubeblocks/apis/dataprotection/v1alpha1" 41 "github.com/1aal/kubeblocks/pkg/constant" 42 intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil" 43 dprestore "github.com/1aal/kubeblocks/pkg/dataprotection/restore" 44 dptypes "github.com/1aal/kubeblocks/pkg/dataprotection/types" 45 ) 46 47 // RestoreReconciler reconciles a Restore object 48 type RestoreReconciler struct { 49 client.Client 50 Scheme *runtime.Scheme 51 Recorder record.EventRecorder 52 } 53 54 // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=restores,verbs=get;list;watch;create;update;patch;delete 55 // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=restores/status,verbs=get;update;patch 56 // +kubebuilder:rbac:groups=dataprotection.kubeblocks.io,resources=restores/finalizers,verbs=update 57 // +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete 58 59 // Reconcile is part of the main kubernetes reconciliation loop which aims to 60 // move the current state of the cluster closer to the desired state. 61 // 62 // For more details, check Reconcile and its Result here: 63 // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.11.0/pkg/reconcile 64 func (r *RestoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 65 reqCtx := intctrlutil.RequestCtx{ 66 Ctx: ctx, 67 Req: req, 68 Log: log.FromContext(ctx).WithValues("backup", req.NamespacedName), 69 Recorder: r.Recorder, 70 } 71 72 // Get restore CR 73 restore := &dpv1alpha1.Restore{} 74 if err := r.Client.Get(reqCtx.Ctx, reqCtx.Req.NamespacedName, restore); err != nil { 75 return intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "") 76 } 77 78 // handle finalizer 79 res, err := intctrlutil.HandleCRDeletion(reqCtx, r, restore, dptypes.DataProtectionFinalizerName, func() (*ctrl.Result, error) { 80 return nil, r.deleteExternalResources(reqCtx, restore) 81 }) 82 if res != nil { 83 return *res, err 84 } 85 86 switch restore.Status.Phase { 87 case "": 88 return r.newAction(reqCtx, restore) 89 case dpv1alpha1.RestorePhaseRunning: 90 return r.inProgressAction(reqCtx, restore) 91 } 92 return intctrlutil.Reconciled() 93 } 94 95 // SetupWithManager sets up the controller with the Manager. 96 func (r *RestoreReconciler) SetupWithManager(mgr ctrl.Manager) error { 97 return ctrl.NewControllerManagedBy(mgr). 98 For(&dpv1alpha1.Restore{}). 99 Owns(&batchv1.Job{}). 100 Complete(r) 101 } 102 103 func (r *RestoreReconciler) deleteExternalResources(reqCtx intctrlutil.RequestCtx, restore *dpv1alpha1.Restore) error { 104 jobs := &batchv1.JobList{} 105 if err := r.Client.List(reqCtx.Ctx, jobs, 106 client.InNamespace(restore.Namespace), 107 client.MatchingLabels(dprestore.BuildRestoreLabels(restore.Name))); err != nil { 108 return client.IgnoreNotFound(err) 109 } 110 for i := range jobs.Items { 111 job := &jobs.Items[i] 112 if controllerutil.ContainsFinalizer(job, dptypes.DataProtectionFinalizerName) { 113 patch := client.MergeFrom(job.DeepCopy()) 114 controllerutil.RemoveFinalizer(job, dptypes.DataProtectionFinalizerName) 115 if err := r.Patch(reqCtx.Ctx, job, patch); err != nil { 116 return err 117 } 118 } 119 } 120 return nil 121 } 122 123 func (r *RestoreReconciler) newAction(reqCtx intctrlutil.RequestCtx, restore *dpv1alpha1.Restore) (ctrl.Result, error) { 124 oldRestore := restore.DeepCopy() 125 patch := client.MergeFrom(oldRestore) 126 // patch metaObject 127 if restore.Labels == nil { 128 restore.Labels = map[string]string{} 129 } 130 restore.Labels[constant.AppManagedByLabelKey] = constant.AppName 131 if !reflect.DeepEqual(restore.ObjectMeta, oldRestore.ObjectMeta) { 132 if err := r.Client.Patch(reqCtx.Ctx, restore, patch); err != nil { 133 return intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "") 134 } 135 return intctrlutil.Reconciled() 136 } 137 if restore.Spec.PrepareDataConfig != nil && restore.Spec.PrepareDataConfig.DataSourceRef != nil { 138 restore.Status.Phase = dpv1alpha1.RestorePhaseAsDataSource 139 } else { 140 // patch status 141 restore.Status.StartTimestamp = &metav1.Time{Time: time.Now()} 142 restore.Status.Phase = dpv1alpha1.RestorePhaseRunning 143 r.Recorder.Event(restore, corev1.EventTypeNormal, dprestore.ReasonRestoreStarting, "start to restore") 144 } 145 if err := r.Client.Status().Patch(reqCtx.Ctx, restore, patch); err != nil { 146 return intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "") 147 } 148 return intctrlutil.Reconciled() 149 } 150 151 func (r *RestoreReconciler) inProgressAction(reqCtx intctrlutil.RequestCtx, restore *dpv1alpha1.Restore) (ctrl.Result, error) { 152 restoreMgr := dprestore.NewRestoreManager(restore, r.Recorder, r.Scheme) 153 // handle restore actions 154 err := r.handleRestoreActions(reqCtx, restoreMgr) 155 if intctrlutil.IsTargetError(err, intctrlutil.ErrorTypeFatal) { 156 // set restore phase to failed if the error is fatal. 157 restoreMgr.Restore.Status.Phase = dpv1alpha1.RestorePhaseFailed 158 restoreMgr.Restore.Status.CompletionTimestamp = &metav1.Time{Time: time.Now()} 159 restoreMgr.Restore.Status.Duration = dprestore.GetRestoreDuration(restoreMgr.Restore.Status) 160 r.Recorder.Event(restore, corev1.EventTypeWarning, dprestore.ReasonRestoreFailed, err.Error()) 161 err = nil 162 } 163 // patch restore status if changes occur 164 if !reflect.DeepEqual(restoreMgr.OriginalRestore.Status, restoreMgr.Restore.Status) { 165 err = r.Client.Status().Patch(reqCtx.Ctx, restoreMgr.Restore, client.MergeFrom(restoreMgr.OriginalRestore)) 166 } 167 if err != nil { 168 r.Recorder.Event(restore, corev1.EventTypeWarning, corev1.EventTypeWarning, err.Error()) 169 return intctrlutil.RequeueWithError(err, reqCtx.Log, "") 170 } 171 return intctrlutil.Reconciled() 172 } 173 174 func (r *RestoreReconciler) handleRestoreActions(reqCtx intctrlutil.RequestCtx, restoreMgr *dprestore.RestoreManager) error { 175 // 1. validate if the restore.spec is valid and build restore manager. 176 if err := r.validateAndBuildMGR(reqCtx, restoreMgr); err != nil { 177 return err 178 } 179 180 // 2. handle the prepareData stage. 181 isCompleted, err := r.prepareData(reqCtx, restoreMgr) 182 if err != nil { 183 return err 184 } 185 // if prepareData is not completed, return 186 if !isCompleted { 187 return nil 188 } 189 // 3. handle the postReady stage. 190 isCompleted, err = r.postReady(reqCtx, restoreMgr) 191 if err != nil { 192 return err 193 } 194 if isCompleted { 195 restoreMgr.Restore.Status.Phase = dpv1alpha1.RestorePhaseCompleted 196 restoreMgr.Restore.Status.CompletionTimestamp = &metav1.Time{Time: time.Now()} 197 restoreMgr.Restore.Status.Duration = dprestore.GetRestoreDuration(restoreMgr.Restore.Status) 198 r.Recorder.Event(restoreMgr.Restore, corev1.EventTypeNormal, dprestore.ReasonRestoreCompleted, "restore completed.") 199 } 200 return nil 201 } 202 203 // validateAndBuildMGR validates the spec is valid to restore. if ok, build a manager for restoring. 204 func (r *RestoreReconciler) validateAndBuildMGR(reqCtx intctrlutil.RequestCtx, restoreMgr *dprestore.RestoreManager) (err error) { 205 defer func() { 206 if err == nil { 207 dprestore.SetRestoreValidationCondition(restoreMgr.Restore, dprestore.ReasonValidateSuccessfully, "validate restore spec successfully") 208 } else if intctrlutil.IsTargetError(err, intctrlutil.ErrorTypeFatal) { 209 dprestore.SetRestoreValidationCondition(restoreMgr.Restore, dprestore.ReasonValidateFailed, err.Error()) 210 r.Recorder.Event(restoreMgr.Restore, corev1.EventTypeWarning, dprestore.ReasonValidateFailed, err.Error()) 211 } 212 }() 213 214 err = dprestore.ValidateAndInitRestoreMGR(reqCtx, r.Client, r.Recorder, restoreMgr) 215 return err 216 } 217 218 // prepareData handles the prepareData stage of the backups. 219 func (r *RestoreReconciler) prepareData(reqCtx intctrlutil.RequestCtx, restoreMgr *dprestore.RestoreManager) (bool, error) { 220 if len(restoreMgr.PrepareDataBackupSets) == 0 { 221 return true, nil 222 } 223 prepareDataConfig := restoreMgr.Restore.Spec.PrepareDataConfig 224 if prepareDataConfig == nil || (prepareDataConfig.RestoreVolumeClaimsTemplate == nil && len(prepareDataConfig.RestoreVolumeClaims) == 0) { 225 return true, nil 226 } 227 if meta.IsStatusConditionTrue(restoreMgr.Restore.Status.Conditions, dprestore.ConditionTypeRestorePreparedData) { 228 return true, nil 229 } 230 var ( 231 err error 232 isCompleted bool 233 ) 234 defer func() { 235 r.handleRestoreStageError(restoreMgr.Restore, dpv1alpha1.PrepareData, err) 236 }() 237 // set processing prepare data condition 238 dprestore.SetRestoreStageCondition(restoreMgr.Restore, dpv1alpha1.PrepareData, dprestore.ReasonProcessing, "processing prepareData stage.") 239 for i, v := range restoreMgr.PrepareDataBackupSets { 240 isCompleted, err = r.handleBackupActionSet(reqCtx, restoreMgr, v, dpv1alpha1.PrepareData, i) 241 if err != nil { 242 return false, err 243 } 244 // waiting for restore jobs finished. 245 if !isCompleted { 246 return false, nil 247 } 248 } 249 // set prepare data successfully condition 250 dprestore.SetRestoreStageCondition(restoreMgr.Restore, dpv1alpha1.PrepareData, dprestore.ReasonSucceed, "prepare data successfully") 251 return true, nil 252 } 253 254 func (r *RestoreReconciler) postReady(reqCtx intctrlutil.RequestCtx, restoreMgr *dprestore.RestoreManager) (bool, error) { 255 readyConfig := restoreMgr.Restore.Spec.ReadyConfig 256 if len(restoreMgr.PostReadyBackupSets) == 0 || readyConfig == nil { 257 return true, nil 258 } 259 if meta.IsStatusConditionTrue(restoreMgr.Restore.Status.Conditions, dprestore.ConditionTypeRestorePostReady) { 260 return true, nil 261 } 262 dprestore.SetRestoreStageCondition(restoreMgr.Restore, dpv1alpha1.PostReady, dprestore.ReasonProcessing, "processing postReady stage") 263 var ( 264 err error 265 isCompleted bool 266 ) 267 defer func() { 268 r.handleRestoreStageError(restoreMgr.Restore, dpv1alpha1.PrepareData, err) 269 }() 270 if readyConfig.ReadinessProbe != nil && !meta.IsStatusConditionTrue(restoreMgr.Restore.Status.Conditions, dprestore.ConditionTypeReadinessProbe) { 271 // TODO: check readiness probe, use a job and kubectl exec? 272 _ = klog.TODO() 273 } 274 for _, v := range restoreMgr.PostReadyBackupSets { 275 // handle postReady actions 276 for i := range v.ActionSet.Spec.Restore.PostReady { 277 isCompleted, err = r.handleBackupActionSet(reqCtx, restoreMgr, v, dpv1alpha1.PostReady, i) 278 if err != nil { 279 return false, err 280 } 281 // waiting for restore jobs finished. 282 if !isCompleted { 283 return false, nil 284 } 285 } 286 } 287 dprestore.SetRestoreStageCondition(restoreMgr.Restore, dpv1alpha1.PostReady, dprestore.ReasonSucceed, "processing postReady stage successfully") 288 return true, nil 289 } 290 291 func (r *RestoreReconciler) handleBackupActionSet(reqCtx intctrlutil.RequestCtx, 292 restoreMgr *dprestore.RestoreManager, 293 backupSet dprestore.BackupActionSet, 294 stage dpv1alpha1.RestoreStage, 295 step int) (bool, error) { 296 handleFailed := func(restore *dpv1alpha1.Restore, backupName string) error { 297 errorMsg := fmt.Sprintf(`restore failed for backup "%s", more information can be found in status.actions.%s`, backupName, stage) 298 dprestore.SetRestoreStageCondition(restore, stage, dprestore.ReasonFailed, errorMsg) 299 return intctrlutil.NewFatalError(errorMsg) 300 } 301 302 checkIsCompleted := func(allActionsFinished, existFailedAction bool) (bool, error) { 303 if !allActionsFinished { 304 return false, nil 305 } 306 if existFailedAction { 307 return true, handleFailed(restoreMgr.Restore, backupSet.Backup.Name) 308 } 309 return true, nil 310 } 311 312 actionName := fmt.Sprintf("%s-%d", stage, step) 313 // 1. check if the restore actions are completed from status.actions firstly. 314 allActionsFinished, existFailedAction := restoreMgr.AnalysisRestoreActionsWithBackup(stage, backupSet.Backup.Name, actionName) 315 isCompleted, err := checkIsCompleted(allActionsFinished, existFailedAction) 316 if isCompleted || err != nil { 317 return isCompleted, err 318 } 319 320 var jobs []*batchv1.Job 321 switch stage { 322 case dpv1alpha1.PrepareData: 323 if backupSet.UseVolumeSnapshot { 324 if err = restoreMgr.RestorePVCFromSnapshot(reqCtx, r.Client, backupSet); err != nil { 325 return false, nil 326 } 327 } 328 jobs, err = restoreMgr.BuildPrepareDataJobs(reqCtx, r.Client, backupSet, actionName) 329 case dpv1alpha1.PostReady: 330 // 2. build jobs for postReady action 331 jobs, err = restoreMgr.BuildPostReadyActionJobs(reqCtx, r.Client, backupSet, step) 332 } 333 if err != nil { 334 return false, err 335 } 336 if len(jobs) == 0 { 337 return true, nil 338 } 339 // 3. create jobs 340 jobs, err = restoreMgr.CreateJobsIfNotExist(reqCtx, r.Client, restoreMgr.Restore, jobs) 341 if err != nil { 342 return false, err 343 } 344 345 // 4. check if jobs are finished. 346 allActionsFinished, existFailedAction = restoreMgr.CheckJobsDone(stage, actionName, backupSet, jobs) 347 if stage == dpv1alpha1.PrepareData { 348 // recalculation whether all actions have been completed. 349 restoreMgr.Recalculation(backupSet.Backup.Name, actionName, &allActionsFinished, &existFailedAction) 350 } 351 return checkIsCompleted(allActionsFinished, existFailedAction) 352 } 353 354 func (r *RestoreReconciler) handleRestoreStageError(restore *dpv1alpha1.Restore, stage dpv1alpha1.RestoreStage, err error) { 355 if intctrlutil.IsTargetError(err, intctrlutil.ErrorTypeFatal) { 356 condition := meta.FindStatusCondition(restore.Status.Conditions, dprestore.ConditionTypeRestorePreparedData) 357 if condition != nil && condition.Reason != dprestore.ReasonFailed { 358 dprestore.SetRestoreStageCondition(restore, stage, dprestore.ReasonFailed, err.Error()) 359 } 360 } 361 }