github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/pkg/dataprotection/restore/manager.go (about) 1 /* 2 Copyright (C) 2022-2023 ApeCloud Co., Ltd 3 4 This file is part of KubeBlocks project 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU Affero General Public License as published by 8 the Free Software Foundation, either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU Affero General Public License for more details. 15 16 You should have received a copy of the GNU Affero General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 package restore 21 22 import ( 23 "fmt" 24 "sort" 25 26 vsv1 "github.com/kubernetes-csi/external-snapshotter/client/v6/apis/volumesnapshot/v1" 27 batchv1 "k8s.io/api/batch/v1" 28 corev1 "k8s.io/api/core/v1" 29 apierrors "k8s.io/apimachinery/pkg/api/errors" 30 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 "k8s.io/apimachinery/pkg/runtime" 32 "k8s.io/apimachinery/pkg/types" 33 "k8s.io/client-go/tools/record" 34 "sigs.k8s.io/controller-runtime/pkg/client" 35 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 36 37 dpv1alpha1 "github.com/1aal/kubeblocks/apis/dataprotection/v1alpha1" 38 "github.com/1aal/kubeblocks/pkg/constant" 39 intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil" 40 "github.com/1aal/kubeblocks/pkg/dataprotection/utils" 41 "github.com/1aal/kubeblocks/pkg/dataprotection/utils/boolptr" 42 ) 43 44 type BackupActionSet struct { 45 Backup *dpv1alpha1.Backup 46 ActionSet *dpv1alpha1.ActionSet 47 UseVolumeSnapshot bool 48 } 49 50 type RestoreManager struct { 51 OriginalRestore *dpv1alpha1.Restore 52 Restore *dpv1alpha1.Restore 53 PrepareDataBackupSets []BackupActionSet 54 PostReadyBackupSets []BackupActionSet 55 Schema *runtime.Scheme 56 Recorder record.EventRecorder 57 } 58 59 func NewRestoreManager(restore *dpv1alpha1.Restore, recorder record.EventRecorder, schema *runtime.Scheme) *RestoreManager { 60 return &RestoreManager{ 61 OriginalRestore: restore.DeepCopy(), 62 Restore: restore, 63 PrepareDataBackupSets: []BackupActionSet{}, 64 PostReadyBackupSets: []BackupActionSet{}, 65 Schema: schema, 66 Recorder: recorder, 67 } 68 } 69 70 // GetBackupActionSetByNamespaced gets the BackupActionSet by name and namespace of backup. 71 func (r *RestoreManager) GetBackupActionSetByNamespaced(reqCtx intctrlutil.RequestCtx, 72 cli client.Client, 73 backupName, 74 namespace string) (*BackupActionSet, error) { 75 backup := &dpv1alpha1.Backup{} 76 if err := cli.Get(reqCtx.Ctx, types.NamespacedName{Namespace: namespace, Name: backupName}, backup); err != nil { 77 if apierrors.IsNotFound(err) { 78 err = intctrlutil.NewFatalError(err.Error()) 79 } 80 return nil, err 81 } 82 backupMethod := backup.Status.BackupMethod 83 if backupMethod == nil { 84 return nil, intctrlutil.NewFatalError(fmt.Sprintf(`status.backupMethod of backup "%s" is empty`, backupName)) 85 } 86 useVolumeSnapshot := backupMethod.SnapshotVolumes != nil && *backupMethod.SnapshotVolumes 87 actionSet, err := utils.GetActionSetByName(reqCtx, cli, backup.Status.BackupMethod.ActionSetName) 88 if err != nil { 89 return nil, err 90 } 91 return &BackupActionSet{Backup: backup, ActionSet: actionSet, UseVolumeSnapshot: useVolumeSnapshot}, nil 92 } 93 94 // BuildDifferentialBackupActionSets builds the backupActionSets for specified incremental backup. 95 func (r *RestoreManager) BuildDifferentialBackupActionSets(reqCtx intctrlutil.RequestCtx, cli client.Client, sourceBackupSet BackupActionSet) error { 96 parentBackupSet, err := r.GetBackupActionSetByNamespaced(reqCtx, cli, sourceBackupSet.Backup.Spec.ParentBackupName, sourceBackupSet.Backup.Namespace) 97 if err != nil || parentBackupSet == nil { 98 return err 99 } 100 r.SetBackupSets(*parentBackupSet, sourceBackupSet) 101 return nil 102 } 103 104 // BuildIncrementalBackupActionSets builds the backupActionSets for specified incremental backup. 105 func (r *RestoreManager) BuildIncrementalBackupActionSets(reqCtx intctrlutil.RequestCtx, cli client.Client, sourceBackupSet BackupActionSet) error { 106 r.SetBackupSets(sourceBackupSet) 107 if sourceBackupSet.ActionSet != nil && sourceBackupSet.ActionSet.Spec.BackupType == dpv1alpha1.BackupTypeIncremental { 108 // get the parent BackupActionSet for incremental. 109 backupSet, err := r.GetBackupActionSetByNamespaced(reqCtx, cli, sourceBackupSet.Backup.Spec.ParentBackupName, sourceBackupSet.Backup.Namespace) 110 if err != nil || backupSet == nil { 111 return err 112 } 113 return r.BuildIncrementalBackupActionSets(reqCtx, cli, *backupSet) 114 } 115 // if reaches full backup, sort the BackupActionSets and return 116 sortBackupSets := func(backupSets []BackupActionSet, reverse bool) []BackupActionSet { 117 sort.Slice(backupSets, func(i, j int) bool { 118 if reverse { 119 i, j = j, i 120 } 121 backupI := backupSets[i].Backup 122 backupJ := backupSets[j].Backup 123 if backupI == nil { 124 return false 125 } 126 if backupJ == nil { 127 return true 128 } 129 return compareWithBackupStopTime(*backupI, *backupJ) 130 }) 131 return backupSets 132 } 133 r.PrepareDataBackupSets = sortBackupSets(r.PrepareDataBackupSets, false) 134 r.PostReadyBackupSets = sortBackupSets(r.PostReadyBackupSets, false) 135 return nil 136 } 137 138 func (r *RestoreManager) SetBackupSets(backupSets ...BackupActionSet) { 139 for i := range backupSets { 140 if backupSets[i].UseVolumeSnapshot { 141 r.PrepareDataBackupSets = append(r.PrepareDataBackupSets, backupSets[i]) 142 continue 143 } 144 if backupSets[i].ActionSet == nil || backupSets[i].ActionSet.Spec.Restore == nil { 145 continue 146 } 147 if backupSets[i].ActionSet.Spec.Restore.PrepareData != nil { 148 r.PrepareDataBackupSets = append(r.PrepareDataBackupSets, backupSets[i]) 149 } 150 151 if len(backupSets[i].ActionSet.Spec.Restore.PostReady) > 0 { 152 r.PostReadyBackupSets = append(r.PostReadyBackupSets, backupSets[i]) 153 } 154 } 155 } 156 157 // AnalysisRestoreActionsWithBackup analysis the restore actions progress group by backup. 158 // check if the restore jobs are completed or failed or processing. 159 func (r *RestoreManager) AnalysisRestoreActionsWithBackup(stage dpv1alpha1.RestoreStage, backupName string, actionName string) (bool, bool) { 160 var ( 161 restoreActionCount int 162 finishedActionCount int 163 existFailedAction bool 164 ) 165 restoreActions := r.Restore.Status.Actions.PostReady 166 if stage == dpv1alpha1.PrepareData { 167 restoreActions = r.Restore.Status.Actions.PrepareData 168 // if the stage is prepareData, actionCount keeps up with pvc count. 169 restoreActionCount = GetRestoreActionsCountForPrepareData(r.Restore.Spec.PrepareDataConfig) 170 } 171 for i := range restoreActions { 172 if restoreActions[i].BackupName != backupName || restoreActions[i].Name != actionName { 173 continue 174 } 175 // if the stage is PostReady, actionCount keeps up with actions 176 if stage == dpv1alpha1.PostReady { 177 restoreActionCount += 1 178 } 179 switch restoreActions[i].Status { 180 case dpv1alpha1.RestoreActionFailed: 181 finishedActionCount += 1 182 existFailedAction = true 183 case dpv1alpha1.RestoreActionCompleted: 184 finishedActionCount += 1 185 } 186 } 187 188 allActionsFinished := restoreActionCount > 0 && finishedActionCount == restoreActionCount 189 return allActionsFinished, existFailedAction 190 } 191 192 func (r *RestoreManager) RestorePVCFromSnapshot(reqCtx intctrlutil.RequestCtx, cli client.Client, backupSet BackupActionSet) error { 193 prepareDataConfig := r.Restore.Spec.PrepareDataConfig 194 if prepareDataConfig == nil { 195 return nil 196 } 197 createPVCWithSnapshot := func(claim dpv1alpha1.RestoreVolumeClaim) error { 198 if claim.VolumeSource == "" { 199 return intctrlutil.NewFatalError(fmt.Sprintf(`claim "%s"" volumeSource can not be empty if the backup uses volume snapshot`, claim.Name)) 200 } 201 202 // TODO: compatibility handling for version 0.6/0.5, will be removed in 0.8. 203 volumeSnapshotName := backupSet.Backup.Name 204 vsCli := &intctrlutil.VolumeSnapshotCompatClient{ 205 Client: cli, 206 Ctx: reqCtx.Ctx, 207 } 208 if exist, err := vsCli.CheckResourceExists(types.NamespacedName{Namespace: backupSet.Backup.Namespace, Name: volumeSnapshotName}, &vsv1.VolumeSnapshot{}); err != nil { 209 return err 210 } else if !exist { 211 volumeSnapshotName = utils.GetBackupVolumeSnapshotName(backupSet.Backup.Name, claim.VolumeSource) 212 } 213 // get volumeSnapshot by backup and volumeSource. 214 claim.VolumeClaimSpec.DataSource = &corev1.TypedLocalObjectReference{ 215 Name: volumeSnapshotName, 216 Kind: constant.VolumeSnapshotKind, 217 APIGroup: &VolumeSnapshotGroup, 218 } 219 return r.createPVCIfNotExist(reqCtx, cli, claim.ObjectMeta, claim.VolumeClaimSpec) 220 } 221 222 for _, claim := range prepareDataConfig.RestoreVolumeClaims { 223 if err := createPVCWithSnapshot(claim); err != nil { 224 return err 225 } 226 } 227 claimTemplate := prepareDataConfig.RestoreVolumeClaimsTemplate 228 229 if claimTemplate != nil { 230 restoreJobReplicas := GetRestoreActionsCountForPrepareData(prepareDataConfig) 231 for i := 0; i < restoreJobReplicas; i++ { 232 // create pvc from claims template, build volumes and volumeMounts 233 for _, claim := range prepareDataConfig.RestoreVolumeClaimsTemplate.Templates { 234 claim.Name = fmt.Sprintf("%s-%d", claim.Name, i+int(claimTemplate.StartingIndex)) 235 if err := createPVCWithSnapshot(claim); err != nil { 236 return err 237 } 238 } 239 } 240 } 241 // NOTE: do not to record status action for restoring from snapshot. it is not defined in ActionSet. 242 return nil 243 } 244 245 func (r *RestoreManager) prepareBackupRepo(reqCtx intctrlutil.RequestCtx, cli client.Client, backupSet BackupActionSet) (*dpv1alpha1.BackupRepo, error) { 246 if backupSet.Backup.Status.BackupRepoName != "" { 247 backupRepo := &dpv1alpha1.BackupRepo{} 248 err := cli.Get(reqCtx.Ctx, client.ObjectKey{Name: backupSet.Backup.Status.BackupRepoName}, backupRepo) 249 if err != nil { 250 if apierrors.IsNotFound(err) { 251 err = intctrlutil.NewFatalError(err.Error()) 252 } 253 return nil, err 254 } 255 return backupRepo, nil 256 } 257 return nil, nil 258 } 259 260 // BuildPrepareDataJobs builds the restore jobs for prepare pvc's data, and will create the target pvcs if not exist. 261 func (r *RestoreManager) BuildPrepareDataJobs(reqCtx intctrlutil.RequestCtx, cli client.Client, backupSet BackupActionSet, actionName string) ([]*batchv1.Job, error) { 262 prepareDataConfig := r.Restore.Spec.PrepareDataConfig 263 if prepareDataConfig == nil { 264 return nil, nil 265 } 266 if !backupSet.ActionSet.HasPrepareDataStage() { 267 return nil, nil 268 } 269 backupRepo, err := r.prepareBackupRepo(reqCtx, cli, backupSet) 270 if err != nil { 271 return nil, err 272 } 273 jobBuilder := newRestoreJobBuilder(r.Restore, backupSet, backupRepo, dpv1alpha1.PrepareData). 274 setImage(backupSet.ActionSet.Spec.Restore.PrepareData.Image). 275 setCommand(backupSet.ActionSet.Spec.Restore.PrepareData.Command). 276 addCommonEnv(). 277 attachBackupRepo() 278 279 createPVCIfNotExistsAndBuildVolume := func(claim dpv1alpha1.RestoreVolumeClaim, identifier string) (*corev1.Volume, *corev1.VolumeMount, error) { 280 if err := r.createPVCIfNotExist(reqCtx, cli, claim.ObjectMeta, claim.VolumeClaimSpec); err != nil { 281 return nil, nil, err 282 } 283 return jobBuilder.buildPVCVolumeAndMount(claim.VolumeConfig, claim.Name, identifier) 284 } 285 286 // create pvc from volumeClaims, set volume and volumeMount to jobBuilder 287 for _, claim := range prepareDataConfig.RestoreVolumeClaims { 288 volume, volumeMount, err := createPVCIfNotExistsAndBuildVolume(claim, "dp-claim") 289 if err != nil { 290 return nil, err 291 } 292 jobBuilder.addToCommonVolumesAndMounts(volume, volumeMount) 293 } 294 295 var ( 296 restoreJobs []*batchv1.Job 297 restoreJobReplicas = GetRestoreActionsCountForPrepareData(prepareDataConfig) 298 claimsTemplate = prepareDataConfig.RestoreVolumeClaimsTemplate 299 ) 300 301 if prepareDataConfig.IsSerialPolicy() { 302 // obtain the PVC serial number that needs to be restored 303 currentOrder := 1 304 prepareActions := r.Restore.Status.Actions.PrepareData 305 for i := range prepareActions { 306 if prepareActions[i].BackupName != backupSet.Backup.Name || prepareActions[i].Name != actionName { 307 continue 308 } 309 if prepareActions[i].Status == dpv1alpha1.RestoreActionCompleted && currentOrder < restoreJobReplicas { 310 currentOrder += 1 311 if prepareDataConfig.IsSerialPolicy() { 312 // if the restore policy is Serial, should delete the completed job to release the pvc. 313 if err := deleteRestoreJob(reqCtx, cli, prepareActions[i].ObjectKey, r.Restore.Namespace); err != nil { 314 return nil, err 315 } 316 } 317 } 318 } 319 restoreJobReplicas = currentOrder 320 } 321 322 // build restore job to prepare pvc's data 323 for i := 0; i < restoreJobReplicas; i++ { 324 // reset specific volumes and volumeMounts 325 jobBuilder.resetSpecificVolumesAndMounts() 326 if claimsTemplate != nil { 327 // create pvc from claims template, build volumes and volumeMounts 328 for _, claim := range claimsTemplate.Templates { 329 claim.Name = fmt.Sprintf("%s-%d", claim.Name, i+int(claimsTemplate.StartingIndex)) 330 volume, volumeMount, err := createPVCIfNotExistsAndBuildVolume(claim, "dp-claim-tpl") 331 if err != nil { 332 return nil, err 333 } 334 jobBuilder.addToSpecificVolumesAndMounts(volume, volumeMount) 335 } 336 } 337 // build job and append 338 job := jobBuilder.setJobName(jobBuilder.builderRestoreJobName(i)).build() 339 if prepareDataConfig.IsSerialPolicy() && 340 restoreJobHasCompleted(r.Restore.Status.Actions.PrepareData, job.Name) { 341 // if the job has completed and the restore policy is Serial, continue 342 continue 343 } 344 restoreJobs = append(restoreJobs, job) 345 } 346 return restoreJobs, nil 347 } 348 349 func (r *RestoreManager) BuildVolumePopulateJob( 350 reqCtx intctrlutil.RequestCtx, 351 cli client.Client, 352 backupSet BackupActionSet, 353 populatePVC *corev1.PersistentVolumeClaim, 354 index int) (*batchv1.Job, error) { 355 prepareDataConfig := r.Restore.Spec.PrepareDataConfig 356 if prepareDataConfig == nil && prepareDataConfig.DataSourceRef == nil { 357 return nil, nil 358 } 359 if !backupSet.ActionSet.HasPrepareDataStage() { 360 return nil, nil 361 } 362 backupRepo, err := r.prepareBackupRepo(reqCtx, cli, backupSet) 363 if err != nil { 364 return nil, err 365 } 366 jobBuilder := newRestoreJobBuilder(r.Restore, backupSet, backupRepo, dpv1alpha1.PrepareData). 367 setJobName(fmt.Sprintf("%s-%d", populatePVC.Name, index)). 368 addLabel(DataProtectionLabelPopulatePVCKey, populatePVC.Name). 369 setImage(backupSet.ActionSet.Spec.Restore.PrepareData.Image). 370 setCommand(backupSet.ActionSet.Spec.Restore.PrepareData.Command). 371 attachBackupRepo(). 372 addCommonEnv() 373 volume, volumeMount, err := jobBuilder.buildPVCVolumeAndMount(*prepareDataConfig.DataSourceRef, populatePVC.Name, "dp-claim") 374 if err != nil { 375 return nil, err 376 } 377 job := jobBuilder.addToSpecificVolumesAndMounts(volume, volumeMount).build() 378 return job, nil 379 } 380 381 // BuildPostReadyActionJobs builds the post ready jobs. 382 func (r *RestoreManager) BuildPostReadyActionJobs(reqCtx intctrlutil.RequestCtx, cli client.Client, backupSet BackupActionSet, step int) ([]*batchv1.Job, error) { 383 readyConfig := r.Restore.Spec.ReadyConfig 384 if readyConfig == nil { 385 return nil, nil 386 } 387 if !backupSet.ActionSet.HasPostReadyStage() { 388 return nil, nil 389 } 390 backupRepo, err := r.prepareBackupRepo(reqCtx, cli, backupSet) 391 if err != nil { 392 return nil, err 393 } 394 actionSpec := backupSet.ActionSet.Spec.Restore.PostReady[step] 395 getTargetPodList := func(labelSelector metav1.LabelSelector, msgKey string) ([]corev1.Pod, error) { 396 targetPodList, err := utils.GetPodListByLabelSelector(reqCtx, cli, labelSelector) 397 if err != nil { 398 return nil, err 399 } 400 if len(targetPodList.Items) == 0 { 401 return nil, fmt.Errorf("can not found any pod by spec.readyConfig.%s.target.podSelector", msgKey) 402 } 403 return targetPodList.Items, nil 404 } 405 406 jobBuilder := newRestoreJobBuilder(r.Restore, backupSet, backupRepo, dpv1alpha1.PostReady).addCommonEnv() 407 408 buildJobName := func(index int) string { 409 jobName := fmt.Sprintf("restore-post-ready-%s-%s-%d-%d", r.Restore.UID[:8], backupSet.Backup.Name, step, index) 410 return cutJobName(jobName) 411 } 412 413 buildJobsForJobAction := func() ([]*batchv1.Job, error) { 414 jobAction := r.Restore.Spec.ReadyConfig.JobAction 415 if jobAction == nil { 416 return nil, intctrlutil.NewFatalError("spec.readyConfig.jobAction can not be empty") 417 } 418 targetPodList, err := getTargetPodList(jobAction.Target.PodSelector, "jobAction") 419 if err != nil { 420 return nil, err 421 } 422 targetPod := targetPodList[0] 423 if boolptr.IsSetToTrue(actionSpec.Job.RunOnTargetPodNode) { 424 jobBuilder.setNodeNameToNodeSelector(targetPod.Spec.NodeName) 425 // mount the targe pod's volumes when RunOnTargetPodNode is true 426 for _, volumeMount := range jobAction.Target.VolumeMounts { 427 for _, volume := range targetPod.Spec.Volumes { 428 if volume.Name != volumeMount.Name { 429 continue 430 } 431 jobBuilder.addToSpecificVolumesAndMounts(&volume, &volumeMount) 432 } 433 } 434 } 435 job := jobBuilder.setImage(actionSpec.Job.Image). 436 setJobName(buildJobName(0)). 437 attachBackupRepo(). 438 setCommand(actionSpec.Job.Command). 439 setToleration(targetPod.Spec.Tolerations). 440 addTargetPodAndCredentialEnv(&targetPod, r.Restore.Spec.ReadyConfig.ConnectionCredential). 441 build() 442 return []*batchv1.Job{job}, nil 443 } 444 445 buildJobsForExecAction := func() ([]*batchv1.Job, error) { 446 execAction := r.Restore.Spec.ReadyConfig.ExecAction 447 if execAction == nil { 448 return nil, intctrlutil.NewFatalError("spec.readyConfig.execAction can not be empty") 449 } 450 targetPodList, err := getTargetPodList(execAction.Target.PodSelector, "execAction") 451 if err != nil { 452 return nil, err 453 } 454 var restoreJobs []*batchv1.Job 455 for i := range targetPodList { 456 containerName := actionSpec.Exec.Container 457 if containerName == "" { 458 containerName = targetPodList[i].Spec.Containers[0].Name 459 } 460 command := fmt.Sprintf("kubectl -n %s exec -it pod/%s -c %s -- %s", targetPodList[i].Namespace, targetPodList[i].Name, containerName, actionSpec.Exec.Command) 461 jobBuilder.setImage(constant.KBToolsImage).setCommand([]string{"sh", "-c", command}). 462 setJobName(buildJobName(i)). 463 setToleration(targetPodList[i].Spec.Tolerations). 464 addTargetPodAndCredentialEnv(&targetPodList[i], r.Restore.Spec.ReadyConfig.ConnectionCredential) 465 restoreJobs = append(restoreJobs, jobBuilder.build()) 466 } 467 return restoreJobs, nil 468 } 469 470 if actionSpec.Job != nil { 471 return buildJobsForJobAction() 472 } 473 return buildJobsForExecAction() 474 } 475 476 func (r *RestoreManager) createPVCIfNotExist( 477 reqCtx intctrlutil.RequestCtx, 478 cli client.Client, 479 claimMetadata metav1.ObjectMeta, 480 claimSpec corev1.PersistentVolumeClaimSpec) error { 481 claimMetadata.Namespace = reqCtx.Req.Namespace 482 pvc := &corev1.PersistentVolumeClaim{ 483 ObjectMeta: claimMetadata, 484 Spec: claimSpec, 485 } 486 tmpPVC := &corev1.PersistentVolumeClaim{} 487 if err := cli.Get(reqCtx.Ctx, types.NamespacedName{Name: claimMetadata.Name, Namespace: claimMetadata.Namespace}, tmpPVC); err != nil { 488 if !apierrors.IsNotFound(err) { 489 return err 490 } 491 msg := fmt.Sprintf("created pvc %s/%s", pvc.Namespace, pvc.Name) 492 r.Recorder.Event(r.Restore, corev1.EventTypeNormal, reasonCreateRestorePVC, msg) 493 if err = cli.Create(reqCtx.Ctx, pvc); err != nil { 494 return client.IgnoreAlreadyExists(err) 495 } 496 } 497 return nil 498 } 499 500 // CreateJobsIfNotExist creates the jobs if not exist. 501 func (r *RestoreManager) CreateJobsIfNotExist(reqCtx intctrlutil.RequestCtx, 502 cli client.Client, 503 ownerObj client.Object, 504 objs []*batchv1.Job) ([]*batchv1.Job, error) { 505 // creates jobs if not exist 506 var fetchedJobs []*batchv1.Job 507 for i := range objs { 508 if objs[i] == nil { 509 continue 510 } 511 fetchedJob := &batchv1.Job{} 512 if err := cli.Get(reqCtx.Ctx, client.ObjectKeyFromObject(objs[i]), fetchedJob); err != nil { 513 if !apierrors.IsNotFound(err) { 514 return nil, err 515 } 516 if err = controllerutil.SetControllerReference(ownerObj, objs[i], r.Schema); err != nil { 517 return nil, err 518 } 519 if err = cli.Create(reqCtx.Ctx, objs[i]); err != nil && !apierrors.IsAlreadyExists(err) { 520 return nil, err 521 } 522 msg := fmt.Sprintf("created job %s/%s", objs[i].Namespace, objs[i].Name) 523 r.Recorder.Event(r.Restore, corev1.EventTypeNormal, reasonCreateRestoreJob, msg) 524 fetchedJobs = append(fetchedJobs, objs[i]) 525 } else { 526 fetchedJobs = append(fetchedJobs, fetchedJob) 527 } 528 } 529 return fetchedJobs, nil 530 } 531 532 // CheckJobsDone checks if jobs are completed or failed. 533 func (r *RestoreManager) CheckJobsDone( 534 stage dpv1alpha1.RestoreStage, 535 actionName string, 536 backupSet BackupActionSet, 537 fetchedJobs []*batchv1.Job) (bool, bool) { 538 var ( 539 allJobFinished = true 540 existFailedJob bool 541 ) 542 restoreActions := &r.Restore.Status.Actions.PrepareData 543 if stage == dpv1alpha1.PostReady { 544 restoreActions = &r.Restore.Status.Actions.PostReady 545 } 546 for i := range fetchedJobs { 547 statusAction := dpv1alpha1.RestoreStatusAction{ 548 Name: actionName, 549 ObjectKey: buildJobKeyForActionStatus(fetchedJobs[i].Name), 550 BackupName: backupSet.Backup.Name, 551 } 552 done, _, errMsg := utils.IsJobFinished(fetchedJobs[i]) 553 switch { 554 case errMsg != "": 555 existFailedJob = true 556 statusAction.Status = dpv1alpha1.RestoreActionFailed 557 statusAction.Message = errMsg 558 SetRestoreStatusAction(restoreActions, statusAction) 559 case done: 560 statusAction.Status = dpv1alpha1.RestoreActionCompleted 561 SetRestoreStatusAction(restoreActions, statusAction) 562 default: 563 allJobFinished = false 564 statusAction.Status = dpv1alpha1.RestoreActionProcessing 565 SetRestoreStatusAction(restoreActions, statusAction) 566 } 567 } 568 return allJobFinished, existFailedJob 569 } 570 571 // Recalculation whether all actions have been completed. 572 func (r *RestoreManager) Recalculation(backupName, actionName string, allActionsFinished, existFailedAction *bool) { 573 prepareDataConfig := r.Restore.Spec.PrepareDataConfig 574 if !prepareDataConfig.IsSerialPolicy() { 575 return 576 } 577 578 if *existFailedAction { 579 // under the Serial policy, restore will be failed if any action is failed. 580 *allActionsFinished = true 581 return 582 } 583 var actionCount int 584 for _, v := range r.Restore.Status.Actions.PrepareData { 585 if v.Name == actionName && v.BackupName == backupName { 586 actionCount += 1 587 } 588 } 589 if actionCount != GetRestoreActionsCountForPrepareData(prepareDataConfig) { 590 // if the number of actions is not equal to the number of target actions, the recovery has not yet ended 591 *allActionsFinished = false 592 } 593 }