github.com/percona/percona-xtradb-cluster-operator@v1.14.0/pkg/controller/pxcbackup/controller.go (about) 1 package pxcbackup 2 3 import ( 4 "context" 5 "os" 6 "reflect" 7 "strconv" 8 "strings" 9 "sync" 10 "time" 11 12 "github.com/pkg/errors" 13 batchv1 "k8s.io/api/batch/v1" 14 corev1 "k8s.io/api/core/v1" 15 k8sErrors "k8s.io/apimachinery/pkg/api/errors" 16 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 17 "k8s.io/apimachinery/pkg/labels" 18 "k8s.io/apimachinery/pkg/runtime" 19 "k8s.io/apimachinery/pkg/types" 20 "k8s.io/client-go/util/retry" 21 "sigs.k8s.io/controller-runtime/pkg/builder" 22 "sigs.k8s.io/controller-runtime/pkg/client" 23 "sigs.k8s.io/controller-runtime/pkg/handler" 24 logf "sigs.k8s.io/controller-runtime/pkg/log" 25 "sigs.k8s.io/controller-runtime/pkg/manager" 26 "sigs.k8s.io/controller-runtime/pkg/reconcile" 27 28 "github.com/percona/percona-xtradb-cluster-operator/clientcmd" 29 api "github.com/percona/percona-xtradb-cluster-operator/pkg/apis/pxc/v1" 30 "github.com/percona/percona-xtradb-cluster-operator/pkg/pxc/app/deployment" 31 "github.com/percona/percona-xtradb-cluster-operator/pkg/pxc/backup" 32 "github.com/percona/percona-xtradb-cluster-operator/pkg/pxc/backup/storage" 33 "github.com/percona/percona-xtradb-cluster-operator/version" 34 ) 35 36 // Add creates a new PerconaXtraDBClusterBackup Controller and adds it to the Manager. The Manager will set fields on the Controller 37 // and Start it when the Manager is Started. 38 func Add(mgr manager.Manager) error { 39 r, err := newReconciler(mgr) 40 if err != nil { 41 return err 42 } 43 44 return add(mgr, r) 45 } 46 47 // newReconciler returns a new reconcile.Reconciler 48 func newReconciler(mgr manager.Manager) (reconcile.Reconciler, error) { 49 sv, err := version.Server() 50 if err != nil { 51 return nil, errors.Wrap(err, "get version") 52 } 53 54 limit := 10 55 56 envLimStr := os.Getenv("S3_WORKERS_LIMIT") 57 if envLimStr != "" { 58 envLim, err := strconv.Atoi(envLimStr) 59 if err != nil || envLim <= 0 { 60 return nil, errors.Wrapf(err, "invalid S3_WORKERS_LIMIT value (%s), should be positive int", envLimStr) 61 } 62 63 limit = envLim 64 } 65 66 cli, err := clientcmd.NewClient() 67 if err != nil { 68 return nil, errors.Wrap(err, "create clientcmd") 69 } 70 71 return &ReconcilePerconaXtraDBClusterBackup{ 72 client: mgr.GetClient(), 73 scheme: mgr.GetScheme(), 74 serverVersion: sv, 75 clientcmd: cli, 76 chLimit: make(chan struct{}, limit), 77 bcpDeleteInProgress: new(sync.Map), 78 }, nil 79 } 80 81 // add adds a new Controller to mgr with r as the reconcile.Reconciler 82 func add(mgr manager.Manager, r reconcile.Reconciler) error { 83 return builder.ControllerManagedBy(mgr). 84 Named("pxcbackup-controller"). 85 Watches(&api.PerconaXtraDBClusterBackup{}, &handler.EnqueueRequestForObject{}). 86 Complete(r) 87 } 88 89 var _ reconcile.Reconciler = &ReconcilePerconaXtraDBClusterBackup{} 90 91 // ReconcilePerconaXtraDBClusterBackup reconciles a PerconaXtraDBClusterBackup object 92 type ReconcilePerconaXtraDBClusterBackup struct { 93 // This client, initialized using mgr.Client() above, is a split client 94 // that reads objects from the cache and writes to the apiserver 95 client client.Client 96 scheme *runtime.Scheme 97 98 serverVersion *version.ServerVersion 99 clientcmd *clientcmd.Client 100 chLimit chan struct{} 101 bcpDeleteInProgress *sync.Map 102 } 103 104 // Reconcile reads that state of the cluster for a PerconaXtraDBClusterBackup object and makes changes based on the state read 105 // and what is in the PerconaXtraDBClusterBackup.Spec 106 // Note: 107 // The Controller will requeue the Request to be processed again if the returned error is non-nil or 108 // Result.Requeue is true, otherwise upon completion it will remove the work from the queue. 109 func (r *ReconcilePerconaXtraDBClusterBackup) Reconcile(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { 110 log := logf.FromContext(ctx) 111 112 rr := reconcile.Result{ 113 RequeueAfter: time.Second * 5, 114 } 115 116 // Fetch the PerconaXtraDBClusterBackup instance 117 cr := &api.PerconaXtraDBClusterBackup{} 118 err := r.client.Get(context.TODO(), request.NamespacedName, cr) 119 if err != nil { 120 if k8sErrors.IsNotFound(err) { 121 // Request object not found, could have been deleted after reconcile request. 122 // Owned objects are automatically garbage collected. For additional cleanup logic use finalizers. 123 // Return and don't requeue 124 return rr, nil 125 } 126 // Error reading the object - requeue the request. 127 return reconcile.Result{}, err 128 } 129 130 err = r.tryRunBackupFinalizers(ctx, cr) 131 if err != nil { 132 return reconcile.Result{}, errors.Wrap(err, "failed to run finalizers") 133 } 134 135 if cr.Status.State == api.BackupSucceeded || 136 cr.Status.State == api.BackupFailed { 137 if len(cr.GetFinalizers()) > 0 { 138 return rr, nil 139 } 140 return reconcile.Result{}, nil 141 } 142 143 if cr.DeletionTimestamp != nil { 144 return rr, nil 145 } 146 147 cluster, err := r.getCluster(cr) 148 if err != nil { 149 log.Error(err, "invalid backup cluster") 150 return rr, nil 151 } 152 153 err = cluster.CheckNSetDefaults(r.serverVersion, log) 154 if err != nil { 155 return rr, errors.Wrap(err, "wrong PXC options") 156 } 157 158 if cluster.Spec.Backup == nil { 159 return rr, errors.New("a backup image should be set in the PXC config") 160 } 161 162 if err := cluster.CanBackup(); err != nil { 163 return rr, errors.Wrap(err, "failed to run backup") 164 } 165 166 if !cluster.Spec.Backup.GetAllowParallel() { 167 isRunning, err := r.isOtherBackupRunning(ctx, cr) 168 if err != nil { 169 return rr, errors.Wrap(err, "failed to check if other backups running") 170 } 171 if isRunning { 172 log.Info("backup already running, waiting until it's done") 173 return rr, nil 174 } 175 } 176 177 storage, ok := cluster.Spec.Backup.Storages[cr.Spec.StorageName] 178 if !ok { 179 return rr, errors.Errorf("storage %s doesn't exist", cr.Spec.StorageName) 180 } 181 if cr.Status.S3 == nil || cr.Status.Azure == nil { 182 cr.Status.S3 = storage.S3 183 cr.Status.Azure = storage.Azure 184 cr.Status.StorageType = storage.Type 185 cr.Status.Image = cluster.Spec.Backup.Image 186 cr.Status.SSLSecretName = cluster.Spec.PXC.SSLSecretName 187 cr.Status.SSLInternalSecretName = cluster.Spec.PXC.SSLInternalSecretName 188 cr.Status.VaultSecretName = cluster.Spec.PXC.VaultSecretName 189 cr.Status.VerifyTLS = storage.VerifyTLS 190 } 191 192 bcp := backup.New(cluster) 193 job := bcp.Job(cr, cluster) 194 job.Spec, err = bcp.JobSpec(cr.Spec, cluster, job) 195 if err != nil { 196 return rr, errors.Wrap(err, "can't create job spec") 197 } 198 199 switch storage.Type { 200 case api.BackupStorageFilesystem: 201 pvc := backup.NewPVC(cr) 202 pvc.Spec = *storage.Volume.PersistentVolumeClaim 203 204 cr.Status.Destination.SetPVCDestination(pvc.Name) 205 206 // Set PerconaXtraDBClusterBackup instance as the owner and controller 207 if err := setControllerReference(cr, pvc, r.scheme); err != nil { 208 return rr, errors.Wrap(err, "setControllerReference") 209 } 210 211 // Check if this PVC already exists 212 err = r.client.Get(context.TODO(), types.NamespacedName{Name: pvc.Name, Namespace: pvc.Namespace}, pvc) 213 if err != nil && k8sErrors.IsNotFound(err) { 214 log.Info("Creating a new volume for backup", "Namespace", pvc.Namespace, "Name", pvc.Name) 215 err = r.client.Create(context.TODO(), pvc) 216 if err != nil { 217 return rr, errors.Wrap(err, "create backup pvc") 218 } 219 } else if err != nil { 220 return rr, errors.Wrap(err, "get backup pvc") 221 } 222 223 err := backup.SetStoragePVC(&job.Spec, cr, pvc.Name) 224 if err != nil { 225 return rr, errors.Wrap(err, "set storage FS") 226 } 227 case api.BackupStorageS3: 228 if storage.S3 == nil { 229 return rr, errors.New("s3 storage is not specified") 230 } 231 cr.Status.Destination.SetS3Destination(storage.S3.Bucket, cr.Spec.PXCCluster+"-"+cr.CreationTimestamp.Time.Format("2006-01-02-15:04:05")+"-full") 232 233 err := backup.SetStorageS3(&job.Spec, cr) 234 if err != nil { 235 return rr, errors.Wrap(err, "set storage FS") 236 } 237 case api.BackupStorageAzure: 238 if storage.Azure == nil { 239 return rr, errors.New("azure storage is not specified") 240 } 241 cr.Status.Destination.SetAzureDestination(storage.Azure.ContainerPath, cr.Spec.PXCCluster+"-"+cr.CreationTimestamp.Time.Format("2006-01-02-15:04:05")+"-full") 242 243 err := backup.SetStorageAzure(&job.Spec, cr) 244 if err != nil { 245 return rr, errors.Wrap(err, "set storage FS for Azure") 246 } 247 } 248 249 // Set PerconaXtraDBClusterBackup instance as the owner and controller 250 if err := setControllerReference(cr, job, r.scheme); err != nil { 251 return rr, errors.Wrap(err, "job/setControllerReference") 252 } 253 254 err = r.client.Create(context.TODO(), job) 255 if err != nil && !k8sErrors.IsAlreadyExists(err) { 256 return rr, errors.Wrap(err, "create backup job") 257 } else if err == nil { 258 log.Info("Created a new backup job", "Namespace", job.Namespace, "Name", job.Name) 259 } 260 261 err = r.updateJobStatus(cr, job, cr.Spec.StorageName, storage, cluster) 262 263 return rr, err 264 } 265 266 func (r *ReconcilePerconaXtraDBClusterBackup) tryRunBackupFinalizers(ctx context.Context, cr *api.PerconaXtraDBClusterBackup) error { 267 if cr.ObjectMeta.DeletionTimestamp == nil { 268 return nil 269 } 270 271 select { 272 case r.chLimit <- struct{}{}: 273 _, ok := r.bcpDeleteInProgress.LoadOrStore(cr.Name, struct{}{}) 274 if ok { 275 <-r.chLimit 276 return nil 277 } 278 279 go r.runDeleteBackupFinalizer(ctx, cr) 280 default: 281 if _, ok := r.bcpDeleteInProgress.Load(cr.Name); !ok { 282 inprog := []string{} 283 r.bcpDeleteInProgress.Range(func(key, value interface{}) bool { 284 inprog = append(inprog, key.(string)) 285 return true 286 }) 287 288 logf.FromContext(ctx).Info("all workers are busy - skip backup deletion for now", 289 "backup", cr.Name, "in progress", strings.Join(inprog, ", ")) 290 } 291 } 292 293 return nil 294 } 295 296 func (r *ReconcilePerconaXtraDBClusterBackup) runDeleteBackupFinalizer(ctx context.Context, cr *api.PerconaXtraDBClusterBackup) { 297 log := logf.FromContext(ctx) 298 299 defer func() { 300 r.bcpDeleteInProgress.Delete(cr.Name) 301 <-r.chLimit 302 }() 303 304 var finalizers []string 305 for _, f := range cr.GetFinalizers() { 306 var err error 307 switch f { 308 case api.FinalizerDeleteS3Backup: 309 if (cr.Status.S3 == nil && cr.Status.Azure == nil) || cr.Status.Destination == "" { 310 continue 311 } 312 switch cr.Status.GetStorageType(nil) { 313 case api.BackupStorageS3: 314 if cr.Status.Destination.StorageTypePrefix() != api.AwsBlobStoragePrefix { 315 continue 316 } 317 err = r.runS3BackupFinalizer(ctx, cr) 318 case api.BackupStorageAzure: 319 err = r.runAzureBackupFinalizer(ctx, cr) 320 default: 321 continue 322 } 323 default: 324 finalizers = append(finalizers, f) 325 } 326 if err != nil { 327 log.Info("failed to delete backup", "backup path", cr.Status.Destination, "error", err.Error()) 328 finalizers = append(finalizers, f) 329 } else if f == api.FinalizerDeleteS3Backup { 330 log.Info("backup was removed", "name", cr.Name) 331 } 332 } 333 cr.SetFinalizers(finalizers) 334 335 err := r.client.Update(ctx, cr) 336 if err != nil { 337 log.Error(err, "failed to update finalizers for backup", "backup", cr.Name) 338 } 339 } 340 341 func (r *ReconcilePerconaXtraDBClusterBackup) runS3BackupFinalizer(ctx context.Context, cr *api.PerconaXtraDBClusterBackup) error { 342 log := logf.FromContext(ctx) 343 344 if cr.Status.S3 == nil { 345 return errors.New("s3 storage is not specified") 346 } 347 348 sec := corev1.Secret{} 349 err := r.client.Get(ctx, 350 types.NamespacedName{Name: cr.Status.S3.CredentialsSecret, Namespace: cr.Namespace}, &sec) 351 if err != nil { 352 return errors.Wrap(err, "failed to get secret") 353 } 354 355 opts, err := storage.GetOptionsFromBackup(ctx, r.client, nil, cr) 356 if err != nil { 357 return errors.Wrap(err, "get storage options") 358 } 359 storage, err := storage.NewClient(ctx, opts) 360 if err != nil { 361 return errors.Wrap(err, "new s3 storage") 362 } 363 364 backupName := cr.Status.Destination.BackupName() 365 log.Info("deleting backup from s3", "name", cr.Name, "bucket", cr.Status.S3.Bucket, "backupName", backupName) 366 err = retry.OnError(retry.DefaultBackoff, func(e error) bool { return true }, removeBackupObjects(ctx, storage, backupName)) 367 if err != nil { 368 return errors.Wrapf(err, "failed to delete backup %s", cr.Name) 369 } 370 return nil 371 } 372 373 func (r *ReconcilePerconaXtraDBClusterBackup) runAzureBackupFinalizer(ctx context.Context, cr *api.PerconaXtraDBClusterBackup) error { 374 log := logf.FromContext(ctx) 375 376 if cr.Status.Azure == nil { 377 return errors.New("azure storage is not specified") 378 } 379 380 opts, err := storage.GetOptionsFromBackup(ctx, r.client, nil, cr) 381 if err != nil { 382 return errors.Wrap(err, "get storage options") 383 } 384 azureStorage, err := storage.NewClient(ctx, opts) 385 if err != nil { 386 return errors.Wrap(err, "new azure storage") 387 } 388 389 backupName := cr.Status.Destination.BackupName() 390 log.Info("Deleting backup from azure", "name", cr.Name, "backupName", backupName) 391 err = retry.OnError(retry.DefaultBackoff, 392 func(e error) bool { 393 return true 394 }, 395 removeBackupObjects(ctx, azureStorage, backupName)) 396 if err != nil { 397 return errors.Wrapf(err, "failed to delete backup %s", cr.Name) 398 } 399 return nil 400 } 401 402 func removeBackupObjects(ctx context.Context, s storage.Storage, destination string) func() error { 403 return func() error { 404 blobs, err := s.ListObjects(ctx, destination) 405 if err != nil { 406 return errors.Wrap(err, "list backup blobs") 407 } 408 for _, blob := range blobs { 409 if err := s.DeleteObject(ctx, blob); err != nil { 410 return errors.Wrapf(err, "delete object %s", blob) 411 } 412 } 413 if err := s.DeleteObject(ctx, strings.TrimSuffix(destination, "/")+".md5"); err != nil && err != storage.ErrObjectNotFound { 414 return errors.Wrapf(err, "delete object %s", strings.TrimSuffix(destination, "/")+".md5") 415 } 416 destination = strings.TrimSuffix(destination, "/") + ".sst_info/" 417 blobs, err = s.ListObjects(ctx, destination) 418 if err != nil { 419 return errors.Wrap(err, "list backup objects") 420 } 421 for _, blob := range blobs { 422 if err := s.DeleteObject(ctx, blob); err != nil { 423 return errors.Wrapf(err, "delete object %s", blob) 424 } 425 } 426 if err := s.DeleteObject(ctx, strings.TrimSuffix(destination, "/")+".md5"); err != nil && err != storage.ErrObjectNotFound { 427 return errors.Wrapf(err, "delete object %s", strings.TrimSuffix(destination, "/")+".md5") 428 } 429 return nil 430 } 431 } 432 433 func (r *ReconcilePerconaXtraDBClusterBackup) getCluster(cr *api.PerconaXtraDBClusterBackup) (*api.PerconaXtraDBCluster, error) { 434 cluster := api.PerconaXtraDBCluster{} 435 err := r.client.Get(context.TODO(), types.NamespacedName{Namespace: cr.Namespace, Name: cr.Spec.PXCCluster}, &cluster) 436 if err != nil { 437 return nil, errors.Wrap(err, "get PXC cluster") 438 } 439 440 return &cluster, nil 441 } 442 443 func (r *ReconcilePerconaXtraDBClusterBackup) updateJobStatus(bcp *api.PerconaXtraDBClusterBackup, job *batchv1.Job, 444 storageName string, storage *api.BackupStorageSpec, cluster *api.PerconaXtraDBCluster, 445 ) error { 446 err := r.client.Get(context.TODO(), types.NamespacedName{Name: job.Name, Namespace: job.Namespace}, job) 447 if err != nil { 448 if k8sErrors.IsNotFound(err) { 449 return nil 450 } 451 452 return errors.Wrap(err, "get backup status") 453 } 454 455 status := api.PXCBackupStatus{ 456 State: api.BackupStarting, 457 Destination: bcp.Status.Destination, 458 StorageName: storageName, 459 S3: storage.S3, 460 Azure: storage.Azure, 461 StorageType: storage.Type, 462 Image: bcp.Status.Image, 463 SSLSecretName: bcp.Status.SSLSecretName, 464 SSLInternalSecretName: bcp.Status.SSLInternalSecretName, 465 VaultSecretName: bcp.Status.VaultSecretName, 466 VerifyTLS: storage.VerifyTLS, 467 } 468 469 if job.Status.Active == 1 { 470 status.State = api.BackupRunning 471 } 472 473 for _, cond := range job.Status.Conditions { 474 if cond.Status != corev1.ConditionTrue { 475 continue 476 } 477 switch cond.Type { 478 case batchv1.JobFailed: 479 status.State = api.BackupFailed 480 case batchv1.JobComplete: 481 status.State = api.BackupSucceeded 482 status.CompletedAt = job.Status.CompletionTime 483 } 484 } 485 486 // don't update the status if there aren't any changes. 487 if reflect.DeepEqual(bcp.Status, status) { 488 return nil 489 } 490 491 bcp.Status = status 492 493 if status.State == api.BackupSucceeded { 494 if cluster.PITREnabled() { 495 collectorPod, err := deployment.GetBinlogCollectorPod(context.TODO(), r.client, cluster) 496 if err != nil { 497 return errors.Wrap(err, "get binlog collector pod") 498 } 499 500 if err := deployment.RemoveGapFile(context.TODO(), r.clientcmd, collectorPod); err != nil { 501 if !errors.Is(err, deployment.GapFileNotFound) { 502 return errors.Wrap(err, "remove gap file") 503 } 504 } 505 506 if err := deployment.RemoveTimelineFile(context.TODO(), r.clientcmd, collectorPod); err != nil { 507 return errors.Wrap(err, "remove timeline file") 508 } 509 } 510 511 initSecret := corev1.Secret{ 512 ObjectMeta: metav1.ObjectMeta{ 513 Name: cluster.Name + "-mysql-init", 514 Namespace: cluster.Namespace, 515 }, 516 } 517 if err := r.client.Delete(context.TODO(), &initSecret); client.IgnoreNotFound(err) != nil { 518 return errors.Wrap(err, "delete mysql-init secret") 519 } 520 } 521 522 err = r.client.Status().Update(context.TODO(), bcp) 523 if err != nil { 524 return errors.Wrap(err, "send update") 525 } 526 527 return nil 528 } 529 530 func setControllerReference(cr *api.PerconaXtraDBClusterBackup, obj metav1.Object, scheme *runtime.Scheme) error { 531 ownerRef, err := cr.OwnerRef(scheme) 532 if err != nil { 533 return err 534 } 535 obj.SetOwnerReferences(append(obj.GetOwnerReferences(), ownerRef)) 536 return nil 537 } 538 539 func (r *ReconcilePerconaXtraDBClusterBackup) isOtherBackupRunning(ctx context.Context, cr *api.PerconaXtraDBClusterBackup) (bool, error) { 540 list := new(batchv1.JobList) 541 lbls := map[string]string{ 542 "type": "xtrabackup", 543 "cluster": cr.Spec.PXCCluster, 544 } 545 if err := r.client.List(ctx, list, &client.ListOptions{ 546 Namespace: cr.Namespace, 547 LabelSelector: labels.SelectorFromSet(lbls), 548 }); err != nil { 549 return false, errors.Wrap(err, "list jobs") 550 } 551 552 for _, job := range list.Items { 553 if job.Labels["backup-name"] == cr.Name || job.Labels["backup-name"] == "" { 554 continue 555 } 556 if job.Status.Succeeded > 0 { 557 continue 558 } 559 560 return true, nil 561 } 562 563 return false, nil 564 }