github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/controllers/apps/opsrequest_controller.go (about)

     1  /*
     2  Copyright (C) 2022-2023 ApeCloud Co., Ltd
     3  
     4  This file is part of KubeBlocks project
     5  
     6  This program is free software: you can redistribute it and/or modify
     7  it under the terms of the GNU Affero General Public License as published by
     8  the Free Software Foundation, either version 3 of the License, or
     9  (at your option) any later version.
    10  
    11  This program is distributed in the hope that it will be useful
    12  but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  GNU Affero General Public License for more details.
    15  
    16  You should have received a copy of the GNU Affero General Public License
    17  along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18  */
    19  
    20  package apps
    21  
    22  import (
    23  	"context"
    24  	"reflect"
    25  	"time"
    26  
    27  	"golang.org/x/exp/slices"
    28  	corev1 "k8s.io/api/core/v1"
    29  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    30  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    31  	"k8s.io/apimachinery/pkg/runtime"
    32  	"k8s.io/apimachinery/pkg/types"
    33  	"k8s.io/client-go/tools/record"
    34  	ctrl "sigs.k8s.io/controller-runtime"
    35  	"sigs.k8s.io/controller-runtime/pkg/client"
    36  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    37  	"sigs.k8s.io/controller-runtime/pkg/handler"
    38  	"sigs.k8s.io/controller-runtime/pkg/log"
    39  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    40  
    41  	appsv1alpha1 "github.com/1aal/kubeblocks/apis/apps/v1alpha1"
    42  	dpv1alpha1 "github.com/1aal/kubeblocks/apis/dataprotection/v1alpha1"
    43  	workloadsv1alpha1 "github.com/1aal/kubeblocks/apis/workloads/v1alpha1"
    44  	"github.com/1aal/kubeblocks/controllers/apps/operations"
    45  	opsutil "github.com/1aal/kubeblocks/controllers/apps/operations/util"
    46  	"github.com/1aal/kubeblocks/pkg/constant"
    47  	intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil"
    48  )
    49  
// OpsRequestReconciler reconciles an OpsRequest object.
type OpsRequestReconciler struct {
	// Client is the Kubernetes client used to get, list, patch and delete
	// OpsRequests and the Clusters they reference.
	client.Client
	// Scheme is the runtime scheme supplied when the reconciler is constructed.
	Scheme   *runtime.Scheme
	// Recorder emits Kubernetes events for the OpsRequest being reconciled; it
	// is also handed to the per-request context and the OpsResource.
	Recorder record.EventRecorder
}
    56  
    57  // +kubebuilder:rbac:groups=apps.kubeblocks.io,resources=opsrequests,verbs=get;list;watch;create;update;patch;delete
    58  // +kubebuilder:rbac:groups=apps.kubeblocks.io,resources=opsrequests/status,verbs=get;update;patch
    59  // +kubebuilder:rbac:groups=apps.kubeblocks.io,resources=opsrequests/finalizers,verbs=update
    60  
    61  // Reconcile is part of the main kubernetes reconciliation loop which aims to
    62  // move the current state of the cluster closer to the desired state.
    63  // For more details, check Reconcile and its Result here:
    64  // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.11.0/pkg/reconcile
    65  func (r *OpsRequestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
    66  	reqCtx := intctrlutil.RequestCtx{
    67  		Ctx:      ctx,
    68  		Req:      req,
    69  		Log:      log.FromContext(ctx).WithValues("opsRequest", req.NamespacedName),
    70  		Recorder: r.Recorder,
    71  	}
    72  	opsCtrlHandler := &opsControllerHandler{}
    73  	return opsCtrlHandler.Handle(reqCtx, &operations.OpsResource{Recorder: r.Recorder},
    74  		r.fetchOpsRequest,
    75  		r.handleDeletion,
    76  		r.fetchCluster,
    77  		r.addClusterLabelAndSetOwnerReference,
    78  		r.handleCancelSignal,
    79  		r.handleOpsRequestByPhase,
    80  	)
    81  }
    82  
    83  // SetupWithManager sets up the controller with the Manager.
    84  func (r *OpsRequestReconciler) SetupWithManager(mgr ctrl.Manager) error {
    85  	return ctrl.NewControllerManagedBy(mgr).
    86  		For(&appsv1alpha1.OpsRequest{}).
    87  		Watches(&appsv1alpha1.Cluster{}, handler.EnqueueRequestsFromMapFunc(r.parseAllOpsRequest)).
    88  		Watches(&workloadsv1alpha1.ReplicatedStateMachine{}, handler.EnqueueRequestsFromMapFunc(r.parseAllOpsRequestForRSM)).
    89  		Watches(&dpv1alpha1.Backup{}, handler.EnqueueRequestsFromMapFunc(r.parseBackupOpsRequest)).
    90  		Watches(&corev1.PersistentVolumeClaim{}, handler.EnqueueRequestsFromMapFunc(r.parseVolumeExpansionOpsRequest)).
    91  		Complete(r)
    92  }
    93  
    94  // fetchOpsRequestAndCluster fetches the OpsRequest from the request.
    95  func (r *OpsRequestReconciler) fetchOpsRequest(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) {
    96  	opsRequest := &appsv1alpha1.OpsRequest{}
    97  	if err := r.Client.Get(reqCtx.Ctx, reqCtx.Req.NamespacedName, opsRequest); err != nil {
    98  		if !apierrors.IsNotFound(err) {
    99  			return intctrlutil.ResultToP(intctrlutil.RequeueWithError(err, reqCtx.Log, ""))
   100  		}
   101  		// if the opsRequest is not found, we need to check if this opsRequest is deleted abnormally
   102  		if err = r.handleOpsReqDeletedDuringRunning(reqCtx); err != nil {
   103  			return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, ""))
   104  		}
   105  		return intctrlutil.ResultToP(intctrlutil.Reconciled())
   106  	}
   107  	opsRes.OpsRequest = opsRequest
   108  	return nil, nil
   109  }
   110  
   111  // handleDeletion handles the delete event of the OpsRequest.
   112  func (r *OpsRequestReconciler) handleDeletion(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) {
   113  	if opsRes.OpsRequest.Status.Phase == appsv1alpha1.OpsRunningPhase {
   114  		return nil, nil
   115  	}
   116  	return intctrlutil.HandleCRDeletion(reqCtx, r, opsRes.OpsRequest, opsRequestFinalizerName, func() (*ctrl.Result, error) {
   117  		// if the OpsRequest is deleted, we should clear the OpsRequest annotation in reference cluster.
   118  		// this is mainly to prevent OpsRequest from being deleted by mistake, resulting in inconsistency.
   119  		return nil, operations.DeleteOpsRequestAnnotationInCluster(reqCtx.Ctx, r.Client, opsRes)
   120  	})
   121  }
   122  
   123  // fetchCluster fetches the Cluster from the OpsRequest.
   124  func (r *OpsRequestReconciler) fetchCluster(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) {
   125  	cluster := &appsv1alpha1.Cluster{}
   126  	if err := r.Client.Get(reqCtx.Ctx, client.ObjectKey{
   127  		Namespace: opsRes.OpsRequest.GetNamespace(),
   128  		Name:      opsRes.OpsRequest.Spec.ClusterRef,
   129  	}, cluster); err != nil {
   130  		if apierrors.IsNotFound(err) {
   131  			_ = operations.PatchClusterNotFound(reqCtx.Ctx, r.Client, opsRes)
   132  		}
   133  		return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, ""))
   134  	}
   135  	// set cluster variable
   136  	opsRes.Cluster = cluster
   137  	return nil, nil
   138  }
   139  
   140  // handleOpsRequestByPhase handles the OpsRequest by its phase.
   141  func (r *OpsRequestReconciler) handleOpsRequestByPhase(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) {
   142  	switch opsRes.OpsRequest.Status.Phase {
   143  	case "":
   144  		// update status.phase to pending
   145  		if err := operations.PatchOpsStatus(reqCtx.Ctx, r.Client, opsRes, appsv1alpha1.OpsPendingPhase, appsv1alpha1.NewProgressingCondition(opsRes.OpsRequest)); err != nil {
   146  			return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, ""))
   147  		}
   148  		return intctrlutil.ResultToP(intctrlutil.Reconciled())
   149  	case appsv1alpha1.OpsPendingPhase, appsv1alpha1.OpsCreatingPhase:
   150  		return r.doOpsRequestAction(reqCtx, opsRes)
   151  	case appsv1alpha1.OpsRunningPhase, appsv1alpha1.OpsCancellingPhase:
   152  		return r.reconcileStatusDuringRunningOrCanceling(reqCtx, opsRes)
   153  	case appsv1alpha1.OpsSucceedPhase:
   154  		return r.handleSucceedOpsRequest(reqCtx, opsRes.OpsRequest)
   155  	case appsv1alpha1.OpsFailedPhase, appsv1alpha1.OpsCancelledPhase:
   156  		return intctrlutil.ResultToP(intctrlutil.Reconciled())
   157  	}
   158  	return intctrlutil.ResultToP(intctrlutil.Reconciled())
   159  }
   160  
   161  // handleCancelSignal handles the cancel signal for opsRequest.
   162  func (r *OpsRequestReconciler) handleCancelSignal(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) {
   163  	opsRequest := opsRes.OpsRequest
   164  	if !opsRequest.Spec.Cancel {
   165  		return nil, nil
   166  	}
   167  	if opsRequest.IsComplete() || opsRequest.Status.Phase == appsv1alpha1.OpsCancellingPhase {
   168  		return nil, nil
   169  	}
   170  	opsBehaviour := operations.GetOpsManager().OpsMap[opsRequest.Spec.Type]
   171  	if opsBehaviour.CancelFunc == nil {
   172  		r.Recorder.Eventf(opsRequest, corev1.EventTypeWarning, reasonOpsCancelActionNotSupported,
   173  			"Type: %s does not support cancel action.", opsRequest.Spec.Type)
   174  		return nil, nil
   175  	}
   176  	deepCopyOps := opsRequest.DeepCopy()
   177  	if err := opsBehaviour.CancelFunc(reqCtx, r.Client, opsRes); err != nil {
   178  		r.Recorder.Eventf(opsRequest, corev1.EventTypeWarning, reasonOpsCancelActionFailed, err.Error())
   179  		return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, ""))
   180  	}
   181  	opsRequest.Status.CancelTimestamp = metav1.Time{Time: time.Now()}
   182  	if err := operations.PatchOpsStatusWithOpsDeepCopy(reqCtx.Ctx, r.Client, opsRes, deepCopyOps,
   183  		appsv1alpha1.OpsCancellingPhase, appsv1alpha1.NewCancelingCondition(opsRes.OpsRequest)); err != nil {
   184  		return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, ""))
   185  	}
   186  	return intctrlutil.ResultToP(intctrlutil.Reconciled())
   187  }
   188  
   189  // handleSucceedOpsRequest the opsRequest will be deleted after one hour when status.phase is Succeed
   190  func (r *OpsRequestReconciler) handleSucceedOpsRequest(reqCtx intctrlutil.RequestCtx, opsRequest *appsv1alpha1.OpsRequest) (*ctrl.Result, error) {
   191  	if opsRequest.Status.CompletionTimestamp.IsZero() || opsRequest.Spec.TTLSecondsAfterSucceed == 0 {
   192  		return intctrlutil.ResultToP(intctrlutil.Reconciled())
   193  	}
   194  	deadline := opsRequest.Status.CompletionTimestamp.Add(time.Duration(opsRequest.Spec.TTLSecondsAfterSucceed) * time.Second)
   195  	if time.Now().Before(deadline) {
   196  		return intctrlutil.ResultToP(intctrlutil.RequeueAfter(time.Until(deadline), reqCtx.Log, ""))
   197  	}
   198  	// the opsRequest will be deleted after spec.ttlSecondsAfterSucceed seconds when status.phase is Succeed
   199  	if err := r.Client.Delete(reqCtx.Ctx, opsRequest); err != nil {
   200  		return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, ""))
   201  	}
   202  	return intctrlutil.ResultToP(intctrlutil.Reconciled())
   203  }
   204  
   205  // reconcileStatusDuringRunningOrCanceling reconciles the status of OpsRequest when it is running or canceling.
   206  func (r *OpsRequestReconciler) reconcileStatusDuringRunningOrCanceling(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) {
   207  	opsRequest := opsRes.OpsRequest
   208  	// wait for OpsRequest.status.phase to Succeed
   209  	if requeueAfter, err := operations.GetOpsManager().Reconcile(reqCtx, r.Client, opsRes); err != nil {
   210  		r.Recorder.Eventf(opsRequest, corev1.EventTypeWarning, reasonOpsReconcileStatusFailed, "Failed to reconcile the status of OpsRequest: %s", err.Error())
   211  		return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, ""))
   212  	} else if requeueAfter != 0 {
   213  		// if the reconcileAction need requeue, do it
   214  		return intctrlutil.ResultToP(intctrlutil.RequeueAfter(requeueAfter, reqCtx.Log, ""))
   215  	}
   216  	return intctrlutil.ResultToP(intctrlutil.Reconciled())
   217  }
   218  
   219  // addClusterLabelAndSetOwnerReference adds the cluster label and set the owner reference of the OpsRequest.
   220  func (r *OpsRequestReconciler) addClusterLabelAndSetOwnerReference(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) {
   221  	// add label of clusterRef
   222  	opsRequest := opsRes.OpsRequest
   223  	clusterName := opsRequest.Labels[constant.AppInstanceLabelKey]
   224  	opsType := opsRequest.Labels[constant.OpsRequestTypeLabelKey]
   225  	if clusterName == opsRequest.Spec.ClusterRef && opsType == string(opsRequest.Spec.Type) {
   226  		return nil, nil
   227  	}
   228  	patch := client.MergeFrom(opsRequest.DeepCopy())
   229  	if opsRequest.Labels == nil {
   230  		opsRequest.Labels = map[string]string{}
   231  	}
   232  	opsRequest.Labels[constant.AppInstanceLabelKey] = opsRequest.Spec.ClusterRef
   233  	opsRequest.Labels[constant.OpsRequestTypeLabelKey] = string(opsRequest.Spec.Type)
   234  	scheme, _ := appsv1alpha1.SchemeBuilder.Build()
   235  	if err := controllerutil.SetOwnerReference(opsRes.Cluster, opsRequest, scheme); err != nil {
   236  		return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, ""))
   237  	}
   238  	if err := r.Client.Patch(reqCtx.Ctx, opsRequest, patch); err != nil {
   239  		return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, ""))
   240  	}
   241  	return intctrlutil.ResultToP(intctrlutil.Reconciled())
   242  }
   243  
   244  // doOpsRequestAction will do the action of the OpsRequest.
   245  func (r *OpsRequestReconciler) doOpsRequestAction(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) {
   246  	// process opsRequest entry function
   247  	opsRequest := opsRes.OpsRequest
   248  	opsDeepCopy := opsRequest.DeepCopy()
   249  	res, err := operations.GetOpsManager().Do(reqCtx, r.Client, opsRes)
   250  	if err != nil {
   251  		r.Recorder.Eventf(opsRequest, corev1.EventTypeWarning, reasonOpsDoActionFailed, "Failed to process the operation of OpsRequest: %s", err.Error())
   252  		if !reflect.DeepEqual(opsRequest.Status, opsDeepCopy.Status) {
   253  			if patchErr := r.Client.Status().Patch(reqCtx.Ctx, opsRequest, client.MergeFrom(opsDeepCopy)); patchErr != nil {
   254  				return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, ""))
   255  			}
   256  		}
   257  		return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, ""))
   258  	}
   259  
   260  	if res != nil {
   261  		return res, nil
   262  	}
   263  	opsRequest.Status.Phase = appsv1alpha1.OpsRunningPhase
   264  	opsRequest.Status.ClusterGeneration = opsRes.Cluster.Generation
   265  	if err = r.Client.Status().Patch(reqCtx.Ctx, opsRequest, client.MergeFrom(opsDeepCopy)); err != nil {
   266  		return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, ""))
   267  	}
   268  	return intctrlutil.ResultToP(intctrlutil.Reconciled())
   269  }
   270  
   271  // handleOpsReqDeletedDuringRunning handles the cluster annotation if the OpsRequest is deleted during running.
   272  func (r *OpsRequestReconciler) handleOpsReqDeletedDuringRunning(reqCtx intctrlutil.RequestCtx) error {
   273  	clusterList := &appsv1alpha1.ClusterList{}
   274  	if err := r.Client.List(reqCtx.Ctx, clusterList, client.InNamespace(reqCtx.Req.Namespace)); err != nil {
   275  		return err
   276  	}
   277  	for _, cluster := range clusterList.Items {
   278  		opsRequestSlice, _ := opsutil.GetOpsRequestSliceFromCluster(&cluster)
   279  		index, _ := operations.GetOpsRecorderFromSlice(opsRequestSlice, reqCtx.Req.Name)
   280  		if index == -1 {
   281  			continue
   282  		}
   283  		// if the OpsRequest is abnormal, we should clear the OpsRequest annotation in referencing cluster.
   284  		opsRequestSlice = slices.Delete(opsRequestSlice, index, index+1)
   285  		return opsutil.PatchClusterOpsAnnotations(reqCtx.Ctx, r.Client, &cluster, opsRequestSlice)
   286  	}
   287  	return nil
   288  }
   289  
   290  func (r *OpsRequestReconciler) getRequestsFromCluster(cluster *appsv1alpha1.Cluster) []reconcile.Request {
   291  	var (
   292  		opsRequestSlice []appsv1alpha1.OpsRecorder
   293  		err             error
   294  		requests        []reconcile.Request
   295  	)
   296  	if opsRequestSlice, err = opsutil.GetOpsRequestSliceFromCluster(cluster); err != nil {
   297  		return nil
   298  	}
   299  	for _, v := range opsRequestSlice {
   300  		requests = append(requests, reconcile.Request{
   301  			NamespacedName: types.NamespacedName{
   302  				Namespace: cluster.Namespace,
   303  				Name:      v.Name,
   304  			},
   305  		})
   306  	}
   307  	return requests
   308  }
   309  
   310  func (r *OpsRequestReconciler) parseAllOpsRequest(ctx context.Context, object client.Object) []reconcile.Request {
   311  	cluster := object.(*appsv1alpha1.Cluster)
   312  	return r.getRequestsFromCluster(cluster)
   313  }
   314  
   315  func (r *OpsRequestReconciler) parseAllOpsRequestForRSM(ctx context.Context, object client.Object) []reconcile.Request {
   316  	rsm := object.(*workloadsv1alpha1.ReplicatedStateMachine)
   317  	clusterName := rsm.Labels[constant.AppInstanceLabelKey]
   318  	if clusterName == "" {
   319  		return nil
   320  	}
   321  	cluster := &appsv1alpha1.Cluster{}
   322  	if err := r.Client.Get(ctx, client.ObjectKey{Name: clusterName, Namespace: rsm.Namespace}, cluster); err != nil {
   323  		return nil
   324  	}
   325  	return r.getRequestsFromCluster(cluster)
   326  }
   327  
   328  func (r *OpsRequestReconciler) parseVolumeExpansionOpsRequest(ctx context.Context, object client.Object) []reconcile.Request {
   329  	pvc := object.(*corev1.PersistentVolumeClaim)
   330  	if pvc.Labels[constant.AppManagedByLabelKey] != constant.AppName {
   331  		return nil
   332  	}
   333  	clusterName := pvc.Labels[constant.AppInstanceLabelKey]
   334  	if clusterName == "" {
   335  		return nil
   336  	}
   337  	opsRequestList, err := appsv1alpha1.GetRunningOpsByOpsType(ctx, r.Client,
   338  		pvc.Labels[constant.AppInstanceLabelKey], pvc.Namespace, string(appsv1alpha1.VolumeExpansionType))
   339  	if err != nil {
   340  		return nil
   341  	}
   342  	var requests []reconcile.Request
   343  	for _, v := range opsRequestList {
   344  		requests = append(requests, reconcile.Request{
   345  			NamespacedName: types.NamespacedName{
   346  				Namespace: v.Namespace,
   347  				Name:      v.Name,
   348  			},
   349  		})
   350  	}
   351  	return requests
   352  }
   353  
   354  func (r *OpsRequestReconciler) parseBackupOpsRequest(ctx context.Context, object client.Object) []reconcile.Request {
   355  	backup := object.(*dpv1alpha1.Backup)
   356  	var (
   357  		requests []reconcile.Request
   358  	)
   359  	opsRequestRecorder := opsutil.GetOpsRequestFromBackup(backup)
   360  	if opsRequestRecorder != nil {
   361  		requests = append(requests, reconcile.Request{
   362  			NamespacedName: types.NamespacedName{
   363  				Namespace: backup.Namespace,
   364  				Name:      opsRequestRecorder.Name,
   365  			},
   366  		})
   367  	}
   368  	return requests
   369  }
   370  
// opsRequestStep is a single stage of OpsRequest reconciliation. Returning a
// non-nil result or a non-nil error stops the remaining stages; returning
// (nil, nil) lets the next stage run (see opsControllerHandler.Handle).
type opsRequestStep func(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error)
   372  
   373  type opsControllerHandler struct {
   374  }
   375  
   376  func (h *opsControllerHandler) Handle(reqCtx intctrlutil.RequestCtx,
   377  	opsRes *operations.OpsResource,
   378  	steps ...opsRequestStep) (ctrl.Result, error) {
   379  	for _, step := range steps {
   380  		res, err := step(reqCtx, opsRes)
   381  		if res != nil {
   382  			return *res, err
   383  		}
   384  		if err != nil {
   385  			return intctrlutil.RequeueWithError(err, reqCtx.Log, "")
   386  		}
   387  	}
   388  	return intctrlutil.Reconciled()
   389  }