github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/controllers/apps/opsrequest_controller.go (about) 1 /* 2 Copyright (C) 2022-2023 ApeCloud Co., Ltd 3 4 This file is part of KubeBlocks project 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU Affero General Public License as published by 8 the Free Software Foundation, either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU Affero General Public License for more details. 15 16 You should have received a copy of the GNU Affero General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 package apps 21 22 import ( 23 "context" 24 "reflect" 25 "time" 26 27 "golang.org/x/exp/slices" 28 corev1 "k8s.io/api/core/v1" 29 apierrors "k8s.io/apimachinery/pkg/api/errors" 30 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 "k8s.io/apimachinery/pkg/runtime" 32 "k8s.io/apimachinery/pkg/types" 33 "k8s.io/client-go/tools/record" 34 ctrl "sigs.k8s.io/controller-runtime" 35 "sigs.k8s.io/controller-runtime/pkg/client" 36 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 37 "sigs.k8s.io/controller-runtime/pkg/handler" 38 "sigs.k8s.io/controller-runtime/pkg/log" 39 "sigs.k8s.io/controller-runtime/pkg/reconcile" 40 41 appsv1alpha1 "github.com/1aal/kubeblocks/apis/apps/v1alpha1" 42 dpv1alpha1 "github.com/1aal/kubeblocks/apis/dataprotection/v1alpha1" 43 workloadsv1alpha1 "github.com/1aal/kubeblocks/apis/workloads/v1alpha1" 44 "github.com/1aal/kubeblocks/controllers/apps/operations" 45 opsutil "github.com/1aal/kubeblocks/controllers/apps/operations/util" 46 "github.com/1aal/kubeblocks/pkg/constant" 47 intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil" 48 ) 49 50 // OpsRequestReconciler reconciles a OpsRequest object 51 type OpsRequestReconciler struct { 52 client.Client 53 Scheme *runtime.Scheme 54 Recorder record.EventRecorder 55 } 56 57 // +kubebuilder:rbac:groups=apps.kubeblocks.io,resources=opsrequests,verbs=get;list;watch;create;update;patch;delete 58 // +kubebuilder:rbac:groups=apps.kubeblocks.io,resources=opsrequests/status,verbs=get;update;patch 59 // +kubebuilder:rbac:groups=apps.kubeblocks.io,resources=opsrequests/finalizers,verbs=update 60 61 // Reconcile is part of the main kubernetes reconciliation loop which aims to 62 // move the current state of the cluster closer to the desired state. 63 // For more details, check Reconcile and its Result here: 64 // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.11.0/pkg/reconcile 65 func (r *OpsRequestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 66 reqCtx := intctrlutil.RequestCtx{ 67 Ctx: ctx, 68 Req: req, 69 Log: log.FromContext(ctx).WithValues("opsRequest", req.NamespacedName), 70 Recorder: r.Recorder, 71 } 72 opsCtrlHandler := &opsControllerHandler{} 73 return opsCtrlHandler.Handle(reqCtx, &operations.OpsResource{Recorder: r.Recorder}, 74 r.fetchOpsRequest, 75 r.handleDeletion, 76 r.fetchCluster, 77 r.addClusterLabelAndSetOwnerReference, 78 r.handleCancelSignal, 79 r.handleOpsRequestByPhase, 80 ) 81 } 82 83 // SetupWithManager sets up the controller with the Manager. 84 func (r *OpsRequestReconciler) SetupWithManager(mgr ctrl.Manager) error { 85 return ctrl.NewControllerManagedBy(mgr). 86 For(&appsv1alpha1.OpsRequest{}). 87 Watches(&appsv1alpha1.Cluster{}, handler.EnqueueRequestsFromMapFunc(r.parseAllOpsRequest)). 88 Watches(&workloadsv1alpha1.ReplicatedStateMachine{}, handler.EnqueueRequestsFromMapFunc(r.parseAllOpsRequestForRSM)). 89 Watches(&dpv1alpha1.Backup{}, handler.EnqueueRequestsFromMapFunc(r.parseBackupOpsRequest)). 90 Watches(&corev1.PersistentVolumeClaim{}, handler.EnqueueRequestsFromMapFunc(r.parseVolumeExpansionOpsRequest)). 91 Complete(r) 92 } 93 94 // fetchOpsRequestAndCluster fetches the OpsRequest from the request. 95 func (r *OpsRequestReconciler) fetchOpsRequest(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) { 96 opsRequest := &appsv1alpha1.OpsRequest{} 97 if err := r.Client.Get(reqCtx.Ctx, reqCtx.Req.NamespacedName, opsRequest); err != nil { 98 if !apierrors.IsNotFound(err) { 99 return intctrlutil.ResultToP(intctrlutil.RequeueWithError(err, reqCtx.Log, "")) 100 } 101 // if the opsRequest is not found, we need to check if this opsRequest is deleted abnormally 102 if err = r.handleOpsReqDeletedDuringRunning(reqCtx); err != nil { 103 return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")) 104 } 105 return intctrlutil.ResultToP(intctrlutil.Reconciled()) 106 } 107 opsRes.OpsRequest = opsRequest 108 return nil, nil 109 } 110 111 // handleDeletion handles the delete event of the OpsRequest. 112 func (r *OpsRequestReconciler) handleDeletion(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) { 113 if opsRes.OpsRequest.Status.Phase == appsv1alpha1.OpsRunningPhase { 114 return nil, nil 115 } 116 return intctrlutil.HandleCRDeletion(reqCtx, r, opsRes.OpsRequest, opsRequestFinalizerName, func() (*ctrl.Result, error) { 117 // if the OpsRequest is deleted, we should clear the OpsRequest annotation in reference cluster. 118 // this is mainly to prevent OpsRequest from being deleted by mistake, resulting in inconsistency. 119 return nil, operations.DeleteOpsRequestAnnotationInCluster(reqCtx.Ctx, r.Client, opsRes) 120 }) 121 } 122 123 // fetchCluster fetches the Cluster from the OpsRequest. 124 func (r *OpsRequestReconciler) fetchCluster(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) { 125 cluster := &appsv1alpha1.Cluster{} 126 if err := r.Client.Get(reqCtx.Ctx, client.ObjectKey{ 127 Namespace: opsRes.OpsRequest.GetNamespace(), 128 Name: opsRes.OpsRequest.Spec.ClusterRef, 129 }, cluster); err != nil { 130 if apierrors.IsNotFound(err) { 131 _ = operations.PatchClusterNotFound(reqCtx.Ctx, r.Client, opsRes) 132 } 133 return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")) 134 } 135 // set cluster variable 136 opsRes.Cluster = cluster 137 return nil, nil 138 } 139 140 // handleOpsRequestByPhase handles the OpsRequest by its phase. 141 func (r *OpsRequestReconciler) handleOpsRequestByPhase(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) { 142 switch opsRes.OpsRequest.Status.Phase { 143 case "": 144 // update status.phase to pending 145 if err := operations.PatchOpsStatus(reqCtx.Ctx, r.Client, opsRes, appsv1alpha1.OpsPendingPhase, appsv1alpha1.NewProgressingCondition(opsRes.OpsRequest)); err != nil { 146 return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")) 147 } 148 return intctrlutil.ResultToP(intctrlutil.Reconciled()) 149 case appsv1alpha1.OpsPendingPhase, appsv1alpha1.OpsCreatingPhase: 150 return r.doOpsRequestAction(reqCtx, opsRes) 151 case appsv1alpha1.OpsRunningPhase, appsv1alpha1.OpsCancellingPhase: 152 return r.reconcileStatusDuringRunningOrCanceling(reqCtx, opsRes) 153 case appsv1alpha1.OpsSucceedPhase: 154 return r.handleSucceedOpsRequest(reqCtx, opsRes.OpsRequest) 155 case appsv1alpha1.OpsFailedPhase, appsv1alpha1.OpsCancelledPhase: 156 return intctrlutil.ResultToP(intctrlutil.Reconciled()) 157 } 158 return intctrlutil.ResultToP(intctrlutil.Reconciled()) 159 } 160 161 // handleCancelSignal handles the cancel signal for opsRequest. 162 func (r *OpsRequestReconciler) handleCancelSignal(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) { 163 opsRequest := opsRes.OpsRequest 164 if !opsRequest.Spec.Cancel { 165 return nil, nil 166 } 167 if opsRequest.IsComplete() || opsRequest.Status.Phase == appsv1alpha1.OpsCancellingPhase { 168 return nil, nil 169 } 170 opsBehaviour := operations.GetOpsManager().OpsMap[opsRequest.Spec.Type] 171 if opsBehaviour.CancelFunc == nil { 172 r.Recorder.Eventf(opsRequest, corev1.EventTypeWarning, reasonOpsCancelActionNotSupported, 173 "Type: %s does not support cancel action.", opsRequest.Spec.Type) 174 return nil, nil 175 } 176 deepCopyOps := opsRequest.DeepCopy() 177 if err := opsBehaviour.CancelFunc(reqCtx, r.Client, opsRes); err != nil { 178 r.Recorder.Eventf(opsRequest, corev1.EventTypeWarning, reasonOpsCancelActionFailed, err.Error()) 179 return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")) 180 } 181 opsRequest.Status.CancelTimestamp = metav1.Time{Time: time.Now()} 182 if err := operations.PatchOpsStatusWithOpsDeepCopy(reqCtx.Ctx, r.Client, opsRes, deepCopyOps, 183 appsv1alpha1.OpsCancellingPhase, appsv1alpha1.NewCancelingCondition(opsRes.OpsRequest)); err != nil { 184 return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")) 185 } 186 return intctrlutil.ResultToP(intctrlutil.Reconciled()) 187 } 188 189 // handleSucceedOpsRequest the opsRequest will be deleted after one hour when status.phase is Succeed 190 func (r *OpsRequestReconciler) handleSucceedOpsRequest(reqCtx intctrlutil.RequestCtx, opsRequest *appsv1alpha1.OpsRequest) (*ctrl.Result, error) { 191 if opsRequest.Status.CompletionTimestamp.IsZero() || opsRequest.Spec.TTLSecondsAfterSucceed == 0 { 192 return intctrlutil.ResultToP(intctrlutil.Reconciled()) 193 } 194 deadline := opsRequest.Status.CompletionTimestamp.Add(time.Duration(opsRequest.Spec.TTLSecondsAfterSucceed) * time.Second) 195 if time.Now().Before(deadline) { 196 return intctrlutil.ResultToP(intctrlutil.RequeueAfter(time.Until(deadline), reqCtx.Log, "")) 197 } 198 // the opsRequest will be deleted after spec.ttlSecondsAfterSucceed seconds when status.phase is Succeed 199 if err := r.Client.Delete(reqCtx.Ctx, opsRequest); err != nil { 200 return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")) 201 } 202 return intctrlutil.ResultToP(intctrlutil.Reconciled()) 203 } 204 205 // reconcileStatusDuringRunningOrCanceling reconciles the status of OpsRequest when it is running or canceling. 206 func (r *OpsRequestReconciler) reconcileStatusDuringRunningOrCanceling(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) { 207 opsRequest := opsRes.OpsRequest 208 // wait for OpsRequest.status.phase to Succeed 209 if requeueAfter, err := operations.GetOpsManager().Reconcile(reqCtx, r.Client, opsRes); err != nil { 210 r.Recorder.Eventf(opsRequest, corev1.EventTypeWarning, reasonOpsReconcileStatusFailed, "Failed to reconcile the status of OpsRequest: %s", err.Error()) 211 return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")) 212 } else if requeueAfter != 0 { 213 // if the reconcileAction need requeue, do it 214 return intctrlutil.ResultToP(intctrlutil.RequeueAfter(requeueAfter, reqCtx.Log, "")) 215 } 216 return intctrlutil.ResultToP(intctrlutil.Reconciled()) 217 } 218 219 // addClusterLabelAndSetOwnerReference adds the cluster label and set the owner reference of the OpsRequest. 220 func (r *OpsRequestReconciler) addClusterLabelAndSetOwnerReference(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) { 221 // add label of clusterRef 222 opsRequest := opsRes.OpsRequest 223 clusterName := opsRequest.Labels[constant.AppInstanceLabelKey] 224 opsType := opsRequest.Labels[constant.OpsRequestTypeLabelKey] 225 if clusterName == opsRequest.Spec.ClusterRef && opsType == string(opsRequest.Spec.Type) { 226 return nil, nil 227 } 228 patch := client.MergeFrom(opsRequest.DeepCopy()) 229 if opsRequest.Labels == nil { 230 opsRequest.Labels = map[string]string{} 231 } 232 opsRequest.Labels[constant.AppInstanceLabelKey] = opsRequest.Spec.ClusterRef 233 opsRequest.Labels[constant.OpsRequestTypeLabelKey] = string(opsRequest.Spec.Type) 234 scheme, _ := appsv1alpha1.SchemeBuilder.Build() 235 if err := controllerutil.SetOwnerReference(opsRes.Cluster, opsRequest, scheme); err != nil { 236 return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")) 237 } 238 if err := r.Client.Patch(reqCtx.Ctx, opsRequest, patch); err != nil { 239 return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")) 240 } 241 return intctrlutil.ResultToP(intctrlutil.Reconciled()) 242 } 243 244 // doOpsRequestAction will do the action of the OpsRequest. 245 func (r *OpsRequestReconciler) doOpsRequestAction(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) { 246 // process opsRequest entry function 247 opsRequest := opsRes.OpsRequest 248 opsDeepCopy := opsRequest.DeepCopy() 249 res, err := operations.GetOpsManager().Do(reqCtx, r.Client, opsRes) 250 if err != nil { 251 r.Recorder.Eventf(opsRequest, corev1.EventTypeWarning, reasonOpsDoActionFailed, "Failed to process the operation of OpsRequest: %s", err.Error()) 252 if !reflect.DeepEqual(opsRequest.Status, opsDeepCopy.Status) { 253 if patchErr := r.Client.Status().Patch(reqCtx.Ctx, opsRequest, client.MergeFrom(opsDeepCopy)); patchErr != nil { 254 return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")) 255 } 256 } 257 return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")) 258 } 259 260 if res != nil { 261 return res, nil 262 } 263 opsRequest.Status.Phase = appsv1alpha1.OpsRunningPhase 264 opsRequest.Status.ClusterGeneration = opsRes.Cluster.Generation 265 if err = r.Client.Status().Patch(reqCtx.Ctx, opsRequest, client.MergeFrom(opsDeepCopy)); err != nil { 266 return intctrlutil.ResultToP(intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")) 267 } 268 return intctrlutil.ResultToP(intctrlutil.Reconciled()) 269 } 270 271 // handleOpsReqDeletedDuringRunning handles the cluster annotation if the OpsRequest is deleted during running. 272 func (r *OpsRequestReconciler) handleOpsReqDeletedDuringRunning(reqCtx intctrlutil.RequestCtx) error { 273 clusterList := &appsv1alpha1.ClusterList{} 274 if err := r.Client.List(reqCtx.Ctx, clusterList, client.InNamespace(reqCtx.Req.Namespace)); err != nil { 275 return err 276 } 277 for _, cluster := range clusterList.Items { 278 opsRequestSlice, _ := opsutil.GetOpsRequestSliceFromCluster(&cluster) 279 index, _ := operations.GetOpsRecorderFromSlice(opsRequestSlice, reqCtx.Req.Name) 280 if index == -1 { 281 continue 282 } 283 // if the OpsRequest is abnormal, we should clear the OpsRequest annotation in referencing cluster. 284 opsRequestSlice = slices.Delete(opsRequestSlice, index, index+1) 285 return opsutil.PatchClusterOpsAnnotations(reqCtx.Ctx, r.Client, &cluster, opsRequestSlice) 286 } 287 return nil 288 } 289 290 func (r *OpsRequestReconciler) getRequestsFromCluster(cluster *appsv1alpha1.Cluster) []reconcile.Request { 291 var ( 292 opsRequestSlice []appsv1alpha1.OpsRecorder 293 err error 294 requests []reconcile.Request 295 ) 296 if opsRequestSlice, err = opsutil.GetOpsRequestSliceFromCluster(cluster); err != nil { 297 return nil 298 } 299 for _, v := range opsRequestSlice { 300 requests = append(requests, reconcile.Request{ 301 NamespacedName: types.NamespacedName{ 302 Namespace: cluster.Namespace, 303 Name: v.Name, 304 }, 305 }) 306 } 307 return requests 308 } 309 310 func (r *OpsRequestReconciler) parseAllOpsRequest(ctx context.Context, object client.Object) []reconcile.Request { 311 cluster := object.(*appsv1alpha1.Cluster) 312 return r.getRequestsFromCluster(cluster) 313 } 314 315 func (r *OpsRequestReconciler) parseAllOpsRequestForRSM(ctx context.Context, object client.Object) []reconcile.Request { 316 rsm := object.(*workloadsv1alpha1.ReplicatedStateMachine) 317 clusterName := rsm.Labels[constant.AppInstanceLabelKey] 318 if clusterName == "" { 319 return nil 320 } 321 cluster := &appsv1alpha1.Cluster{} 322 if err := r.Client.Get(ctx, client.ObjectKey{Name: clusterName, Namespace: rsm.Namespace}, cluster); err != nil { 323 return nil 324 } 325 return r.getRequestsFromCluster(cluster) 326 } 327 328 func (r *OpsRequestReconciler) parseVolumeExpansionOpsRequest(ctx context.Context, object client.Object) []reconcile.Request { 329 pvc := object.(*corev1.PersistentVolumeClaim) 330 if pvc.Labels[constant.AppManagedByLabelKey] != constant.AppName { 331 return nil 332 } 333 clusterName := pvc.Labels[constant.AppInstanceLabelKey] 334 if clusterName == "" { 335 return nil 336 } 337 opsRequestList, err := appsv1alpha1.GetRunningOpsByOpsType(ctx, r.Client, 338 pvc.Labels[constant.AppInstanceLabelKey], pvc.Namespace, string(appsv1alpha1.VolumeExpansionType)) 339 if err != nil { 340 return nil 341 } 342 var requests []reconcile.Request 343 for _, v := range opsRequestList { 344 requests = append(requests, reconcile.Request{ 345 NamespacedName: types.NamespacedName{ 346 Namespace: v.Namespace, 347 Name: v.Name, 348 }, 349 }) 350 } 351 return requests 352 } 353 354 func (r *OpsRequestReconciler) parseBackupOpsRequest(ctx context.Context, object client.Object) []reconcile.Request { 355 backup := object.(*dpv1alpha1.Backup) 356 var ( 357 requests []reconcile.Request 358 ) 359 opsRequestRecorder := opsutil.GetOpsRequestFromBackup(backup) 360 if opsRequestRecorder != nil { 361 requests = append(requests, reconcile.Request{ 362 NamespacedName: types.NamespacedName{ 363 Namespace: backup.Namespace, 364 Name: opsRequestRecorder.Name, 365 }, 366 }) 367 } 368 return requests 369 } 370 371 type opsRequestStep func(reqCtx intctrlutil.RequestCtx, opsRes *operations.OpsResource) (*ctrl.Result, error) 372 373 type opsControllerHandler struct { 374 } 375 376 func (h *opsControllerHandler) Handle(reqCtx intctrlutil.RequestCtx, 377 opsRes *operations.OpsResource, 378 steps ...opsRequestStep) (ctrl.Result, error) { 379 for _, step := range steps { 380 res, err := step(reqCtx, opsRes) 381 if res != nil { 382 return *res, err 383 } 384 if err != nil { 385 return intctrlutil.RequeueWithError(err, reqCtx.Log, "") 386 } 387 } 388 return intctrlutil.Reconciled() 389 }