github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/controllers/extensions/addon_controller_stages.go (about)

     1  /*
     2  Copyright (C) 2022-2023 ApeCloud Co., Ltd
     3  
     4  This file is part of KubeBlocks project
     5  
     6  This program is free software: you can redistribute it and/or modify
     7  it under the terms of the GNU Affero General Public License as published by
     8  the Free Software Foundation, either version 3 of the License, or
     9  (at your option) any later version.
    10  
    11  This program is distributed in the hope that it will be useful
    12  but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  GNU Affero General Public License for more details.
    15  
    16  You should have received a copy of the GNU Affero General Public License
    17  along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18  */
    19  
    20  package extensions
    21  
    22  import (
    23  	"context"
    24  	"encoding/json"
    25  	"fmt"
    26  	"strings"
    27  	"time"
    28  
    29  	ctrlerihandler "github.com/authzed/controller-idioms/handler"
    30  	"golang.org/x/exp/slices"
    31  	batchv1 "k8s.io/api/batch/v1"
    32  	corev1 "k8s.io/api/core/v1"
    33  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    34  	"k8s.io/apimachinery/pkg/api/meta"
    35  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    36  	corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
    37  	ctrl "sigs.k8s.io/controller-runtime"
    38  	"sigs.k8s.io/controller-runtime/pkg/client"
    39  
    40  	extensionsv1alpha1 "github.com/1aal/kubeblocks/apis/extensions/v1alpha1"
    41  	"github.com/1aal/kubeblocks/pkg/constant"
    42  	intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil"
    43  	viper "github.com/1aal/kubeblocks/pkg/viperx"
    44  )
    45  
// stageCtx carries the state shared by every reconciliation stage:
// the per-request context (which also transports result/error/operand
// values, see updateResultNErr/doReturn/process), the owning reconciler,
// and the next handler in the controller-idioms stage chain.
type stageCtx struct {
	reqCtx     *intctrlutil.RequestCtx
	reconciler *AddonReconciler
	next       ctrlerihandler.Handler
}
    51  
const (
	// context-value keys used by stageCtx to pass the reconcile result,
	// error, and the Addon operand between chained stages
	resultValueKey  = "result"
	errorValueKey   = "err"
	operandValueKey = "operand"
	// trueVal is the expected string value of boolean-like annotations
	trueVal = "true"
	// localChartsPath is the mount path of the shared charts volume used
	// when an addon's chart is served from a local charts image
	localChartsPath = "/charts"
)
    59  
// init seeds viper defaults for the addon installer: the ServiceAccount name
// used by helm jobs and the default helm install/uninstall CLI options.
func init() {
	viper.SetDefault(addonSANameKey, "kubeblocks-addon-installer")
	// --atomic/--cleanup-on-fail/--wait make a failed install roll back and
	// block until resources are ready
	viper.SetDefault(addonHelmInstallOptKey, []string{
		"--atomic",
		"--cleanup-on-fail",
		"--wait",
	})
	viper.SetDefault(addonHelmUninstallOptKey, []string{})
}
    69  
    70  func (r *stageCtx) setReconciled() {
    71  	res, err := intctrlutil.Reconciled()
    72  	r.updateResultNErr(&res, err)
    73  }
    74  
    75  func (r *stageCtx) setRequeueAfter(duration time.Duration, msg string) {
    76  	res, err := intctrlutil.RequeueAfter(duration, r.reqCtx.Log, msg)
    77  	r.updateResultNErr(&res, err)
    78  }
    79  
    80  // func (r *stageCtx) setRequeue(msg string) {
    81  // 	res, err := intctrlutil.Requeue(r.reqCtx.Log, msg)
    82  // 	r.updateResultNErr(&res, err)
    83  // }
    84  
    85  func (r *stageCtx) setRequeueWithErr(err error, msg string) {
    86  	res, err := intctrlutil.CheckedRequeueWithError(err, r.reqCtx.Log, msg)
    87  	r.updateResultNErr(&res, err)
    88  }
    89  
    90  func (r *stageCtx) updateResultNErr(res *ctrl.Result, err error) {
    91  	r.reqCtx.UpdateCtxValue(errorValueKey, err)
    92  	r.reqCtx.UpdateCtxValue(resultValueKey, res)
    93  }
    94  
    95  func (r *stageCtx) doReturn() (*ctrl.Result, error) {
    96  	res, _ := r.reqCtx.Ctx.Value(resultValueKey).(*ctrl.Result)
    97  	err, _ := r.reqCtx.Ctx.Value(errorValueKey).(error)
    98  	return res, err
    99  }
   100  
   101  func (r *stageCtx) process(processor func(*extensionsv1alpha1.Addon)) {
   102  	res, _ := r.doReturn()
   103  	if res != nil {
   104  		return
   105  	}
   106  	addon := r.reqCtx.Ctx.Value(operandValueKey).(*extensionsv1alpha1.Addon)
   107  	processor(addon)
   108  }
   109  
// fetchNDeletionCheckStage fetches the Addon operand and routes deletion
// handling to its embedded deletionStage.
type fetchNDeletionCheckStage struct {
	stageCtx
	deletionStage deletionStage
}

// deletionStage drives teardown of an addon being deleted, delegating
// helm uninstall work to its disablingStage.
type deletionStage struct {
	stageCtx
	disablingStage disablingStage
}

// genIDProceedCheckStage short-circuits reconciliation when the observed
// generation already matches and the addon is in a settled phase.
type genIDProceedCheckStage struct {
	stageCtx
}

// installableCheckStage evaluates spec.installable.selectors and disables
// the addon when no selector requirement matches.
type installableCheckStage struct {
	stageCtx
}

// autoInstallCheckStage enables an addon with default values when
// spec.installable.autoInstall is set and no installSpec was provided.
type autoInstallCheckStage struct {
	stageCtx
}

// enabledWithDefaultValuesStage fills in default install values for an
// enabled addon whose installSpec carries no explicit values.
type enabledWithDefaultValuesStage struct {
	stageCtx
}

// progressingHandler decides between the enabling and disabling paths and
// patches the corresponding phase transitions.
type progressingHandler struct {
	stageCtx
	enablingStage  enablingStage
	disablingStage disablingStage
}

// helmTypeInstallStage creates and tracks the `helm upgrade --install` job.
type helmTypeInstallStage struct {
	stageCtx
}

// helmTypeUninstallStage creates and tracks the `helm delete` job.
type helmTypeUninstallStage struct {
	stageCtx
}

// enablingStage dispatches enabling work by addon type (currently helm only).
type enablingStage struct {
	stageCtx
	helmTypeInstallStage helmTypeInstallStage
}

// disablingStage dispatches disabling work by addon type (currently helm only).
type disablingStage struct {
	stageCtx
	helmTypeUninstallStage helmTypeUninstallStage
}

// terminalStateStage patches the addon into its terminal Enabled/Disabled
// phase with a Succeed condition.
type terminalStateStage struct {
	stageCtx
}
   163  
// Handle fetches the Addon named in the request, stores it as the operand
// in the request context, and runs the deletion sub-stage when the CR is
// being deleted; otherwise it continues down the normal reconcile chain.
func (r *fetchNDeletionCheckStage) Handle(ctx context.Context) {
	addon := &extensionsv1alpha1.Addon{}
	if err := r.reconciler.Client.Get(ctx, r.reqCtx.Req.NamespacedName, addon); err != nil {
		// CheckedRequeueWithError classifies the fetch error (including
		// not-found) into the appropriate result
		res, err := intctrlutil.CheckedRequeueWithError(err, r.reqCtx.Log, "")
		r.updateResultNErr(&res, err)
		return
	}
	r.reqCtx.Log.V(1).Info("get addon", "generation", addon.Generation, "observedGeneration", addon.Status.ObservedGeneration)
	// make the operand available to all downstream stages via process()
	r.reqCtx.UpdateCtxValue(operandValueKey, addon)
	// HandleCRDeletion runs the deletion stage chain only when the CR has a
	// deletion timestamp; it also manages the finalizer
	res, err := intctrlutil.HandleCRDeletion(*r.reqCtx, r.reconciler, addon, addonFinalizerName, func() (*ctrl.Result, error) {
		r.deletionStage.Handle(ctx)
		return r.deletionStage.doReturn()
	})
	if res != nil || err != nil {
		r.updateResultNErr(res, err)
		return
	}
	r.reqCtx.Log.V(1).Info("start normal reconcile")
	r.next.Handle(ctx)
}
   184  
// Handle stops reconciliation early when the addon's generation has already
// been observed in a settled phase (Enabled/Disabled/Failed); for
// Enabled/Disabled it additionally cleans up leftover external resources
// (e.g. finished jobs) before returning reconciled.
func (r *genIDProceedCheckStage) Handle(ctx context.Context) {
	r.process(func(addon *extensionsv1alpha1.Addon) {
		r.reqCtx.Log.V(1).Info("genIDProceedCheckStage", "phase", addon.Status.Phase)
		switch addon.Status.Phase {
		case extensionsv1alpha1.AddonEnabled, extensionsv1alpha1.AddonDisabled:
			if addon.Generation == addon.Status.ObservedGeneration {
				res, err := r.reconciler.deleteExternalResources(*r.reqCtx, addon)
				if res != nil || err != nil {
					r.updateResultNErr(res, err)
					return
				}
				r.setReconciled()
				return
			}
		case extensionsv1alpha1.AddonFailed:
			// a failed addon at the observed generation stays failed until
			// the spec changes (generation bump)
			if addon.Generation == addon.Status.ObservedGeneration {
				r.setReconciled()
				return
			}
		}
	})
	r.next.Handle(ctx)
}
   208  
// Handle tears an addon down while its CR is being deleted: it walks the
// phase machine Enabling/Enabled -> Disabling -> Disabled, running the
// disabling sub-stage (helm uninstall) in between, and deletes external
// resources for any other phase.
func (r *deletionStage) Handle(ctx context.Context) {
	// propagate the current request context into the sub-stage
	r.disablingStage.stageCtx = r.stageCtx
	r.process(func(addon *extensionsv1alpha1.Addon) {
		r.reqCtx.Log.V(1).Info("deletionStage", "phase", addon.Status.Phase)
		// patchPhase updates status.phase/observedGeneration, emits an event,
		// and marks the reconcile as done
		patchPhase := func(phase extensionsv1alpha1.AddonPhase, reason string) {
			r.reqCtx.Log.V(1).Info("patching status", "phase", phase)
			patch := client.MergeFrom(addon.DeepCopy())
			addon.Status.Phase = phase
			addon.Status.ObservedGeneration = addon.Generation
			if err := r.reconciler.Status().Patch(ctx, addon, patch); err != nil {
				r.setRequeueWithErr(err, "")
				return
			}
			r.reqCtx.Log.V(1).Info("progress to", "phase", phase)
			r.reconciler.Event(addon, corev1.EventTypeNormal, reason,
				fmt.Sprintf("Progress to %s phase", phase))
			r.setReconciled()
		}
		switch addon.Status.Phase {
		case extensionsv1alpha1.AddonEnabling:
			// delete running jobs
			res, err := r.reconciler.deleteExternalResources(*r.reqCtx, addon)
			// NOTE(review): sibling call sites check `res != nil || err != nil`;
			// here a non-nil res with nil err is ignored and the phase patch
			// proceeds — confirm this asymmetry is intentional.
			if err != nil {
				r.updateResultNErr(res, err)
				return
			}
			patchPhase(extensionsv1alpha1.AddonDisabling, DisablingAddon)
			return
		case extensionsv1alpha1.AddonEnabled:
			patchPhase(extensionsv1alpha1.AddonDisabling, DisablingAddon)
			return
		case extensionsv1alpha1.AddonDisabling:
			// run the helm uninstall sub-stage; only progress to Disabled once
			// it reports neither a pending result nor an error
			r.disablingStage.Handle(ctx)
			res, err := r.disablingStage.doReturn()

			if res != nil || err != nil {
				return
			}
			patchPhase(extensionsv1alpha1.AddonDisabled, AddonDisabled)
			return
		default:
			r.reqCtx.Log.V(1).Info("delete external resources", "phase", addon.Status.Phase)
			res, err := r.reconciler.deleteExternalResources(*r.reqCtx, addon)
			if res != nil || err != nil {
				r.updateResultNErr(res, err)
				return
			}
			return
		}
	})
	r.next.Handle(ctx)
}
   261  
// Handle enforces spec.installable.selectors: when any selector fails to
// match the runtime configuration, the addon is patched to Disabled with an
// unmatched condition. The check is skipped when no installable spec exists,
// when an explicit installSpec is set, when the SkipInstallableCheck
// annotation is "true", or while the addon is mid-transition.
func (r *installableCheckStage) Handle(ctx context.Context) {
	r.process(func(addon *extensionsv1alpha1.Addon) {
		r.reqCtx.Log.V(1).Info("installableCheckStage", "phase", addon.Status.Phase)
		if addon.Spec.Installable == nil {
			return
		}
		// proceed if has specified addon.spec.installSpec
		if addon.Spec.InstallSpec != nil {
			return
		}
		if addon.Annotations != nil && addon.Annotations[SkipInstallableCheck] == trueVal {
			r.reconciler.Event(addon, corev1.EventTypeWarning, InstallableCheckSkipped,
				"Installable check skipped.")
			return
		}
		// do not interfere with in-flight enable/disable transitions
		switch addon.Status.Phase {
		case extensionsv1alpha1.AddonEnabling, extensionsv1alpha1.AddonDisabling:
			return
		}
		for _, s := range addon.Spec.Installable.Selectors {
			if s.MatchesFromConfig() {
				continue
			}
			// first unmatched selector disables the addon and records why
			patch := client.MergeFrom(addon.DeepCopy())
			addon.Status.ObservedGeneration = addon.Generation
			addon.Status.Phase = extensionsv1alpha1.AddonDisabled
			meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
				Type:               extensionsv1alpha1.ConditionTypeChecked,
				Status:             metav1.ConditionFalse,
				ObservedGeneration: addon.Generation,
				Reason:             InstallableRequirementUnmatched,
				Message:            "spec.installable.selectors has no matching requirement.",
				LastTransitionTime: metav1.Now(),
			})

			if err := r.reconciler.Status().Patch(ctx, addon, patch); err != nil {
				r.setRequeueWithErr(err, "")
				return
			}
			r.reconciler.Event(addon, corev1.EventTypeWarning, InstallableRequirementUnmatched,
				fmt.Sprintf("Does not meet installable requirements for key %v", s))
			r.setReconciled()
			return
		}
	})
	r.next.Handle(ctx)
}
   309  
   310  func (r *autoInstallCheckStage) Handle(ctx context.Context) {
   311  	r.process(func(addon *extensionsv1alpha1.Addon) {
   312  		r.reqCtx.Log.V(1).Info("autoInstallCheckStage", "phase", addon.Status.Phase)
   313  		if addon.Spec.Installable == nil || !addon.Spec.Installable.AutoInstall {
   314  			return
   315  		}
   316  		// proceed if has specified addon.spec.installSpec
   317  		if addon.Spec.InstallSpec != nil {
   318  			r.reqCtx.Log.V(1).Info("has specified addon.spec.installSpec")
   319  			return
   320  		}
   321  		enabledAddonWithDefaultValues(ctx, &r.stageCtx, addon, AddonAutoInstall, "Addon enabled auto-install")
   322  	})
   323  	r.next.Handle(ctx)
   324  }
   325  
   326  func (r *enabledWithDefaultValuesStage) Handle(ctx context.Context) {
   327  	r.process(func(addon *extensionsv1alpha1.Addon) {
   328  		r.reqCtx.Log.V(1).Info("enabledWithDefaultValuesStage", "phase", addon.Status.Phase)
   329  		if addon.Spec.InstallSpec.HasSetValues() || addon.Spec.InstallSpec.IsDisabled() {
   330  			r.reqCtx.Log.V(1).Info("has specified addon.spec.installSpec")
   331  			return
   332  		}
   333  		if v, ok := addon.Annotations[AddonDefaultIsEmpty]; ok && v == trueVal {
   334  			return
   335  		}
   336  		enabledAddonWithDefaultValues(ctx, &r.stageCtx, addon, AddonSetDefaultValues, "Addon enabled with default values")
   337  	})
   338  	r.next.Handle(ctx)
   339  }
   340  
// Handle routes the addon into its enabling or disabling path based on
// installSpec.enabled, patching the transitional phase first and then
// delegating to the matching sub-stage. When re-enabling after a failure,
// any leftover failed install job is deleted first.
func (r *progressingHandler) Handle(ctx context.Context) {
	// propagate the current request context into both sub-stages
	r.enablingStage.stageCtx = r.stageCtx
	r.disablingStage.stageCtx = r.stageCtx
	r.process(func(addon *extensionsv1alpha1.Addon) {
		r.reqCtx.Log.V(1).Info("progressingHandler", "phase", addon.Status.Phase)
		// patchPhase updates status.phase/observedGeneration, emits an event,
		// and marks the reconcile as done
		patchPhase := func(phase extensionsv1alpha1.AddonPhase, reason string) {
			r.reqCtx.Log.V(1).Info("patching status", "phase", phase)
			patch := client.MergeFrom(addon.DeepCopy())
			addon.Status.Phase = phase
			addon.Status.ObservedGeneration = addon.Generation
			if err := r.reconciler.Status().Patch(ctx, addon, patch); err != nil {
				r.setRequeueWithErr(err, "")
				return
			}
			r.reconciler.Event(addon, corev1.EventTypeNormal, reason,
				fmt.Sprintf("Progress to %s phase", phase))
			r.setReconciled()
		}

		// decision enabling or disabling
		if !addon.Spec.InstallSpec.GetEnabled() {
			r.reqCtx.Log.V(1).Info("progress to disabling stage handler")
			// if it's new simply return
			if addon.Status.Phase == "" {
				return
			}
			if addon.Status.Phase != extensionsv1alpha1.AddonDisabling {
				patchPhase(extensionsv1alpha1.AddonDisabling, DisablingAddon)
				return
			}
			r.disablingStage.Handle(ctx)
			return
		}
		// handling enabling state
		if addon.Status.Phase != extensionsv1alpha1.AddonEnabling {
			if addon.Status.Phase == extensionsv1alpha1.AddonFailed {
				// clean up existing failed installation job
				mgrNS := viper.GetString(constant.CfgKeyCtrlrMgrNS)
				key := client.ObjectKey{
					Namespace: mgrNS,
					Name:      getInstallJobName(addon),
				}
				installJob := &batchv1.Job{}
				if err := r.reconciler.Get(ctx, key, installJob); client.IgnoreNotFound(err) != nil {
					r.setRequeueWithErr(err, "")
					return
				} else if err == nil && installJob.GetDeletionTimestamp().IsZero() {
					if err = r.reconciler.Delete(ctx, installJob); err != nil {
						r.setRequeueWithErr(err, "")
						return
					}
				}
			}
			patchPhase(extensionsv1alpha1.AddonEnabling, EnablingAddon)
			return
		}
		r.reqCtx.Log.V(1).Info("progress to enabling stage handler")
		r.enablingStage.Handle(ctx)
	})
	r.next.Handle(ctx)
}
   402  
   403  func getInstallJobName(addon *extensionsv1alpha1.Addon) string {
   404  	return fmt.Sprintf("install-%s-addon", addon.Name)
   405  }
   406  
   407  func getUninstallJobName(addon *extensionsv1alpha1.Addon) string {
   408  	return fmt.Sprintf("uninstall-%s-addon", addon.Name)
   409  }
   410  
   411  func getHelmReleaseName(addon *extensionsv1alpha1.Addon) string {
   412  	return fmt.Sprintf("kb-addon-%s", addon.Name)
   413  }
   414  
   415  func useLocalCharts(addon *extensionsv1alpha1.Addon) bool {
   416  	return addon.Spec.Helm != nil && strings.HasPrefix(addon.Spec.Helm.ChartLocationURL, "file://")
   417  }
   418  
   419  // buildLocalChartsPath builds the local charts path if the chartLocationURL starts with "file://"
   420  func buildLocalChartsPath(addon *extensionsv1alpha1.Addon) (string, error) {
   421  	if !useLocalCharts(addon) {
   422  		return "$(CHART)", nil
   423  	}
   424  
   425  	url := addon.Spec.Helm.ChartLocationURL
   426  	last := strings.LastIndex(url, "/")
   427  	name := url[last+1:]
   428  	return fmt.Sprintf("%s/%s", localChartsPath, name), nil
   429  }
   430  
   431  // setSharedVolume sets shared volume to copy helm charts from charts image
   432  func setSharedVolume(addon *extensionsv1alpha1.Addon, helmJobPodSpec *corev1.PodSpec) {
   433  	if !useLocalCharts(addon) {
   434  		return
   435  	}
   436  
   437  	helmJobPodSpec.Volumes = append(helmJobPodSpec.Volumes, corev1.Volume{
   438  		Name: "charts",
   439  		VolumeSource: corev1.VolumeSource{
   440  			EmptyDir: &corev1.EmptyDirVolumeSource{},
   441  		},
   442  	})
   443  
   444  	helmJobPodSpec.Containers[0].VolumeMounts = append(helmJobPodSpec.Containers[0].VolumeMounts, corev1.VolumeMount{
   445  		Name:      "charts",
   446  		MountPath: localChartsPath,
   447  	})
   448  }
   449  
   450  // setInitContainer sets init containers to copy dependent charts to shared volume
   451  func setInitContainer(addon *extensionsv1alpha1.Addon, helmJobPodSpec *corev1.PodSpec) {
   452  	if !useLocalCharts(addon) {
   453  		return
   454  	}
   455  
   456  	fromPath := addon.Spec.Helm.ChartsPathInImage
   457  	if fromPath == "" {
   458  		fromPath = localChartsPath
   459  	}
   460  	helmJobPodSpec.InitContainers = append(helmJobPodSpec.InitContainers, corev1.Container{
   461  		Name:    "copy-charts",
   462  		Image:   addon.Spec.Helm.ChartsImage,
   463  		Command: []string{"sh", "-c", fmt.Sprintf("cp %s/* /mnt/charts", fromPath)},
   464  		VolumeMounts: []corev1.VolumeMount{
   465  			{
   466  				Name:      "charts",
   467  				MountPath: "/mnt/charts",
   468  			},
   469  		},
   470  	})
   471  }
   472  
   473  func (r *helmTypeInstallStage) Handle(ctx context.Context) {
   474  	r.process(func(addon *extensionsv1alpha1.Addon) {
   475  		r.reqCtx.Log.V(1).Info("helmTypeInstallStage", "phase", addon.Status.Phase)
   476  		mgrNS := viper.GetString(constant.CfgKeyCtrlrMgrNS)
   477  
   478  		key := client.ObjectKey{
   479  			Namespace: mgrNS,
   480  			Name:      getInstallJobName(addon),
   481  		}
   482  
   483  		helmInstallJob := &batchv1.Job{}
   484  		if err := r.reconciler.Get(ctx, key, helmInstallJob); client.IgnoreNotFound(err) != nil {
   485  			r.setRequeueWithErr(err, "")
   486  			return
   487  		} else if err == nil {
   488  			if helmInstallJob.Status.Succeeded > 0 {
   489  				return
   490  			}
   491  
   492  			if helmInstallJob.Status.Active > 0 {
   493  				r.setRequeueAfter(time.Second, fmt.Sprintf("running Helm install job %s", key.Name))
   494  				return
   495  			}
   496  			// there are situations that job.status.[Active | Failed | Succeeded ] are all
   497  			// 0, and len(job.status.conditions) > 0, and need to handle failed
   498  			// info. from conditions.
   499  			if helmInstallJob.Status.Failed > 0 {
   500  				// job failed set terminal state phase
   501  				setAddonErrorConditions(ctx, &r.stageCtx, addon, true, true, InstallationFailed,
   502  					fmt.Sprintf("Installation failed, do inspect error from jobs.batch %s", key.String()))
   503  				// only allow to do pod logs if max concurrent reconciles > 1, also considered that helm
   504  				// cmd error only has limited contents
   505  				if viper.GetInt(maxConcurrentReconcilesKey) > 1 {
   506  					if err := logFailedJobPodToCondError(ctx, &r.stageCtx, addon, key.Name, InstallationFailedLogs); err != nil {
   507  						r.setRequeueWithErr(err, "")
   508  						return
   509  					}
   510  				}
   511  				return
   512  			}
   513  			r.setRequeueAfter(time.Second, "")
   514  			return
   515  		}
   516  
   517  		var err error
   518  		helmInstallJob, err = createHelmJobProto(addon)
   519  		if err != nil {
   520  			r.setRequeueWithErr(err, "")
   521  			return
   522  		}
   523  
   524  		// set addon installation job to use local charts instead of remote charts,
   525  		// the init container will copy the local charts to the shared volume
   526  		chartsPath, err := buildLocalChartsPath(addon)
   527  		if err != nil {
   528  			r.setRequeueWithErr(err, "")
   529  			return
   530  		}
   531  
   532  		helmInstallJob.ObjectMeta.Name = key.Name
   533  		helmInstallJob.ObjectMeta.Namespace = key.Namespace
   534  		helmJobPodSpec := &helmInstallJob.Spec.Template.Spec
   535  		helmContainer := &helmInstallJob.Spec.Template.Spec.Containers[0]
   536  		helmContainer.Args = append([]string{
   537  			"upgrade",
   538  			"--install",
   539  			"$(RELEASE_NAME)",
   540  			chartsPath,
   541  			"--namespace",
   542  			"$(RELEASE_NS)",
   543  			"--create-namespace",
   544  		}, viper.GetStringSlice(addonHelmInstallOptKey)...)
   545  
   546  		installValues := addon.Spec.Helm.BuildMergedValues(addon.Spec.InstallSpec)
   547  		if err = addon.Spec.Helm.BuildContainerArgs(helmContainer, installValues); err != nil {
   548  			r.setRequeueWithErr(err, "")
   549  			return
   550  		}
   551  
   552  		// set values from file
   553  		for _, cmRef := range installValues.ConfigMapRefs {
   554  			cm := &corev1.ConfigMap{}
   555  			key := client.ObjectKey{
   556  				Name:      cmRef.Name,
   557  				Namespace: mgrNS}
   558  			if err := r.reconciler.Get(ctx, key, cm); err != nil {
   559  				if !apierrors.IsNotFound(err) {
   560  					r.setRequeueWithErr(err, "")
   561  					return
   562  				}
   563  				r.setRequeueAfter(time.Second, fmt.Sprintf("ConfigMap %s not found", cmRef.Name))
   564  				setAddonErrorConditions(ctx, &r.stageCtx, addon, false, true, AddonRefObjError,
   565  					fmt.Sprintf("ConfigMap object %v not found", key))
   566  				return
   567  			}
   568  			if !findDataKey(cm.Data, cmRef) {
   569  				setAddonErrorConditions(ctx, &r.stageCtx, addon, true, true, AddonRefObjError,
   570  					fmt.Sprintf("Attach ConfigMap %v volume source failed, key %s not found", key, cmRef.Key))
   571  				r.setReconciled()
   572  				return
   573  			}
   574  			attachVolumeMount(helmJobPodSpec, cmRef, cm.Name, "cm",
   575  				func() corev1.VolumeSource {
   576  					return corev1.VolumeSource{
   577  						ConfigMap: &corev1.ConfigMapVolumeSource{
   578  							LocalObjectReference: corev1.LocalObjectReference{
   579  								Name: cm.Name,
   580  							},
   581  							Items: []corev1.KeyToPath{
   582  								{
   583  									Key:  cmRef.Key,
   584  									Path: cmRef.Key,
   585  								},
   586  							},
   587  						},
   588  					}
   589  				})
   590  		}
   591  
   592  		for _, secretRef := range installValues.SecretRefs {
   593  			secret := &corev1.Secret{}
   594  			key := client.ObjectKey{
   595  				Name:      secretRef.Name,
   596  				Namespace: mgrNS}
   597  			if err := r.reconciler.Get(ctx, key, secret); err != nil {
   598  				if !apierrors.IsNotFound(err) {
   599  					r.setRequeueWithErr(err, "")
   600  					return
   601  				}
   602  				r.setRequeueAfter(time.Second, fmt.Sprintf("Secret %s not found", secret.Name))
   603  				setAddonErrorConditions(ctx, &r.stageCtx, addon, false, true, AddonRefObjError,
   604  					fmt.Sprintf("Secret object %v not found", key))
   605  				return
   606  			}
   607  			if !findDataKey(secret.Data, secretRef) {
   608  				setAddonErrorConditions(ctx, &r.stageCtx, addon, true, true, AddonRefObjError,
   609  					fmt.Sprintf("Attach Secret %v volume source failed, key %s not found", key, secretRef.Key))
   610  				r.setReconciled()
   611  				return
   612  			}
   613  			attachVolumeMount(helmJobPodSpec, secretRef, secret.Name, "secret",
   614  				func() corev1.VolumeSource {
   615  					return corev1.VolumeSource{
   616  						Secret: &corev1.SecretVolumeSource{
   617  							SecretName: secret.Name,
   618  							Items: []corev1.KeyToPath{
   619  								{
   620  									Key:  secretRef.Key,
   621  									Path: secretRef.Key,
   622  								},
   623  							},
   624  						},
   625  					}
   626  				})
   627  		}
   628  
   629  		// if chartLocationURL starts with 'file://', it means the charts is from local file system
   630  		// we will copy the charts from charts image to shared volume. Addon container will use the
   631  		// charts from shared volume to install the addon.
   632  		setSharedVolume(addon, helmJobPodSpec)
   633  		setInitContainer(addon, helmJobPodSpec)
   634  
   635  		if err := r.reconciler.Create(ctx, helmInstallJob); err != nil {
   636  			r.setRequeueWithErr(err, "")
   637  			return
   638  		}
   639  		r.setRequeueAfter(time.Second, "")
   640  	})
   641  	r.next.Handle(ctx)
   642  }
   643  
// Handle drives the helm uninstall for an addon: it tracks an existing
// uninstall job (succeeded -> done, active/deleting -> requeue, failed ->
// record event + logs, delete the job and its pods so it can be retried),
// and otherwise creates a `helm delete` job — but only if a helm release
// secret for this addon actually exists.
func (r *helmTypeUninstallStage) Handle(ctx context.Context) {
	r.process(func(addon *extensionsv1alpha1.Addon) {
		r.reqCtx.Log.V(1).Info("helmTypeUninstallStage", "phase", addon.Status.Phase, "next", r.next.ID())
		key := client.ObjectKey{
			Namespace: viper.GetString(constant.CfgKeyCtrlrMgrNS),
			Name:      getUninstallJobName(addon),
		}
		helmUninstallJob := &batchv1.Job{}
		if err := r.reconciler.Get(ctx, key, helmUninstallJob); client.IgnoreNotFound(err) != nil {
			r.setRequeueWithErr(err, "")
			return
		} else if err == nil {
			if helmUninstallJob.Status.Succeeded > 0 {
				r.reqCtx.Log.V(1).Info("helm uninstall job succeed", "job", key)
				// TODO:
				// helm uninstall should always succeed, therefore need additional label selector to check any
				// helm managed object is not properly cleaned up
				return
			}

			// Job controller has yet handling Job or job controller is not running, i.e., testenv
			// only handles this situation when addon is at terminating state.
			if helmUninstallJob.Status.StartTime.IsZero() && !addon.GetDeletionTimestamp().IsZero() {
				return
			}

			// requeue if uninstall job is active or under deleting
			if !helmUninstallJob.GetDeletionTimestamp().IsZero() || helmUninstallJob.Status.Active > 0 {
				r.setRequeueAfter(time.Second, "")
				return
			}
			// there are situations that job.status.[Active | Failed | Succeeded ] are all
			// 0, and len(job.status.conditions) > 0, and need to handle failed
			// info. from conditions.
			if helmUninstallJob.Status.Failed > 0 {
				r.reqCtx.Log.V(1).Info("helm uninstall job failed", "job", key)
				r.reconciler.Event(addon, corev1.EventTypeWarning, UninstallationFailed,
					fmt.Sprintf("Uninstallation failed, do inspect error from jobs.batch %s",
						key.String()))
				// only allow to do pod logs if max concurrent reconciles > 1, also considered that helm
				// cmd error only has limited contents
				if viper.GetInt(maxConcurrentReconcilesKey) > 1 {
					if err := logFailedJobPodToCondError(ctx, &r.stageCtx, addon, key.Name, UninstallationFailedLogs); err != nil {
						r.setRequeueWithErr(err, "")
						return
					}
				}

				// delete the failed job (and its pods) so a fresh uninstall job
				// can be created on the next pass
				if err := r.reconciler.Delete(ctx, helmUninstallJob); client.IgnoreNotFound(err) != nil {
					r.setRequeueWithErr(err, "")
					return
				}
				if err := r.reconciler.cleanupJobPods(*r.reqCtx); err != nil {
					r.setRequeueWithErr(err, "")
					return
				}
			}
			r.setRequeueAfter(time.Second, "")
			return
		}

		// inspect helm releases secrets to decide whether there is anything to uninstall
		helmSecrets := &corev1.SecretList{}
		if err := r.reconciler.List(ctx, helmSecrets, client.MatchingLabels{
			"name":  getHelmReleaseName(addon),
			"owner": "helm",
		}); err != nil {
			r.setRequeueWithErr(err, "")
			return
		}
		releaseExist := false
		for _, s := range helmSecrets.Items {
			// helm stores releases as secrets of this type
			if string(s.Type) == "helm.sh/release.v1" {
				releaseExist = true
				break
			}
		}

		// has no installed release simply return
		if !releaseExist {
			r.reqCtx.Log.V(1).Info("helmTypeUninstallStage release not exist", "job", key)
			return
		}

		r.reqCtx.Log.V(1).Info("creating helm uninstall job", "job", key)
		var err error
		// create `helm delete <release>` job
		helmUninstallJob, err = createHelmJobProto(addon)
		if err != nil {
			r.reqCtx.Log.V(1).Info("helmTypeUninstallStage", "job", key, "err", err)
			r.setRequeueWithErr(err, "")
			return
		}
		helmUninstallJob.ObjectMeta.Name = key.Name
		helmUninstallJob.ObjectMeta.Namespace = key.Namespace
		helmUninstallJob.Spec.Template.Spec.Containers[0].Args = append([]string{
			"delete",
			"$(RELEASE_NAME)",
			"--namespace",
			"$(RELEASE_NS)",
		}, viper.GetStringSlice(addonHelmUninstallOptKey)...)
		r.reqCtx.Log.V(1).Info("create helm uninstall job", "job", key)
		if err := r.reconciler.Create(ctx, helmUninstallJob); err != nil {
			r.reqCtx.Log.V(1).Info("helmTypeUninstallStage", "job", key, "err", err)
			r.setRequeueWithErr(err, "")
			return
		}
		r.setRequeueAfter(time.Second, "")
	})
	r.next.Handle(ctx)
}
   755  
   756  func (r *enablingStage) Handle(ctx context.Context) {
   757  	r.helmTypeInstallStage.stageCtx = r.stageCtx
   758  	r.process(func(addon *extensionsv1alpha1.Addon) {
   759  		r.reqCtx.Log.V(1).Info("enablingStage", "phase", addon.Status.Phase)
   760  		switch addon.Spec.Type {
   761  		case extensionsv1alpha1.HelmType:
   762  			r.helmTypeInstallStage.Handle(ctx)
   763  		default:
   764  		}
   765  	})
   766  	r.next.Handle(ctx)
   767  }
   768  
   769  func (r *disablingStage) Handle(ctx context.Context) {
   770  	r.helmTypeUninstallStage.stageCtx = r.stageCtx
   771  	r.process(func(addon *extensionsv1alpha1.Addon) {
   772  		r.reqCtx.Log.V(1).Info("disablingStage", "phase", addon.Status.Phase, "type", addon.Spec.Type)
   773  		switch addon.Spec.Type {
   774  		case extensionsv1alpha1.HelmType:
   775  			r.helmTypeUninstallStage.Handle(ctx)
   776  		default:
   777  		}
   778  	})
   779  	r.next.Handle(ctx)
   780  }
   781  
// Handle patches the addon into its terminal phase — Disabling (or empty)
// becomes Disabled, Enabling becomes Enabled — recording a Succeed condition
// and emitting an event. Other phases pass through unchanged.
func (r *terminalStateStage) Handle(ctx context.Context) {
	r.process(func(addon *extensionsv1alpha1.Addon) {
		r.reqCtx.Log.V(1).Info("terminalStateStage", "phase", addon.Status.Phase)
		// patchPhaseNCondition updates phase/observedGeneration, sets the
		// Succeed condition, emits an event, and marks the reconcile as done
		patchPhaseNCondition := func(phase extensionsv1alpha1.AddonPhase, reason string) {
			r.reqCtx.Log.V(1).Info("patching status", "phase", phase)
			patch := client.MergeFrom(addon.DeepCopy())
			addon.Status.Phase = phase
			addon.Status.ObservedGeneration = addon.Generation

			meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
				Type:               extensionsv1alpha1.ConditionTypeSucceed,
				Status:             metav1.ConditionTrue,
				ObservedGeneration: addon.Generation,
				Reason:             reason,
				LastTransitionTime: metav1.Now(),
			})

			if err := r.reconciler.Status().Patch(ctx, addon, patch); err != nil {
				r.setRequeueWithErr(err, "")
				return
			}
			r.reconciler.Event(addon, corev1.EventTypeNormal, reason,
				fmt.Sprintf("Progress to %s phase", phase))
			r.setReconciled()
		}

		// transit to enabled or disable phase
		switch addon.Status.Phase {
		case "", extensionsv1alpha1.AddonDisabling:
			patchPhaseNCondition(extensionsv1alpha1.AddonDisabled, AddonDisabled)
			return
		case extensionsv1alpha1.AddonEnabling:
			patchPhaseNCondition(extensionsv1alpha1.AddonEnabled, AddonEnabled)
			return
		}
	})
	r.next.Handle(ctx)
}
   820  
   821  // attachVolumeMount attaches a volumes to pod and added container.VolumeMounts to a ConfigMap
   822  // or Secret referenced key as file, and add --values={volumeMountPath}/{selector.Key} to
   823  // helm install/upgrade args
   824  func attachVolumeMount(
   825  	podSpec *corev1.PodSpec,
   826  	selector extensionsv1alpha1.DataObjectKeySelector,
   827  	objName, suff string,
   828  	volumeSrcBuilder func() corev1.VolumeSource,
   829  ) {
   830  	container := &podSpec.Containers[0]
   831  	volName := fmt.Sprintf("%s-%s", objName, suff)
   832  	mountPath := fmt.Sprintf("/vol/%s/%s", suff, objName)
   833  	podSpec.Volumes = append(podSpec.Volumes, corev1.Volume{
   834  		Name:         volName,
   835  		VolumeSource: volumeSrcBuilder(),
   836  	})
   837  	container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{
   838  		Name:      volName,
   839  		ReadOnly:  true,
   840  		MountPath: mountPath,
   841  	})
   842  	container.Args = append(container.Args, "--values",
   843  		fmt.Sprintf("%s/%s", mountPath, selector.Key))
   844  }
   845  
// createHelmJobProto creates a job.batch prototyped object for running a
// helm command against this addon's chart. The returned Job carries:
//   - kubeblocks managed-by / addon-name labels (plus any kubeblocks.io
//     labels inherited from the addon itself),
//   - a TTL-after-finished taken from CfgKeyAddonJobTTL (default 5m) and a
//     backoff limit of 3,
//   - a single container whose Command is "helm" (callers append the
//     subcommand and args) with RELEASE_NAME / RELEASE_NS / CHART env vars,
//   - scheduling settings (tolerations, affinity, node selector) copied
//     from the controller-manager's own config when present.
//
// Returns an error if the configured TTL or any of the JSON-encoded
// scheduling settings fail to parse.
func createHelmJobProto(addon *extensionsv1alpha1.Addon) (*batchv1.Job, error) {
	// TTL controls how long a finished job (and its pods) linger before GC.
	ttl := time.Minute * 5
	if jobTTL := viper.GetString(constant.CfgKeyAddonJobTTL); jobTTL != "" {
		var err error
		if ttl, err = time.ParseDuration(jobTTL); err != nil {
			return nil, err
		}
	}
	ttlSec := int32(ttl.Seconds())
	backoffLimit := int32(3)
	helmProtoJob := &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{
			Labels: map[string]string{
				constant.AddonNameLabelKey:    addon.Name,
				constant.AppManagedByLabelKey: constant.AppName,
			},
		},
		Spec: batchv1.JobSpec{
			BackoffLimit:            &backoffLimit,
			TTLSecondsAfterFinished: &ttlSec,
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Labels: map[string]string{
						constant.AddonNameLabelKey:    addon.Name,
						constant.AppManagedByLabelKey: constant.AppName,
					},
				},
				Spec: corev1.PodSpec{
					RestartPolicy:      corev1.RestartPolicyNever,
					ServiceAccountName: viper.GetString("KUBEBLOCKS_ADDON_SA_NAME"),
					Containers: []corev1.Container{
						{
							Name:            getJobMainContainerName(addon),
							Image:           viper.GetString(constant.KBToolsImage),
							ImagePullPolicy: corev1.PullPolicy(viper.GetString(constant.CfgAddonJobImgPullPolicy)),
							// TODO: need have image that is capable of following settings, current settings
							// may expose potential security risk, as this pod is using cluster-admin clusterrole.
							// SecurityContext: &corev1.SecurityContext{
							//	RunAsNonRoot:             &[]bool{true}[0],
							//	RunAsUser:                &[]int64{1001}[0],
							//	AllowPrivilegeEscalation: &[]bool{false}[0],
							//	Capabilities: &corev1.Capabilities{
							//		Drop: []corev1.Capability{
							//			"ALL",
							//		},
							//	},
							// },
							Command: []string{"helm"},
							Env: []corev1.EnvVar{
								{
									Name:  "RELEASE_NAME",
									Value: getHelmReleaseName(addon),
								},
								{
									Name:  "RELEASE_NS",
									Value: viper.GetString(constant.CfgKeyCtrlrMgrNS),
								},
								{
									Name:  "CHART",
									Value: addon.Spec.Helm.ChartLocationURL,
								},
							},
							VolumeMounts: []corev1.VolumeMount{},
						},
					},
					// Empty placeholders so callers (e.g. attachVolumeMount)
					// and the config overrides below can append/unmarshal
					// without nil checks.
					Volumes:      []corev1.Volume{},
					Tolerations:  []corev1.Toleration{},
					Affinity:     &corev1.Affinity{},
					NodeSelector: map[string]string{},
				},
			},
		},
	}
	// inherit kubeblocks.io labels from primary resource
	for k, v := range addon.Labels {
		if !strings.Contains(k, constant.APIGroup) {
			continue
		}
		if _, ok := helmProtoJob.ObjectMeta.Labels[k]; !ok {
			helmProtoJob.ObjectMeta.Labels[k] = v
		}
	}

	// Mirror the controller-manager's own scheduling constraints onto the
	// job pod; each setting is a JSON-encoded string in the config.
	podSpec := &helmProtoJob.Spec.Template.Spec
	if cmTolerations := viper.GetString(constant.CfgKeyCtrlrMgrTolerations); cmTolerations != "" &&
		cmTolerations != "[]" && cmTolerations != "[{}]" {
		if err := json.Unmarshal([]byte(cmTolerations), &podSpec.Tolerations); err != nil {
			return nil, err
		}
		// A list of only empty tolerations ("{}") is treated the same as
		// no tolerations at all.
		isAllEmptyElem := true
		for _, t := range podSpec.Tolerations {
			if t.String() != "{}" {
				isAllEmptyElem = false
				break
			}
		}
		if isAllEmptyElem {
			podSpec.Tolerations = nil
		}
	}
	if cmAffinity := viper.GetString(constant.CfgKeyCtrlrMgrAffinity); cmAffinity != "" {
		if err := json.Unmarshal([]byte(cmAffinity), &podSpec.Affinity); err != nil {
			return nil, err
		}
	}
	if cmNodeSelector := viper.GetString(constant.CfgKeyCtrlrMgrNodeSelector); cmNodeSelector != "" {
		if err := json.Unmarshal([]byte(cmNodeSelector), &podSpec.NodeSelector); err != nil {
			return nil, err
		}
	}
	return helmProtoJob, nil
}
   959  
   960  func enabledAddonWithDefaultValues(ctx context.Context, stageCtx *stageCtx,
   961  	addon *extensionsv1alpha1.Addon, reason, message string) {
   962  	setInstallSpec := func(di *extensionsv1alpha1.AddonDefaultInstallSpecItem) {
   963  		addon.Spec.InstallSpec = di.AddonInstallSpec.DeepCopy()
   964  		addon.Spec.InstallSpec.Enabled = true
   965  		if addon.Annotations == nil {
   966  			addon.Annotations = map[string]string{}
   967  		}
   968  		if di.AddonInstallSpec.IsEmpty() {
   969  			addon.Annotations[AddonDefaultIsEmpty] = trueVal
   970  		}
   971  		if err := stageCtx.reconciler.Client.Update(ctx, addon); err != nil {
   972  			stageCtx.setRequeueWithErr(err, "")
   973  			return
   974  		}
   975  		stageCtx.reconciler.Event(addon, corev1.EventTypeNormal, reason, message)
   976  		stageCtx.setReconciled()
   977  	}
   978  
   979  	for _, di := range addon.Spec.GetSortedDefaultInstallValues() {
   980  		if len(di.Selectors) == 0 {
   981  			setInstallSpec(&di)
   982  			return
   983  		}
   984  		for _, s := range di.Selectors {
   985  			if !s.MatchesFromConfig() {
   986  				continue
   987  			}
   988  			setInstallSpec(&di)
   989  			return
   990  		}
   991  	}
   992  }
   993  
   994  func setAddonErrorConditions(ctx context.Context,
   995  	stageCtx *stageCtx,
   996  	addon *extensionsv1alpha1.Addon,
   997  	setFailedStatus, recordEvent bool,
   998  	reason, message string,
   999  	eventMessage ...string) {
  1000  	patch := client.MergeFrom(addon.DeepCopy())
  1001  	addon.Status.ObservedGeneration = addon.Generation
  1002  	if setFailedStatus {
  1003  		addon.Status.Phase = extensionsv1alpha1.AddonFailed
  1004  	}
  1005  	meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
  1006  		Type:               extensionsv1alpha1.ConditionTypeChecked,
  1007  		Status:             metav1.ConditionFalse,
  1008  		ObservedGeneration: addon.Generation,
  1009  		Reason:             reason,
  1010  		Message:            message,
  1011  		LastTransitionTime: metav1.Now(),
  1012  	})
  1013  
  1014  	if err := stageCtx.reconciler.Status().Patch(ctx, addon, patch); err != nil {
  1015  		stageCtx.setRequeueWithErr(err, "")
  1016  		return
  1017  	}
  1018  	if !recordEvent {
  1019  		return
  1020  	}
  1021  	if len(eventMessage) > 0 && eventMessage[0] != "" {
  1022  		stageCtx.reconciler.Event(addon, corev1.EventTypeWarning, reason, eventMessage[0])
  1023  	} else {
  1024  		stageCtx.reconciler.Event(addon, corev1.EventTypeWarning, reason, message)
  1025  	}
  1026  }
  1027  
  1028  func getJobMainContainerName(addon *extensionsv1alpha1.Addon) string {
  1029  	return strings.ToLower(string(addon.Spec.Type))
  1030  }
  1031  
  1032  func logFailedJobPodToCondError(ctx context.Context, stageCtx *stageCtx, addon *extensionsv1alpha1.Addon,
  1033  	jobName, reason string) error {
  1034  	podList := &corev1.PodList{}
  1035  	if err := stageCtx.reconciler.List(ctx, podList,
  1036  		client.InNamespace(viper.GetString(constant.CfgKeyCtrlrMgrNS)),
  1037  		client.MatchingLabels{
  1038  			constant.AddonNameLabelKey:    stageCtx.reqCtx.Req.Name,
  1039  			constant.AppManagedByLabelKey: constant.AppName,
  1040  			"job-name":                    jobName,
  1041  		}); err != nil {
  1042  		return err
  1043  	}
  1044  
  1045  	// sort pod with latest creation place front
  1046  	slices.SortFunc(podList.Items, func(a, b corev1.Pod) bool {
  1047  		return b.CreationTimestamp.Before(&(a.CreationTimestamp))
  1048  	})
  1049  
  1050  podsloop:
  1051  	for _, pod := range podList.Items {
  1052  		switch pod.Status.Phase {
  1053  		case corev1.PodFailed:
  1054  			clientset, err := corev1client.NewForConfig(stageCtx.reconciler.RestConfig)
  1055  			if err != nil {
  1056  				return err
  1057  			}
  1058  			currOpts := &corev1.PodLogOptions{
  1059  				Container: getJobMainContainerName(addon),
  1060  			}
  1061  			req := clientset.Pods(pod.Namespace).GetLogs(pod.Name, currOpts)
  1062  			data, err := req.DoRaw(ctx)
  1063  			if err != nil {
  1064  				return err
  1065  			}
  1066  			setAddonErrorConditions(ctx, stageCtx, addon, false, true, reason, string(data))
  1067  			break podsloop
  1068  		}
  1069  	}
  1070  	return nil
  1071  }
  1072  
  1073  func findDataKey[V string | []byte](data map[string]V, refObj extensionsv1alpha1.DataObjectKeySelector) bool {
  1074  	for k := range data {
  1075  		if k != refObj.Key {
  1076  			continue
  1077  		}
  1078  		return true
  1079  	}
  1080  	return false
  1081  }