github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/controllers/apps/operations/datascript.go (about)

     1  /*
     2  Copyright (C) 2022-2023 ApeCloud Co., Ltd
     3  
     4  This file is part of KubeBlocks project
     5  
     6  This program is free software: you can redistribute it and/or modify
     7  it under the terms of the GNU Affero General Public License as published by
     8  the Free Software Foundation, either version 3 of the License, or
     9  (at your option) any later version.
    10  
    11  This program is distributed in the hope that it will be useful
    12  but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  GNU Affero General Public License for more details.
    15  
    16  You should have received a copy of the GNU Affero General Public License
    17  along with this program.  If not, see <http://www.gnu.org/licenses/>.
    18  */
    19  
    20  package operations
    21  
    22  import (
    23  	"fmt"
    24  	"strings"
    25  	"time"
    26  
    27  	"github.com/sethvargo/go-password/password"
    28  
    29  	batchv1 "k8s.io/api/batch/v1"
    30  	corev1 "k8s.io/api/core/v1"
    31  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    32  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    33  	"k8s.io/apimachinery/pkg/types"
    34  	"k8s.io/utils/pointer"
    35  	"sigs.k8s.io/controller-runtime/pkg/client"
    36  	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
    37  
    38  	appsv1alpha1 "github.com/1aal/kubeblocks/apis/apps/v1alpha1"
    39  	"github.com/1aal/kubeblocks/pkg/constant"
    40  	componetutil "github.com/1aal/kubeblocks/pkg/controller/component"
    41  	intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil"
    42  	"github.com/1aal/kubeblocks/pkg/lorry/engines/register"
    43  	viper "github.com/1aal/kubeblocks/pkg/viperx"
    44  )
    45  
    46  var _ OpsHandler = DataScriptOpsHandler{}
    47  var _ error = &FastFaileError{}
    48  
    49  // DataScriptOpsHandler handles DataScript operation, it is more like a one-time command operation.
    50  type DataScriptOpsHandler struct {
    51  }
    52  
    53  // FastFaileError is a error type that will not retry the operation.
    54  type FastFaileError struct {
    55  	message string
    56  }
    57  
    58  func (e *FastFaileError) Error() string {
    59  	return fmt.Sprintf("fail with message: %s", e.message)
    60  }
    61  
    62  func init() {
    63  	// ToClusterPhase is not defined, because 'datascript' does not affect the cluster status.
    64  	dataScriptOpsHander := DataScriptOpsHandler{}
    65  	dataScriptBehavior := OpsBehaviour{
    66  		FromClusterPhases: []appsv1alpha1.ClusterPhase{appsv1alpha1.RunningClusterPhase},
    67  		OpsHandler:        dataScriptOpsHander,
    68  	}
    69  	opsMgr := GetOpsManager()
    70  	opsMgr.RegisterOps(appsv1alpha1.DataScriptType, dataScriptBehavior)
    71  }
    72  
    73  // Action implements OpsHandler.Action
    74  // It will create a job to execute the script. It will fail fast if the script is not valid, or the target pod is not found.
    75  func (o DataScriptOpsHandler) Action(reqCtx intctrlutil.RequestCtx, cli client.Client, opsResource *OpsResource) error {
    76  	opsRequest := opsResource.OpsRequest
    77  	cluster := opsResource.Cluster
    78  	spec := opsRequest.Spec.ScriptSpec
    79  
    80  	// get component
    81  	component := cluster.Spec.GetComponentByName(spec.ComponentName)
    82  	if component == nil {
    83  		// we have checked component exists in validation, so this should not happen
    84  		return &FastFaileError{message: fmt.Sprintf("component %s not found in cluster %s", spec.ComponentName, cluster.Name)}
    85  	}
    86  
    87  	clusterDef, err := getClusterDefByName(reqCtx.Ctx, cli, cluster.Spec.ClusterDefRef)
    88  	if err != nil {
    89  		if apierrors.IsNotFound(err) {
    90  			// fail fast if cluster def does not exists
    91  			return &FastFaileError{message: err.Error()}
    92  		}
    93  		return err
    94  	}
    95  	// get componentDef
    96  	componentDef := clusterDef.GetComponentDefByName(component.ComponentDefRef)
    97  	if componentDef == nil {
    98  		return &FastFaileError{message: fmt.Sprintf("componentDef %s not found in clusterDef %s", component.ComponentDefRef, clusterDef.Name)}
    99  	}
   100  
   101  	// create jobs
   102  	var jobs []*batchv1.Job
   103  	if jobs, err = buildDataScriptJobs(reqCtx, cli, opsResource.Cluster, component, opsRequest, componentDef.CharacterType); err != nil {
   104  		return err
   105  	}
   106  	for _, job := range jobs {
   107  		if err = cli.Create(reqCtx.Ctx, job); err != nil {
   108  			return err
   109  		}
   110  	}
   111  	return nil
   112  }
   113  
   114  // ReconcileAction implements OpsHandler.ReconcileAction
   115  // It will check the job status, and update the opsRequest status.
   116  // If the job is neither completed nor failed, it will retry after 1 second.
   117  // If the job is completed, it will return OpsSucceedPhase
   118  // If the job is failed, it will return OpsFailedPhase.
   119  func (o DataScriptOpsHandler) ReconcileAction(reqCtx intctrlutil.RequestCtx, cli client.Client, opsResource *OpsResource) (appsv1alpha1.OpsPhase, time.Duration, error) {
   120  	opsRequest := opsResource.OpsRequest
   121  	cluster := opsResource.Cluster
   122  	spec := opsRequest.Spec.ScriptSpec
   123  
   124  	meetsJobConditions := func(job *batchv1.Job, condType batchv1.JobConditionType, condStatus corev1.ConditionStatus) bool {
   125  		for _, condition := range job.Status.Conditions {
   126  			if condition.Type == condType && condition.Status == condStatus {
   127  				return true
   128  			}
   129  		}
   130  		return false
   131  	}
   132  
   133  	// retrieve job for this opsRequest
   134  	jobList := &batchv1.JobList{}
   135  	if err := cli.List(reqCtx.Ctx, jobList, client.InNamespace(cluster.Namespace), client.MatchingLabels(getDataScriptJobLabels(cluster.Name, spec.ComponentName, opsRequest.Name))); err != nil {
   136  		return appsv1alpha1.OpsFailedPhase, 0, err
   137  	} else if len(jobList.Items) == 0 {
   138  		return appsv1alpha1.OpsFailedPhase, 0, fmt.Errorf("job not found")
   139  	}
   140  
   141  	var (
   142  		expectedCount int
   143  		succedCount   int
   144  		failedCount   int
   145  	)
   146  
   147  	expectedCount = len(jobList.Items)
   148  	// check job status
   149  	for _, job := range jobList.Items {
   150  		if meetsJobConditions(&job, batchv1.JobComplete, corev1.ConditionTrue) {
   151  			succedCount++
   152  		} else if meetsJobConditions(&job, batchv1.JobFailed, corev1.ConditionTrue) {
   153  			failedCount++
   154  		}
   155  	}
   156  
   157  	opsStatus := appsv1alpha1.OpsRunningPhase
   158  	if succedCount == expectedCount {
   159  		opsStatus = appsv1alpha1.OpsSucceedPhase
   160  	} else if failedCount+succedCount == expectedCount {
   161  		opsStatus = appsv1alpha1.OpsFailedPhase
   162  	}
   163  
   164  	patch := client.MergeFrom(opsRequest.DeepCopy())
   165  	opsRequest.Status.Progress = fmt.Sprintf("%d/%d", succedCount, expectedCount)
   166  
   167  	// patch OpsRequest.status.components
   168  	if err := cli.Status().Patch(reqCtx.Ctx, opsRequest, patch); err != nil {
   169  		return opsStatus, time.Second, err
   170  	}
   171  
   172  	if succedCount == expectedCount {
   173  		return appsv1alpha1.OpsSucceedPhase, 0, nil
   174  	} else if failedCount+succedCount == expectedCount {
   175  		return appsv1alpha1.OpsFailedPhase, 0, fmt.Errorf("%d job execution failed, please check the job log ", failedCount)
   176  	}
   177  	return appsv1alpha1.OpsRunningPhase, 5 * time.Second, nil
   178  }
   179  
   180  func (o DataScriptOpsHandler) ActionStartedCondition(reqCtx intctrlutil.RequestCtx, cli client.Client, opsRes *OpsResource) (*metav1.Condition, error) {
   181  	return appsv1alpha1.NewDataScriptCondition(opsRes.OpsRequest), nil
   182  }
   183  
   184  func (o DataScriptOpsHandler) SaveLastConfiguration(reqCtx intctrlutil.RequestCtx, cli client.Client, opsResource *OpsResource) error {
   185  	return nil
   186  }
   187  
   188  // getScriptContent will get script content from script or scriptFrom
   189  func getScriptContent(reqCtx intctrlutil.RequestCtx, cli client.Client, spec *appsv1alpha1.ScriptSpec) ([]string, error) {
   190  	script := make([]string, 0)
   191  	if len(spec.Script) > 0 {
   192  		script = append(script, spec.Script...)
   193  	}
   194  	if spec.ScriptFrom == nil {
   195  		return script, nil
   196  	}
   197  	configMapsRefs := spec.ScriptFrom.ConfigMapRef
   198  	secretRefs := spec.ScriptFrom.SecretRef
   199  
   200  	if len(configMapsRefs) > 0 {
   201  		obj := &corev1.ConfigMap{}
   202  		for _, cm := range configMapsRefs {
   203  			if err := cli.Get(reqCtx.Ctx, types.NamespacedName{Namespace: reqCtx.Req.Namespace, Name: cm.Name}, obj); err != nil {
   204  				return nil, err
   205  			}
   206  			script = append(script, obj.Data[cm.Key])
   207  		}
   208  	}
   209  
   210  	if len(secretRefs) > 0 {
   211  		obj := &corev1.Secret{}
   212  		for _, secret := range secretRefs {
   213  			if err := cli.Get(reqCtx.Ctx, types.NamespacedName{Namespace: reqCtx.Req.Namespace, Name: secret.Name}, obj); err != nil {
   214  				return nil, err
   215  			}
   216  			if obj.Data[secret.Key] == nil {
   217  				return nil, fmt.Errorf("secret %s/%s does not have key %s", reqCtx.Req.Namespace, secret.Name, secret.Key)
   218  			}
   219  			secretData := string(obj.Data[secret.Key])
   220  			// trim the last \n
   221  			if len(secretData) > 0 && secretData[len(secretData)-1] == '\n' {
   222  				secretData = secretData[:len(secretData)-1]
   223  			}
   224  			script = append(script, secretData)
   225  		}
   226  	}
   227  	return script, nil
   228  }
   229  
   230  func getTargetService(reqCtx intctrlutil.RequestCtx, cli client.Client, clusterObjectKey client.ObjectKey, componentName string) (string, error) {
   231  	// get svc
   232  	service := &corev1.Service{}
   233  	serviceName := fmt.Sprintf("%s-%s", clusterObjectKey.Name, componentName)
   234  	if err := cli.Get(reqCtx.Ctx, types.NamespacedName{Namespace: clusterObjectKey.Namespace, Name: serviceName}, service); err != nil {
   235  		return "", err
   236  	}
   237  	return serviceName, nil
   238  }
   239  
   240  func buildDataScriptJobs(reqCtx intctrlutil.RequestCtx, cli client.Client, cluster *appsv1alpha1.Cluster, component *appsv1alpha1.ClusterComponentSpec,
   241  	ops *appsv1alpha1.OpsRequest, charType string) ([]*batchv1.Job, error) {
   242  	engineForJob, err := register.NewClusterCommands(charType)
   243  	if err != nil || engineForJob == nil {
   244  		return nil, &FastFaileError{message: err.Error()}
   245  	}
   246  
   247  	buildJob := func(endpoint string) (*batchv1.Job, error) {
   248  		envs := []corev1.EnvVar{}
   249  
   250  		envs = append(envs, corev1.EnvVar{
   251  			Name:  "KB_HOST",
   252  			Value: endpoint,
   253  		})
   254  
   255  		// parse username and password
   256  		secretFrom := ops.Spec.ScriptSpec.Secret
   257  		if secretFrom == nil {
   258  			secretFrom = &appsv1alpha1.ScriptSecret{
   259  				Name:        fmt.Sprintf("%s-conn-credential", cluster.Name),
   260  				PasswordKey: "password",
   261  				UsernameKey: "username",
   262  			}
   263  		}
   264  		// verify secrets exist
   265  		if err := cli.Get(reqCtx.Ctx, types.NamespacedName{Namespace: reqCtx.Req.Namespace, Name: secretFrom.Name}, &corev1.Secret{}); err != nil {
   266  			return nil, &FastFaileError{message: err.Error()}
   267  		}
   268  
   269  		envs = append(envs, corev1.EnvVar{
   270  			Name: "KB_USER",
   271  			ValueFrom: &corev1.EnvVarSource{
   272  				SecretKeyRef: &corev1.SecretKeySelector{
   273  					Key: secretFrom.UsernameKey,
   274  					LocalObjectReference: corev1.LocalObjectReference{
   275  						Name: secretFrom.Name,
   276  					},
   277  				},
   278  			},
   279  		})
   280  		envs = append(envs, corev1.EnvVar{
   281  			Name: "KB_PASSWD",
   282  			ValueFrom: &corev1.EnvVarSource{
   283  				SecretKeyRef: &corev1.SecretKeySelector{
   284  					Key: secretFrom.PasswordKey,
   285  					LocalObjectReference: corev1.LocalObjectReference{
   286  						Name: secretFrom.Name,
   287  					},
   288  				},
   289  			},
   290  		})
   291  
   292  		// parse scripts
   293  		scripts, err := getScriptContent(reqCtx, cli, ops.Spec.ScriptSpec)
   294  		if err != nil {
   295  			return nil, &FastFaileError{message: err.Error()}
   296  		}
   297  
   298  		envs = append(envs, corev1.EnvVar{
   299  			Name:  "KB_SCRIPT",
   300  			Value: strings.Join(scripts, "\n"),
   301  		})
   302  
   303  		jobCmdTpl, envVars, err := engineForJob.ExecuteCommand(scripts)
   304  		if err != nil {
   305  			return nil, &FastFaileError{message: err.Error()}
   306  		}
   307  		if envVars != nil {
   308  			envs = append(envs, envVars...)
   309  		}
   310  		containerImg := viper.GetString(constant.KBDataScriptClientsImage)
   311  		if len(ops.Spec.ScriptSpec.Image) != 0 {
   312  			containerImg = ops.Spec.ScriptSpec.Image
   313  		}
   314  		if len(containerImg) == 0 {
   315  			return nil, &FastFaileError{message: "image is empty"}
   316  		}
   317  
   318  		container := corev1.Container{
   319  			Name:            "datascript",
   320  			Image:           containerImg,
   321  			ImagePullPolicy: corev1.PullPolicy(viper.GetString(constant.KBImagePullPolicy)),
   322  			Command:         jobCmdTpl,
   323  			Env:             envs,
   324  		}
   325  		randomStr, _ := password.Generate(4, 0, 0, true, false)
   326  		jobName := fmt.Sprintf("%s-%s-%s-%s", cluster.Name, "script", ops.Name, randomStr)
   327  		if len(jobName) > 63 {
   328  			jobName = jobName[:63]
   329  		}
   330  
   331  		job := &batchv1.Job{
   332  			ObjectMeta: metav1.ObjectMeta{
   333  				Name:      jobName,
   334  				Namespace: cluster.Namespace,
   335  			},
   336  		}
   337  
   338  		// set backoff limit to 0, so that the job will not be restarted
   339  		job.Spec.BackoffLimit = pointer.Int32(0)
   340  		job.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicyNever
   341  		job.Spec.Template.Spec.Containers = []corev1.Container{container}
   342  
   343  		// add labels
   344  		job.Labels = getDataScriptJobLabels(cluster.Name, component.Name, ops.Name)
   345  		// add tolerations
   346  		tolerations, err := componetutil.BuildTolerations(cluster, component)
   347  		if err != nil {
   348  			return nil, &FastFaileError{message: err.Error()}
   349  		}
   350  		job.Spec.Template.Spec.Tolerations = tolerations
   351  		// add owner reference
   352  		scheme, _ := appsv1alpha1.SchemeBuilder.Build()
   353  		if err := controllerutil.SetOwnerReference(ops, job, scheme); err != nil {
   354  			return nil, &FastFaileError{message: err.Error()}
   355  		}
   356  		return job, nil
   357  	}
   358  
   359  	// parse kb host
   360  	var endpoint string
   361  	var job *batchv1.Job
   362  
   363  	jobs := make([]*batchv1.Job, 0)
   364  	if ops.Spec.ScriptSpec.Selector == nil {
   365  		if endpoint, err = getTargetService(reqCtx, cli, client.ObjectKeyFromObject(cluster), component.Name); err != nil {
   366  			return nil, &FastFaileError{message: err.Error()}
   367  		}
   368  		if job, err = buildJob(endpoint); err != nil {
   369  			return nil, &FastFaileError{message: err.Error()}
   370  		}
   371  		jobs = append(jobs, job)
   372  		return jobs, nil
   373  	}
   374  
   375  	selector, err := metav1.LabelSelectorAsSelector(ops.Spec.ScriptSpec.Selector)
   376  	if err != nil {
   377  		return nil, &FastFaileError{message: err.Error()}
   378  	}
   379  
   380  	pods := &corev1.PodList{}
   381  	if err = cli.List(reqCtx.Ctx, pods, client.InNamespace(cluster.Namespace),
   382  		client.MatchingLabels{
   383  			constant.AppInstanceLabelKey:    cluster.Name,
   384  			constant.KBAppComponentLabelKey: component.Name,
   385  		},
   386  		client.MatchingLabelsSelector{Selector: selector},
   387  	); err != nil {
   388  		return nil, &FastFaileError{message: err.Error()}
   389  	} else if len(pods.Items) == 0 {
   390  		return nil, &FastFaileError{message: "no pods found"}
   391  	}
   392  
   393  	for _, pod := range pods.Items {
   394  		endpoint = pod.Status.PodIP
   395  		if job, err = buildJob(endpoint); err != nil {
   396  			return nil, &FastFaileError{message: err.Error()}
   397  		} else {
   398  			jobs = append(jobs, job)
   399  		}
   400  	}
   401  	return jobs, nil
   402  }
   403  
   404  func getDataScriptJobLabels(cluster, component, request string) map[string]string {
   405  	return map[string]string{
   406  		constant.AppInstanceLabelKey:    cluster,
   407  		constant.KBAppComponentLabelKey: component,
   408  		constant.OpsRequestNameLabelKey: request,
   409  		constant.OpsRequestTypeLabelKey: string(appsv1alpha1.DataScriptType),
   410  	}
   411  }