github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/controllers/apps/operations/datascript.go (about) 1 /* 2 Copyright (C) 2022-2023 ApeCloud Co., Ltd 3 4 This file is part of KubeBlocks project 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU Affero General Public License as published by 8 the Free Software Foundation, either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU Affero General Public License for more details. 15 16 You should have received a copy of the GNU Affero General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. 18 */ 19 20 package operations 21 22 import ( 23 "fmt" 24 "strings" 25 "time" 26 27 "github.com/sethvargo/go-password/password" 28 29 batchv1 "k8s.io/api/batch/v1" 30 corev1 "k8s.io/api/core/v1" 31 apierrors "k8s.io/apimachinery/pkg/api/errors" 32 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 33 "k8s.io/apimachinery/pkg/types" 34 "k8s.io/utils/pointer" 35 "sigs.k8s.io/controller-runtime/pkg/client" 36 "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" 37 38 appsv1alpha1 "github.com/1aal/kubeblocks/apis/apps/v1alpha1" 39 "github.com/1aal/kubeblocks/pkg/constant" 40 componetutil "github.com/1aal/kubeblocks/pkg/controller/component" 41 intctrlutil "github.com/1aal/kubeblocks/pkg/controllerutil" 42 "github.com/1aal/kubeblocks/pkg/lorry/engines/register" 43 viper "github.com/1aal/kubeblocks/pkg/viperx" 44 ) 45 46 var _ OpsHandler = DataScriptOpsHandler{} 47 var _ error = &FastFaileError{} 48 49 // DataScriptOpsHandler handles DataScript operation, it is more like a one-time command operation. 50 type DataScriptOpsHandler struct { 51 } 52 53 // FastFaileError is an error type that will not retry the operation.
54 type FastFaileError struct { 55 message string 56 } 57 58 func (e *FastFaileError) Error() string { 59 return fmt.Sprintf("fail with message: %s", e.message) 60 } 61 62 func init() { 63 // ToClusterPhase is not defined, because 'datascript' does not affect the cluster status. 64 dataScriptOpsHander := DataScriptOpsHandler{} 65 dataScriptBehavior := OpsBehaviour{ 66 FromClusterPhases: []appsv1alpha1.ClusterPhase{appsv1alpha1.RunningClusterPhase}, 67 OpsHandler: dataScriptOpsHander, 68 } 69 opsMgr := GetOpsManager() 70 opsMgr.RegisterOps(appsv1alpha1.DataScriptType, dataScriptBehavior) 71 } 72 73 // Action implements OpsHandler.Action 74 // It will create a job to execute the script. It will fail fast if the script is not valid, or the target pod is not found. 75 func (o DataScriptOpsHandler) Action(reqCtx intctrlutil.RequestCtx, cli client.Client, opsResource *OpsResource) error { 76 opsRequest := opsResource.OpsRequest 77 cluster := opsResource.Cluster 78 spec := opsRequest.Spec.ScriptSpec 79 80 // get component 81 component := cluster.Spec.GetComponentByName(spec.ComponentName) 82 if component == nil { 83 // we have checked component exists in validation, so this should not happen 84 return &FastFaileError{message: fmt.Sprintf("component %s not found in cluster %s", spec.ComponentName, cluster.Name)} 85 } 86 87 clusterDef, err := getClusterDefByName(reqCtx.Ctx, cli, cluster.Spec.ClusterDefRef) 88 if err != nil { 89 if apierrors.IsNotFound(err) { 90 // fail fast if cluster def does not exists 91 return &FastFaileError{message: err.Error()} 92 } 93 return err 94 } 95 // get componentDef 96 componentDef := clusterDef.GetComponentDefByName(component.ComponentDefRef) 97 if componentDef == nil { 98 return &FastFaileError{message: fmt.Sprintf("componentDef %s not found in clusterDef %s", component.ComponentDefRef, clusterDef.Name)} 99 } 100 101 // create jobs 102 var jobs []*batchv1.Job 103 if jobs, err = buildDataScriptJobs(reqCtx, cli, opsResource.Cluster, 
component, opsRequest, componentDef.CharacterType); err != nil { 104 return err 105 } 106 for _, job := range jobs { 107 if err = cli.Create(reqCtx.Ctx, job); err != nil { 108 return err 109 } 110 } 111 return nil 112 } 113 114 // ReconcileAction implements OpsHandler.ReconcileAction 115 // It will check the job status, and update the opsRequest status. 116 // If the job is neither completed nor failed, it will retry after 1 second. 117 // If the job is completed, it will return OpsSucceedPhase 118 // If the job is failed, it will return OpsFailedPhase. 119 func (o DataScriptOpsHandler) ReconcileAction(reqCtx intctrlutil.RequestCtx, cli client.Client, opsResource *OpsResource) (appsv1alpha1.OpsPhase, time.Duration, error) { 120 opsRequest := opsResource.OpsRequest 121 cluster := opsResource.Cluster 122 spec := opsRequest.Spec.ScriptSpec 123 124 meetsJobConditions := func(job *batchv1.Job, condType batchv1.JobConditionType, condStatus corev1.ConditionStatus) bool { 125 for _, condition := range job.Status.Conditions { 126 if condition.Type == condType && condition.Status == condStatus { 127 return true 128 } 129 } 130 return false 131 } 132 133 // retrieve job for this opsRequest 134 jobList := &batchv1.JobList{} 135 if err := cli.List(reqCtx.Ctx, jobList, client.InNamespace(cluster.Namespace), client.MatchingLabels(getDataScriptJobLabels(cluster.Name, spec.ComponentName, opsRequest.Name))); err != nil { 136 return appsv1alpha1.OpsFailedPhase, 0, err 137 } else if len(jobList.Items) == 0 { 138 return appsv1alpha1.OpsFailedPhase, 0, fmt.Errorf("job not found") 139 } 140 141 var ( 142 expectedCount int 143 succedCount int 144 failedCount int 145 ) 146 147 expectedCount = len(jobList.Items) 148 // check job status 149 for _, job := range jobList.Items { 150 if meetsJobConditions(&job, batchv1.JobComplete, corev1.ConditionTrue) { 151 succedCount++ 152 } else if meetsJobConditions(&job, batchv1.JobFailed, corev1.ConditionTrue) { 153 failedCount++ 154 } 155 } 156 157 
opsStatus := appsv1alpha1.OpsRunningPhase 158 if succedCount == expectedCount { 159 opsStatus = appsv1alpha1.OpsSucceedPhase 160 } else if failedCount+succedCount == expectedCount { 161 opsStatus = appsv1alpha1.OpsFailedPhase 162 } 163 164 patch := client.MergeFrom(opsRequest.DeepCopy()) 165 opsRequest.Status.Progress = fmt.Sprintf("%d/%d", succedCount, expectedCount) 166 167 // patch OpsRequest.status.components 168 if err := cli.Status().Patch(reqCtx.Ctx, opsRequest, patch); err != nil { 169 return opsStatus, time.Second, err 170 } 171 172 if succedCount == expectedCount { 173 return appsv1alpha1.OpsSucceedPhase, 0, nil 174 } else if failedCount+succedCount == expectedCount { 175 return appsv1alpha1.OpsFailedPhase, 0, fmt.Errorf("%d job execution failed, please check the job log ", failedCount) 176 } 177 return appsv1alpha1.OpsRunningPhase, 5 * time.Second, nil 178 } 179 180 func (o DataScriptOpsHandler) ActionStartedCondition(reqCtx intctrlutil.RequestCtx, cli client.Client, opsRes *OpsResource) (*metav1.Condition, error) { 181 return appsv1alpha1.NewDataScriptCondition(opsRes.OpsRequest), nil 182 } 183 184 func (o DataScriptOpsHandler) SaveLastConfiguration(reqCtx intctrlutil.RequestCtx, cli client.Client, opsResource *OpsResource) error { 185 return nil 186 } 187 188 // getScriptContent will get script content from script or scriptFrom 189 func getScriptContent(reqCtx intctrlutil.RequestCtx, cli client.Client, spec *appsv1alpha1.ScriptSpec) ([]string, error) { 190 script := make([]string, 0) 191 if len(spec.Script) > 0 { 192 script = append(script, spec.Script...) 
193 } 194 if spec.ScriptFrom == nil { 195 return script, nil 196 } 197 configMapsRefs := spec.ScriptFrom.ConfigMapRef 198 secretRefs := spec.ScriptFrom.SecretRef 199 200 if len(configMapsRefs) > 0 { 201 obj := &corev1.ConfigMap{} 202 for _, cm := range configMapsRefs { 203 if err := cli.Get(reqCtx.Ctx, types.NamespacedName{Namespace: reqCtx.Req.Namespace, Name: cm.Name}, obj); err != nil { 204 return nil, err 205 } 206 script = append(script, obj.Data[cm.Key]) 207 } 208 } 209 210 if len(secretRefs) > 0 { 211 obj := &corev1.Secret{} 212 for _, secret := range secretRefs { 213 if err := cli.Get(reqCtx.Ctx, types.NamespacedName{Namespace: reqCtx.Req.Namespace, Name: secret.Name}, obj); err != nil { 214 return nil, err 215 } 216 if obj.Data[secret.Key] == nil { 217 return nil, fmt.Errorf("secret %s/%s does not have key %s", reqCtx.Req.Namespace, secret.Name, secret.Key) 218 } 219 secretData := string(obj.Data[secret.Key]) 220 // trim the last \n 221 if len(secretData) > 0 && secretData[len(secretData)-1] == '\n' { 222 secretData = secretData[:len(secretData)-1] 223 } 224 script = append(script, secretData) 225 } 226 } 227 return script, nil 228 } 229 230 func getTargetService(reqCtx intctrlutil.RequestCtx, cli client.Client, clusterObjectKey client.ObjectKey, componentName string) (string, error) { 231 // get svc 232 service := &corev1.Service{} 233 serviceName := fmt.Sprintf("%s-%s", clusterObjectKey.Name, componentName) 234 if err := cli.Get(reqCtx.Ctx, types.NamespacedName{Namespace: clusterObjectKey.Namespace, Name: serviceName}, service); err != nil { 235 return "", err 236 } 237 return serviceName, nil 238 } 239 240 func buildDataScriptJobs(reqCtx intctrlutil.RequestCtx, cli client.Client, cluster *appsv1alpha1.Cluster, component *appsv1alpha1.ClusterComponentSpec, 241 ops *appsv1alpha1.OpsRequest, charType string) ([]*batchv1.Job, error) { 242 engineForJob, err := register.NewClusterCommands(charType) 243 if err != nil || engineForJob == nil { 244 return nil, 
&FastFaileError{message: err.Error()} 245 } 246 247 buildJob := func(endpoint string) (*batchv1.Job, error) { 248 envs := []corev1.EnvVar{} 249 250 envs = append(envs, corev1.EnvVar{ 251 Name: "KB_HOST", 252 Value: endpoint, 253 }) 254 255 // parse username and password 256 secretFrom := ops.Spec.ScriptSpec.Secret 257 if secretFrom == nil { 258 secretFrom = &appsv1alpha1.ScriptSecret{ 259 Name: fmt.Sprintf("%s-conn-credential", cluster.Name), 260 PasswordKey: "password", 261 UsernameKey: "username", 262 } 263 } 264 // verify secrets exist 265 if err := cli.Get(reqCtx.Ctx, types.NamespacedName{Namespace: reqCtx.Req.Namespace, Name: secretFrom.Name}, &corev1.Secret{}); err != nil { 266 return nil, &FastFaileError{message: err.Error()} 267 } 268 269 envs = append(envs, corev1.EnvVar{ 270 Name: "KB_USER", 271 ValueFrom: &corev1.EnvVarSource{ 272 SecretKeyRef: &corev1.SecretKeySelector{ 273 Key: secretFrom.UsernameKey, 274 LocalObjectReference: corev1.LocalObjectReference{ 275 Name: secretFrom.Name, 276 }, 277 }, 278 }, 279 }) 280 envs = append(envs, corev1.EnvVar{ 281 Name: "KB_PASSWD", 282 ValueFrom: &corev1.EnvVarSource{ 283 SecretKeyRef: &corev1.SecretKeySelector{ 284 Key: secretFrom.PasswordKey, 285 LocalObjectReference: corev1.LocalObjectReference{ 286 Name: secretFrom.Name, 287 }, 288 }, 289 }, 290 }) 291 292 // parse scripts 293 scripts, err := getScriptContent(reqCtx, cli, ops.Spec.ScriptSpec) 294 if err != nil { 295 return nil, &FastFaileError{message: err.Error()} 296 } 297 298 envs = append(envs, corev1.EnvVar{ 299 Name: "KB_SCRIPT", 300 Value: strings.Join(scripts, "\n"), 301 }) 302 303 jobCmdTpl, envVars, err := engineForJob.ExecuteCommand(scripts) 304 if err != nil { 305 return nil, &FastFaileError{message: err.Error()} 306 } 307 if envVars != nil { 308 envs = append(envs, envVars...) 
309 } 310 containerImg := viper.GetString(constant.KBDataScriptClientsImage) 311 if len(ops.Spec.ScriptSpec.Image) != 0 { 312 containerImg = ops.Spec.ScriptSpec.Image 313 } 314 if len(containerImg) == 0 { 315 return nil, &FastFaileError{message: "image is empty"} 316 } 317 318 container := corev1.Container{ 319 Name: "datascript", 320 Image: containerImg, 321 ImagePullPolicy: corev1.PullPolicy(viper.GetString(constant.KBImagePullPolicy)), 322 Command: jobCmdTpl, 323 Env: envs, 324 } 325 randomStr, _ := password.Generate(4, 0, 0, true, false) 326 jobName := fmt.Sprintf("%s-%s-%s-%s", cluster.Name, "script", ops.Name, randomStr) 327 if len(jobName) > 63 { 328 jobName = jobName[:63] 329 } 330 331 job := &batchv1.Job{ 332 ObjectMeta: metav1.ObjectMeta{ 333 Name: jobName, 334 Namespace: cluster.Namespace, 335 }, 336 } 337 338 // set backoff limit to 0, so that the job will not be restarted 339 job.Spec.BackoffLimit = pointer.Int32(0) 340 job.Spec.Template.Spec.RestartPolicy = corev1.RestartPolicyNever 341 job.Spec.Template.Spec.Containers = []corev1.Container{container} 342 343 // add labels 344 job.Labels = getDataScriptJobLabels(cluster.Name, component.Name, ops.Name) 345 // add tolerations 346 tolerations, err := componetutil.BuildTolerations(cluster, component) 347 if err != nil { 348 return nil, &FastFaileError{message: err.Error()} 349 } 350 job.Spec.Template.Spec.Tolerations = tolerations 351 // add owner reference 352 scheme, _ := appsv1alpha1.SchemeBuilder.Build() 353 if err := controllerutil.SetOwnerReference(ops, job, scheme); err != nil { 354 return nil, &FastFaileError{message: err.Error()} 355 } 356 return job, nil 357 } 358 359 // parse kb host 360 var endpoint string 361 var job *batchv1.Job 362 363 jobs := make([]*batchv1.Job, 0) 364 if ops.Spec.ScriptSpec.Selector == nil { 365 if endpoint, err = getTargetService(reqCtx, cli, client.ObjectKeyFromObject(cluster), component.Name); err != nil { 366 return nil, &FastFaileError{message: err.Error()} 367 } 
368 if job, err = buildJob(endpoint); err != nil { 369 return nil, &FastFaileError{message: err.Error()} 370 } 371 jobs = append(jobs, job) 372 return jobs, nil 373 } 374 375 selector, err := metav1.LabelSelectorAsSelector(ops.Spec.ScriptSpec.Selector) 376 if err != nil { 377 return nil, &FastFaileError{message: err.Error()} 378 } 379 380 pods := &corev1.PodList{} 381 if err = cli.List(reqCtx.Ctx, pods, client.InNamespace(cluster.Namespace), 382 client.MatchingLabels{ 383 constant.AppInstanceLabelKey: cluster.Name, 384 constant.KBAppComponentLabelKey: component.Name, 385 }, 386 client.MatchingLabelsSelector{Selector: selector}, 387 ); err != nil { 388 return nil, &FastFaileError{message: err.Error()} 389 } else if len(pods.Items) == 0 { 390 return nil, &FastFaileError{message: "no pods found"} 391 } 392 393 for _, pod := range pods.Items { 394 endpoint = pod.Status.PodIP 395 if job, err = buildJob(endpoint); err != nil { 396 return nil, &FastFaileError{message: err.Error()} 397 } else { 398 jobs = append(jobs, job) 399 } 400 } 401 return jobs, nil 402 } 403 404 func getDataScriptJobLabels(cluster, component, request string) map[string]string { 405 return map[string]string{ 406 constant.AppInstanceLabelKey: cluster, 407 constant.KBAppComponentLabelKey: component, 408 constant.OpsRequestNameLabelKey: request, 409 constant.OpsRequestTypeLabelKey: string(appsv1alpha1.DataScriptType), 410 } 411 }