github.com/kubeshop/testkube@v1.17.23/pkg/executor/common.go (about) 1 package executor 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "io" 8 "os" 9 "strings" 10 "time" 11 12 "github.com/pkg/errors" 13 14 corev1 "k8s.io/api/core/v1" 15 k8serrors "k8s.io/apimachinery/pkg/api/errors" 16 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 17 "k8s.io/apimachinery/pkg/util/wait" 18 "k8s.io/client-go/kubernetes" 19 tcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" 20 21 executorv1 "github.com/kubeshop/testkube-operator/api/executor/v1" 22 executorsclientv1 "github.com/kubeshop/testkube-operator/pkg/client/executors/v1" 23 "github.com/kubeshop/testkube/pkg/api/v1/testkube" 24 "github.com/kubeshop/testkube/pkg/log" 25 executorsmapper "github.com/kubeshop/testkube/pkg/mapper/executors" 26 "github.com/kubeshop/testkube/pkg/utils" 27 ) 28 29 var ErrPodInitializing = errors.New("PodInitializing") 30 31 const ( 32 // VolumeDir is volume dir 33 VolumeDir = "/data" 34 defaultLogLinesCount = 100 35 // GitUsernameSecretName is git username secret name 36 GitUsernameSecretName = "git-username" 37 // GitTokenSecretName is git token secret name 38 GitTokenSecretName = "git-token" 39 // SlavesConfigsEnv is slave configs for creating slaves in executor 40 SlavesConfigsEnv = "RUNNER_SLAVES_CONFIGS" 41 42 SidecarImage = "kubeshop/testkube-logs-sidecar:v0-3" // TODO - change it to valid image name after deployment will be ready 43 ) 44 45 var RunnerEnvVars = []corev1.EnvVar{ 46 { 47 Name: "DEBUG", 48 Value: getOr("DEBUG", "false"), 49 }, 50 { 51 Name: "RUNNER_ENDPOINT", 52 Value: os.Getenv("STORAGE_ENDPOINT"), 53 }, 54 { 55 Name: "RUNNER_ACCESSKEYID", 56 Value: os.Getenv("STORAGE_ACCESSKEYID"), 57 }, 58 { 59 Name: "RUNNER_SECRETACCESSKEY", 60 Value: os.Getenv("STORAGE_SECRETACCESSKEY"), 61 }, 62 { 63 Name: "RUNNER_REGION", 64 Value: os.Getenv("STORAGE_REGION"), 65 }, 66 { 67 Name: "RUNNER_TOKEN", 68 Value: os.Getenv("STORAGE_TOKEN"), 69 }, 70 { 71 Name: "RUNNER_SSL", 72 Value: getOr("STORAGE_SSL", "false"), 73 }, 74 { 75 Name: "RUNNER_SKIP_VERIFY", 76 Value: getOr("STORAGE_SKIP_VERIFY", "false"), 77 }, 78 { 79 Name: "RUNNER_CERT_FILE", 80 Value: os.Getenv("STORAGE_CERT_FILE"), 81 }, 82 { 83 Name: "RUNNER_KEY_FILE", 84 Value: os.Getenv("STORAGE_KEY_FILE"), 85 }, 86 { 87 Name: "RUNNER_CA_FILE", 88 Value: os.Getenv("STORAGE_CA_FILE"), 89 }, 90 { 91 Name: "RUNNER_SCRAPPERENABLED", 92 Value: getOr("SCRAPPERENABLED", "false"), 93 }, 94 { 95 Name: "RUNNER_DATADIR", 96 Value: VolumeDir, 97 }, 98 { 99 Name: "RUNNER_CDEVENTS_TARGET", 100 Value: os.Getenv("CDEVENTS_TARGET"), 101 }, 102 { 103 Name: "RUNNER_COMPRESSARTIFACTS", 104 Value: getOr("COMPRESSARTIFACTS", "false"), 105 }, 106 { 107 Name: "RUNNER_PRO_MODE", 108 Value: getRunnerProMode(), 109 }, 110 { 111 Name: "RUNNER_PRO_API_KEY", 112 Value: utils.GetEnvVarWithDeprecation("TESTKUBE_PRO_API_KEY", "TESTKUBE_CLOUD_API_KEY", ""), 113 }, 114 { 115 Name: "RUNNER_PRO_API_TLS_INSECURE", 116 Value: utils.GetEnvVarWithDeprecation("TESTKUBE_PRO_TLS_INSECURE", "TESTKUBE_CLOUD_TLS_INSECURE", "false"), 117 }, 118 { 119 Name: "RUNNER_PRO_API_URL", 120 Value: utils.GetEnvVarWithDeprecation("TESTKUBE_PRO_URL", "TESTKUBE_CLOUD_URL", ""), 121 }, 122 { 123 Name: "RUNNER_PRO_API_SKIP_VERIFY", 124 Value: getOr("TESTKUBE_PRO_SKIP_VERIFY", "false"), 125 }, 126 { 127 Name: "RUNNER_PRO_CONNECTION_TIMEOUT", 128 Value: getOr("TESTKUBE_PRO_CONNECTION_TIMEOUT", "10"), 129 }, 130 { 131 Name: "RUNNER_PRO_API_CERT_FILE", 132 Value: os.Getenv("TESTKUBE_PRO_CERT_FILE"), 133 }, 134 { 135 Name: "RUNNER_PRO_API_KEY_FILE", 136 Value: os.Getenv("TESTKUBE_PRO_KEY_FILE"), 137 }, 138 { 139 Name: "RUNNER_PRO_API_CA_FILE", 140 Value: os.Getenv("TESTKUBE_PRO_CA_FILE"), 141 }, 142 { 143 Name: "RUNNER_DASHBOARD_URI", 144 Value: os.Getenv("TESTKUBE_DASHBOARD_URI"), 145 }, 146 { 147 Name: "CI", 148 Value: "1", 149 }, 150 // DEPRECATED: Use RUNNER_PRO_MODE instead 151 { 152 Name: "RUNNER_CLOUD_MODE", 153 Value: getRunnerProMode(), 154 }, 155 // DEPRECATED: Use RUNNER_PRO_API_KEY instead 156 { 157 Name: "RUNNER_CLOUD_API_KEY", 158 Value: utils.GetEnvVarWithDeprecation("TESTKUBE_PRO_API_KEY", "TESTKUBE_CLOUD_API_KEY", ""), 159 }, 160 // DEPRECATED: Use RUNNER_PRO_API_TLS_INSECURE instead 161 { 162 Name: "RUNNER_CLOUD_API_TLS_INSECURE", 163 Value: utils.GetEnvVarWithDeprecation("TESTKUBE_PRO_TLS_INSECURE", "TESTKUBE_CLOUD_TLS_INSECURE", "false"), 164 }, 165 // DEPRECATED: Use RUNNER_PRO_API_URL instead 166 { 167 Name: "RUNNER_CLOUD_API_URL", 168 Value: utils.GetEnvVarWithDeprecation("TESTKUBE_PRO_URL", "TESTKUBE_CLOUD_URL", ""), 169 }, 170 // DEPRECATED: Use RUNNER_PRO_API_SKIP_VERIFY instead 171 { 172 Name: "RUNNER_CLOUD_API_SKIP_VERIFY", 173 Value: getOr("TESTKUBE_PRO_SKIP_VERIFY", "false"), 174 }, 175 } 176 177 type SlavesConfigs struct { 178 Images SlaveImages `json:"images"` 179 ServiceAccountName string `json:"serviceAccountName"` 180 CertificateSecret string `json:"certificateSecret"` 181 SlavePodTemplate string `json:"slavePodTemplate"` 182 ImagePullSecrets []string `json:"imagePullSecrets"` 183 EnvConfigMaps []testkube.EnvReference `json:"envConfigMaps"` 184 EnvSecrets []testkube.EnvReference `json:"envSecrets"` 185 ActiveDeadlineSeconds int `json:"activeDeadlineSeconds"` 186 Features testkube.Features `json:"features"` 187 NatsUri string `json:"natsUri"` 188 LogSidecarImage string `json:"logSidecarImage"` 189 RunnerCustomCASecret string `json:"runnerCustomCASecret"` 190 } 191 192 type SlaveImages struct { 193 Init string `json:"init"` 194 Slave string `json:"slave"` 195 Registry string `json:"registry"` 196 } 197 198 func GetSlavesConfigs(initImage string, 199 slavesMeta executorv1.SlavesMeta, 200 registry string, 201 serviceAccountName string, 202 certificateSecret string, 203 slavePodTemplate string, 204 imagePullSecrets []string, 205 envConfigMaps []testkube.EnvReference, 206 envSecrets []testkube.EnvReference, 207 activeDeadlineSeconds int, 208 features testkube.Features, 209 natsUri string, 210 logSidecarImage string, 211 runnerCustomCASecret string, 212 ) SlavesConfigs { 213 return SlavesConfigs{ 214 Images: SlaveImages{ 215 Init: initImage, 216 Slave: slavesMeta.Image, 217 Registry: registry, 218 }, 219 ServiceAccountName: serviceAccountName, 220 CertificateSecret: certificateSecret, 221 SlavePodTemplate: slavePodTemplate, 222 ImagePullSecrets: imagePullSecrets, 223 EnvConfigMaps: envConfigMaps, 224 EnvSecrets: envSecrets, 225 ActiveDeadlineSeconds: activeDeadlineSeconds, 226 Features: features, 227 NatsUri: natsUri, 228 LogSidecarImage: logSidecarImage, 229 RunnerCustomCASecret: runnerCustomCASecret, 230 } 231 } 232 233 func getOr(key, defaultVal string) string { 234 if val, ok := os.LookupEnv(key); ok { 235 return val 236 } 237 return defaultVal 238 } 239 240 func getRunnerProMode() string { 241 val := "false" 242 if utils.GetEnvVarWithDeprecation("TESTKUBE_PRO_API_KEY", "TESTKUBE_CLOUD_API_KEY", "") != "" { 243 val = "true" 244 } 245 return val 246 } 247 248 // Templates contains templates for executor 249 type Templates struct { 250 Job string `json:"job"` 251 PVC string `json:"pvc"` 252 Scraper string `json:"scraper"` 253 Slave string `json:"slave"` 254 } 255 256 // Images contains images for executor 257 type Images struct { 258 Init string 259 Scraper string 260 LogSidecar string 261 } 262 263 // IsPodReady defines if pod is ready or failed for logs scrapping 264 func IsPodReady(c kubernetes.Interface, podName, namespace string) wait.ConditionWithContextFunc { 265 return func(ctx context.Context) (bool, error) { 266 pod, err := c.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{}) 267 if err != nil { 268 return false, err 269 } 270 271 if pod.Status.Phase == corev1.PodSucceeded { 272 return true, nil 273 } 274 275 if err = IsPodFailed(pod); err != nil { 276 return true, err 277 } 278 279 return false, nil 280 } 281 } 282 283 // IsPodLoggable defines if pod is ready to get logs from it 284 func IsPodLoggable(c kubernetes.Interface, podName, namespace string) wait.ConditionWithContextFunc { 285 return func(ctx context.Context) (bool, error) { 286 pod, err := c.CoreV1().Pods(namespace).Get(ctx, podName, metav1.GetOptions{}) 287 if err != nil { 288 return false, err 289 } 290 291 if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodRunning { 292 return true, nil 293 } 294 295 if err = IsPodFailed(pod); err != nil { 296 return true, err 297 } 298 299 return false, nil 300 } 301 } 302 303 // IsWaitStateFailed defines possible failed wait state 304 // those states are defined and throwed as errors in Kubernetes runtime 305 // https://github.com/kubernetes/kubernetes/blob/127f33f63d118d8d61bebaba2a240c60f71c824a/pkg/kubelet/kuberuntime/kuberuntime_container.go#L59 306 func IsWaitStateFailed(state string) bool { 307 var failedWaitingStates = []string{ 308 "CreateContainerConfigError", 309 "PreCreateHookError", 310 "CreateContainerError", 311 "PreStartHookError", 312 "PostStartHookError", 313 } 314 315 for _, fws := range failedWaitingStates { 316 if state == fws { 317 return true 318 } 319 } 320 321 return false 322 } 323 324 // IsPodFailed checks if pod failed 325 // pod can be in wait state with reason which is error for us on the end 326 func IsPodFailed(pod *corev1.Pod) (err error) { 327 if pod.Status.Phase == corev1.PodFailed { 328 return errors.New(pod.Status.Message) 329 } 330 331 for _, initContainerStatus := range pod.Status.InitContainerStatuses { 332 waitState := initContainerStatus.State.Waiting 333 // TODO there could be more edge cases but didn't found any constants in go libraries 334 if waitState != nil && IsWaitStateFailed(waitState.Reason) { 335 return errors.New(waitState.Message) 336 } 337 } 338 339 return 340 } 341 342 // GetJobPods returns job pods 343 func GetJobPods(ctx context.Context, podsClient tcorev1.PodInterface, jobName string, retryNr, retryCount int) (*corev1.PodList, error) { 344 pods, err := podsClient.List(ctx, metav1.ListOptions{LabelSelector: "job-name=" + jobName}) 345 if err != nil { 346 return nil, err 347 } 348 if retryNr == retryCount { 349 return nil, fmt.Errorf("retry count exceeeded, there are no active pods with given id=%s", jobName) 350 } 351 if len(pods.Items) == 0 { 352 time.Sleep(time.Duration(retryNr * 500 * int(time.Millisecond))) // increase backoff timeout 353 return GetJobPods(ctx, podsClient, jobName, retryNr+1, retryCount) 354 } 355 return pods, nil 356 } 357 358 // GetPodLogs returns pod logs bytes 359 func GetPodLogs(ctx context.Context, c kubernetes.Interface, namespace string, pod corev1.Pod, logLinesCount ...int64) (logs []byte, err error) { 360 var count int64 = defaultLogLinesCount 361 if len(logLinesCount) > 0 { 362 count = logLinesCount[0] 363 } 364 365 var containers []string 366 for _, container := range pod.Spec.InitContainers { 367 containers = append(containers, container.Name) 368 } 369 370 for _, container := range pod.Spec.Containers { 371 containers = append(containers, container.Name) 372 } 373 374 for _, container := range containers { 375 containerLogs, err := GetContainerLogs(ctx, c, &pod, container, namespace, &count) 376 if err != nil { 377 if errors.Is(err, ErrPodInitializing) { 378 return logs, nil 379 } 380 return logs, err 381 } 382 383 logs = append(logs, containerLogs...) 384 } 385 386 return logs, nil 387 } 388 389 // GetContainerLogs returns container logs 390 func GetContainerLogs(ctx context.Context, c kubernetes.Interface, pod *corev1.Pod, container, namespace string, tailLines *int64) ([]byte, error) { 391 podLogOptions := corev1.PodLogOptions{ 392 Container: container, 393 } 394 395 podLogRequest := c.CoreV1(). 396 Pods(namespace). 397 GetLogs(pod.Name, &podLogOptions) 398 399 stream, err := podLogRequest.Stream(ctx) 400 if err != nil { 401 isPodInitializingError := strings.Contains(err.Error(), "PodInitializing") 402 if isPodInitializingError { 403 return nil, errors.WithStack(ErrPodInitializing) 404 } 405 406 return nil, err 407 } 408 defer stream.Close() 409 410 var buff bytes.Buffer 411 _, err = io.Copy(&buff, stream) 412 if err != nil { 413 return nil, err 414 } 415 416 return buff.Bytes(), nil 417 } 418 419 // AbortJob - aborts Kubernetes Job with no grace period 420 func AbortJob(ctx context.Context, c kubernetes.Interface, namespace string, jobName string) (*testkube.ExecutionResult, error) { 421 var zero int64 = 0 422 bg := metav1.DeletePropagationBackground 423 jobs := c.BatchV1().Jobs(namespace) 424 err := jobs.Delete(ctx, jobName, metav1.DeleteOptions{ 425 GracePeriodSeconds: &zero, 426 PropagationPolicy: &bg, 427 }) 428 if err != nil { 429 log.DefaultLogger.Errorf("Error while aborting job %s: %s", jobName, err.Error()) 430 return &testkube.ExecutionResult{ 431 Status: testkube.ExecutionStatusFailed, 432 Output: err.Error(), 433 }, nil 434 } 435 log.DefaultLogger.Infof("Job %s aborted", jobName) 436 return &testkube.ExecutionResult{ 437 Status: testkube.ExecutionStatusAborted, 438 }, nil 439 } 440 441 // SyncDefaultExecutors creates or updates default executors 442 func SyncDefaultExecutors( 443 executorsClient executorsclientv1.Interface, 444 namespace string, 445 executors []testkube.ExecutorDetails, 446 readOnlyExecutors bool, 447 ) (images Images, err error) { 448 if len(executors) == 0 { 449 return images, nil 450 } 451 452 // TODO - remove it after merging helm templates fully 453 images.LogSidecar = SidecarImage 454 455 for _, executor := range executors { 456 457 if executor.Executor == nil { 458 continue 459 } 460 461 if executor.Name == "logs-sidecar" { 462 images.LogSidecar = executor.Executor.Image 463 continue 464 } 465 466 if executor.Name == "init-executor" { 467 images.Init = executor.Executor.Image 468 continue 469 } 470 471 if executor.Name == "scraper-executor" { 472 images.Scraper = executor.Executor.Image 473 continue 474 } 475 476 if readOnlyExecutors { 477 continue 478 } 479 480 obj := &executorv1.Executor{ 481 ObjectMeta: metav1.ObjectMeta{ 482 Name: executor.Name, 483 Namespace: namespace, 484 }, 485 Spec: executorv1.ExecutorSpec{ 486 Types: executor.Executor.Types, 487 ExecutorType: executorv1.ExecutorType(executor.Executor.ExecutorType), 488 Image: executor.Executor.Image, 489 Slaves: executorsmapper.MapSlavesConfigsToCRD(executor.Executor.Slaves), 490 Command: executor.Executor.Command, 491 Args: executor.Executor.Args, 492 Features: executorsmapper.MapFeaturesToCRD(executor.Executor.Features), 493 ContentTypes: executorsmapper.MapContentTypesToCRD(executor.Executor.ContentTypes), 494 Meta: executorsmapper.MapMetaToCRD(executor.Executor.Meta), 495 }, 496 } 497 498 result, err := executorsClient.Get(executor.Name) 499 if err != nil && !k8serrors.IsNotFound(err) { 500 return images, err 501 } 502 if err != nil { 503 if _, err = executorsClient.Create(obj); err != nil { 504 return images, err 505 } 506 } else { 507 obj.Spec.JobTemplate = result.Spec.JobTemplate 508 obj.Spec.JobTemplateReference = result.Spec.JobTemplateReference 509 obj.Spec.UseDataDirAsWorkingDir = result.Spec.UseDataDirAsWorkingDir 510 result.Spec = obj.Spec 511 if _, err = executorsClient.Update(result); err != nil { 512 return images, err 513 } 514 } 515 } 516 517 return images, nil 518 } 519 520 // GetPodErrorMessage returns pod error message 521 func GetPodErrorMessage(ctx context.Context, client kubernetes.Interface, pod *corev1.Pod) string { 522 message := "" 523 if pod.Status.Message != "" || pod.Status.Reason != "" { 524 message = fmt.Sprintf("pod message: %s reason: %s", pod.Status.Message, pod.Status.Reason) 525 } 526 527 for _, initContainerStatus := range pod.Status.InitContainerStatuses { 528 if initContainerStatus.State.Terminated != nil && 529 (initContainerStatus.State.Terminated.ExitCode > 1 || initContainerStatus.State.Terminated.ExitCode < -1) && 530 (initContainerStatus.State.Terminated.Message != "" || initContainerStatus.State.Terminated.Reason != "") { 531 if message != "" { 532 message += "\n" 533 } 534 535 message += fmt.Sprintf("init container message: %s reason: %s", initContainerStatus.State.Terminated.Message, 536 initContainerStatus.State.Terminated.Reason) 537 message += fmt.Sprintf("\nexit code: %d", initContainerStatus.State.Terminated.ExitCode) 538 return message 539 } 540 } 541 542 for _, containerStatus := range pod.Status.ContainerStatuses { 543 if containerStatus.State.Terminated != nil && 544 (containerStatus.State.Terminated.ExitCode > 1 || containerStatus.State.Terminated.ExitCode < -1) && 545 (containerStatus.State.Terminated.Message != "" || containerStatus.State.Terminated.Reason != "") { 546 if message != "" { 547 message += "\n" 548 } 549 550 message += fmt.Sprintf("test container message: %s reason: %s", containerStatus.State.Terminated.Message, 551 containerStatus.State.Terminated.Reason) 552 message += fmt.Sprintf("\nexit code: %d", containerStatus.State.Terminated.ExitCode) 553 return message 554 } 555 } 556 557 if message == "" { 558 message = fmt.Sprintf("execution pod %s failed", pod.Name) 559 } 560 561 return message 562 } 563 564 // GetPodExitCode returns pod exit code 565 func GetPodExitCode(pod *corev1.Pod) int32 { 566 for _, initContainerStatus := range pod.Status.InitContainerStatuses { 567 if initContainerStatus.State.Terminated != nil && initContainerStatus.State.Terminated.ExitCode != 0 { 568 return initContainerStatus.State.Terminated.ExitCode 569 } 570 } 571 572 for _, containerStatus := range pod.Status.ContainerStatuses { 573 if containerStatus.State.Terminated != nil && containerStatus.State.Terminated.ExitCode != 0 { 574 return containerStatus.State.Terminated.ExitCode 575 } 576 } 577 578 return 0 579 } 580 581 // GetPodEventsSummary returns pod events summary 582 func GetPodEventsSummary(ctx context.Context, client kubernetes.Interface, pod *corev1.Pod) (string, error) { 583 message := "" 584 list, err := client.CoreV1().Events(pod.Namespace).List(ctx, metav1.ListOptions{}) 585 if err != nil { 586 return "", err 587 } 588 589 for _, item := range list.Items { 590 if item.InvolvedObject.Name != pod.Name { 591 continue 592 } 593 594 if message != "" { 595 message += "\n" 596 } 597 598 message += fmt.Sprintf("event type: %s, reason: %s, message: %s", 599 item.Type, item.Reason, item.Message) 600 } 601 602 return message, nil 603 }