github.com/kubeshop/testkube@v1.17.23/pkg/executor/client/job.go (about) 1 package client 2 3 import ( 4 "bufio" 5 "bytes" 6 "context" 7 "encoding/json" 8 "fmt" 9 "io" 10 "os" 11 "path/filepath" 12 "strings" 13 "sync" 14 "text/template" 15 "time" 16 17 "github.com/kubeshop/testkube/pkg/featureflags" 18 "github.com/kubeshop/testkube/pkg/repository/config" 19 20 "github.com/pkg/errors" 21 22 "github.com/kubeshop/testkube/pkg/version" 23 24 "github.com/kubeshop/testkube/pkg/repository/result" 25 26 "go.uber.org/zap" 27 batchv1 "k8s.io/api/batch/v1" 28 corev1 "k8s.io/api/core/v1" 29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 "k8s.io/apimachinery/pkg/util/wait" 31 "k8s.io/apimachinery/pkg/util/yaml" 32 "k8s.io/client-go/kubernetes" 33 "sigs.k8s.io/kustomize/kyaml/yaml/merge2" 34 35 kyaml "sigs.k8s.io/kustomize/kyaml/yaml" 36 37 executorv1 "github.com/kubeshop/testkube-operator/api/executor/v1" 38 templatesv1 "github.com/kubeshop/testkube-operator/pkg/client/templates/v1" 39 testexecutionsv1 "github.com/kubeshop/testkube-operator/pkg/client/testexecutions/v1" 40 testsv3 "github.com/kubeshop/testkube-operator/pkg/client/tests/v3" 41 "github.com/kubeshop/testkube/pkg/api/v1/testkube" 42 "github.com/kubeshop/testkube/pkg/event" 43 "github.com/kubeshop/testkube/pkg/executor" 44 "github.com/kubeshop/testkube/pkg/executor/agent" 45 "github.com/kubeshop/testkube/pkg/executor/env" 46 "github.com/kubeshop/testkube/pkg/executor/output" 47 "github.com/kubeshop/testkube/pkg/log" 48 logsclient "github.com/kubeshop/testkube/pkg/logs/client" 49 "github.com/kubeshop/testkube/pkg/logs/events" 50 testexecutionsmapper "github.com/kubeshop/testkube/pkg/mapper/testexecutions" 51 testsmapper "github.com/kubeshop/testkube/pkg/mapper/tests" 52 "github.com/kubeshop/testkube/pkg/telemetry" 53 "github.com/kubeshop/testkube/pkg/utils" 54 ) 55 56 const ( 57 // GitUsernameSecretName is git username secret name 58 GitUsernameSecretName = "git-username" 59 // GitUsernameEnvVarName is git username 
environment var name 60 GitUsernameEnvVarName = "RUNNER_GITUSERNAME" 61 // GitTokenSecretName is git token secret name 62 GitTokenSecretName = "git-token" 63 // GitTokenEnvVarName is git token environment var name 64 GitTokenEnvVarName = "RUNNER_GITTOKEN" 65 // SecretTest is a test secret 66 SecretTest = "secrets" 67 // SecretSource is a source secret 68 SecretSource = "source-secrets" 69 70 pollTimeout = 24 * time.Hour 71 pollInterval = 200 * time.Millisecond 72 // pollJobStatus is interval for checking if job timeout occurred 73 pollJobStatus = 1 * time.Second 74 // timeoutIndicator is string that is added to job logs when timeout occurs 75 timeoutIndicator = "DeadlineExceeded" 76 77 logsStreamBuffer = 1000 78 ) 79 80 // NewJobExecutor creates new job executor 81 func NewJobExecutor( 82 repo result.Repository, 83 images executor.Images, 84 templates executor.Templates, 85 serviceAccountNames map[string]string, 86 metrics ExecutionMetric, 87 emiter *event.Emitter, 88 configMap config.Repository, 89 testsClient testsv3.Interface, 90 clientset kubernetes.Interface, 91 testExecutionsClient testexecutionsv1.Interface, 92 templatesClient templatesv1.Interface, 93 registry string, 94 podStartTimeout time.Duration, 95 clusterID string, 96 dashboardURI string, 97 apiURI string, 98 natsURI string, 99 debug bool, 100 logsStream logsclient.Stream, 101 features featureflags.FeatureFlags, 102 defaultStorageClassName string, 103 ) (client *JobExecutor, err error) { 104 if serviceAccountNames == nil { 105 serviceAccountNames = make(map[string]string) 106 } 107 108 return &JobExecutor{ 109 ClientSet: clientset, 110 Repository: repo, 111 Log: log.DefaultLogger, 112 images: images, 113 templates: templates, 114 serviceAccountNames: serviceAccountNames, 115 metrics: metrics, 116 Emitter: emiter, 117 configMap: configMap, 118 testsClient: testsClient, 119 testExecutionsClient: testExecutionsClient, 120 templatesClient: templatesClient, 121 registry: registry, 122 podStartTimeout: 
podStartTimeout, 123 clusterID: clusterID, 124 dashboardURI: dashboardURI, 125 apiURI: apiURI, 126 natsURI: natsURI, 127 debug: debug, 128 logsStream: logsStream, 129 features: features, 130 defaultStorageClassName: defaultStorageClassName, 131 }, nil 132 } 133 134 type ExecutionMetric interface { 135 IncAndObserveExecuteTest(execution testkube.Execution, dashboardURI string) 136 } 137 138 // JobExecutor is container for managing job executor dependencies 139 type JobExecutor struct { 140 Repository result.Repository 141 Log *zap.SugaredLogger 142 ClientSet kubernetes.Interface 143 Cmd string 144 images executor.Images 145 templates executor.Templates 146 serviceAccountNames map[string]string 147 metrics ExecutionMetric 148 Emitter *event.Emitter 149 configMap config.Repository 150 testsClient testsv3.Interface 151 testExecutionsClient testexecutionsv1.Interface 152 templatesClient templatesv1.Interface 153 registry string 154 podStartTimeout time.Duration 155 clusterID string 156 dashboardURI string 157 apiURI string 158 natsURI string 159 debug bool 160 logsStream logsclient.Stream 161 features featureflags.FeatureFlags 162 defaultStorageClassName string 163 } 164 165 type JobOptions struct { 166 Name string 167 Namespace string 168 Image string 169 ImagePullSecrets []string 170 Jsn string 171 TestName string 172 InitImage string 173 JobTemplate string 174 Envs map[string]string 175 SecretEnvs map[string]string 176 HTTPProxy string 177 HTTPSProxy string 178 UsernameSecret *testkube.SecretRef 179 TokenSecret *testkube.SecretRef 180 RunnerCustomCASecret string 181 CertificateSecret string 182 AgentAPITLSSecret string 183 Variables map[string]testkube.Variable 184 ActiveDeadlineSeconds int64 185 ServiceAccountName string 186 JobTemplateExtensions string 187 EnvConfigMaps []testkube.EnvReference 188 EnvSecrets []testkube.EnvReference 189 Labels map[string]string 190 Registry string 191 ClusterID string 192 ArtifactRequest *testkube.ArtifactRequest 193 WorkingDir 
string 194 ExecutionNumber int32 195 ContextType string 196 ContextData string 197 Debug bool 198 NatsUri string 199 LogSidecarImage string 200 APIURI string 201 SlavePodTemplate string 202 Features featureflags.FeatureFlags 203 PvcTemplate string 204 PvcTemplateExtensions string 205 } 206 207 // Logs returns job logs stream channel using kubernetes api 208 func (c *JobExecutor) Logs(ctx context.Context, id, namespace string) (out chan output.Output, err error) { 209 out = make(chan output.Output, logsStreamBuffer) 210 logs := make(chan []byte, logsStreamBuffer) 211 212 go func() { 213 defer func() { 214 c.Log.Debug("closing JobExecutor.Logs out log") 215 close(out) 216 }() 217 218 if err := c.TailJobLogs(ctx, id, namespace, logs); err != nil { 219 out <- output.NewOutputError(err) 220 return 221 } 222 223 for l := range logs { 224 out <- output.GetLogEntry(l) 225 } 226 }() 227 228 return 229 } 230 231 // Execute starts new external test execution, reads data and returns ID 232 // Execution is started asynchronously client can check later for results 233 func (c *JobExecutor) Execute(ctx context.Context, execution *testkube.Execution, options ExecuteOptions) (result *testkube.ExecutionResult, err error) { 234 result = testkube.NewRunningExecutionResult() 235 execution.ExecutionResult = result 236 237 err = c.CreateJob(ctx, *execution, options) 238 if err != nil { 239 if cErr := c.cleanPVCVolume(ctx, execution); cErr != nil { 240 c.Log.Errorw("error deleting pvc volume", "error", cErr) 241 } 242 243 return result.Err(err), err 244 } 245 246 c.streamLog(ctx, execution.Id, events.NewLog("created kubernetes job").WithSource(events.SourceJobExecutor)) 247 248 if !options.Sync { 249 go c.MonitorJobForTimeout(ctx, execution.Id, execution.TestNamespace) 250 } 251 252 podsClient := c.ClientSet.CoreV1().Pods(execution.TestNamespace) 253 pods, err := executor.GetJobPods(ctx, podsClient, execution.Id, 1, 10) 254 if err != nil { 255 if cErr := c.cleanPVCVolume(ctx, execution); 
cErr != nil { 256 c.Log.Errorw("error deleting pvc volume", "error", cErr) 257 } 258 259 return result.Err(err), err 260 } 261 262 l := c.Log.With("executionID", execution.Id, "type", "async") 263 264 c.streamLog(ctx, execution.Id, events.NewLog("waiting for pod to spin up").WithSource(events.SourceJobExecutor)) 265 266 for _, pod := range pods.Items { 267 if pod.Status.Phase != corev1.PodRunning && pod.Labels["job-name"] == execution.Id { 268 // for sync block and complete 269 if options.Sync { 270 return c.updateResultsFromPod(ctx, pod, l, execution, options.Request.NegativeTest) 271 } 272 273 // for async start goroutine and return in progress job 274 go func(pod corev1.Pod) { 275 _, err := c.updateResultsFromPod(ctx, pod, l, execution, options.Request.NegativeTest) 276 if err != nil { 277 l.Errorw("update results from jobs pod error", "error", err) 278 } 279 }(pod) 280 281 return result, nil 282 } 283 } 284 285 l.Debugw("no pods was found", "totalPodsCount", len(pods.Items)) 286 287 return result, nil 288 } 289 290 func (c *JobExecutor) MonitorJobForTimeout(ctx context.Context, jobName, namespace string) { 291 ticker := time.NewTicker(pollJobStatus) 292 l := c.Log.With("jobName", jobName) 293 for { 294 select { 295 case <-ctx.Done(): 296 l.Infow("context done, stopping job timeout monitor") 297 return 298 case <-ticker.C: 299 jobs, err := c.ClientSet.BatchV1().Jobs(namespace).List(ctx, metav1.ListOptions{LabelSelector: "job-name=" + jobName}) 300 if err != nil { 301 l.Errorw("could not get jobs", "error", err) 302 return 303 } 304 if jobs == nil || len(jobs.Items) == 0 { 305 return 306 } 307 308 job := jobs.Items[0] 309 310 if job.Status.Succeeded > 0 { 311 l.Debugw("job succeeded", "status", "succeded") 312 return 313 } 314 315 if job.Status.Failed > 0 { 316 l.Debugw("job failed") 317 if len(job.Status.Conditions) > 0 { 318 for _, condition := range job.Status.Conditions { 319 l.Infow("job timeout", "condition.reason", condition.Reason) 320 if condition.Reason 
== timeoutIndicator { 321 c.Timeout(ctx, jobName) 322 } 323 } 324 } 325 return 326 } 327 328 if job.Status.Active > 0 { 329 continue 330 } 331 } 332 } 333 } 334 335 // CreateJob creates new Kubernetes job based on execution and execute options 336 func (c *JobExecutor) CreateJob(ctx context.Context, execution testkube.Execution, options ExecuteOptions) error { 337 jobs := c.ClientSet.BatchV1().Jobs(execution.TestNamespace) 338 jobOptions, err := NewJobOptions(c.Log, c.templatesClient, c.images, c.templates, 339 c.serviceAccountNames, c.registry, c.clusterID, c.apiURI, execution, options, c.natsURI, c.debug) 340 if err != nil { 341 return err 342 } 343 344 if jobOptions.ArtifactRequest != nil && 345 (jobOptions.ArtifactRequest.StorageClassName != "" || jobOptions.ArtifactRequest.UseDefaultStorageClassName) { 346 c.Log.Debug("creating persistent volume claim with options", "options", jobOptions) 347 pvcsClient := c.ClientSet.CoreV1().PersistentVolumeClaims(execution.TestNamespace) 348 pvcSpec, err := NewPersistentVolumeClaimSpec(c.Log, NewPVCOptionsFromJobOptions(jobOptions, c.defaultStorageClassName)) 349 if err != nil { 350 return err 351 } 352 353 _, err = pvcsClient.Create(ctx, pvcSpec, metav1.CreateOptions{}) 354 if err != nil { 355 return err 356 } 357 } 358 359 c.Log.Debug("creating job with options", "options", jobOptions) 360 jobSpec, err := NewJobSpec(c.Log, jobOptions) 361 if err != nil { 362 return err 363 } 364 365 _, err = jobs.Create(ctx, jobSpec, metav1.CreateOptions{}) 366 return err 367 } 368 369 func (c *JobExecutor) cleanPVCVolume(ctx context.Context, execution *testkube.Execution) error { 370 if execution.ArtifactRequest != nil && 371 (execution.ArtifactRequest.StorageClassName != "" || execution.ArtifactRequest.UseDefaultStorageClassName) { 372 pvcsClient := c.ClientSet.CoreV1().PersistentVolumeClaims(execution.TestNamespace) 373 if err := pvcsClient.Delete(ctx, execution.Id+"-pvc", metav1.DeleteOptions{}); err != nil { 374 return err 375 } 376 
} 377 378 return nil 379 } 380 381 // updateResultsFromPod watches logs and stores results if execution is finished 382 func (c *JobExecutor) updateResultsFromPod(ctx context.Context, pod corev1.Pod, l *zap.SugaredLogger, execution *testkube.Execution, isNegativeTest bool) (*testkube.ExecutionResult, error) { 383 var err error 384 385 // save stop time and final state 386 defer func() { 387 if err := c.stopExecution(ctx, l, execution, execution.ExecutionResult, isNegativeTest); err != nil { 388 c.streamLog(ctx, execution.Id, events.NewErrorLog(err)) 389 l.Errorw("error stopping execution after updating results from pod", "error", err) 390 } 391 392 if err := c.cleanPVCVolume(ctx, execution); err != nil { 393 l.Errorw("error cleaning pvc volume", "error", err) 394 } 395 }() 396 397 // wait for pod to be loggable 398 if err = wait.PollUntilContextTimeout(ctx, pollInterval, c.podStartTimeout, true, executor.IsPodLoggable(c.ClientSet, pod.Name, execution.TestNamespace)); err != nil { 399 c.streamLog(ctx, execution.Id, events.NewErrorLog(errors.Wrap(err, "can't start test job pod"))) 400 l.Errorw("waiting for pod started error", "error", err) 401 } 402 403 l.Debug("poll immediate waiting for pod") 404 // wait for pod 405 if err = wait.PollUntilContextTimeout(ctx, pollInterval, pollTimeout, true, executor.IsPodReady(c.ClientSet, pod.Name, execution.TestNamespace)); err != nil { 406 // continue on poll err and try to get logs later 407 c.streamLog(ctx, execution.Id, events.NewErrorLog(errors.Wrap(err, "can't read data from pod, pod was not completed"))) 408 l.Errorw("waiting for pod complete error", "error", err) 409 } 410 411 if err != nil { 412 execution.ExecutionResult.Err(err) 413 } 414 l.Debug("poll immediate end") 415 416 c.streamLog(ctx, execution.Id, events.NewLog("analyzing test results and artfacts")) 417 418 logs, err := executor.GetPodLogs(ctx, c.ClientSet, execution.TestNamespace, pod) 419 if err != nil { 420 l.Errorw("get pod logs error", "error", err) 421 
c.streamLog(ctx, execution.Id, events.NewErrorLog(err)) 422 } 423 424 // don't attach logs if logs v2 is enabled - they will be streamed through the logs service 425 attachLogs := !c.features.LogsV2 426 if len(logs) != 0 { 427 // parse job output log (JSON stream) 428 execution.ExecutionResult, err = output.ParseRunnerOutput(logs, attachLogs) 429 if err != nil { 430 l.Errorw("parse output error", "error", err) 431 c.streamLog(ctx, execution.Id, events.NewErrorLog(errors.Wrap(err, "can't get test execution job output"))) 432 return execution.ExecutionResult, err 433 } 434 } 435 436 if execution.ExecutionResult.IsFailed() { 437 errorMessage := execution.ExecutionResult.ErrorMessage 438 if errorMessage == "" { 439 errorMessage = executor.GetPodErrorMessage(ctx, c.ClientSet, &pod) 440 } 441 442 execution.ExecutionResult.ErrorMessage = errorMessage 443 444 c.streamLog(ctx, execution.Id, events.NewErrorLog(errors.Wrap(err, "test execution finished with failed state"))) 445 } else { 446 c.streamLog(ctx, execution.Id, events.NewLog("test execution finshed").WithMetadataEntry("status", string(*execution.ExecutionResult.Status))) 447 } 448 449 // saving result in the defer function 450 return execution.ExecutionResult, nil 451 } 452 453 func (c *JobExecutor) stopExecution(ctx context.Context, l *zap.SugaredLogger, execution *testkube.Execution, result *testkube.ExecutionResult, isNegativeTest bool) error { 454 savedExecution, err := c.Repository.Get(ctx, execution.Id) 455 if err != nil { 456 l.Errorw("get execution error", "error", err) 457 return err 458 } 459 460 logEvent := events.NewLog().WithSource(events.SourceJobExecutor) 461 462 l.Debugw("stopping execution", "executionId", execution.Id, "status", result.Status, "executionStatus", execution.ExecutionResult.Status, "savedExecutionStatus", savedExecution.ExecutionResult.Status) 463 464 c.streamLog(ctx, execution.Id, logEvent.WithContent("stopping execution")) 465 defer c.streamLog(ctx, execution.Id, 
logEvent.WithContent("execution stopped")) 466 467 if savedExecution.IsCanceled() || savedExecution.IsTimeout() { 468 c.streamLog(ctx, execution.Id, logEvent.WithContent("execution is cancelled")) 469 return nil 470 } 471 472 execution.Stop() 473 if isNegativeTest { 474 if result.IsFailed() { 475 l.Debugw("test run was expected to fail, and it failed as expected", "test", execution.TestName) 476 execution.ExecutionResult.Status = testkube.ExecutionStatusPassed 477 execution.ExecutionResult.ErrorMessage = "" 478 result.Output = result.Output + "\nTest run was expected to fail, and it failed as expected" 479 } else { 480 l.Debugw("test run was expected to fail - the result will be reversed", "test", execution.TestName) 481 execution.ExecutionResult.Status = testkube.ExecutionStatusFailed 482 execution.ExecutionResult.ErrorMessage = "negative test error" 483 result.Output = result.Output + "\nTest run was expected to fail, the result will be reversed" 484 } 485 486 result.Status = execution.ExecutionResult.Status 487 result.ErrorMessage = execution.ExecutionResult.ErrorMessage 488 } 489 490 err = c.Repository.EndExecution(ctx, *execution) 491 if err != nil { 492 l.Errorw("Update execution result error", "error", err) 493 return err 494 } 495 496 eventToSend := testkube.NewEventEndTestSuccess(execution) 497 if result.IsAborted() { 498 result.Output = result.Output + "\nTest run was aborted manually." 499 eventToSend = testkube.NewEventEndTestAborted(execution) 500 } else if result.IsTimeout() { 501 result.Output = result.Output + "\nTest run was aborted due to timeout." 
502 eventToSend = testkube.NewEventEndTestTimeout(execution) 503 } else if result.IsFailed() { 504 eventToSend = testkube.NewEventEndTestFailed(execution) 505 } 506 507 // metrics increase 508 execution.ExecutionResult = result 509 l.Infow("execution ended, saving result", "executionId", execution.Id, "status", result.Status) 510 if err = c.Repository.UpdateResult(ctx, execution.Id, *execution); err != nil { 511 l.Errorw("Update execution result error", "error", err) 512 return err 513 } 514 515 test, err := c.testsClient.Get(execution.TestName) 516 if err != nil { 517 l.Errorw("getting test error", "error", err) 518 return err 519 } 520 521 test.Status = testsmapper.MapExecutionToTestStatus(execution) 522 if err = c.testsClient.UpdateStatus(test); err != nil { 523 l.Errorw("updating test error", "error", err) 524 return err 525 } 526 527 if execution.TestExecutionName != "" { 528 testExecution, err := c.testExecutionsClient.Get(execution.TestExecutionName) 529 if err != nil { 530 l.Errorw("getting test execution error", "error", err) 531 return err 532 } 533 534 testExecution.Status = testexecutionsmapper.MapAPIToCRD(execution, testExecution.Generation) 535 if err = c.testExecutionsClient.UpdateStatus(testExecution); err != nil { 536 l.Errorw("updating test execution error", "error", err) 537 return err 538 } 539 } 540 541 c.metrics.IncAndObserveExecuteTest(*execution, c.dashboardURI) 542 c.Emitter.Notify(eventToSend) 543 544 telemetryEnabled, err := c.configMap.GetTelemetryEnabled(ctx) 545 if err != nil { 546 l.Debugw("getting telemetry enabled error", "error", err) 547 } 548 549 if !telemetryEnabled { 550 return nil 551 } 552 553 clusterID, err := c.configMap.GetUniqueClusterId(ctx) 554 if err != nil { 555 l.Debugw("getting cluster id error", "error", err) 556 } 557 558 host, err := os.Hostname() 559 if err != nil { 560 l.Debugw("getting hostname error", "hostname", host, "error", err) 561 } 562 563 var dataSource string 564 if execution.Content != nil { 565 
dataSource = execution.Content.Type_ 566 } 567 568 status := "" 569 if execution.ExecutionResult != nil && execution.ExecutionResult.Status != nil { 570 status = string(*execution.ExecutionResult.Status) 571 } 572 573 out, err := telemetry.SendRunEvent("testkube_api_run_test", telemetry.RunParams{ 574 AppVersion: version.Version, 575 DataSource: dataSource, 576 Host: host, 577 ClusterID: clusterID, 578 TestType: execution.TestType, 579 DurationMs: execution.DurationMs, 580 Status: status, 581 }) 582 if err != nil { 583 l.Debugw("sending run test telemetry event error", "error", err) 584 } else { 585 l.Debugw("sending run test telemetry event", "output", out) 586 } 587 588 return nil 589 } 590 591 // NewJobOptionsFromExecutionOptions compose JobOptions based on ExecuteOptions 592 func NewJobOptionsFromExecutionOptions(options ExecuteOptions) JobOptions { 593 labels := map[string]string{ 594 testkube.TestLabelTestType: utils.SanitizeName(options.TestSpec.Type_), 595 testkube.TestLabelExecutor: options.ExecutorName, 596 testkube.TestLabelTestName: options.TestName, 597 } 598 for key, value := range options.Labels { 599 labels[key] = value 600 } 601 602 contextType := "" 603 contextData := "" 604 if options.Request.RunningContext != nil { 605 contextType = options.Request.RunningContext.Type_ 606 contextData = options.Request.RunningContext.Context 607 } 608 609 var image string 610 if options.ExecutorSpec.Image != "" { 611 image = options.ExecutorSpec.Image 612 } 613 614 if options.TestSpec.ExecutionRequest != nil && 615 options.TestSpec.ExecutionRequest.Image != "" { 616 image = options.TestSpec.ExecutionRequest.Image 617 } 618 619 if options.Request.Image != "" { 620 image = options.Request.Image 621 } 622 623 return JobOptions{ 624 Image: image, 625 ImagePullSecrets: options.ImagePullSecretNames, 626 JobTemplate: options.ExecutorSpec.JobTemplate, 627 TestName: options.TestName, 628 Namespace: options.Namespace, 629 Envs: options.Request.Envs, 630 SecretEnvs: 
options.Request.SecretEnvs, 631 HTTPProxy: options.Request.HttpProxy, 632 HTTPSProxy: options.Request.HttpsProxy, 633 UsernameSecret: options.UsernameSecret, 634 TokenSecret: options.TokenSecret, 635 RunnerCustomCASecret: options.RunnerCustomCASecret, 636 CertificateSecret: options.CertificateSecret, 637 ActiveDeadlineSeconds: options.Request.ActiveDeadlineSeconds, 638 JobTemplateExtensions: options.Request.JobTemplate, 639 EnvConfigMaps: options.Request.EnvConfigMaps, 640 EnvSecrets: options.Request.EnvSecrets, 641 Labels: labels, 642 ExecutionNumber: options.Request.Number, 643 ContextType: contextType, 644 ContextData: contextData, 645 Features: options.Features, 646 PvcTemplateExtensions: options.Request.PvcTemplate, 647 } 648 } 649 650 // TailJobLogs - locates logs for job pod(s) 651 func (c *JobExecutor) TailJobLogs(ctx context.Context, id, namespace string, logs chan []byte) (err error) { 652 653 podsClient := c.ClientSet.CoreV1().Pods(namespace) 654 655 pods, err := executor.GetJobPods(ctx, podsClient, id, 1, 10) 656 if err != nil { 657 close(logs) 658 return err 659 } 660 661 for _, pod := range pods.Items { 662 if pod.Labels["job-name"] == id { 663 664 l := c.Log.With("podNamespace", pod.Namespace, "podName", pod.Name, "podStatus", pod.Status) 665 666 switch pod.Status.Phase { 667 668 case corev1.PodRunning: 669 l.Debug("tailing pod logs: immediately") 670 return c.TailPodLogs(ctx, pod, logs) 671 672 case corev1.PodFailed: 673 err := errors.Errorf("can't get pod logs, pod failed: %s/%s", pod.Namespace, pod.Name) 674 l.Errorw(err.Error()) 675 return c.GetLastLogLineError(ctx, pod) 676 677 default: 678 l.Debugw("tailing job logs: waiting for pod to be ready") 679 if err = wait.PollUntilContextTimeout(ctx, pollInterval, c.podStartTimeout, true, executor.IsPodLoggable(c.ClientSet, pod.Name, namespace)); err != nil { 680 l.Errorw("poll immediate error when tailing logs", "error", err) 681 return err 682 } 683 684 l.Debug("tailing pod logs") 685 return 
c.TailPodLogs(ctx, pod, logs) 686 } 687 } 688 } 689 690 return 691 } 692 693 func (c *JobExecutor) TailPodLogs(ctx context.Context, pod corev1.Pod, logs chan []byte) (err error) { 694 var containers []string 695 for _, container := range pod.Spec.InitContainers { 696 containers = append(containers, container.Name) 697 } 698 699 for _, container := range pod.Spec.Containers { 700 containers = append(containers, container.Name) 701 } 702 703 l := c.Log.With("method", "TailPodLogs", "pod", pod.Name, "namespace", pod.Namespace, "containersCount", len(containers)) 704 705 wg := sync.WaitGroup{} 706 wg.Add(len(containers)) 707 708 for _, container := range containers { 709 go func(container string) { 710 defer wg.Done() 711 712 podLogOptions := corev1.PodLogOptions{ 713 Follow: true, 714 Container: container, 715 } 716 717 podLogRequest := c.ClientSet.CoreV1(). 718 Pods(pod.Namespace). 719 GetLogs(pod.Name, &podLogOptions) 720 721 stream, err := podLogRequest.Stream(ctx) 722 if err != nil { 723 l.Errorw("stream error", "error", err) 724 return 725 } 726 727 reader := bufio.NewReader(stream) 728 729 for { 730 b, err := utils.ReadLongLine(reader) 731 if err == io.EOF { 732 return 733 } else if err != nil { 734 l.Errorw("scanner error", "error", err) 735 return 736 } 737 l.Debugw("log chunk pushed", "out", string(b), "pod", pod.Name) 738 logs <- b 739 } 740 }(container) 741 } 742 743 go func() { 744 defer close(logs) 745 l.Debugw("waiting for all containers to finish", "containers", containers) 746 wg.Wait() 747 l.Infow("log stream finished") 748 }() 749 750 return 751 } 752 753 // GetPodLogError returns last line as error 754 func (c *JobExecutor) GetPodLogError(ctx context.Context, pod corev1.Pod) (logsBytes []byte, err error) { 755 // error line should be last one 756 return executor.GetPodLogs(ctx, c.ClientSet, pod.Namespace, pod, 1) 757 } 758 759 // GetLastLogLineError return error if last line is failed 760 func (c *JobExecutor) GetLastLogLineError(ctx 
context.Context, pod corev1.Pod) error { 761 l := c.Log.With("pod", pod.Name, "namespace", pod.Namespace) 762 errorLog, err := c.GetPodLogError(ctx, pod) 763 if err != nil { 764 l.Errorw("getPodLogs error", "error", err, "pod", pod) 765 return errors.Errorf("getPodLogs error: %v", err) 766 } 767 768 l.Debugw("log", "got last log bytes", string(errorLog)) // in case distorted log bytes 769 entry := output.GetLogEntry(errorLog) 770 l.Infow("got last log entry", "log", entry.String()) 771 return errors.Errorf("error from last log entry: %s", entry.String()) 772 } 773 774 // Abort aborts K8S by job name 775 func (c *JobExecutor) Abort(ctx context.Context, execution *testkube.Execution) (result *testkube.ExecutionResult, err error) { 776 l := c.Log.With("execution", execution.Id) 777 result, err = executor.AbortJob(ctx, c.ClientSet, execution.TestNamespace, execution.Id) 778 if err != nil { 779 l.Errorw("error aborting job", "execution", execution.Id, "error", err) 780 } 781 l.Debugw("job aborted", "execution", execution.Id, "result", result) 782 if err := c.stopExecution(ctx, l, execution, result, false); err != nil { 783 l.Errorw("error stopping execution on job executor abort", "error", err) 784 } 785 return result, nil 786 } 787 788 func (c *JobExecutor) Timeout(ctx context.Context, jobName string) (result *testkube.ExecutionResult) { 789 l := c.Log.With("jobName", jobName) 790 l.Infow("job timeout") 791 execution, err := c.Repository.Get(ctx, jobName) 792 if err != nil { 793 l.Errorw("error getting execution", "error", err) 794 return 795 } 796 797 c.streamLog(ctx, execution.Id, events.NewLog("execution took too long, pod deadline exceeded")) 798 799 result = &testkube.ExecutionResult{ 800 Status: testkube.ExecutionStatusTimeout, 801 } 802 if err := c.stopExecution(ctx, l, &execution, result, false); err != nil { 803 l.Errorw("error stopping execution on job executor timeout", "error", err) 804 } 805 806 return 807 } 808 809 func (c *JobExecutor) streamLog(ctx 
context.Context, id string, log *events.Log) { 810 if c.features.LogsV2 { 811 c.logsStream.Push(ctx, id, log) 812 } 813 } 814 815 // NewJobSpec is a method to create new job spec 816 func NewJobSpec(log *zap.SugaredLogger, options JobOptions) (*batchv1.Job, error) { 817 envManager := env.NewManager() 818 secretEnvVars := append(envManager.PrepareSecrets(options.SecretEnvs, options.Variables), 819 envManager.PrepareGitCredentials(options.UsernameSecret, options.TokenSecret)...) 820 821 tmpl, err := utils.NewTemplate("job").Funcs(template.FuncMap{"vartypeptrtostring": testkube.VariableTypeString}). 822 Parse(options.JobTemplate) 823 if err != nil { 824 return nil, errors.Errorf("creating job spec from options.JobTemplate error: %v", err) 825 } 826 827 options.Jsn = strings.ReplaceAll(options.Jsn, "'", "''") 828 var buffer bytes.Buffer 829 if err = tmpl.ExecuteTemplate(&buffer, "job", options); err != nil { 830 return nil, errors.Errorf("executing job spec template: %v", err) 831 } 832 833 var job batchv1.Job 834 jobSpec := buffer.String() 835 if options.JobTemplateExtensions != "" { 836 tmplExt, err := utils.NewTemplate("jobExt").Funcs(template.FuncMap{"vartypeptrtostring": testkube.VariableTypeString}). 
837 Parse(options.JobTemplateExtensions) 838 if err != nil { 839 return nil, errors.Errorf("creating job extensions spec from template error: %v", err) 840 } 841 842 var bufferExt bytes.Buffer 843 if err = tmplExt.ExecuteTemplate(&bufferExt, "jobExt", options); err != nil { 844 return nil, errors.Errorf("executing job extensions spec template: %v", err) 845 } 846 847 if jobSpec, err = merge2.MergeStrings(bufferExt.String(), jobSpec, false, kyaml.MergeOptions{}); err != nil { 848 return nil, errors.Errorf("merging job spec templates: %v", err) 849 } 850 } 851 852 log.Debug("Job specification", jobSpec) 853 decoder := yaml.NewYAMLOrJSONDecoder(bytes.NewBufferString(jobSpec), len(jobSpec)) 854 if err := decoder.Decode(&job); err != nil { 855 return nil, errors.Errorf("decoding job spec error: %v", err) 856 } 857 858 for key, value := range options.Labels { 859 if job.Labels == nil { 860 job.Labels = make(map[string]string) 861 } 862 863 job.Labels[key] = value 864 865 if job.Spec.Template.Labels == nil { 866 job.Spec.Template.Labels = make(map[string]string) 867 } 868 869 job.Spec.Template.Labels[key] = value 870 } 871 872 envs := append(executor.RunnerEnvVars, corev1.EnvVar{Name: "RUNNER_CLUSTERID", Value: options.ClusterID}) 873 if options.ArtifactRequest != nil && options.ArtifactRequest.StorageBucket != "" { 874 envs = append(envs, corev1.EnvVar{Name: "RUNNER_BUCKET", Value: options.ArtifactRequest.StorageBucket}) 875 } else { 876 envs = append(envs, corev1.EnvVar{Name: "RUNNER_BUCKET", Value: os.Getenv("STORAGE_BUCKET")}) 877 } 878 879 envs = append(envs, secretEnvVars...) 880 if options.HTTPProxy != "" { 881 envs = append(envs, corev1.EnvVar{Name: "HTTP_PROXY", Value: options.HTTPProxy}) 882 } 883 884 if options.HTTPSProxy != "" { 885 envs = append(envs, corev1.EnvVar{Name: "HTTPS_PROXY", Value: options.HTTPSProxy}) 886 } 887 888 envs = append(envs, envManager.PrepareEnvs(options.Envs, options.Variables)...) 
889 envs = append(envs, corev1.EnvVar{Name: "RUNNER_WORKINGDIR", Value: options.WorkingDir}) 890 envs = append(envs, corev1.EnvVar{Name: "RUNNER_EXECUTIONID", Value: options.Name}) 891 envs = append(envs, corev1.EnvVar{Name: "RUNNER_TESTNAME", Value: options.TestName}) 892 envs = append(envs, corev1.EnvVar{Name: "RUNNER_EXECUTIONNUMBER", Value: fmt.Sprint(options.ExecutionNumber)}) 893 envs = append(envs, corev1.EnvVar{Name: "RUNNER_CONTEXTTYPE", Value: options.ContextType}) 894 envs = append(envs, corev1.EnvVar{Name: "RUNNER_CONTEXTDATA", Value: options.ContextData}) 895 envs = append(envs, corev1.EnvVar{Name: "RUNNER_APIURI", Value: options.APIURI}) 896 897 for i := range job.Spec.Template.Spec.InitContainers { 898 job.Spec.Template.Spec.InitContainers[i].Env = append(job.Spec.Template.Spec.InitContainers[i].Env, envs...) 899 } 900 901 for i := range job.Spec.Template.Spec.Containers { 902 job.Spec.Template.Spec.Containers[i].Env = append(job.Spec.Template.Spec.Containers[i].Env, envs...) 903 } 904 905 return &job, nil 906 } 907 908 func NewJobOptions(log *zap.SugaredLogger, templatesClient templatesv1.Interface, images executor.Images, 909 templates executor.Templates, serviceAccountNames map[string]string, registry, clusterID, apiURI string, 910 execution testkube.Execution, options ExecuteOptions, natsURI string, debug bool) (jobOptions JobOptions, err error) { 911 jsn, err := json.Marshal(execution) 912 if err != nil { 913 return jobOptions, err 914 } 915 916 jobOptions = NewJobOptionsFromExecutionOptions(options) 917 jobOptions.Name = execution.Id 918 jobOptions.Namespace = execution.TestNamespace 919 jobOptions.Jsn = string(jsn) 920 jobOptions.InitImage = images.Init 921 jobOptions.TestName = execution.TestName 922 jobOptions.Features = options.Features 923 924 // options needed for Log sidecar 925 if options.Features.LogsV2 { 926 // TODO pass them from some config? we dont' have any in this context? 
		jobOptions.Debug = debug
		jobOptions.NatsUri = natsURI
		jobOptions.LogSidecarImage = images.LogSidecar
	}

	// Fall back to the default job template when none was supplied upstream.
	if jobOptions.JobTemplate == "" {
		jobOptions.JobTemplate = templates.Job
	}

	// Executor-level job template reference overrides the default; a reference
	// whose type is not JOB is ignored with a warning.
	if options.ExecutorSpec.JobTemplateReference != "" {
		template, err := templatesClient.Get(options.ExecutorSpec.JobTemplateReference)
		if err != nil {
			return jobOptions, err
		}

		if template.Spec.Type_ != nil && testkube.TemplateType(*template.Spec.Type_) == testkube.JOB_TemplateType {
			jobOptions.JobTemplate = template.Spec.Body
		} else {
			log.Warnw("Not matched template type", "template", options.ExecutorSpec.JobTemplateReference)
		}
	}

	// Request-level job template reference is applied last, so it takes
	// precedence over both the default and the executor-level reference.
	if options.Request.JobTemplateReference != "" {
		template, err := templatesClient.Get(options.Request.JobTemplateReference)
		if err != nil {
			return jobOptions, err
		}

		if template.Spec.Type_ != nil && testkube.TemplateType(*template.Spec.Type_) == testkube.JOB_TemplateType {
			jobOptions.JobTemplate = template.Spec.Body
		} else {
			log.Warnw("Not matched template type", "template", options.Request.JobTemplateReference)
		}
	}

	jobOptions.Variables = execution.Variables
	// Executions may only run in namespaces that have a configured service account.
	serviceAccountName, ok := serviceAccountNames[execution.TestNamespace]
	if !ok {
		return jobOptions, fmt.Errorf("not supported namespace %s", execution.TestNamespace)
	}

	jobOptions.ServiceAccountName = serviceAccountName
	jobOptions.Registry = registry
	jobOptions.ClusterID = clusterID

	// The artifact request is honored only when the executor declares the
	// artifacts feature.
	supportArtifacts := false
	for _, feature := range options.ExecutorSpec.Features {
		if feature == executorv1.FeatureArtifacts {
			supportArtifacts = true
			break
		}
	}

	if supportArtifacts {
		jobOptions.ArtifactRequest = execution.ArtifactRequest
	}

	// Working dir defaults to the executor volume; a repository-level working
	// dir (when content comes from git) overrides it.
	workingDir := agent.GetDefaultWorkingDir(executor.VolumeDir, execution)
	if execution.Content != nil && execution.Content.Repository != nil &&
		execution.Content.Repository.WorkingDir != "" {
		workingDir = filepath.Join(executor.VolumeDir, "repo", execution.Content.Repository.WorkingDir)
	}

	jobOptions.WorkingDir = workingDir
	jobOptions.APIURI = apiURI

	// Slave pod template: default, optionally overridden by a POD-type
	// template reference from the request.
	jobOptions.SlavePodTemplate = templates.Slave
	if options.Request.SlavePodRequest != nil && options.Request.SlavePodRequest.PodTemplateReference != "" {
		template, err := templatesClient.Get(options.Request.SlavePodRequest.PodTemplateReference)
		if err != nil {
			return jobOptions, err
		}

		if template.Spec.Type_ != nil && testkube.TemplateType(*template.Spec.Type_) == testkube.POD_TemplateType {
			jobOptions.SlavePodTemplate = template.Spec.Body
		} else {
			log.Warnw("Not matched template type", "template", options.Request.SlavePodRequest.PodTemplateReference)
		}
	}

	// Executors with slaves get their slave configuration serialized into a
	// basic variable so the runner can spawn slave pods.
	if options.ExecutorSpec.Slaves != nil {
		slvesConfigs, err := json.Marshal(executor.GetSlavesConfigs(
			images.Init,
			*options.ExecutorSpec.Slaves,
			jobOptions.Registry,
			jobOptions.ServiceAccountName,
			jobOptions.CertificateSecret,
			jobOptions.SlavePodTemplate,
			jobOptions.ImagePullSecrets,
			jobOptions.EnvConfigMaps,
			jobOptions.EnvSecrets,
			int(jobOptions.ActiveDeadlineSeconds),
			testkube.Features(options.Features),
			natsURI,
			images.LogSidecar,
			jobOptions.RunnerCustomCASecret,
		))

		if err != nil {
			return jobOptions, err
		}

		// Variables may be nil when the execution declared none.
		if jobOptions.Variables == nil {
			jobOptions.Variables = make(map[string]testkube.Variable)
		}

		jobOptions.Variables[executor.SlavesConfigsEnv] = testkube.NewBasicVariable(executor.SlavesConfigsEnv, string(slvesConfigs))
	}

	// PVC template: default, optionally overridden by a PVC-type template
	// reference from the request.
	jobOptions.PvcTemplate = templates.PVC
	if options.Request.PvcTemplateReference != "" {
		template, err := templatesClient.Get(options.Request.PvcTemplateReference)
		if err != nil {
			return jobOptions, err
		}

		if template.Spec.Type_ != nil && testkube.TemplateType(*template.Spec.Type_) == testkube.PVC_TemplateType {
			jobOptions.PvcTemplate = template.Spec.Body
		} else {
			log.Warnw("Not matched template type", "template", options.Request.PvcTemplateReference)
		}
	}

	// used for adding custom certificates for Agent (gRPC) API
	jobOptions.AgentAPITLSSecret = options.AgentAPITLSSecret

	return
}

// NewPVCOptionsFromJobOptions returns PVCOptions derived from the given job
// options plus the cluster's default storage class name.
func NewPVCOptionsFromJobOptions(options JobOptions, defaultStorageClassName string) PVCOptions {
	return PVCOptions{
		Name:                    options.Name,
		Namespace:               options.Namespace,
		PvcTemplate:             options.PvcTemplate,
		PvcTemplateExtensions:   options.PvcTemplateExtensions,
		ArtifactRequest:         options.ArtifactRequest,
		DefaultStorageClassName: defaultStorageClassName,
	}
}