github.com/kubeshop/testkube@v1.17.23/pkg/executor/client/job.go (about)

     1  package client
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"context"
     7  	"encoding/json"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"path/filepath"
    12  	"strings"
    13  	"sync"
    14  	"text/template"
    15  	"time"
    16  
    17  	"github.com/kubeshop/testkube/pkg/featureflags"
    18  	"github.com/kubeshop/testkube/pkg/repository/config"
    19  
    20  	"github.com/pkg/errors"
    21  
    22  	"github.com/kubeshop/testkube/pkg/version"
    23  
    24  	"github.com/kubeshop/testkube/pkg/repository/result"
    25  
    26  	"go.uber.org/zap"
    27  	batchv1 "k8s.io/api/batch/v1"
    28  	corev1 "k8s.io/api/core/v1"
    29  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	"k8s.io/apimachinery/pkg/util/wait"
    31  	"k8s.io/apimachinery/pkg/util/yaml"
    32  	"k8s.io/client-go/kubernetes"
    33  	"sigs.k8s.io/kustomize/kyaml/yaml/merge2"
    34  
    35  	kyaml "sigs.k8s.io/kustomize/kyaml/yaml"
    36  
    37  	executorv1 "github.com/kubeshop/testkube-operator/api/executor/v1"
    38  	templatesv1 "github.com/kubeshop/testkube-operator/pkg/client/templates/v1"
    39  	testexecutionsv1 "github.com/kubeshop/testkube-operator/pkg/client/testexecutions/v1"
    40  	testsv3 "github.com/kubeshop/testkube-operator/pkg/client/tests/v3"
    41  	"github.com/kubeshop/testkube/pkg/api/v1/testkube"
    42  	"github.com/kubeshop/testkube/pkg/event"
    43  	"github.com/kubeshop/testkube/pkg/executor"
    44  	"github.com/kubeshop/testkube/pkg/executor/agent"
    45  	"github.com/kubeshop/testkube/pkg/executor/env"
    46  	"github.com/kubeshop/testkube/pkg/executor/output"
    47  	"github.com/kubeshop/testkube/pkg/log"
    48  	logsclient "github.com/kubeshop/testkube/pkg/logs/client"
    49  	"github.com/kubeshop/testkube/pkg/logs/events"
    50  	testexecutionsmapper "github.com/kubeshop/testkube/pkg/mapper/testexecutions"
    51  	testsmapper "github.com/kubeshop/testkube/pkg/mapper/tests"
    52  	"github.com/kubeshop/testkube/pkg/telemetry"
    53  	"github.com/kubeshop/testkube/pkg/utils"
    54  )
    55  
const (
	// GitUsernameSecretName is git username secret name
	GitUsernameSecretName = "git-username"
	// GitUsernameEnvVarName is git username environment var name
	GitUsernameEnvVarName = "RUNNER_GITUSERNAME"
	// GitTokenSecretName is git token secret name
	GitTokenSecretName = "git-token"
	// GitTokenEnvVarName is git token environment var name
	GitTokenEnvVarName = "RUNNER_GITTOKEN"
	// SecretTest is a test secret
	SecretTest = "secrets"
	// SecretSource is a source secret
	SecretSource = "source-secrets"

	// pollTimeout is the overall timeout and pollInterval the polling period
	// passed to wait.PollUntilContextTimeout when waiting on job pods.
	pollTimeout  = 24 * time.Hour
	pollInterval = 200 * time.Millisecond
	// pollJobStatus is interval for checking if job timeout occurred
	pollJobStatus = 1 * time.Second
	// timeoutIndicator is the job condition reason that MonitorJobForTimeout
	// treats as a timeout
	timeoutIndicator = "DeadlineExceeded"

	// logsStreamBuffer is the capacity of the channels created by Logs
	logsStreamBuffer = 1000
)
    79  
    80  // NewJobExecutor creates new job executor
    81  func NewJobExecutor(
    82  	repo result.Repository,
    83  	images executor.Images,
    84  	templates executor.Templates,
    85  	serviceAccountNames map[string]string,
    86  	metrics ExecutionMetric,
    87  	emiter *event.Emitter,
    88  	configMap config.Repository,
    89  	testsClient testsv3.Interface,
    90  	clientset kubernetes.Interface,
    91  	testExecutionsClient testexecutionsv1.Interface,
    92  	templatesClient templatesv1.Interface,
    93  	registry string,
    94  	podStartTimeout time.Duration,
    95  	clusterID string,
    96  	dashboardURI string,
    97  	apiURI string,
    98  	natsURI string,
    99  	debug bool,
   100  	logsStream logsclient.Stream,
   101  	features featureflags.FeatureFlags,
   102  	defaultStorageClassName string,
   103  ) (client *JobExecutor, err error) {
   104  	if serviceAccountNames == nil {
   105  		serviceAccountNames = make(map[string]string)
   106  	}
   107  
   108  	return &JobExecutor{
   109  		ClientSet:               clientset,
   110  		Repository:              repo,
   111  		Log:                     log.DefaultLogger,
   112  		images:                  images,
   113  		templates:               templates,
   114  		serviceAccountNames:     serviceAccountNames,
   115  		metrics:                 metrics,
   116  		Emitter:                 emiter,
   117  		configMap:               configMap,
   118  		testsClient:             testsClient,
   119  		testExecutionsClient:    testExecutionsClient,
   120  		templatesClient:         templatesClient,
   121  		registry:                registry,
   122  		podStartTimeout:         podStartTimeout,
   123  		clusterID:               clusterID,
   124  		dashboardURI:            dashboardURI,
   125  		apiURI:                  apiURI,
   126  		natsURI:                 natsURI,
   127  		debug:                   debug,
   128  		logsStream:              logsStream,
   129  		features:                features,
   130  		defaultStorageClassName: defaultStorageClassName,
   131  	}, nil
   132  }
   133  
// ExecutionMetric is the metrics sink used by JobExecutor; stopExecution calls
// IncAndObserveExecuteTest once the execution result has been persisted.
type ExecutionMetric interface {
	// IncAndObserveExecuteTest records the given finished execution.
	// NOTE(review): the exact metrics emitted are defined by the implementation,
	// which is not visible here.
	IncAndObserveExecuteTest(execution testkube.Execution, dashboardURI string)
}
   137  
// JobExecutor is container for managing job executor dependencies.
//
// The exported fields (Repository, Log, ClientSet, Emitter) are the result
// store, logger, Kubernetes client and event emitter used by the methods in
// this file. The unexported fields are populated by NewJobExecutor and hold
// clients for the test/test-execution/template resources, the logs stream,
// feature flags and plain settings (registry, URIs, timeouts, cluster ID).
// Cmd is not set by NewJobExecutor.
type JobExecutor struct {
	Repository              result.Repository
	Log                     *zap.SugaredLogger
	ClientSet               kubernetes.Interface
	Cmd                     string
	images                  executor.Images
	templates               executor.Templates
	serviceAccountNames     map[string]string
	metrics                 ExecutionMetric
	Emitter                 *event.Emitter
	configMap               config.Repository
	testsClient             testsv3.Interface
	testExecutionsClient    testexecutionsv1.Interface
	templatesClient         templatesv1.Interface
	registry                string
	podStartTimeout         time.Duration
	clusterID               string
	dashboardURI            string
	apiURI                  string
	natsURI                 string
	debug                   bool
	logsStream              logsclient.Stream
	features                featureflags.FeatureFlags
	defaultStorageClassName string
}
   164  
// JobOptions carries the values used to build the Kubernetes job (and,
// when artifacts need a storage class, the persistent volume claim) for a
// test execution. NewJobOptionsFromExecutionOptions fills the subset that can
// be derived from ExecuteOptions; NewJobSpec consumes the options, parsing
// JobTemplate as a text template.
// NOTE(review): the remaining fields are presumably filled by NewJobOptions,
// which is defined outside this section - confirm there.
type JobOptions struct {
	Name                  string
	Namespace             string
	Image                 string
	ImagePullSecrets      []string
	Jsn                   string
	TestName              string
	InitImage             string
	JobTemplate           string
	Envs                  map[string]string
	SecretEnvs            map[string]string
	HTTPProxy             string
	HTTPSProxy            string
	UsernameSecret        *testkube.SecretRef
	TokenSecret           *testkube.SecretRef
	RunnerCustomCASecret  string
	CertificateSecret     string
	AgentAPITLSSecret     string
	Variables             map[string]testkube.Variable
	ActiveDeadlineSeconds int64
	ServiceAccountName    string
	JobTemplateExtensions string
	EnvConfigMaps         []testkube.EnvReference
	EnvSecrets            []testkube.EnvReference
	Labels                map[string]string
	Registry              string
	ClusterID             string
	ArtifactRequest       *testkube.ArtifactRequest
	WorkingDir            string
	ExecutionNumber       int32
	ContextType           string
	ContextData           string
	Debug                 bool
	NatsUri               string
	LogSidecarImage       string
	APIURI                string
	SlavePodTemplate      string
	Features              featureflags.FeatureFlags
	PvcTemplate           string
	PvcTemplateExtensions string
}
   206  
   207  // Logs returns job logs stream channel using kubernetes api
   208  func (c *JobExecutor) Logs(ctx context.Context, id, namespace string) (out chan output.Output, err error) {
   209  	out = make(chan output.Output, logsStreamBuffer)
   210  	logs := make(chan []byte, logsStreamBuffer)
   211  
   212  	go func() {
   213  		defer func() {
   214  			c.Log.Debug("closing JobExecutor.Logs out log")
   215  			close(out)
   216  		}()
   217  
   218  		if err := c.TailJobLogs(ctx, id, namespace, logs); err != nil {
   219  			out <- output.NewOutputError(err)
   220  			return
   221  		}
   222  
   223  		for l := range logs {
   224  			out <- output.GetLogEntry(l)
   225  		}
   226  	}()
   227  
   228  	return
   229  }
   230  
   231  // Execute starts new external test execution, reads data and returns ID
   232  // Execution is started asynchronously client can check later for results
   233  func (c *JobExecutor) Execute(ctx context.Context, execution *testkube.Execution, options ExecuteOptions) (result *testkube.ExecutionResult, err error) {
   234  	result = testkube.NewRunningExecutionResult()
   235  	execution.ExecutionResult = result
   236  
   237  	err = c.CreateJob(ctx, *execution, options)
   238  	if err != nil {
   239  		if cErr := c.cleanPVCVolume(ctx, execution); cErr != nil {
   240  			c.Log.Errorw("error deleting pvc volume", "error", cErr)
   241  		}
   242  
   243  		return result.Err(err), err
   244  	}
   245  
   246  	c.streamLog(ctx, execution.Id, events.NewLog("created kubernetes job").WithSource(events.SourceJobExecutor))
   247  
   248  	if !options.Sync {
   249  		go c.MonitorJobForTimeout(ctx, execution.Id, execution.TestNamespace)
   250  	}
   251  
   252  	podsClient := c.ClientSet.CoreV1().Pods(execution.TestNamespace)
   253  	pods, err := executor.GetJobPods(ctx, podsClient, execution.Id, 1, 10)
   254  	if err != nil {
   255  		if cErr := c.cleanPVCVolume(ctx, execution); cErr != nil {
   256  			c.Log.Errorw("error deleting pvc volume", "error", cErr)
   257  		}
   258  
   259  		return result.Err(err), err
   260  	}
   261  
   262  	l := c.Log.With("executionID", execution.Id, "type", "async")
   263  
   264  	c.streamLog(ctx, execution.Id, events.NewLog("waiting for pod to spin up").WithSource(events.SourceJobExecutor))
   265  
   266  	for _, pod := range pods.Items {
   267  		if pod.Status.Phase != corev1.PodRunning && pod.Labels["job-name"] == execution.Id {
   268  			// for sync block and complete
   269  			if options.Sync {
   270  				return c.updateResultsFromPod(ctx, pod, l, execution, options.Request.NegativeTest)
   271  			}
   272  
   273  			// for async start goroutine and return in progress job
   274  			go func(pod corev1.Pod) {
   275  				_, err := c.updateResultsFromPod(ctx, pod, l, execution, options.Request.NegativeTest)
   276  				if err != nil {
   277  					l.Errorw("update results from jobs pod error", "error", err)
   278  				}
   279  			}(pod)
   280  
   281  			return result, nil
   282  		}
   283  	}
   284  
   285  	l.Debugw("no pods was found", "totalPodsCount", len(pods.Items))
   286  
   287  	return result, nil
   288  }
   289  
   290  func (c *JobExecutor) MonitorJobForTimeout(ctx context.Context, jobName, namespace string) {
   291  	ticker := time.NewTicker(pollJobStatus)
   292  	l := c.Log.With("jobName", jobName)
   293  	for {
   294  		select {
   295  		case <-ctx.Done():
   296  			l.Infow("context done, stopping job timeout monitor")
   297  			return
   298  		case <-ticker.C:
   299  			jobs, err := c.ClientSet.BatchV1().Jobs(namespace).List(ctx, metav1.ListOptions{LabelSelector: "job-name=" + jobName})
   300  			if err != nil {
   301  				l.Errorw("could not get jobs", "error", err)
   302  				return
   303  			}
   304  			if jobs == nil || len(jobs.Items) == 0 {
   305  				return
   306  			}
   307  
   308  			job := jobs.Items[0]
   309  
   310  			if job.Status.Succeeded > 0 {
   311  				l.Debugw("job succeeded", "status", "succeded")
   312  				return
   313  			}
   314  
   315  			if job.Status.Failed > 0 {
   316  				l.Debugw("job failed")
   317  				if len(job.Status.Conditions) > 0 {
   318  					for _, condition := range job.Status.Conditions {
   319  						l.Infow("job timeout", "condition.reason", condition.Reason)
   320  						if condition.Reason == timeoutIndicator {
   321  							c.Timeout(ctx, jobName)
   322  						}
   323  					}
   324  				}
   325  				return
   326  			}
   327  
   328  			if job.Status.Active > 0 {
   329  				continue
   330  			}
   331  		}
   332  	}
   333  }
   334  
   335  // CreateJob creates new Kubernetes job based on execution and execute options
   336  func (c *JobExecutor) CreateJob(ctx context.Context, execution testkube.Execution, options ExecuteOptions) error {
   337  	jobs := c.ClientSet.BatchV1().Jobs(execution.TestNamespace)
   338  	jobOptions, err := NewJobOptions(c.Log, c.templatesClient, c.images, c.templates,
   339  		c.serviceAccountNames, c.registry, c.clusterID, c.apiURI, execution, options, c.natsURI, c.debug)
   340  	if err != nil {
   341  		return err
   342  	}
   343  
   344  	if jobOptions.ArtifactRequest != nil &&
   345  		(jobOptions.ArtifactRequest.StorageClassName != "" || jobOptions.ArtifactRequest.UseDefaultStorageClassName) {
   346  		c.Log.Debug("creating persistent volume claim with options", "options", jobOptions)
   347  		pvcsClient := c.ClientSet.CoreV1().PersistentVolumeClaims(execution.TestNamespace)
   348  		pvcSpec, err := NewPersistentVolumeClaimSpec(c.Log, NewPVCOptionsFromJobOptions(jobOptions, c.defaultStorageClassName))
   349  		if err != nil {
   350  			return err
   351  		}
   352  
   353  		_, err = pvcsClient.Create(ctx, pvcSpec, metav1.CreateOptions{})
   354  		if err != nil {
   355  			return err
   356  		}
   357  	}
   358  
   359  	c.Log.Debug("creating job with options", "options", jobOptions)
   360  	jobSpec, err := NewJobSpec(c.Log, jobOptions)
   361  	if err != nil {
   362  		return err
   363  	}
   364  
   365  	_, err = jobs.Create(ctx, jobSpec, metav1.CreateOptions{})
   366  	return err
   367  }
   368  
   369  func (c *JobExecutor) cleanPVCVolume(ctx context.Context, execution *testkube.Execution) error {
   370  	if execution.ArtifactRequest != nil &&
   371  		(execution.ArtifactRequest.StorageClassName != "" || execution.ArtifactRequest.UseDefaultStorageClassName) {
   372  		pvcsClient := c.ClientSet.CoreV1().PersistentVolumeClaims(execution.TestNamespace)
   373  		if err := pvcsClient.Delete(ctx, execution.Id+"-pvc", metav1.DeleteOptions{}); err != nil {
   374  			return err
   375  		}
   376  	}
   377  
   378  	return nil
   379  }
   380  
   381  // updateResultsFromPod watches logs and stores results if execution is finished
   382  func (c *JobExecutor) updateResultsFromPod(ctx context.Context, pod corev1.Pod, l *zap.SugaredLogger, execution *testkube.Execution, isNegativeTest bool) (*testkube.ExecutionResult, error) {
   383  	var err error
   384  
   385  	// save stop time and final state
   386  	defer func() {
   387  		if err := c.stopExecution(ctx, l, execution, execution.ExecutionResult, isNegativeTest); err != nil {
   388  			c.streamLog(ctx, execution.Id, events.NewErrorLog(err))
   389  			l.Errorw("error stopping execution after updating results from pod", "error", err)
   390  		}
   391  
   392  		if err := c.cleanPVCVolume(ctx, execution); err != nil {
   393  			l.Errorw("error cleaning pvc volume", "error", err)
   394  		}
   395  	}()
   396  
   397  	// wait for pod to be loggable
   398  	if err = wait.PollUntilContextTimeout(ctx, pollInterval, c.podStartTimeout, true, executor.IsPodLoggable(c.ClientSet, pod.Name, execution.TestNamespace)); err != nil {
   399  		c.streamLog(ctx, execution.Id, events.NewErrorLog(errors.Wrap(err, "can't start test job pod")))
   400  		l.Errorw("waiting for pod started error", "error", err)
   401  	}
   402  
   403  	l.Debug("poll immediate waiting for pod")
   404  	// wait for pod
   405  	if err = wait.PollUntilContextTimeout(ctx, pollInterval, pollTimeout, true, executor.IsPodReady(c.ClientSet, pod.Name, execution.TestNamespace)); err != nil {
   406  		// continue on poll err and try to get logs later
   407  		c.streamLog(ctx, execution.Id, events.NewErrorLog(errors.Wrap(err, "can't read data from pod, pod was not completed")))
   408  		l.Errorw("waiting for pod complete error", "error", err)
   409  	}
   410  
   411  	if err != nil {
   412  		execution.ExecutionResult.Err(err)
   413  	}
   414  	l.Debug("poll immediate end")
   415  
   416  	c.streamLog(ctx, execution.Id, events.NewLog("analyzing test results and artfacts"))
   417  
   418  	logs, err := executor.GetPodLogs(ctx, c.ClientSet, execution.TestNamespace, pod)
   419  	if err != nil {
   420  		l.Errorw("get pod logs error", "error", err)
   421  		c.streamLog(ctx, execution.Id, events.NewErrorLog(err))
   422  	}
   423  
   424  	// don't attach logs if logs v2 is enabled - they will be streamed through the logs service
   425  	attachLogs := !c.features.LogsV2
   426  	if len(logs) != 0 {
   427  		// parse job output log (JSON stream)
   428  		execution.ExecutionResult, err = output.ParseRunnerOutput(logs, attachLogs)
   429  		if err != nil {
   430  			l.Errorw("parse output error", "error", err)
   431  			c.streamLog(ctx, execution.Id, events.NewErrorLog(errors.Wrap(err, "can't get test execution job output")))
   432  			return execution.ExecutionResult, err
   433  		}
   434  	}
   435  
   436  	if execution.ExecutionResult.IsFailed() {
   437  		errorMessage := execution.ExecutionResult.ErrorMessage
   438  		if errorMessage == "" {
   439  			errorMessage = executor.GetPodErrorMessage(ctx, c.ClientSet, &pod)
   440  		}
   441  
   442  		execution.ExecutionResult.ErrorMessage = errorMessage
   443  
   444  		c.streamLog(ctx, execution.Id, events.NewErrorLog(errors.Wrap(err, "test execution finished with failed state")))
   445  	} else {
   446  		c.streamLog(ctx, execution.Id, events.NewLog("test execution finshed").WithMetadataEntry("status", string(*execution.ExecutionResult.Status)))
   447  	}
   448  
   449  	// saving result in the defer function
   450  	return execution.ExecutionResult, nil
   451  }
   452  
// stopExecution finalizes an execution with the given result: it persists the
// end of the execution and its result in the repository, updates the status of
// the Test resource (and of the TestExecution resource when one is named),
// records metrics, emits the matching end-of-test event and, when telemetry is
// enabled, sends a run event.
//
// If the stored execution is already canceled or timed out, nothing is changed
// and nil is returned. For negative tests the pass/fail outcome is inverted
// before saving. Repository and resource-client errors are returned; telemetry
// related errors are only logged.
func (c *JobExecutor) stopExecution(ctx context.Context, l *zap.SugaredLogger, execution *testkube.Execution, result *testkube.ExecutionResult, isNegativeTest bool) error {
	// load the stored state to detect an execution that was already canceled/timed out
	savedExecution, err := c.Repository.Get(ctx, execution.Id)
	if err != nil {
		l.Errorw("get execution error", "error", err)
		return err
	}

	logEvent := events.NewLog().WithSource(events.SourceJobExecutor)

	l.Debugw("stopping execution", "executionId", execution.Id, "status", result.Status, "executionStatus", execution.ExecutionResult.Status, "savedExecutionStatus", savedExecution.ExecutionResult.Status)

	c.streamLog(ctx, execution.Id, logEvent.WithContent("stopping execution"))
	// the deferred call's arguments are evaluated here; only the push itself runs on return
	defer c.streamLog(ctx, execution.Id, logEvent.WithContent("execution stopped"))

	if savedExecution.IsCanceled() || savedExecution.IsTimeout() {
		c.streamLog(ctx, execution.Id, logEvent.WithContent("execution is cancelled"))
		return nil
	}

	execution.Stop()
	if isNegativeTest {
		// negative test: a failure counts as success and vice versa
		if result.IsFailed() {
			l.Debugw("test run was expected to fail, and it failed as expected", "test", execution.TestName)
			execution.ExecutionResult.Status = testkube.ExecutionStatusPassed
			execution.ExecutionResult.ErrorMessage = ""
			result.Output = result.Output + "\nTest run was expected to fail, and it failed as expected"
		} else {
			l.Debugw("test run was expected to fail - the result will be reversed", "test", execution.TestName)
			execution.ExecutionResult.Status = testkube.ExecutionStatusFailed
			execution.ExecutionResult.ErrorMessage = "negative test error"
			result.Output = result.Output + "\nTest run was expected to fail, the result will be reversed"
		}

		// keep the passed-in result in sync with the inverted outcome
		result.Status = execution.ExecutionResult.Status
		result.ErrorMessage = execution.ExecutionResult.ErrorMessage
	}

	err = c.Repository.EndExecution(ctx, *execution)
	if err != nil {
		l.Errorw("Update execution result error", "error", err)
		return err
	}

	// pick the event matching the final state; success is the default
	eventToSend := testkube.NewEventEndTestSuccess(execution)
	if result.IsAborted() {
		result.Output = result.Output + "\nTest run was aborted manually."
		eventToSend = testkube.NewEventEndTestAborted(execution)
	} else if result.IsTimeout() {
		result.Output = result.Output + "\nTest run was aborted due to timeout."
		eventToSend = testkube.NewEventEndTestTimeout(execution)
	} else if result.IsFailed() {
		eventToSend = testkube.NewEventEndTestFailed(execution)
	}

	// attach the final result to the execution and persist it
	execution.ExecutionResult = result
	l.Infow("execution ended, saving result", "executionId", execution.Id, "status", result.Status)
	if err = c.Repository.UpdateResult(ctx, execution.Id, *execution); err != nil {
		l.Errorw("Update execution result error", "error", err)
		return err
	}

	// reflect the execution in the status of the Test resource
	test, err := c.testsClient.Get(execution.TestName)
	if err != nil {
		l.Errorw("getting test error", "error", err)
		return err
	}

	test.Status = testsmapper.MapExecutionToTestStatus(execution)
	if err = c.testsClient.UpdateStatus(test); err != nil {
		l.Errorw("updating test error", "error", err)
		return err
	}

	// reflect the execution in the status of the TestExecution resource, if any
	if execution.TestExecutionName != "" {
		testExecution, err := c.testExecutionsClient.Get(execution.TestExecutionName)
		if err != nil {
			l.Errorw("getting test execution error", "error", err)
			return err
		}

		testExecution.Status = testexecutionsmapper.MapAPIToCRD(execution, testExecution.Generation)
		if err = c.testExecutionsClient.UpdateStatus(testExecution); err != nil {
			l.Errorw("updating test execution error", "error", err)
			return err
		}
	}

	// metrics increase and event notification
	c.metrics.IncAndObserveExecuteTest(*execution, c.dashboardURI)
	c.Emitter.Notify(eventToSend)

	// everything below is telemetry only: errors are logged, never returned
	telemetryEnabled, err := c.configMap.GetTelemetryEnabled(ctx)
	if err != nil {
		l.Debugw("getting telemetry enabled error", "error", err)
	}

	if !telemetryEnabled {
		return nil
	}

	clusterID, err := c.configMap.GetUniqueClusterId(ctx)
	if err != nil {
		l.Debugw("getting cluster id error", "error", err)
	}

	host, err := os.Hostname()
	if err != nil {
		l.Debugw("getting hostname error", "hostname", host, "error", err)
	}

	var dataSource string
	if execution.Content != nil {
		dataSource = execution.Content.Type_
	}

	status := ""
	if execution.ExecutionResult != nil && execution.ExecutionResult.Status != nil {
		status = string(*execution.ExecutionResult.Status)
	}

	out, err := telemetry.SendRunEvent("testkube_api_run_test", telemetry.RunParams{
		AppVersion: version.Version,
		DataSource: dataSource,
		Host:       host,
		ClusterID:  clusterID,
		TestType:   execution.TestType,
		DurationMs: execution.DurationMs,
		Status:     status,
	})
	if err != nil {
		l.Debugw("sending run test telemetry event error", "error", err)
	} else {
		l.Debugw("sending run test telemetry event", "output", out)
	}

	return nil
}
   590  
   591  // NewJobOptionsFromExecutionOptions compose JobOptions based on ExecuteOptions
   592  func NewJobOptionsFromExecutionOptions(options ExecuteOptions) JobOptions {
   593  	labels := map[string]string{
   594  		testkube.TestLabelTestType: utils.SanitizeName(options.TestSpec.Type_),
   595  		testkube.TestLabelExecutor: options.ExecutorName,
   596  		testkube.TestLabelTestName: options.TestName,
   597  	}
   598  	for key, value := range options.Labels {
   599  		labels[key] = value
   600  	}
   601  
   602  	contextType := ""
   603  	contextData := ""
   604  	if options.Request.RunningContext != nil {
   605  		contextType = options.Request.RunningContext.Type_
   606  		contextData = options.Request.RunningContext.Context
   607  	}
   608  
   609  	var image string
   610  	if options.ExecutorSpec.Image != "" {
   611  		image = options.ExecutorSpec.Image
   612  	}
   613  
   614  	if options.TestSpec.ExecutionRequest != nil &&
   615  		options.TestSpec.ExecutionRequest.Image != "" {
   616  		image = options.TestSpec.ExecutionRequest.Image
   617  	}
   618  
   619  	if options.Request.Image != "" {
   620  		image = options.Request.Image
   621  	}
   622  
   623  	return JobOptions{
   624  		Image:                 image,
   625  		ImagePullSecrets:      options.ImagePullSecretNames,
   626  		JobTemplate:           options.ExecutorSpec.JobTemplate,
   627  		TestName:              options.TestName,
   628  		Namespace:             options.Namespace,
   629  		Envs:                  options.Request.Envs,
   630  		SecretEnvs:            options.Request.SecretEnvs,
   631  		HTTPProxy:             options.Request.HttpProxy,
   632  		HTTPSProxy:            options.Request.HttpsProxy,
   633  		UsernameSecret:        options.UsernameSecret,
   634  		TokenSecret:           options.TokenSecret,
   635  		RunnerCustomCASecret:  options.RunnerCustomCASecret,
   636  		CertificateSecret:     options.CertificateSecret,
   637  		ActiveDeadlineSeconds: options.Request.ActiveDeadlineSeconds,
   638  		JobTemplateExtensions: options.Request.JobTemplate,
   639  		EnvConfigMaps:         options.Request.EnvConfigMaps,
   640  		EnvSecrets:            options.Request.EnvSecrets,
   641  		Labels:                labels,
   642  		ExecutionNumber:       options.Request.Number,
   643  		ContextType:           contextType,
   644  		ContextData:           contextData,
   645  		Features:              options.Features,
   646  		PvcTemplateExtensions: options.Request.PvcTemplate,
   647  	}
   648  }
   649  
   650  // TailJobLogs - locates logs for job pod(s)
   651  func (c *JobExecutor) TailJobLogs(ctx context.Context, id, namespace string, logs chan []byte) (err error) {
   652  
   653  	podsClient := c.ClientSet.CoreV1().Pods(namespace)
   654  
   655  	pods, err := executor.GetJobPods(ctx, podsClient, id, 1, 10)
   656  	if err != nil {
   657  		close(logs)
   658  		return err
   659  	}
   660  
   661  	for _, pod := range pods.Items {
   662  		if pod.Labels["job-name"] == id {
   663  
   664  			l := c.Log.With("podNamespace", pod.Namespace, "podName", pod.Name, "podStatus", pod.Status)
   665  
   666  			switch pod.Status.Phase {
   667  
   668  			case corev1.PodRunning:
   669  				l.Debug("tailing pod logs: immediately")
   670  				return c.TailPodLogs(ctx, pod, logs)
   671  
   672  			case corev1.PodFailed:
   673  				err := errors.Errorf("can't get pod logs, pod failed: %s/%s", pod.Namespace, pod.Name)
   674  				l.Errorw(err.Error())
   675  				return c.GetLastLogLineError(ctx, pod)
   676  
   677  			default:
   678  				l.Debugw("tailing job logs: waiting for pod to be ready")
   679  				if err = wait.PollUntilContextTimeout(ctx, pollInterval, c.podStartTimeout, true, executor.IsPodLoggable(c.ClientSet, pod.Name, namespace)); err != nil {
   680  					l.Errorw("poll immediate error when tailing logs", "error", err)
   681  					return err
   682  				}
   683  
   684  				l.Debug("tailing pod logs")
   685  				return c.TailPodLogs(ctx, pod, logs)
   686  			}
   687  		}
   688  	}
   689  
   690  	return
   691  }
   692  
   693  func (c *JobExecutor) TailPodLogs(ctx context.Context, pod corev1.Pod, logs chan []byte) (err error) {
   694  	var containers []string
   695  	for _, container := range pod.Spec.InitContainers {
   696  		containers = append(containers, container.Name)
   697  	}
   698  
   699  	for _, container := range pod.Spec.Containers {
   700  		containers = append(containers, container.Name)
   701  	}
   702  
   703  	l := c.Log.With("method", "TailPodLogs", "pod", pod.Name, "namespace", pod.Namespace, "containersCount", len(containers))
   704  
   705  	wg := sync.WaitGroup{}
   706  	wg.Add(len(containers))
   707  
   708  	for _, container := range containers {
   709  		go func(container string) {
   710  			defer wg.Done()
   711  
   712  			podLogOptions := corev1.PodLogOptions{
   713  				Follow:    true,
   714  				Container: container,
   715  			}
   716  
   717  			podLogRequest := c.ClientSet.CoreV1().
   718  				Pods(pod.Namespace).
   719  				GetLogs(pod.Name, &podLogOptions)
   720  
   721  			stream, err := podLogRequest.Stream(ctx)
   722  			if err != nil {
   723  				l.Errorw("stream error", "error", err)
   724  				return
   725  			}
   726  
   727  			reader := bufio.NewReader(stream)
   728  
   729  			for {
   730  				b, err := utils.ReadLongLine(reader)
   731  				if err == io.EOF {
   732  					return
   733  				} else if err != nil {
   734  					l.Errorw("scanner error", "error", err)
   735  					return
   736  				}
   737  				l.Debugw("log chunk pushed", "out", string(b), "pod", pod.Name)
   738  				logs <- b
   739  			}
   740  		}(container)
   741  	}
   742  
   743  	go func() {
   744  		defer close(logs)
   745  		l.Debugw("waiting for all containers to finish", "containers", containers)
   746  		wg.Wait()
   747  		l.Infow("log stream finished")
   748  	}()
   749  
   750  	return
   751  }
   752  
   753  // GetPodLogError returns last line as error
   754  func (c *JobExecutor) GetPodLogError(ctx context.Context, pod corev1.Pod) (logsBytes []byte, err error) {
   755  	// error line should be last one
   756  	return executor.GetPodLogs(ctx, c.ClientSet, pod.Namespace, pod, 1)
   757  }
   758  
   759  // GetLastLogLineError return error if last line is failed
   760  func (c *JobExecutor) GetLastLogLineError(ctx context.Context, pod corev1.Pod) error {
   761  	l := c.Log.With("pod", pod.Name, "namespace", pod.Namespace)
   762  	errorLog, err := c.GetPodLogError(ctx, pod)
   763  	if err != nil {
   764  		l.Errorw("getPodLogs error", "error", err, "pod", pod)
   765  		return errors.Errorf("getPodLogs error: %v", err)
   766  	}
   767  
   768  	l.Debugw("log", "got last log bytes", string(errorLog)) // in case distorted log bytes
   769  	entry := output.GetLogEntry(errorLog)
   770  	l.Infow("got last log entry", "log", entry.String())
   771  	return errors.Errorf("error from last log entry: %s", entry.String())
   772  }
   773  
   774  // Abort aborts K8S by job name
   775  func (c *JobExecutor) Abort(ctx context.Context, execution *testkube.Execution) (result *testkube.ExecutionResult, err error) {
   776  	l := c.Log.With("execution", execution.Id)
   777  	result, err = executor.AbortJob(ctx, c.ClientSet, execution.TestNamespace, execution.Id)
   778  	if err != nil {
   779  		l.Errorw("error aborting job", "execution", execution.Id, "error", err)
   780  	}
   781  	l.Debugw("job aborted", "execution", execution.Id, "result", result)
   782  	if err := c.stopExecution(ctx, l, execution, result, false); err != nil {
   783  		l.Errorw("error stopping execution on job executor abort", "error", err)
   784  	}
   785  	return result, nil
   786  }
   787  
   788  func (c *JobExecutor) Timeout(ctx context.Context, jobName string) (result *testkube.ExecutionResult) {
   789  	l := c.Log.With("jobName", jobName)
   790  	l.Infow("job timeout")
   791  	execution, err := c.Repository.Get(ctx, jobName)
   792  	if err != nil {
   793  		l.Errorw("error getting execution", "error", err)
   794  		return
   795  	}
   796  
   797  	c.streamLog(ctx, execution.Id, events.NewLog("execution took too long, pod deadline exceeded"))
   798  
   799  	result = &testkube.ExecutionResult{
   800  		Status: testkube.ExecutionStatusTimeout,
   801  	}
   802  	if err := c.stopExecution(ctx, l, &execution, result, false); err != nil {
   803  		l.Errorw("error stopping execution on job executor timeout", "error", err)
   804  	}
   805  
   806  	return
   807  }
   808  
   809  func (c *JobExecutor) streamLog(ctx context.Context, id string, log *events.Log) {
   810  	if c.features.LogsV2 {
   811  		c.logsStream.Push(ctx, id, log)
   812  	}
   813  }
   814  
   815  // NewJobSpec is a method to create new job spec
   816  func NewJobSpec(log *zap.SugaredLogger, options JobOptions) (*batchv1.Job, error) {
   817  	envManager := env.NewManager()
   818  	secretEnvVars := append(envManager.PrepareSecrets(options.SecretEnvs, options.Variables),
   819  		envManager.PrepareGitCredentials(options.UsernameSecret, options.TokenSecret)...)
   820  
   821  	tmpl, err := utils.NewTemplate("job").Funcs(template.FuncMap{"vartypeptrtostring": testkube.VariableTypeString}).
   822  		Parse(options.JobTemplate)
   823  	if err != nil {
   824  		return nil, errors.Errorf("creating job spec from options.JobTemplate error: %v", err)
   825  	}
   826  
   827  	options.Jsn = strings.ReplaceAll(options.Jsn, "'", "''")
   828  	var buffer bytes.Buffer
   829  	if err = tmpl.ExecuteTemplate(&buffer, "job", options); err != nil {
   830  		return nil, errors.Errorf("executing job spec template: %v", err)
   831  	}
   832  
   833  	var job batchv1.Job
   834  	jobSpec := buffer.String()
   835  	if options.JobTemplateExtensions != "" {
   836  		tmplExt, err := utils.NewTemplate("jobExt").Funcs(template.FuncMap{"vartypeptrtostring": testkube.VariableTypeString}).
   837  			Parse(options.JobTemplateExtensions)
   838  		if err != nil {
   839  			return nil, errors.Errorf("creating job extensions spec from template error: %v", err)
   840  		}
   841  
   842  		var bufferExt bytes.Buffer
   843  		if err = tmplExt.ExecuteTemplate(&bufferExt, "jobExt", options); err != nil {
   844  			return nil, errors.Errorf("executing job extensions spec template: %v", err)
   845  		}
   846  
   847  		if jobSpec, err = merge2.MergeStrings(bufferExt.String(), jobSpec, false, kyaml.MergeOptions{}); err != nil {
   848  			return nil, errors.Errorf("merging job spec templates: %v", err)
   849  		}
   850  	}
   851  
   852  	log.Debug("Job specification", jobSpec)
   853  	decoder := yaml.NewYAMLOrJSONDecoder(bytes.NewBufferString(jobSpec), len(jobSpec))
   854  	if err := decoder.Decode(&job); err != nil {
   855  		return nil, errors.Errorf("decoding job spec error: %v", err)
   856  	}
   857  
   858  	for key, value := range options.Labels {
   859  		if job.Labels == nil {
   860  			job.Labels = make(map[string]string)
   861  		}
   862  
   863  		job.Labels[key] = value
   864  
   865  		if job.Spec.Template.Labels == nil {
   866  			job.Spec.Template.Labels = make(map[string]string)
   867  		}
   868  
   869  		job.Spec.Template.Labels[key] = value
   870  	}
   871  
   872  	envs := append(executor.RunnerEnvVars, corev1.EnvVar{Name: "RUNNER_CLUSTERID", Value: options.ClusterID})
   873  	if options.ArtifactRequest != nil && options.ArtifactRequest.StorageBucket != "" {
   874  		envs = append(envs, corev1.EnvVar{Name: "RUNNER_BUCKET", Value: options.ArtifactRequest.StorageBucket})
   875  	} else {
   876  		envs = append(envs, corev1.EnvVar{Name: "RUNNER_BUCKET", Value: os.Getenv("STORAGE_BUCKET")})
   877  	}
   878  
   879  	envs = append(envs, secretEnvVars...)
   880  	if options.HTTPProxy != "" {
   881  		envs = append(envs, corev1.EnvVar{Name: "HTTP_PROXY", Value: options.HTTPProxy})
   882  	}
   883  
   884  	if options.HTTPSProxy != "" {
   885  		envs = append(envs, corev1.EnvVar{Name: "HTTPS_PROXY", Value: options.HTTPSProxy})
   886  	}
   887  
   888  	envs = append(envs, envManager.PrepareEnvs(options.Envs, options.Variables)...)
   889  	envs = append(envs, corev1.EnvVar{Name: "RUNNER_WORKINGDIR", Value: options.WorkingDir})
   890  	envs = append(envs, corev1.EnvVar{Name: "RUNNER_EXECUTIONID", Value: options.Name})
   891  	envs = append(envs, corev1.EnvVar{Name: "RUNNER_TESTNAME", Value: options.TestName})
   892  	envs = append(envs, corev1.EnvVar{Name: "RUNNER_EXECUTIONNUMBER", Value: fmt.Sprint(options.ExecutionNumber)})
   893  	envs = append(envs, corev1.EnvVar{Name: "RUNNER_CONTEXTTYPE", Value: options.ContextType})
   894  	envs = append(envs, corev1.EnvVar{Name: "RUNNER_CONTEXTDATA", Value: options.ContextData})
   895  	envs = append(envs, corev1.EnvVar{Name: "RUNNER_APIURI", Value: options.APIURI})
   896  
   897  	for i := range job.Spec.Template.Spec.InitContainers {
   898  		job.Spec.Template.Spec.InitContainers[i].Env = append(job.Spec.Template.Spec.InitContainers[i].Env, envs...)
   899  	}
   900  
   901  	for i := range job.Spec.Template.Spec.Containers {
   902  		job.Spec.Template.Spec.Containers[i].Env = append(job.Spec.Template.Spec.Containers[i].Env, envs...)
   903  	}
   904  
   905  	return &job, nil
   906  }
   907  
   908  func NewJobOptions(log *zap.SugaredLogger, templatesClient templatesv1.Interface, images executor.Images,
   909  	templates executor.Templates, serviceAccountNames map[string]string, registry, clusterID, apiURI string,
   910  	execution testkube.Execution, options ExecuteOptions, natsURI string, debug bool) (jobOptions JobOptions, err error) {
   911  	jsn, err := json.Marshal(execution)
   912  	if err != nil {
   913  		return jobOptions, err
   914  	}
   915  
   916  	jobOptions = NewJobOptionsFromExecutionOptions(options)
   917  	jobOptions.Name = execution.Id
   918  	jobOptions.Namespace = execution.TestNamespace
   919  	jobOptions.Jsn = string(jsn)
   920  	jobOptions.InitImage = images.Init
   921  	jobOptions.TestName = execution.TestName
   922  	jobOptions.Features = options.Features
   923  
   924  	// options needed for Log sidecar
   925  	if options.Features.LogsV2 {
   926  		// TODO pass them from some config? we dont' have any in this context?
   927  		jobOptions.Debug = debug
   928  		jobOptions.NatsUri = natsURI
   929  		jobOptions.LogSidecarImage = images.LogSidecar
   930  	}
   931  
   932  	if jobOptions.JobTemplate == "" {
   933  		jobOptions.JobTemplate = templates.Job
   934  	}
   935  
   936  	if options.ExecutorSpec.JobTemplateReference != "" {
   937  		template, err := templatesClient.Get(options.ExecutorSpec.JobTemplateReference)
   938  		if err != nil {
   939  			return jobOptions, err
   940  		}
   941  
   942  		if template.Spec.Type_ != nil && testkube.TemplateType(*template.Spec.Type_) == testkube.JOB_TemplateType {
   943  			jobOptions.JobTemplate = template.Spec.Body
   944  		} else {
   945  			log.Warnw("Not matched template type", "template", options.ExecutorSpec.JobTemplateReference)
   946  		}
   947  	}
   948  
   949  	if options.Request.JobTemplateReference != "" {
   950  		template, err := templatesClient.Get(options.Request.JobTemplateReference)
   951  		if err != nil {
   952  			return jobOptions, err
   953  		}
   954  
   955  		if template.Spec.Type_ != nil && testkube.TemplateType(*template.Spec.Type_) == testkube.JOB_TemplateType {
   956  			jobOptions.JobTemplate = template.Spec.Body
   957  		} else {
   958  			log.Warnw("Not matched template type", "template", options.Request.JobTemplateReference)
   959  		}
   960  	}
   961  
   962  	jobOptions.Variables = execution.Variables
   963  	serviceAccountName, ok := serviceAccountNames[execution.TestNamespace]
   964  	if !ok {
   965  		return jobOptions, fmt.Errorf("not supported namespace %s", execution.TestNamespace)
   966  	}
   967  
   968  	jobOptions.ServiceAccountName = serviceAccountName
   969  	jobOptions.Registry = registry
   970  	jobOptions.ClusterID = clusterID
   971  
   972  	supportArtifacts := false
   973  	for _, feature := range options.ExecutorSpec.Features {
   974  		if feature == executorv1.FeatureArtifacts {
   975  			supportArtifacts = true
   976  			break
   977  		}
   978  	}
   979  
   980  	if supportArtifacts {
   981  		jobOptions.ArtifactRequest = execution.ArtifactRequest
   982  	}
   983  
   984  	workingDir := agent.GetDefaultWorkingDir(executor.VolumeDir, execution)
   985  	if execution.Content != nil && execution.Content.Repository != nil && execution.Content.Repository.WorkingDir != "" {
   986  		workingDir = filepath.Join(executor.VolumeDir, "repo", execution.Content.Repository.WorkingDir)
   987  	}
   988  
   989  	jobOptions.WorkingDir = workingDir
   990  	jobOptions.APIURI = apiURI
   991  
   992  	jobOptions.SlavePodTemplate = templates.Slave
   993  	if options.Request.SlavePodRequest != nil && options.Request.SlavePodRequest.PodTemplateReference != "" {
   994  		template, err := templatesClient.Get(options.Request.SlavePodRequest.PodTemplateReference)
   995  		if err != nil {
   996  			return jobOptions, err
   997  		}
   998  
   999  		if template.Spec.Type_ != nil && testkube.TemplateType(*template.Spec.Type_) == testkube.POD_TemplateType {
  1000  			jobOptions.SlavePodTemplate = template.Spec.Body
  1001  		} else {
  1002  			log.Warnw("Not matched template type", "template", options.Request.SlavePodRequest.PodTemplateReference)
  1003  		}
  1004  	}
  1005  
  1006  	if options.ExecutorSpec.Slaves != nil {
  1007  		slvesConfigs, err := json.Marshal(executor.GetSlavesConfigs(
  1008  			images.Init,
  1009  			*options.ExecutorSpec.Slaves,
  1010  			jobOptions.Registry,
  1011  			jobOptions.ServiceAccountName,
  1012  			jobOptions.CertificateSecret,
  1013  			jobOptions.SlavePodTemplate,
  1014  			jobOptions.ImagePullSecrets,
  1015  			jobOptions.EnvConfigMaps,
  1016  			jobOptions.EnvSecrets,
  1017  			int(jobOptions.ActiveDeadlineSeconds),
  1018  			testkube.Features(options.Features),
  1019  			natsURI,
  1020  			images.LogSidecar,
  1021  			jobOptions.RunnerCustomCASecret,
  1022  		))
  1023  
  1024  		if err != nil {
  1025  			return jobOptions, err
  1026  		}
  1027  
  1028  		if jobOptions.Variables == nil {
  1029  			jobOptions.Variables = make(map[string]testkube.Variable)
  1030  		}
  1031  
  1032  		jobOptions.Variables[executor.SlavesConfigsEnv] = testkube.NewBasicVariable(executor.SlavesConfigsEnv, string(slvesConfigs))
  1033  	}
  1034  
  1035  	jobOptions.PvcTemplate = templates.PVC
  1036  	if options.Request.PvcTemplateReference != "" {
  1037  		template, err := templatesClient.Get(options.Request.PvcTemplateReference)
  1038  		if err != nil {
  1039  			return jobOptions, err
  1040  		}
  1041  
  1042  		if template.Spec.Type_ != nil && testkube.TemplateType(*template.Spec.Type_) == testkube.PVC_TemplateType {
  1043  			jobOptions.PvcTemplate = template.Spec.Body
  1044  		} else {
  1045  			log.Warnw("Not matched template type", "template", options.Request.PvcTemplateReference)
  1046  		}
  1047  	}
  1048  
  1049  	// used for adding custom certificates for Agent (gRPC) API
  1050  	jobOptions.AgentAPITLSSecret = options.AgentAPITLSSecret
  1051  
  1052  	return
  1053  }
  1054  
  1055  func NewPVCOptionsFromJobOptions(options JobOptions, defaultStorageClassName string) PVCOptions {
  1056  	return PVCOptions{
  1057  		Name:                    options.Name,
  1058  		Namespace:               options.Namespace,
  1059  		PvcTemplate:             options.PvcTemplate,
  1060  		PvcTemplateExtensions:   options.PvcTemplateExtensions,
  1061  		ArtifactRequest:         options.ArtifactRequest,
  1062  		DefaultStorageClassName: defaultStorageClassName,
  1063  	}
  1064  }