k8s.io/perf-tests/clusterloader2@v0.0.0-20240304094227-64bdb12da87e/pkg/measurement/common/pod_command.go (about)

     1  /*
     2  Copyright 2023 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package common
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"fmt"
    23  	"strings"
    24  	"sync"
    25  	"time"
    26  
    27  	v1 "k8s.io/api/core/v1"
    28  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    29  	"k8s.io/apimachinery/pkg/labels"
    30  	"k8s.io/apimachinery/pkg/runtime"
    31  	"k8s.io/apimachinery/pkg/watch"
    32  	"k8s.io/client-go/kubernetes"
    33  	"k8s.io/client-go/kubernetes/scheme"
    34  	"k8s.io/client-go/rest"
    35  	"k8s.io/client-go/tools/cache"
    36  	"k8s.io/client-go/tools/remotecommand"
    37  	"k8s.io/client-go/util/exec"
    38  	"k8s.io/klog/v2"
    39  	"k8s.io/perf-tests/clusterloader2/pkg/measurement"
    40  	"k8s.io/perf-tests/clusterloader2/pkg/measurement/util/informer"
    41  	"k8s.io/perf-tests/clusterloader2/pkg/util"
    42  )
    43  
const (
	// podPeriodicCommandMeasurementName is the name this measurement is registered under. It is also
	// used as the prefix of every log line and as the base name of the summaries built by gather.
	podPeriodicCommandMeasurementName = "PodPeriodicCommand"
)
    47  
// podPeriodicCommandMeasurementCommandParams describes a single command that is executed in every
// selected pod on each tick. It is populated from one entry of the "commands" param.
type podPeriodicCommandMeasurementCommandParams struct {
	// Name is an identifier for the command (read from the "name" key). It is used in log
	// messages, in the result summary, and as part of the stdout/stderr summary names.
	Name string
	// Command is the actual Command to execute in a pod (read from the "command" key), passed
	// as-is to the pod exec request.
	Command []string
	// Timeout is the maximum amount of time the command will have to finish (read from the
	// "timeout" key). When it elapses the result is marked as timed out; the command itself is
	// not cancelled.
	Timeout time.Duration
}
    56  
// podPeriodicCommandMeasurementParams is the parsed configuration of the measurement, built by
// newPodPeriodCommandMeasurementParams from the params of the "start" action.
type podPeriodicCommandMeasurementParams struct {
	// LabelSelector is used to select applicable pods to run commands on.
	// Read from the "labelSelector" key; pods are listed and watched across all namespaces.
	LabelSelector *labels.Selector
	// Interval is the time between sequential command executions.
	// Read from the "interval" key and used as the period of the worker's ticker.
	Interval time.Duration
	// Container is the name of the Container to run the command in.
	// Read from the "container" key; pods whose spec lacks this container are skipped.
	Container string
	// Limit is the maximum number of pods that will have the commands executed in on every interval.
	// Read from the "limit" key.
	Limit int
	// FailOnCommandError controls if the measurement will fail if a command has a non-zero RC during the life of the measurement.
	// Read from the "failOnCommandError" key.
	FailOnCommandError bool
	// FailOnExecError controls if the measurement will fail if an error occurs while trying to execute a command.
	// For example, this would include any error returned from the k8s client-go library.
	// Read from the "failOnExecError" key.
	FailOnExecError bool
	// FailOnTimeout controls if the measurement will fail if a command times out.
	// Read from the "failOnTimeout" key.
	FailOnTimeout bool
	// Commands is the list of Commands that will be executed in each pod on each interval.
	// Read from the "commands" key; all commands of a pod are started concurrently.
	Commands []*podPeriodicCommandMeasurementCommandParams
}
    76  
    77  func newPodPeriodCommandMeasurementParams(
    78  	params map[string]interface{},
    79  ) (p *podPeriodicCommandMeasurementParams, err error) {
    80  	p = &podPeriodicCommandMeasurementParams{}
    81  
    82  	p.LabelSelector, err = util.GetLabelSelector(params, "labelSelector")
    83  	if err != nil {
    84  		return
    85  	}
    86  	p.Interval, err = util.GetDuration(params, "interval")
    87  	if err != nil {
    88  		return
    89  	}
    90  
    91  	p.Container, err = util.GetString(params, "container")
    92  	if err != nil {
    93  		return
    94  	}
    95  
    96  	p.Limit, err = util.GetInt(params, "limit")
    97  	if err != nil {
    98  		return
    99  	}
   100  
   101  	p.FailOnCommandError, err = util.GetBool(params, "failOnCommandError")
   102  	if err != nil {
   103  		return
   104  	}
   105  
   106  	p.FailOnExecError, err = util.GetBool(params, "failOnExecError")
   107  	if err != nil {
   108  		return
   109  	}
   110  
   111  	p.FailOnTimeout, err = util.GetBool(params, "failOnTimeout")
   112  	if err != nil {
   113  		return
   114  	}
   115  
   116  	var commandMaps []map[string]interface{}
   117  	commandMaps, err = util.GetMapArray(params, "commands")
   118  	if err != nil {
   119  		return
   120  	}
   121  
   122  	p.Commands = []*podPeriodicCommandMeasurementCommandParams{}
   123  	for _, commandMap := range commandMaps {
   124  		c := &podPeriodicCommandMeasurementCommandParams{}
   125  
   126  		c.Name, err = util.GetString(commandMap, "name")
   127  		if err != nil {
   128  			return
   129  		}
   130  
   131  		c.Command, err = util.GetStringArray(commandMap, "command")
   132  		if err != nil {
   133  			return
   134  		}
   135  
   136  		c.Timeout, err = util.GetDuration(commandMap, "timeout")
   137  		if err != nil {
   138  			return
   139  		}
   140  
   141  		p.Commands = append(p.Commands, c)
   142  	}
   143  
   144  	return p, nil
   145  }
   146  
// runCommandResult is the outcome of one execution of one configured command in one pod.
// The exported fields are serialized into the JSON summary; stdout and stderr are unexported and
// are therefore not part of that JSON document.
type runCommandResult struct {
	// stdout is the saved stdout from the command. Will be stored as its own measurement summary.
	stdout string
	// stderr is the saved stderr from the command. Will be stored as its own measurement summary.
	stderr string
	// ExitCode is the RC from the command. Defaults to zero and will not be set if the command times
	// out or fails to run.
	ExitCode int `json:"exitCode"`
	// Name is the name of the command that was run, set in the config.
	Name string `json:"name"`
	// Command is the actual command which was executed.
	Command []string `json:"command"`
	// Timeout is the configured timeout duration, formatted with time.Duration.String.
	Timeout string `json:"timeout"`
	// HitTimeout is set to true if the command did not finish before the timeout.
	HitTimeout bool `json:"hitTimeout"`
	// StartTime is the time the command began executing. Isn't super precise.
	// It stays at the zero value when the executor could not be created.
	StartTime time.Time `json:"startTime"`
	// EndTime is the time the command finished executing or the timeout fired. Isn't super precise.
	EndTime time.Time `json:"endTime"`
	// ExecError is set to any go error raised while executing the command, other than a
	// non-zero exit code (which is reported through ExitCode instead).
	ExecError string `json:"execError"`
}
   170  
// runAllCommandsResult groups the results of all configured commands that were run in a single
// pod during a single tick. Pod and Namespace identify the pod, Container is the configured
// container name, and Commands holds one entry per executed command in the order the commands
// finished (not the order in which they are configured).
type runAllCommandsResult struct {
	Pod       string              `json:"pod"`
	Namespace string              `json:"namespace"`
	Container string              `json:"container"`
	Commands  []*runCommandResult `json:"commands"`
}
   177  
// stats holds counters collected over the lifetime of the measurement. It is serialized as its
// own JSON summary by gather. The counters updated while commands are running are modified
// under podPeriodicCommandMeasurement.statsLock.
type stats struct {
	// Execs is the total number of times a command was executed in a pod.
	Execs int `json:"execs"`
	// ExecErrors is the total number of errors that were observed, not including errors from the executed commands.
	// For example, this includes any errors that are returned by the k8s client-go library.
	ExecErrors int `json:"execErrors"`
	// Timeouts is the number of commands which hit a timeout.
	Timeouts int `json:"timeouts"`
	// NonZeroRCs is the total number of non-zero return codes that were collected from the commands executed.
	NonZeroRCs int `json:"nonZeroRCs"`
	// Measurements is the total number of measurements gathered, including the stats summary itself.
	// It is only set during gather.
	Measurements int `json:"measurements"`
	// Ticks is the total number of intervals that were executed, including the initial tick
	// performed as soon as the worker starts.
	Ticks int `json:"ticks"`
	// TicksNoPods is the total number of intervals that were skipped because no applicable pods could be found.
	TicksNoPods int `json:"ticksNoPods"`
}
   195  
// podPeriodicCommandMeasurement can be used to continually run commands within pods at an interval.
//
// It works by performing the following on each tick (one tick happens immediately when the worker
// starts, then one per `params.Interval`):
//
//  1. Creating a list of pods, with maximum size `params.Limit`, which will execute the configured commands.
//     Pods are selected using `params.LabelSelector`, must contain `params.Container`, must be in a running
//     state and must be ready. If no applicable pods are available, then no step is performed for the tick.
//  2. For each pod, spin a goroutine which will run all configured commands in the pod.
//  3. For each command, spin a goroutine to handle running the command.
//  4. If the command returns non-zero, this will be reflected in the associated measurement.
//  5. If a go error occurred while trying to execute the command, this will be reflected in the associated measurement.
//
// A tick only finishes once every command has either returned or hit its timeout. A command that
// hits its timeout is reported as timed out but is not cancelled.
//
// The following measurements are produced during the gather step:
//
//  1. One summary measurement, which includes information on all executed commands, such as if the command
//     took longer than its configured timeout, the command's RC, and the pod the command was executed on.
//  2. One measurement for each command's non-empty stdout and stderr.
//  3. One measurement containing statistics, such as the number of commands executed, the number of errors observed,
//     and the number of non-zero RCs.
//
// The measurement fails in the following scenarios:
//
//  1. `params.FailOnCommandError` is set to true and a command has a non-zero RC.
//  2. `params.FailOnExecError` is set to true and an error occurs while trying to execute a command.
//  3. `params.FailOnTimeout` is set to true and a command takes longer than its configured timeout to execute.
type podPeriodicCommandMeasurement struct {
	// clientset, restConfig and params are stored by start and used for every pod exec request.
	clientset  kubernetes.Interface
	restConfig *rest.Config
	params     *podPeriodicCommandMeasurementParams
	isRunning  bool
	// skipGather signals if the gather step should be skipped, mainly used to bail if param parsing failed.
	skipGather bool
	// stopCh is closed when stop() is called.
	stopCh chan struct{}
	// doneCh is closed after stopCh is closed and all in progress commands have finished.
	doneCh   chan struct{}
	results  []*runAllCommandsResult
	informer cache.SharedInformer
	stats    *stats
	// statsLock needs to be held to modify and read the stats field.
	statsLock *sync.Mutex
}
   238  
   239  // isApplicablePod checks if a pod is a viable candidate for running a command on.
   240  func (p *podPeriodicCommandMeasurement) isApplicablePod(pod *v1.Pod) bool {
   241  	if pod.Status.Phase != v1.PodRunning {
   242  		return false
   243  	}
   244  
   245  	hasContainer := false
   246  	for _, c := range pod.Spec.Containers {
   247  		if c.Name == p.params.Container {
   248  			hasContainer = true
   249  
   250  			break
   251  		}
   252  	}
   253  
   254  	if !hasContainer {
   255  		return false
   256  	}
   257  
   258  	for _, c := range pod.Status.Conditions {
   259  		if c.Type == v1.PodReady && c.Status == v1.ConditionTrue {
   260  			return true
   261  		}
   262  	}
   263  
   264  	return false
   265  }
   266  
   267  // getMaxNPods gets at most N pods from the internal informer's store.
   268  // The informer uses a ThreadSafeStore, which stores objects in a map. When List is called, the map is
   269  // iterated over using range, which ensures a random order.
   270  func (p *podPeriodicCommandMeasurement) getMaxNPods(n int) []*v1.Pod {
   271  	store := p.informer.GetStore()
   272  	pods := []*v1.Pod{}
   273  
   274  	podList := store.List()
   275  	if len(podList) == 0 {
   276  		return pods
   277  	}
   278  
   279  	for _, podInterface := range podList {
   280  		pod := podInterface.(*v1.Pod)
   281  		if !p.isApplicablePod(pod) {
   282  			continue
   283  		}
   284  
   285  		pods = append(pods, pod)
   286  
   287  		if len(pods) >= n {
   288  			return pods
   289  		}
   290  	}
   291  
   292  	return pods
   293  }
   294  
   295  // runCommandInPod runs a specific given command in the specific given pod.
   296  func (p *podPeriodicCommandMeasurement) runCommandInPod(
   297  	pod *v1.Pod, params *podPeriodicCommandMeasurementCommandParams,
   298  ) *runCommandResult {
   299  	klog.V(4).Infof(
   300  		"%s: running named command %s in pod %s/%s",
   301  		podPeriodicCommandMeasurementName, params.Name, pod.Namespace, pod.Name,
   302  	)
   303  
   304  	p.statsLock.Lock()
   305  	p.stats.Execs++
   306  	p.statsLock.Unlock()
   307  
   308  	result := &runCommandResult{
   309  		Name:       params.Name,
   310  		Command:    params.Command,
   311  		Timeout:    params.Timeout.String(),
   312  		ExitCode:   0,
   313  		HitTimeout: false,
   314  	}
   315  
   316  	req := p.clientset.CoreV1().RESTClient().
   317  		Post().
   318  		Namespace(pod.Namespace).
   319  		Resource("pods").
   320  		Name(pod.Name).
   321  		SubResource("exec").
   322  		VersionedParams(&v1.PodExecOptions{
   323  			Container: p.params.Container,
   324  			Command:   params.Command,
   325  			Stdin:     false,
   326  			Stdout:    true,
   327  			Stderr:    true,
   328  			TTY:       false,
   329  		}, scheme.ParameterCodec)
   330  
   331  	executor, err := remotecommand.NewSPDYExecutor(p.restConfig, "POST", req.URL())
   332  	if err != nil {
   333  		result.ExecError = err.Error()
   334  
   335  		return result
   336  	}
   337  
   338  	stdoutBuf := &bytes.Buffer{}
   339  	stderrBuf := &bytes.Buffer{}
   340  	// Holds error returned from executor.Stream.
   341  	execErrChan := make(chan error, 1)
   342  
   343  	// The logic used here to start the executor and the timeout timer isn't super precise, but
   344  	// it is good enough for this use case. It is ok that the timeout timer is started after the
   345  	// executor, since we still guarantee that the timeout is at least the configured value.
   346  	result.StartTime = time.Now()
   347  
   348  	go func() {
   349  		err := executor.Stream(remotecommand.StreamOptions{
   350  			Stdout: stdoutBuf,
   351  			Stderr: stderrBuf,
   352  		})
   353  		execErrChan <- err
   354  	}()
   355  
   356  	// Two different cases: (1) if the command returns before the timeout, and (2) if the timeout
   357  	// triggers before the command is done.
   358  	// The value result.EndTime is set in both cases.
   359  	// If the timeout triggers, then the command isn't actually cancelled. This logic isn't available until
   360  	// client-go version 0.26 (see Executor.StreamWithContext).
   361  	select {
   362  	case err = <-execErrChan:
   363  		result.EndTime = time.Now()
   364  
   365  		if err == nil {
   366  			break
   367  		}
   368  
   369  		switch e := err.(type) {
   370  		case exec.CodeExitError:
   371  			result.ExitCode = e.ExitStatus()
   372  
   373  			p.statsLock.Lock()
   374  			p.stats.NonZeroRCs++
   375  			p.statsLock.Unlock()
   376  
   377  			klog.V(2).Infof(
   378  				"%s: warning: non-zero exit code %d for named command %s in pod %s/%s",
   379  				podPeriodicCommandMeasurementName, result.ExitCode, params.Name, pod.Namespace, pod.Name,
   380  			)
   381  		default:
   382  			result.ExecError = err.Error()
   383  			return result
   384  		}
   385  	case <-time.After(params.Timeout):
   386  		result.EndTime = time.Now()
   387  		result.HitTimeout = true
   388  
   389  		p.statsLock.Lock()
   390  		p.stats.Timeouts++
   391  		p.statsLock.Unlock()
   392  
   393  		klog.V(2).Infof(
   394  			"%s: warning: hit timeout of %s for named command %s in pod %s/%s",
   395  			podPeriodicCommandMeasurementName, params.Timeout.String(), params.Name, pod.Namespace, pod.Name,
   396  		)
   397  	}
   398  
   399  	klog.V(4).Infof(
   400  		"%s: finished running named command %s in pod %s/%s",
   401  		podPeriodicCommandMeasurementName, params.Name, pod.Namespace, pod.Name,
   402  	)
   403  
   404  	result.stdout = stdoutBuf.String()
   405  	result.stderr = stderrBuf.String()
   406  
   407  	return result
   408  }
   409  
   410  // runAllCommandsInPod runs all of the configured commands in the given specific pod.
   411  func (p *podPeriodicCommandMeasurement) runAllCommandsInPod(pod *v1.Pod) *runAllCommandsResult {
   412  	wg := &sync.WaitGroup{}
   413  	commandResultCh := make(chan *runCommandResult, len(p.params.Commands))
   414  
   415  	getRunCommandFunc := func(c *podPeriodicCommandMeasurementCommandParams) func() {
   416  		return func() {
   417  			defer wg.Done()
   418  
   419  			if c := p.runCommandInPod(pod, c); c != nil {
   420  				if c.ExecError != "" {
   421  					p.statsLock.Lock()
   422  					p.stats.ExecErrors++
   423  					p.statsLock.Unlock()
   424  
   425  					klog.V(2).Infof(
   426  						"%s: error while running named command %s on pod %s/%s: %v",
   427  						podPeriodicCommandMeasurementName, c.Name, pod.Namespace, pod.Name, c.ExecError,
   428  					)
   429  				}
   430  
   431  				commandResultCh <- c
   432  			}
   433  		}
   434  	}
   435  
   436  	klog.V(4).Infof(
   437  		"%s: running commands on pod %s/%s", podPeriodicCommandMeasurementName, pod.Namespace, pod.Name,
   438  	)
   439  
   440  	for _, command := range p.params.Commands {
   441  		wg.Add(1)
   442  
   443  		go getRunCommandFunc(command)()
   444  	}
   445  
   446  	wg.Wait()
   447  	close(commandResultCh)
   448  
   449  	klog.V(4).Infof(
   450  		"%s: finished running commands on pod %s/%s", podPeriodicCommandMeasurementName, pod.Namespace, pod.Name,
   451  	)
   452  
   453  	results := &runAllCommandsResult{
   454  		Pod:       pod.Name,
   455  		Namespace: pod.Namespace,
   456  		Container: p.params.Container,
   457  		Commands:  []*runCommandResult{},
   458  	}
   459  
   460  	for c := range commandResultCh {
   461  		results.Commands = append(results.Commands, c)
   462  	}
   463  
   464  	klog.V(8).Infof("%s: %#v", podPeriodicCommandMeasurementName, results)
   465  
   466  	return results
   467  }
   468  
   469  // commandWorker runs the configured commands in applicable pods on the configured interval.
   470  func (p *podPeriodicCommandMeasurement) commandWorker() {
   471  	ticker := time.NewTicker(p.params.Interval)
   472  	defer func() {
   473  		ticker.Stop()
   474  		// Close doneCh to signal the worker has exited.
   475  		close(p.doneCh)
   476  	}()
   477  
   478  	doTick := func() {
   479  		p.statsLock.Lock()
   480  		p.stats.Ticks++
   481  		p.statsLock.Unlock()
   482  
   483  		targetPods := p.getMaxNPods(p.params.Limit)
   484  		if len(targetPods) == 0 {
   485  			klog.V(2).Infof("%s: warning: no pods available to run commands on", podPeriodicCommandMeasurementName)
   486  
   487  			p.statsLock.Lock()
   488  			p.stats.TicksNoPods++
   489  			p.statsLock.Unlock()
   490  
   491  			return
   492  		}
   493  
   494  		wg := &sync.WaitGroup{}
   495  		resultsChan := make(chan *runAllCommandsResult, len(targetPods))
   496  
   497  		for _, pod := range targetPods {
   498  			wg.Add(1)
   499  			go func(targetPod *v1.Pod) {
   500  				defer wg.Done()
   501  				resultsChan <- p.runAllCommandsInPod(targetPod)
   502  			}(pod)
   503  		}
   504  
   505  		wg.Wait()
   506  		close(resultsChan)
   507  
   508  		for r := range resultsChan {
   509  			p.results = append(p.results, r)
   510  		}
   511  	}
   512  
   513  	// Do an initial tick
   514  	doTick()
   515  	for {
   516  		select {
   517  		case <-p.stopCh:
   518  			return
   519  		case <-ticker.C:
   520  			doTick()
   521  		}
   522  	}
   523  }
   524  
   525  func (p *podPeriodicCommandMeasurement) start(
   526  	clientset kubernetes.Interface, restConfig *rest.Config, params *podPeriodicCommandMeasurementParams,
   527  ) error {
   528  	if p.isRunning {
   529  		return fmt.Errorf("%s: measurement already running", podPeriodicCommandMeasurementName)
   530  	}
   531  
   532  	klog.V(2).Infof("%s: starting pod periodic command measurement...", podPeriodicCommandMeasurementName)
   533  
   534  	p.clientset = clientset
   535  	p.restConfig = restConfig
   536  	p.params = params
   537  	p.isRunning = true
   538  	p.skipGather = false
   539  	p.stopCh = make(chan struct{})
   540  	p.doneCh = make(chan struct{})
   541  	p.results = []*runAllCommandsResult{}
   542  	p.stats = &stats{}
   543  	p.statsLock = &sync.Mutex{}
   544  
   545  	labelSelectorString := (*params.LabelSelector).String()
   546  	p.informer = informer.NewInformer(
   547  		&cache.ListWatch{
   548  			ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
   549  				options.LabelSelector = labelSelectorString
   550  				return clientset.CoreV1().Pods("").List(context.TODO(), options)
   551  			},
   552  			WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
   553  				options.LabelSelector = labelSelectorString
   554  				return clientset.CoreV1().Pods("").Watch(context.TODO(), options)
   555  			},
   556  		},
   557  		// Use the informer's internal cache to handle listing pods, no need to handle events.
   558  		func(_, _ interface{}) {},
   559  	)
   560  
   561  	if err := informer.StartAndSync(p.informer, p.stopCh, informerSyncTimeout); err != nil {
   562  		return err
   563  	}
   564  
   565  	go p.commandWorker()
   566  
   567  	return nil
   568  }
   569  
   570  func (p *podPeriodicCommandMeasurement) stop() {
   571  	if p.isRunning {
   572  		p.isRunning = false
   573  		close(p.stopCh)
   574  		// Wait for the commandWorker function to stop.
   575  		<-p.doneCh
   576  	}
   577  }
   578  
   579  func (p *podPeriodicCommandMeasurement) gather() ([]measurement.Summary, error) {
   580  	p.stop()
   581  
   582  	klog.V(2).Infof("%s: gathered %d command results", podPeriodicCommandMeasurementName, len(p.results))
   583  
   584  	// Create summary for all results.
   585  	content, err := util.PrettyPrintJSON(p.results)
   586  	if err != nil {
   587  		// Ignore p.params.FailOnError here, since this is fatal.
   588  		return nil, fmt.Errorf("unable to convert results to JSON: %w", err)
   589  	}
   590  
   591  	measurements := []measurement.Summary{
   592  		measurement.CreateSummary(podPeriodicCommandMeasurementName, "json", content),
   593  	}
   594  
   595  	// Hold error to be returned to signal that the measurement failed, or nil.
   596  	// Should only be non-nil if one of the FailOnXYZ params is set.
   597  	var resultErr error
   598  
   599  	// Create individual results for stdout and stderr.
   600  	// Saving these as a value in a json document can lead to weird issues in reading the data
   601  	// properly, especially if the data is binary, such as for profiling results.
   602  	// Additionally, check for any errors or timeouts that may have occurred.
   603  	for _, r := range p.results {
   604  		getSummaryName := func(c *runCommandResult, suffix string) string {
   605  			return strings.Join(
   606  				[]string{
   607  					podPeriodicCommandMeasurementName, c.StartTime.Format(time.RFC3339), r.Namespace, r.Pod, c.Name, suffix,
   608  				}, "-",
   609  			)
   610  		}
   611  
   612  		for _, c := range r.Commands {
   613  			if c.stdout != "" {
   614  				measurements = append(measurements, measurement.CreateSummary(getSummaryName(c, "stdout"), "txt", c.stdout))
   615  			}
   616  			if c.stderr != "" {
   617  				measurements = append(measurements, measurement.CreateSummary(getSummaryName(c, "stderr"), "txt", c.stderr))
   618  			}
   619  
   620  			// If the result error has already been set, we don't need to set it again.
   621  			if resultErr != nil {
   622  				continue
   623  			}
   624  
   625  			if p.params.FailOnCommandError && c.ExitCode != 0 {
   626  				resultErr = fmt.Errorf(
   627  					"unexpected non-zero RC while executing command %s in pod %s/%s: got RC %d",
   628  					c.Name, r.Namespace, r.Pod, c.ExitCode,
   629  				)
   630  				continue
   631  			}
   632  
   633  			if p.params.FailOnExecError && c.ExecError != "" {
   634  				resultErr = fmt.Errorf(
   635  					"unexpected error while executing command %s in pod %s/%s: %s", c.Name, r.Namespace, r.Pod, c.ExecError,
   636  				)
   637  				continue
   638  			}
   639  
   640  			if p.params.FailOnTimeout && c.HitTimeout {
   641  				resultErr = fmt.Errorf(
   642  					"hit timeout of %s while executing command %s in pod %s/%s",
   643  					c.Timeout, c.Name, r.Namespace, r.Pod,
   644  				)
   645  			}
   646  		}
   647  	}
   648  
   649  	// Create summary for stats.
   650  	p.stats.Measurements = len(measurements) + 1 // Adding another measurement for the stats.
   651  	content, err = util.PrettyPrintJSON(p.stats)
   652  	if err != nil {
   653  		// Ignore p.params.FailOnError here, since this is fatal.
   654  		return nil, fmt.Errorf("unable to convert stats to JSON: %w", err)
   655  	}
   656  
   657  	measurements = append(
   658  		measurements,
   659  		measurement.CreateSummary(
   660  			strings.Join([]string{podPeriodicCommandMeasurementName, "stats"}, "-"), "json", content,
   661  		),
   662  	)
   663  
   664  	// resultErr can only be set if one of the FailOnXYZ params is set.
   665  	if resultErr != nil {
   666  		return measurements, resultErr
   667  	}
   668  
   669  	return measurements, nil
   670  }
   671  
// String returns the name under which this measurement is registered.
func (*podPeriodicCommandMeasurement) String() string {
	return podPeriodicCommandMeasurementName
}
   675  
   676  func (p *podPeriodicCommandMeasurement) Execute(config *measurement.Config) ([]measurement.Summary, error) {
   677  	action, err := util.GetString(config.Params, "action")
   678  	if err != nil {
   679  		return nil, err
   680  	}
   681  
   682  	switch action {
   683  	case "start":
   684  		params, err := newPodPeriodCommandMeasurementParams(config.Params)
   685  		if err != nil {
   686  			p.skipGather = true
   687  			return nil, err
   688  		}
   689  
   690  		return nil, p.start(
   691  			config.ClusterFramework.GetClientSets().GetClient(), config.ClusterFramework.GetRestClient(), params,
   692  		)
   693  	case "gather":
   694  		if p.skipGather {
   695  			return nil, nil
   696  		}
   697  
   698  		return p.gather()
   699  	default:
   700  		return nil, fmt.Errorf("unknown action %s", action)
   701  	}
   702  }
   703  
// Dispose stops the measurement if it is still running, waiting for the background worker to
// exit; it simply delegates to stop.
func (p *podPeriodicCommandMeasurement) Dispose() {
	p.stop()
}
   707  
// createPodPeriodicCommandMeasurement returns a new measurement that has not been started yet.
// It is the factory function handed to measurement.Register in init.
func createPodPeriodicCommandMeasurement() measurement.Measurement {
	return &podPeriodicCommandMeasurement{}
}
   711  
   712  func init() {
   713  	if err := measurement.Register(podPeriodicCommandMeasurementName, createPodPeriodicCommandMeasurement); err != nil {
   714  		klog.Fatalf("Cannot register %s: %v", podPeriodicCommandMeasurementName, err)
   715  	}
   716  }