sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/entrypoint/run.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package entrypoint
    18  
    19  import (
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"os"
    25  	"os/exec"
    26  	"os/signal"
    27  	"path/filepath"
    28  	"strconv"
    29  	"syscall"
    30  	"time"
    31  
    32  	"github.com/sirupsen/logrus"
    33  
    34  	utilerrors "k8s.io/apimachinery/pkg/util/errors"
    35  	"sigs.k8s.io/prow/pkg/pod-utils/wrapper"
    36  )
    37  
const (
	// internalCode is greater than 256 to signify entrypoint
	// chose the code rather than the command it ran
	// http://tldp.org/LDP/abs/html/exitcodes.html
	//
	// In this file it is only used as the offset from which
	// PreviousErrorCode is derived.
	//
	// TODO(fejta): consider making all entrypoint-chosen codes internal
	internalCode = 1000
	// InternalErrorCode is what we write to the marker file to
	// indicate that we failed to start the wrapped command.
	// ExecuteProcess also returns it for setup failures and for a
	// timeout when the wrapped exit code is not propagated, and
	// internalRun returns it when the marker file cannot be written.
	InternalErrorCode = 127
	// AbortedErrorCode is what we write to the marker file to
	// indicate that we were terminated via a signal.
	AbortedErrorCode = 130

	// PreviousErrorCode indicates a previous step failed so we
	// did not run this step. It evaluates to 1130
	// (internalCode + AbortedErrorCode).
	PreviousErrorCode = internalCode + AbortedErrorCode

	// DefaultTimeout is the default timeout for the test
	// process before SIGINT is sent. It is applied when the
	// configured timeout is zero.
	DefaultTimeout = 120 * time.Minute

	// DefaultGracePeriod is the default timeout for the test
	// process after SIGINT is sent before SIGKILL is sent. It is
	// applied when the configured grace period is zero.
	DefaultGracePeriod = 15 * time.Second
)
    64  
var (
	// errTimedOut is used as the command's error when the command
	// is terminated after the timeout is reached. It replaces
	// whatever error the wrapped process itself produced.
	errTimedOut = errors.New("process timed out")
	// errAborted is used as the command's error when the command
	// is shut down by an external signal (an interrupt delivered to
	// entrypoint while the wrapped process is running).
	errAborted = errors.New("process aborted")
)
    73  
    74  // Run executes the test process then writes the exit code to the marker file.
    75  // This function returns the status code that should be passed to os.Exit().
    76  func (o Options) Run() int {
    77  	interrupt := make(chan os.Signal, 1)
    78  	return o.internalRun(interrupt)
    79  }
    80  
    81  func (o Options) internalRun(interrupt chan os.Signal) int {
    82  	code, err := o.ExecuteProcess(interrupt)
    83  	if err != nil {
    84  		logrus.WithError(err).Error("Error executing test process")
    85  	}
    86  	if err := o.Mark(code); err != nil {
    87  		logrus.WithError(err).Error("Error writing exit code to marker file")
    88  		return InternalErrorCode // we need to mark the real error code to safely return AlwaysZero
    89  	}
    90  	if o.AlwaysZero {
    91  		return 0
    92  	}
    93  	return code
    94  }
    95  
    96  // ExecuteProcess creates the artifact directory then executes the process as
    97  // configured, writing the output to the process log.
    98  func (o Options) ExecuteProcess(signaledInterrupt chan os.Signal) (int, error) {
    99  	if o.ArtifactDir != "" {
   100  		if err := os.MkdirAll(o.ArtifactDir, os.ModePerm); err != nil {
   101  			return InternalErrorCode, fmt.Errorf("could not create artifact directory(%s): %w", o.ArtifactDir, err)
   102  		}
   103  	}
   104  	processLogFile, err := os.Create(o.ProcessLog)
   105  	if err != nil {
   106  		return InternalErrorCode, fmt.Errorf("could not create process logfile(%s): %w", o.ProcessLog, err)
   107  	}
   108  	defer processLogFile.Close()
   109  
   110  	output := io.MultiWriter(os.Stdout, processLogFile)
   111  	logrus.SetOutput(output)
   112  	defer logrus.SetOutput(os.Stdout)
   113  
   114  	// if we get asked to terminate we need to forward
   115  	// that to the wrapped process as if it timed out
   116  	interrupt := signaledInterrupt
   117  	signal.Notify(interrupt, os.Interrupt, syscall.SIGTERM)
   118  
   119  	if o.PreviousMarker != "" {
   120  		ctx, cancel := context.WithCancel(context.Background())
   121  		go func() {
   122  			select {
   123  			case s := <-interrupt:
   124  				logrus.Errorf("Received interrupt %s, cancelling...", s)
   125  				cancel()
   126  			case <-ctx.Done():
   127  			}
   128  		}()
   129  		prevMarkerResult := wrapper.WaitForMarkers(ctx, o.PreviousMarker)[o.PreviousMarker]
   130  		code, err := prevMarkerResult.ReturnCode, prevMarkerResult.Err
   131  		cancel() // end previous go-routine when not interrupted
   132  		if err != nil {
   133  			return InternalErrorCode, fmt.Errorf("wait for previous marker %s: %w", o.PreviousMarker, err)
   134  		}
   135  		if code != 0 {
   136  			logrus.Infof("Skipping as previous step exited %d", code)
   137  			return PreviousErrorCode, nil
   138  		}
   139  	}
   140  
   141  	executable := o.Args[0]
   142  	var arguments []string
   143  	if len(o.Args) > 1 {
   144  		arguments = o.Args[1:]
   145  	}
   146  	command := exec.Command(executable, arguments...)
   147  	command.Stderr = output
   148  	command.Stdout = output
   149  	if err := command.Start(); err != nil {
   150  		errs := []error{fmt.Errorf("could not start the process: %w", err)}
   151  		if _, err := processLogFile.Write([]byte(errs[0].Error())); err != nil {
   152  			errs = append(errs, err)
   153  		}
   154  		return InternalErrorCode, utilerrors.NewAggregate(errs)
   155  	}
   156  
   157  	timeout := optionOrDefault(o.Timeout, DefaultTimeout)
   158  	gracePeriod := optionOrDefault(o.GracePeriod, DefaultGracePeriod)
   159  	var commandErr error
   160  	cancelled, aborted := false, false
   161  	done := make(chan error)
   162  	go func() {
   163  		done <- command.Wait()
   164  	}()
   165  	select {
   166  	case err := <-done:
   167  		commandErr = err
   168  	case <-time.After(timeout):
   169  		logrus.Errorf("Process did not finish before %s timeout", timeout)
   170  		cancelled = true
   171  		gracefullyTerminate(command, done, gracePeriod, nil)
   172  	case s := <-interrupt:
   173  		logrus.Errorf("Entrypoint received interrupt: %v", s)
   174  		cancelled = true
   175  		aborted = true
   176  		gracefullyTerminate(command, done, gracePeriod, &s)
   177  	}
   178  
   179  	var returnCode int
   180  	if cancelled {
   181  		if aborted {
   182  			commandErr = errAborted
   183  			if o.PropagateErrorCode {
   184  				returnCode = command.ProcessState.ExitCode()
   185  			} else {
   186  				returnCode = AbortedErrorCode
   187  			}
   188  		} else {
   189  			commandErr = errTimedOut
   190  			if o.PropagateErrorCode {
   191  				returnCode = command.ProcessState.ExitCode()
   192  			} else {
   193  				returnCode = InternalErrorCode
   194  			}
   195  		}
   196  	} else {
   197  		if status, ok := command.ProcessState.Sys().(syscall.WaitStatus); ok {
   198  			returnCode = status.ExitStatus()
   199  		} else if commandErr == nil {
   200  			returnCode = 0
   201  		} else {
   202  			returnCode = 1
   203  		}
   204  
   205  		if returnCode != 0 {
   206  			commandErr = fmt.Errorf("wrapped process failed: %w", commandErr)
   207  		}
   208  	}
   209  	return returnCode, commandErr
   210  }
   211  
   212  func (o *Options) Mark(exitCode int) error {
   213  	content := []byte(strconv.Itoa(exitCode))
   214  
   215  	// create temp file in the same directory as the desired marker file
   216  	dir := filepath.Dir(o.MarkerFile)
   217  	tmpDir, err := os.MkdirTemp(dir, o.ContainerName)
   218  	if err != nil {
   219  		return fmt.Errorf("%s: error creating temp dir: %w", o.ContainerName, err)
   220  	}
   221  	tempFile, err := os.CreateTemp(tmpDir, "temp-marker")
   222  	if err != nil {
   223  		return fmt.Errorf("could not create temp marker file in %s: %w", tmpDir, err)
   224  	}
   225  	// write the exit code to the tempfile, sync to disk and close
   226  	if _, err = tempFile.Write(content); err != nil {
   227  		return fmt.Errorf("could not write to temp marker file (%s): %w", tempFile.Name(), err)
   228  	}
   229  	if err = tempFile.Sync(); err != nil {
   230  		return fmt.Errorf("could not sync temp marker file (%s): %w", tempFile.Name(), err)
   231  	}
   232  	tempFile.Close()
   233  	// set desired permission bits, then rename to the desired file name
   234  	if err = os.Chmod(tempFile.Name(), os.ModePerm); err != nil {
   235  		return fmt.Errorf("could not chmod (%x) temp marker file (%s): %w", os.ModePerm, tempFile.Name(), err)
   236  	}
   237  	if err := os.Rename(tempFile.Name(), o.MarkerFile); err != nil {
   238  		return fmt.Errorf("could not move marker file to destination path (%s): %w", o.MarkerFile, err)
   239  	}
   240  	return nil
   241  }
   242  
   243  // optionOrDefault defaults to a value if option
   244  // is the zero value
   245  func optionOrDefault(option, defaultValue time.Duration) time.Duration {
   246  	if option == 0 {
   247  		return defaultValue
   248  	}
   249  
   250  	return option
   251  }
   252  
   253  func gracefullyTerminate(command *exec.Cmd, done <-chan error, gracePeriod time.Duration, signal *os.Signal) {
   254  	if err := command.Process.Signal(os.Interrupt); err != nil {
   255  		logrus.WithError(err).Error("Could not interrupt process after timeout")
   256  	}
   257  	if signal != nil {
   258  		if err := command.Process.Signal(*signal); err != nil {
   259  			logrus.WithError(err).Errorf("Could not send signal %v to process after timeout", signal)
   260  		}
   261  	}
   262  	select {
   263  	case <-done:
   264  		logrus.Errorf("Process gracefully exited before %s grace period", gracePeriod)
   265  		// but we ignore the output error as we will want errTimedOut
   266  	case <-time.After(gracePeriod):
   267  		logrus.Errorf("Process did not exit before %s grace period", gracePeriod)
   268  		if err := command.Process.Kill(); err != nil {
   269  			logrus.WithError(err).Error("Could not kill process after grace period")
   270  		}
   271  	}
   272  }