sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/entrypoint/run.go (about) 1 /* 2 Copyright 2018 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package entrypoint 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "io" 24 "os" 25 "os/exec" 26 "os/signal" 27 "path/filepath" 28 "strconv" 29 "syscall" 30 "time" 31 32 "github.com/sirupsen/logrus" 33 34 utilerrors "k8s.io/apimachinery/pkg/util/errors" 35 "sigs.k8s.io/prow/pkg/pod-utils/wrapper" 36 ) 37 38 const ( 39 // internalCode is greater than 256 to signify entrypoint 40 // chose the code rather than the command it ran 41 // http://tldp.org/LDP/abs/html/exitcodes.html 42 // 43 // TODO(fejta): consider making all entrypoint-chosen codes internal 44 internalCode = 1000 45 // InternalErrorCode is what we write to the marker file to 46 // indicate that we failed to start the wrapped command 47 InternalErrorCode = 127 48 // AbortedErrorCode is what we write to the marker file to 49 // indicate that we were terminated via a signal. 50 AbortedErrorCode = 130 51 52 // PreviousErrorCode indicates a previous step failed so we 53 // did not run this step. 54 PreviousErrorCode = internalCode + AbortedErrorCode 55 56 // DefaultTimeout is the default timeout for the test 57 // process before SIGINT is sent 58 DefaultTimeout = 120 * time.Minute 59 60 // DefaultGracePeriod is the default timeout for the test 61 // process after SIGINT is sent before SIGKILL is sent 62 DefaultGracePeriod = 15 * time.Second 63 ) 64 65 var ( 66 // errTimedOut is used as the command's error when the command 67 // is terminated after the timeout is reached 68 errTimedOut = errors.New("process timed out") 69 // errAborted is used as the command's error when the command 70 // is shut down by an external signal 71 errAborted = errors.New("process aborted") 72 ) 73 74 // Run executes the test process then writes the exit code to the marker file. 75 // This function returns the status code that should be passed to os.Exit(). 76 func (o Options) Run() int { 77 interrupt := make(chan os.Signal, 1) 78 return o.internalRun(interrupt) 79 } 80 81 func (o Options) internalRun(interrupt chan os.Signal) int { 82 code, err := o.ExecuteProcess(interrupt) 83 if err != nil { 84 logrus.WithError(err).Error("Error executing test process") 85 } 86 if err := o.Mark(code); err != nil { 87 logrus.WithError(err).Error("Error writing exit code to marker file") 88 return InternalErrorCode // we need to mark the real error code to safely return AlwaysZero 89 } 90 if o.AlwaysZero { 91 return 0 92 } 93 return code 94 } 95 96 // ExecuteProcess creates the artifact directory then executes the process as 97 // configured, writing the output to the process log. 98 func (o Options) ExecuteProcess(signaledInterrupt chan os.Signal) (int, error) { 99 if o.ArtifactDir != "" { 100 if err := os.MkdirAll(o.ArtifactDir, os.ModePerm); err != nil { 101 return InternalErrorCode, fmt.Errorf("could not create artifact directory(%s): %w", o.ArtifactDir, err) 102 } 103 } 104 processLogFile, err := os.Create(o.ProcessLog) 105 if err != nil { 106 return InternalErrorCode, fmt.Errorf("could not create process logfile(%s): %w", o.ProcessLog, err) 107 } 108 defer processLogFile.Close() 109 110 output := io.MultiWriter(os.Stdout, processLogFile) 111 logrus.SetOutput(output) 112 defer logrus.SetOutput(os.Stdout) 113 114 // if we get asked to terminate we need to forward 115 // that to the wrapped process as if it timed out 116 interrupt := signaledInterrupt 117 signal.Notify(interrupt, os.Interrupt, syscall.SIGTERM) 118 119 if o.PreviousMarker != "" { 120 ctx, cancel := context.WithCancel(context.Background()) 121 go func() { 122 select { 123 case s := <-interrupt: 124 logrus.Errorf("Received interrupt %s, cancelling...", s) 125 cancel() 126 case <-ctx.Done(): 127 } 128 }() 129 prevMarkerResult := wrapper.WaitForMarkers(ctx, o.PreviousMarker)[o.PreviousMarker] 130 code, err := prevMarkerResult.ReturnCode, prevMarkerResult.Err 131 cancel() // end previous go-routine when not interrupted 132 if err != nil { 133 return InternalErrorCode, fmt.Errorf("wait for previous marker %s: %w", o.PreviousMarker, err) 134 } 135 if code != 0 { 136 logrus.Infof("Skipping as previous step exited %d", code) 137 return PreviousErrorCode, nil 138 } 139 } 140 141 executable := o.Args[0] 142 var arguments []string 143 if len(o.Args) > 1 { 144 arguments = o.Args[1:] 145 } 146 command := exec.Command(executable, arguments...) 147 command.Stderr = output 148 command.Stdout = output 149 if err := command.Start(); err != nil { 150 errs := []error{fmt.Errorf("could not start the process: %w", err)} 151 if _, err := processLogFile.Write([]byte(errs[0].Error())); err != nil { 152 errs = append(errs, err) 153 } 154 return InternalErrorCode, utilerrors.NewAggregate(errs) 155 } 156 157 timeout := optionOrDefault(o.Timeout, DefaultTimeout) 158 gracePeriod := optionOrDefault(o.GracePeriod, DefaultGracePeriod) 159 var commandErr error 160 cancelled, aborted := false, false 161 done := make(chan error) 162 go func() { 163 done <- command.Wait() 164 }() 165 select { 166 case err := <-done: 167 commandErr = err 168 case <-time.After(timeout): 169 logrus.Errorf("Process did not finish before %s timeout", timeout) 170 cancelled = true 171 gracefullyTerminate(command, done, gracePeriod, nil) 172 case s := <-interrupt: 173 logrus.Errorf("Entrypoint received interrupt: %v", s) 174 cancelled = true 175 aborted = true 176 gracefullyTerminate(command, done, gracePeriod, &s) 177 } 178 179 var returnCode int 180 if cancelled { 181 if aborted { 182 commandErr = errAborted 183 if o.PropagateErrorCode { 184 returnCode = command.ProcessState.ExitCode() 185 } else { 186 returnCode = AbortedErrorCode 187 } 188 } else { 189 commandErr = errTimedOut 190 if o.PropagateErrorCode { 191 returnCode = command.ProcessState.ExitCode() 192 } else { 193 returnCode = InternalErrorCode 194 } 195 } 196 } else { 197 if status, ok := command.ProcessState.Sys().(syscall.WaitStatus); ok { 198 returnCode = status.ExitStatus() 199 } else if commandErr == nil { 200 returnCode = 0 201 } else { 202 returnCode = 1 203 } 204 205 if returnCode != 0 { 206 commandErr = fmt.Errorf("wrapped process failed: %w", commandErr) 207 } 208 } 209 return returnCode, commandErr 210 } 211 212 func (o *Options) Mark(exitCode int) error { 213 content := []byte(strconv.Itoa(exitCode)) 214 215 // create temp file in the same directory as the desired marker file 216 dir := filepath.Dir(o.MarkerFile) 217 tmpDir, err := os.MkdirTemp(dir, o.ContainerName) 218 if err != nil { 219 return fmt.Errorf("%s: error creating temp dir: %w", o.ContainerName, err) 220 } 221 tempFile, err := os.CreateTemp(tmpDir, "temp-marker") 222 if err != nil { 223 return fmt.Errorf("could not create temp marker file in %s: %w", tmpDir, err) 224 } 225 // write the exit code to the tempfile, sync to disk and close 226 if _, err = tempFile.Write(content); err != nil { 227 return fmt.Errorf("could not write to temp marker file (%s): %w", tempFile.Name(), err) 228 } 229 if err = tempFile.Sync(); err != nil { 230 return fmt.Errorf("could not sync temp marker file (%s): %w", tempFile.Name(), err) 231 } 232 tempFile.Close() 233 // set desired permission bits, then rename to the desired file name 234 if err = os.Chmod(tempFile.Name(), os.ModePerm); err != nil { 235 return fmt.Errorf("could not chmod (%x) temp marker file (%s): %w", os.ModePerm, tempFile.Name(), err) 236 } 237 if err := os.Rename(tempFile.Name(), o.MarkerFile); err != nil { 238 return fmt.Errorf("could not move marker file to destination path (%s): %w", o.MarkerFile, err) 239 } 240 return nil 241 } 242 243 // optionOrDefault defaults to a value if option 244 // is the zero value 245 func optionOrDefault(option, defaultValue time.Duration) time.Duration { 246 if option == 0 { 247 return defaultValue 248 } 249 250 return option 251 } 252 253 func gracefullyTerminate(command *exec.Cmd, done <-chan error, gracePeriod time.Duration, signal *os.Signal) { 254 if err := command.Process.Signal(os.Interrupt); err != nil { 255 logrus.WithError(err).Error("Could not interrupt process after timeout") 256 } 257 if signal != nil { 258 if err := command.Process.Signal(*signal); err != nil { 259 logrus.WithError(err).Errorf("Could not send signal %v to process after timeout", signal) 260 } 261 } 262 select { 263 case <-done: 264 logrus.Errorf("Process gracefully exited before %s grace period", gracePeriod) 265 // but we ignore the output error as we will want errTimedOut 266 case <-time.After(gracePeriod): 267 logrus.Errorf("Process did not exit before %s grace period", gracePeriod) 268 if err := command.Process.Kill(); err != nil { 269 logrus.WithError(err).Error("Could not kill process after grace period") 270 } 271 } 272 }