github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/kubetest/process/process.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package process 18 19 import ( 20 "bytes" 21 "encoding/xml" 22 "fmt" 23 "io" 24 "log" 25 "os" 26 "os/exec" 27 "os/signal" 28 "path/filepath" 29 "strings" 30 "sync" 31 "syscall" 32 "time" 33 34 "k8s.io/test-infra/kubetest/util" 35 ) 36 37 // Control can commands until a timeout is reached, at which point it signals and then terminates them. 38 type Control struct { 39 termLock *sync.RWMutex 40 terminated bool 41 intLock *sync.RWMutex 42 interrupted bool 43 44 Timeout time.Duration 45 Interrupt *time.Timer 46 Terminate *time.Timer 47 48 verbose bool 49 } 50 51 // NewControl constructs a Control with the specified arguments, instiating other necessary fields. 52 func NewControl(timeout time.Duration, interrupt, terminate *time.Timer, verbose bool) *Control { 53 return &Control{ 54 termLock: new(sync.RWMutex), 55 terminated: false, 56 intLock: new(sync.RWMutex), 57 interrupted: false, 58 Timeout: timeout, 59 Interrupt: interrupt, 60 Terminate: terminate, 61 verbose: verbose, 62 } 63 } 64 65 // WriteXML creates a util.TestCase{} junit_runner.xml file inside the dump dir. 66 func (c *Control) WriteXML(suite *util.TestSuite, dump string, start time.Time) { 67 // Note whether timeout occurred 68 tc := util.TestCase{ 69 Name: "Timeout", 70 ClassName: "e2e.go", 71 Time: c.Timeout.Seconds(), 72 } 73 if c.isInterrupted() { 74 tc.Failure = "kubetest --timeout triggered" 75 suite.Failures++ 76 } 77 suite.Cases = append(suite.Cases, tc) 78 // Write xml 79 suite.Time = time.Since(start).Seconds() 80 out, err := xml.MarshalIndent(&suite, "", " ") 81 if err != nil { 82 log.Fatalf("Could not marshal XML: %s", err) 83 } 84 path := filepath.Join(dump, "junit_runner.xml") 85 f, err := os.Create(path) 86 if err != nil { 87 log.Fatalf("Could not create file: %s", err) 88 } 89 defer f.Close() 90 if _, err := f.WriteString(xml.Header); err != nil { 91 log.Fatalf("Error writing XML header: %s", err) 92 } 93 if _, err := f.Write(out); err != nil { 94 log.Fatalf("Error writing XML data: %s", err) 95 } 96 log.Printf("Saved XML output to %s.", path) 97 } 98 99 // XMLWrap returns f(), adding junit xml testcase result for name 100 func (c *Control) XMLWrap(suite *util.TestSuite, name string, f func() error) error { 101 alreadyInterrupted := c.isInterrupted() 102 start := time.Now() 103 err := f() 104 duration := time.Since(start) 105 tc := util.TestCase{ 106 Name: name, 107 ClassName: "e2e.go", 108 Time: duration.Seconds(), 109 } 110 if err == nil && !alreadyInterrupted && c.isInterrupted() { 111 err = fmt.Errorf("kubetest interrupted during step %s", name) 112 } 113 if err != nil { 114 if !alreadyInterrupted { 115 tc.Failure = err.Error() 116 } else { 117 tc.Skipped = err.Error() 118 } 119 suite.Failures++ 120 } 121 122 suite.Cases = append(suite.Cases, tc) 123 suite.Tests++ 124 return err 125 } 126 127 func (c *Control) isTerminated() bool { 128 c.termLock.RLock() 129 t := c.terminated 130 c.termLock.RUnlock() 131 return t 132 } 133 134 func (c *Control) isInterrupted() bool { 135 c.intLock.RLock() 136 i := c.interrupted 137 c.intLock.RUnlock() 138 return i 139 } 140 141 // FinishRunning returns cmd.Wait() and/or times out. 142 func (c *Control) FinishRunning(cmd *exec.Cmd) error { 143 stepName := strings.Join(cmd.Args, " ") 144 if c.isTerminated() { 145 return fmt.Errorf("skipped %s (kubetest is terminated)", stepName) 146 } 147 if cmd.Stdout == nil && c.verbose { 148 cmd.Stdout = os.Stdout 149 } 150 if cmd.Stderr == nil && c.verbose { 151 cmd.Stderr = os.Stderr 152 } 153 log.Printf("Running: %v", stepName) 154 defer func(start time.Time) { 155 log.Printf("Step '%s' finished in %s", stepName, time.Since(start)) 156 }(time.Now()) 157 158 cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} 159 if err := cmd.Start(); err != nil { 160 return fmt.Errorf("error starting %v: %v", stepName, err) 161 } 162 163 finished := make(chan error) 164 165 sigChannel := make(chan os.Signal, 1) 166 signal.Notify(sigChannel, os.Interrupt) 167 168 go func() { 169 finished <- cmd.Wait() 170 }() 171 172 for { 173 select { 174 case <-sigChannel: 175 log.Printf("Killing %v(%v) after receiving signal", stepName, -cmd.Process.Pid) 176 177 pgid := getGroupPid(cmd.Process.Pid) 178 179 if err := syscall.Kill(-pgid, syscall.SIGKILL); err != nil { 180 log.Printf("Failed to kill %v: %v", stepName, err) 181 } 182 183 case <-c.Terminate.C: 184 c.termLock.Lock() 185 c.terminated = true 186 c.termLock.Unlock() 187 c.Terminate.Reset(time.Duration(-1)) // Kill subsequent processes immediately. 188 pgid := getGroupPid(cmd.Process.Pid) 189 if err := syscall.Kill(-pgid, syscall.SIGKILL); err != nil { 190 log.Printf("Failed to kill %v: %v", stepName, err) 191 } 192 if err := cmd.Process.Kill(); err != nil { 193 log.Printf("Failed to terminate %s (terminated 15m after interrupt): %v", stepName, err) 194 } 195 case <-c.Interrupt.C: 196 c.intLock.Lock() 197 c.interrupted = true 198 c.intLock.Unlock() 199 log.Printf("Abort after %s timeout during %s. Will terminate in another 15m", c.Timeout, stepName) 200 c.Terminate.Reset(15 * time.Minute) 201 pgid := getGroupPid(cmd.Process.Pid) 202 if err := syscall.Kill(-pgid, syscall.SIGABRT); err != nil { 203 log.Printf("Failed to abort %s. Will terminate immediately: %v", stepName, err) 204 syscall.Kill(-pgid, syscall.SIGTERM) 205 cmd.Process.Kill() 206 } 207 case err := <-finished: 208 if err != nil { 209 var suffix string 210 if c.isTerminated() { 211 suffix = " (terminated)" 212 } else if c.isInterrupted() { 213 suffix = " (interrupted)" 214 } 215 return fmt.Errorf("error during %s%s: %v", stepName, suffix, err) 216 } 217 return err 218 } 219 } 220 } 221 222 type cmdExecResult struct { 223 stepName string 224 output string 225 execTime time.Duration 226 err error 227 } 228 229 // executeParallelCommand executes a given command and send output and error via channel 230 func (c *Control) executeParallelCommand(cmd *exec.Cmd, resChan chan cmdExecResult, termChan, intChan chan struct{}) { 231 stepName := strings.Join(cmd.Args, " ") 232 stdout := bytes.Buffer{} 233 cmd.Stdout = &stdout 234 cmd.Stderr = &stdout 235 236 start := time.Now() 237 log.Printf("Running: %v in parallel", stepName) 238 239 if c.isTerminated() { 240 resChan <- cmdExecResult{stepName: stepName, output: stdout.String(), execTime: time.Since(start), err: fmt.Errorf("skipped %s (kubetest is terminated)", stepName)} 241 return 242 } 243 244 cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} 245 if err := cmd.Start(); err != nil { 246 resChan <- cmdExecResult{stepName: stepName, output: stdout.String(), execTime: time.Since(start), err: fmt.Errorf("error starting %v: %v", stepName, err)} 247 return 248 } 249 250 finished := make(chan error) 251 go func() { 252 finished <- cmd.Wait() 253 }() 254 255 for { 256 select { 257 case err := <-finished: 258 if err != nil { 259 var suffix string 260 if c.isTerminated() { 261 suffix = " (terminated)" 262 } else if c.isInterrupted() { 263 suffix = " (interrupted)" 264 } 265 err = fmt.Errorf("error during %s%s: %v", stepName, suffix, err) 266 } 267 resChan <- cmdExecResult{stepName: stepName, output: stdout.String(), execTime: time.Since(start), err: err} 268 return 269 270 case <-termChan: 271 pgid := getGroupPid(cmd.Process.Pid) 272 syscall.Kill(-pgid, syscall.SIGKILL) 273 if err := cmd.Process.Kill(); err != nil { 274 log.Printf("Failed to terminate %s (terminated 15m after interrupt): %v", strings.Join(cmd.Args, " "), err) 275 } 276 277 case <-intChan: 278 log.Printf("Abort after %s timeout during %s. Will terminate in another 15m", c.Timeout, strings.Join(cmd.Args, " ")) 279 pgid := getGroupPid(cmd.Process.Pid) 280 if err := syscall.Kill(-pgid, syscall.SIGABRT); err != nil { 281 log.Printf("Failed to abort %s. Will terminate immediately: %v", strings.Join(cmd.Args, " "), err) 282 syscall.Kill(-pgid, syscall.SIGTERM) 283 cmd.Process.Kill() 284 } 285 } 286 } 287 } 288 289 // FinishRunningParallel executes multiple commands in parallel 290 func (c *Control) FinishRunningParallel(cmds ...*exec.Cmd) error { 291 var wg sync.WaitGroup 292 resultChan := make(chan cmdExecResult, len(cmds)) 293 termChan := make(chan struct{}, len(cmds)) 294 intChan := make(chan struct{}, len(cmds)) 295 296 for _, cmd := range cmds { 297 wg.Add(1) 298 go func(cmd *exec.Cmd) { 299 defer wg.Done() 300 c.executeParallelCommand(cmd, resultChan, termChan, intChan) 301 }(cmd) 302 } 303 304 go func() { 305 wg.Wait() 306 close(resultChan) 307 }() 308 309 cmdFailed := false 310 for { 311 select { 312 case <-c.Terminate.C: 313 c.termLock.Lock() 314 c.terminated = true 315 c.termLock.Unlock() 316 c.Terminate.Reset(time.Duration(0)) 317 select { 318 case <-termChan: 319 default: 320 close(termChan) 321 } 322 323 case <-c.Interrupt.C: 324 c.intLock.Lock() 325 c.interrupted = true 326 c.intLock.Unlock() 327 c.Terminate.Reset(15 * time.Minute) 328 close(intChan) 329 330 case result, ok := <-resultChan: 331 if !ok { 332 if cmdFailed { 333 return fmt.Errorf("one or more commands failed") 334 } 335 return nil 336 } 337 log.Print(result.output) 338 if result.err != nil { 339 cmdFailed = true 340 } 341 log.Printf("Step '%s' finished in %s", result.stepName, result.execTime) 342 } 343 } 344 } 345 346 // InputCommand returns exec.Command(cmd, args...) while calling .StdinPipe().WriteString(input) 347 func (c *Control) InputCommand(input, cmd string, args ...string) (*exec.Cmd, error) { 348 command := exec.Command(cmd, args...) 349 w, e := command.StdinPipe() 350 if e != nil { 351 return nil, e 352 } 353 go func() { 354 if _, e = io.WriteString(w, input); e != nil { 355 log.Printf("Failed to write all %d chars to %s: %v", len(input), cmd, e) 356 } 357 if e = w.Close(); e != nil { 358 log.Printf("Failed to close stdin for %s: %v", cmd, e) 359 } 360 }() 361 return command, nil 362 } 363 364 // Output returns cmd.Output(), potentially timing out in the process. 365 func (c *Control) Output(cmd *exec.Cmd) ([]byte, error) { 366 var stdout bytes.Buffer 367 cmd.Stdout = &stdout 368 err := c.FinishRunning(cmd) 369 return stdout.Bytes(), err 370 } 371 372 // NoOutput ignores all output from the command, potentially timing out in the process. 373 func (c *Control) NoOutput(cmd *exec.Cmd) error { 374 var void bytes.Buffer 375 cmd.Stdout = &void 376 cmd.Stderr = &void 377 return c.FinishRunning(cmd) 378 } 379 380 // getGroupPid gets the process group to kill the entire main/child process 381 // if Getpgid return error use the current process Pid 382 func getGroupPid(pid int) int { 383 pgid, err := syscall.Getpgid(pid) 384 if err != nil { 385 log.Printf("Failed to get the group process from %v: %v", pid, err) 386 return pid 387 } 388 return pgid 389 }