github.com/munnerz/test-infra@v0.0.0-20190108210205-ce3d181dc989/kubetest/process/process.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package process
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/xml"
    22  	"fmt"
    23  	"io"
    24  	"log"
    25  	"os"
    26  	"os/exec"
    27  	"os/signal"
    28  	"path/filepath"
    29  	"strings"
    30  	"sync"
    31  	"syscall"
    32  	"time"
    33  
    34  	"k8s.io/test-infra/kubetest/util"
    35  )
    36  
    37  // Control can commands until a timeout is reached, at which point it signals and then terminates them.
    38  type Control struct {
    39  	termLock    *sync.RWMutex
    40  	terminated  bool
    41  	intLock     *sync.RWMutex
    42  	interrupted bool
    43  
    44  	Timeout   time.Duration
    45  	Interrupt *time.Timer
    46  	Terminate *time.Timer
    47  
    48  	verbose bool
    49  }
    50  
    51  // NewControl constructs a Control with the specified arguments, instiating other necessary fields.
    52  func NewControl(timeout time.Duration, interrupt, terminate *time.Timer, verbose bool) *Control {
    53  	return &Control{
    54  		termLock:    new(sync.RWMutex),
    55  		terminated:  false,
    56  		intLock:     new(sync.RWMutex),
    57  		interrupted: false,
    58  		Timeout:     timeout,
    59  		Interrupt:   interrupt,
    60  		Terminate:   terminate,
    61  		verbose:     verbose,
    62  	}
    63  }
    64  
    65  // WriteXML creates a util.TestCase{} junit_runner.xml file inside the dump dir.
    66  func (c *Control) WriteXML(suite *util.TestSuite, dump string, start time.Time) {
    67  	// Note whether timeout occurred
    68  	tc := util.TestCase{
    69  		Name:      "Timeout",
    70  		ClassName: "e2e.go",
    71  		Time:      c.Timeout.Seconds(),
    72  	}
    73  	if c.isInterrupted() {
    74  		tc.Failure = "kubetest --timeout triggered"
    75  		suite.Failures++
    76  	}
    77  	suite.Cases = append(suite.Cases, tc)
    78  	// Write xml
    79  	suite.Time = time.Since(start).Seconds()
    80  	out, err := xml.MarshalIndent(&suite, "", "    ")
    81  	if err != nil {
    82  		log.Fatalf("Could not marshal XML: %s", err)
    83  	}
    84  	path := filepath.Join(dump, "junit_runner.xml")
    85  	f, err := os.Create(path)
    86  	if err != nil {
    87  		log.Fatalf("Could not create file: %s", err)
    88  	}
    89  	defer f.Close()
    90  	if _, err := f.WriteString(xml.Header); err != nil {
    91  		log.Fatalf("Error writing XML header: %s", err)
    92  	}
    93  	if _, err := f.Write(out); err != nil {
    94  		log.Fatalf("Error writing XML data: %s", err)
    95  	}
    96  	log.Printf("Saved XML output to %s.", path)
    97  }
    98  
    99  // XMLWrap returns f(), adding junit xml testcase result for name
   100  func (c *Control) XMLWrap(suite *util.TestSuite, name string, f func() error) error {
   101  	alreadyInterrupted := c.isInterrupted()
   102  	start := time.Now()
   103  	err := f()
   104  	duration := time.Since(start)
   105  	tc := util.TestCase{
   106  		Name:      name,
   107  		ClassName: "e2e.go",
   108  		Time:      duration.Seconds(),
   109  	}
   110  	if err == nil && !alreadyInterrupted && c.isInterrupted() {
   111  		err = fmt.Errorf("kubetest interrupted during step %s", name)
   112  	}
   113  	if err != nil {
   114  		if !alreadyInterrupted {
   115  			tc.Failure = err.Error()
   116  		} else {
   117  			tc.Skipped = err.Error()
   118  		}
   119  		suite.Failures++
   120  	}
   121  
   122  	suite.Cases = append(suite.Cases, tc)
   123  	suite.Tests++
   124  	return err
   125  }
   126  
   127  func (c *Control) isTerminated() bool {
   128  	c.termLock.RLock()
   129  	t := c.terminated
   130  	c.termLock.RUnlock()
   131  	return t
   132  }
   133  
   134  func (c *Control) isInterrupted() bool {
   135  	c.intLock.RLock()
   136  	i := c.interrupted
   137  	c.intLock.RUnlock()
   138  	return i
   139  }
   140  
   141  // FinishRunning returns cmd.Wait() and/or times out.
   142  func (c *Control) FinishRunning(cmd *exec.Cmd) error {
   143  	stepName := strings.Join(cmd.Args, " ")
   144  	if c.isTerminated() {
   145  		return fmt.Errorf("skipped %s (kubetest is terminated)", stepName)
   146  	}
   147  	if cmd.Stdout == nil && c.verbose {
   148  		cmd.Stdout = os.Stdout
   149  	}
   150  	if cmd.Stderr == nil && c.verbose {
   151  		cmd.Stderr = os.Stderr
   152  	}
   153  	log.Printf("Running: %v", stepName)
   154  	defer func(start time.Time) {
   155  		log.Printf("Step '%s' finished in %s", stepName, time.Since(start))
   156  	}(time.Now())
   157  
   158  	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
   159  	if err := cmd.Start(); err != nil {
   160  		return fmt.Errorf("error starting %v: %v", stepName, err)
   161  	}
   162  
   163  	finished := make(chan error)
   164  
   165  	sigChannel := make(chan os.Signal, 1)
   166  	signal.Notify(sigChannel, os.Interrupt)
   167  
   168  	go func() {
   169  		finished <- cmd.Wait()
   170  	}()
   171  
   172  	for {
   173  		select {
   174  		case <-sigChannel:
   175  			log.Printf("Killing %v(%v) after receiving signal", stepName, -cmd.Process.Pid)
   176  
   177  			pgid := getGroupPid(cmd.Process.Pid)
   178  
   179  			if err := syscall.Kill(-pgid, syscall.SIGKILL); err != nil {
   180  				log.Printf("Failed to kill %v: %v", stepName, err)
   181  			}
   182  
   183  		case <-c.Terminate.C:
   184  			c.termLock.Lock()
   185  			c.terminated = true
   186  			c.termLock.Unlock()
   187  			c.Terminate.Reset(time.Duration(-1)) // Kill subsequent processes immediately.
   188  			pgid := getGroupPid(cmd.Process.Pid)
   189  			if err := syscall.Kill(-pgid, syscall.SIGKILL); err != nil {
   190  				log.Printf("Failed to kill %v: %v", stepName, err)
   191  			}
   192  			if err := cmd.Process.Kill(); err != nil {
   193  				log.Printf("Failed to terminate %s (terminated 15m after interrupt): %v", stepName, err)
   194  			}
   195  		case <-c.Interrupt.C:
   196  			c.intLock.Lock()
   197  			c.interrupted = true
   198  			c.intLock.Unlock()
   199  			log.Printf("Abort after %s timeout during %s. Will terminate in another 15m", c.Timeout, stepName)
   200  			c.Terminate.Reset(15 * time.Minute)
   201  			pgid := getGroupPid(cmd.Process.Pid)
   202  			if err := syscall.Kill(-pgid, syscall.SIGABRT); err != nil {
   203  				log.Printf("Failed to abort %s. Will terminate immediately: %v", stepName, err)
   204  				syscall.Kill(-pgid, syscall.SIGTERM)
   205  				cmd.Process.Kill()
   206  			}
   207  		case err := <-finished:
   208  			if err != nil {
   209  				var suffix string
   210  				if c.isTerminated() {
   211  					suffix = " (terminated)"
   212  				} else if c.isInterrupted() {
   213  					suffix = " (interrupted)"
   214  				}
   215  				return fmt.Errorf("error during %s%s: %v", stepName, suffix, err)
   216  			}
   217  			return err
   218  		}
   219  	}
   220  }
   221  
   222  type cmdExecResult struct {
   223  	stepName string
   224  	output   string
   225  	execTime time.Duration
   226  	err      error
   227  }
   228  
   229  // executeParallelCommand executes a given command and send output and error via channel
   230  func (c *Control) executeParallelCommand(cmd *exec.Cmd, resChan chan cmdExecResult, termChan, intChan chan struct{}) {
   231  	stepName := strings.Join(cmd.Args, " ")
   232  	stdout := bytes.Buffer{}
   233  	cmd.Stdout = &stdout
   234  	cmd.Stderr = &stdout
   235  
   236  	start := time.Now()
   237  	log.Printf("Running: %v in parallel", stepName)
   238  
   239  	if c.isTerminated() {
   240  		resChan <- cmdExecResult{stepName: stepName, output: stdout.String(), execTime: time.Since(start), err: fmt.Errorf("skipped %s (kubetest is terminated)", stepName)}
   241  		return
   242  	}
   243  
   244  	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
   245  	if err := cmd.Start(); err != nil {
   246  		resChan <- cmdExecResult{stepName: stepName, output: stdout.String(), execTime: time.Since(start), err: fmt.Errorf("error starting %v: %v", stepName, err)}
   247  		return
   248  	}
   249  
   250  	finished := make(chan error)
   251  	go func() {
   252  		finished <- cmd.Wait()
   253  	}()
   254  
   255  	for {
   256  		select {
   257  		case err := <-finished:
   258  			if err != nil {
   259  				var suffix string
   260  				if c.isTerminated() {
   261  					suffix = " (terminated)"
   262  				} else if c.isInterrupted() {
   263  					suffix = " (interrupted)"
   264  				}
   265  				err = fmt.Errorf("error during %s%s: %v", stepName, suffix, err)
   266  			}
   267  			resChan <- cmdExecResult{stepName: stepName, output: stdout.String(), execTime: time.Since(start), err: err}
   268  			return
   269  
   270  		case <-termChan:
   271  			pgid := getGroupPid(cmd.Process.Pid)
   272  			syscall.Kill(-pgid, syscall.SIGKILL)
   273  			if err := cmd.Process.Kill(); err != nil {
   274  				log.Printf("Failed to terminate %s (terminated 15m after interrupt): %v", strings.Join(cmd.Args, " "), err)
   275  			}
   276  
   277  		case <-intChan:
   278  			log.Printf("Abort after %s timeout during %s. Will terminate in another 15m", c.Timeout, strings.Join(cmd.Args, " "))
   279  			pgid := getGroupPid(cmd.Process.Pid)
   280  			if err := syscall.Kill(-pgid, syscall.SIGABRT); err != nil {
   281  				log.Printf("Failed to abort %s. Will terminate immediately: %v", strings.Join(cmd.Args, " "), err)
   282  				syscall.Kill(-pgid, syscall.SIGTERM)
   283  				cmd.Process.Kill()
   284  			}
   285  		}
   286  	}
   287  }
   288  
   289  // FinishRunningParallel executes multiple commands in parallel
   290  func (c *Control) FinishRunningParallel(cmds ...*exec.Cmd) error {
   291  	var wg sync.WaitGroup
   292  	resultChan := make(chan cmdExecResult, len(cmds))
   293  	termChan := make(chan struct{}, len(cmds))
   294  	intChan := make(chan struct{}, len(cmds))
   295  
   296  	for _, cmd := range cmds {
   297  		wg.Add(1)
   298  		go func(cmd *exec.Cmd) {
   299  			defer wg.Done()
   300  			c.executeParallelCommand(cmd, resultChan, termChan, intChan)
   301  		}(cmd)
   302  	}
   303  
   304  	go func() {
   305  		wg.Wait()
   306  		close(resultChan)
   307  	}()
   308  
   309  	cmdFailed := false
   310  	for {
   311  		select {
   312  		case <-c.Terminate.C:
   313  			c.termLock.Lock()
   314  			c.terminated = true
   315  			c.termLock.Unlock()
   316  			c.Terminate.Reset(time.Duration(0))
   317  			select {
   318  			case <-termChan:
   319  			default:
   320  				close(termChan)
   321  			}
   322  
   323  		case <-c.Interrupt.C:
   324  			c.intLock.Lock()
   325  			c.interrupted = true
   326  			c.intLock.Unlock()
   327  			c.Terminate.Reset(15 * time.Minute)
   328  			close(intChan)
   329  
   330  		case result, ok := <-resultChan:
   331  			if !ok {
   332  				if cmdFailed {
   333  					return fmt.Errorf("one or more commands failed")
   334  				}
   335  				return nil
   336  			}
   337  			log.Print(result.output)
   338  			if result.err != nil {
   339  				cmdFailed = true
   340  			}
   341  			log.Printf("Step '%s' finished in %s", result.stepName, result.execTime)
   342  		}
   343  	}
   344  }
   345  
   346  // InputCommand returns exec.Command(cmd, args...) while calling .StdinPipe().WriteString(input)
   347  func (c *Control) InputCommand(input, cmd string, args ...string) (*exec.Cmd, error) {
   348  	command := exec.Command(cmd, args...)
   349  	w, e := command.StdinPipe()
   350  	if e != nil {
   351  		return nil, e
   352  	}
   353  	go func() {
   354  		if _, e = io.WriteString(w, input); e != nil {
   355  			log.Printf("Failed to write all %d chars to %s: %v", len(input), cmd, e)
   356  		}
   357  		if e = w.Close(); e != nil {
   358  			log.Printf("Failed to close stdin for %s: %v", cmd, e)
   359  		}
   360  	}()
   361  	return command, nil
   362  }
   363  
   364  // Output returns cmd.Output(), potentially timing out in the process.
   365  func (c *Control) Output(cmd *exec.Cmd) ([]byte, error) {
   366  	var stdout bytes.Buffer
   367  	cmd.Stdout = &stdout
   368  	err := c.FinishRunning(cmd)
   369  	return stdout.Bytes(), err
   370  }
   371  
   372  // NoOutput ignores all output from the command, potentially timing out in the process.
   373  func (c *Control) NoOutput(cmd *exec.Cmd) error {
   374  	var void bytes.Buffer
   375  	cmd.Stdout = &void
   376  	cmd.Stderr = &void
   377  	return c.FinishRunning(cmd)
   378  }
   379  
   380  // getGroupPid gets the process group to kill the entire main/child process
   381  // if Getpgid return error use the current process Pid
   382  func getGroupPid(pid int) int {
   383  	pgid, err := syscall.Getpgid(pid)
   384  	if err != nil {
   385  		log.Printf("Failed to get the group process from %v: %v", pid, err)
   386  		return pid
   387  	}
   388  	return pgid
   389  }