github.com/discordapp/buildkite-agent@v2.6.6+incompatible/process/process.go (about)

     1  package process
     2  
     3  // Logic for this file is largely based on:
     4  // https://github.com/jarib/childprocess/blob/783f7a00a1678b5d929062564ef5ae76822dfd62/lib/childprocess/unix/process.rb
     5  
     6  import (
     7  	"bufio"
     8  	"bytes"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"os"
    13  	"os/exec"
    14  	"path/filepath"
    15  	"regexp"
    16  	"sync"
    17  	"sync/atomic"
    18  	"syscall"
    19  	"time"
    20  
    21  	"github.com/buildkite/agent/logger"
    22  )
    23  
    24  type Process struct {
    25  	Pid        int
    26  	PTY        bool
    27  	Timestamp  bool
    28  	Script     string
    29  	Env        []string
    30  	ExitStatus string
    31  
    32  	buffer bytes.Buffer
    33  
    34  	command *exec.Cmd
    35  
    36  	// This callback is called when the process offically starts
    37  	StartCallback func()
    38  
    39  	// For every line in the process output, this callback will be called
    40  	// with the contents of the line if its filter returns true.
    41  	LineCallback       func(string)
    42  	LinePreProcessor   func(string) string
    43  	LineCallbackFilter func(string) bool
    44  
    45  	// Running is stored as an int32 so we can use atomic operations to
    46  	// set/get it (it's accessed by multiple goroutines)
    47  	running int32
    48  }
    49  
    50  func (p Process) Create() *Process {
    51  	// Find the script to run
    52  	absolutePath, _ := filepath.Abs(p.Script)
    53  	scriptDirectory := filepath.Dir(absolutePath)
    54  
    55  	// Create the command that will be run
    56  	p.command = exec.Command(absolutePath)
    57  	p.command.Dir = scriptDirectory
    58  
    59  	// Copy the current processes ENV and merge in the new ones. We do this
    60  	// so the sub process gets PATH and stuff. We merge our path in over
    61  	// the top of the current one so the ENV from Buildkite and the agent
    62  	// take precedence over the agent
    63  	currentEnv := os.Environ()
    64  	p.command.Env = append(currentEnv, p.Env...)
    65  
    66  	return &p
    67  }
    68  
    69  var headerExpansionRegex = regexp.MustCompile("^(?:\\^\\^\\^\\s+\\+\\+\\+)$")
    70  
    71  func (p *Process) Start() error {
    72  	var waitGroup sync.WaitGroup
    73  
    74  	lineReaderPipe, lineWriterPipe := io.Pipe()
    75  
    76  	var multiWriter io.Writer
    77  	if p.Timestamp {
    78  		multiWriter = io.MultiWriter(lineWriterPipe)
    79  	} else {
    80  		multiWriter = io.MultiWriter(&p.buffer, lineWriterPipe)
    81  	}
    82  
    83  	logger.Info("Starting to run script: %s", p.command.Path)
    84  
    85  	// Toggle between running in a pty
    86  	if p.PTY {
    87  		pty, err := StartPTY(p.command)
    88  		if err != nil {
    89  			p.ExitStatus = "1"
    90  			return err
    91  		}
    92  
    93  		p.Pid = p.command.Process.Pid
    94  		p.setRunning(true)
    95  
    96  		waitGroup.Add(1)
    97  
    98  		go func() {
    99  			logger.Debug("[Process] Starting to copy PTY to the buffer")
   100  
   101  			// Copy the pty to our buffer. This will block until it
   102  			// EOF's or something breaks.
   103  			_, err = io.Copy(multiWriter, pty)
   104  			if e, ok := err.(*os.PathError); ok && e.Err == syscall.EIO {
   105  				// We can safely ignore this error, because
   106  				// it's just the PTY telling us that it closed
   107  				// successfully.  See:
   108  				// https://github.com/buildkite/agent/pull/34#issuecomment-46080419
   109  				err = nil
   110  			}
   111  
   112  			if err != nil {
   113  				logger.Error("[Process] PTY output copy failed with error: %T: %v", err, err)
   114  			} else {
   115  				logger.Debug("[Process] PTY has finished being copied to the buffer")
   116  			}
   117  
   118  			waitGroup.Done()
   119  		}()
   120  	} else {
   121  		p.command.Stdout = multiWriter
   122  		p.command.Stderr = multiWriter
   123  
   124  		err := p.command.Start()
   125  		if err != nil {
   126  			p.ExitStatus = "1"
   127  			return err
   128  		}
   129  
   130  		p.Pid = p.command.Process.Pid
   131  		p.setRunning(true)
   132  	}
   133  
   134  	logger.Info("[Process] Process is running with PID: %d", p.Pid)
   135  
   136  	// Add the line callback routine to the waitGroup
   137  	waitGroup.Add(1)
   138  
   139  	go func() {
   140  		logger.Debug("[LineScanner] Starting to read lines")
   141  
   142  		reader := bufio.NewReader(lineReaderPipe)
   143  
   144  		var appending []byte
   145  		var lineCallbackWaitGroup sync.WaitGroup
   146  
   147  		for {
   148  			line, isPrefix, err := reader.ReadLine()
   149  			if err != nil {
   150  				if err == io.EOF {
   151  					logger.Debug("[LineScanner] Encountered EOF")
   152  					break
   153  				}
   154  
   155  				logger.Error("[LineScanner] Failed to read: (%T: %v)", err, err)
   156  			}
   157  
   158  			// If isPrefix is true, that means we've got a really
   159  			// long line incoming, and we'll keep appending to it
   160  			// until isPrefix is false (which means the long line
   161  			// has ended.
   162  			if isPrefix && appending == nil {
   163  				logger.Debug("[LineScanner] Line is too long to read, going to buffer it until it finishes")
   164  				// bufio.ReadLine returns a slice which is only valid until the next invocation
   165  				// since it points to its own internal buffer array. To accumulate the entire
   166  				// result we make a copy of the first prefix, and insure there is spare capacity
   167  				// for future appends to minimize the need for resizing on append.
   168  				appending = make([]byte, len(line), (cap(line))*2)
   169  				copy(appending, line)
   170  
   171  				continue
   172  			}
   173  
   174  			// Should we be appending?
   175  			if appending != nil {
   176  				appending = append(appending, line...)
   177  
   178  				// No more isPrefix! Line is finished!
   179  				if !isPrefix {
   180  					logger.Debug("[LineScanner] Finished buffering long line")
   181  					line = appending
   182  
   183  					// Reset appending back to nil
   184  					appending = nil
   185  				} else {
   186  					continue
   187  				}
   188  			}
   189  
   190  			// If we're timestamping this main thread will take
   191  			// the hit of running the regex so we can build up
   192  			// the timestamped buffer without breaking headers,
   193  			// otherwise we let the goroutines take the perf hit.
   194  
   195  			checkedForCallback := false
   196  			lineHasCallback := false
   197  			lineString := p.LinePreProcessor(string(line))
   198  
   199  			// Create the prefixed buffer
   200  			if p.Timestamp {
   201  				lineHasCallback = p.LineCallbackFilter(lineString)
   202  				checkedForCallback = true
   203  				if lineHasCallback || headerExpansionRegex.MatchString(lineString) {
   204  					// Don't timestamp special lines (e.g. header)
   205  					p.buffer.WriteString(fmt.Sprintf("%s\n", line))
   206  				} else {
   207  					currentTime := time.Now().UTC().Format(time.RFC3339)
   208  					p.buffer.WriteString(fmt.Sprintf("[%s] %s\n", currentTime, line))
   209  				}
   210  			}
   211  
   212  			if lineHasCallback || !checkedForCallback {
   213  				lineCallbackWaitGroup.Add(1)
   214  				go func(line string) {
   215  					defer lineCallbackWaitGroup.Done()
   216  					if (checkedForCallback && lineHasCallback) || p.LineCallbackFilter(lineString) {
   217  						p.LineCallback(line)
   218  					}
   219  				}(lineString)
   220  			}
   221  		}
   222  
   223  		// We need to make sure all the line callbacks have finish before
   224  		// finish up the process
   225  		logger.Debug("[LineScanner] Waiting for callbacks to finish")
   226  		lineCallbackWaitGroup.Wait()
   227  
   228  		logger.Debug("[LineScanner] Finished")
   229  		waitGroup.Done()
   230  	}()
   231  
   232  	// Call the StartCallback
   233  	go p.StartCallback()
   234  
   235  	// Wait until the process has finished. The returned error is nil if the command runs,
   236  	// has no problems copying stdin, stdout, and stderr, and exits with a zero exit status.
   237  	waitResult := p.command.Wait()
   238  
   239  	// Close the line writer pipe
   240  	lineWriterPipe.Close()
   241  
   242  	// The process is no longer running at this point
   243  	p.setRunning(false)
   244  
   245  	// Find the exit status of the script
   246  	p.ExitStatus = getExitStatus(waitResult)
   247  
   248  	logger.Info("Process with PID: %d finished with Exit Status: %s", p.Pid, p.ExitStatus)
   249  
   250  	// Sometimes (in docker containers) io.Copy never seems to finish. This is a mega
   251  	// hack around it. If it doesn't finish after 1 second, just continue.
   252  	logger.Debug("[Process] Waiting for routines to finish")
   253  	err := timeoutWait(&waitGroup)
   254  	if err != nil {
   255  		logger.Debug("[Process] Timed out waiting for wait group: (%T: %v)", err, err)
   256  	}
   257  
   258  	// No error occurred so we can return nil
   259  	return nil
   260  }
   261  
   262  func (p *Process) Output() string {
   263  	return p.buffer.String()
   264  }
   265  
   266  func (p *Process) Kill() error {
   267  	// Send a sigterm
   268  	err := p.signal(syscall.SIGTERM)
   269  	if err != nil {
   270  		return err
   271  	}
   272  
   273  	// Make a channel that we'll use as a timeout
   274  	c := make(chan int, 1)
   275  	checking := true
   276  
   277  	// Start a routine that checks to see if the process
   278  	// is still alive.
   279  	go func() {
   280  		for checking {
   281  			logger.Debug("[Process] Checking to see if PID: %d is still alive", p.Pid)
   282  
   283  			foundProcess, err := os.FindProcess(p.Pid)
   284  
   285  			// Can't find the process at all
   286  			if err != nil {
   287  				logger.Debug("[Process] Could not find process with PID: %d", p.Pid)
   288  
   289  				break
   290  			}
   291  
   292  			// We have some information about the process
   293  			if foundProcess != nil {
   294  				processState, err := foundProcess.Wait()
   295  
   296  				if err != nil || processState.Exited() {
   297  					logger.Debug("[Process] Process with PID: %d has exited.", p.Pid)
   298  
   299  					break
   300  				}
   301  			}
   302  
   303  			// Retry in a moment
   304  			sleepTime := time.Duration(1 * time.Second)
   305  			time.Sleep(sleepTime)
   306  		}
   307  
   308  		c <- 1
   309  	}()
   310  
   311  	// Timeout this process after 3 seconds
   312  	select {
   313  	case _ = <-c:
   314  		// Was successfully terminated
   315  	case <-time.After(10 * time.Second):
   316  		// Stop checking in the routine above
   317  		checking = false
   318  
   319  		// Forcefully kill the thing
   320  		err = p.signal(syscall.SIGKILL)
   321  
   322  		if err != nil {
   323  			return err
   324  		}
   325  	}
   326  
   327  	return nil
   328  }
   329  
   330  func (p *Process) signal(sig os.Signal) error {
   331  	if p.command != nil && p.command.Process != nil {
   332  		logger.Debug("[Process] Sending signal: %s to PID: %d", sig.String(), p.Pid)
   333  
   334  		err := p.command.Process.Signal(sig)
   335  		if err != nil {
   336  			logger.Error("[Process] Failed to send signal: %s to PID: %d (%T: %v)", sig.String(), p.Pid, err, err)
   337  			return err
   338  		}
   339  	} else {
   340  		logger.Debug("[Process] No process to signal yet")
   341  	}
   342  
   343  	return nil
   344  }
   345  
   346  // Returns whether or not the process is running
   347  func (p *Process) IsRunning() bool {
   348  	return atomic.LoadInt32(&p.running) != 0
   349  }
   350  
   351  // Sets the running flag of the process
   352  func (p *Process) setRunning(r bool) {
   353  	// Use the atomic package to avoid race conditions when setting the
   354  	// `running` value from multiple routines
   355  	if r {
   356  		atomic.StoreInt32(&p.running, 1)
   357  	} else {
   358  		atomic.StoreInt32(&p.running, 0)
   359  	}
   360  }
   361  
   362  // https://github.com/hnakamur/commango/blob/fe42b1cf82bf536ce7e24dceaef6656002e03743/os/executil/executil.go#L29
   363  // TODO: Can this be better?
   364  func getExitStatus(waitResult error) string {
   365  	exitStatus := -1
   366  
   367  	if waitResult != nil {
   368  		if err, ok := waitResult.(*exec.ExitError); ok {
   369  			if s, ok := err.Sys().(syscall.WaitStatus); ok {
   370  				exitStatus = s.ExitStatus()
   371  			} else {
   372  				logger.Error("[Process] Unimplemented for system where exec.ExitError.Sys() is not syscall.WaitStatus.")
   373  			}
   374  		} else {
   375  			logger.Error("[Process] Unexpected error type in getExitStatus: %#v", waitResult)
   376  		}
   377  	} else {
   378  		exitStatus = 0
   379  	}
   380  
   381  	return fmt.Sprintf("%d", exitStatus)
   382  }
   383  
   384  func timeoutWait(waitGroup *sync.WaitGroup) error {
   385  	// Make a chanel that we'll use as a timeout
   386  	c := make(chan int, 1)
   387  
   388  	// Start waiting for the routines to finish
   389  	go func() {
   390  		waitGroup.Wait()
   391  		c <- 1
   392  	}()
   393  
   394  	select {
   395  	case _ = <-c:
   396  		return nil
   397  	case <-time.After(10 * time.Second):
   398  		return errors.New("Timeout")
   399  	}
   400  
   401  	return nil
   402  }