github.com/influxdata/telegraf@v1.30.3/internal/process/process.go (about)

     1  package process
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"os/exec"
    10  	"sync"
    11  	"sync/atomic"
    12  	"time"
    13  
    14  	"github.com/influxdata/telegraf"
    15  )
    16  
    17  // Process is a long-running process manager that will restart processes if they stop.
    18  type Process struct {
    19  	Cmd          *exec.Cmd
    20  	Stdin        io.WriteCloser
    21  	Stdout       io.ReadCloser
    22  	Stderr       io.ReadCloser
    23  	ReadStdoutFn func(io.Reader)
    24  	ReadStderrFn func(io.Reader)
    25  	RestartDelay time.Duration
    26  	Log          telegraf.Logger
    27  
    28  	name       string
    29  	args       []string
    30  	envs       []string
    31  	pid        int32
    32  	cancel     context.CancelFunc
    33  	mainLoopWg sync.WaitGroup
    34  }
    35  
    36  // New creates a new process wrapper
    37  func New(command []string, envs []string) (*Process, error) {
    38  	if len(command) == 0 {
    39  		return nil, errors.New("no command")
    40  	}
    41  
    42  	p := &Process{
    43  		RestartDelay: 5 * time.Second,
    44  		name:         command[0],
    45  		args:         []string{},
    46  		envs:         envs,
    47  	}
    48  
    49  	if len(command) > 1 {
    50  		p.args = command[1:]
    51  	}
    52  
    53  	return p, nil
    54  }
    55  
    56  // Start the process. A &Process can only be started once. It will restart itself
    57  // as necessary.
    58  func (p *Process) Start() error {
    59  	ctx, cancel := context.WithCancel(context.Background())
    60  	p.cancel = cancel
    61  
    62  	if err := p.cmdStart(); err != nil {
    63  		return err
    64  	}
    65  
    66  	p.mainLoopWg.Add(1)
    67  	go func() {
    68  		if err := p.cmdLoop(ctx); err != nil {
    69  			p.Log.Errorf("Process quit with message: %v", err)
    70  		}
    71  		p.mainLoopWg.Done()
    72  	}()
    73  
    74  	return nil
    75  }
    76  
    77  // Stop is called when the process isn't needed anymore
    78  func (p *Process) Stop() {
    79  	if p.cancel != nil {
    80  		// signal our intent to shut down and not restart the process
    81  		p.cancel()
    82  	}
    83  	// close stdin so the app can shut down gracefully.
    84  	if err := p.Stdin.Close(); err != nil {
    85  		p.Log.Errorf("Stdin closed with message: %v", err)
    86  	}
    87  	p.mainLoopWg.Wait()
    88  }
    89  
    90  func (p *Process) cmdStart() error {
    91  	p.Cmd = exec.Command(p.name, p.args...)
    92  
    93  	if len(p.envs) > 0 {
    94  		p.Cmd.Env = append(os.Environ(), p.envs...)
    95  	}
    96  
    97  	var err error
    98  	p.Stdin, err = p.Cmd.StdinPipe()
    99  	if err != nil {
   100  		return fmt.Errorf("error opening stdin pipe: %w", err)
   101  	}
   102  
   103  	p.Stdout, err = p.Cmd.StdoutPipe()
   104  	if err != nil {
   105  		return fmt.Errorf("error opening stdout pipe: %w", err)
   106  	}
   107  
   108  	p.Stderr, err = p.Cmd.StderrPipe()
   109  	if err != nil {
   110  		return fmt.Errorf("error opening stderr pipe: %w", err)
   111  	}
   112  
   113  	p.Log.Infof("Starting process: %s %s", p.name, p.args)
   114  
   115  	if err := p.Cmd.Start(); err != nil {
   116  		return fmt.Errorf("error starting process: %w", err)
   117  	}
   118  	atomic.StoreInt32(&p.pid, int32(p.Cmd.Process.Pid))
   119  	return nil
   120  }
   121  
   122  func (p *Process) Pid() int {
   123  	pid := atomic.LoadInt32(&p.pid)
   124  	return int(pid)
   125  }
   126  
   127  // cmdLoop watches an already running process, restarting it when appropriate.
   128  func (p *Process) cmdLoop(ctx context.Context) error {
   129  	for {
   130  		err := p.cmdWait(ctx)
   131  		if isQuitting(ctx) {
   132  			p.Log.Infof("Process %s shut down", p.Cmd.Path)
   133  			return nil
   134  		}
   135  
   136  		p.Log.Errorf("Process %s exited: %v", p.Cmd.Path, err)
   137  		p.Log.Infof("Restarting in %s...", p.RestartDelay)
   138  
   139  		select {
   140  		case <-ctx.Done():
   141  			return nil
   142  		case <-time.After(p.RestartDelay):
   143  			// Continue the loop and restart the process
   144  			if err := p.cmdStart(); err != nil {
   145  				return err
   146  			}
   147  		}
   148  	}
   149  }
   150  
   151  // cmdWait waits for the process to finish.
   152  func (p *Process) cmdWait(ctx context.Context) error {
   153  	var wg sync.WaitGroup
   154  
   155  	if p.ReadStdoutFn == nil {
   156  		p.ReadStdoutFn = defaultReadPipe
   157  	}
   158  	if p.ReadStderrFn == nil {
   159  		p.ReadStderrFn = defaultReadPipe
   160  	}
   161  
   162  	processCtx, processCancel := context.WithCancel(context.Background())
   163  	defer processCancel()
   164  
   165  	wg.Add(1)
   166  	go func() {
   167  		p.ReadStdoutFn(p.Stdout)
   168  		wg.Done()
   169  	}()
   170  
   171  	wg.Add(1)
   172  	go func() {
   173  		p.ReadStderrFn(p.Stderr)
   174  		wg.Done()
   175  	}()
   176  
   177  	wg.Add(1)
   178  	go func() {
   179  		select {
   180  		case <-ctx.Done():
   181  			p.gracefulStop(processCtx, p.Cmd, 5*time.Second)
   182  		case <-processCtx.Done():
   183  		}
   184  		wg.Done()
   185  	}()
   186  
   187  	err := p.Cmd.Wait()
   188  	processCancel()
   189  	wg.Wait()
   190  	return err
   191  }
   192  
   193  func isQuitting(ctx context.Context) bool {
   194  	return ctx.Err() != nil
   195  }
   196  
   197  func defaultReadPipe(r io.Reader) {
   198  	_, _ = io.Copy(io.Discard, r)
   199  }