github.com/openshift/installer@v1.4.17/pkg/clusterapi/internal/process/process.go (about)

     1  package process
     2  
     3  import (
     4  	"context"
     5  	"crypto/tls"
     6  	"fmt"
     7  	"io"
     8  	"net"
     9  	"net/http"
    10  	"net/url"
    11  	"os/exec"
    12  	"path"
    13  	"regexp"
    14  	"sync"
    15  	"syscall"
    16  	"time"
    17  
    18  	"github.com/sirupsen/logrus"
    19  )
    20  
    21  // ListenAddr represents some listening address and port.
    22  type ListenAddr struct {
    23  	Address string
    24  	Port    string
    25  }
    26  
    27  // URL returns a URL for this address with the given scheme and subpath.
    28  func (l *ListenAddr) URL(scheme string, path string) *url.URL {
    29  	return &url.URL{
    30  		Scheme: scheme,
    31  		Host:   l.HostPort(),
    32  		Path:   path,
    33  	}
    34  }
    35  
    36  // HostPort returns the joined host-port pair for this address.
    37  func (l *ListenAddr) HostPort() string {
    38  	return net.JoinHostPort(l.Address, l.Port)
    39  }
    40  
    41  // HealthCheck describes the information needed to health-check a process via
    42  // some health-check URL.
    43  type HealthCheck struct {
    44  	url.URL
    45  
    46  	// HealthCheckPollInterval is the interval which will be used for polling the
    47  	// endpoint described by Host, Port, and Path.
    48  	//
    49  	// If left empty it will default to 100 Milliseconds.
    50  	PollInterval time.Duration
    51  }
    52  
    53  // State define the state of the process.
    54  type State struct {
    55  	Cmd *exec.Cmd
    56  
    57  	// HealthCheck describes how to check if this process is up.  If we get an http.StatusOK,
    58  	// we assume the process is ready to operate.
    59  	//
    60  	// For example, the /healthz endpoint of the k8s API server, or the /health endpoint of etcd.
    61  	HealthCheck *HealthCheck
    62  
    63  	Dir  string
    64  	Args []string
    65  	Env  []string
    66  
    67  	StopTimeout  time.Duration
    68  	StartTimeout time.Duration
    69  
    70  	Path string
    71  
    72  	// ready holds whether the process is currently in ready state (hit the ready condition) or not.
    73  	// It will be set to true on a successful `Start()` and set to false on a successful `Stop()`
    74  	ready bool
    75  
    76  	// waitDone is closed when our call to wait finishes up, and indicates that
    77  	// our process has terminated.
    78  	waitDone chan struct{}
    79  	errMu    sync.Mutex
    80  	exitErr  error
    81  	exited   bool
    82  }
    83  
    84  // Init sets up this process, configuring binary paths if missing, initializing
    85  // temporary directories, etc.
    86  //
    87  // This defaults all defaultable fields.
    88  func (ps *State) Init(name string) error {
    89  	if ps.Path == "" {
    90  		if name == "" {
    91  			return fmt.Errorf("must have at least one of name or path")
    92  		}
    93  	}
    94  
    95  	if ps.StartTimeout == 0 {
    96  		ps.StartTimeout = 20 * time.Second
    97  	}
    98  
    99  	if ps.StopTimeout == 0 {
   100  		ps.StopTimeout = 20 * time.Second
   101  	}
   102  	return nil
   103  }
   104  
   105  type stopChannel chan struct{}
   106  
   107  // CheckFlag checks the help output of this command for the presence of the given flag, specified
   108  // without the leading `--` (e.g. `CheckFlag("insecure-port")` checks for `--insecure-port`),
   109  // returning true if the flag is present.
   110  func (ps *State) CheckFlag(flag string) (bool, error) {
   111  	cmd := exec.Command(ps.Path, "--help") //nolint:gosec
   112  	outContents, err := cmd.CombinedOutput()
   113  	if err != nil {
   114  		return false, fmt.Errorf("unable to run command %q to check for flag %q: %w", ps.Path, flag, err)
   115  	}
   116  	pat := `(?m)^\s*--` + flag + `\b` // (m --> multi-line --> ^ matches start of line)
   117  	matched, err := regexp.Match(pat, outContents)
   118  	if err != nil {
   119  		return false, fmt.Errorf("unable to check command %q for flag %q in help output: %w", ps.Path, flag, err)
   120  	}
   121  	return matched, nil
   122  }
   123  
   124  // Start starts the apiserver, waits for it to come up, and returns an error,
   125  // if occurred.
   126  func (ps *State) Start(ctx context.Context, stdout io.Writer, stderr io.Writer) (err error) {
   127  	if ps.ready {
   128  		return nil
   129  	}
   130  
   131  	ps.Cmd = exec.CommandContext(ctx, ps.Path, ps.Args...) //nolint:gosec
   132  	ps.Cmd.Env = append(ps.Cmd.Environ(), ps.Env...)
   133  	ps.Cmd.Stdout = stdout
   134  	ps.Cmd.Stderr = stderr
   135  	ps.Cmd.Dir = ps.Dir
   136  	ps.Cmd.SysProcAttr = &syscall.SysProcAttr{
   137  		Setpgid: true,
   138  	}
   139  
   140  	ready := make(chan bool)
   141  	timedOut := time.After(ps.StartTimeout)
   142  
   143  	pollerStopCh := make(stopChannel)
   144  	if ps.HealthCheck != nil {
   145  		go pollURLUntilOK(ps.HealthCheck.URL, ps.HealthCheck.PollInterval, ready, pollerStopCh)
   146  	} else {
   147  		// Assume that if we're not health-checking, we're ready to go.
   148  		close(ready)
   149  	}
   150  
   151  	ps.waitDone = make(chan struct{})
   152  	if err := ps.Cmd.Start(); err != nil {
   153  		ps.errMu.Lock()
   154  		defer ps.errMu.Unlock()
   155  		ps.exited = true
   156  		return err
   157  	}
   158  	go func() {
   159  		defer close(ps.waitDone)
   160  		err := ps.Cmd.Wait()
   161  
   162  		ps.errMu.Lock()
   163  		defer ps.errMu.Unlock()
   164  		ps.exitErr = err
   165  		ps.exited = true
   166  	}()
   167  
   168  	select {
   169  	case <-ready:
   170  		ps.ready = true
   171  		return nil
   172  	case <-ps.waitDone:
   173  		close(pollerStopCh)
   174  		return fmt.Errorf("timeout waiting for process %s to start successfully "+
   175  			"(it may have failed to start, or stopped unexpectedly before becoming ready)",
   176  			path.Base(ps.Path))
   177  	case <-timedOut:
   178  		close(pollerStopCh)
   179  		if ps.Cmd != nil {
   180  			// intentionally ignore this -- we might've crashed, failed to start, etc
   181  			ps.Cmd.Process.Signal(syscall.SIGTERM) //nolint:errcheck
   182  		}
   183  		return fmt.Errorf("timeout waiting for process %s to start", path.Base(ps.Path))
   184  	}
   185  }
   186  
   187  // Exited returns true if the process exited, and may also
   188  // return an error (as per Cmd.Wait) if the process did not
   189  // exit with error code 0.
   190  func (ps *State) Exited() (bool, error) {
   191  	ps.errMu.Lock()
   192  	defer ps.errMu.Unlock()
   193  	return ps.exited, ps.exitErr
   194  }
   195  
   196  func pollURLUntilOK(url url.URL, interval time.Duration, ready chan bool, stopCh stopChannel) {
   197  	client := &http.Client{
   198  		Transport: &http.Transport{
   199  			TLSClientConfig: &tls.Config{
   200  				// there's probably certs *somewhere*,
   201  				// but it's fine to just skip validating
   202  				// them for health checks during testing
   203  				InsecureSkipVerify: true, //nolint:gosec
   204  			},
   205  		},
   206  	}
   207  	if interval <= 0 {
   208  		interval = 100 * time.Millisecond
   209  	}
   210  	for {
   211  		res, err := client.Get(url.String())
   212  		if err == nil {
   213  			res.Body.Close()
   214  			if res.StatusCode == http.StatusOK {
   215  				ready <- true
   216  				return
   217  			}
   218  		}
   219  
   220  		select {
   221  		case <-stopCh:
   222  			return
   223  		default:
   224  			time.Sleep(interval)
   225  		}
   226  	}
   227  }
   228  
   229  // Stop stops this process gracefully, waits for its termination.
   230  func (ps *State) Stop() error {
   231  	if ps.Cmd == nil {
   232  		return nil
   233  	}
   234  	if done, err := ps.Exited(); done {
   235  		if err != nil {
   236  			logrus.Warnf("process %s exited with error: %v", path.Base(ps.Path), err)
   237  		}
   238  		return nil
   239  	}
   240  	if err := ps.Cmd.Process.Signal(syscall.SIGTERM); err != nil {
   241  		return fmt.Errorf("unable to signal for process %s to stop: %w", ps.Path, err)
   242  	}
   243  
   244  	timedOut := time.After(ps.StopTimeout)
   245  	select {
   246  	case <-ps.waitDone:
   247  		break
   248  	case <-timedOut:
   249  		if err := ps.Cmd.Process.Signal(syscall.SIGKILL); err != nil {
   250  			return fmt.Errorf("unable to signal for process %s to stop: %w", ps.Path, err)
   251  		}
   252  		return fmt.Errorf("timeout waiting for process %s to stop, sent SIGKILL", path.Base(ps.Path))
   253  	}
   254  	ps.ready = false
   255  	return nil
   256  }