github.com/openshift/installer@v1.4.17/pkg/clusterapi/internal/process/process.go (about) 1 package process 2 3 import ( 4 "context" 5 "crypto/tls" 6 "fmt" 7 "io" 8 "net" 9 "net/http" 10 "net/url" 11 "os/exec" 12 "path" 13 "regexp" 14 "sync" 15 "syscall" 16 "time" 17 18 "github.com/sirupsen/logrus" 19 ) 20 21 // ListenAddr represents some listening address and port. 22 type ListenAddr struct { 23 Address string 24 Port string 25 } 26 27 // URL returns a URL for this address with the given scheme and subpath. 28 func (l *ListenAddr) URL(scheme string, path string) *url.URL { 29 return &url.URL{ 30 Scheme: scheme, 31 Host: l.HostPort(), 32 Path: path, 33 } 34 } 35 36 // HostPort returns the joined host-port pair for this address. 37 func (l *ListenAddr) HostPort() string { 38 return net.JoinHostPort(l.Address, l.Port) 39 } 40 41 // HealthCheck describes the information needed to health-check a process via 42 // some health-check URL. 43 type HealthCheck struct { 44 url.URL 45 46 // HealthCheckPollInterval is the interval which will be used for polling the 47 // endpoint described by Host, Port, and Path. 48 // 49 // If left empty it will default to 100 Milliseconds. 50 PollInterval time.Duration 51 } 52 53 // State define the state of the process. 54 type State struct { 55 Cmd *exec.Cmd 56 57 // HealthCheck describes how to check if this process is up. If we get an http.StatusOK, 58 // we assume the process is ready to operate. 59 // 60 // For example, the /healthz endpoint of the k8s API server, or the /health endpoint of etcd. 61 HealthCheck *HealthCheck 62 63 Dir string 64 Args []string 65 Env []string 66 67 StopTimeout time.Duration 68 StartTimeout time.Duration 69 70 Path string 71 72 // ready holds whether the process is currently in ready state (hit the ready condition) or not. 73 // It will be set to true on a successful `Start()` and set to false on a successful `Stop()` 74 ready bool 75 76 // waitDone is closed when our call to wait finishes up, and indicates that 77 // our process has terminated. 78 waitDone chan struct{} 79 errMu sync.Mutex 80 exitErr error 81 exited bool 82 } 83 84 // Init sets up this process, configuring binary paths if missing, initializing 85 // temporary directories, etc. 86 // 87 // This defaults all defaultable fields. 88 func (ps *State) Init(name string) error { 89 if ps.Path == "" { 90 if name == "" { 91 return fmt.Errorf("must have at least one of name or path") 92 } 93 } 94 95 if ps.StartTimeout == 0 { 96 ps.StartTimeout = 20 * time.Second 97 } 98 99 if ps.StopTimeout == 0 { 100 ps.StopTimeout = 20 * time.Second 101 } 102 return nil 103 } 104 105 type stopChannel chan struct{} 106 107 // CheckFlag checks the help output of this command for the presence of the given flag, specified 108 // without the leading `--` (e.g. `CheckFlag("insecure-port")` checks for `--insecure-port`), 109 // returning true if the flag is present. 110 func (ps *State) CheckFlag(flag string) (bool, error) { 111 cmd := exec.Command(ps.Path, "--help") //nolint:gosec 112 outContents, err := cmd.CombinedOutput() 113 if err != nil { 114 return false, fmt.Errorf("unable to run command %q to check for flag %q: %w", ps.Path, flag, err) 115 } 116 pat := `(?m)^\s*--` + flag + `\b` // (m --> multi-line --> ^ matches start of line) 117 matched, err := regexp.Match(pat, outContents) 118 if err != nil { 119 return false, fmt.Errorf("unable to check command %q for flag %q in help output: %w", ps.Path, flag, err) 120 } 121 return matched, nil 122 } 123 124 // Start starts the apiserver, waits for it to come up, and returns an error, 125 // if occurred. 126 func (ps *State) Start(ctx context.Context, stdout io.Writer, stderr io.Writer) (err error) { 127 if ps.ready { 128 return nil 129 } 130 131 ps.Cmd = exec.CommandContext(ctx, ps.Path, ps.Args...) //nolint:gosec 132 ps.Cmd.Env = append(ps.Cmd.Environ(), ps.Env...) 133 ps.Cmd.Stdout = stdout 134 ps.Cmd.Stderr = stderr 135 ps.Cmd.Dir = ps.Dir 136 ps.Cmd.SysProcAttr = &syscall.SysProcAttr{ 137 Setpgid: true, 138 } 139 140 ready := make(chan bool) 141 timedOut := time.After(ps.StartTimeout) 142 143 pollerStopCh := make(stopChannel) 144 if ps.HealthCheck != nil { 145 go pollURLUntilOK(ps.HealthCheck.URL, ps.HealthCheck.PollInterval, ready, pollerStopCh) 146 } else { 147 // Assume that if we're not health-checking, we're ready to go. 148 close(ready) 149 } 150 151 ps.waitDone = make(chan struct{}) 152 if err := ps.Cmd.Start(); err != nil { 153 ps.errMu.Lock() 154 defer ps.errMu.Unlock() 155 ps.exited = true 156 return err 157 } 158 go func() { 159 defer close(ps.waitDone) 160 err := ps.Cmd.Wait() 161 162 ps.errMu.Lock() 163 defer ps.errMu.Unlock() 164 ps.exitErr = err 165 ps.exited = true 166 }() 167 168 select { 169 case <-ready: 170 ps.ready = true 171 return nil 172 case <-ps.waitDone: 173 close(pollerStopCh) 174 return fmt.Errorf("timeout waiting for process %s to start successfully "+ 175 "(it may have failed to start, or stopped unexpectedly before becoming ready)", 176 path.Base(ps.Path)) 177 case <-timedOut: 178 close(pollerStopCh) 179 if ps.Cmd != nil { 180 // intentionally ignore this -- we might've crashed, failed to start, etc 181 ps.Cmd.Process.Signal(syscall.SIGTERM) //nolint:errcheck 182 } 183 return fmt.Errorf("timeout waiting for process %s to start", path.Base(ps.Path)) 184 } 185 } 186 187 // Exited returns true if the process exited, and may also 188 // return an error (as per Cmd.Wait) if the process did not 189 // exit with error code 0. 190 func (ps *State) Exited() (bool, error) { 191 ps.errMu.Lock() 192 defer ps.errMu.Unlock() 193 return ps.exited, ps.exitErr 194 } 195 196 func pollURLUntilOK(url url.URL, interval time.Duration, ready chan bool, stopCh stopChannel) { 197 client := &http.Client{ 198 Transport: &http.Transport{ 199 TLSClientConfig: &tls.Config{ 200 // there's probably certs *somewhere*, 201 // but it's fine to just skip validating 202 // them for health checks during testing 203 InsecureSkipVerify: true, //nolint:gosec 204 }, 205 }, 206 } 207 if interval <= 0 { 208 interval = 100 * time.Millisecond 209 } 210 for { 211 res, err := client.Get(url.String()) 212 if err == nil { 213 res.Body.Close() 214 if res.StatusCode == http.StatusOK { 215 ready <- true 216 return 217 } 218 } 219 220 select { 221 case <-stopCh: 222 return 223 default: 224 time.Sleep(interval) 225 } 226 } 227 } 228 229 // Stop stops this process gracefully, waits for its termination. 230 func (ps *State) Stop() error { 231 if ps.Cmd == nil { 232 return nil 233 } 234 if done, err := ps.Exited(); done { 235 if err != nil { 236 logrus.Warnf("process %s exited with error: %v", path.Base(ps.Path), err) 237 } 238 return nil 239 } 240 if err := ps.Cmd.Process.Signal(syscall.SIGTERM); err != nil { 241 return fmt.Errorf("unable to signal for process %s to stop: %w", ps.Path, err) 242 } 243 244 timedOut := time.After(ps.StopTimeout) 245 select { 246 case <-ps.waitDone: 247 break 248 case <-timedOut: 249 if err := ps.Cmd.Process.Signal(syscall.SIGKILL); err != nil { 250 return fmt.Errorf("unable to signal for process %s to stop: %w", ps.Path, err) 251 } 252 return fmt.Errorf("timeout waiting for process %s to stop, sent SIGKILL", path.Base(ps.Path)) 253 } 254 ps.ready = false 255 return nil 256 }