k8s.io/kubernetes@v1.29.3/test/e2e_node/services/server.go (about) 1 /* 2 Copyright 2016 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package services 18 19 import ( 20 "flag" 21 "fmt" 22 "net/http" 23 "os" 24 "os/exec" 25 "path" 26 "reflect" 27 "strconv" 28 "strings" 29 "syscall" 30 "time" 31 32 "k8s.io/klog/v2" 33 34 "k8s.io/kubernetes/test/e2e/framework" 35 ) 36 37 var serverStartTimeout = flag.Duration("server-start-timeout", time.Second*120, "Time to wait for each server to become healthy.") 38 39 // A server manages a separate server process started and killed with 40 // commands. 41 type server struct { 42 // name is the name of the server, it is only used for logging. 43 name string 44 // startCommand is the command used to start the server 45 startCommand *exec.Cmd 46 // killCommand is the command used to stop the server. It is not required. If it 47 // is not specified, `kill` will be used to stop the server. 48 killCommand *exec.Cmd 49 // restartCommand is the command used to restart the server. If provided, it will be used 50 // instead of startCommand when restarting the server. 51 restartCommand *exec.Cmd 52 // healthCheckUrls is the urls used to check whether the server is ready. 53 healthCheckUrls []string 54 // outFilename is the name of the log file. The stdout and stderr of the server 55 // will be redirected to this file. 56 outFilename string 57 // monitorParent determines whether the server should watch its parent process and exit 58 // if its parent is gone. 59 monitorParent bool 60 // restartOnExit determines whether a restart loop is launched with the server 61 restartOnExit bool 62 // Writing to this channel, if it is not nil, stops the restart loop. 63 // When tearing down a server, you should check for this channel and write to it if it exists. 64 stopRestartingCh chan<- bool 65 // Read from this to confirm that the restart loop has stopped. 66 ackStopRestartingCh <-chan bool 67 // The systemd unit name for the service if it exists. If server is not managed by systemd, field is empty. 68 systemdUnitName string 69 } 70 71 // newServer returns a new server with the given name, commands, health check 72 // URLs, etc. 73 func newServer(name string, start, kill, restart *exec.Cmd, urls []string, outputFileName string, monitorParent, restartOnExit bool, systemdUnitName string) *server { 74 return &server{ 75 name: name, 76 startCommand: start, 77 killCommand: kill, 78 restartCommand: restart, 79 healthCheckUrls: urls, 80 outFilename: outputFileName, 81 monitorParent: monitorParent, 82 restartOnExit: restartOnExit, 83 systemdUnitName: systemdUnitName, 84 } 85 } 86 87 // commandToString format command to string. 88 func commandToString(c *exec.Cmd) string { 89 if c == nil { 90 return "" 91 } 92 return strings.Join(append([]string{c.Path}, c.Args[1:]...), " ") 93 } 94 95 func (s *server) String() string { 96 return fmt.Sprintf("server %q start-command: `%s`, kill-command: `%s`, restart-command: `%s`, health-check: %v, output-file: %q", s.name, 97 commandToString(s.startCommand), commandToString(s.killCommand), commandToString(s.restartCommand), s.healthCheckUrls, s.outFilename) 98 } 99 100 // start starts the server by running its commands, monitors it with a health 101 // check, and ensures that it is restarted if applicable. 102 // 103 // Note: restartOnExit == true requires len(s.healthCheckUrls) > 0 to work properly. 104 func (s *server) start() error { 105 klog.Infof("Starting server %q with command %q", s.name, commandToString(s.startCommand)) 106 errCh := make(chan error) 107 108 // Set up restart channels if the server is configured for restart on exit. 109 var stopRestartingCh, ackStopRestartingCh chan bool 110 if s.restartOnExit { 111 if len(s.healthCheckUrls) == 0 { 112 return fmt.Errorf("tried to start %s which has s.restartOnExit == true, but no health check urls provided", s) 113 } 114 115 stopRestartingCh = make(chan bool) 116 ackStopRestartingCh = make(chan bool) 117 118 s.stopRestartingCh = stopRestartingCh 119 s.ackStopRestartingCh = ackStopRestartingCh 120 } 121 122 // This goroutine actually runs the start command for the server. 123 go func() { 124 defer close(errCh) 125 126 // Create the output filename 127 outPath := path.Join(framework.TestContext.ReportDir, s.outFilename) 128 outfile, err := os.Create(outPath) 129 if err != nil { 130 errCh <- fmt.Errorf("failed to create file %q for `%s` %v", outPath, s, err) 131 return 132 } 133 klog.Infof("Output file for server %q: %v", s.name, outfile.Name()) 134 defer outfile.Close() 135 defer outfile.Sync() 136 137 // Set the command to write the output file 138 s.startCommand.Stdout = outfile 139 s.startCommand.Stderr = outfile 140 141 // If monitorParent is set, set Pdeathsig when starting the server. 142 if s.monitorParent { 143 // Death of this test process should kill the server as well. 144 attrs := &syscall.SysProcAttr{} 145 // Hack to set linux-only field without build tags. 146 deathSigField := reflect.ValueOf(attrs).Elem().FieldByName("Pdeathsig") 147 if deathSigField.IsValid() { 148 deathSigField.Set(reflect.ValueOf(syscall.SIGTERM)) 149 } else { 150 errCh <- fmt.Errorf("failed to set Pdeathsig field (non-linux build)") 151 return 152 } 153 s.startCommand.SysProcAttr = attrs 154 } 155 156 // Start the command 157 err = s.startCommand.Start() 158 if err != nil { 159 errCh <- fmt.Errorf("failed to run %s: %w", s, err) 160 return 161 } 162 if !s.restartOnExit { 163 klog.Infof("Waiting for server %q start command to complete", s.name) 164 // If we aren't planning on restarting, ok to Wait() here to release resources. 165 // Otherwise, we Wait() in the restart loop. 166 err = s.startCommand.Wait() 167 if err != nil { 168 errCh <- fmt.Errorf("failed to run start command for server %q: %w", s.name, err) 169 return 170 } 171 } else { 172 usedStartCmd := true 173 for { 174 klog.Infof("Running health check for service %q", s.name) 175 // Wait for an initial health check to pass, so that we are sure the server started. 176 err := readinessCheck(s.name, s.healthCheckUrls, nil) 177 if err != nil { 178 if usedStartCmd { 179 klog.Infof("Waiting for server %q start command to complete after initial health check failed", s.name) 180 s.startCommand.Wait() // Release resources if necessary. 181 } 182 // This should not happen, immediately stop the e2eService process. 183 klog.Fatalf("Restart loop readinessCheck failed for %q", s.name) 184 } else { 185 klog.Infof("Initial health check passed for service %q", s.name) 186 } 187 188 // Initial health check passed, wait until a health check fails again. 189 stillAlive: 190 for { 191 select { 192 case <-stopRestartingCh: 193 ackStopRestartingCh <- true 194 return 195 case <-time.After(time.Second): 196 for _, url := range s.healthCheckUrls { 197 resp, err := http.Head(url) 198 if err != nil || resp.StatusCode != http.StatusOK { 199 break stillAlive 200 } 201 } 202 } 203 } 204 205 if usedStartCmd { 206 s.startCommand.Wait() // Release resources from last cmd 207 usedStartCmd = false 208 } 209 if s.restartCommand != nil { 210 // Always make a fresh copy of restartCommand before 211 // running, we may have to restart multiple times 212 s.restartCommand = &exec.Cmd{ 213 Path: s.restartCommand.Path, 214 Args: s.restartCommand.Args, 215 Env: s.restartCommand.Env, 216 Dir: s.restartCommand.Dir, 217 Stdin: s.restartCommand.Stdin, 218 Stdout: s.restartCommand.Stdout, 219 Stderr: s.restartCommand.Stderr, 220 ExtraFiles: s.restartCommand.ExtraFiles, 221 SysProcAttr: s.restartCommand.SysProcAttr, 222 } 223 // Run and wait for exit. This command is assumed to have 224 // short duration, e.g. systemctl restart 225 klog.Infof("Restarting server %q with restart command", s.name) 226 err = s.restartCommand.Run() 227 if err != nil { 228 // This should not happen, immediately stop the e2eService process. 229 klog.Fatalf("Restarting server %s with restartCommand failed. Error: %v.", s, err) 230 } 231 } else { 232 s.startCommand = &exec.Cmd{ 233 Path: s.startCommand.Path, 234 Args: s.startCommand.Args, 235 Env: s.startCommand.Env, 236 Dir: s.startCommand.Dir, 237 Stdin: s.startCommand.Stdin, 238 Stdout: s.startCommand.Stdout, 239 Stderr: s.startCommand.Stderr, 240 ExtraFiles: s.startCommand.ExtraFiles, 241 SysProcAttr: s.startCommand.SysProcAttr, 242 } 243 klog.Infof("Restarting server %q with start command", s.name) 244 err = s.startCommand.Start() 245 usedStartCmd = true 246 if err != nil { 247 // This should not happen, immediately stop the e2eService process. 248 klog.Fatalf("Restarting server %s with startCommand failed. Error: %v.", s, err) 249 } 250 } 251 } 252 } 253 }() 254 255 return readinessCheck(s.name, s.healthCheckUrls, errCh) 256 } 257 258 // kill runs the server's kill command. 259 func (s *server) kill() error { 260 klog.Infof("Kill server %q", s.name) 261 name := s.name 262 cmd := s.startCommand 263 264 // If s has a restart loop, turn it off. 265 if s.restartOnExit { 266 s.stopRestartingCh <- true 267 <-s.ackStopRestartingCh 268 } 269 270 if s.killCommand != nil { 271 return s.killCommand.Run() 272 } 273 274 if cmd == nil { 275 return fmt.Errorf("could not kill %q because both `killCommand` and `startCommand` are nil", name) 276 } 277 278 if cmd.Process == nil { 279 klog.V(2).Infof("%q not running", name) 280 return nil 281 } 282 pid := cmd.Process.Pid 283 if pid <= 1 { 284 return fmt.Errorf("invalid PID %d for %q", pid, name) 285 } 286 287 // Attempt to shut down the process in a friendly manner before forcing it. 288 waitChan := make(chan error) 289 go func() { 290 _, err := cmd.Process.Wait() 291 waitChan <- err 292 close(waitChan) 293 }() 294 295 const timeout = 10 * time.Second 296 for _, signal := range []string{"-TERM", "-KILL"} { 297 klog.V(2).Infof("Killing process %d (%s) with %s", pid, name, signal) 298 cmd := exec.Command("kill", signal, strconv.Itoa(pid)) 299 _, err := cmd.Output() 300 if err != nil { 301 klog.Errorf("Error signaling process %d (%s) with %s: %v", pid, name, signal, err) 302 continue 303 } 304 305 select { 306 case err := <-waitChan: 307 if err != nil { 308 return fmt.Errorf("error stopping %q: %w", name, err) 309 } 310 // Success! 311 return nil 312 case <-time.After(timeout): 313 // Continue. 314 } 315 } 316 317 return fmt.Errorf("unable to stop %q", name) 318 } 319 320 func (s *server) stopUnit() error { 321 klog.Infof("Stopping systemd unit for server %q with unit name: %q", s.name, s.systemdUnitName) 322 if s.systemdUnitName != "" { 323 err := exec.Command("sudo", "systemctl", "stop", s.systemdUnitName).Run() 324 if err != nil { 325 return fmt.Errorf("Failed to stop systemd unit name: %q: %w", s.systemdUnitName, err) 326 } 327 } 328 return nil 329 }