gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/test/runtimes/proctor/main.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Binary proctor runs the test for a particular runtime. It is meant to be 16 // included in Docker images for all runtime tests. 17 package main 18 19 import ( 20 "flag" 21 "fmt" 22 "log" 23 "os" 24 "strings" 25 "time" 26 27 "golang.org/x/sys/unix" 28 "gvisor.dev/gvisor/test/runtimes/proctor/lib" 29 ) 30 31 var ( 32 runtime = flag.String("runtime", "", "name of runtime") 33 list = flag.Bool("list", false, "list all available tests") 34 testNames = flag.String("tests", "", "run a subset of the available tests") 35 pause = flag.Bool("pause", false, "cause container to pause indefinitely, reaping any zombie children") 36 timeout = flag.Duration("timeout", 90*time.Minute, "batch timeout") 37 perTestTimeout = flag.Duration("per_test_timeout", 20*time.Minute, "per-test timeout (a value of 0 disables per-test timeouts)") 38 runsPerTest = flag.Int("runs_per_test", 1, "number of times to run each test (a value of 0 is the same as a value of 1, i.e. running once)") 39 flakyIsError = flag.Bool("flaky_is_error", true, "if true, when running with multiple --runs_per_test, tests with inconsistent status will result in a failure status code for the batch; if false, they will be considered as passing") 40 flakyShortCircuit = flag.Bool("flaky_short_circuit", true, "if true, when running with multiple --runs_per_test and a test is detected as flaky, exit immediately rather than running all --runs_per_test") 41 ) 42 43 // setNumFilesLimit changes the NOFILE soft rlimit if it is too high. 44 func setNumFilesLimit() error { 45 // In docker containers, the default value of the NOFILE limit is 46 // 1048576. A few runtime tests (e.g. python:test_subprocess) 47 // enumerates all possible file descriptors and these tests can fail by 48 // timeout if the NOFILE limit is too high. On gVisor, syscalls are 49 // slower so these tests will need even more time to pass. 50 const nofile = 32768 51 rLimit := unix.Rlimit{} 52 err := unix.Getrlimit(unix.RLIMIT_NOFILE, &rLimit) 53 if err != nil { 54 return fmt.Errorf("failed to get RLIMIT_NOFILE: %v", err) 55 } 56 if rLimit.Cur > nofile { 57 rLimit.Cur = nofile 58 err := unix.Setrlimit(unix.RLIMIT_NOFILE, &rLimit) 59 if err != nil { 60 return fmt.Errorf("failed to set RLIMIT_NOFILE: %v", err) 61 } 62 } 63 return nil 64 } 65 66 func main() { 67 flag.Parse() 68 69 if *pause { 70 lib.PauseAndReap() 71 panic("pauseAndReap should never return") 72 } 73 74 if *runtime == "" { 75 log.Fatalf("runtime flag must be provided") 76 } 77 78 timer := time.NewTimer(*timeout) 79 80 tr, err := lib.TestRunnerForRuntime(*runtime) 81 if err != nil { 82 log.Fatalf("%v", err) 83 } 84 85 // List tests. 86 if *list { 87 tests, err := tr.ListTests() 88 if err != nil { 89 log.Fatalf("failed to list tests: %v", err) 90 } 91 for _, test := range tests { 92 fmt.Println(test) 93 } 94 return 95 } 96 97 // heartbeat 98 go func() { 99 for { 100 time.Sleep(15 * time.Second) 101 log.Println("Proctor checking in " + time.Now().String()) 102 } 103 }() 104 105 var tests []string 106 if *testNames == "" { 107 // Run every test. 108 tests, err = tr.ListTests() 109 if err != nil { 110 log.Fatalf("failed to get all tests: %v", err) 111 } 112 } else { 113 // Run subset of test. 114 tests = strings.Split(*testNames, ",") 115 } 116 117 if err := setNumFilesLimit(); err != nil { 118 log.Fatalf("%v", err) 119 } 120 121 // Run tests. 122 cmds := tr.TestCmds(tests) 123 done := make(chan struct{}) 124 defer close(done) 125 go func() { 126 select { 127 case <-done: 128 return 129 case <-timer.C: 130 log.Println("The batch timeout duration is exceeded") 131 killed := false 132 for _, cmd := range cmds { 133 p := cmd.Process 134 if p == nil || cmd.ProcessState != nil { 135 continue 136 } 137 pid := p.Pid 138 if pid > 0 { 139 unix.Kill(pid, unix.SIGTERM) 140 killed = true 141 } 142 } 143 if killed { 144 // Let tests to handle signals 145 time.Sleep(5 * time.Second) 146 } 147 panic("FAIL: The batch timeout duration is exceeded") 148 } 149 }() 150 numIterations := *runsPerTest 151 if numIterations == 0 { 152 numIterations = 1 153 } 154 for _, cmd := range cmds { 155 iterations := 0 156 successes := 0 157 var firstFailure error 158 for iteration := 1; iteration <= *runsPerTest; iteration++ { 159 // Make a copy of the command, as the same exec.Cmd object cannot be started multiple times. 160 cmdCopy := *cmd 161 162 // Handle test timeout. 163 testDone := make(chan struct{}) 164 testTimedOutCh := make(chan bool, 1) 165 if *perTestTimeout != 0 { 166 go func() { 167 timer := time.NewTimer(*perTestTimeout) 168 defer timer.Stop() 169 select { 170 case <-timer.C: 171 testTimedOutCh <- true 172 cmdCopy.Process.Kill() 173 case <-done: 174 testTimedOutCh <- false 175 case <-testDone: 176 testTimedOutCh <- false 177 } 178 }() 179 } 180 181 // Run the test. 182 cmdCopy.Stdout, cmdCopy.Stderr = os.Stdout, os.Stderr 183 testErr := cmdCopy.Run() 184 close(testDone) 185 if <-testTimedOutCh { 186 testErr = fmt.Errorf("test timed out after %v", *perTestTimeout) 187 } 188 189 // Tally result. 190 iterations++ 191 if testErr == nil { 192 successes++ 193 } else if firstFailure == nil { 194 firstFailure = testErr 195 } 196 if *flakyShortCircuit && successes > 0 && firstFailure != nil { 197 break 198 } 199 } 200 if successes > 0 && firstFailure != nil { 201 // Test is flaky. 202 if *flakyIsError { 203 log.Fatalf("FLAKY: %v (%d failures out of %d)", firstFailure, iterations-successes, iterations) 204 } else { 205 log.Println(fmt.Sprintf("FLAKY: %v (%d failures out of %d)", firstFailure, iterations-successes, iterations)) 206 } 207 } else if successes == 0 && firstFailure != nil { 208 // Test is 100% failing. 209 log.Fatalf("FAIL: %v", firstFailure) 210 } else if successes > 0 && firstFailure == nil { 211 // Test is 100% succeeding, do nothing. 212 } else { 213 log.Fatalf("Internal logic error") 214 } 215 } 216 }