github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/citogo/main.go (about) 1 package main 2 3 import ( 4 "bytes" 5 "encoding/json" 6 "errors" 7 "flag" 8 "fmt" 9 "os" 10 "os/exec" 11 "path/filepath" 12 "strings" 13 "time" 14 15 "golang.org/x/sync/errgroup" 16 17 "github.com/keybase/client/go/citogo/types" 18 ) 19 20 type opts struct { 21 Flakes int 22 Fails int 23 Prefix string 24 S3Bucket string 25 ReportLambdaFunction string 26 DirBasename string 27 BuildID string 28 Branch string 29 Parallel int 30 Preserve bool 31 BuildURL string 32 NoCompile bool 33 TestBinary string 34 Timeout string 35 Pause time.Duration 36 } 37 38 func logError(f string, args ...interface{}) { 39 s := fmt.Sprintf(f, args...) 40 if s[len(s)-1] != '\n' { 41 s += "\n" 42 } 43 fmt.Fprintf(os.Stderr, "%s", s) 44 } 45 46 type runner struct { 47 opts opts 48 flakes []string 49 fails []string 50 tests []string 51 } 52 53 func convertBreakingChars(s string) string { 54 // replace either the unix or the DOS directory marker 55 // with an underscore, so as not to break the directory 56 // structure of where we are storing the log 57 s = strings.ReplaceAll(s, "/", "_") 58 s = strings.ReplaceAll(s, "\\", "_") 59 s = strings.ReplaceAll(s, "-", "_") 60 return s 61 } 62 63 func (r *runner) parseArgs() (err error) { 64 flag.IntVar(&r.opts.Flakes, "flakes", 3, "number of allowed flakes") 65 flag.IntVar(&r.opts.Fails, "fails", -1, "number of fails allowed before quitting") 66 flag.IntVar(&r.opts.Parallel, "parallel", 1, "number of tests to run in parallel") 67 flag.StringVar(&r.opts.Prefix, "prefix", "", "test set prefix") 68 flag.StringVar(&r.opts.S3Bucket, "s3bucket", "", "AWS S3 bucket to write failures to") 69 flag.StringVar(&r.opts.ReportLambdaFunction, "report-lambda-function", "", "lambda function to report results to") 70 flag.StringVar(&r.opts.BuildID, "build-id", "", "build ID of the current build") 71 flag.StringVar(&r.opts.Branch, "branch", "", "the branch of the current build") 72 flag.BoolVar(&r.opts.Preserve, "preserve", false, "preserve test binary after done") 73 flag.StringVar(&r.opts.BuildURL, "build-url", "", "URL for this build (in CI mainly)") 74 flag.BoolVar(&r.opts.NoCompile, "no-compile", false, "specify flag if you've pre-compiled the test") 75 flag.StringVar(&r.opts.TestBinary, "test-binary", "", "specify the test binary to run") 76 flag.StringVar(&r.opts.Timeout, "timeout", "60s", "timeout (in seconds) for any one individual test") 77 flag.DurationVar(&r.opts.Pause, "pause", 0, "pause duration between each test (default 0)") 78 flag.Parse() 79 var d string 80 d, err = os.Getwd() 81 if err != nil { 82 return err 83 } 84 r.opts.DirBasename = filepath.Base(d) 85 return nil 86 } 87 88 func (r *runner) compile() error { 89 if r.opts.NoCompile { 90 return nil 91 } 92 fmt.Printf("CMPL: %s\n", r.testerName()) 93 cmd := exec.Command("go", "test", "-c") 94 cmd.Stdout = os.Stdout 95 cmd.Stderr = os.Stderr 96 return cmd.Run() 97 } 98 99 func filter(v []string) []string { 100 var ret []string 101 for _, s := range v { 102 if s != "" { 103 ret = append(ret, s) 104 } 105 } 106 return ret 107 } 108 109 func (r *runner) testerName() string { 110 if r.opts.TestBinary != "" { 111 return r.opts.TestBinary 112 } 113 return fmt.Sprintf(".%c%s.test", os.PathSeparator, r.opts.DirBasename) 114 } 115 116 func (r *runner) listTests() error { 117 cmd := exec.Command(r.testerName(), "-test.list", ".") 118 var out bytes.Buffer 119 cmd.Stdout = &out 120 err := cmd.Run() 121 if err != nil { 122 return err 123 } 124 r.tests = filter(strings.Split(out.String(), "\n")) 125 return nil 126 } 127 128 func (r *runner) flushTestLogs(test string, log bytes.Buffer) (string, error) { 129 logName := fmt.Sprintf("citogo-%s-%s-%s-%s", convertBreakingChars(r.opts.Branch), 130 convertBreakingChars(r.opts.BuildID), convertBreakingChars(r.opts.Prefix), test) 131 if r.opts.S3Bucket != "" { 132 return r.flushLogsToS3(logName, log) 133 } 134 return r.flushTestLogsToTemp(logName, log) 135 } 136 137 func (r *runner) flushLogsToS3(logName string, log bytes.Buffer) (string, error) { 138 return s3put(&log, r.opts.S3Bucket, logName) 139 } 140 141 func (r *runner) flushTestLogsToTemp(logName string, log bytes.Buffer) (string, error) { 142 tmpfile, err := os.CreateTemp("", fmt.Sprintf("%s-", logName)) 143 if err != nil { 144 return "", err 145 } 146 _, err = tmpfile.Write(log.Bytes()) 147 if err != nil { 148 return "", err 149 } 150 err = tmpfile.Close() 151 if err != nil { 152 return "", err 153 } 154 return fmt.Sprintf("see log: %s", tmpfile.Name()), nil 155 } 156 157 func (r *runner) report(result types.TestResult) { 158 if r.opts.ReportLambdaFunction == "" { 159 return 160 } 161 162 b, err := json.Marshal(result) 163 if err != nil { 164 logError("error marshalling result: %s", err.Error()) 165 return 166 } 167 168 err = lambdaInvoke(r.opts.ReportLambdaFunction, b) 169 if err != nil { 170 logError("error reporting flake: %s", err.Error()) 171 } 172 } 173 174 func (r *runner) newTestResult(outcome types.Outcome, testName string, where string) types.TestResult { 175 return types.TestResult{ 176 Outcome: outcome, 177 TestName: testName, 178 Where: where, 179 Branch: r.opts.Branch, 180 BuildID: r.opts.BuildID, 181 Prefix: r.opts.Prefix, 182 BuildURL: r.opts.BuildURL, 183 } 184 } 185 186 func (r *runner) runTest(test string) error { 187 canRerun := len(r.flakes) < r.opts.Flakes 188 outcome, where, err := r.runTestOnce(test, false /* isRerun */, canRerun) 189 if err != nil { 190 return err 191 } 192 if outcome == types.OutcomeSuccess { 193 return nil 194 } 195 if len(r.flakes) >= r.opts.Flakes { 196 return errTestFailed 197 } 198 outcome2, _, err2 := r.runTestOnce(test, true /* isRerun */, false /* canRerun */) 199 if err2 != nil { 200 return err2 201 } 202 switch outcome2 { 203 case types.OutcomeFail: 204 return errTestFailed 205 case types.OutcomeSuccess: 206 r.report(r.newTestResult(types.OutcomeFlake, test, where)) 207 r.flakes = append(r.flakes, test) 208 } 209 return nil 210 } 211 212 var errTestFailed = errors.New("test failed") 213 214 // runTestOnce only returns an error if there was a problem with the test 215 // harness code; it does not return an error if the test failed. 216 func (r *runner) runTestOnce(test string, isRerun bool, canRerun bool) (outcome types.Outcome, where string, err error) { 217 defer func() { 218 logOutcome := outcome 219 if outcome == types.OutcomeFail && canRerun { 220 logOutcome = types.OutcomeFlake 221 } 222 fmt.Printf("%s: %s %s\n", logOutcome.Abbrv(), test, where) 223 if logOutcome != types.OutcomeFlake && r.opts.Branch == "master" && err == nil { 224 r.report(r.newTestResult(logOutcome, test, where)) 225 } 226 }() 227 228 cmd := exec.Command(r.testerName(), "-test.run", "^"+test+"$", "-test.timeout", r.opts.Timeout) 229 if isRerun { 230 cmd.Env = append(os.Environ(), "CITOGO_FLAKE_RERUN=1") 231 } 232 var combined bytes.Buffer 233 cmd.Stdout = &combined 234 cmd.Stderr = &combined 235 testErr := cmd.Run() 236 if testErr != nil { 237 err = errTestFailed 238 239 var flushErr error 240 where, flushErr := r.flushTestLogs(test, combined) 241 if flushErr != nil { 242 return types.OutcomeFail, "", flushErr 243 } 244 return types.OutcomeFail, where, nil 245 } 246 return types.OutcomeSuccess, "", nil 247 } 248 249 func (r *runner) runTestFixError(t string) error { 250 err := r.runTest(t) 251 if err == nil { 252 return nil 253 } 254 if err != errTestFailed { 255 return err 256 } 257 r.fails = append(r.fails, t) 258 if r.opts.Fails < 0 { 259 // We have an infinite fail budget, so keep plowing through 260 // failed tests. This test run is still going to fail. 261 return nil 262 } 263 if r.opts.Fails >= len(r.fails) { 264 // We've failed less than our budget, so we can still keep going. 265 // This test run is still going to fail. 266 return nil 267 } 268 // We ate up our fail budget. 269 return err 270 } 271 272 func (r *runner) runTests() error { 273 var eg errgroup.Group 274 q := make(chan string, len(r.tests)) 275 for i := 0; i < r.opts.Parallel; i++ { 276 eg.Go(func() error { 277 for f := range q { 278 err := r.runTestFixError(f) 279 if err != nil { 280 return err 281 } 282 if r.opts.Pause > 0 { 283 time.Sleep(r.opts.Pause) 284 } 285 } 286 return nil 287 }) 288 } 289 for _, f := range r.tests { 290 q <- f 291 } 292 close(q) 293 return eg.Wait() 294 } 295 296 func (r *runner) cleanup() { 297 if r.opts.Preserve || r.opts.NoCompile { 298 return 299 } 300 n := r.testerName() 301 err := os.Remove(n) 302 fmt.Printf("RMOV: %s\n", n) 303 if err != nil { 304 logError("could not remove %s: %s", n, err.Error()) 305 } 306 } 307 308 func (r *runner) debugStartup() { 309 dir, _ := os.Getwd() 310 fmt.Printf("WDIR: %s\n", dir) 311 } 312 313 func (r *runner) testExists() (bool, error) { 314 f := r.testerName() 315 info, err := os.Stat(f) 316 if os.IsNotExist(err) { 317 return false, nil 318 } 319 if err != nil { 320 return false, err 321 } 322 if info.Mode().IsRegular() { 323 return true, nil 324 } 325 return false, fmt.Errorf("%s: file of wrong type", f) 326 327 } 328 329 func (r *runner) run() error { 330 start := time.Now() 331 err := r.parseArgs() 332 if err != nil { 333 return err 334 } 335 336 r.debugStartup() 337 err = r.compile() 338 if err != nil { 339 return err 340 } 341 exists, err := r.testExists() 342 if exists { 343 err = r.listTests() 344 if err != nil { 345 return err 346 } 347 err = r.runTests() 348 r.cleanup() 349 } 350 end := time.Now() 351 diff := end.Sub(start) 352 fmt.Printf("DONE: in %s\n", diff) 353 if err != nil { 354 return err 355 } 356 if len(r.fails) > 0 { 357 // If we have more than 15 tests, repeat at the end which tests failed, 358 // so we don't have to scroll all the way up. 359 if len(r.tests) > 15 { 360 for _, t := range r.fails { 361 fmt.Printf("FAIL: %s\n", t) 362 } 363 } 364 return fmt.Errorf("RED!: %d total tests failed", len(r.fails)) 365 } 366 return nil 367 } 368 369 func main2() error { 370 runner := runner{} 371 return runner.run() 372 } 373 374 func main() { 375 err := main2() 376 if err != nil { 377 logError(err.Error()) 378 fmt.Printf("EXIT: 2\n") 379 os.Exit(2) 380 } 381 fmt.Printf("EXIT: 0\n") 382 os.Exit(0) 383 }