github.com/nya3jp/tast@v0.0.0-20230601000426-85c8e4d83a9b/src/go.chromium.org/tast/core/internal/devserver/real.go (about) 1 // Copyright 2018 The ChromiumOS Authors 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 package devserver 6 7 import ( 8 "context" 9 "errors" 10 "fmt" 11 "hash/crc32" 12 "io" 13 "io/ioutil" 14 "math/rand" 15 "net/http" 16 "net/url" 17 "os" 18 "path" 19 "regexp" 20 "sort" 21 "strings" 22 "time" 23 24 "go.chromium.org/tast/core/internal/logging" 25 ) 26 27 const ( 28 swarmingTaskIDHeaderName = "X-SWARMING-TASK-ID" 29 buildBucketIDHeaderName = "X-BBID" 30 ) 31 32 var errNotStaged = errors.New("no staged file found") 33 34 // checkHealth makes a HTTP request to the devserver at dsURL to check if it is up. 35 func checkHealth(ctx context.Context, cl *http.Client, dsURL string) error { 36 req, err := http.NewRequest("GET", fmt.Sprintf("%s/check_health", dsURL), nil) 37 if err != nil { 38 return err 39 } 40 41 req = req.WithContext(ctx) 42 res, err := cl.Do(req) 43 if err != nil { 44 return err 45 } 46 res.Body.Close() 47 if res.StatusCode != http.StatusOK { 48 out, _ := ioutil.ReadAll(res.Body) 49 s := scrapeInternalError(out) 50 return fmt.Errorf("check_health returned %d: %s", res.StatusCode, s) 51 } 52 return nil 53 } 54 55 type server struct { 56 url string // URL of a devserver in "http://host:port" format 57 err error // nil if the server is up; otherwise describes why it is considered down 58 } 59 60 func (s server) String() string { 61 if s.err == nil { 62 return fmt.Sprintf("[%s UP]", s.url) 63 } 64 return fmt.Sprintf("[%s DOWN (%v)]", s.url, s.err) 65 } 66 67 // RealClient is an implementation of Client to communicate with real devservers. 
68 type RealClient struct { 69 servers []server 70 cl *http.Client 71 stageRetryWaits []time.Duration 72 swarmingTaskID string 73 buildBucketID string 74 } 75 76 var _ Client = &RealClient{} 77 78 // RealClientOptions contains options used when connecting to devserver. 79 type RealClientOptions struct { 80 // HTTPClient is HTTP client to use. If nil, defaultHTTPClient is used. 81 HTTPClient *http.Client 82 83 // StageRetryWaits instructs retry strategy for stage. 84 // Its length is the number of retries and the i-th value is the interval before i-th retry. 85 // If nil, default strategy is used. If zero-length slice, no retry is attempted. 86 StageRetryWaits []time.Duration 87 88 // SwarmingTaskID specifies the task ID of the scheduled job that run Tast tests. 89 SwarmingTaskID string 90 91 // BuildBucketID specifies the build bucket ID for the schedule job that run Tast tests 92 BuildBucketID string 93 } 94 95 var defaultOptions = &RealClientOptions{ 96 HTTPClient: defaultHTTPClient, 97 StageRetryWaits: []time.Duration{2 * time.Second, 4 * time.Second, 8 * time.Second}, 98 } 99 100 // NewRealClient creates a RealClient. 101 // This function checks if devservers at dsURLs are up, and selects a subset of devservers to use. 102 // A devserver URL is usually in the form of "http://<hostname>:<port>", without trailing slashes. 103 // If we can not verify a devserver is up within ctx's timeout, it is considered down. Be sure to 104 // set ctx's timeout carefully since this function can block until it expires if any devserver is down. 105 // If o is nil, default options are used. If o is partially nil, defaults are used for them. 
106 func NewRealClient(ctx context.Context, dsURLs []string, o *RealClientOptions) *RealClient { 107 if o == nil { 108 o = &RealClientOptions{} 109 } 110 cl := o.HTTPClient 111 if cl == nil { 112 cl = defaultOptions.HTTPClient 113 } 114 stageRetryWaits := o.StageRetryWaits 115 if stageRetryWaits == nil { 116 stageRetryWaits = defaultOptions.StageRetryWaits 117 } 118 119 ch := make(chan server, len(dsURLs)) 120 121 for _, dsURL := range dsURLs { 122 go func(dsURL string) { 123 err := checkHealth(ctx, cl, dsURL) 124 ch <- server{dsURL, err} 125 }(dsURL) 126 } 127 128 var servers []server 129 for range dsURLs { 130 servers = append(servers, <-ch) 131 } 132 sort.Slice(servers, func(i, j int) bool { 133 return servers[i].url < servers[j].url 134 }) 135 swarmingTaskID := o.SwarmingTaskID 136 if swarmingTaskID == "" { 137 swarmingTaskID = "none" 138 } 139 buildBucketID := o.BuildBucketID 140 if buildBucketID == "" { 141 buildBucketID = "none" 142 } 143 return &RealClient{servers, cl, stageRetryWaits, swarmingTaskID, buildBucketID} 144 } 145 146 // UpServerURLs returns URLs of operational devservers. 147 func (c *RealClient) UpServerURLs() []string { 148 var urls []string 149 for _, s := range c.servers { 150 if s.err == nil { 151 urls = append(urls, s.url) 152 } 153 } 154 return urls 155 } 156 157 // Status returns a message describing the status of devservers. 158 func (c *RealClient) Status() string { 159 return fmt.Sprint(c.servers) 160 } 161 162 // TearDown does nothing. 163 func (c *RealClient) TearDown() error { 164 return nil 165 } 166 167 // Stage stages a file on GCS via devservers. It returns an error if no devserver is up. 
168 func (c *RealClient) Stage(ctx context.Context, gsURL string) (*url.URL, error) { 169 bucket, path, err := ParseGSURL(gsURL) 170 if err != nil { 171 return nil, err 172 } 173 174 if len(c.UpServerURLs()) == 0 { 175 return nil, errors.New("no devserver is up") 176 } 177 178 sctx, cancel := context.WithTimeout(ctx, 3*time.Second) 179 defer cancel() 180 181 // Use an already staged file if there is any. 182 if dsURL, err := c.findStaged(sctx, bucket, path); err == nil { 183 logging.Infof(ctx, "Downloading %s via %s (already staged)", gsURL, dsURL) 184 staticURL, err := c.staticURL(ctx, dsURL, bucket, path) 185 if err != nil { 186 return nil, fmt.Errorf("failed to stage from %s: %v", dsURL, err) 187 } 188 return staticURL, nil 189 } else if err != errNotStaged { 190 return nil, fmt.Errorf("failed to find a staged file: %v", err) 191 } 192 193 // Choose a devserver and download the file via it. 194 dsURL := c.chooseServer(gsURL) 195 logging.Infof(ctx, "Staging %s to %s", gsURL, dsURL) 196 if err := c.stage(ctx, dsURL, bucket, path); err != nil { 197 if os.IsNotExist(err) { 198 return nil, err 199 } 200 return nil, fmt.Errorf("failed to stage on %s: %v", dsURL, err) 201 } 202 203 // Do a validity check that the file has been staged successfully. 204 if err := c.checkStaged(ctx, dsURL, bucket, path); err != nil { 205 return nil, fmt.Errorf("failed to stage on %s: %v", dsURL, err) 206 } 207 208 logging.Infof(ctx, "Downloading %s via %s (newly staged)", gsURL, dsURL) 209 staticURL, err := c.staticURL(ctx, dsURL, bucket, path) 210 if err != nil { 211 return nil, fmt.Errorf("failed to stage from %s: %v", dsURL, err) 212 } 213 return staticURL, nil 214 } 215 216 // Open downloads a file on GCS via devservers. It returns an error if no devserver is up. 
217 func (c *RealClient) Open(ctx context.Context, gsURL string) (io.ReadCloser, error) { 218 staticURL, err := c.Stage(ctx, gsURL) 219 if err != nil { 220 return nil, err 221 } 222 223 r, err := c.openStaged(ctx, staticURL) 224 if err != nil { 225 return nil, fmt.Errorf("failed to download from %s: %v", staticURL, err) 226 } 227 return r, nil 228 } 229 230 // findStaged tries to find an already staged file from selected servers. 231 // It returns errNotStaged if no staged file is found. 232 func (c *RealClient) findStaged(ctx context.Context, bucket, path string) (dsURL string, err error) { 233 dsURLs := c.UpServerURLs() 234 ch := make(chan string, len(dsURLs)) 235 236 for _, dsURL := range dsURLs { 237 go func(dsURL string) { 238 if err := c.checkStaged(ctx, dsURL, bucket, path); err != nil { 239 ch <- "" 240 } else { 241 ch <- dsURL 242 } 243 }(dsURL) 244 } 245 246 var found []string 247 for range dsURLs { 248 dsURL := <-ch 249 if dsURL != "" { 250 found = append(found, dsURL) 251 } 252 } 253 254 if len(found) == 0 { 255 return "", errNotStaged 256 } 257 return found[rand.Intn(len(found))], nil 258 } 259 260 // checkStaged checks if a file is staged on the devserver at dsURL. 261 // It returns errNotStaged if a file is not yet staged. 
262 func (c *RealClient) checkStaged(ctx context.Context, dsURL, bucket, gsPath string) error { 263 checkURL := buildRequestURL(dsURL+"/is_staged", bucket, gsPath) 264 req, err := http.NewRequest("GET", checkURL, nil) 265 if err != nil { 266 return err 267 } 268 req = req.WithContext(ctx) 269 req.Header.Set(swarmingTaskIDHeaderName, c.swarmingTaskID) 270 req.Header.Set(buildBucketIDHeaderName, c.buildBucketID) 271 272 res, err := c.cl.Do(req) 273 if err != nil { 274 return err 275 } 276 defer res.Body.Close() 277 278 switch res.StatusCode { 279 case http.StatusOK: 280 b, err := ioutil.ReadAll(res.Body) 281 if err != nil { 282 return fmt.Errorf("failed to read response body: %v", err) 283 } 284 switch val := strings.TrimSpace(string(b)); val { 285 case "True": 286 return nil 287 case "False": 288 return errNotStaged 289 case "This is an ephemeral devserver provided by Tast.": 290 // TODO(nya): Remove this check after 20190710. 291 return fmt.Errorf("tast command is old; please run ./update_chroot") 292 default: 293 return fmt.Errorf("got response %q", val) 294 } 295 case http.StatusInternalServerError: 296 out, _ := ioutil.ReadAll(res.Body) 297 err := scrapeInternalError(out) 298 return fmt.Errorf("got status %d: %s", res.StatusCode, err) 299 default: 300 return fmt.Errorf("got status %d", res.StatusCode) 301 } 302 } 303 304 // chooseServer chooses a devserver to use from c.selected. It tries to choose 305 // the same server for the same gsURL. 306 func (c *RealClient) chooseServer(gsURL string) string { 307 dsURLs := c.UpServerURLs() 308 309 // score returns a random number from a devserver URL and a file URL as seeds. 310 // By using this function, the same devserver is usually selected for a file 311 // provided that the same set of devservers are up. 
312 score := func(i int) uint32 { 313 return crc32.ChecksumIEEE([]byte(dsURLs[i] + "\x00" + gsURL)) 314 } 315 sort.Slice(dsURLs, func(i, j int) bool { 316 return score(i) < score(j) 317 }) 318 return dsURLs[0] 319 } 320 321 // stage requests the devserver at dsURL to stage a file. 322 func (c *RealClient) stage(ctx context.Context, dsURL, bucket, gsPath string) error { 323 stageURL := buildRequestURL(dsURL+"/stage", bucket, gsPath) 324 req, err := http.NewRequest("GET", stageURL, nil) 325 if err != nil { 326 return err 327 } 328 req = req.WithContext(ctx) 329 req.Header.Set(swarmingTaskIDHeaderName, c.swarmingTaskID) 330 req.Header.Set(buildBucketIDHeaderName, c.buildBucketID) 331 332 for i := 0; ; i++ { 333 start := time.Now() 334 335 retryable, err := c.sendStageRequest(ctx, req) 336 if err == nil || !retryable || i >= len(c.stageRetryWaits) { 337 return err 338 } 339 340 elapsed := time.Now().Sub(start) 341 if remaining := c.stageRetryWaits[i] - elapsed; remaining > 0 { 342 logging.Infof(ctx, "Retry stage in %v: %v", remaining.Round(time.Millisecond), err) 343 select { 344 case <-time.After(remaining): 345 case <-ctx.Done(): 346 return ctx.Err() 347 } 348 } else { 349 logging.Infof(ctx, "Retrying stage: %v", err) 350 } 351 } 352 } 353 354 // sendStageRequest sends the stage request to devserver. 355 // It analyzes error (if any) and determines if it is retryable. 
356 func (c *RealClient) sendStageRequest(ctx context.Context, req *http.Request) (retryable bool, err error) { 357 res, err := c.cl.Do(req) 358 if err != nil { 359 return true, err 360 } 361 defer res.Body.Close() 362 363 switch res.StatusCode { 364 case http.StatusOK: 365 return false, nil 366 case http.StatusInternalServerError: 367 out, _ := ioutil.ReadAll(res.Body) 368 s := scrapeInternalError(out) 369 if strings.Contains(s, "Could not find") || strings.Contains(s, "file not found") { 370 return false, os.ErrNotExist 371 } 372 return true, fmt.Errorf("got status %d: %s", res.StatusCode, s) 373 default: 374 return true, fmt.Errorf("got status %d", res.StatusCode) 375 } 376 } 377 378 func (c *RealClient) staticURL(ctx context.Context, dsURL, bucket, path string) (*url.URL, error) { 379 staticURL, err := url.Parse(dsURL) 380 if err != nil { 381 return nil, err 382 } 383 staticURL.Path += "/static/" + path 384 query := make(url.Values) 385 query.Set("gs_bucket", bucket) 386 staticURL.RawQuery = query.Encode() 387 return staticURL, nil 388 } 389 390 // openStaged opens a staged file from the devserver at staticURL. 391 func (c *RealClient) openStaged(ctx context.Context, staticURL *url.URL) (io.ReadCloser, error) { 392 open := func(offset int64) (io.ReadCloser, error) { 393 req, err := http.NewRequest("GET", staticURL.String(), nil) 394 if err != nil { 395 return nil, err 396 } 397 req.Header.Set(swarmingTaskIDHeaderName, c.swarmingTaskID) 398 req.Header.Set(buildBucketIDHeaderName, c.buildBucketID) 399 // Negotiate header disables automatic content negotiation. See: 400 // https://crbug.com/967305 401 // https://tools.ietf.org/html/rfc2295#section-8.4 402 req.Header.Set("Negotiate", "vlist") 403 if offset > 0 { 404 req.Header.Set("Range", fmt.Sprintf("bytes=%d-", offset)) 405 } 406 req = req.WithContext(ctx) 407 408 // TODO: b/279489613 -- Remove following log after verifying new headers working with 409 // new cache servers. 
410 logging.Infof(ctx, "Sending GET request %s to cache server with headers %s=%s and %s=%s", 411 staticURL.String(), swarmingTaskIDHeaderName, c.swarmingTaskID, buildBucketIDHeaderName, c.buildBucketID) 412 413 res, err := c.cl.Do(req) 414 if err != nil { 415 return nil, err 416 } 417 418 switch res.StatusCode { 419 case http.StatusOK, http.StatusPartialContent: 420 return res.Body, nil 421 case http.StatusInternalServerError: 422 defer res.Body.Close() 423 out, _ := ioutil.ReadAll(res.Body) 424 s := scrapeInternalError(out) 425 return nil, fmt.Errorf("got status %d: %s", res.StatusCode, s) 426 default: 427 res.Body.Close() 428 return nil, fmt.Errorf("got status %d", res.StatusCode) 429 } 430 } 431 432 return newResumingReader(open) 433 } 434 435 // resumingReader is io.ReadCloser that tries to reopen when it encounters 436 // resumable errors. 437 type resumingReader struct { 438 // open is a function to open a reader with an offset. It is immutable. 439 open func(offset int64) (io.ReadCloser, error) 440 441 // reader is a current underlying ReadCloser. It can be updated on Read 442 // if we encounter resumable errors. It can never be nil. 443 reader io.ReadCloser 444 // pos is the number of bytes read so far. 445 pos int64 446 // err is set when we encounter a non-resumable error on Read. 447 err error 448 } 449 450 var _ io.ReadCloser = &resumingReader{} 451 452 // newResumingReader creates a new resumingReader from a function open that 453 // returns io.ReadCloser with a specified offset. 454 // open is called immediately in this function, and also can be called multiple 455 // times in resumingReader.Read when errors are seen. 
456 func newResumingReader(open func(offset int64) (io.ReadCloser, error)) (*resumingReader, error) { 457 reader, err := open(0) 458 if err != nil { 459 return nil, err 460 } 461 return &resumingReader{ 462 open: open, 463 reader: reader, 464 }, nil 465 } 466 467 func (r *resumingReader) Read(p []byte) (int, error) { 468 // Return immediately if we have encountered a non-resumable error. 469 if r.err != nil { 470 return 0, r.err 471 } 472 473 reopened := false 474 for { 475 // Attempt a read. 476 n, err := r.reader.Read(p) 477 r.pos += int64(n) 478 if err == nil { 479 return n, nil 480 } 481 482 // If the error is non-resumable, save it and return. 483 if !isResumable(err) { 484 r.err = err 485 return n, err 486 } 487 488 // If we've just reopened the stream and we still can't read any data, 489 // do not reopen it again to avoid entering an infinite loop of retries. 490 if reopened && n == 0 { 491 r.err = err 492 return n, err 493 } 494 495 // The error is resumable, try reopening. 496 reader, err := r.open(r.pos) 497 if err != nil { 498 // Errors from open are always non-resumable. 499 r.err = err 500 return n, err 501 } 502 503 r.reader.Close() 504 r.reader = reader 505 506 // Return if we read some bytes. Otherwise, retry immediately after 507 // setting the reopened flag. 508 if n > 0 { 509 return n, nil 510 } 511 reopened = true 512 } 513 } 514 515 func (r *resumingReader) Close() error { 516 return r.reader.Close() 517 } 518 519 func isResumable(err error) bool { 520 return err == io.ErrUnexpectedEOF 521 } 522 523 var internalErrorRegexp = regexp.MustCompile(`(?m)^(.*)\n\s*</pre>`) 524 525 // scrapeInternalError scrapes an error message from an internal server response 526 // from devservers. 527 func scrapeInternalError(out []byte) string { 528 m := internalErrorRegexp.FindStringSubmatch(string(out)) 529 if m == nil { 530 return "unknown error" 531 } 532 return m[1] 533 } 534 535 // buildRequestURL builds a URL for devserver requests. 
// endpoint is either .../stage or .../is_staged.
// The query carries archive_url, the gs:// URL of the directory that contains
// gsPath within bucket, and files, the base name of gsPath.
func buildRequestURL(endpoint, bucket, gsPath string) string {
	gsDirURL := url.URL{
		Scheme: "gs",
		Host:   bucket,
	}
	// path.Dir returns "." for a file at the bucket root; leave Path empty then.
	if dir := path.Dir(gsPath); dir != "." {
		gsDirURL.Path = dir
	}
	// URL.String() escapes URL, including spaces, which is then escaped again in
	// values.Encode() below. Specifically, spaces are double escaped " " -> %20 -> %2520.
	// To prevent this, unescaping the path before passing it to values.Encode().
	archiveURL := gsDirURL.String()
	if unescaped, err := url.PathUnescape(archiveURL); err == nil {
		archiveURL = unescaped
	}
	values := url.Values{
		"archive_url": {archiveURL},
		"files":       {path.Base(gsPath)},
	}
	return fmt.Sprintf("%s?%s", endpoint, values.Encode())
}