github.com/nya3jp/tast@v0.0.0-20230601000426-85c8e4d83a9b/src/go.chromium.org/tast/core/internal/devserver/real.go (about)

     1  // Copyright 2018 The ChromiumOS Authors
     2  // Use of this source code is governed by a BSD-style license that can be
     3  // found in the LICENSE file.
     4  
     5  package devserver
     6  
     7  import (
     8  	"context"
     9  	"errors"
    10  	"fmt"
    11  	"hash/crc32"
    12  	"io"
    13  	"io/ioutil"
    14  	"math/rand"
    15  	"net/http"
    16  	"net/url"
    17  	"os"
    18  	"path"
    19  	"regexp"
    20  	"sort"
    21  	"strings"
    22  	"time"
    23  
    24  	"go.chromium.org/tast/core/internal/logging"
    25  )
    26  
    27  const (
    28  	swarmingTaskIDHeaderName = "X-SWARMING-TASK-ID"
    29  	buildBucketIDHeaderName  = "X-BBID"
    30  )
    31  
    32  var errNotStaged = errors.New("no staged file found")
    33  
    34  // checkHealth makes a HTTP request to the devserver at dsURL to check if it is up.
    35  func checkHealth(ctx context.Context, cl *http.Client, dsURL string) error {
    36  	req, err := http.NewRequest("GET", fmt.Sprintf("%s/check_health", dsURL), nil)
    37  	if err != nil {
    38  		return err
    39  	}
    40  
    41  	req = req.WithContext(ctx)
    42  	res, err := cl.Do(req)
    43  	if err != nil {
    44  		return err
    45  	}
    46  	res.Body.Close()
    47  	if res.StatusCode != http.StatusOK {
    48  		out, _ := ioutil.ReadAll(res.Body)
    49  		s := scrapeInternalError(out)
    50  		return fmt.Errorf("check_health returned %d: %s", res.StatusCode, s)
    51  	}
    52  	return nil
    53  }
    54  
    55  type server struct {
    56  	url string // URL of a devserver in "http://host:port" format
    57  	err error  // nil if the server is up; otherwise describes why it is considered down
    58  }
    59  
    60  func (s server) String() string {
    61  	if s.err == nil {
    62  		return fmt.Sprintf("[%s UP]", s.url)
    63  	}
    64  	return fmt.Sprintf("[%s DOWN (%v)]", s.url, s.err)
    65  }
    66  
// RealClient is an implementation of Client to communicate with real devservers.
type RealClient struct {
	// servers holds every devserver passed to NewRealClient along with its
	// health check result, sorted by URL.
	servers []server
	// cl is the HTTP client used for all requests to devservers.
	cl *http.Client
	// stageRetryWaits holds the waits between retries of stage requests;
	// its length is the maximum number of retries.
	stageRetryWaits []time.Duration
	// swarmingTaskID is sent in the X-SWARMING-TASK-ID request header.
	swarmingTaskID string
	// buildBucketID is sent in the X-BBID request header.
	buildBucketID string
}

// Compile-time check that *RealClient satisfies Client.
var _ Client = &RealClient{}
    77  
// RealClientOptions contains options used when connecting to devserver.
type RealClientOptions struct {
	// HTTPClient is the HTTP client to use. If nil, defaultHTTPClient is used.
	HTTPClient *http.Client

	// StageRetryWaits specifies the retry strategy for stage.
	// Its length is the number of retries and the i-th value is the interval before the i-th retry.
	// If nil, the default strategy is used. If it is a zero-length slice, no retry is attempted.
	StageRetryWaits []time.Duration

	// SwarmingTaskID specifies the task ID of the scheduled job that runs Tast tests.
	SwarmingTaskID string

	// BuildBucketID specifies the build bucket ID of the scheduled job that runs Tast tests.
	BuildBucketID string
}

// defaultOptions holds the HTTP client and the stage retry waits that
// NewRealClient falls back to when the corresponding option is nil.
var defaultOptions = &RealClientOptions{
	HTTPClient:      defaultHTTPClient,
	StageRetryWaits: []time.Duration{2 * time.Second, 4 * time.Second, 8 * time.Second},
}
    99  
   100  // NewRealClient creates a RealClient.
   101  // This function checks if devservers at dsURLs are up, and selects a subset of devservers to use.
   102  // A devserver URL is usually in the form of "http://<hostname>:<port>", without trailing slashes.
   103  // If we can not verify a devserver is up within ctx's timeout, it is considered down. Be sure to
   104  // set ctx's timeout carefully since this function can block until it expires if any devserver is down.
   105  // If o is nil, default options are used. If o is partially nil, defaults are used for them.
   106  func NewRealClient(ctx context.Context, dsURLs []string, o *RealClientOptions) *RealClient {
   107  	if o == nil {
   108  		o = &RealClientOptions{}
   109  	}
   110  	cl := o.HTTPClient
   111  	if cl == nil {
   112  		cl = defaultOptions.HTTPClient
   113  	}
   114  	stageRetryWaits := o.StageRetryWaits
   115  	if stageRetryWaits == nil {
   116  		stageRetryWaits = defaultOptions.StageRetryWaits
   117  	}
   118  
   119  	ch := make(chan server, len(dsURLs))
   120  
   121  	for _, dsURL := range dsURLs {
   122  		go func(dsURL string) {
   123  			err := checkHealth(ctx, cl, dsURL)
   124  			ch <- server{dsURL, err}
   125  		}(dsURL)
   126  	}
   127  
   128  	var servers []server
   129  	for range dsURLs {
   130  		servers = append(servers, <-ch)
   131  	}
   132  	sort.Slice(servers, func(i, j int) bool {
   133  		return servers[i].url < servers[j].url
   134  	})
   135  	swarmingTaskID := o.SwarmingTaskID
   136  	if swarmingTaskID == "" {
   137  		swarmingTaskID = "none"
   138  	}
   139  	buildBucketID := o.BuildBucketID
   140  	if buildBucketID == "" {
   141  		buildBucketID = "none"
   142  	}
   143  	return &RealClient{servers, cl, stageRetryWaits, swarmingTaskID, buildBucketID}
   144  }
   145  
   146  // UpServerURLs returns URLs of operational devservers.
   147  func (c *RealClient) UpServerURLs() []string {
   148  	var urls []string
   149  	for _, s := range c.servers {
   150  		if s.err == nil {
   151  			urls = append(urls, s.url)
   152  		}
   153  	}
   154  	return urls
   155  }
   156  
   157  // Status returns a message describing the status of devservers.
   158  func (c *RealClient) Status() string {
   159  	return fmt.Sprint(c.servers)
   160  }
   161  
// TearDown does nothing and always returns nil.
func (c *RealClient) TearDown() error {
	return nil
}
   166  
   167  // Stage stages a file on GCS via devservers. It returns an error if no devserver is up.
   168  func (c *RealClient) Stage(ctx context.Context, gsURL string) (*url.URL, error) {
   169  	bucket, path, err := ParseGSURL(gsURL)
   170  	if err != nil {
   171  		return nil, err
   172  	}
   173  
   174  	if len(c.UpServerURLs()) == 0 {
   175  		return nil, errors.New("no devserver is up")
   176  	}
   177  
   178  	sctx, cancel := context.WithTimeout(ctx, 3*time.Second)
   179  	defer cancel()
   180  
   181  	// Use an already staged file if there is any.
   182  	if dsURL, err := c.findStaged(sctx, bucket, path); err == nil {
   183  		logging.Infof(ctx, "Downloading %s via %s (already staged)", gsURL, dsURL)
   184  		staticURL, err := c.staticURL(ctx, dsURL, bucket, path)
   185  		if err != nil {
   186  			return nil, fmt.Errorf("failed to stage from %s: %v", dsURL, err)
   187  		}
   188  		return staticURL, nil
   189  	} else if err != errNotStaged {
   190  		return nil, fmt.Errorf("failed to find a staged file: %v", err)
   191  	}
   192  
   193  	// Choose a devserver and download the file via it.
   194  	dsURL := c.chooseServer(gsURL)
   195  	logging.Infof(ctx, "Staging %s to %s", gsURL, dsURL)
   196  	if err := c.stage(ctx, dsURL, bucket, path); err != nil {
   197  		if os.IsNotExist(err) {
   198  			return nil, err
   199  		}
   200  		return nil, fmt.Errorf("failed to stage on %s: %v", dsURL, err)
   201  	}
   202  
   203  	// Do a validity check that the file has been staged successfully.
   204  	if err := c.checkStaged(ctx, dsURL, bucket, path); err != nil {
   205  		return nil, fmt.Errorf("failed to stage on %s: %v", dsURL, err)
   206  	}
   207  
   208  	logging.Infof(ctx, "Downloading %s via %s (newly staged)", gsURL, dsURL)
   209  	staticURL, err := c.staticURL(ctx, dsURL, bucket, path)
   210  	if err != nil {
   211  		return nil, fmt.Errorf("failed to stage from %s: %v", dsURL, err)
   212  	}
   213  	return staticURL, nil
   214  }
   215  
   216  // Open downloads a file on GCS via devservers. It returns an error if no devserver is up.
   217  func (c *RealClient) Open(ctx context.Context, gsURL string) (io.ReadCloser, error) {
   218  	staticURL, err := c.Stage(ctx, gsURL)
   219  	if err != nil {
   220  		return nil, err
   221  	}
   222  
   223  	r, err := c.openStaged(ctx, staticURL)
   224  	if err != nil {
   225  		return nil, fmt.Errorf("failed to download from %s: %v", staticURL, err)
   226  	}
   227  	return r, nil
   228  }
   229  
   230  // findStaged tries to find an already staged file from selected servers.
   231  // It returns errNotStaged if no staged file is found.
   232  func (c *RealClient) findStaged(ctx context.Context, bucket, path string) (dsURL string, err error) {
   233  	dsURLs := c.UpServerURLs()
   234  	ch := make(chan string, len(dsURLs))
   235  
   236  	for _, dsURL := range dsURLs {
   237  		go func(dsURL string) {
   238  			if err := c.checkStaged(ctx, dsURL, bucket, path); err != nil {
   239  				ch <- ""
   240  			} else {
   241  				ch <- dsURL
   242  			}
   243  		}(dsURL)
   244  	}
   245  
   246  	var found []string
   247  	for range dsURLs {
   248  		dsURL := <-ch
   249  		if dsURL != "" {
   250  			found = append(found, dsURL)
   251  		}
   252  	}
   253  
   254  	if len(found) == 0 {
   255  		return "", errNotStaged
   256  	}
   257  	return found[rand.Intn(len(found))], nil
   258  }
   259  
   260  // checkStaged checks if a file is staged on the devserver at dsURL.
   261  // It returns errNotStaged if a file is not yet staged.
   262  func (c *RealClient) checkStaged(ctx context.Context, dsURL, bucket, gsPath string) error {
   263  	checkURL := buildRequestURL(dsURL+"/is_staged", bucket, gsPath)
   264  	req, err := http.NewRequest("GET", checkURL, nil)
   265  	if err != nil {
   266  		return err
   267  	}
   268  	req = req.WithContext(ctx)
   269  	req.Header.Set(swarmingTaskIDHeaderName, c.swarmingTaskID)
   270  	req.Header.Set(buildBucketIDHeaderName, c.buildBucketID)
   271  
   272  	res, err := c.cl.Do(req)
   273  	if err != nil {
   274  		return err
   275  	}
   276  	defer res.Body.Close()
   277  
   278  	switch res.StatusCode {
   279  	case http.StatusOK:
   280  		b, err := ioutil.ReadAll(res.Body)
   281  		if err != nil {
   282  			return fmt.Errorf("failed to read response body: %v", err)
   283  		}
   284  		switch val := strings.TrimSpace(string(b)); val {
   285  		case "True":
   286  			return nil
   287  		case "False":
   288  			return errNotStaged
   289  		case "This is an ephemeral devserver provided by Tast.":
   290  			// TODO(nya): Remove this check after 20190710.
   291  			return fmt.Errorf("tast command is old; please run ./update_chroot")
   292  		default:
   293  			return fmt.Errorf("got response %q", val)
   294  		}
   295  	case http.StatusInternalServerError:
   296  		out, _ := ioutil.ReadAll(res.Body)
   297  		err := scrapeInternalError(out)
   298  		return fmt.Errorf("got status %d: %s", res.StatusCode, err)
   299  	default:
   300  		return fmt.Errorf("got status %d", res.StatusCode)
   301  	}
   302  }
   303  
   304  // chooseServer chooses a devserver to use from c.selected. It tries to choose
   305  // the same server for the same gsURL.
   306  func (c *RealClient) chooseServer(gsURL string) string {
   307  	dsURLs := c.UpServerURLs()
   308  
   309  	// score returns a random number from a devserver URL and a file URL as seeds.
   310  	// By using this function, the same devserver is usually selected for a file
   311  	// provided that the same set of devservers are up.
   312  	score := func(i int) uint32 {
   313  		return crc32.ChecksumIEEE([]byte(dsURLs[i] + "\x00" + gsURL))
   314  	}
   315  	sort.Slice(dsURLs, func(i, j int) bool {
   316  		return score(i) < score(j)
   317  	})
   318  	return dsURLs[0]
   319  }
   320  
   321  // stage requests the devserver at dsURL to stage a file.
   322  func (c *RealClient) stage(ctx context.Context, dsURL, bucket, gsPath string) error {
   323  	stageURL := buildRequestURL(dsURL+"/stage", bucket, gsPath)
   324  	req, err := http.NewRequest("GET", stageURL, nil)
   325  	if err != nil {
   326  		return err
   327  	}
   328  	req = req.WithContext(ctx)
   329  	req.Header.Set(swarmingTaskIDHeaderName, c.swarmingTaskID)
   330  	req.Header.Set(buildBucketIDHeaderName, c.buildBucketID)
   331  
   332  	for i := 0; ; i++ {
   333  		start := time.Now()
   334  
   335  		retryable, err := c.sendStageRequest(ctx, req)
   336  		if err == nil || !retryable || i >= len(c.stageRetryWaits) {
   337  			return err
   338  		}
   339  
   340  		elapsed := time.Now().Sub(start)
   341  		if remaining := c.stageRetryWaits[i] - elapsed; remaining > 0 {
   342  			logging.Infof(ctx, "Retry stage in %v: %v", remaining.Round(time.Millisecond), err)
   343  			select {
   344  			case <-time.After(remaining):
   345  			case <-ctx.Done():
   346  				return ctx.Err()
   347  			}
   348  		} else {
   349  			logging.Infof(ctx, "Retrying stage: %v", err)
   350  		}
   351  	}
   352  }
   353  
   354  // sendStageRequest sends the stage request to devserver.
   355  // It analyzes error (if any) and determines if it is retryable.
   356  func (c *RealClient) sendStageRequest(ctx context.Context, req *http.Request) (retryable bool, err error) {
   357  	res, err := c.cl.Do(req)
   358  	if err != nil {
   359  		return true, err
   360  	}
   361  	defer res.Body.Close()
   362  
   363  	switch res.StatusCode {
   364  	case http.StatusOK:
   365  		return false, nil
   366  	case http.StatusInternalServerError:
   367  		out, _ := ioutil.ReadAll(res.Body)
   368  		s := scrapeInternalError(out)
   369  		if strings.Contains(s, "Could not find") || strings.Contains(s, "file not found") {
   370  			return false, os.ErrNotExist
   371  		}
   372  		return true, fmt.Errorf("got status %d: %s", res.StatusCode, s)
   373  	default:
   374  		return true, fmt.Errorf("got status %d", res.StatusCode)
   375  	}
   376  }
   377  
   378  func (c *RealClient) staticURL(ctx context.Context, dsURL, bucket, path string) (*url.URL, error) {
   379  	staticURL, err := url.Parse(dsURL)
   380  	if err != nil {
   381  		return nil, err
   382  	}
   383  	staticURL.Path += "/static/" + path
   384  	query := make(url.Values)
   385  	query.Set("gs_bucket", bucket)
   386  	staticURL.RawQuery = query.Encode()
   387  	return staticURL, nil
   388  }
   389  
   390  // openStaged opens a staged file from the devserver at staticURL.
   391  func (c *RealClient) openStaged(ctx context.Context, staticURL *url.URL) (io.ReadCloser, error) {
   392  	open := func(offset int64) (io.ReadCloser, error) {
   393  		req, err := http.NewRequest("GET", staticURL.String(), nil)
   394  		if err != nil {
   395  			return nil, err
   396  		}
   397  		req.Header.Set(swarmingTaskIDHeaderName, c.swarmingTaskID)
   398  		req.Header.Set(buildBucketIDHeaderName, c.buildBucketID)
   399  		// Negotiate header disables automatic content negotiation. See:
   400  		// https://crbug.com/967305
   401  		// https://tools.ietf.org/html/rfc2295#section-8.4
   402  		req.Header.Set("Negotiate", "vlist")
   403  		if offset > 0 {
   404  			req.Header.Set("Range", fmt.Sprintf("bytes=%d-", offset))
   405  		}
   406  		req = req.WithContext(ctx)
   407  
   408  		// TODO: b/279489613 -- Remove following log after verifying new headers working with
   409  		// new cache servers.
   410  		logging.Infof(ctx, "Sending GET request %s to cache server with headers %s=%s and %s=%s",
   411  			staticURL.String(), swarmingTaskIDHeaderName, c.swarmingTaskID, buildBucketIDHeaderName, c.buildBucketID)
   412  
   413  		res, err := c.cl.Do(req)
   414  		if err != nil {
   415  			return nil, err
   416  		}
   417  
   418  		switch res.StatusCode {
   419  		case http.StatusOK, http.StatusPartialContent:
   420  			return res.Body, nil
   421  		case http.StatusInternalServerError:
   422  			defer res.Body.Close()
   423  			out, _ := ioutil.ReadAll(res.Body)
   424  			s := scrapeInternalError(out)
   425  			return nil, fmt.Errorf("got status %d: %s", res.StatusCode, s)
   426  		default:
   427  			res.Body.Close()
   428  			return nil, fmt.Errorf("got status %d", res.StatusCode)
   429  		}
   430  	}
   431  
   432  	return newResumingReader(open)
   433  }
   434  
   435  // resumingReader is io.ReadCloser that tries to reopen when it encounters
   436  // resumable errors.
   437  type resumingReader struct {
   438  	// open is a function to open a reader with an offset. It is immutable.
   439  	open func(offset int64) (io.ReadCloser, error)
   440  
   441  	// reader is a current underlying ReadCloser. It can be updated on Read
   442  	// if we encounter resumable errors. It can never be nil.
   443  	reader io.ReadCloser
   444  	// pos is the number of bytes read so far.
   445  	pos int64
   446  	// err is set when we encounter a non-resumable error on Read.
   447  	err error
   448  }
   449  
   450  var _ io.ReadCloser = &resumingReader{}
   451  
   452  // newResumingReader creates a new resumingReader from a function open that
   453  // returns io.ReadCloser with a specified offset.
   454  // open is called immediately in this function, and also can be called multiple
   455  // times in resumingReader.Read when errors are seen.
   456  func newResumingReader(open func(offset int64) (io.ReadCloser, error)) (*resumingReader, error) {
   457  	reader, err := open(0)
   458  	if err != nil {
   459  		return nil, err
   460  	}
   461  	return &resumingReader{
   462  		open:   open,
   463  		reader: reader,
   464  	}, nil
   465  }
   466  
   467  func (r *resumingReader) Read(p []byte) (int, error) {
   468  	// Return immediately if we have encountered a non-resumable error.
   469  	if r.err != nil {
   470  		return 0, r.err
   471  	}
   472  
   473  	reopened := false
   474  	for {
   475  		// Attempt a read.
   476  		n, err := r.reader.Read(p)
   477  		r.pos += int64(n)
   478  		if err == nil {
   479  			return n, nil
   480  		}
   481  
   482  		// If the error is non-resumable, save it and return.
   483  		if !isResumable(err) {
   484  			r.err = err
   485  			return n, err
   486  		}
   487  
   488  		// If we've just reopened the stream and we still can't read any data,
   489  		// do not reopen it again to avoid entering an infinite loop of retries.
   490  		if reopened && n == 0 {
   491  			r.err = err
   492  			return n, err
   493  		}
   494  
   495  		// The error is resumable, try reopening.
   496  		reader, err := r.open(r.pos)
   497  		if err != nil {
   498  			// Errors from open are always non-resumable.
   499  			r.err = err
   500  			return n, err
   501  		}
   502  
   503  		r.reader.Close()
   504  		r.reader = reader
   505  
   506  		// Return if we read some bytes. Otherwise, retry immediately after
   507  		// setting the reopened flag.
   508  		if n > 0 {
   509  			return n, nil
   510  		}
   511  		reopened = true
   512  	}
   513  }
   514  
   515  func (r *resumingReader) Close() error {
   516  	return r.reader.Close()
   517  }
   518  
   519  func isResumable(err error) bool {
   520  	return err == io.ErrUnexpectedEOF
   521  }
   522  
   523  var internalErrorRegexp = regexp.MustCompile(`(?m)^(.*)\n\s*</pre>`)
   524  
   525  // scrapeInternalError scrapes an error message from an internal server response
   526  // from devservers.
   527  func scrapeInternalError(out []byte) string {
   528  	m := internalErrorRegexp.FindStringSubmatch(string(out))
   529  	if m == nil {
   530  		return "unknown error"
   531  	}
   532  	return m[1]
   533  }
   534  
   535  // buildRequestURL builds a URL for devserver requests. endpoint is either
   536  // .../stage or .../is_staged.
   537  func buildRequestURL(endpoint, bucket, gsPath string) string {
   538  	gsDirURL := url.URL{
   539  		Scheme: "gs",
   540  		Host:   bucket,
   541  	}
   542  	if dir := path.Dir(gsPath); dir != "." {
   543  		gsDirURL.Path = dir
   544  	}
   545  	// URL.String() escapes URL, including spaces, which is then escaped again in
   546  	// values.Encode() below. Specifically, spaces are double escaped " " -> %20 -> %2520.
   547  	// To prevent this, unescaping the path before passing it to values.Encode().
   548  	unescapedGsDirURL, err := url.PathUnescape(gsDirURL.String())
   549  	if err != nil {
   550  		unescapedGsDirURL = gsDirURL.String()
   551  	}
   552  	values := url.Values{
   553  		"archive_url": {unescapedGsDirURL},
   554  		"files":       {path.Base(gsPath)},
   555  	}
   556  	return fmt.Sprintf("%s?%s", endpoint, values.Encode())
   557  }