golang.org/x/build@v0.0.0-20240506185731-218518f32b70/internal/sourcecache/source.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package sourcecache provides a cache of code found in Git repositories.
     6  // It talks directly to the Gerrit instance at go.googlesource.com.
     7  // If RegisterGitMirrorDial is called, it will first try to get code from gitmirror before falling back on Gerrit.
     8  package sourcecache
     9  
    10  import (
    11  	"bytes"
    12  	"context"
    13  	"errors"
    14  	"fmt"
    15  	"io"
    16  	"log"
    17  	"net"
    18  	"net/http"
    19  	"time"
    20  
    21  	"golang.org/x/build/internal/lru"
    22  	"golang.org/x/build/internal/singleflight"
    23  	"golang.org/x/build/internal/spanlog"
    24  )
    25  
    26  var processStartTime = time.Now()
    27  
    28  var sourceGroup singleflight.Group
    29  
    30  var sourceCache = lru.New(40) // repo-rev -> source
    31  
    32  // source is the cache entry type for sourceCache.
    33  type source struct {
    34  	Tgz    []byte // Source tarball bytes.
    35  	TooBig bool
    36  }
    37  
    38  // GetSourceTgz returns a Reader that provides a tgz of the requested source revision.
    39  // repo is go.googlesource.com repo ("go", "net", and so on).
    40  // rev is git revision.
    41  //
    42  // An error of type TooBigError is returned if the compressed tarball exceeds a size that
    43  // on 2021-11-22 was deemed to be enough to meet expected legitimate future needs for a while.
    44  // See golang.org/issue/46379.
    45  func GetSourceTgz(sl spanlog.Logger, repo, rev string) (tgz io.Reader, err error) {
    46  	sp := sl.CreateSpan("get_source", repo+"@"+rev)
    47  	defer func() { sp.Done(err) }()
    48  
    49  	key := fmt.Sprintf("%v-%v", repo, rev)
    50  	v, err, _ := sourceGroup.Do(key, func() (interface{}, error) {
    51  		if src, ok := sourceCache.Get(key); ok {
    52  			return src, nil
    53  		}
    54  
    55  		if gitMirrorClient != nil {
    56  			sp := sl.CreateSpan("get_source_from_gitmirror")
    57  			src, err := getSourceTgzFromGitMirror(repo, rev)
    58  			if err == nil {
    59  				sourceCache.Add(key, src)
    60  				sp.Done(nil)
    61  				return src, nil
    62  			}
    63  			log.Printf("Error fetching source %s/%s from gitmirror (after %v uptime): %v",
    64  				repo, rev, time.Since(processStartTime), err)
    65  			sp.Done(errors.New("timeout"))
    66  		}
    67  
    68  		sp := sl.CreateSpan("get_source_from_gerrit", fmt.Sprintf("%v from gerrit", key))
    69  		src, err := getSourceTgzFromGerrit(repo, rev)
    70  		sp.Done(err)
    71  		if err == nil {
    72  			sourceCache.Add(key, src)
    73  		}
    74  		return src, err
    75  	})
    76  	if err != nil {
    77  		return nil, err
    78  	}
    79  	if v.(source).TooBig {
    80  		return nil, TooBigError{Repo: repo, Rev: rev, Limit: maxSize(repo)}
    81  	}
    82  	return bytes.NewReader(v.(source).Tgz), nil
    83  }
    84  
    85  // TooBigError is the error returned when the source revision is considered too big.
    86  type TooBigError struct {
    87  	Repo  string
    88  	Rev   string
    89  	Limit int64 // Max size in bytes.
    90  }
    91  
    92  func (e TooBigError) Error() string {
    93  	return fmt.Sprintf("rejected because compressed tarball of repository go.googlesource.com/%s at revision %s exceeded a limit of %d MB; see golang.org/issue/46379",
    94  		e.Repo, e.Rev, e.Limit/1024/1024)
    95  }
    96  
    97  var gitMirrorClient *http.Client
    98  
    99  // RegisterGitMirrorDial registers a dial function which will be used to reach gitmirror.
   100  // If used, this function must be called before GetSourceTgz.
   101  func RegisterGitMirrorDial(dial func(context.Context) (net.Conn, error)) {
   102  	gitMirrorClient = &http.Client{
   103  		Timeout: 30 * time.Second,
   104  		Transport: &http.Transport{
   105  			IdleConnTimeout: 30 * time.Second,
   106  			DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
   107  				return dial(ctx)
   108  			},
   109  		},
   110  	}
   111  }
   112  
   113  var gerritHTTPClient = &http.Client{
   114  	Timeout: 30 * time.Second,
   115  }
   116  
   117  func getSourceTgzFromGerrit(repo, rev string) (source, error) {
   118  	return getSourceTgzFromURL(gerritHTTPClient, "gerrit", repo, rev, "https://go.googlesource.com/"+repo+"/+archive/"+rev+".tar.gz")
   119  }
   120  
   121  func getSourceTgzFromGitMirror(repo, rev string) (src source, err error) {
   122  	for i := 0; i < 2; i++ { // two tries; different pods maybe?
   123  		if i > 0 {
   124  			time.Sleep(1 * time.Second)
   125  		}
   126  		// The "gitmirror" hostname is unused:
   127  		src, err = getSourceTgzFromURL(gitMirrorClient, "gitmirror", repo, rev, "http://gitmirror/"+repo+".tar.gz?rev="+rev)
   128  		if err == nil {
   129  			return src, nil
   130  		}
   131  		if tr, ok := http.DefaultTransport.(*http.Transport); ok {
   132  			tr.CloseIdleConnections()
   133  		}
   134  	}
   135  	return source{}, err
   136  }
   137  
   138  // getSourceTgzFromURL fetches a source tarball from url.
   139  // If url serves more than maxSize bytes, it stops short.
   140  func getSourceTgzFromURL(hc *http.Client, service, repo, rev, url string) (source, error) {
   141  	res, err := hc.Get(url)
   142  	if err != nil {
   143  		return source{}, fmt.Errorf("fetching %s/%s from %s: %v", repo, rev, service, err)
   144  	}
   145  	defer res.Body.Close()
   146  	if res.StatusCode/100 != 2 {
   147  		slurp, _ := io.ReadAll(io.LimitReader(res.Body, 4<<10))
   148  		return source{}, fmt.Errorf("fetching %s/%s from %s: %v; body: %s", repo, rev, service, res.Status, slurp)
   149  	}
   150  	// See golang.org/issue/11224 for a discussion on tree filtering.
   151  	b, err := io.ReadAll(io.LimitReader(res.Body, maxSize(repo)+1))
   152  	if int64(len(b)) > maxSize(repo) && err == nil {
   153  		return source{TooBig: true}, nil
   154  	}
   155  	if err != nil {
   156  		return source{}, fmt.Errorf("reading %s/%s from %s: %v", repo, rev, service, err)
   157  	}
   158  	return source{Tgz: b}, nil
   159  }
   160  
   161  // maxSize controls artificial limits on how big of a compressed source tarball
   162  // this package is willing to accept. It's expected humans may need to manage
   163  // these limits every couple of years for the evolving needs of the Go project,
   164  // and ideally not much more often.
   165  //
   166  // repo is a go.googlesource.com repo ("go", "net", and so on).
   167  func maxSize(repo string) int64 {
   168  	switch repo {
   169  	default:
   170  		// As of 2021-11-22, a compressed tarball of Go source is 23 MB,
   171  		// x/net is 1.2 MB,
   172  		// x/build is 1.1 MB,
   173  		// x/tools is 2.9 MB.
   174  		return 100 << 20
   175  	case "website":
   176  		// In 2021, all content in x/blog (52 MB) and x/talks (74 MB) moved
   177  		// to x/website. This makes x/website an outlier, with a compressed
   178  		// tarball size of 135 MB. Give it some room to grow from there.
   179  		return 200 << 20
   180  	}
   181  }