// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package sourcecache provides a cache of code found in Git repositories.
// It talks directly to the Gerrit instance at go.googlesource.com.
// If RegisterGitMirrorDial is called, it will first try to get code from gitmirror before falling back on Gerrit.
package sourcecache

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"log"
	"net"
	"net/http"
	"time"

	"golang.org/x/build/internal/lru"
	"golang.org/x/build/internal/singleflight"
	"golang.org/x/build/internal/spanlog"
)

// processStartTime is captured when the package is initialized. It is only
// used to report process uptime in the log line written when a gitmirror
// fetch fails.
var processStartTime = time.Now()

// sourceGroup is the singleflight group that GetSourceTgz runs its fetches
// through, keyed by the same repo-rev string used for sourceCache.
var sourceGroup singleflight.Group

// sourceCache holds fetched source entries. It is created with lru.New(40);
// presumably 40 is the maximum number of entries (confirm against the lru
// package).
var sourceCache = lru.New(40) // repo-rev -> source

// source is the cache entry type for sourceCache.
type source struct {
	Tgz    []byte // Source tarball bytes.
	TooBig bool   // Set, with Tgz left nil, when the tarball exceeded maxSize for its repo.
}

// GetSourceTgz returns a Reader that provides a tgz of the requested source revision.
// repo is go.googlesource.com repo ("go", "net", and so on).
// rev is git revision.
//
// An error of type TooBigError is returned if the compressed tarball exceeds a size that
// on 2021-11-22 was deemed to be enough to meet expected legitimate future needs for a while.
// See golang.org/issue/46379.
45 func GetSourceTgz(sl spanlog.Logger, repo, rev string) (tgz io.Reader, err error) { 46 sp := sl.CreateSpan("get_source", repo+"@"+rev) 47 defer func() { sp.Done(err) }() 48 49 key := fmt.Sprintf("%v-%v", repo, rev) 50 v, err, _ := sourceGroup.Do(key, func() (interface{}, error) { 51 if src, ok := sourceCache.Get(key); ok { 52 return src, nil 53 } 54 55 if gitMirrorClient != nil { 56 sp := sl.CreateSpan("get_source_from_gitmirror") 57 src, err := getSourceTgzFromGitMirror(repo, rev) 58 if err == nil { 59 sourceCache.Add(key, src) 60 sp.Done(nil) 61 return src, nil 62 } 63 log.Printf("Error fetching source %s/%s from gitmirror (after %v uptime): %v", 64 repo, rev, time.Since(processStartTime), err) 65 sp.Done(errors.New("timeout")) 66 } 67 68 sp := sl.CreateSpan("get_source_from_gerrit", fmt.Sprintf("%v from gerrit", key)) 69 src, err := getSourceTgzFromGerrit(repo, rev) 70 sp.Done(err) 71 if err == nil { 72 sourceCache.Add(key, src) 73 } 74 return src, err 75 }) 76 if err != nil { 77 return nil, err 78 } 79 if v.(source).TooBig { 80 return nil, TooBigError{Repo: repo, Rev: rev, Limit: maxSize(repo)} 81 } 82 return bytes.NewReader(v.(source).Tgz), nil 83 } 84 85 // TooBigError is the error returned when the source revision is considered too big. 86 type TooBigError struct { 87 Repo string 88 Rev string 89 Limit int64 // Max size in bytes. 90 } 91 92 func (e TooBigError) Error() string { 93 return fmt.Sprintf("rejected because compressed tarball of repository go.googlesource.com/%s at revision %s exceeded a limit of %d MB; see golang.org/issue/46379", 94 e.Repo, e.Rev, e.Limit/1024/1024) 95 } 96 97 var gitMirrorClient *http.Client 98 99 // RegisterGitMirrorDial registers a dial function which will be used to reach gitmirror. 100 // If used, this function must be called before GetSourceTgz. 
101 func RegisterGitMirrorDial(dial func(context.Context) (net.Conn, error)) { 102 gitMirrorClient = &http.Client{ 103 Timeout: 30 * time.Second, 104 Transport: &http.Transport{ 105 IdleConnTimeout: 30 * time.Second, 106 DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) { 107 return dial(ctx) 108 }, 109 }, 110 } 111 } 112 113 var gerritHTTPClient = &http.Client{ 114 Timeout: 30 * time.Second, 115 } 116 117 func getSourceTgzFromGerrit(repo, rev string) (source, error) { 118 return getSourceTgzFromURL(gerritHTTPClient, "gerrit", repo, rev, "https://go.googlesource.com/"+repo+"/+archive/"+rev+".tar.gz") 119 } 120 121 func getSourceTgzFromGitMirror(repo, rev string) (src source, err error) { 122 for i := 0; i < 2; i++ { // two tries; different pods maybe? 123 if i > 0 { 124 time.Sleep(1 * time.Second) 125 } 126 // The "gitmirror" hostname is unused: 127 src, err = getSourceTgzFromURL(gitMirrorClient, "gitmirror", repo, rev, "http://gitmirror/"+repo+".tar.gz?rev="+rev) 128 if err == nil { 129 return src, nil 130 } 131 if tr, ok := http.DefaultTransport.(*http.Transport); ok { 132 tr.CloseIdleConnections() 133 } 134 } 135 return source{}, err 136 } 137 138 // getSourceTgzFromURL fetches a source tarball from url. 139 // If url serves more than maxSize bytes, it stops short. 140 func getSourceTgzFromURL(hc *http.Client, service, repo, rev, url string) (source, error) { 141 res, err := hc.Get(url) 142 if err != nil { 143 return source{}, fmt.Errorf("fetching %s/%s from %s: %v", repo, rev, service, err) 144 } 145 defer res.Body.Close() 146 if res.StatusCode/100 != 2 { 147 slurp, _ := io.ReadAll(io.LimitReader(res.Body, 4<<10)) 148 return source{}, fmt.Errorf("fetching %s/%s from %s: %v; body: %s", repo, rev, service, res.Status, slurp) 149 } 150 // See golang.org/issue/11224 for a discussion on tree filtering. 
151 b, err := io.ReadAll(io.LimitReader(res.Body, maxSize(repo)+1)) 152 if int64(len(b)) > maxSize(repo) && err == nil { 153 return source{TooBig: true}, nil 154 } 155 if err != nil { 156 return source{}, fmt.Errorf("reading %s/%s from %s: %v", repo, rev, service, err) 157 } 158 return source{Tgz: b}, nil 159 } 160 161 // maxSize controls artificial limits on how big of a compressed source tarball 162 // this package is willing to accept. It's expected humans may need to manage 163 // these limits every couple of years for the evolving needs of the Go project, 164 // and ideally not much more often. 165 // 166 // repo is a go.googlesource.com repo ("go", "net", and so on). 167 func maxSize(repo string) int64 { 168 switch repo { 169 default: 170 // As of 2021-11-22, a compressed tarball of Go source is 23 MB, 171 // x/net is 1.2 MB, 172 // x/build is 1.1 MB, 173 // x/tools is 2.9 MB. 174 return 100 << 20 175 case "website": 176 // In 2021, all content in x/blog (52 MB) and x/talks (74 MB) moved 177 // to x/website. This makes x/website an outlier, with a compressed 178 // tarball size of 135 MB. Give it some room to grow from there. 179 return 200 << 20 180 } 181 }