github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/go/modfetch/codehost/codehost.go (about) 1 // Copyright 2018 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package codehost defines the interface implemented by a code hosting source, 6 // along with support code for use by implementations. 7 package codehost 8 9 import ( 10 "bytes" 11 "context" 12 "crypto/sha256" 13 "fmt" 14 "io" 15 "io/fs" 16 "os" 17 "os/exec" 18 "path/filepath" 19 "strings" 20 "sync" 21 "time" 22 23 "github.com/go-asm/go/cmd/go/cfg" 24 "github.com/go-asm/go/cmd/go/lockedfile" 25 "github.com/go-asm/go/cmd/go/str" 26 27 "golang.org/x/mod/module" 28 "golang.org/x/mod/semver" 29 ) 30 31 // Downloaded size limits. 32 const ( 33 MaxGoMod = 16 << 20 // maximum size of go.mod file 34 MaxLICENSE = 16 << 20 // maximum size of LICENSE file 35 MaxZipFile = 500 << 20 // maximum size of downloaded zip file 36 ) 37 38 // A Repo represents a code hosting source. 39 // Typical implementations include local version control repositories, 40 // remote version control servers, and code hosting sites. 41 // 42 // A Repo must be safe for simultaneous use by multiple goroutines, 43 // and callers must not modify returned values, which may be cached and shared. 44 type Repo interface { 45 // CheckReuse checks whether the old origin information 46 // remains up to date. If so, whatever cached object it was 47 // taken from can be reused. 48 // The subdir gives subdirectory name where the module root is expected to be found, 49 // "" for the root or "sub/dir" for a subdirectory (no trailing slash). 50 CheckReuse(ctx context.Context, old *Origin, subdir string) error 51 52 // List lists all tags with the given prefix. 53 Tags(ctx context.Context, prefix string) (*Tags, error) 54 55 // Stat returns information about the revision rev. 56 // A revision can be any identifier known to the underlying service: 57 // commit hash, branch, tag, and so on. 58 Stat(ctx context.Context, rev string) (*RevInfo, error) 59 60 // Latest returns the latest revision on the default branch, 61 // whatever that means in the underlying implementation. 62 Latest(ctx context.Context) (*RevInfo, error) 63 64 // ReadFile reads the given file in the file tree corresponding to revision rev. 65 // It should refuse to read more than maxSize bytes. 66 // 67 // If the requested file does not exist it should return an error for which 68 // os.IsNotExist(err) returns true. 69 ReadFile(ctx context.Context, rev, file string, maxSize int64) (data []byte, err error) 70 71 // ReadZip downloads a zip file for the subdir subdirectory 72 // of the given revision to a new file in a given temporary directory. 73 // It should refuse to read more than maxSize bytes. 74 // It returns a ReadCloser for a streamed copy of the zip file. 75 // All files in the zip file are expected to be 76 // nested in a single top-level directory, whose name is not specified. 77 ReadZip(ctx context.Context, rev, subdir string, maxSize int64) (zip io.ReadCloser, err error) 78 79 // RecentTag returns the most recent tag on rev or one of its predecessors 80 // with the given prefix. allowed may be used to filter out unwanted versions. 81 RecentTag(ctx context.Context, rev, prefix string, allowed func(tag string) bool) (tag string, err error) 82 83 // DescendsFrom reports whether rev or any of its ancestors has the given tag. 84 // 85 // DescendsFrom must return true for any tag returned by RecentTag for the 86 // same revision. 87 DescendsFrom(ctx context.Context, rev, tag string) (bool, error) 88 } 89 90 // An Origin describes the provenance of a given repo method result. 91 // It can be passed to CheckReuse (usually in a different go command invocation) 92 // to see whether the result remains up-to-date. 93 type Origin struct { 94 VCS string `json:",omitempty"` // "git" etc 95 URL string `json:",omitempty"` // URL of repository 96 Subdir string `json:",omitempty"` // subdirectory in repo 97 98 Hash string `json:",omitempty"` // commit hash or ID 99 100 // If TagSum is non-empty, then the resolution of this module version 101 // depends on the set of tags present in the repo, specifically the tags 102 // of the form TagPrefix + a valid semver version. 103 // If the matching repo tags and their commit hashes still hash to TagSum, 104 // the Origin is still valid (at least as far as the tags are concerned). 105 // The exact checksum is up to the Repo implementation; see (*gitRepo).Tags. 106 TagPrefix string `json:",omitempty"` 107 TagSum string `json:",omitempty"` 108 109 // If Ref is non-empty, then the resolution of this module version 110 // depends on Ref resolving to the revision identified by Hash. 111 // If Ref still resolves to Hash, the Origin is still valid (at least as far as Ref is concerned). 112 // For Git, the Ref is a full ref like "refs/heads/main" or "refs/tags/v1.2.3", 113 // and the Hash is the Git object hash the ref maps to. 114 // Other VCS might choose differently, but the idea is that Ref is the name 115 // with a mutable meaning while Hash is a name with an immutable meaning. 116 Ref string `json:",omitempty"` 117 118 // If RepoSum is non-empty, then the resolution of this module version 119 // failed due to the repo being available but the version not being present. 120 // This depends on the entire state of the repo, which RepoSum summarizes. 121 // For Git, this is a hash of all the refs and their hashes. 122 RepoSum string `json:",omitempty"` 123 } 124 125 // A Tags describes the available tags in a code repository. 126 type Tags struct { 127 Origin *Origin 128 List []Tag 129 } 130 131 // A Tag describes a single tag in a code repository. 132 type Tag struct { 133 Name string 134 Hash string // content hash identifying tag's content, if available 135 } 136 137 // isOriginTag reports whether tag should be preserved 138 // in the Tags method's Origin calculation. 139 // We can safely ignore tags that are not look like pseudo-versions, 140 // because ../coderepo.go's (*codeRepo).Versions ignores them too. 141 // We can also ignore non-semver tags, but we have to include semver 142 // tags with extra suffixes, because the pseudo-version base finder uses them. 143 func isOriginTag(tag string) bool { 144 // modfetch.(*codeRepo).Versions uses Canonical == tag, 145 // but pseudo-version calculation has a weaker condition that 146 // the canonical is a prefix of the tag. 147 // Include those too, so that if any new one appears, we'll invalidate the cache entry. 148 // This will lead to spurious invalidation of version list results, 149 // but tags of this form being created should be fairly rare 150 // (and invalidate pseudo-version results anyway). 151 c := semver.Canonical(tag) 152 return c != "" && strings.HasPrefix(tag, c) && !module.IsPseudoVersion(tag) 153 } 154 155 // A RevInfo describes a single revision in a source code repository. 156 type RevInfo struct { 157 Origin *Origin 158 Name string // complete ID in underlying repository 159 Short string // shortened ID, for use in pseudo-version 160 Version string // version used in lookup 161 Time time.Time // commit time 162 Tags []string // known tags for commit 163 } 164 165 // UnknownRevisionError is an error equivalent to fs.ErrNotExist, but for a 166 // revision rather than a file. 167 type UnknownRevisionError struct { 168 Rev string 169 } 170 171 func (e *UnknownRevisionError) Error() string { 172 return "unknown revision " + e.Rev 173 } 174 func (UnknownRevisionError) Is(err error) bool { 175 return err == fs.ErrNotExist 176 } 177 178 // ErrNoCommits is an error equivalent to fs.ErrNotExist indicating that a given 179 // repository or module contains no commits. 180 var ErrNoCommits error = noCommitsError{} 181 182 type noCommitsError struct{} 183 184 func (noCommitsError) Error() string { 185 return "no commits" 186 } 187 func (noCommitsError) Is(err error) bool { 188 return err == fs.ErrNotExist 189 } 190 191 // AllHex reports whether the revision rev is entirely lower-case hexadecimal digits. 192 func AllHex(rev string) bool { 193 for i := 0; i < len(rev); i++ { 194 c := rev[i] 195 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' { 196 continue 197 } 198 return false 199 } 200 return true 201 } 202 203 // ShortenSHA1 shortens a SHA1 hash (40 hex digits) to the canonical length 204 // used in pseudo-versions (12 hex digits). 205 func ShortenSHA1(rev string) string { 206 if AllHex(rev) && len(rev) == 40 { 207 return rev[:12] 208 } 209 return rev 210 } 211 212 // WorkDir returns the name of the cached work directory to use for the 213 // given repository type and name. 214 func WorkDir(ctx context.Context, typ, name string) (dir, lockfile string, err error) { 215 if cfg.GOMODCACHE == "" { 216 return "", "", fmt.Errorf("neither GOPATH nor GOMODCACHE are set") 217 } 218 219 // We name the work directory for the SHA256 hash of the type and name. 220 // We intentionally avoid the actual name both because of possible 221 // conflicts with valid file system paths and because we want to ensure 222 // that one checkout is never nested inside another. That nesting has 223 // led to security problems in the past. 224 if strings.Contains(typ, ":") { 225 return "", "", fmt.Errorf("codehost.WorkDir: type cannot contain colon") 226 } 227 key := typ + ":" + name 228 dir = filepath.Join(cfg.GOMODCACHE, "cache/vcs", fmt.Sprintf("%x", sha256.Sum256([]byte(key)))) 229 230 xLog, buildX := cfg.BuildXWriter(ctx) 231 if buildX { 232 fmt.Fprintf(xLog, "mkdir -p %s # %s %s\n", filepath.Dir(dir), typ, name) 233 } 234 if err := os.MkdirAll(filepath.Dir(dir), 0777); err != nil { 235 return "", "", err 236 } 237 238 lockfile = dir + ".lock" 239 if buildX { 240 fmt.Fprintf(xLog, "# lock %s\n", lockfile) 241 } 242 243 unlock, err := lockedfile.MutexAt(lockfile).Lock() 244 if err != nil { 245 return "", "", fmt.Errorf("codehost.WorkDir: can't find or create lock file: %v", err) 246 } 247 defer unlock() 248 249 data, err := os.ReadFile(dir + ".info") 250 info, err2 := os.Stat(dir) 251 if err == nil && err2 == nil && info.IsDir() { 252 // Info file and directory both already exist: reuse. 253 have := strings.TrimSuffix(string(data), "\n") 254 if have != key { 255 return "", "", fmt.Errorf("%s exists with wrong content (have %q want %q)", dir+".info", have, key) 256 } 257 if buildX { 258 fmt.Fprintf(xLog, "# %s for %s %s\n", dir, typ, name) 259 } 260 return dir, lockfile, nil 261 } 262 263 // Info file or directory missing. Start from scratch. 264 if xLog != nil { 265 fmt.Fprintf(xLog, "mkdir -p %s # %s %s\n", dir, typ, name) 266 } 267 os.RemoveAll(dir) 268 if err := os.MkdirAll(dir, 0777); err != nil { 269 return "", "", err 270 } 271 if err := os.WriteFile(dir+".info", []byte(key), 0666); err != nil { 272 os.RemoveAll(dir) 273 return "", "", err 274 } 275 return dir, lockfile, nil 276 } 277 278 type RunError struct { 279 Cmd string 280 Err error 281 Stderr []byte 282 HelpText string 283 } 284 285 func (e *RunError) Error() string { 286 text := e.Cmd + ": " + e.Err.Error() 287 stderr := bytes.TrimRight(e.Stderr, "\n") 288 if len(stderr) > 0 { 289 text += ":\n\t" + strings.ReplaceAll(string(stderr), "\n", "\n\t") 290 } 291 if len(e.HelpText) > 0 { 292 text += "\n" + e.HelpText 293 } 294 return text 295 } 296 297 var dirLock sync.Map 298 299 // Run runs the command line in the given directory 300 // (an empty dir means the current directory). 301 // It returns the standard output and, for a non-zero exit, 302 // a *RunError indicating the command, exit status, and standard error. 303 // Standard error is unavailable for commands that exit successfully. 304 func Run(ctx context.Context, dir string, cmdline ...any) ([]byte, error) { 305 return RunWithStdin(ctx, dir, nil, cmdline...) 306 } 307 308 // bashQuoter escapes characters that have special meaning in double-quoted strings in the bash shell. 309 // See https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html. 310 var bashQuoter = strings.NewReplacer(`"`, `\"`, `$`, `\$`, "`", "\\`", `\`, `\\`) 311 312 func RunWithStdin(ctx context.Context, dir string, stdin io.Reader, cmdline ...any) ([]byte, error) { 313 if dir != "" { 314 muIface, ok := dirLock.Load(dir) 315 if !ok { 316 muIface, _ = dirLock.LoadOrStore(dir, new(sync.Mutex)) 317 } 318 mu := muIface.(*sync.Mutex) 319 mu.Lock() 320 defer mu.Unlock() 321 } 322 323 cmd := str.StringList(cmdline...) 324 if os.Getenv("TESTGOVCS") == "panic" { 325 panic(fmt.Sprintf("use of vcs: %v", cmd)) 326 } 327 if xLog, ok := cfg.BuildXWriter(ctx); ok { 328 text := new(strings.Builder) 329 if dir != "" { 330 text.WriteString("cd ") 331 text.WriteString(dir) 332 text.WriteString("; ") 333 } 334 for i, arg := range cmd { 335 if i > 0 { 336 text.WriteByte(' ') 337 } 338 switch { 339 case strings.ContainsAny(arg, "'"): 340 // Quote args that could be mistaken for quoted args. 341 text.WriteByte('"') 342 text.WriteString(bashQuoter.Replace(arg)) 343 text.WriteByte('"') 344 case strings.ContainsAny(arg, "$`\\*?[\"\t\n\v\f\r \u0085\u00a0"): 345 // Quote args that contain special characters, glob patterns, or spaces. 346 text.WriteByte('\'') 347 text.WriteString(arg) 348 text.WriteByte('\'') 349 default: 350 text.WriteString(arg) 351 } 352 } 353 fmt.Fprintf(xLog, "%s\n", text) 354 start := time.Now() 355 defer func() { 356 fmt.Fprintf(xLog, "%.3fs # %s\n", time.Since(start).Seconds(), text) 357 }() 358 } 359 // TODO: Impose limits on command output size. 360 // TODO: Set environment to get English error messages. 361 var stderr bytes.Buffer 362 var stdout bytes.Buffer 363 c := exec.CommandContext(ctx, cmd[0], cmd[1:]...) 364 c.Cancel = func() error { return c.Process.Signal(os.Interrupt) } 365 c.Dir = dir 366 c.Stdin = stdin 367 c.Stderr = &stderr 368 c.Stdout = &stdout 369 // For Git commands, manually supply GIT_DIR so Git works with safe.bareRepository=explicit set. Noop for other commands. 370 c.Env = append(c.Environ(), "GIT_DIR="+dir) 371 err := c.Run() 372 if err != nil { 373 err = &RunError{Cmd: strings.Join(cmd, " ") + " in " + dir, Stderr: stderr.Bytes(), Err: err} 374 } 375 return stdout.Bytes(), err 376 }