github.com/sirkon/goproxy@v1.4.8/internal/modfetch/codehost/codehost.go (about) 1 // Copyright 2018 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package codehost defines the interface implemented by a code hosting source, 6 // along with support code for use by implementations. 7 package codehost 8 9 import ( 10 "bytes" 11 "crypto/sha256" 12 "fmt" 13 "io" 14 "io/ioutil" 15 "os" 16 "os/exec" 17 "path/filepath" 18 "strings" 19 "sync" 20 "time" 21 22 "github.com/sirkon/goproxy/internal/cfg" 23 "github.com/sirkon/goproxy/internal/str" 24 ) 25 26 // Downloaded size limits. 27 const ( 28 MaxGoMod = 16 << 20 // maximum size of go.mod file 29 MaxLICENSE = 16 << 20 // maximum size of LICENSE file 30 MaxZipFile = 500 << 20 // maximum size of downloaded zip file 31 ) 32 33 // A Repo represents a code hosting source. 34 // Typical implementations include local version control repositories, 35 // remote version control servers, and code hosting sites. 36 // A Repo must be safe for simultaneous use by multiple goroutines. 37 type Repo interface { 38 // List lists all tags with the given prefix. 39 Tags(prefix string) (tags []string, err error) 40 41 // Stat returns information about the revision rev. 42 // A revision can be any identifier known to the underlying service: 43 // commit hash, branch, tag, and so on. 44 Stat(rev string) (*RevInfo, error) 45 46 // Latest returns the latest revision on the default branch, 47 // whatever that means in the underlying implementation. 48 Latest() (*RevInfo, error) 49 50 // ReadFile reads the given file in the file tree corresponding to revision rev. 51 // It should refuse to read more than maxSize bytes. 52 // 53 // If the requested file does not exist it should return an error for which 54 // os.IsNotExist(err) returns true. 55 ReadFile(rev, file string, maxSize int64) (data []byte, err error) 56 57 // ReadFileRevs reads a single file at multiple versions. 58 // It should refuse to read more than maxSize bytes. 59 // The result is a map from each requested rev strings 60 // to the associated FileRev. The map must have a non-nil 61 // entry for every requested rev (unless ReadFileRevs returned an error). 62 // A file simply being missing or even corrupted in revs[i] 63 // should be reported only in files[revs[i]].Err, not in the error result 64 // from ReadFileRevs. 65 // The overall call should return an error (and no map) only 66 // in the case of a problem with obtaining the data, such as 67 // a network failure. 68 // Implementations may assume that revs only contain tags, 69 // not direct commit hashes. 70 ReadFileRevs(revs []string, file string, maxSize int64) (files map[string]*FileRev, err error) 71 72 // ReadZip downloads a zip file for the subdir subdirectory 73 // of the given revision to a new file in a given temporary directory. 74 // It should refuse to read more than maxSize bytes. 75 // It returns a ReadCloser for a streamed copy of the zip file, 76 // along with the actual subdirectory (possibly shorter than subdir) 77 // contained in the zip file. All files in the zip file are expected to be 78 // nested in a single top-level directory, whose name is not specified. 79 ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, actualSubdir string, err error) 80 81 // RecentTag returns the most recent tag at or before the given rev 82 // with the given prefix. It should make a best-effort attempt to 83 // find a tag that is a valid semantic version (following the prefix), 84 // or else the result is not useful to the caller, but it need not 85 // incur great expense in doing so. For example, the git implementation 86 // of RecentTag limits git's search to tags matching the glob expression 87 // "v[0-9]*.[0-9]*.[0-9]*" (after the prefix). 88 RecentTag(rev, prefix string) (tag string, err error) 89 } 90 91 // A Rev describes a single revision in a source code repository. 92 type RevInfo struct { 93 Name string // complete ID in underlying repository 94 Short string // shortened ID, for use in pseudo-version 95 Version string // version used in lookup 96 Time time.Time // commit time 97 Tags []string // known tags for commit 98 } 99 100 // A FileRev describes the result of reading a file at a given revision. 101 type FileRev struct { 102 Rev string // requested revision 103 Data []byte // file data 104 Err error // error if any; os.IsNotExist(Err)==true if rev exists but file does not exist in that rev 105 } 106 107 // AllHex reports whether the revision rev is entirely lower-case hexadecimal digits. 108 func AllHex(rev string) bool { 109 for i := 0; i < len(rev); i++ { 110 c := rev[i] 111 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' { 112 continue 113 } 114 return false 115 } 116 return true 117 } 118 119 // ShortenSHA1 shortens a SHA1 hash (40 hex digits) to the canonical length 120 // used in pseudo-versions (12 hex digits). 121 func ShortenSHA1(rev string) string { 122 if AllHex(rev) && len(rev) == 40 { 123 return rev[:12] 124 } 125 return rev 126 } 127 128 // WorkRoot is the root of the cached work directory. 129 // It is set by github.com/sirkon/goproxy/internal/modload.InitMod. 130 var WorkRoot string 131 132 // WorkDir returns the name of the cached work directory to use for the 133 // given repository type and name. 134 func WorkDir(typ, name string) (string, error) { 135 if WorkRoot == "" { 136 return "", fmt.Errorf("codehost.WorkRoot not set") 137 } 138 139 // We name the work directory for the SHA256 hash of the type and name. 140 // We intentionally avoid the actual name both because of possible 141 // conflicts with valid file system paths and because we want to ensure 142 // that one checkout is never nested inside another. That nesting has 143 // led to security problems in the past. 144 if strings.Contains(typ, ":") { 145 return "", fmt.Errorf("codehost.WorkDir: type cannot contain colon") 146 } 147 key := typ + ":" + name 148 dir := filepath.Join(WorkRoot, fmt.Sprintf("%x", sha256.Sum256([]byte(key)))) 149 data, err := ioutil.ReadFile(dir + ".info") 150 info, err2 := os.Stat(dir) 151 if err == nil && err2 == nil && info.IsDir() { 152 // Info file and directory both already exist: reuse. 153 have := strings.TrimSuffix(string(data), "\n") 154 if have != key { 155 return "", fmt.Errorf("%s exists with wrong content (have %q want %q)", dir+".info", have, key) 156 } 157 if cfg.BuildX { 158 fmt.Fprintf(os.Stderr, "# %s for %s %s\n", dir, typ, name) 159 } 160 return dir, nil 161 } 162 163 // Info file or directory missing. Start from scratch. 164 if cfg.BuildX { 165 fmt.Fprintf(os.Stderr, "mkdir -p %s # %s %s\n", dir, typ, name) 166 } 167 os.RemoveAll(dir) 168 if err := os.MkdirAll(dir, 0777); err != nil { 169 return "", err 170 } 171 if err := ioutil.WriteFile(dir+".info", []byte(key), 0666); err != nil { 172 os.RemoveAll(dir) 173 return "", err 174 } 175 return dir, nil 176 } 177 178 type RunError struct { 179 Cmd string 180 Err error 181 Stderr []byte 182 } 183 184 func (e *RunError) Error() string { 185 text := e.Cmd + ": " + e.Err.Error() 186 stderr := bytes.TrimRight(e.Stderr, "\n") 187 if len(stderr) > 0 { 188 text += ":\n\t" + strings.Replace(string(stderr), "\n", "\n\t", -1) 189 } 190 return text 191 } 192 193 var dirLock sync.Map 194 195 // Run runs the command line in the given directory 196 // (an empty dir means the current directory). 197 // It returns the standard output and, for a non-zero exit, 198 // a *RunError indicating the command, exit status, and standard error. 199 // Standard error is unavailable for commands that exit successfully. 200 func Run(dir string, cmdline ...interface{}) ([]byte, error) { 201 return RunWithStdin(dir, nil, cmdline...) 202 } 203 204 // bashQuoter escapes characters that have special meaning in double-quoted strings in the bash shell. 205 // See https://www.gnu.org/software/bash/manual/html_node/Double-Quotes.html. 206 var bashQuoter = strings.NewReplacer(`"`, `\"`, `$`, `\$`, "`", "\\`", `\`, `\\`) 207 208 func RunWithStdin(dir string, stdin io.Reader, cmdline ...interface{}) ([]byte, error) { 209 if dir != "" { 210 muIface, ok := dirLock.Load(dir) 211 if !ok { 212 muIface, _ = dirLock.LoadOrStore(dir, new(sync.Mutex)) 213 } 214 mu := muIface.(*sync.Mutex) 215 mu.Lock() 216 defer mu.Unlock() 217 } 218 219 cmd := str.StringList(cmdline...) 220 if cfg.BuildX { 221 text := new(strings.Builder) 222 if dir != "" { 223 text.WriteString("cd ") 224 text.WriteString(dir) 225 text.WriteString("; ") 226 } 227 for i, arg := range cmd { 228 if i > 0 { 229 text.WriteByte(' ') 230 } 231 switch { 232 case strings.ContainsAny(arg, "'"): 233 // Quote args that could be mistaken for quoted args. 234 text.WriteByte('"') 235 text.WriteString(bashQuoter.Replace(arg)) 236 text.WriteByte('"') 237 case strings.ContainsAny(arg, "$`\\*?[\"\t\n\v\f\r \u0085\u00a0"): 238 // Quote args that contain special characters, glob patterns, or spaces. 239 text.WriteByte('\'') 240 text.WriteString(arg) 241 text.WriteByte('\'') 242 default: 243 text.WriteString(arg) 244 } 245 } 246 fmt.Fprintf(os.Stderr, "%s\n", text) 247 start := time.Now() 248 defer func() { 249 fmt.Fprintf(os.Stderr, "%.3fs # %s\n", time.Since(start).Seconds(), text) 250 }() 251 } 252 // TODO: Impose limits on command output size. 253 // TODO: Set environment to get English error messages. 254 var stderr bytes.Buffer 255 var stdout bytes.Buffer 256 c := exec.Command(cmd[0], cmd[1:]...) 257 c.Dir = dir 258 c.Stdin = stdin 259 c.Stderr = &stderr 260 c.Stdout = &stdout 261 err := c.Run() 262 if err != nil { 263 err = &RunError{Cmd: strings.Join(cmd, " ") + " in " + dir, Stderr: stderr.Bytes(), Err: err} 264 } 265 return stdout.Bytes(), err 266 }