github.com/gernest/nezuko@v0.1.2/internal/modfetch/codehost/vcs.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package codehost
     6  
     7  import (
     8  	"encoding/xml"
     9  	"fmt"
    10  	"io"
    11  	"io/ioutil"
    12  	"os"
    13  	"path/filepath"
    14  	"regexp"
    15  	"sort"
    16  	"strconv"
    17  	"strings"
    18  	"sync"
    19  	"time"
    20  
    21  	"github.com/gernest/nezuko/internal/lockedfile"
    22  	"github.com/gernest/nezuko/internal/par"
    23  	"github.com/gernest/nezuko/internal/str"
    24  )
    25  
    26  // A VCSError indicates an error using a version control system.
    27  // The implication of a VCSError is that we know definitively where
    28  // to get the code, but we can't access it due to the error.
    29  // The caller should report this error instead of continuing to probe
    30  // other possible module paths.
    31  //
    32  // TODO(bcmills): See if we can invert this. (Return a distinguished error for
    33  // “repo not found” and treat everything else as terminal.)
    34  type VCSError struct {
    35  	Err error
    36  }
    37  
    38  func (e *VCSError) Error() string { return e.Err.Error() }
    39  
    40  func vcsErrorf(format string, a ...interface{}) error {
    41  	return &VCSError{Err: fmt.Errorf(format, a...)}
    42  }
    43  
    44  func NewRepo(vcs, remote string) (Repo, error) {
    45  	type key struct {
    46  		vcs    string
    47  		remote string
    48  	}
    49  	type cached struct {
    50  		repo Repo
    51  		err  error
    52  	}
    53  	c := vcsRepoCache.Do(key{vcs, remote}, func() interface{} {
    54  		repo, err := newVCSRepo(vcs, remote)
    55  		if err != nil {
    56  			err = &VCSError{err}
    57  		}
    58  		return cached{repo, err}
    59  	}).(cached)
    60  
    61  	return c.repo, c.err
    62  }
    63  
    64  var vcsRepoCache par.Cache
    65  
    66  type vcsRepo struct {
    67  	mu lockedfile.Mutex // protects all commands, so we don't have to decide which are safe on a per-VCS basis
    68  
    69  	remote string
    70  	cmd    *vcsCmd
    71  	dir    string
    72  
    73  	tagsOnce sync.Once
    74  	tags     map[string]bool
    75  
    76  	branchesOnce sync.Once
    77  	branches     map[string]bool
    78  
    79  	fetchOnce sync.Once
    80  	fetchErr  error
    81  }
    82  
    83  func newVCSRepo(vcs, remote string) (Repo, error) {
    84  	if vcs == "git" {
    85  		return newGitRepo(remote, false)
    86  	}
    87  	cmd := vcsCmds[vcs]
    88  	if cmd == nil {
    89  		return nil, fmt.Errorf("unknown vcs: %s %s", vcs, remote)
    90  	}
    91  	if !strings.Contains(remote, "://") {
    92  		return nil, fmt.Errorf("invalid vcs remote: %s %s", vcs, remote)
    93  	}
    94  
    95  	r := &vcsRepo{remote: remote, cmd: cmd}
    96  	var err error
    97  	r.dir, r.mu.Path, err = WorkDir(vcsWorkDirType+vcs, r.remote)
    98  	if err != nil {
    99  		return nil, err
   100  	}
   101  
   102  	if cmd.init == nil {
   103  		return r, nil
   104  	}
   105  
   106  	unlock, err := r.mu.Lock()
   107  	if err != nil {
   108  		return nil, err
   109  	}
   110  	defer unlock()
   111  
   112  	if _, err := os.Stat(filepath.Join(r.dir, "."+vcs)); err != nil {
   113  		if _, err := Run(r.dir, cmd.init(r.remote)); err != nil {
   114  			os.RemoveAll(r.dir)
   115  			return nil, err
   116  		}
   117  	}
   118  	return r, nil
   119  }
   120  
   121  const vcsWorkDirType = "vcs1."
   122  
   123  type vcsCmd struct {
   124  	vcs           string                                            // vcs name "hg"
   125  	init          func(remote string) []string                      // cmd to init repo to track remote
   126  	tags          func(remote string) []string                      // cmd to list local tags
   127  	tagRE         *regexp.Regexp                                    // regexp to extract tag names from output of tags cmd
   128  	branches      func(remote string) []string                      // cmd to list local branches
   129  	branchRE      *regexp.Regexp                                    // regexp to extract branch names from output of tags cmd
   130  	badLocalRevRE *regexp.Regexp                                    // regexp of names that must not be served out of local cache without doing fetch first
   131  	statLocal     func(rev, remote string) []string                 // cmd to stat local rev
   132  	parseStat     func(rev, out string) (*RevInfo, error)           // cmd to parse output of statLocal
   133  	fetch         []string                                          // cmd to fetch everything from remote
   134  	latest        string                                            // name of latest commit on remote (tip, HEAD, etc)
   135  	readFile      func(rev, file, remote string) []string           // cmd to read rev's file
   136  	readZip       func(rev, subdir, remote, target string) []string // cmd to read rev's subdir as zip file
   137  }
   138  
   139  var re = regexp.MustCompile
   140  
   141  var vcsCmds = map[string]*vcsCmd{
   142  	"hg": {
   143  		vcs: "hg",
   144  		init: func(remote string) []string {
   145  			return []string{"hg", "clone", "-U", remote, "."}
   146  		},
   147  		tags: func(remote string) []string {
   148  			return []string{"hg", "tags", "-q"}
   149  		},
   150  		tagRE: re(`(?m)^[^\n]+$`),
   151  		branches: func(remote string) []string {
   152  			return []string{"hg", "branches", "-c", "-q"}
   153  		},
   154  		branchRE:      re(`(?m)^[^\n]+$`),
   155  		badLocalRevRE: re(`(?m)^(tip)$`),
   156  		statLocal: func(rev, remote string) []string {
   157  			return []string{"hg", "log", "-l1", "-r", rev, "--template", "{node} {date|hgdate} {tags}"}
   158  		},
   159  		parseStat: hgParseStat,
   160  		fetch:     []string{"hg", "pull", "-f"},
   161  		latest:    "tip",
   162  		readFile: func(rev, file, remote string) []string {
   163  			return []string{"hg", "cat", "-r", rev, file}
   164  		},
   165  		readZip: func(rev, subdir, remote, target string) []string {
   166  			pattern := []string{}
   167  			if subdir != "" {
   168  				pattern = []string{"-I", subdir + "/**"}
   169  			}
   170  			return str.StringList("hg", "archive", "-t", "zip", "--no-decode", "-r", rev, "--prefix=prefix/", pattern, target)
   171  		},
   172  	},
   173  
   174  	"svn": {
   175  		vcs:  "svn",
   176  		init: nil, // no local checkout
   177  		tags: func(remote string) []string {
   178  			return []string{"svn", "list", strings.TrimSuffix(remote, "/trunk") + "/tags"}
   179  		},
   180  		tagRE: re(`(?m)^(.*?)/?$`),
   181  		statLocal: func(rev, remote string) []string {
   182  			suffix := "@" + rev
   183  			if rev == "latest" {
   184  				suffix = ""
   185  			}
   186  			return []string{"svn", "log", "-l1", "--xml", remote + suffix}
   187  		},
   188  		parseStat: svnParseStat,
   189  		latest:    "latest",
   190  		readFile: func(rev, file, remote string) []string {
   191  			return []string{"svn", "cat", remote + "/" + file + "@" + rev}
   192  		},
   193  		// TODO: zip
   194  	},
   195  
   196  	"bzr": {
   197  		vcs: "bzr",
   198  		init: func(remote string) []string {
   199  			return []string{"bzr", "branch", "--use-existing-dir", remote, "."}
   200  		},
   201  		fetch: []string{
   202  			"bzr", "pull", "--overwrite-tags",
   203  		},
   204  		tags: func(remote string) []string {
   205  			return []string{"bzr", "tags"}
   206  		},
   207  		tagRE:         re(`(?m)^\S+`),
   208  		badLocalRevRE: re(`^revno:-`),
   209  		statLocal: func(rev, remote string) []string {
   210  			return []string{"bzr", "log", "-l1", "--long", "--show-ids", "-r", rev}
   211  		},
   212  		parseStat: bzrParseStat,
   213  		latest:    "revno:-1",
   214  		readFile: func(rev, file, remote string) []string {
   215  			return []string{"bzr", "cat", "-r", rev, file}
   216  		},
   217  		readZip: func(rev, subdir, remote, target string) []string {
   218  			extra := []string{}
   219  			if subdir != "" {
   220  				extra = []string{"./" + subdir}
   221  			}
   222  			return str.StringList("bzr", "export", "--format=zip", "-r", rev, "--root=prefix/", target, extra)
   223  		},
   224  	},
   225  
   226  	"fossil": {
   227  		vcs: "fossil",
   228  		init: func(remote string) []string {
   229  			return []string{"fossil", "clone", remote, ".fossil"}
   230  		},
   231  		fetch: []string{"fossil", "pull", "-R", ".fossil"},
   232  		tags: func(remote string) []string {
   233  			return []string{"fossil", "tag", "-R", ".fossil", "list"}
   234  		},
   235  		tagRE: re(`XXXTODO`),
   236  		statLocal: func(rev, remote string) []string {
   237  			return []string{"fossil", "info", "-R", ".fossil", rev}
   238  		},
   239  		parseStat: fossilParseStat,
   240  		latest:    "trunk",
   241  		readFile: func(rev, file, remote string) []string {
   242  			return []string{"fossil", "cat", "-R", ".fossil", "-r", rev, file}
   243  		},
   244  		readZip: func(rev, subdir, remote, target string) []string {
   245  			extra := []string{}
   246  			if subdir != "" && !strings.ContainsAny(subdir, "*?[],") {
   247  				extra = []string{"--include", subdir}
   248  			}
   249  			// Note that vcsRepo.ReadZip below rewrites this command
   250  			// to run in a different directory, to work around a fossil bug.
   251  			return str.StringList("fossil", "zip", "-R", ".fossil", "--name", "prefix", extra, rev, target)
   252  		},
   253  	},
   254  }
   255  
   256  func (r *vcsRepo) loadTags() {
   257  	out, err := Run(r.dir, r.cmd.tags(r.remote))
   258  	if err != nil {
   259  		return
   260  	}
   261  
   262  	// Run tag-listing command and extract tags.
   263  	r.tags = make(map[string]bool)
   264  	for _, tag := range r.cmd.tagRE.FindAllString(string(out), -1) {
   265  		if r.cmd.badLocalRevRE != nil && r.cmd.badLocalRevRE.MatchString(tag) {
   266  			continue
   267  		}
   268  		r.tags[tag] = true
   269  	}
   270  }
   271  
   272  func (r *vcsRepo) loadBranches() {
   273  	if r.cmd.branches == nil {
   274  		return
   275  	}
   276  
   277  	out, err := Run(r.dir, r.cmd.branches(r.remote))
   278  	if err != nil {
   279  		return
   280  	}
   281  
   282  	r.branches = make(map[string]bool)
   283  	for _, branch := range r.cmd.branchRE.FindAllString(string(out), -1) {
   284  		if r.cmd.badLocalRevRE != nil && r.cmd.badLocalRevRE.MatchString(branch) {
   285  			continue
   286  		}
   287  		r.branches[branch] = true
   288  	}
   289  }
   290  
   291  func (r *vcsRepo) Tags(prefix string) ([]string, error) {
   292  	unlock, err := r.mu.Lock()
   293  	if err != nil {
   294  		return nil, err
   295  	}
   296  	defer unlock()
   297  
   298  	r.tagsOnce.Do(r.loadTags)
   299  
   300  	tags := []string{}
   301  	for tag := range r.tags {
   302  		if strings.HasPrefix(tag, prefix) {
   303  			tags = append(tags, tag)
   304  		}
   305  	}
   306  	sort.Strings(tags)
   307  	return tags, nil
   308  }
   309  
   310  func (r *vcsRepo) Stat(rev string) (*RevInfo, error) {
   311  	unlock, err := r.mu.Lock()
   312  	if err != nil {
   313  		return nil, err
   314  	}
   315  	defer unlock()
   316  
   317  	if rev == "latest" {
   318  		rev = r.cmd.latest
   319  	}
   320  	r.branchesOnce.Do(r.loadBranches)
   321  	revOK := (r.cmd.badLocalRevRE == nil || !r.cmd.badLocalRevRE.MatchString(rev)) && !r.branches[rev]
   322  	if revOK {
   323  		if info, err := r.statLocal(rev); err == nil {
   324  			return info, nil
   325  		}
   326  	}
   327  
   328  	r.fetchOnce.Do(r.fetch)
   329  	if r.fetchErr != nil {
   330  		return nil, r.fetchErr
   331  	}
   332  	info, err := r.statLocal(rev)
   333  	if err != nil {
   334  		return nil, err
   335  	}
   336  	if !revOK {
   337  		info.Version = info.Name
   338  	}
   339  	return info, nil
   340  }
   341  
   342  func (r *vcsRepo) fetch() {
   343  	_, r.fetchErr = Run(r.dir, r.cmd.fetch)
   344  }
   345  
   346  func (r *vcsRepo) statLocal(rev string) (*RevInfo, error) {
   347  	out, err := Run(r.dir, r.cmd.statLocal(rev, r.remote))
   348  	if err != nil {
   349  		return nil, vcsErrorf("unknown revision %s", rev)
   350  	}
   351  	return r.cmd.parseStat(rev, string(out))
   352  }
   353  
   354  func (r *vcsRepo) Latest() (*RevInfo, error) {
   355  	return r.Stat("latest")
   356  }
   357  
   358  func (r *vcsRepo) ReadFile(rev, file string, maxSize int64) ([]byte, error) {
   359  	if rev == "latest" {
   360  		rev = r.cmd.latest
   361  	}
   362  	_, err := r.Stat(rev) // download rev into local repo
   363  	if err != nil {
   364  		return nil, err
   365  	}
   366  
   367  	// r.Stat acquires r.mu, so lock after that.
   368  	unlock, err := r.mu.Lock()
   369  	if err != nil {
   370  		return nil, err
   371  	}
   372  	defer unlock()
   373  
   374  	out, err := Run(r.dir, r.cmd.readFile(rev, file, r.remote))
   375  	if err != nil {
   376  		return nil, os.ErrNotExist
   377  	}
   378  	return out, nil
   379  }
   380  
   381  func (r *vcsRepo) ReadFileRevs(revs []string, file string, maxSize int64) (map[string]*FileRev, error) {
   382  	// We don't technically need to lock here since we're returning an error
   383  	// uncondititonally, but doing so anyway will help to avoid baking in
   384  	// lock-inversion bugs.
   385  	unlock, err := r.mu.Lock()
   386  	if err != nil {
   387  		return nil, err
   388  	}
   389  	defer unlock()
   390  
   391  	return nil, vcsErrorf("ReadFileRevs not implemented")
   392  }
   393  
   394  func (r *vcsRepo) RecentTag(rev, prefix string) (tag string, err error) {
   395  	// We don't technically need to lock here since we're returning an error
   396  	// uncondititonally, but doing so anyway will help to avoid baking in
   397  	// lock-inversion bugs.
   398  	unlock, err := r.mu.Lock()
   399  	if err != nil {
   400  		return "", err
   401  	}
   402  	defer unlock()
   403  
   404  	return "", vcsErrorf("RecentTag not implemented")
   405  }
   406  
   407  func (r *vcsRepo) ReadZip(rev, subdir string, maxSize int64) (zip io.ReadCloser, actualSubdir string, err error) {
   408  	if r.cmd.readZip == nil {
   409  		return nil, "", vcsErrorf("ReadZip not implemented for %s", r.cmd.vcs)
   410  	}
   411  
   412  	unlock, err := r.mu.Lock()
   413  	if err != nil {
   414  		return nil, "", err
   415  	}
   416  	defer unlock()
   417  
   418  	if rev == "latest" {
   419  		rev = r.cmd.latest
   420  	}
   421  	f, err := ioutil.TempFile("", "go-readzip-*.zip")
   422  	if err != nil {
   423  		return nil, "", err
   424  	}
   425  	if r.cmd.vcs == "fossil" {
   426  		// If you run
   427  		//	fossil zip -R .fossil --name prefix trunk /tmp/x.zip
   428  		// fossil fails with "unable to create directory /tmp" [sic].
   429  		// Change the command to run in /tmp instead,
   430  		// replacing the -R argument with an absolute path.
   431  		args := r.cmd.readZip(rev, subdir, r.remote, filepath.Base(f.Name()))
   432  		for i := range args {
   433  			if args[i] == ".fossil" {
   434  				args[i] = filepath.Join(r.dir, ".fossil")
   435  			}
   436  		}
   437  		_, err = Run(filepath.Dir(f.Name()), args)
   438  	} else {
   439  		_, err = Run(r.dir, r.cmd.readZip(rev, subdir, r.remote, f.Name()))
   440  	}
   441  	if err != nil {
   442  		f.Close()
   443  		os.Remove(f.Name())
   444  		return nil, "", err
   445  	}
   446  	return &deleteCloser{f}, "", nil
   447  }
   448  
   449  // deleteCloser is a file that gets deleted on Close.
   450  type deleteCloser struct {
   451  	*os.File
   452  }
   453  
   454  func (d *deleteCloser) Close() error {
   455  	defer os.Remove(d.File.Name())
   456  	return d.File.Close()
   457  }
   458  
   459  func hgParseStat(rev, out string) (*RevInfo, error) {
   460  	f := strings.Fields(string(out))
   461  	if len(f) < 3 {
   462  		return nil, vcsErrorf("unexpected response from hg log: %q", out)
   463  	}
   464  	hash := f[0]
   465  	version := rev
   466  	if strings.HasPrefix(hash, version) {
   467  		version = hash // extend to full hash
   468  	}
   469  	t, err := strconv.ParseInt(f[1], 10, 64)
   470  	if err != nil {
   471  		return nil, vcsErrorf("invalid time from hg log: %q", out)
   472  	}
   473  
   474  	var tags []string
   475  	for _, tag := range f[3:] {
   476  		if tag != "tip" {
   477  			tags = append(tags, tag)
   478  		}
   479  	}
   480  	sort.Strings(tags)
   481  
   482  	info := &RevInfo{
   483  		Name:    hash,
   484  		Short:   ShortenSHA1(hash),
   485  		Time:    time.Unix(t, 0).UTC(),
   486  		Version: version,
   487  		Tags:    tags,
   488  	}
   489  	return info, nil
   490  }
   491  
   492  func svnParseStat(rev, out string) (*RevInfo, error) {
   493  	var log struct {
   494  		Logentry struct {
   495  			Revision int64  `xml:"revision,attr"`
   496  			Date     string `xml:"date"`
   497  		} `xml:"logentry"`
   498  	}
   499  	if err := xml.Unmarshal([]byte(out), &log); err != nil {
   500  		return nil, vcsErrorf("unexpected response from svn log --xml: %v\n%s", err, out)
   501  	}
   502  
   503  	t, err := time.Parse(time.RFC3339, log.Logentry.Date)
   504  	if err != nil {
   505  		return nil, vcsErrorf("unexpected response from svn log --xml: %v\n%s", err, out)
   506  	}
   507  
   508  	info := &RevInfo{
   509  		Name:    fmt.Sprintf("%d", log.Logentry.Revision),
   510  		Short:   fmt.Sprintf("%012d", log.Logentry.Revision),
   511  		Time:    t.UTC(),
   512  		Version: rev,
   513  	}
   514  	return info, nil
   515  }
   516  
   517  func bzrParseStat(rev, out string) (*RevInfo, error) {
   518  	var revno int64
   519  	var tm time.Time
   520  	for _, line := range strings.Split(out, "\n") {
   521  		if line == "" || line[0] == ' ' || line[0] == '\t' {
   522  			// End of header, start of commit message.
   523  			break
   524  		}
   525  		if line[0] == '-' {
   526  			continue
   527  		}
   528  		i := strings.Index(line, ":")
   529  		if i < 0 {
   530  			// End of header, start of commit message.
   531  			break
   532  		}
   533  		key, val := line[:i], strings.TrimSpace(line[i+1:])
   534  		switch key {
   535  		case "revno":
   536  			if j := strings.Index(val, " "); j >= 0 {
   537  				val = val[:j]
   538  			}
   539  			i, err := strconv.ParseInt(val, 10, 64)
   540  			if err != nil {
   541  				return nil, vcsErrorf("unexpected revno from bzr log: %q", line)
   542  			}
   543  			revno = i
   544  		case "timestamp":
   545  			j := strings.Index(val, " ")
   546  			if j < 0 {
   547  				return nil, vcsErrorf("unexpected timestamp from bzr log: %q", line)
   548  			}
   549  			t, err := time.Parse("2006-01-02 15:04:05 -0700", val[j+1:])
   550  			if err != nil {
   551  				return nil, vcsErrorf("unexpected timestamp from bzr log: %q", line)
   552  			}
   553  			tm = t.UTC()
   554  		}
   555  	}
   556  	if revno == 0 || tm.IsZero() {
   557  		return nil, vcsErrorf("unexpected response from bzr log: %q", out)
   558  	}
   559  
   560  	info := &RevInfo{
   561  		Name:    fmt.Sprintf("%d", revno),
   562  		Short:   fmt.Sprintf("%012d", revno),
   563  		Time:    tm,
   564  		Version: rev,
   565  	}
   566  	return info, nil
   567  }
   568  
   569  func fossilParseStat(rev, out string) (*RevInfo, error) {
   570  	for _, line := range strings.Split(out, "\n") {
   571  		if strings.HasPrefix(line, "uuid:") {
   572  			f := strings.Fields(line)
   573  			if len(f) != 5 || len(f[1]) != 40 || f[4] != "UTC" {
   574  				return nil, vcsErrorf("unexpected response from fossil info: %q", line)
   575  			}
   576  			t, err := time.Parse("2006-01-02 15:04:05", f[2]+" "+f[3])
   577  			if err != nil {
   578  				return nil, vcsErrorf("unexpected response from fossil info: %q", line)
   579  			}
   580  			hash := f[1]
   581  			version := rev
   582  			if strings.HasPrefix(hash, version) {
   583  				version = hash // extend to full hash
   584  			}
   585  			info := &RevInfo{
   586  				Name:    hash,
   587  				Short:   ShortenSHA1(hash),
   588  				Time:    t,
   589  				Version: version,
   590  			}
   591  			return info, nil
   592  		}
   593  	}
   594  	return nil, vcsErrorf("unexpected response from fossil info: %q", out)
   595  }