github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/tools/dashboard/watcher/watcher.go (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Command watcher watches the specified repository for new commits
     6  // and reports them to the build dashboard.
     7  package main // import "golang.org/x/tools/dashboard/watcher"
     8  
     9  import (
    10  	"bytes"
    11  	"encoding/json"
    12  	"errors"
    13  	"flag"
    14  	"fmt"
    15  	"io/ioutil"
    16  	"log"
    17  	"net/http"
    18  	"net/url"
    19  	"os"
    20  	"os/exec"
    21  	"path"
    22  	"path/filepath"
    23  	"runtime"
    24  	"sort"
    25  	"strings"
    26  	"time"
    27  )
    28  
    29  const (
    30  	goBase         = "https://go.googlesource.com/"
    31  	watcherVersion = 3 // must match dashboard/app/build/handler.go's watcherVersion
    32  	origin         = "origin/"
    33  	master         = origin + "master" // name of the master branch
    34  )
    35  
    36  var (
    37  	repoURL      = flag.String("repo", goBase+"go", "Repository URL")
    38  	dashboard    = flag.String("dash", "https://build.golang.org/", "Dashboard URL (must end in /)")
    39  	keyFile      = flag.String("key", defaultKeyFile, "Build dashboard key file")
    40  	pollInterval = flag.Duration("poll", 10*time.Second, "Remote repo poll interval")
    41  	network      = flag.Bool("network", true, "Enable network calls (disable for testing)")
    42  )
    43  
    44  var (
    45  	defaultKeyFile = filepath.Join(homeDir(), ".gobuildkey")
    46  	dashboardKey   = ""
    47  	networkSeen    = make(map[string]bool) // track known hashes for testing
    48  )
    49  
    50  func main() {
    51  	flag.Parse()
    52  
    53  	err := run()
    54  	fmt.Fprintln(os.Stderr, err)
    55  	os.Exit(1)
    56  }
    57  
    58  // run is a little wrapper so we can use defer and return to signal
    59  // errors. It should only return a non-nil error.
    60  func run() error {
    61  	if !strings.HasSuffix(*dashboard, "/") {
    62  		return errors.New("dashboard URL (-dashboard) must end in /")
    63  	}
    64  
    65  	if k, err := readKey(); err != nil {
    66  		return err
    67  	} else {
    68  		dashboardKey = k
    69  	}
    70  
    71  	dir, err := ioutil.TempDir("", "watcher")
    72  	if err != nil {
    73  		return err
    74  	}
    75  	defer os.RemoveAll(dir)
    76  
    77  	errc := make(chan error)
    78  
    79  	go func() {
    80  		r, err := NewRepo(dir, *repoURL, "")
    81  		if err != nil {
    82  			errc <- err
    83  			return
    84  		}
    85  		errc <- r.Watch()
    86  	}()
    87  
    88  	subrepos, err := subrepoList()
    89  	if err != nil {
    90  		return err
    91  	}
    92  	for _, path := range subrepos {
    93  		go func(path string) {
    94  			url := goBase + strings.TrimPrefix(path, "golang.org/x/")
    95  			r, err := NewRepo(dir, url, path)
    96  			if err != nil {
    97  				errc <- err
    98  				return
    99  			}
   100  			errc <- r.Watch()
   101  		}(path)
   102  	}
   103  
   104  	// Must be non-nil.
   105  	return <-errc
   106  }
   107  
   108  // Repo represents a repository to be watched.
   109  type Repo struct {
   110  	root     string             // on-disk location of the git repo
   111  	path     string             // base import path for repo (blank for main repo)
   112  	commits  map[string]*Commit // keyed by full commit hash (40 lowercase hex digits)
   113  	branches map[string]*Branch // keyed by branch name, eg "release-branch.go1.3" (or empty for default)
   114  }
   115  
   116  // NewRepo checks out a new instance of the Mercurial repository
   117  // specified by url to a new directory inside dir.
   118  // The path argument is the base import path of the repository,
   119  // and should be empty for the main Go repo.
   120  func NewRepo(dir, url, path string) (*Repo, error) {
   121  	r := &Repo{
   122  		path:     path,
   123  		root:     filepath.Join(dir, filepath.Base(path)),
   124  		commits:  make(map[string]*Commit),
   125  		branches: make(map[string]*Branch),
   126  	}
   127  
   128  	r.logf("cloning %v", url)
   129  	cmd := exec.Command("git", "clone", url, r.root)
   130  	if out, err := cmd.CombinedOutput(); err != nil {
   131  		return nil, fmt.Errorf("%v\n\n%s", err, out)
   132  	}
   133  
   134  	r.logf("loading commit log")
   135  	if err := r.update(false); err != nil {
   136  		return nil, err
   137  	}
   138  
   139  	r.logf("found %v branches among %v commits\n", len(r.branches), len(r.commits))
   140  	return r, nil
   141  }
   142  
   143  // Watch continuously runs "git fetch" in the repo, checks for
   144  // new commits, and posts any new commits to the dashboard.
   145  // It only returns a non-nil error.
   146  func (r *Repo) Watch() error {
   147  	for {
   148  		if err := r.fetch(); err != nil {
   149  			return err
   150  		}
   151  		if err := r.update(true); err != nil {
   152  			return err
   153  		}
   154  		remotes, err := r.remotes()
   155  		if err != nil {
   156  			return err
   157  		}
   158  		for _, name := range remotes {
   159  			b, ok := r.branches[name]
   160  			if !ok {
   161  				// skip branch; must be already merged
   162  				continue
   163  			}
   164  			if err := r.postNewCommits(b); err != nil {
   165  				return err
   166  			}
   167  		}
   168  		time.Sleep(*pollInterval)
   169  	}
   170  }
   171  
   172  func (r *Repo) logf(format string, args ...interface{}) {
   173  	p := "go"
   174  	if r.path != "" {
   175  		p = path.Base(r.path)
   176  	}
   177  	log.Printf(p+": "+format, args...)
   178  }
   179  
   180  // postNewCommits looks for unseen commits on the specified branch and
   181  // posts them to the dashboard.
   182  func (r *Repo) postNewCommits(b *Branch) error {
   183  	if b.Head == b.LastSeen {
   184  		return nil
   185  	}
   186  	c := b.LastSeen
   187  	if c == nil {
   188  		// Haven't seen anything on this branch yet:
   189  		if b.Name == master {
   190  			// For the master branch, bootstrap by creating a dummy
   191  			// commit with a lone child that is the initial commit.
   192  			c = &Commit{}
   193  			for _, c2 := range r.commits {
   194  				if c2.Parent == "" {
   195  					c.children = []*Commit{c2}
   196  					break
   197  				}
   198  			}
   199  			if c.children == nil {
   200  				return fmt.Errorf("couldn't find initial commit")
   201  			}
   202  		} else {
   203  			// Find the commit that this branch forked from.
   204  			base, err := r.mergeBase(b.Name, master)
   205  			if err != nil {
   206  				return err
   207  			}
   208  			var ok bool
   209  			c, ok = r.commits[base]
   210  			if !ok {
   211  				return fmt.Errorf("couldn't find base commit: %v", base)
   212  			}
   213  		}
   214  	}
   215  	if err := r.postChildren(b, c); err != nil {
   216  		return err
   217  	}
   218  	b.LastSeen = b.Head
   219  	return nil
   220  }
   221  
   222  // postChildren posts to the dashboard all descendants of the given parent.
   223  // It ignores descendants that are not on the given branch.
   224  func (r *Repo) postChildren(b *Branch, parent *Commit) error {
   225  	for _, c := range parent.children {
   226  		if c.Branch != b.Name {
   227  			continue
   228  		}
   229  		if err := r.postCommit(c); err != nil {
   230  			return err
   231  		}
   232  	}
   233  	for _, c := range parent.children {
   234  		if err := r.postChildren(b, c); err != nil {
   235  			return err
   236  		}
   237  	}
   238  	return nil
   239  }
   240  
   241  // postCommit sends a commit to the build dashboard.
   242  func (r *Repo) postCommit(c *Commit) error {
   243  	r.logf("sending commit to dashboard: %v", c)
   244  
   245  	t, err := time.Parse("Mon, 2 Jan 2006 15:04:05 -0700", c.Date)
   246  	if err != nil {
   247  		return fmt.Errorf("postCommit: parsing date %q for commit %v: %v", c.Date, c, err)
   248  	}
   249  	dc := struct {
   250  		PackagePath string // (empty for main repo commits)
   251  		Hash        string
   252  		ParentHash  string
   253  
   254  		User   string
   255  		Desc   string
   256  		Time   time.Time
   257  		Branch string
   258  
   259  		NeedsBenchmarking bool
   260  	}{
   261  		PackagePath: r.path,
   262  		Hash:        c.Hash,
   263  		ParentHash:  c.Parent,
   264  
   265  		User:   c.Author,
   266  		Desc:   c.Desc,
   267  		Time:   t,
   268  		Branch: strings.TrimPrefix(c.Branch, origin),
   269  
   270  		NeedsBenchmarking: c.NeedsBenchmarking(),
   271  	}
   272  	b, err := json.Marshal(dc)
   273  	if err != nil {
   274  		return fmt.Errorf("postCommit: marshaling request body: %v", err)
   275  	}
   276  
   277  	if !*network {
   278  		if c.Parent != "" {
   279  			if !networkSeen[c.Parent] {
   280  				r.logf("%v: %v", c.Parent, r.commits[c.Parent])
   281  				return fmt.Errorf("postCommit: no parent %v found on dashboard for %v", c.Parent, c)
   282  			}
   283  		}
   284  		if networkSeen[c.Hash] {
   285  			return fmt.Errorf("postCommit: already seen %v", c)
   286  		}
   287  		networkSeen[c.Hash] = true
   288  		return nil
   289  	}
   290  
   291  	u := fmt.Sprintf("%vcommit?version=%v&key=%v", *dashboard, watcherVersion, dashboardKey)
   292  	resp, err := http.Post(u, "text/json", bytes.NewReader(b))
   293  	if err != nil {
   294  		return err
   295  	}
   296  	defer resp.Body.Close()
   297  	if resp.StatusCode != 200 {
   298  		return fmt.Errorf("postCommit: status: %v", resp.Status)
   299  	}
   300  
   301  	var s struct {
   302  		Error string
   303  	}
   304  	err = json.NewDecoder(resp.Body).Decode(&s)
   305  	if err != nil {
   306  		return fmt.Errorf("postCommit: decoding response: %v", err)
   307  	}
   308  	if s.Error != "" {
   309  		return fmt.Errorf("postCommit: error: %v", s.Error)
   310  	}
   311  	return nil
   312  }
   313  
   314  // update looks for new commits and branches,
   315  // and updates the commits and branches maps.
   316  func (r *Repo) update(noisy bool) error {
   317  	remotes, err := r.remotes()
   318  	if err != nil {
   319  		return err
   320  	}
   321  	for _, name := range remotes {
   322  		b := r.branches[name]
   323  
   324  		// Find all unseen commits on this branch.
   325  		revspec := name
   326  		if b != nil {
   327  			// If we know about this branch,
   328  			// only log commits down to the known head.
   329  			revspec = b.Head.Hash + ".." + name
   330  		} else if revspec != master {
   331  			// If this is an unknown non-master branch,
   332  			// log up to where it forked from master.
   333  			base, err := r.mergeBase(name, master)
   334  			if err != nil {
   335  				return err
   336  			}
   337  			revspec = base + ".." + name
   338  		}
   339  		log, err := r.log("--topo-order", revspec)
   340  		if err != nil {
   341  			return err
   342  		}
   343  		if len(log) == 0 {
   344  			// No commits to handle; carry on.
   345  			continue
   346  		}
   347  
   348  		// Add unknown commits to r.commits.
   349  		var added []*Commit
   350  		for _, c := range log {
   351  			// Sanity check: we shouldn't see the same commit twice.
   352  			if _, ok := r.commits[c.Hash]; ok {
   353  				return fmt.Errorf("found commit we already knew about: %v", c.Hash)
   354  			}
   355  			if noisy {
   356  				r.logf("found new commit %v", c)
   357  			}
   358  			c.Branch = name
   359  			r.commits[c.Hash] = c
   360  			added = append(added, c)
   361  		}
   362  
   363  		// Link added commits.
   364  		for _, c := range added {
   365  			if c.Parent == "" {
   366  				// This is the initial commit; no parent.
   367  				r.logf("no parents for initial commit %v", c)
   368  				continue
   369  			}
   370  			// Find parent commit.
   371  			p, ok := r.commits[c.Parent]
   372  			if !ok {
   373  				return fmt.Errorf("can't find parent %q for %v", c.Parent, c)
   374  			}
   375  			// Link parent Commit.
   376  			c.parent = p
   377  			// Link child Commits.
   378  			p.children = append(p.children, c)
   379  		}
   380  
   381  		// Update branch head, or add newly discovered branch.
   382  		head := log[0]
   383  		if b != nil {
   384  			// Known branch; update head.
   385  			b.Head = head
   386  			r.logf("updated branch head: %v", b)
   387  		} else {
   388  			// It's a new branch; add it.
   389  			seen, err := r.lastSeen(head.Hash)
   390  			if err != nil {
   391  				return err
   392  			}
   393  			b = &Branch{Name: name, Head: head, LastSeen: seen}
   394  			r.branches[name] = b
   395  			r.logf("found branch: %v", b)
   396  		}
   397  	}
   398  
   399  	return nil
   400  }
   401  
   402  // lastSeen finds the most recent commit the dashboard has seen,
   403  // starting at the specified head. If the dashboard hasn't seen
   404  // any of the commits from head to the beginning, it returns nil.
   405  func (r *Repo) lastSeen(head string) (*Commit, error) {
   406  	h, ok := r.commits[head]
   407  	if !ok {
   408  		return nil, fmt.Errorf("lastSeen: can't find %q in commits", head)
   409  	}
   410  
   411  	var s []*Commit
   412  	for c := h; c != nil; c = c.parent {
   413  		s = append(s, c)
   414  	}
   415  
   416  	var err error
   417  	i := sort.Search(len(s), func(i int) bool {
   418  		if err != nil {
   419  			return false
   420  		}
   421  		ok, err = r.dashSeen(s[i].Hash)
   422  		return ok
   423  	})
   424  	switch {
   425  	case err != nil:
   426  		return nil, fmt.Errorf("lastSeen: %v", err)
   427  	case i < len(s):
   428  		return s[i], nil
   429  	default:
   430  		// Dashboard saw no commits.
   431  		return nil, nil
   432  	}
   433  }
   434  
   435  // dashSeen reports whether the build dashboard knows the specified commit.
   436  func (r *Repo) dashSeen(hash string) (bool, error) {
   437  	if !*network {
   438  		return networkSeen[hash], nil
   439  	}
   440  	v := url.Values{"hash": {hash}, "packagePath": {r.path}}
   441  	u := *dashboard + "commit?" + v.Encode()
   442  	resp, err := http.Get(u)
   443  	if err != nil {
   444  		return false, err
   445  	}
   446  	defer resp.Body.Close()
   447  	if resp.StatusCode != 200 {
   448  		return false, fmt.Errorf("status: %v", resp.Status)
   449  	}
   450  	var s struct {
   451  		Error string
   452  	}
   453  	err = json.NewDecoder(resp.Body).Decode(&s)
   454  	if err != nil {
   455  		return false, err
   456  	}
   457  	switch s.Error {
   458  	case "":
   459  		// Found one.
   460  		return true, nil
   461  	case "Commit not found":
   462  		// Commit not found, keep looking for earlier commits.
   463  		return false, nil
   464  	default:
   465  		return false, fmt.Errorf("dashboard: %v", s.Error)
   466  	}
   467  }
   468  
   469  // mergeBase returns the hash of the merge base for revspecs a and b.
   470  func (r *Repo) mergeBase(a, b string) (string, error) {
   471  	cmd := exec.Command("git", "merge-base", a, b)
   472  	cmd.Dir = r.root
   473  	out, err := cmd.CombinedOutput()
   474  	if err != nil {
   475  		return "", fmt.Errorf("git merge-base: %v", err)
   476  	}
   477  	return string(bytes.TrimSpace(out)), nil
   478  }
   479  
   480  // remotes returns a slice of remote branches known to the git repo.
   481  // It always puts "origin/master" first.
   482  func (r *Repo) remotes() ([]string, error) {
   483  	cmd := exec.Command("git", "branch", "-r")
   484  	cmd.Dir = r.root
   485  	out, err := cmd.CombinedOutput()
   486  	if err != nil {
   487  		return nil, fmt.Errorf("git branch: %v", err)
   488  	}
   489  	bs := []string{master}
   490  	for _, b := range strings.Split(string(out), "\n") {
   491  		b = strings.TrimSpace(b)
   492  		// Ignore aliases, blank lines, and master (it's already in bs).
   493  		if b == "" || strings.Contains(b, "->") || b == master {
   494  			continue
   495  		}
   496  		// Ignore pre-go1 release branches; they are just noise.
   497  		if strings.HasPrefix(b, origin+"release-branch.r") {
   498  			continue
   499  		}
   500  		bs = append(bs, b)
   501  	}
   502  	return bs, nil
   503  }
   504  
   505  const logFormat = `--format=format:%H
   506  %P
   507  %an <%ae>
   508  %cD
   509  %B
   510  ` + logBoundary
   511  
   512  const logBoundary = `_-_- magic boundary -_-_`
   513  
   514  // log runs "git log" with the supplied arguments
   515  // and parses the output into Commit values.
   516  func (r *Repo) log(dir string, args ...string) ([]*Commit, error) {
   517  	args = append([]string{"log", "--date=rfc", logFormat}, args...)
   518  	cmd := exec.Command("git", args...)
   519  	cmd.Dir = r.root
   520  	out, err := cmd.CombinedOutput()
   521  	if err != nil {
   522  		return nil, fmt.Errorf("git log %v: %v", strings.Join(args, " "), err)
   523  	}
   524  
   525  	// We have a commit with description that contains 0x1b byte.
   526  	// Mercurial does not escape it, but xml.Unmarshal does not accept it.
   527  	// TODO(adg): do we still need to scrub this? Probably.
   528  	out = bytes.Replace(out, []byte{0x1b}, []byte{'?'}, -1)
   529  
   530  	var cs []*Commit
   531  	for _, text := range strings.Split(string(out), logBoundary) {
   532  		text = strings.TrimSpace(text)
   533  		if text == "" {
   534  			continue
   535  		}
   536  		p := strings.SplitN(text, "\n", 5)
   537  		if len(p) != 5 {
   538  			return nil, fmt.Errorf("git log %v: malformed commit: %q", strings.Join(args, " "), text)
   539  		}
   540  		cs = append(cs, &Commit{
   541  			Hash: p[0],
   542  			// TODO(adg): This may break with branch merges.
   543  			Parent: strings.Split(p[1], " ")[0],
   544  			Author: p[2],
   545  			Date:   p[3],
   546  			Desc:   strings.TrimSpace(p[4]),
   547  			// TODO(adg): populate Files
   548  		})
   549  	}
   550  	return cs, nil
   551  }
   552  
   553  // fetch runs "git fetch" in the repository root.
   554  // It tries three times, just in case it failed because of a transient error.
   555  func (r *Repo) fetch() error {
   556  	var err error
   557  	for tries := 0; tries < 3; tries++ {
   558  		time.Sleep(time.Duration(tries) * 5 * time.Second) // Linear back-off.
   559  		cmd := exec.Command("git", "fetch", "--all")
   560  		cmd.Dir = r.root
   561  		if out, e := cmd.CombinedOutput(); err != nil {
   562  			e = fmt.Errorf("%v\n\n%s", e, out)
   563  			log.Printf("git fetch error %v: %v", r.root, e)
   564  			if err == nil {
   565  				err = e
   566  			}
   567  			continue
   568  		}
   569  		return nil
   570  	}
   571  	return err
   572  }
   573  
   574  // Branch represents a Mercurial branch.
   575  type Branch struct {
   576  	Name     string
   577  	Head     *Commit
   578  	LastSeen *Commit // the last commit posted to the dashboard
   579  }
   580  
   581  func (b *Branch) String() string {
   582  	return fmt.Sprintf("%q(Head: %v LastSeen: %v)", b.Name, b.Head, b.LastSeen)
   583  }
   584  
   585  // Commit represents a single Git commit.
   586  type Commit struct {
   587  	Hash   string
   588  	Author string
   589  	Date   string // Format: "Mon, 2 Jan 2006 15:04:05 -0700"
   590  	Desc   string // Plain text, first line is a short description.
   591  	Parent string
   592  	Branch string
   593  	Files  string
   594  
   595  	// For walking the graph.
   596  	parent   *Commit
   597  	children []*Commit
   598  }
   599  
   600  func (c *Commit) String() string {
   601  	s := c.Hash
   602  	if c.Branch != "" {
   603  		s += fmt.Sprintf("[%v]", strings.TrimPrefix(c.Branch, origin))
   604  	}
   605  	s += fmt.Sprintf("(%q)", strings.SplitN(c.Desc, "\n", 2)[0])
   606  	return s
   607  }
   608  
   609  // NeedsBenchmarking reports whether the Commit needs benchmarking.
   610  func (c *Commit) NeedsBenchmarking() bool {
   611  	// Do not benchmark branch commits, they are usually not interesting
   612  	// and fall out of the trunk succession.
   613  	if c.Branch != master {
   614  		return false
   615  	}
   616  	// Do not benchmark commits that do not touch source files (e.g. CONTRIBUTORS).
   617  	for _, f := range strings.Split(c.Files, " ") {
   618  		if (strings.HasPrefix(f, "include") || strings.HasPrefix(f, "src")) &&
   619  			!strings.HasSuffix(f, "_test.go") && !strings.Contains(f, "testdata") {
   620  			return true
   621  		}
   622  	}
   623  	return false
   624  }
   625  
   626  func homeDir() string {
   627  	switch runtime.GOOS {
   628  	case "plan9":
   629  		return os.Getenv("home")
   630  	case "windows":
   631  		return os.Getenv("HOMEDRIVE") + os.Getenv("HOMEPATH")
   632  	}
   633  	return os.Getenv("HOME")
   634  }
   635  
   636  func readKey() (string, error) {
   637  	c, err := ioutil.ReadFile(*keyFile)
   638  	if err != nil {
   639  		return "", err
   640  	}
   641  	return string(bytes.TrimSpace(bytes.SplitN(c, []byte("\n"), 2)[0])), nil
   642  }
   643  
   644  // subrepoList fetches a list of sub-repositories from the dashboard
   645  // and returns them as a slice of base import paths.
   646  // Eg, []string{"golang.org/x/tools", "golang.org/x/net"}.
   647  func subrepoList() ([]string, error) {
   648  	if !*network {
   649  		return nil, nil
   650  	}
   651  
   652  	r, err := http.Get(*dashboard + "packages?kind=subrepo")
   653  	if err != nil {
   654  		return nil, fmt.Errorf("subrepo list: %v", err)
   655  	}
   656  	defer r.Body.Close()
   657  	if r.StatusCode != 200 {
   658  		return nil, fmt.Errorf("subrepo list: got status %v", r.Status)
   659  	}
   660  	var resp struct {
   661  		Response []struct {
   662  			Path string
   663  		}
   664  		Error string
   665  	}
   666  	err = json.NewDecoder(r.Body).Decode(&resp)
   667  	if err != nil {
   668  		return nil, fmt.Errorf("subrepo list: %v", err)
   669  	}
   670  	if resp.Error != "" {
   671  		return nil, fmt.Errorf("subrepo list: %v", resp.Error)
   672  	}
   673  	var pkgs []string
   674  	for _, r := range resp.Response {
   675  		pkgs = append(pkgs, r.Path)
   676  	}
   677  	return pkgs, nil
   678  }