golang.org/x/build@v0.0.0-20240506185731-218518f32b70/maintner/gerrit.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Logic to interact with a Gerrit server. Gerrit has an entire Git-based
     6  // protocol for fetching metadata about CL's, reviewers, patch comments, which
     7  // is used here - we don't use the x/build/gerrit client, which hits the API.
     8  // TODO: write about Gerrit's Git API.
     9  
    10  package maintner
    11  
    12  import (
    13  	"bufio"
    14  	"bytes"
    15  	"context"
    16  	"errors"
    17  	"fmt"
    18  	"log"
    19  	"net/url"
    20  	"os"
    21  	"os/exec"
    22  	"path/filepath"
    23  	"regexp"
    24  	"sort"
    25  	"strconv"
    26  	"strings"
    27  	"time"
    28  
    29  	"golang.org/x/build/internal/envutil"
    30  	"golang.org/x/build/maintner/maintpb"
    31  )
    32  
// Gerrit holds information about a number of Gerrit projects.
type Gerrit struct {
	// c is the Corpus this Gerrit value was created for (see initGerrit).
	c        *Corpus
	projects map[string]*GerritProject // keyed by "go.googlesource.com/build"

	// clsReferencingGithubIssue maps a GitHub issue reference to the CLs
	// that reference it. Entries are appended by
	// GerritCL.updateGithubIssueRefs; stale entries are not yet removed.
	clsReferencingGithubIssue map[GitHubIssueRef][]*GerritCL
}
    40  
    41  func normalizeGerritServer(server string) string {
    42  	u, err := url.Parse(server)
    43  	if err == nil && u.Host != "" {
    44  		server = u.Host
    45  	}
    46  	if strings.HasSuffix(server, "-review.googlesource.com") {
    47  		// special case: the review site is hosted at a different URL than the
    48  		// Git checkout URL.
    49  		return strings.Replace(server, "-review.googlesource.com", ".googlesource.com", 1)
    50  	}
    51  	return server
    52  }
    53  
    54  // Project returns the specified Gerrit project if it's known, otherwise
    55  // it returns nil. Server is the Gerrit server's hostname, such as
    56  // "go.googlesource.com".
    57  func (g *Gerrit) Project(server, project string) *GerritProject {
    58  	server = normalizeGerritServer(server)
    59  	return g.projects[server+"/"+project]
    60  }
    61  
    62  // c.mu must be held
    63  func (g *Gerrit) getOrCreateProject(gerritProj string) *GerritProject {
    64  	proj, ok := g.projects[gerritProj]
    65  	if ok {
    66  		return proj
    67  	}
    68  	proj = &GerritProject{
    69  		gerrit: g,
    70  		proj:   gerritProj,
    71  		cls:    map[int32]*GerritCL{},
    72  		remote: map[gerritCLVersion]GitHash{},
    73  		ref:    map[string]GitHash{},
    74  		commit: map[GitHash]*GitCommit{},
    75  		need:   map[GitHash]bool{},
    76  	}
    77  	g.projects[gerritProj] = proj
    78  	return proj
    79  }
    80  
    81  // ForeachProjectUnsorted calls fn for each known Gerrit project.
    82  // Iteration ends if fn returns a non-nil value.
    83  func (g *Gerrit) ForeachProjectUnsorted(fn func(*GerritProject) error) error {
    84  	for _, p := range g.projects {
    85  		if err := fn(p); err != nil {
    86  			return err
    87  		}
    88  	}
    89  	return nil
    90  }
    91  
// GerritProject represents a single Gerrit project.
type GerritProject struct {
	// gerrit is the owning Gerrit value (set by Gerrit.getOrCreateProject).
	gerrit          *Gerrit
	proj            string // "go.googlesource.com/net"
	// cls holds the project's CLs, keyed by CL number.
	cls             map[int32]*GerritCL
	// remote maps a change ref (CL number plus patch set version, with
	// version 0 meaning the "meta" ref) to the hash that ref points at.
	remote          map[gerritCLVersion]GitHash
	// need is maintained by processMutation: a hash is removed once its
	// commit has been processed.
	need            map[GitHash]bool
	// commit holds the commits processed for this project, keyed by hash.
	commit          map[GitHash]*GitCommit
	numLabelChanges int // incremented (too many times) by meta commits with "Label:" updates
	// dirtyCL is the set of CLs recorded by noteDirtyCL that still need
	// finishProcessingCL to run; it is reset to nil by finishProcessing.
	dirtyCL         map[*GerritCL]struct{}

	// ref are the non-change refs with keys like "HEAD",
	// "refs/heads/master", "refs/tags/v0.8.0", etc.
	//
	// Notably, this excludes the "refs/changes/*" refs matched by
	// rxChangeRef. Those are in the remote map.
	ref map[string]GitHash
}
   110  
   111  // Ref returns a non-change ref, such as "HEAD", "refs/heads/master",
   112  // or "refs/tags/v0.8.0",
   113  // Change refs of the form "refs/changes/*" are not supported.
   114  // The returned hash is the zero value (an empty string) if the ref
   115  // does not exist.
   116  func (gp *GerritProject) Ref(ref string) GitHash {
   117  	return gp.ref[ref]
   118  }
   119  
   120  func (gp *GerritProject) gitDir() string {
   121  	return filepath.Join(gp.gerrit.c.getDataDir(), url.PathEscape(gp.proj))
   122  }
   123  
   124  // NumLabelChanges is an inaccurate count the number of times vote labels have
   125  // changed in this project. This number is monotonically increasing.
   126  // This is not guaranteed to be accurate; it definitely overcounts, but it
   127  // at least increments when changes are made.
   128  // It will not undercount.
   129  func (gp *GerritProject) NumLabelChanges() int {
   130  	// TODO: rename this method.
   131  	return gp.numLabelChanges
   132  }
   133  
   134  // ServerSlashProject returns the server and project together, such as
   135  // "go.googlesource.com/build".
   136  func (gp *GerritProject) ServerSlashProject() string { return gp.proj }
   137  
   138  // Server returns the Gerrit server, such as "go.googlesource.com".
   139  func (gp *GerritProject) Server() string {
   140  	if i := strings.IndexByte(gp.proj, '/'); i != -1 {
   141  		return gp.proj[:i]
   142  	}
   143  	return ""
   144  }
   145  
   146  // Project returns the Gerrit project on the server, such as "go" or "crypto".
   147  func (gp *GerritProject) Project() string {
   148  	if i := strings.IndexByte(gp.proj, '/'); i != -1 {
   149  		return gp.proj[i+1:]
   150  	}
   151  	return ""
   152  }
   153  
   154  // ForeachNonChangeRef calls fn for each git ref on the server that is
   155  // not a change (code review) ref. In general, these correspond to
   156  // submitted changes.
   157  // fn is called serially with sorted ref names.
   158  // Iteration stops with the first non-nil error returned by fn.
   159  func (gp *GerritProject) ForeachNonChangeRef(fn func(ref string, hash GitHash) error) error {
   160  	refs := make([]string, 0, len(gp.ref))
   161  	for ref := range gp.ref {
   162  		refs = append(refs, ref)
   163  	}
   164  	sort.Strings(refs)
   165  	for _, ref := range refs {
   166  		if err := fn(ref, gp.ref[ref]); err != nil {
   167  			return err
   168  		}
   169  	}
   170  	return nil
   171  }
   172  
   173  // ForeachOpenCL calls fn for each open CL in the repo.
   174  //
   175  // If fn returns an error, iteration ends and ForeachOpenCL returns
   176  // with that error.
   177  //
   178  // The fn function is called serially, with increasingly numbered
   179  // CLs.
   180  func (gp *GerritProject) ForeachOpenCL(fn func(*GerritCL) error) error {
   181  	var s []*GerritCL
   182  	for _, cl := range gp.cls {
   183  		if !cl.complete() || cl.Status != "new" || cl.Private {
   184  			continue
   185  		}
   186  		s = append(s, cl)
   187  	}
   188  	sort.Slice(s, func(i, j int) bool { return s[i].Number < s[j].Number })
   189  	for _, cl := range s {
   190  		if err := fn(cl); err != nil {
   191  			return err
   192  		}
   193  	}
   194  	return nil
   195  }
   196  
   197  // ForeachCLUnsorted calls fn for each CL in the repo, in any order.
   198  //
   199  // If fn returns an error, iteration ends and ForeachCLUnsorted returns with
   200  // that error.
   201  func (gp *GerritProject) ForeachCLUnsorted(fn func(*GerritCL) error) error {
   202  	for _, cl := range gp.cls {
   203  		if !cl.complete() {
   204  			continue
   205  		}
   206  		if err := fn(cl); err != nil {
   207  			return err
   208  		}
   209  	}
   210  	return nil
   211  }
   212  
   213  // CL returns the GerritCL with the given number, or nil if it is not present.
   214  //
   215  // CL numbers are shared across all projects on a Gerrit server, so you can get
   216  // nil unless you have the GerritProject containing that CL.
   217  func (gp *GerritProject) CL(number int32) *GerritCL {
   218  	if cl := gp.cls[number]; cl != nil && cl.complete() {
   219  		return cl
   220  	}
   221  	return nil
   222  }
   223  
   224  // GitCommit returns the provided git commit.
   225  func (gp *GerritProject) GitCommit(hash string) (*GitCommit, error) {
   226  	if len(hash) != 40 {
   227  		// TODO: support prefix lookups. build a trie. But
   228  		// for now just avoid panicking in gitHashFromHexStr.
   229  		return nil, fmt.Errorf("git hash %q is not 40 characters", hash)
   230  	}
   231  	var buf [20]byte
   232  	_, err := decodeHexStr(buf[:], hash)
   233  	if err != nil {
   234  		return nil, fmt.Errorf("git hash %q is not a valid hex string: %w", hash, err)
   235  	}
   236  	c := gp.commit[GitHash(buf[:])]
   237  	if c == nil {
   238  		// TODO: return an error that the caller can unpack with errors.Is or
   239  		// errors.As to distinguish this case.
   240  		return nil, fmt.Errorf("git commit %s not found in project", hash)
   241  	}
   242  	return c, nil
   243  }
   244  
   245  func (gp *GerritProject) logf(format string, args ...interface{}) {
   246  	log.Printf("gerrit "+gp.proj+": "+format, args...)
   247  }
   248  
// gerritCLVersion is a value type used as a map key to store a CL
// number and a patchset version. Its Version field is overloaded:
// a Version of 0 refers to the CL's "meta" metadata commit rather than
// to a patchset.
type gerritCLVersion struct {
	CLNumber int32
	Version  int32 // version 0 is used for the "meta" ref.
}
   256  
// A GerritCL represents a single change in Gerrit.
type GerritCL struct {
	// Project is the project this CL is part of.
	Project *GerritProject

	// Number is the CL number on the Gerrit server (e.g. 1, 2, 3). Gerrit CL
	// numbers are sparse (CL N does not guarantee that CL N-1 exists) and
	// Gerrit issues CLs out of order - it may issue CL N, then CL (N - 18),
	// then CL (N - 40).
	Number int32

	// Created is the CL creation time.
	Created time.Time

	// Version is the number of versions of the patchset for this
	// CL seen so far. It starts at 1.
	Version int32

	// Commit is the git commit of the latest version of this CL.
	// Previous versions are available via CommitAtVersion.
	// Commit is always non-nil.
	Commit *GitCommit

	// branch is a cache of the latest "Branch: " value seen from
	// MetaCommits' commit message values, stripped of any
	// "refs/heads/" prefix. It's usually "master".
	// It is refreshed by updateBranch and exposed through Branch.
	branch string

	// Meta is the head of the most recent Gerrit "meta" commit
	// for this CL. This is guaranteed to be a linear history
	// back to a CL-specific root commit for this meta branch.
	// Meta will always be non-nil.
	Meta *GerritMeta

	// Metas contains the history of Meta commits, from the oldest (root)
	// to the most recent. The last item in the slice is the same
	// value as the GerritCL.Meta field.
	// The Metas slice will always contain at least 1 element.
	Metas []*GerritMeta

	// Status will be "merged", "abandoned", "new", or "draft".
	Status string

	// Private indicates whether this is a private CL.
	// Empirically, it seems that one meta commit of private CLs is
	// sometimes visible to everybody, even when the rest of the details
	// and later meta commits are not. In general, if you see this
	// being set to true, treat this CL as if it doesn't exist.
	Private bool

	// GitHubIssueRefs are parsed references to GitHub issues.
	// Multiple references to the same issue are deduplicated.
	GitHubIssueRefs []GitHubIssueRef

	// Messages contains all of the messages for this CL, in sorted order.
	Messages []*GerritMessage
}
   314  
   315  // complete reports whether cl is complete.
   316  // A CL is considered complete if its Meta and Commit fields are non-nil,
   317  // and the Metas slice contains at least 1 element.
   318  func (cl *GerritCL) complete() bool {
   319  	return cl.Meta != nil &&
   320  		len(cl.Metas) >= 1 &&
   321  		cl.Commit != nil
   322  }
   323  
// GerritMessage is a Gerrit reply that is attached to the CL as a whole, and
// not to a file or line of a patch set.
//
// Maintner does very little parsing or formatting of a Message body. Messages
// are stored the same way they are stored in the API.
type GerritMessage struct {
	// Meta is the commit containing the message.
	Meta *GitCommit

	// Version is the patch set version this message was sent on.
	Version int32

	// Message is the raw message contents from Gerrit (a subset
	// of the raw git commit message), starting with "Patch Set
	// nnnn".
	Message string

	// Date is when this message was stored (the commit time of
	// the git commit).
	Date time.Time

	// Author is the author of the commit. This takes the form "Gerrit User
	// 13437 <13437@62eb7196-b449-3ce5-99f1-c037f21e1705>", where the number
	// before the '@' sign is your Gerrit user ID, and the UUID after the '@' sign
	// seems to be the same for all commits for the same Gerrit server, across
	// projects.
	//
	// TODO: Merge the *GitPerson object here and for a person's Git commits
	// (which use their real email) via the user ID, so they point to the same
	// object.
	Author *GitPerson
}
   356  
   357  // References reports whether cl includes a commit message reference
   358  // to the provided Github issue ref.
   359  func (cl *GerritCL) References(ref GitHubIssueRef) bool {
   360  	for _, eref := range cl.GitHubIssueRefs {
   361  		if eref == ref {
   362  			return true
   363  		}
   364  	}
   365  	return false
   366  }
   367  
   368  // Branch returns the CL's branch, with any "refs/heads/" prefix removed.
   369  func (cl *GerritCL) Branch() string { return cl.branch }
   370  
   371  func (cl *GerritCL) updateBranch() {
   372  	for i := len(cl.Metas) - 1; i >= 0; i-- {
   373  		mc := cl.Metas[i]
   374  		branch := lineValue(mc.Commit.Msg, "Branch:")
   375  		if branch != "" {
   376  			cl.branch = strings.TrimPrefix(branch, "refs/heads/")
   377  			return
   378  		}
   379  	}
   380  }
   381  
   382  // lineValueOK extracts a value from an RFC 822-style "key: value" series of lines.
   383  // If all is,
   384  //
   385  //	foo: bar
   386  //	bar: baz
   387  //
   388  // lineValue(all, "foo:") returns "bar". It trims any whitespace.
   389  // The prefix is case sensitive and must include the colon.
   390  // The ok value reports whether a line with such a prefix is found, even if its
   391  // value is empty. If ok is true, the rest value contains the subsequent lines.
   392  func lineValueOK(all, prefix string) (value, rest string, ok bool) {
   393  	orig := all
   394  	consumed := 0
   395  	for {
   396  		i := strings.Index(all, prefix)
   397  		if i == -1 {
   398  			return "", "", false
   399  		}
   400  		if i > 0 && all[i-1] != '\n' && all[i-1] != '\r' {
   401  			all = all[i+len(prefix):]
   402  			consumed += i + len(prefix)
   403  			continue
   404  		}
   405  		val := all[i+len(prefix):]
   406  		consumed += i + len(prefix)
   407  		if nl := strings.IndexByte(val, '\n'); nl != -1 {
   408  			consumed += nl + 1
   409  			val = val[:nl+1]
   410  		} else {
   411  			consumed = len(orig)
   412  		}
   413  		return strings.TrimSpace(val), orig[consumed:], true
   414  	}
   415  }
   416  
   417  func lineValue(all, prefix string) string {
   418  	value, _, _ := lineValueOK(all, prefix)
   419  	return value
   420  }
   421  
   422  func lineValueRest(all, prefix string) (value, rest string) {
   423  	value, rest, _ = lineValueOK(all, prefix)
   424  	return
   425  }
   426  
   427  // WorkInProgress reports whether the CL has its Work-in-progress bit set, per
   428  // https://gerrit-review.googlesource.com/Documentation/intro-user.html#wip
   429  func (cl *GerritCL) WorkInProgress() bool {
   430  	var wip bool
   431  	for _, m := range cl.Metas {
   432  		switch lineValue(m.Commit.Msg, "Work-in-progress:") {
   433  		case "true":
   434  			wip = true
   435  		case "false":
   436  			wip = false
   437  		}
   438  	}
   439  	return wip
   440  }
   441  
   442  // ChangeID returns the Gerrit "Change-Id: Ixxxx" line's Ixxxx
   443  // value from the cl.Msg, if any.
   444  func (cl *GerritCL) ChangeID() string {
   445  	id := cl.Footer("Change-Id:")
   446  	if strings.HasPrefix(id, "I") && len(id) == 41 {
   447  		return id
   448  	}
   449  	return ""
   450  }
   451  
   452  // Footer returns the value of a line of the form <key>: value from
   453  // the CL’s commit message. The key is case-sensitive and must end in
   454  // a colon.
   455  // An empty string is returned if there is no value for key.
   456  func (cl *GerritCL) Footer(key string) string {
   457  	if len(key) == 0 || key[len(key)-1] != ':' {
   458  		panic("Footer key does not end in colon")
   459  	}
   460  	// TODO: git footers are treated as multimaps. Account for this.
   461  	return lineValue(cl.Commit.Msg, key)
   462  }
   463  
   464  // OwnerID returns the ID of the CL’s owner. It will return -1 on error.
   465  func (cl *GerritCL) OwnerID() int {
   466  	if !cl.complete() {
   467  		return -1
   468  	}
   469  	// Meta commits caused by the owner of a change have an email of the form
   470  	// <user id>@<uuid of gerrit server>.
   471  	email := cl.Metas[0].Commit.Author.Email()
   472  	idx := strings.Index(email, "@")
   473  	if idx == -1 {
   474  		return -1
   475  	}
   476  	id, err := strconv.Atoi(email[:idx])
   477  	if err != nil {
   478  		return -1
   479  	}
   480  	return id
   481  }
   482  
   483  // Owner returns the author of the first commit to the CL. It returns nil on error.
   484  func (cl *GerritCL) Owner() *GitPerson {
   485  	// The owner of a change is a numeric ID that can have more than one email
   486  	// associated with it, but the email associated with the very first upload is
   487  	// designated as the owner of the change by Gerrit.
   488  	hash, ok := cl.Project.remote[gerritCLVersion{CLNumber: cl.Number, Version: 1}]
   489  	if !ok {
   490  		return nil
   491  	}
   492  	commit, ok := cl.Project.commit[hash]
   493  	if !ok {
   494  		return nil
   495  	}
   496  	return commit.Author
   497  }
   498  
   499  // Subject returns the subject of the latest commit message.
   500  // The subject is separated from the body by a blank line.
   501  func (cl *GerritCL) Subject() string {
   502  	if i := strings.Index(cl.Commit.Msg, "\n\n"); i >= 0 {
   503  		return strings.Replace(cl.Commit.Msg[:i], "\n", " ", -1)
   504  	}
   505  	return strings.Replace(cl.Commit.Msg, "\n", " ", -1)
   506  }
   507  
   508  // CommitAtVersion returns the git commit of the specified version of this CL.
   509  // It returns nil if version is not in the range [1, cl.Version].
   510  func (cl *GerritCL) CommitAtVersion(version int32) *GitCommit {
   511  	if version < 1 || version > cl.Version {
   512  		return nil
   513  	}
   514  	hash, ok := cl.Project.remote[gerritCLVersion{CLNumber: cl.Number, Version: version}]
   515  	if !ok {
   516  		return nil
   517  	}
   518  	return cl.Project.commit[hash]
   519  }
   520  
   521  func (cl *GerritCL) updateGithubIssueRefs() {
   522  	gp := cl.Project
   523  	gerrit := gp.gerrit
   524  	gc := cl.Commit
   525  
   526  	oldRefs := cl.GitHubIssueRefs
   527  	newRefs := gerrit.c.parseGithubRefs(gp.proj, gc.Msg)
   528  	cl.GitHubIssueRefs = newRefs
   529  	for _, ref := range newRefs {
   530  		if !clSliceContains(gerrit.clsReferencingGithubIssue[ref], cl) {
   531  			// TODO: make this as small as
   532  			// possible? Most will have length
   533  			// 1. Care about default capacity of
   534  			// 2?
   535  			gerrit.clsReferencingGithubIssue[ref] = append(gerrit.clsReferencingGithubIssue[ref], cl)
   536  		}
   537  	}
   538  	for _, ref := range oldRefs {
   539  		if !cl.References(ref) {
   540  			// TODO: remove ref from gerrit.clsReferencingGithubIssue
   541  			// It could be a map of maps I suppose, but not as compact.
   542  			// So uses a slice as the second layer, since there will normally
   543  			// be one item.
   544  		}
   545  	}
   546  }
   547  
   548  // c.mu must be held
   549  func (c *Corpus) initGerrit() {
   550  	if c.gerrit != nil {
   551  		return
   552  	}
   553  	c.gerrit = &Gerrit{
   554  		c:                         c,
   555  		projects:                  map[string]*GerritProject{},
   556  		clsReferencingGithubIssue: map[GitHubIssueRef][]*GerritCL{},
   557  	}
   558  }
   559  
// watchedGerritRepo is a Gerrit project that was registered for watching
// through Corpus.TrackGerrit.
type watchedGerritRepo struct {
	project *GerritProject
}
   563  
   564  // TrackGerrit registers the Gerrit project with the given project as a project
   565  // to watch and append to the mutation log. Only valid in leader mode.
   566  // The provided string should be of the form "hostname/project", without a scheme
   567  // or trailing slash.
   568  func (c *Corpus) TrackGerrit(gerritProj string) {
   569  	if c.mutationLogger == nil {
   570  		panic("can't TrackGerrit in non-leader mode")
   571  	}
   572  	c.mu.Lock()
   573  	defer c.mu.Unlock()
   574  
   575  	if strings.Count(gerritProj, "/") != 1 {
   576  		panic(fmt.Sprintf("gerrit project argument %q expected to contain exactly 1 slash", gerritProj))
   577  	}
   578  	c.initGerrit()
   579  	if _, dup := c.gerrit.projects[gerritProj]; dup {
   580  		panic("duplicated watched gerrit project " + gerritProj)
   581  	}
   582  	project := c.gerrit.getOrCreateProject(gerritProj)
   583  	if project == nil {
   584  		panic("gerrit project not created")
   585  	}
   586  	c.watchedGerritRepos = append(c.watchedGerritRepos, watchedGerritRepo{
   587  		project: project,
   588  	})
   589  }
   590  
   591  // called with c.mu Locked
   592  func (c *Corpus) processGerritMutation(gm *maintpb.GerritMutation) {
   593  	if c.gerrit == nil {
   594  		// TODO: option to ignore mutation if user isn't interested.
   595  		c.initGerrit()
   596  	}
   597  	gp, ok := c.gerrit.projects[gm.Project]
   598  	if !ok {
   599  		// TODO: option to ignore mutation if user isn't interested.
   600  		// For now, always process the record.
   601  		gp = c.gerrit.getOrCreateProject(gm.Project)
   602  	}
   603  	gp.processMutation(gm)
   604  }
   605  
// statusIndicator is the marker getGerritStatus searches for in a commit
// message; the status value is expected immediately after it.
var statusIndicator = "\nStatus: "
   607  
// statuses lists the status values that getGerritStatus recognizes after
// statusIndicator.
//
// The Go Gerrit site does not really use the "draft" status much, but if
// you need to test it, create a dummy commit and then run
//
//	git push origin HEAD:refs/drafts/master
var statuses = []string{"merged", "abandoned", "draft", "new"}
   613  
   614  // getGerritStatus returns a Gerrit status for a commit, or the empty string to
   615  // indicate the commit did not show a status.
   616  //
   617  // getGerritStatus relies on the Gerrit code review convention of amending
   618  // the meta commit to include the current status of the CL. The Gerrit search
   619  // bar allows you to search for changes with the following statuses: "open",
   620  // "reviewed", "closed", "abandoned", "merged", "draft", "pending". The REST API
   621  // returns only "NEW", "DRAFT", "ABANDONED", "MERGED". Gerrit attaches "draft",
   622  // "abandoned", "new", and "merged" statuses to some meta commits; you may have
   623  // to search the current meta commit's parents to find the last good commit.
   624  func getGerritStatus(commit *GitCommit) string {
   625  	idx := strings.Index(commit.Msg, statusIndicator)
   626  	if idx == -1 {
   627  		return ""
   628  	}
   629  	off := idx + len(statusIndicator)
   630  	for _, status := range statuses {
   631  		if strings.HasPrefix(commit.Msg[off:], status) {
   632  			return status
   633  		}
   634  	}
   635  	return ""
   636  }
   637  
// errTooManyParents is returned by foreachCommit when it reaches a commit
// that has more than one parent.
var errTooManyParents = errors.New("maintner: too many commit parents")
   639  
   640  // foreachCommit walks an entire linear git history, starting at commit itself,
   641  // and iterating over all of its parents. commit must be non-nil.
   642  // f is called for each commit until an error is returned from f, or a commit has no parent.
   643  //
   644  // foreachCommit returns errTooManyParents (and stops processing) if a commit
   645  // has more than one parent.
   646  // An error is returned if a commit has a parent that cannot be found.
   647  //
   648  // Corpus.mu must be held.
   649  func (gp *GerritProject) foreachCommit(commit *GitCommit, f func(*GitCommit) error) error {
   650  	c := gp.gerrit.c
   651  	for {
   652  		if err := f(commit); err != nil {
   653  			return err
   654  		}
   655  		if len(commit.Parents) == 0 {
   656  			// No parents, we're at the end of the linear history.
   657  			return nil
   658  		}
   659  		if len(commit.Parents) > 1 {
   660  			return errTooManyParents
   661  		}
   662  		parentHash := commit.Parents[0].Hash // meta tree has no merge commits
   663  		commit = c.gitCommit[parentHash]
   664  		if commit == nil {
   665  			return fmt.Errorf("parent commit %v not found", parentHash)
   666  		}
   667  	}
   668  }
   669  
   670  // getGerritMessage parses a Gerrit comment from the given commit or returns nil
   671  // if there wasn't one.
   672  //
   673  // Corpus.mu must be held.
   674  func (gp *GerritProject) getGerritMessage(commit *GitCommit) *GerritMessage {
   675  	const existVerPhrase = "\nPatch Set "
   676  	const newVerPhrase = "\nUploaded patch set "
   677  
   678  	startExist := strings.Index(commit.Msg, existVerPhrase)
   679  	startNew := strings.Index(commit.Msg, newVerPhrase)
   680  	var start int
   681  	var phrase string
   682  	switch {
   683  	case startExist == -1 && startNew == -1:
   684  		return nil
   685  	case startExist == -1 || (startNew != -1 && startNew < startExist):
   686  		phrase = newVerPhrase
   687  		start = startNew
   688  	case startNew == -1 || (startExist != -1 && startExist < startNew):
   689  		phrase = existVerPhrase
   690  		start = startExist
   691  	}
   692  
   693  	numStart := start + len(phrase)
   694  	colon := strings.IndexByte(commit.Msg[numStart:], ':')
   695  	if colon == -1 {
   696  		return nil
   697  	}
   698  	num := commit.Msg[numStart : numStart+colon]
   699  	if strings.Contains(num, "\n") || strings.Contains(num, ".") {
   700  		// Spanned lines. Didn't match expected comment form
   701  		// we care about (comments with vote changes), like:
   702  		//
   703  		//    Uploaded patch set 5: Some-Vote=+2
   704  		//
   705  		// For now, treat such meta updates (new uploads only)
   706  		// as not comments.
   707  		return nil
   708  	}
   709  	version, err := strconv.ParseInt(num, 10, 32)
   710  	if err != nil {
   711  		gp.logf("for phrase %q at %d, unexpected patch set number in %s; err: %v, message: %s", phrase, start, commit.Hash, err, commit.Msg)
   712  		return nil
   713  	}
   714  	start++
   715  	v := commit.Msg[start:]
   716  	l := 0
   717  	for {
   718  		i := strings.IndexByte(v, '\n')
   719  		if i < 0 {
   720  			return nil
   721  		}
   722  		if strings.HasPrefix(v[:i], "Patch-set:") {
   723  			// two newlines before the Patch-set message
   724  			v = commit.Msg[start : start+l-2]
   725  			break
   726  		}
   727  		v = v[i+1:]
   728  		l = l + i + 1
   729  	}
   730  	return &GerritMessage{
   731  		Meta:    commit,
   732  		Author:  commit.Author,
   733  		Date:    commit.CommitTime,
   734  		Message: v,
   735  		Version: int32(version),
   736  	}
   737  }
   738  
   739  func reverseGerritMessages(ss []*GerritMessage) {
   740  	for i := len(ss)/2 - 1; i >= 0; i-- {
   741  		opp := len(ss) - 1 - i
   742  		ss[i], ss[opp] = ss[opp], ss[i]
   743  	}
   744  }
   745  
   746  func reverseGerritMetas(ss []*GerritMeta) {
   747  	for i := len(ss)/2 - 1; i >= 0; i-- {
   748  		opp := len(ss) - 1 - i
   749  		ss[i], ss[opp] = ss[opp], ss[i]
   750  	}
   751  }
   752  
// processMutation applies a Gerrit mutation to gp in three passes: it
// records the mutation's commits, removes its deleted refs, and then
// updates the refs it names (non-change refs go into gp.ref, change refs
// update gp.remote and the corresponding GerritCL).
//
// called with c.mu Locked
func (gp *GerritProject) processMutation(gm *maintpb.GerritMutation) {
	c := gp.gerrit.c

	// Pass 1: register each commit with the project, drop it from the
	// needed set, and mark its parents as needed.
	for _, commitp := range gm.Commits {
		gc, err := c.processGitCommit(commitp)
		if err != nil {
			gp.logf("error processing commit %q: %v", commitp.Sha1, err)
			continue
		}
		gp.commit[gc.Hash] = gc
		delete(gp.need, gc.Hash)

		for _, p := range gc.Parents {
			gp.markNeededCommit(p.Hash)
		}
	}

	// Pass 2: forget deleted non-change refs.
	for _, refName := range gm.DeletedRefs {
		delete(gp.ref, refName)
		// TODO: this doesn't delete change refs (from
		// gp.remote) yet, mostly because those don't tend to
		// ever get deleted and we haven't yet needed it. If
		// we ever need it, the mutation generation side would
		// also need to be updated.
	}

	// Pass 3: apply ref updates.
	for _, refp := range gm.Refs {
		refName := refp.Ref
		hash := c.gitHashFromHexStr(refp.Sha1)
		m := rxChangeRef.FindStringSubmatch(refName)
		if m == nil {
			if strings.HasPrefix(refName, "refs/meta/") {
				// Some of these slipped in to the data
				// before we started ignoring them. So ignore them here.
				continue
			}
			// Misc ref, not a change ref.
			if _, ok := c.gitCommit[hash]; !ok {
				gp.logf("ERROR: non-change ref %v references unknown hash %v; ignoring", refp, hash)
				continue
			}
			gp.ref[refName] = hash
			continue
		}

		// Change ref: m[1] is the CL number and m[2] the version part,
		// which gerritVersionNumber converts (0 denotes the meta ref).
		// Refs whose number or version cannot be parsed are skipped silently.
		clNum64, err := strconv.ParseInt(m[1], 10, 32)
		version, ok := gerritVersionNumber(m[2])
		if !ok || err != nil {
			continue
		}
		gc, ok := c.gitCommit[hash]
		if !ok {
			gp.logf("ERROR: ref %v references unknown hash %v; ignoring", refp, hash)
			continue
		}
		clv := gerritCLVersion{int32(clNum64), version}
		gp.remote[clv] = hash
		cl := gp.getOrCreateCL(clv.CLNumber)

		if clv.Version == 0 { // is a meta commit
			cl.Meta = newGerritMeta(gc, cl)
			gp.noteDirtyCL(cl) // needs processing at end of sync
		} else {
			// A patch set ref: it becomes the CL's current commit and
			// version, and the GitHub issue references are re-parsed.
			cl.Commit = gc
			cl.Version = clv.Version
			cl.updateGithubIssueRefs()
		}
		// NOTE(review): logging is limited to when c.didInit is set,
		// presumably to keep the initial load quiet — confirm.
		if c.didInit {
			gp.logf("Ref %+v => %v", clv, hash)
		}
	}
}
   826  
   827  // noteDirtyCL notes a CL that needs further processing before the corpus
   828  // is returned to the user.
   829  // cl.Meta must be non-nil.
   830  //
   831  // called with Corpus.mu Locked
   832  func (gp *GerritProject) noteDirtyCL(cl *GerritCL) {
   833  	if cl.Meta == nil {
   834  		panic("noteDirtyCL given a GerritCL with a nil Meta field")
   835  	}
   836  	if gp.dirtyCL == nil {
   837  		gp.dirtyCL = make(map[*GerritCL]struct{})
   838  	}
   839  	gp.dirtyCL[cl] = struct{}{}
   840  }
   841  
   842  // called with Corpus.mu Locked
   843  func (gp *GerritProject) finishProcessing() {
   844  	for cl := range gp.dirtyCL {
   845  		// All dirty CLs have non-nil Meta, so it's safe to call finishProcessingCL.
   846  		gp.finishProcessingCL(cl)
   847  	}
   848  	gp.dirtyCL = nil
   849  }
   850  
   851  // finishProcessingCL fixes up invariants before the cl can be returned back to the user.
   852  // cl.Meta must be non-nil.
   853  //
   854  // called with Corpus.mu Locked
   855  func (gp *GerritProject) finishProcessingCL(cl *GerritCL) {
   856  	c := gp.gerrit.c
   857  
   858  	mostRecentMetaCommit, ok := c.gitCommit[cl.Meta.Commit.Hash]
   859  	if !ok {
   860  		log.Printf("WARNING: GerritProject(%q).finishProcessingCL failed to find CL %v hash %s",
   861  			gp.ServerSlashProject(), cl.Number, cl.Meta.Commit.Hash)
   862  		return
   863  	}
   864  
   865  	foundStatus := ""
   866  
   867  	// Walk from the newest meta commit backwards, so we store the messages
   868  	// in reverse order and then flip the array before setting on the
   869  	// GerritCL object.
   870  	var backwardMessages []*GerritMessage
   871  	var backwardMetas []*GerritMeta
   872  
   873  	err := gp.foreachCommit(mostRecentMetaCommit, func(gc *GitCommit) error {
   874  		if strings.Contains(gc.Msg, "\nLabel: ") {
   875  			gp.numLabelChanges++
   876  		}
   877  		if strings.Contains(gc.Msg, "\nPrivate: true\n") {
   878  			cl.Private = true
   879  		}
   880  		if gc.GerritMeta == nil {
   881  			gc.GerritMeta = newGerritMeta(gc, cl)
   882  		}
   883  		if foundStatus == "" {
   884  			foundStatus = getGerritStatus(gc)
   885  		}
   886  		backwardMetas = append(backwardMetas, gc.GerritMeta)
   887  		if message := gp.getGerritMessage(gc); message != nil {
   888  			backwardMessages = append(backwardMessages, message)
   889  		}
   890  		return nil
   891  	})
   892  	if err != nil {
   893  		log.Printf("WARNING: GerritProject(%q).finishProcessingCL failed to walk CL %v meta history: %v",
   894  			gp.ServerSlashProject(), cl.Number, err)
   895  		return
   896  	}
   897  
   898  	if foundStatus != "" {
   899  		cl.Status = foundStatus
   900  	} else if cl.Status == "" {
   901  		cl.Status = "new"
   902  	}
   903  
   904  	reverseGerritMessages(backwardMessages)
   905  	cl.Messages = backwardMessages
   906  
   907  	reverseGerritMetas(backwardMetas)
   908  	cl.Metas = backwardMetas
   909  
   910  	cl.Created = cl.Metas[0].Commit.CommitTime
   911  
   912  	cl.updateBranch()
   913  }
   914  
   915  // clSliceContains reports whether cls contains cl.
   916  func clSliceContains(cls []*GerritCL, cl *GerritCL) bool {
   917  	for _, v := range cls {
   918  		if v == cl {
   919  			return true
   920  		}
   921  	}
   922  	return false
   923  }
   924  
   925  // c.mu must be held
   926  func (gp *GerritProject) markNeededCommit(hash GitHash) {
   927  	if _, ok := gp.commit[hash]; ok {
   928  		// Already have it.
   929  		return
   930  	}
   931  	gp.need[hash] = true
   932  }
   933  
   934  // c.mu must be held
   935  func (gp *GerritProject) getOrCreateCL(num int32) *GerritCL {
   936  	cl, ok := gp.cls[num]
   937  	if ok {
   938  		return cl
   939  	}
   940  	cl = &GerritCL{
   941  		Project: gp,
   942  		Number:  num,
   943  	}
   944  	gp.cls[num] = cl
   945  	return cl
   946  }
   947  
   948  func gerritVersionNumber(s string) (version int32, ok bool) {
   949  	if s == "meta" {
   950  		return 0, true
   951  	}
   952  	v, err := strconv.ParseInt(s, 10, 32)
   953  	if err != nil {
   954  		return 0, false
   955  	}
   956  	return int32(v), true
   957  }
   958  
   959  // rxRemoteRef matches "git ls-remote" lines.
   960  //
   961  // sample row:
   962  // fd1e71f1594ce64941a85428ddef2fbb0ad1023e	refs/changes/99/30599/3
   963  //
   964  // Capture values:
   965  //
   966  //	$0: whole match
   967  //	$1: "fd1e71f1594ce64941a85428ddef2fbb0ad1023e"
   968  //	$2: "30599" (CL number)
   969  //	$3: "1", "2" (patchset number) or "meta" (a/ special commit
   970  //	    holding the comments for a commit)
   971  //
   972  // The "99" in the middle covers all CL's that end in "99", so
   973  // refs/changes/99/99/1, refs/changes/99/199/meta.
   974  var rxRemoteRef = regexp.MustCompile(`^([0-9a-f]{40,})\s+refs/changes/[0-9a-f]{2}/([0-9]+)/(.+)$`)
   975  
   976  // $1: change num
   977  // $2: version or "meta"
   978  var rxChangeRef = regexp.MustCompile(`^refs/changes/[0-9a-f]{2}/([0-9]+)/(meta|(?:\d+))`)
   979  
   980  func (gp *GerritProject) sync(ctx context.Context, loop bool) error {
   981  	if err := gp.init(ctx); err != nil {
   982  		gp.logf("init: %v", err)
   983  		return err
   984  	}
   985  	activityCh := gp.gerrit.c.activityChan("gerrit:" + gp.proj)
   986  	for {
   987  		if err := gp.syncOnce(ctx); err != nil {
   988  			if ee, ok := err.(*exec.ExitError); ok {
   989  				err = fmt.Errorf("%v; stderr=%q", err, ee.Stderr)
   990  			}
   991  			gp.logf("sync: %v", err)
   992  			return err
   993  		}
   994  		if !loop {
   995  			return nil
   996  		}
   997  		timer := time.NewTimer(5 * time.Minute)
   998  		select {
   999  		case <-ctx.Done():
  1000  			timer.Stop()
  1001  			return ctx.Err()
  1002  		case <-activityCh:
  1003  			timer.Stop()
  1004  		case <-timer.C:
  1005  		}
  1006  	}
  1007  }
  1008  
  1009  // syncMissingCommits is a cleanup step to fix a previous maintner bug where
  1010  // refs were updated without all their reachable commits being indexed and
  1011  // recorded in the log. This should only ever run once, and only in Go's history.
  1012  // If we restarted the log from the beginning this wouldn't be necessary.
  1013  func (gp *GerritProject) syncMissingCommits(ctx context.Context) error {
  1014  	c := gp.gerrit.c
  1015  	var hashes []GitHash
  1016  	c.mu.Lock()
  1017  	for hash := range gp.need {
  1018  		hashes = append(hashes, hash)
  1019  	}
  1020  	c.mu.Unlock()
  1021  	if len(hashes) == 0 {
  1022  		return nil
  1023  	}
  1024  
  1025  	gp.logf("fixing indexing of %d missing commits", len(hashes))
  1026  	if err := gp.fetchHashes(ctx, hashes); err != nil {
  1027  		return err
  1028  	}
  1029  
  1030  	n, err := gp.syncCommits(ctx)
  1031  	if err != nil {
  1032  		return err
  1033  	}
  1034  	gp.logf("%d missing commits indexed", n)
  1035  	return nil
  1036  }
  1037  
// syncOnce performs one synchronization pass against the "origin" remote
// of gp's git directory:
//
//  1. index any commits still listed in gp.need (syncMissingCommits);
//  2. run "git fetch origin", interrupting it if it is still running
//     after 10 minutes;
//  3. run "git ls-remote" and compare every listed ref with gp.remote
//     (change refs matching rxRemoteRef) or gp.ref (other refs accepted
//     by trackGerritRef);
//  4. record a mutation naming the refs in gp.ref that ls-remote no
//     longer lists;
//  5. fetch the hashes of new or changed refs in batches of 250, index
//     their commits, and record one ref mutation per batch.
//
// It returns the first error encountered. Mutations already recorded
// before the error are not undone.
func (gp *GerritProject) syncOnce(ctx context.Context) error {
	if err := gp.syncMissingCommits(ctx); err != nil {
		return err
	}

	c := gp.gerrit.c
	gitDir := gp.gitDir()

	t0 := time.Now()
	cmd := exec.CommandContext(ctx, "git", "fetch", "origin")
	envutil.SetDir(cmd, gitDir)
	// Enable extra Git tracing in case the fetch hangs.
	envutil.SetEnv(cmd,
		"GIT_TRACE2_EVENT=1",
		"GIT_TRACE_CURL_NO_DATA=1",
	)
	// Collect stdout and stderr in one buffer; it is only reported if the
	// fetch fails.
	cmd.Stdout = new(bytes.Buffer)
	cmd.Stderr = cmd.Stdout

	// The 'git fetch' needs a timeout in case it hangs, but to avoid spurious
	// timeouts (and live-lock) the timeout should be (at least) an order of
	// magnitude longer than we expect the operation to actually take. Moreover,
	// exec.CommandContext sends SIGKILL, which may terminate the command without
	// giving it a chance to flush useful trace entries, so we'll terminate it
	// manually instead (see https://golang.org/issue/22757).
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("git fetch origin: %v", err)
	}
	timer := time.AfterFunc(10*time.Minute, func() {
		cmd.Process.Signal(os.Interrupt)
	})
	err := cmd.Wait()
	fetchDuration := time.Since(t0).Round(time.Millisecond)
	timer.Stop()
	if err != nil {
		return fmt.Errorf("git fetch origin: %v after %v, %s", err, fetchDuration, cmd.Stdout)
	}
	gp.logf("ran git fetch origin in %v", fetchDuration)

	t0 = time.Now()
	cmd = exec.CommandContext(ctx, "git", "ls-remote")
	envutil.SetDir(cmd, gitDir)
	out, err := cmd.CombinedOutput()
	lsRemoteDuration := time.Since(t0).Round(time.Millisecond)
	if err != nil {
		return fmt.Errorf("git ls-remote in %s: %v after %v, %s", gitDir, err, lsRemoteDuration, out)
	}
	gp.logf("ran git ls-remote in %v", lsRemoteDuration)

	// changedRefs and toFetch are appended to together, so entry i of one
	// corresponds to entry i of the other. The batching loop at the end
	// relies on this when it slices both by len(batch).
	var changedRefs []*maintpb.GitRef
	var toFetch []GitHash

	bs := bufio.NewScanner(bytes.NewReader(out))

	// Take the lock here to access gp.remote and call c.gitHashFromHex.
	// It's acceptable to take such a coarse-looking lock because
	// it's not actually around I/O: all the input from ls-remote has
	// already been slurped into memory.
	c.mu.Lock()
	refExists := map[string]bool{} // whether ref is in this ls-remote output
	for bs.Scan() {
		line := bs.Bytes()
		tab := bytes.IndexByte(line, '\t')
		if tab == -1 {
			// Lines without a tab are not "<hash>\t<ref>" rows. Lines
			// starting with "From " are skipped silently; anything else
			// is logged.
			if !strings.HasPrefix(bs.Text(), "From ") {
				gp.logf("bogus ls-remote line: %q", line)
			}
			continue
		}
		sha1 := string(line[:tab])
		refName := strings.TrimSpace(string(line[tab+1:]))
		refExists[refName] = true
		hash := c.gitHashFromHexStr(sha1)

		var needFetch bool

		m := rxRemoteRef.FindSubmatch(line)
		if m != nil {
			// Change ref: compare with the hash recorded for this
			// (CL number, version) pair.
			clNum, err := strconv.ParseInt(string(m[2]), 10, 32)
			version, ok := gerritVersionNumber(string(m[3]))
			if err != nil || !ok {
				continue
			}
			curHash := gp.remote[gerritCLVersion{int32(clNum), version}]
			needFetch = curHash != hash
		} else if trackGerritRef(refName) && gp.ref[refName] != hash {
			// Any other tracked ref: compare with the hash recorded in gp.ref.
			needFetch = true
			gp.logf("ref %q = %q", refName, sha1)
		}

		if needFetch {
			toFetch = append(toFetch, hash)
			changedRefs = append(changedRefs, &maintpb.GitRef{
				Ref:  refName,
				Sha1: string(sha1),
			})
		}
	}
	// Refs we have on record that were not listed by this ls-remote run.
	var deletedRefs []string
	for n := range gp.ref {
		if !refExists[n] {
			gp.logf("ref %q now deleted", n)
			deletedRefs = append(deletedRefs, n)
		}
	}
	c.mu.Unlock()

	// A scanner error means the ls-remote output was not fully read, so
	// bail out before recording any mutation based on it.
	if err := bs.Err(); err != nil {
		gp.logf("ls-remote scanning error: %v", err)
		return err
	}
	if len(deletedRefs) > 0 {
		c.addMutation(&maintpb.Mutation{
			Gerrit: &maintpb.GerritMutation{
				Project:     gp.proj,
				DeletedRefs: deletedRefs,
			},
		})
	}
	if len(changedRefs) == 0 {
		return nil
	}
	gp.logf("%d new refs", len(changedRefs))
	const batchSize = 250
	for len(toFetch) > 0 {
		batch := toFetch
		if len(batch) > batchSize {
			batch = batch[:batchSize]
		}
		if err := gp.fetchHashes(ctx, batch); err != nil {
			return err
		}

		// Mark the batch's hashes as needed (unless already known) so
		// that syncCommits below picks them up.
		c.mu.Lock()
		for _, hash := range batch {
			gp.markNeededCommit(hash)
		}
		c.mu.Unlock()

		n, err := gp.syncCommits(ctx)
		if err != nil {
			return err
		}
		toFetch = toFetch[len(batch):]
		gp.logf("synced %v commits for %d new hashes, %d hashes remain", n, len(batch), len(toFetch))

		// The ref mutation for a batch is recorded only after its
		// commits have been indexed.
		c.addMutation(&maintpb.Mutation{
			Gerrit: &maintpb.GerritMutation{
				Project: gp.proj,
				Refs:    changedRefs[:len(batch)],
			}})
		changedRefs = changedRefs[len(batch):]
	}

	return nil
}
  1194  
  1195  func (gp *GerritProject) syncCommits(ctx context.Context) (n int, err error) {
  1196  	c := gp.gerrit.c
  1197  	lastLog := time.Now()
  1198  	for {
  1199  		hash := gp.commitToIndex()
  1200  		if hash == "" {
  1201  			return n, nil
  1202  		}
  1203  		now := time.Now()
  1204  		if lastLog.Before(now.Add(-1 * time.Second)) {
  1205  			lastLog = now
  1206  			gp.logf("parsing commits (%v done)", n)
  1207  		}
  1208  		commit, err := parseCommitFromGit(gp.gitDir(), hash)
  1209  		if err != nil {
  1210  			return n, err
  1211  		}
  1212  		c.addMutation(&maintpb.Mutation{
  1213  			Gerrit: &maintpb.GerritMutation{
  1214  				Project: gp.proj,
  1215  				Commits: []*maintpb.GitCommit{commit},
  1216  			},
  1217  		})
  1218  		n++
  1219  	}
  1220  }
  1221  
  1222  func (gp *GerritProject) commitToIndex() GitHash {
  1223  	c := gp.gerrit.c
  1224  
  1225  	c.mu.RLock()
  1226  	defer c.mu.RUnlock()
  1227  	for hash := range gp.need {
  1228  		return hash
  1229  	}
  1230  	return ""
  1231  }
  1232  
  1233  var (
  1234  	statusSpace = []byte("Status: ")
  1235  )
  1236  
  1237  func (gp *GerritProject) fetchHashes(ctx context.Context, hashes []GitHash) error {
  1238  	args := []string{"fetch", "--quiet", "origin"}
  1239  	for _, hash := range hashes {
  1240  		args = append(args, hash.String())
  1241  	}
  1242  	gp.logf("fetching %v hashes...", len(hashes))
  1243  	t0 := time.Now()
  1244  	cmd := exec.CommandContext(ctx, "git", args...)
  1245  	envutil.SetDir(cmd, gp.gitDir())
  1246  	out, err := cmd.CombinedOutput()
  1247  	d := time.Since(t0).Round(time.Millisecond)
  1248  	if err != nil {
  1249  		gp.logf("error fetching %d hashes after %v: %s", len(hashes), d, out)
  1250  		return err
  1251  	}
  1252  	gp.logf("fetched %v hashes in %v", len(hashes), d)
  1253  	return nil
  1254  }
  1255  
  1256  func formatExecError(err error) string {
  1257  	if ee, ok := err.(*exec.ExitError); ok {
  1258  		return fmt.Sprintf("%v; stderr=%q", err, ee.Stderr)
  1259  	}
  1260  	return fmt.Sprint(err)
  1261  }
  1262  
  1263  func (gp *GerritProject) init(ctx context.Context) error {
  1264  	gitDir := gp.gitDir()
  1265  	if err := os.MkdirAll(gitDir, 0755); err != nil {
  1266  		return err
  1267  	}
  1268  	// try to short circuit a git init error, since the init error matching is
  1269  	// brittle
  1270  	if _, err := exec.LookPath("git"); err != nil {
  1271  		return fmt.Errorf("looking for git binary: %v", err)
  1272  	}
  1273  
  1274  	if _, err := os.Stat(filepath.Join(gitDir, ".git", "config")); err == nil {
  1275  		cmd := exec.CommandContext(ctx, "git", "remote", "-v")
  1276  		envutil.SetDir(cmd, gitDir)
  1277  		remoteBytes, err := cmd.Output()
  1278  		if err != nil {
  1279  			return fmt.Errorf("running git remote -v in %v: %v", gitDir, formatExecError(err))
  1280  		}
  1281  		if !strings.Contains(string(remoteBytes), "origin") && !strings.Contains(string(remoteBytes), "https://"+gp.proj) {
  1282  			return fmt.Errorf("didn't find origin & gp.url in remote output %s", string(remoteBytes))
  1283  		}
  1284  		gp.logf("git directory exists.")
  1285  		return nil
  1286  	}
  1287  
  1288  	cmd := exec.CommandContext(ctx, "git", "init")
  1289  	buf := new(bytes.Buffer)
  1290  	cmd.Stdout = buf
  1291  	cmd.Stderr = buf
  1292  	envutil.SetDir(cmd, gitDir)
  1293  	if err := cmd.Run(); err != nil {
  1294  		log.Printf(`Error running "git init": %s`, buf.String())
  1295  		return err
  1296  	}
  1297  	buf.Reset()
  1298  	cmd = exec.CommandContext(ctx, "git", "remote", "add", "origin", "https://"+gp.proj)
  1299  	cmd.Stdout = buf
  1300  	cmd.Stderr = buf
  1301  	envutil.SetDir(cmd, gitDir)
  1302  	if err := cmd.Run(); err != nil {
  1303  		log.Printf(`Error running "git remote add origin": %s`, buf.String())
  1304  		return err
  1305  	}
  1306  
  1307  	return nil
  1308  }
  1309  
  1310  // trackGerritRef reports whether we care to record changes about the
  1311  // given ref.
  1312  func trackGerritRef(ref string) bool {
  1313  	if strings.HasPrefix(ref, "refs/users/") {
  1314  		return false
  1315  	}
  1316  	if strings.HasPrefix(ref, "refs/meta/") {
  1317  		return false
  1318  	}
  1319  	if strings.HasPrefix(ref, "refs/cache-automerge/") {
  1320  		return false
  1321  	}
  1322  	return true
  1323  }
  1324  
  1325  func (g *Gerrit) check() error {
  1326  	for key, gp := range g.projects {
  1327  		if err := gp.check(); err != nil {
  1328  			return fmt.Errorf("%s: %v", key, err)
  1329  		}
  1330  	}
  1331  	return nil
  1332  }
  1333  
  1334  // called with its Corpus.mu locked. (called by
  1335  // Corpus.finishProcessing; read comment there)
  1336  func (g *Gerrit) finishProcessing() {
  1337  	if g == nil {
  1338  		return
  1339  	}
  1340  	for _, gp := range g.projects {
  1341  		gp.finishProcessing()
  1342  	}
  1343  }
  1344  
  1345  func (gp *GerritProject) check() error {
  1346  	if len(gp.need) != 0 {
  1347  		return fmt.Errorf("%d missing commits", len(gp.need))
  1348  	}
  1349  	for hash, gc := range gp.commit {
  1350  		if gc.Committer == placeholderCommitter {
  1351  			return fmt.Errorf("git commit for key %q was placeholder", hash)
  1352  		}
  1353  		if gc.Hash != hash {
  1354  			return fmt.Errorf("git commit for key %q had GitCommit.Hash %q", hash, gc.Hash)
  1355  		}
  1356  		for _, pc := range gc.Parents {
  1357  			if _, ok := gp.commit[pc.Hash]; !ok {
  1358  				return fmt.Errorf("git commit %q exists but its parent %q does not", gc.Hash, pc.Hash)
  1359  			}
  1360  		}
  1361  	}
  1362  	return nil
  1363  }
  1364  
// GerritMeta represents a Git commit in the Gerrit NoteDb meta
// format.
//
// Values are built with newGerritMeta, which also fills in flags.
type GerritMeta struct {
	// Commit points up to the git commit for this Gerrit NoteDB meta commit.
	Commit *GitCommit
	// CL is the Gerrit CL this metadata is for.
	CL *GerritCL

	// flags holds properties of the meta commit computed once by
	// newGerritMeta, so that methods such as HashtagEdits can return
	// early without reparsing the commit message.
	flags gerritMetaFlags
}
  1375  
// gerritMetaFlags is a bit set of properties of a meta commit, stored in
// GerritMeta.flags.
type gerritMetaFlags uint8

const (
	// metaFlagHashtagEdit indicates that the meta commit edits the hashtags on the commit.
	// newGerritMeta sets it when the commit's "Tag: " footer value is
	// "autogenerated:gerrit:setHashtag".
	metaFlagHashtagEdit gerritMetaFlags = 1 << iota
)
  1382  
  1383  func newGerritMeta(gc *GitCommit, cl *GerritCL) *GerritMeta {
  1384  	m := &GerritMeta{Commit: gc, CL: cl}
  1385  
  1386  	if msg := m.Commit.Msg; strings.Contains(msg, "autogenerated:gerrit:setHashtag") && m.ActionTag() == "autogenerated:gerrit:setHashtag" {
  1387  		m.flags |= metaFlagHashtagEdit
  1388  	}
  1389  	return m
  1390  }
  1391  
  1392  // Footer returns the "key: value" lines at the base of the commit.
  1393  func (m *GerritMeta) Footer() string {
  1394  	i := strings.LastIndex(m.Commit.Msg, "\n\n")
  1395  	if i == -1 {
  1396  		return ""
  1397  	}
  1398  	return m.Commit.Msg[i+2:]
  1399  }
  1400  
  1401  // Hashtags returns the set of hashtags on m's CL as of the time of m.
  1402  func (m *GerritMeta) Hashtags() GerritHashtags {
  1403  	// If this GerritMeta set hashtags, use it.
  1404  	tags, _, ok := lineValueOK(m.Footer(), "Hashtags: ")
  1405  	if ok {
  1406  		return GerritHashtags(tags)
  1407  	}
  1408  
  1409  	// Otherwise, look at older metas (from most recent to oldest)
  1410  	// to find most recent value. Ignore anything that's newer
  1411  	// than m.
  1412  	sawThisMeta := false // whether we've seen 'm'
  1413  	metas := m.CL.Metas
  1414  	for i := len(metas) - 1; i >= 0; i-- {
  1415  		mp := metas[i]
  1416  		if mp.Commit.Hash == m.Commit.Hash {
  1417  			sawThisMeta = true
  1418  			continue
  1419  		}
  1420  		if !sawThisMeta {
  1421  			continue
  1422  		}
  1423  		if tags, _, ok := lineValueOK(mp.Footer(), "Hashtags: "); ok {
  1424  			return GerritHashtags(tags)
  1425  		}
  1426  	}
  1427  	return ""
  1428  }
  1429  
  1430  // ActionTag returns the Gerrit "Tag" value from the meta commit.
  1431  // These are of the form "autogenerated:gerrit:setHashtag".
  1432  func (m *GerritMeta) ActionTag() string {
  1433  	return lineValue(m.Footer(), "Tag: ")
  1434  }
  1435  
  1436  // HashtagEdits returns the hashtags added and removed by this meta commit,
  1437  // and whether this meta commit actually modified hashtags.
  1438  func (m *GerritMeta) HashtagEdits() (added, removed GerritHashtags, ok bool) {
  1439  	// Return early for the majority of meta commits that don't edit hashtags.
  1440  	if m.flags&metaFlagHashtagEdit == 0 {
  1441  		return
  1442  	}
  1443  
  1444  	msg := m.Commit.Msg
  1445  
  1446  	// Parse lines of form:
  1447  	//
  1448  	// Hashtag removed: bar
  1449  	// Hashtags removed: foo, bar
  1450  	// Hashtag added: bar
  1451  	// Hashtags added: foo, bar
  1452  	for len(msg) > 0 {
  1453  		value, rest := lineValueRest(msg, "Hash")
  1454  		msg = rest
  1455  		colon := strings.IndexByte(value, ':')
  1456  		if colon != -1 {
  1457  			action := value[:colon]
  1458  			value := GerritHashtags(strings.TrimSpace(value[colon+1:]))
  1459  			switch action {
  1460  			case "tag added", "tags added":
  1461  				added = value
  1462  			case "tag removed", "tags removed":
  1463  				removed = value
  1464  			}
  1465  		}
  1466  	}
  1467  	ok = added != "" || removed != ""
  1468  	return
  1469  }
  1470  
  1471  // HashtagsAdded returns the hashtags added by this meta commit, if any.
  1472  func (m *GerritMeta) HashtagsAdded() GerritHashtags {
  1473  	added, _, _ := m.HashtagEdits()
  1474  	return added
  1475  }
  1476  
  1477  // HashtagsRemoved returns the hashtags removed by this meta commit, if any.
  1478  func (m *GerritMeta) HashtagsRemoved() GerritHashtags {
  1479  	_, removed, _ := m.HashtagEdits()
  1480  	return removed
  1481  }
  1482  
  1483  // LabelVotes returns a map from label name to voter email to their vote.
  1484  //
  1485  // This is relatively expensive to call compared to other methods in maintner.
  1486  // It is not currently cached.
  1487  func (m *GerritMeta) LabelVotes() (map[string]map[string]int8, error) {
  1488  	if m.CL == nil {
  1489  		panic("GerritMeta has nil CL field")
  1490  	}
  1491  	// To calculate votes as the time of the 'm' meta commit,
  1492  	// we need to consider the meta commits before it.
  1493  	// Let's see which number in the (linear) meta history
  1494  	// we are.
  1495  	ourIndex := -1
  1496  	for i, mc := range m.CL.Metas {
  1497  		if mc == m {
  1498  			ourIndex = i
  1499  			break
  1500  		}
  1501  	}
  1502  	if ourIndex == -1 {
  1503  		panic("LabelVotes called on GerritMeta not in its m.CL.Metas slice")
  1504  	}
  1505  	labels := map[string]map[string]int8{}
  1506  
  1507  	history := m.CL.Metas[:ourIndex+1]
  1508  	var lastCommit *GitCommit
  1509  	for _, mc := range history {
  1510  		footer := mc.Footer()
  1511  		isNew := strings.Contains(footer, "\nTag: autogenerated:gerrit:newPatchSet\n")
  1512  		email := mc.Commit.Author.Email()
  1513  		if isNew {
  1514  			if commit := lineValue(footer, "Commit: "); commit != "" {
  1515  				// TODO: implement Gerrit's vote copying. For example,
  1516  				// label.Label-Name.copyAllScoresIfNoChange defaults to true (as it is with Go's server)
  1517  				// https://gerrit-review.googlesource.com/Documentation/config-labels.html#label_copyAllScoresIfNoChange
  1518  				// We don't have the information in Maintner to do this, though.
  1519  				// One approximation is:
  1520  				newCommit, err := m.CL.Project.GitCommit(commit)
  1521  				if err != nil {
  1522  					return nil, fmt.Errorf("LabelVotes: invalid Commit in footer on CL %v, meta-CL %x: %v", m.CL.Number, mc.Commit.Hash, err)
  1523  				}
  1524  				if lastCommit != nil {
  1525  					if !lastCommit.SameDiffStat(newCommit) {
  1526  						// TODO: this should really use
  1527  						// the Gerrit server's project
  1528  						// config, including the
  1529  						// All-Projects config, but
  1530  						// that's not in Maintner
  1531  						// either.
  1532  						delete(labels, "Run-TryBot")
  1533  						delete(labels, "TryBot-Result")
  1534  					}
  1535  				}
  1536  				lastCommit = newCommit
  1537  			}
  1538  		}
  1539  
  1540  		remain := footer
  1541  		for len(remain) > 0 {
  1542  			var labelEqVal string
  1543  			labelEqVal, remain = lineValueRest(remain, "Label: ")
  1544  			if labelEqVal != "" {
  1545  				label, value, whose := parseGerritLabelValue(labelEqVal)
  1546  				if label != "" {
  1547  					if whose == "" {
  1548  						whose = email
  1549  					}
  1550  					if label[0] == '-' {
  1551  						label = label[1:]
  1552  						if m := labels[label]; m != nil {
  1553  							delete(m, whose)
  1554  						}
  1555  					} else {
  1556  						m := labels[label]
  1557  						if m == nil {
  1558  							m = make(map[string]int8)
  1559  							labels[label] = m
  1560  						}
  1561  						m[whose] = value
  1562  
  1563  					}
  1564  				}
  1565  			}
  1566  		}
  1567  	}
  1568  
  1569  	return labels, nil
  1570  }
  1571  
  1572  // parseGerritLabelValue parses a Gerrit NoteDb "Label: ..." value.
  1573  // It can take forms and return values such as:
  1574  //
  1575  //	"Run-TryBot=+1" => ("Run-TryBot", 1, "")
  1576  //	"-Run-TryBot" => ("-Run-TryBot", 0, "")
  1577  //	"-Run-TryBot " => ("-Run-TryBot", 0, "")
  1578  //	"Run-TryBot=+1 Brad Fitzpatrick <5065@62eb7196-b449-3ce5-99f1-c037f21e1705>" =>
  1579  //	      ("Run-TryBot", 1, "5065@62eb7196-b449-3ce5-99f1-c037f21e1705")
  1580  //	"-TryBot-Result Gobot Gobot <5976@62eb7196-b449-3ce5-99f1-c037f21e1705>" =>
  1581  //	      ("-TryBot-Result", 0, "5976@62eb7196-b449-3ce5-99f1-c037f21e1705")
  1582  func parseGerritLabelValue(v string) (label string, value int8, whose string) {
  1583  	space := strings.IndexByte(v, ' ')
  1584  	if space != -1 {
  1585  		v, whose = v[:space], v[space+1:]
  1586  		if i := strings.IndexByte(whose, '<'); i == -1 {
  1587  			whose = ""
  1588  		} else {
  1589  			whose = whose[i+1:]
  1590  			if i := strings.IndexByte(whose, '>'); i == -1 {
  1591  				whose = ""
  1592  			} else {
  1593  				whose = whose[:i]
  1594  			}
  1595  		}
  1596  	}
  1597  	v = strings.TrimSpace(v)
  1598  	if eq := strings.IndexByte(v, '='); eq == -1 {
  1599  		label = v
  1600  	} else {
  1601  		label = v[:eq]
  1602  		if n, err := strconv.ParseInt(v[eq+1:], 10, 8); err == nil {
  1603  			value = int8(n)
  1604  		}
  1605  	}
  1606  	return
  1607  }
  1608  
  1609  // GerritHashtags represents a set of "hashtags" on a Gerrit CL.
  1610  //
  1611  // The representation is a comma-separated string, to match Gerrit's
  1612  // internal representation in the meta commits. To support both
  1613  // forms of Gerrit's internal representation, whitespace is optional
  1614  // around the commas.
  1615  type GerritHashtags string
  1616  
  1617  // Contains reports whether the hashtag t is in the set of tags s.
  1618  func (s GerritHashtags) Contains(t string) bool {
  1619  	for len(s) > 0 {
  1620  		comma := strings.IndexByte(string(s), ',')
  1621  		if comma == -1 {
  1622  			return strings.TrimSpace(string(s)) == t
  1623  		}
  1624  		if strings.TrimSpace(string(s[:comma])) == t {
  1625  			return true
  1626  		}
  1627  		s = s[comma+1:]
  1628  	}
  1629  	return false
  1630  }
  1631  
  1632  // Foreach calls fn for each tag in the set s.
  1633  func (s GerritHashtags) Foreach(fn func(string)) {
  1634  	for len(s) > 0 {
  1635  		comma := strings.IndexByte(string(s), ',')
  1636  		if comma == -1 {
  1637  			fn(strings.TrimSpace(string(s)))
  1638  			return
  1639  		}
  1640  		fn(strings.TrimSpace(string(s[:comma])))
  1641  		s = s[comma+1:]
  1642  	}
  1643  }
  1644  
  1645  // Match reports whether fn returns true for any tag in the set s.
  1646  // If fn returns true, iteration stops and Match returns true.
  1647  func (s GerritHashtags) Match(fn func(string) bool) bool {
  1648  	for len(s) > 0 {
  1649  		comma := strings.IndexByte(string(s), ',')
  1650  		if comma == -1 {
  1651  			return fn(strings.TrimSpace(string(s)))
  1652  		}
  1653  		if fn(strings.TrimSpace(string(s[:comma]))) {
  1654  			return true
  1655  		}
  1656  		s = s[comma+1:]
  1657  	}
  1658  	return false
  1659  }
  1660  
  1661  // Len returns the number of tags in the set s.
  1662  func (s GerritHashtags) Len() int {
  1663  	if s == "" {
  1664  		return 0
  1665  	}
  1666  	return strings.Count(string(s), ",") + 1
  1667  }