golang.org/x/build@v0.0.0-20240506185731-218518f32b70/maintner/github.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package maintner
     6  
     7  import (
     8  	"context"
     9  	"encoding/json"
    10  	"fmt"
    11  	"io"
    12  	"log"
    13  	"net/http"
    14  	"net/url"
    15  	"reflect"
    16  	"regexp"
    17  	"runtime"
    18  	"slices"
    19  	"sort"
    20  	"strconv"
    21  	"strings"
    22  	"time"
    23  
    24  	"github.com/golang/protobuf/ptypes"
    25  	"github.com/golang/protobuf/ptypes/timestamp"
    26  	"github.com/google/go-github/github"
    27  	"github.com/gregjones/httpcache"
    28  
    29  	"golang.org/x/build/maintner/maintpb"
    30  	"golang.org/x/oauth2"
    31  	"golang.org/x/sync/errgroup"
    32  	"golang.org/x/time/rate"
    33  )
    34  
// xFromCache is the name of the synthetic response header added by the
// httpcache package for responses fulfilled from cache due to a 304 from
// the server.
const xFromCache = "X-From-Cache"
    38  
    39  // GitHubRepoID is a GitHub org & repo, lowercase.
    40  type GitHubRepoID struct {
    41  	Owner, Repo string
    42  }
    43  
    44  func (id GitHubRepoID) String() string { return id.Owner + "/" + id.Repo }
    45  
    46  func (id GitHubRepoID) valid() bool {
    47  	if id.Owner == "" || id.Repo == "" {
    48  		// TODO: more validation. whatever GitHub requires.
    49  		return false
    50  	}
    51  	return true
    52  }
    53  
// GitHub holds the in-memory GitHub state of a Corpus: the users, teams
// and repos that are known so far.
type GitHub struct {
	c     *Corpus                      // the corpus this state belongs to
	users map[int64]*GitHubUser        // by user ID; allocated lazily
	teams map[int64]*GitHubTeam        // by team ID; allocated lazily
	repos map[GitHubRepoID]*GitHubRepo // by owner/repo
}
    61  
    62  // ForeachRepo calls fn serially for each GitHubRepo, stopping if fn
    63  // returns an error. The function is called with lexically increasing
    64  // repo IDs.
    65  func (g *GitHub) ForeachRepo(fn func(*GitHubRepo) error) error {
    66  	var ids []GitHubRepoID
    67  	for id := range g.repos {
    68  		ids = append(ids, id)
    69  	}
    70  	sort.Slice(ids, func(i, j int) bool {
    71  		if ids[i].Owner < ids[j].Owner {
    72  			return true
    73  		}
    74  		return ids[i].Owner == ids[j].Owner && ids[i].Repo < ids[j].Repo
    75  	})
    76  	for _, id := range ids {
    77  		if err := fn(g.repos[id]); err != nil {
    78  			return err
    79  		}
    80  	}
    81  	return nil
    82  }
    83  
    84  // Repo returns the repo if it's known. Otherwise it returns nil.
    85  func (g *GitHub) Repo(owner, repo string) *GitHubRepo {
    86  	return g.repos[GitHubRepoID{owner, repo}]
    87  }
    88  
    89  func (g *GitHub) getOrCreateRepo(owner, repo string) *GitHubRepo {
    90  	if g == nil {
    91  		panic("cannot call methods on nil GitHub")
    92  	}
    93  	id := GitHubRepoID{owner, repo}
    94  	if !id.valid() {
    95  		return nil
    96  	}
    97  	r, ok := g.repos[id]
    98  	if ok {
    99  		return r
   100  	}
   101  	r = &GitHubRepo{
   102  		github: g,
   103  		id:     id,
   104  		issues: map[int32]*GitHubIssue{},
   105  	}
   106  	g.repos[id] = r
   107  	return r
   108  }
   109  
// GitHubRepo holds the in-memory state of a single GitHub repo: its
// issues, milestones and labels.
type GitHubRepo struct {
	github     *GitHub                    // the GitHub state this repo belongs to
	id         GitHubRepoID               // owner and name of this repo
	issues     map[int32]*GitHubIssue     // num -> issue
	milestones map[int64]*GitHubMilestone // by milestone ID; allocated lazily
	labels     map[int64]*GitHubLabel     // by label ID; allocated lazily
}
   117  
   118  func (gr *GitHubRepo) ID() GitHubRepoID { return gr.id }
   119  
   120  // Issue returns the provided issue number, or nil if it's not known.
   121  func (gr *GitHubRepo) Issue(n int32) *GitHubIssue { return gr.issues[n] }
   122  
   123  // ForeachLabel calls fn for each label in the repo, in unsorted order.
   124  //
   125  // Iteration ends if fn returns an error, with that error.
   126  func (gr *GitHubRepo) ForeachLabel(fn func(*GitHubLabel) error) error {
   127  	for _, lb := range gr.labels {
   128  		if err := fn(lb); err != nil {
   129  			return err
   130  		}
   131  	}
   132  	return nil
   133  }
   134  
   135  // ForeachMilestone calls fn for each milestone in the repo, in unsorted order.
   136  //
   137  // Iteration ends if fn returns an error, with that error.
   138  func (gr *GitHubRepo) ForeachMilestone(fn func(*GitHubMilestone) error) error {
   139  	for _, m := range gr.milestones {
   140  		if err := fn(m); err != nil {
   141  			return err
   142  		}
   143  	}
   144  	return nil
   145  }
   146  
   147  // ForeachIssue calls fn for each issue in the repo.
   148  //
   149  // If fn returns an error, iteration ends and ForeachIssue returns
   150  // with that error.
   151  //
   152  // The fn function is called serially, with increasingly numbered
   153  // issues.
   154  func (gr *GitHubRepo) ForeachIssue(fn func(*GitHubIssue) error) error {
   155  	s := make([]*GitHubIssue, 0, len(gr.issues))
   156  	for _, gi := range gr.issues {
   157  		s = append(s, gi)
   158  	}
   159  	sort.Slice(s, func(i, j int) bool { return s[i].Number < s[j].Number })
   160  	for _, gi := range s {
   161  		if err := fn(gi); err != nil {
   162  			return err
   163  		}
   164  	}
   165  	return nil
   166  }
   167  
   168  // ForeachReview calls fn for each review event on the issue
   169  //
   170  // If the issue is not a PullRequest, then it returns early with no error.
   171  //
   172  // If fn returns an error, iteration ends and ForeachReview returns
   173  // with that error.
   174  //
   175  // The fn function is called serially, in chronological order.
   176  func (pr *GitHubIssue) ForeachReview(fn func(*GitHubReview) error) error {
   177  	if !pr.PullRequest {
   178  		return nil
   179  	}
   180  	s := make([]*GitHubReview, 0, len(pr.reviews))
   181  	for _, rv := range pr.reviews {
   182  		s = append(s, rv)
   183  	}
   184  	sort.Slice(s, func(i, j int) bool { return s[i].Created.Before(s[j].Created) })
   185  	for _, rv := range s {
   186  		if err := fn(rv); err != nil {
   187  			return err
   188  		}
   189  	}
   190  
   191  	return nil
   192  }
   193  
   194  func (g *GitHubRepo) getOrCreateMilestone(id int64) *GitHubMilestone {
   195  	if id == 0 {
   196  		panic("zero id")
   197  	}
   198  	m, ok := g.milestones[id]
   199  	if ok {
   200  		return m
   201  	}
   202  	if g.milestones == nil {
   203  		g.milestones = map[int64]*GitHubMilestone{}
   204  	}
   205  	m = &GitHubMilestone{ID: id}
   206  	g.milestones[id] = m
   207  	return m
   208  }
   209  
   210  func (g *GitHubRepo) getOrCreateLabel(id int64) *GitHubLabel {
   211  	if id == 0 {
   212  		panic("zero id")
   213  	}
   214  	lb, ok := g.labels[id]
   215  	if ok {
   216  		return lb
   217  	}
   218  	if g.labels == nil {
   219  		g.labels = map[int64]*GitHubLabel{}
   220  	}
   221  	lb = &GitHubLabel{ID: id}
   222  	g.labels[id] = lb
   223  	return lb
   224  }
   225  
   226  func (g *GitHubRepo) verbose() bool {
   227  	return g.github != nil && g.github.c != nil && g.github.c.verbose
   228  }
   229  
// GitHubUser represents a GitHub user.
// It is a subset of https://developer.github.com/v3/users/#get-a-single-user
type GitHubUser struct {
	ID int64
	// Login may be empty for a user that is so far known only by ID.
	Login string
}
   236  
// GitHubTeam represents a GitHub team.
// It is a subset of https://developer.github.com/v3/orgs/teams/#get-team
type GitHubTeam struct {
	// ID is the ID of the team.
	ID int64

	// Slug is a URL-friendly representation of the team name.
	// It is unique across a GitHub organization.
	Slug string
}
   246  
// GitHubIssueRef is a reference to an issue (or pull request) number
// in a repo. These are parsed from text making references such as
// "golang/go#1234" or just "#1234" (with an implicit Repo).
//
// The String method dereferences Repo, so Repo must be set.
type GitHubIssueRef struct {
	Repo   *GitHubRepo // must be non-nil
	Number int32       // GitHubIssue.Number
}
   254  
   255  func (r GitHubIssueRef) String() string { return fmt.Sprintf("%s#%d", r.Repo.ID(), r.Number) }
   256  
// GitHubIssue represents a GitHub issue.
// This is maintner's in-memory representation. It differs slightly
// from the API's *github.Issue type, notably in the lack of pointers
// for all fields.
// See https://developer.github.com/v3/issues/#get-a-single-issue
type GitHubIssue struct {
	ID          int64
	Number      int32
	NotExist    bool // if true, rest of fields should be ignored.
	Closed      bool
	Locked      bool
	PullRequest bool // if true, this issue is a Pull Request. All PRs are issues, but not all issues are PRs.
	User        *GitHubUser
	Assignees   []*GitHubUser
	Created     time.Time
	Updated     time.Time
	ClosedAt    time.Time
	ClosedBy    *GitHubUser // TODO(dmitshur): Implement (see golang.org/issue/28745).
	Title       string
	Body        string
	Milestone   *GitHubMilestone       // nil for unknown, noMilestone for none
	Labels      map[int64]*GitHubLabel // label ID => label

	// The *SyncedAsOf fields are compared against Updated to decide
	// whether the corresponding data is up to date.
	commentsUpdatedTil time.Time                   // max comment modtime seen
	commentsSyncedAsOf time.Time                   // as of server's Date header
	comments           map[int64]*GitHubComment    // by comment.ID
	eventMaxTime       time.Time                   // latest time of any event in events map
	eventsSyncedAsOf   time.Time                   // as of server's Date header
	reviewsSyncedAsOf  time.Time                   // as of server's Date header
	events             map[int64]*GitHubIssueEvent // by event.ID
	reviews            map[int64]*GitHubReview     // by event.ID (NOTE(review): presumably the GitHubReview.ID; confirm)
}
   289  
   290  // LastModified reports the most recent time that any known metadata was updated.
   291  // In contrast to the Updated field, LastModified includes comments and events.
   292  //
   293  // TODO(bradfitz): this seems to not be working, at least events
   294  // aren't updating it. Investigate.
   295  func (gi *GitHubIssue) LastModified() time.Time {
   296  	ret := gi.Updated
   297  	if gi.commentsUpdatedTil.After(ret) {
   298  		ret = gi.commentsUpdatedTil
   299  	}
   300  	if gi.eventMaxTime.After(ret) {
   301  		ret = gi.eventMaxTime
   302  	}
   303  	return ret
   304  }
   305  
   306  // HasEvent reports whether there's any GitHubIssueEvent in this
   307  // issue's history of the given type.
   308  func (gi *GitHubIssue) HasEvent(eventType string) bool {
   309  	for _, e := range gi.events {
   310  		if e.Type == eventType {
   311  			return true
   312  		}
   313  	}
   314  	return false
   315  }
   316  
   317  // ForeachEvent calls fn for each event on the issue.
   318  //
   319  // If fn returns an error, iteration ends and ForeachEvent returns
   320  // with that error.
   321  //
   322  // The fn function is called serially, in order of the event's time.
   323  func (gi *GitHubIssue) ForeachEvent(fn func(*GitHubIssueEvent) error) error {
   324  	// TODO: keep these sorted in the corpus
   325  	s := make([]*GitHubIssueEvent, 0, len(gi.events))
   326  	for _, e := range gi.events {
   327  		s = append(s, e)
   328  	}
   329  	sort.Slice(s, func(i, j int) bool {
   330  		ci, cj := s[i].Created, s[j].Created
   331  		if ci.Before(cj) {
   332  			return true
   333  		}
   334  		return ci.Equal(cj) && s[i].ID < s[j].ID
   335  	})
   336  	for _, e := range s {
   337  		if err := fn(e); err != nil {
   338  			return err
   339  		}
   340  	}
   341  	return nil
   342  }
   343  
   344  // ForeachComment calls fn for each event on the issue.
   345  //
   346  // If fn returns an error, iteration ends and ForeachComment returns
   347  // with that error.
   348  //
   349  // The fn function is called serially, in order of the comment's time.
   350  func (gi *GitHubIssue) ForeachComment(fn func(*GitHubComment) error) error {
   351  	// TODO: keep these sorted in the corpus
   352  	s := make([]*GitHubComment, 0, len(gi.comments))
   353  	for _, e := range gi.comments {
   354  		s = append(s, e)
   355  	}
   356  	sort.Slice(s, func(i, j int) bool {
   357  		ci, cj := s[i].Created, s[j].Created
   358  		if ci.Before(cj) {
   359  			return true
   360  		}
   361  		return ci.Equal(cj) && s[i].ID < s[j].ID
   362  	})
   363  	for _, e := range s {
   364  		if err := fn(e); err != nil {
   365  			return err
   366  		}
   367  	}
   368  	return nil
   369  }
   370  
   371  // HasLabel reports whether the issue is labeled with the given label.
   372  func (gi *GitHubIssue) HasLabel(label string) bool {
   373  	for _, lb := range gi.Labels {
   374  		if lb.Name == label {
   375  			return true
   376  		}
   377  	}
   378  	return false
   379  }
   380  
   381  // HasLabelID returns whether the issue has a label with the given ID.
   382  func (gi *GitHubIssue) HasLabelID(id int64) bool {
   383  	_, ok := gi.Labels[id]
   384  	return ok
   385  }
   386  
   387  func (gi *GitHubIssue) getCreatedAt() time.Time {
   388  	if gi == nil {
   389  		return time.Time{}
   390  	}
   391  	return gi.Created
   392  }
   393  
   394  func (gi *GitHubIssue) getUpdatedAt() time.Time {
   395  	if gi == nil {
   396  		return time.Time{}
   397  	}
   398  	return gi.Updated
   399  }
   400  
   401  func (gi *GitHubIssue) getClosedAt() time.Time {
   402  	if gi == nil {
   403  		return time.Time{}
   404  	}
   405  	return gi.ClosedAt
   406  }
   407  
// noMilestone is a sentinel value to explicitly mean no milestone.
// It is recognized by pointer identity (see GitHubMilestone.IsNone).
var noMilestone = new(GitHubMilestone)
   410  
// GitHubLabel represents a GitHub label, identified by its ID.
type GitHubLabel struct {
	ID   int64
	Name string
	// TODO: color?
}
   416  
   417  // GenMutationDiff generates a diff from in-memory state 'a' (which
   418  // may be nil) to the current (non-nil) state b from GitHub. It
   419  // returns nil if there's no difference.
   420  func (a *GitHubLabel) GenMutationDiff(b *github.Label) *maintpb.GithubLabel {
   421  	id := int64(b.GetID())
   422  	if a != nil && a.ID == id && a.Name == b.GetName() {
   423  		// No change.
   424  		return nil
   425  	}
   426  	return &maintpb.GithubLabel{Id: id, Name: b.GetName()}
   427  }
   428  
   429  func (lb *GitHubLabel) processMutation(mut maintpb.GithubLabel) {
   430  	if lb.ID == 0 {
   431  		panic("bogus label ID 0")
   432  	}
   433  	if lb.ID != mut.Id {
   434  		panic(fmt.Sprintf("label ID = %v != mutation ID = %v", lb.ID, mut.Id))
   435  	}
   436  	if mut.Name != "" {
   437  		lb.Name = mut.Name
   438  	}
   439  }
   440  
// GitHubMilestone represents a GitHub milestone, identified by its ID.
type GitHubMilestone struct {
	ID     int64
	Title  string
	Number int32
	Closed bool // set when GitHub reports the milestone's state as "closed"
}
   447  
   448  // IsNone reports whether ms represents the sentinel "no milestone" milestone.
   449  func (ms *GitHubMilestone) IsNone() bool { return ms == noMilestone }
   450  
   451  // IsUnknown reports whether ms is nil, which represents the unknown
   452  // state. Milestones should never be in this state, though.
   453  func (ms *GitHubMilestone) IsUnknown() bool { return ms == nil }
   454  
// emptyMilestone is a non-nil *GitHubMilestone with zero values for
// all fields. GenMutationDiff uses it as the baseline when there is no
// existing in-memory milestone.
var emptyMilestone = new(GitHubMilestone)
   458  
   459  // GenMutationDiff generates a diff from in-memory state 'a' (which
   460  // may be nil) to the current (non-nil) state b from GitHub. It
   461  // returns nil if there's no difference.
   462  func (a *GitHubMilestone) GenMutationDiff(b *github.Milestone) *maintpb.GithubMilestone {
   463  	var ret *maintpb.GithubMilestone // lazily inited by diff
   464  	diff := func() *maintpb.GithubMilestone {
   465  		if ret == nil {
   466  			ret = &maintpb.GithubMilestone{Id: int64(b.GetID())}
   467  		}
   468  		return ret
   469  	}
   470  	if a == nil {
   471  		a = emptyMilestone
   472  	}
   473  	if a.Title != b.GetTitle() {
   474  		diff().Title = b.GetTitle()
   475  	}
   476  	if a.Number != int32(b.GetNumber()) {
   477  		diff().Number = int64(b.GetNumber())
   478  	}
   479  	if closed := b.GetState() == "closed"; a.Closed != closed {
   480  		diff().Closed = &maintpb.BoolChange{Val: closed}
   481  	}
   482  	return ret
   483  }
   484  
   485  func (ms *GitHubMilestone) processMutation(mut maintpb.GithubMilestone) {
   486  	if ms.ID == 0 {
   487  		panic("bogus milestone ID 0")
   488  	}
   489  	if ms.ID != mut.Id {
   490  		panic(fmt.Sprintf("milestone ID = %v != mutation ID = %v", ms.ID, mut.Id))
   491  	}
   492  	if mut.Title != "" {
   493  		ms.Title = mut.Title
   494  	}
   495  	if mut.Number != 0 {
   496  		ms.Number = int32(mut.Number)
   497  	}
   498  	if mut.Closed != nil {
   499  		ms.Closed = mut.Closed.Val
   500  	}
   501  }
   502  
// GitHubReview represents a review on a Pull Request.
// For more details, see https://developer.github.com/v3/pulls/reviews/
type GitHubReview struct {
	ID               int64
	Actor            *GitHubUser // may be nil
	Body             string
	State            string // COMMENTED, APPROVED, CHANGES_REQUESTED
	CommitID         string
	ActorAssociation string // CONTRIBUTOR
	Created          time.Time
	OtherJSON        string // JSON carried over unparsed from the mutation log, if any
}
   515  
   516  // Proto converts GitHubReview to a protobuf
   517  func (e *GitHubReview) Proto() *maintpb.GithubReview {
   518  	p := &maintpb.GithubReview{
   519  		Id:               e.ID,
   520  		Body:             e.Body,
   521  		State:            e.State,
   522  		CommitId:         e.CommitID,
   523  		ActorAssociation: e.ActorAssociation,
   524  	}
   525  	if e.OtherJSON != "" {
   526  		p.OtherJson = []byte(e.OtherJSON)
   527  	}
   528  	if !e.Created.IsZero() {
   529  		if tp, err := ptypes.TimestampProto(e.Created); err == nil {
   530  			p.Created = tp
   531  		}
   532  	}
   533  	if e.Actor != nil {
   534  		p.ActorId = e.Actor.ID
   535  	}
   536  
   537  	return p
   538  }
   539  
   540  // r.github.c.mu must be held.
   541  func (r *GitHubRepo) newGithubReview(p *maintpb.GithubReview) *GitHubReview {
   542  	g := r.github
   543  	e := &GitHubReview{
   544  		ID:               p.Id,
   545  		Actor:            g.getOrCreateUserID(p.ActorId),
   546  		ActorAssociation: p.ActorAssociation,
   547  		CommitID:         p.CommitId,
   548  		Body:             p.Body,
   549  		State:            p.State,
   550  	}
   551  
   552  	if p.Created != nil {
   553  		e.Created, _ = ptypes.Timestamp(p.Created)
   554  	}
   555  	if len(p.OtherJson) > 0 {
   556  		// TODO: parse it and see if we've since learned how
   557  		// to deal with it?
   558  		if r.verbose() {
   559  			log.Printf("newGithubReview: unknown JSON in log: %s", p.OtherJson)
   560  		}
   561  		e.OtherJSON = string(p.OtherJson)
   562  	}
   563  
   564  	return e
   565  }
   566  
// GitHubComment represents a comment on a GitHub issue.
type GitHubComment struct {
	ID      int64
	User    *GitHubUser
	Created time.Time
	Updated time.Time
	Body    string
}
   574  
// GitHubDismissedReviewEvent is the contents of a dismissed review event. For
// more details, see https://developer.github.com/v3/issues/events/.
type GitHubDismissedReviewEvent struct {
	ReviewID         int64
	State            string // commented, approved, changes_requested
	DismissalMessage string
}
   582  
// GitHubIssueEvent represents an event in the history of a GitHub issue
// or pull request. Which of the type-specific fields are populated
// depends on Type.
type GitHubIssueEvent struct {
	// TODO: this struct is a little wide. change it to an interface
	// instead?  Maybe later, if memory profiling suggests it would help.

	// ID is the ID of the event.
	ID int64

	// Type is one of:
	// * labeled, unlabeled
	// * milestoned, demilestoned
	// * assigned, unassigned
	// * locked, unlocked
	// * closed
	// * referenced
	// * renamed
	// * reopened
	// * comment_deleted
	// * head_ref_restored
	// * base_ref_changed
	// * subscribed
	// * mentioned
	// * review_requested, review_request_removed, review_dismissed
	Type string

	// OtherJSON optionally contains a JSON object of GitHub's API
	// response for any fields maintner was unable to extract at
	// the time. It is empty if maintner supported all the fields
	// when the mutation was created.
	OtherJSON string

	Created time.Time
	Actor   *GitHubUser

	Label               string      // for type: "unlabeled", "labeled"
	Assignee            *GitHubUser // for type: "assigned", "unassigned"
	Assigner            *GitHubUser // for type: "assigned", "unassigned"
	Milestone           string      // for type: "milestoned", "demilestoned"
	From, To            string      // for type: "renamed"
	CommitID, CommitURL string      // for type: "closed", "referenced" ... ?

	// NOTE(review): the fields below are presumably for the
	// "review_requested", "review_request_removed" and
	// "review_dismissed" types; confirm against the code that fills
	// them in.
	Reviewer        *GitHubUser
	TeamReviewer    *GitHubTeam
	ReviewRequester *GitHubUser
	DismissedReview *GitHubDismissedReviewEvent
}
   628  
   629  func (e *GitHubIssueEvent) Proto() *maintpb.GithubIssueEvent {
   630  	p := &maintpb.GithubIssueEvent{
   631  		Id:         e.ID,
   632  		EventType:  e.Type,
   633  		RenameFrom: e.From,
   634  		RenameTo:   e.To,
   635  	}
   636  	if e.OtherJSON != "" {
   637  		p.OtherJson = []byte(e.OtherJSON)
   638  	}
   639  	if !e.Created.IsZero() {
   640  		if tp, err := ptypes.TimestampProto(e.Created); err == nil {
   641  			p.Created = tp
   642  		}
   643  	}
   644  	if e.Actor != nil {
   645  		p.ActorId = e.Actor.ID
   646  	}
   647  	if e.Assignee != nil {
   648  		p.AssigneeId = e.Assignee.ID
   649  	}
   650  	if e.Assigner != nil {
   651  		p.AssignerId = e.Assigner.ID
   652  	}
   653  	if e.Label != "" {
   654  		p.Label = &maintpb.GithubLabel{Name: e.Label}
   655  	}
   656  	if e.Milestone != "" {
   657  		p.Milestone = &maintpb.GithubMilestone{Title: e.Milestone}
   658  	}
   659  	if e.CommitID != "" {
   660  		c := &maintpb.GithubCommit{CommitId: e.CommitID}
   661  		if m := rxGithubCommitURL.FindStringSubmatch(e.CommitURL); m != nil {
   662  			c.Owner = m[1]
   663  			c.Repo = m[2]
   664  		}
   665  		p.Commit = c
   666  	}
   667  	if e.Reviewer != nil {
   668  		p.ReviewerId = e.Reviewer.ID
   669  	}
   670  	if e.TeamReviewer != nil {
   671  		p.TeamReviewer = &maintpb.GithubTeam{
   672  			Id:   e.TeamReviewer.ID,
   673  			Slug: e.TeamReviewer.Slug,
   674  		}
   675  	}
   676  	if e.ReviewRequester != nil {
   677  		p.ReviewRequesterId = e.ReviewRequester.ID
   678  	}
   679  	if e.DismissedReview != nil {
   680  		p.DismissedReview = &maintpb.GithubDismissedReviewEvent{
   681  			ReviewId:         e.DismissedReview.ReviewID,
   682  			State:            e.DismissedReview.State,
   683  			DismissalMessage: e.DismissedReview.DismissalMessage,
   684  		}
   685  	}
   686  	return p
   687  }
   688  
// rxGithubCommitURL matches the prefix of a GitHub API commit URL,
// capturing the owner (submatch 1) and the repo name (submatch 2).
var rxGithubCommitURL = regexp.MustCompile(`^https://api\.github\.com/repos/([^/]+)/([^/]+)/commits/`)
   690  
   691  // r.github.c.mu must be held.
   692  func (r *GitHubRepo) newGithubEvent(p *maintpb.GithubIssueEvent) *GitHubIssueEvent {
   693  	g := r.github
   694  	e := &GitHubIssueEvent{
   695  		ID:              p.Id,
   696  		Type:            p.EventType,
   697  		Actor:           g.getOrCreateUserID(p.ActorId),
   698  		Assignee:        g.getOrCreateUserID(p.AssigneeId),
   699  		Assigner:        g.getOrCreateUserID(p.AssignerId),
   700  		Reviewer:        g.getOrCreateUserID(p.ReviewerId),
   701  		TeamReviewer:    g.getTeam(p.TeamReviewer),
   702  		ReviewRequester: g.getOrCreateUserID(p.ReviewRequesterId),
   703  		From:            p.RenameFrom,
   704  		To:              p.RenameTo,
   705  	}
   706  	if p.Created != nil {
   707  		e.Created, _ = ptypes.Timestamp(p.Created)
   708  	}
   709  	if len(p.OtherJson) > 0 {
   710  		// TODO: parse it and see if we've since learned how
   711  		// to deal with it?
   712  		if r.verbose() {
   713  			log.Printf("newGithubEvent: unknown JSON in log: %s", p.OtherJson)
   714  		}
   715  		e.OtherJSON = string(p.OtherJson)
   716  	}
   717  	if p.Label != nil {
   718  		e.Label = g.c.str(p.Label.Name)
   719  	}
   720  	if p.Milestone != nil {
   721  		e.Milestone = g.c.str(p.Milestone.Title)
   722  	}
   723  	if c := p.Commit; c != nil {
   724  		e.CommitID = c.CommitId
   725  		if c.Owner != "" && c.Repo != "" {
   726  			// TODO: this field is dumb. break it down.
   727  			e.CommitURL = "https://api.github.com/repos/" + c.Owner + "/" + c.Repo + "/commits/" + c.CommitId
   728  		}
   729  	}
   730  	if d := p.DismissedReview; d != nil {
   731  		e.DismissedReview = &GitHubDismissedReviewEvent{
   732  			ReviewID:         d.ReviewId,
   733  			State:            d.State,
   734  			DismissalMessage: d.DismissalMessage,
   735  		}
   736  	}
   737  	return e
   738  }
   739  
   740  // (requires corpus be locked for reads)
   741  func (gi *GitHubIssue) commentsSynced() bool {
   742  	if gi.NotExist {
   743  		// Issue doesn't exist, so can't sync its non-issues,
   744  		// so consider it done.
   745  		return true
   746  	}
   747  	return gi.commentsSyncedAsOf.After(gi.Updated)
   748  }
   749  
   750  // (requires corpus be locked for reads)
   751  func (gi *GitHubIssue) eventsSynced() bool {
   752  	if gi.NotExist {
   753  		// Issue doesn't exist, so can't sync its non-issues,
   754  		// so consider it done.
   755  		return true
   756  	}
   757  	return gi.eventsSyncedAsOf.After(gi.Updated)
   758  }
   759  
   760  // (requires corpus be locked for reads)
   761  func (gi *GitHubIssue) reviewsSynced() bool {
   762  	if gi.NotExist {
   763  		// Issue doesn't exist, so can't sync its non-issues,
   764  		// so consider it done.
   765  		return true
   766  	}
   767  	return gi.reviewsSyncedAsOf.After(gi.Updated)
   768  }
   769  
   770  func (c *Corpus) initGithub() {
   771  	if c.github != nil {
   772  		return
   773  	}
   774  	c.github = &GitHub{
   775  		c:     c,
   776  		repos: map[GitHubRepoID]*GitHubRepo{},
   777  	}
   778  }
   779  
// SetGitHubLimiter sets a limiter that controls the rate of requests made
// to GitHub APIs. If nil, requests are not limited. Only valid in leader mode.
// The limiter must only be set before Sync or SyncLoop is called.
//
// The limiter is stored without taking any lock.
func (c *Corpus) SetGitHubLimiter(l *rate.Limiter) {
	c.githubLimiter = l
}
   786  
   787  // TrackGitHub registers the named GitHub repo as a repo to
   788  // watch and append to the mutation log. Only valid in leader mode.
   789  // The token is the auth token to use to make API calls.
   790  func (c *Corpus) TrackGitHub(owner, repo, token string) {
   791  	if c.mutationLogger == nil {
   792  		panic("can't TrackGitHub in non-leader mode")
   793  	}
   794  
   795  	c.mu.Lock()
   796  	defer c.mu.Unlock()
   797  	c.initGithub()
   798  	gr := c.github.getOrCreateRepo(owner, repo)
   799  	if gr == nil {
   800  		log.Fatalf("invalid github owner/repo %q/%q", owner, repo)
   801  	}
   802  	c.watchedGithubRepos = append(c.watchedGithubRepos, watchedGithubRepo{
   803  		gr:    gr,
   804  		token: token,
   805  	})
   806  }
   807  
// watchedGithubRepo is a GitHub repo registered with TrackGitHub,
// paired with the auth token to use for its API calls.
type watchedGithubRepo struct {
	gr    *GitHubRepo
	token string
}
   812  
   813  // g.c.mu must be held
   814  func (g *GitHub) getUser(pu *maintpb.GithubUser) *GitHubUser {
   815  	if pu == nil {
   816  		return nil
   817  	}
   818  	if u := g.users[pu.Id]; u != nil {
   819  		if pu.Login != "" && pu.Login != u.Login {
   820  			u.Login = pu.Login
   821  		}
   822  		return u
   823  	}
   824  	if g.users == nil {
   825  		g.users = make(map[int64]*GitHubUser)
   826  	}
   827  	u := &GitHubUser{
   828  		ID:    pu.Id,
   829  		Login: pu.Login,
   830  	}
   831  	g.users[pu.Id] = u
   832  	return u
   833  }
   834  
   835  func (g *GitHub) getOrCreateUserID(id int64) *GitHubUser {
   836  	if id == 0 {
   837  		return nil
   838  	}
   839  	if u := g.users[id]; u != nil {
   840  		return u
   841  	}
   842  	if g.users == nil {
   843  		g.users = make(map[int64]*GitHubUser)
   844  	}
   845  	u := &GitHubUser{ID: id}
   846  	g.users[id] = u
   847  	return u
   848  }
   849  
   850  // g.c.mu must be held
   851  func (g *GitHub) getTeam(pt *maintpb.GithubTeam) *GitHubTeam {
   852  	if pt == nil {
   853  		return nil
   854  	}
   855  	if g.teams == nil {
   856  		g.teams = make(map[int64]*GitHubTeam)
   857  	}
   858  
   859  	t := g.teams[pt.Id]
   860  	if t == nil {
   861  		t = &GitHubTeam{
   862  			ID: pt.Id,
   863  		}
   864  		g.teams[pt.Id] = t
   865  	}
   866  	if pt.Slug != "" {
   867  		t.Slug = pt.Slug
   868  	}
   869  	return t
   870  }
   871  
   872  // newGithubUserProto creates a GithubUser with the minimum diff between
   873  // existing and g. The return value is nil if there were no changes. existing
   874  // may also be nil.
   875  func newGithubUserProto(existing *GitHubUser, g *github.User) *maintpb.GithubUser {
   876  	if g == nil {
   877  		return nil
   878  	}
   879  	id := int64(g.GetID())
   880  	if existing == nil {
   881  		return &maintpb.GithubUser{
   882  			Id:    id,
   883  			Login: g.GetLogin(),
   884  		}
   885  	}
   886  	hasChanges := false
   887  	u := &maintpb.GithubUser{Id: id}
   888  	if login := g.GetLogin(); existing.Login != login {
   889  		u.Login = login
   890  		hasChanges = true
   891  	}
   892  	// Add more fields here
   893  	if hasChanges {
   894  		return u
   895  	}
   896  	return nil
   897  }
   898  
   899  // deletedAssignees returns an array of user ID's that are present in existing
   900  // but not present in new.
   901  func deletedAssignees(existing []*GitHubUser, new []*github.User) []int64 {
   902  	mp := make(map[int64]bool, len(existing))
   903  	for _, u := range new {
   904  		id := int64(u.GetID())
   905  		mp[id] = true
   906  	}
   907  	toDelete := []int64{}
   908  	for _, u := range existing {
   909  		if _, ok := mp[u.ID]; !ok {
   910  			toDelete = append(toDelete, u.ID)
   911  		}
   912  	}
   913  	return toDelete
   914  }
   915  
   916  // newAssignees returns an array of diffs between existing and new. New users in
   917  // new will be present in the returned array in their entirety. Modified users
   918  // will appear containing only the ID field and changed fields. Unmodified users
   919  // will not appear in the returned array.
   920  func newAssignees(existing []*GitHubUser, new []*github.User) []*maintpb.GithubUser {
   921  	mp := make(map[int64]*GitHubUser, len(existing))
   922  	for _, u := range existing {
   923  		mp[u.ID] = u
   924  	}
   925  	changes := []*maintpb.GithubUser{}
   926  	for _, u := range new {
   927  		if existingUser, ok := mp[int64(u.GetID())]; ok {
   928  			diffUser := &maintpb.GithubUser{
   929  				Id: int64(u.GetID()),
   930  			}
   931  			hasDiff := false
   932  			if login := u.GetLogin(); existingUser.Login != login {
   933  				diffUser.Login = login
   934  				hasDiff = true
   935  			}
   936  			// check more User fields for diffs here, as we add them to the proto
   937  
   938  			if hasDiff {
   939  				changes = append(changes, diffUser)
   940  			}
   941  		} else {
   942  			changes = append(changes, &maintpb.GithubUser{
   943  				Id:    int64(u.GetID()),
   944  				Login: u.GetLogin(),
   945  			})
   946  		}
   947  	}
   948  	return changes
   949  }
   950  
   951  // setAssigneesFromProto returns a new array of assignees according to the
   952  // instructions in new (adds or modifies users in existing), and toDelete
   953  // (deletes them). c.mu must be held.
   954  func (g *GitHub) setAssigneesFromProto(existing []*GitHubUser, new []*maintpb.GithubUser, toDelete []int64) []*GitHubUser {
   955  	c := g.c
   956  	mp := make(map[int64]*GitHubUser)
   957  	for _, u := range existing {
   958  		mp[u.ID] = u
   959  	}
   960  	for _, u := range new {
   961  		if existingUser, ok := mp[u.Id]; ok {
   962  			if u.Login != "" {
   963  				existingUser.Login = u.Login
   964  			}
   965  			// TODO: add other fields here when we add them for user.
   966  		} else {
   967  			c.debugf("adding assignee %q", u.Login)
   968  			existing = append(existing, g.getUser(u))
   969  		}
   970  	}
   971  	// this is quadratic but the number of assignees is very unlikely to exceed,
   972  	// say, 5.
   973  	existing = slices.DeleteFunc(existing, func(u *GitHubUser) bool {
   974  		return slices.Contains(toDelete, u.ID)
   975  	})
   976  	return existing
   977  }
   978  
// githubIssueDiffer generates a minimal diff (protobuf mutation) to
// get a GitHub Issue from its in-memory state 'a' to the current
// GitHub API state 'b'.
//
// The work is done by Diff, which runs every step listed in
// issueDiffMethods against a single GithubIssueMutation.
type githubIssueDiffer struct {
	// gr is the repo the issue belongs to; Diff reads its owner/repo ID
	// and its verbosity setting.
	gr *GitHubRepo
	a  *GitHubIssue  // may be nil if no current state
	b  *github.Issue // may NOT be nil
}
   987  
   988  // returns nil if no changes.
   989  func (d githubIssueDiffer) Diff() *maintpb.GithubIssueMutation {
   990  	var changed bool
   991  	m := &maintpb.GithubIssueMutation{
   992  		Owner:       d.gr.id.Owner,
   993  		Repo:        d.gr.id.Repo,
   994  		Number:      int32(d.b.GetNumber()),
   995  		PullRequest: d.b.IsPullRequest(),
   996  	}
   997  	for _, f := range issueDiffMethods {
   998  		if f(d, m) {
   999  			if d.gr.verbose() {
  1000  				fname := strings.TrimPrefix(runtime.FuncForPC(reflect.ValueOf(f).Pointer()).Name(), "golang.org/x/build/maintner.githubIssueDiffer.")
  1001  				log.Printf("Issue %d changed: %v", d.b.GetNumber(), fname)
  1002  			}
  1003  			changed = true
  1004  		}
  1005  	}
  1006  	if !changed {
  1007  		return nil
  1008  	}
  1009  	return m
  1010  }
  1011  
// issueDiffMethods are the different steps githubIssueDiffer.Diff
// goes through to compute a diff. Each step receives the differ and
// the mutation being built, fills in the mutation fields it is
// responsible for, and should return true if any change was made.
// The order is irrelevant unless otherwise documented in comments
// in the list below.
var issueDiffMethods = []func(githubIssueDiffer, *maintpb.GithubIssueMutation) bool{
	githubIssueDiffer.diffCreatedAt,
	githubIssueDiffer.diffUpdatedAt,
	githubIssueDiffer.diffUser,
	githubIssueDiffer.diffBody,
	githubIssueDiffer.diffTitle,
	githubIssueDiffer.diffMilestone,
	githubIssueDiffer.diffAssignees,
	githubIssueDiffer.diffClosedState,
	githubIssueDiffer.diffClosedAt,
	githubIssueDiffer.diffClosedBy,
	githubIssueDiffer.diffLockedState,
	githubIssueDiffer.diffLabels,
}
  1030  
  1031  func (d githubIssueDiffer) diffCreatedAt(m *maintpb.GithubIssueMutation) bool {
  1032  	return d.diffTimeField(&m.Created, d.a.getCreatedAt(), d.b.GetCreatedAt())
  1033  }
  1034  
  1035  func (d githubIssueDiffer) diffUpdatedAt(m *maintpb.GithubIssueMutation) bool {
  1036  	return d.diffTimeField(&m.Updated, d.a.getUpdatedAt(), d.b.GetUpdatedAt())
  1037  }
  1038  
  1039  func (d githubIssueDiffer) diffClosedAt(m *maintpb.GithubIssueMutation) bool {
  1040  	return d.diffTimeField(&m.ClosedAt, d.a.getClosedAt(), d.b.GetClosedAt())
  1041  }
  1042  
  1043  func (d githubIssueDiffer) diffTimeField(dst **timestamp.Timestamp, memTime, githubTime time.Time) bool {
  1044  	if githubTime.IsZero() || memTime.Equal(githubTime) {
  1045  		return false
  1046  	}
  1047  	tproto, err := ptypes.TimestampProto(githubTime)
  1048  	if err != nil {
  1049  		panic(err)
  1050  	}
  1051  	*dst = tproto
  1052  	return true
  1053  }
  1054  
  1055  func (d githubIssueDiffer) diffUser(m *maintpb.GithubIssueMutation) bool {
  1056  	var existing *GitHubUser
  1057  	if d.a != nil {
  1058  		existing = d.a.User
  1059  	}
  1060  	m.User = newGithubUserProto(existing, d.b.User)
  1061  	return m.User != nil
  1062  }
  1063  
  1064  func (d githubIssueDiffer) diffClosedBy(m *maintpb.GithubIssueMutation) bool {
  1065  	var existing *GitHubUser
  1066  	if d.a != nil {
  1067  		existing = d.a.ClosedBy
  1068  	}
  1069  	m.ClosedBy = newGithubUserProto(existing, d.b.ClosedBy)
  1070  	return m.ClosedBy != nil
  1071  }
  1072  
  1073  func (d githubIssueDiffer) diffBody(m *maintpb.GithubIssueMutation) bool {
  1074  	if d.a != nil && d.a.Body == d.b.GetBody() {
  1075  		return false
  1076  	}
  1077  	m.BodyChange = &maintpb.StringChange{Val: d.b.GetBody()}
  1078  	return true
  1079  }
  1080  
  1081  func (d githubIssueDiffer) diffTitle(m *maintpb.GithubIssueMutation) bool {
  1082  	if d.a != nil && d.a.Title == d.b.GetTitle() {
  1083  		return false
  1084  	}
  1085  	m.Title = d.b.GetTitle()
  1086  	// TODO: emit a StringChange if we ever have a problem that we
  1087  	// legitimately need real issues with no titles reflected in
  1088  	// maintner's model. For now just ignore such changes, if
  1089  	// GitHub even permits the.
  1090  	return m.Title != ""
  1091  }
  1092  
  1093  func (d githubIssueDiffer) diffMilestone(m *maintpb.GithubIssueMutation) bool {
  1094  	if d.a != nil && d.a.Milestone != nil {
  1095  		ma, mb := d.a.Milestone, d.b.Milestone
  1096  		if ma == noMilestone && d.b.Milestone == nil {
  1097  			// Unchanged. Still no milestone.
  1098  			return false
  1099  		}
  1100  		if mb != nil && ma.ID == int64(mb.GetID()) {
  1101  			// Unchanged. Same milestone.
  1102  			// TODO: detect milestone renames and emit mutation for that?
  1103  			return false
  1104  		}
  1105  
  1106  	}
  1107  	if mb := d.b.Milestone; mb != nil {
  1108  		m.MilestoneId = int64(mb.GetID())
  1109  		m.MilestoneNum = int64(mb.GetNumber())
  1110  		m.MilestoneTitle = mb.GetTitle()
  1111  	} else {
  1112  		m.NoMilestone = true
  1113  	}
  1114  	return true
  1115  }
  1116  
  1117  func (d githubIssueDiffer) diffAssignees(m *maintpb.GithubIssueMutation) bool {
  1118  	if d.a == nil {
  1119  		m.Assignees = newAssignees(nil, d.b.Assignees)
  1120  		return true
  1121  	}
  1122  	m.Assignees = newAssignees(d.a.Assignees, d.b.Assignees)
  1123  	m.DeletedAssignees = deletedAssignees(d.a.Assignees, d.b.Assignees)
  1124  	return len(m.Assignees) > 0 || len(m.DeletedAssignees) > 0
  1125  }
  1126  
  1127  func (d githubIssueDiffer) diffLabels(m *maintpb.GithubIssueMutation) bool {
  1128  	// Common case: no changes. Return false quickly without allocations.
  1129  	if d.a != nil && len(d.a.Labels) == len(d.b.Labels) {
  1130  		missing := false
  1131  		for _, gl := range d.b.Labels {
  1132  			if _, ok := d.a.Labels[int64(gl.GetID())]; !ok {
  1133  				missing = true
  1134  				break
  1135  			}
  1136  		}
  1137  		if !missing {
  1138  			return false
  1139  		}
  1140  	}
  1141  
  1142  	toAdd := map[int64]*maintpb.GithubLabel{}
  1143  	for _, gl := range d.b.Labels {
  1144  		id := int64(gl.GetID())
  1145  		if id == 0 {
  1146  			panic("zero label ID")
  1147  		}
  1148  		toAdd[id] = &maintpb.GithubLabel{Id: id, Name: gl.GetName()}
  1149  	}
  1150  
  1151  	var toDelete []int64
  1152  	if d.a != nil {
  1153  		for id := range d.a.Labels {
  1154  			if _, ok := toAdd[id]; ok {
  1155  				// Already had it.
  1156  				delete(toAdd, id)
  1157  			} else {
  1158  				// We had it, but no longer.
  1159  				toDelete = append(toDelete, id)
  1160  			}
  1161  		}
  1162  	}
  1163  
  1164  	m.RemoveLabel = toDelete
  1165  	for _, labpb := range toAdd {
  1166  		m.AddLabel = append(m.AddLabel, labpb)
  1167  	}
  1168  
  1169  	return len(m.RemoveLabel) > 0 || len(m.AddLabel) > 0
  1170  }
  1171  
  1172  func (d githubIssueDiffer) diffClosedState(m *maintpb.GithubIssueMutation) bool {
  1173  	bclosed := d.b.GetState() == "closed"
  1174  	if d.a != nil && d.a.Closed == bclosed {
  1175  		return false
  1176  	}
  1177  	m.Closed = &maintpb.BoolChange{Val: bclosed}
  1178  	return true
  1179  }
  1180  
  1181  func (d githubIssueDiffer) diffLockedState(m *maintpb.GithubIssueMutation) bool {
  1182  	if d.a != nil && d.a.Locked == d.b.GetLocked() {
  1183  		return false
  1184  	}
  1185  	if d.a == nil && !d.b.GetLocked() {
  1186  		return false
  1187  	}
  1188  	m.Locked = &maintpb.BoolChange{Val: d.b.GetLocked()}
  1189  	return true
  1190  }
  1191  
  1192  // newMutationFromIssue generates a GithubIssueMutation using the
  1193  // smallest possible diff between a (the state we have in memory in
  1194  // the corpus) and b (the current GitHub API state).
  1195  //
  1196  // If newMutationFromIssue returns nil, the provided github.Issue is no newer
  1197  // than the data we have in the corpus. 'a' may be nil.
  1198  func (r *GitHubRepo) newMutationFromIssue(a *GitHubIssue, b *github.Issue) *maintpb.Mutation {
  1199  	if b == nil || b.Number == nil {
  1200  		panic(fmt.Sprintf("github issue with nil number: %#v", b))
  1201  	}
  1202  	gim := githubIssueDiffer{gr: r, a: a, b: b}.Diff()
  1203  	if gim == nil {
  1204  		// No changes.
  1205  		return nil
  1206  	}
  1207  	return &maintpb.Mutation{GithubIssue: gim}
  1208  }
  1209  
  1210  func (r *GitHubRepo) missingIssues() []int32 {
  1211  	c := r.github.c
  1212  	c.mu.RLock()
  1213  	defer c.mu.RUnlock()
  1214  
  1215  	var maxNum int32
  1216  	for num := range r.issues {
  1217  		if num > maxNum {
  1218  			maxNum = num
  1219  		}
  1220  	}
  1221  
  1222  	var missing []int32
  1223  	for num := int32(1); num < maxNum; num++ {
  1224  		if _, ok := r.issues[num]; !ok {
  1225  			missing = append(missing, num)
  1226  		}
  1227  	}
  1228  	return missing
  1229  }
  1230  
  1231  // processGithubMutation updates the corpus with the information in m.
  1232  func (c *Corpus) processGithubMutation(m *maintpb.GithubMutation) {
  1233  	if c == nil {
  1234  		panic("nil corpus")
  1235  	}
  1236  	c.initGithub()
  1237  	gr := c.github.getOrCreateRepo(m.Owner, m.Repo)
  1238  	if gr == nil {
  1239  		log.Printf("bogus Owner/Repo %q/%q in mutation: %v", m.Owner, m.Repo, m)
  1240  		return
  1241  	}
  1242  	for _, lp := range m.Labels {
  1243  		lb := gr.getOrCreateLabel(lp.Id)
  1244  		lb.processMutation(*lp)
  1245  	}
  1246  	for _, mp := range m.Milestones {
  1247  		ms := gr.getOrCreateMilestone(mp.Id)
  1248  		ms.processMutation(*mp)
  1249  	}
  1250  }
  1251  
// processGithubIssueMutation updates the corpus with the information in m.
//
// The issue identified by m.Owner, m.Repo and m.Number is created if it is
// not in the corpus yet. Fields that are unset in m (nil pointers, empty
// strings, zero IDs) leave the corresponding issue field untouched.
// Mutations with a bogus Owner/Repo or a zero Number are logged and
// ignored. It panics if c is nil or if the Created, Updated or ClosedAt
// timestamps cannot be converted.
//
// NOTE(review): this calls setAssigneesFromProto, which documents that c.mu
// must be held, so presumably the caller holds c.mu — confirm.
func (c *Corpus) processGithubIssueMutation(m *maintpb.GithubIssueMutation) {
	if c == nil {
		panic("nil corpus")
	}
	c.initGithub()
	gr := c.github.getOrCreateRepo(m.Owner, m.Repo)
	if gr == nil {
		log.Printf("bogus Owner/Repo %q/%q in mutation: %v", m.Owner, m.Repo, m)
		return
	}
	if m.Number == 0 {
		log.Printf("bogus zero Number in mutation: %v", m)
		return
	}
	gi, ok := gr.issues[m.Number]
	if !ok {
		// First mutation for this issue number: create the issue.
		gi = &GitHubIssue{
			// User added below
			Number: m.Number,
			ID:     m.Id,
		}
		if gr.issues == nil {
			gr.issues = make(map[int32]*GitHubIssue)
		}
		gr.issues[m.Number] = gi

		// A tombstone carries no further data; in particular it has no
		// Created time to convert.
		if m.NotExist {
			gi.NotExist = true
			return
		}

		// Created is only read when the issue is first created.
		var err error
		gi.Created, err = ptypes.Timestamp(m.Created)
		if err != nil {
			panic(err)
		}
	}
	if m.NotExist != gi.NotExist {
		gi.NotExist = m.NotExist
	}
	// Nothing else is applied to an issue that does not exist.
	if gi.NotExist {
		return
	}

	// Updated is applied before all other fields.
	// (Created is ignored here since it *should* never update.)
	//
	// NOTE(review): an earlier version of this comment said Updated is
	// checked so that a stale Mutation does not update the other fields,
	// but no comparison against the existing gi.Updated is visible here;
	// every mutation is applied unconditionally — confirm intent.
	if m.Updated != nil {
		t, err := ptypes.Timestamp(m.Updated)
		if err != nil {
			panic(err)
		}
		gi.Updated = t
	}
	if m.ClosedAt != nil {
		t, err := ptypes.Timestamp(m.ClosedAt)
		if err != nil {
			panic(err)
		}
		gi.ClosedAt = t
	}
	if m.User != nil {
		gi.User = c.github.getUser(m.User)
	}
	// NoMilestone takes precedence over MilestoneId.
	if m.NoMilestone {
		gi.Milestone = noMilestone
	} else if m.MilestoneId != 0 {
		// The issue mutation carries enough milestone data to create or
		// update the milestone itself.
		ms := gr.getOrCreateMilestone(m.MilestoneId)
		ms.processMutation(maintpb.GithubMilestone{
			Id:     m.MilestoneId,
			Title:  m.MilestoneTitle,
			Number: m.MilestoneNum,
		})
		gi.Milestone = ms
	}
	if m.ClosedBy != nil {
		gi.ClosedBy = c.github.getUser(m.ClosedBy)
	}
	if b := m.Closed; b != nil {
		gi.Closed = b.Val
	}
	if b := m.Locked; b != nil {
		gi.Locked = b.Val
	}
	// PullRequest is only ever switched on, never off.
	if m.PullRequest {
		gi.PullRequest = true
	}

	gi.Assignees = c.github.setAssigneesFromProto(gi.Assignees, m.Assignees, m.DeletedAssignees)

	// The body can arrive in two forms: Body can only set a non-empty
	// value, while BodyChange can also set the body to the empty string.
	// BodyChange wins if both are present.
	if m.Body != "" {
		gi.Body = m.Body
	}
	if m.BodyChange != nil {
		gi.Body = m.BodyChange.Val
	}
	if m.Title != "" {
		gi.Title = m.Title
	}
	if len(m.RemoveLabel) > 0 || len(m.AddLabel) > 0 {
		if gi.Labels == nil {
			gi.Labels = make(map[int64]*GitHubLabel)
		}
		// Removals are applied before additions.
		for _, lid := range m.RemoveLabel {
			delete(gi.Labels, lid)
		}
		for _, lp := range m.AddLabel {
			lb := gr.getOrCreateLabel(lp.Id)
			lb.processMutation(*lp)
			gi.Labels[lp.Id] = lb
		}
	}

	// Comments are created on first sight and then updated field by field.
	for _, cmut := range m.Comment {
		if cmut.Id == 0 {
			log.Printf("Ignoring bogus comment mutation lacking Id: %v", cmut)
			continue
		}
		gc, ok := gi.comments[cmut.Id]
		if !ok {
			if gi.comments == nil {
				gi.comments = make(map[int64]*GitHubComment)
			}
			gc = &GitHubComment{ID: cmut.Id}
			gi.comments[gc.ID] = gc
		}
		if cmut.User != nil {
			gc.User = c.github.getUser(cmut.User)
		}
		// Timestamp conversion errors are deliberately ignored for
		// comments, unlike for the issue timestamps above.
		if cmut.Created != nil {
			gc.Created, _ = ptypes.Timestamp(cmut.Created)
			gc.Created = gc.Created.UTC()
		}
		if cmut.Updated != nil {
			gc.Updated, _ = ptypes.Timestamp(cmut.Updated)
			gc.Updated = gc.Updated.UTC()
		}
		if cmut.Body != "" {
			gc.Body = cmut.Body
		}
	}
	// An unparseable server date leaves the previous sync time in place.
	if m.CommentStatus != nil && m.CommentStatus.ServerDate != nil {
		if serverDate, err := ptypes.Timestamp(m.CommentStatus.ServerDate); err == nil {
			gi.commentsSyncedAsOf = serverDate.UTC()
		}
	}

	// Events are rebuilt from the mutation and replace any previous event
	// stored under the same ID.
	for _, emut := range m.Event {
		if emut.Id == 0 {
			log.Printf("Ignoring bogus event mutation lacking Id: %v", emut)
			continue
		}
		if gi.events == nil {
			gi.events = make(map[int64]*GitHubIssueEvent)
		}
		gie := gr.newGithubEvent(emut)
		gi.events[emut.Id] = gie
		if gie.Created.After(gi.eventMaxTime) {
			gi.eventMaxTime = gie.Created
		}
	}
	if m.EventStatus != nil && m.EventStatus.ServerDate != nil {
		if serverDate, err := ptypes.Timestamp(m.EventStatus.ServerDate); err == nil {
			gi.eventsSyncedAsOf = serverDate.UTC()
		}
	}

	// Reviews are handled like events, and they advance the same
	// eventMaxTime high-water mark.
	for _, rmut := range m.Review {
		if rmut.Id == 0 {
			log.Printf("Ignoring bogus review mutation lacking Id: %v", rmut)
			continue
		}
		if gi.reviews == nil {
			gi.reviews = make(map[int64]*GitHubReview)
		}
		gre := gr.newGithubReview(rmut)
		gi.reviews[rmut.Id] = gre
		if gre.Created.After(gi.eventMaxTime) {
			gi.eventMaxTime = gre.Created
		}
	}
	if m.ReviewStatus != nil && m.ReviewStatus.ServerDate != nil {
		if serverDate, err := ptypes.Timestamp(m.ReviewStatus.ServerDate); err == nil {
			gi.reviewsSyncedAsOf = serverDate.UTC()
		}
	}
}
  1440  
// githubCache is an httpcache.Cache wrapper that only
// stores responses for:
//   - https://api.github.com/repos/$OWNER/$REPO/issues?direction=desc&page=1&sort=updated
//   - https://api.github.com/repos/$OWNER/$REPO/milestones?page=1
//   - https://api.github.com/repos/$OWNER/$REPO/labels?page=1
//
// Only Set is overridden; the remaining Cache methods are those of the
// embedded Cache.
type githubCache struct {
	httpcache.Cache
}
  1449  
  1450  var rxGithubCacheURLs = regexp.MustCompile(`^https://api.github.com/repos/\w+/\w+/(issues|milestones|labels)\?(.+)`)
  1451  
  1452  func cacheableURL(urlStr string) bool {
  1453  	m := rxGithubCacheURLs.FindStringSubmatch(urlStr)
  1454  	if m == nil {
  1455  		return false
  1456  	}
  1457  	v, _ := url.ParseQuery(m[2])
  1458  	if v.Get("page") != "1" {
  1459  		return false
  1460  	}
  1461  	switch m[1] {
  1462  	case "issues":
  1463  		return v.Get("sort") == "updated" && v.Get("direction") == "desc"
  1464  	case "milestones", "labels":
  1465  		return true
  1466  	default:
  1467  		panic("unexpected cache key base " + m[1])
  1468  	}
  1469  }
  1470  
  1471  func (c *githubCache) Set(urlKey string, res []byte) {
  1472  	// TODO: verify that the httpcache package guarantees that the
  1473  	// first string parameter to Set here is actually a
  1474  	// URL. Empirically they appear to be.
  1475  	if cacheableURL(urlKey) {
  1476  		c.Cache.Set(urlKey, res)
  1477  	}
  1478  }
  1479  
// sync checks for new changes on a single GitHub repository and
// updates the Corpus with any changes. If loop is true, it runs
// forever.
//
// token is the OAuth2 access token used for the GitHub API clients. When
// loop is false, sync returns the result of a single poll. When loop is
// true, it only returns once the context is done or a poll fails with
// context.Canceled; any other poll error is logged and the loop continues.
func (gr *GitHubRepo) sync(ctx context.Context, token string, loop bool) error {
	ts := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token})
	hc := oauth2.NewClient(ctx, ts)
	// NOTE(review): oauth2.NewClient presumably returns a client whose
	// Transport is an *oauth2.Transport, in which case this assertion never
	// matches and idle connections are never closed here — confirm.
	if tr, ok := hc.Transport.(*http.Transport); ok {
		defer tr.CloseIdleConnections()
	}
	// directTransport talks to GitHub without caching, optionally going
	// through the corpus-wide rate limiter.
	directTransport := hc.Transport
	if gr.github.c.githubLimiter != nil {
		directTransport = limitTransport{gr.github.c.githubLimiter, hc.Transport}
	}
	// cachingTransport layers an in-memory cache (restricted by
	// githubCache) on top of directTransport.
	cachingTransport := &httpcache.Transport{
		Transport:           directTransport,
		Cache:               &githubCache{Cache: httpcache.NewMemoryCache()},
		MarkCachedResponses: true, // adds "X-From-Cache: 1" response header.
	}

	p := &githubRepoPoller{
		c:             gr.github.c,
		token:         token,
		gr:            gr,
		githubDirect:  github.NewClient(&http.Client{Transport: directTransport}),
		githubCaching: github.NewClient(&http.Client{Transport: cachingTransport}),
		client:        http.DefaultClient,
	}
	activityCh := gr.github.c.activityChan("github:" + gr.id.String())
	var expectChanges bool // got webhook update, but haven't seen new data yet
	var sleepDelay time.Duration
	for {
		prevLastUpdate := p.lastUpdate
		err := p.sync(ctx, expectChanges)
		if err == context.Canceled || !loop {
			return err
		}
		// The poller bumps lastUpdate whenever it records a change, so a
		// different value means this round saw new data.
		sawChanges := !p.lastUpdate.Equal(prevLastUpdate)
		if sawChanges {
			expectChanges = false
		}
		// If we got woken up by a webhook, sometimes
		// immediately polling GitHub for the data results in
		// a cache hit saying nothing's changed. Don't believe
		// it. Polling quickly with exponential backoff until
		// we see what we're expecting.
		if expectChanges {
			if sleepDelay == 0 {
				sleepDelay = 1 * time.Second
			} else {
				// Double the delay each round, capped at the regular
				// poll interval.
				sleepDelay *= 2
				if sleepDelay > 15*time.Minute {
					sleepDelay = 15 * time.Minute
				}
			}
			p.logf("expect changes; re-polling in %v", sleepDelay)
		} else {
			// Nothing pending: fall back to the regular poll interval.
			sleepDelay = 15 * time.Minute
		}
		p.logf("sync = %v; sleeping", err)
		// Sleep until the delay elapses, activity is signalled for this
		// repo, or the context is done.
		timer := time.NewTimer(sleepDelay)
		select {
		case <-ctx.Done():
			timer.Stop()
			return ctx.Err()
		case <-activityCh:
			timer.Stop()
			expectChanges = true
			// Restart the backoff from its shortest delay.
			sleepDelay = 0
		case <-timer.C:
		}
	}
}
  1552  
// httpClient is the HTTP client abstraction stored in
// githubRepoPoller.client. Its single method has the same signature as
// (*http.Client).Do, so *http.Client satisfies it.
type httpClient interface {
	// Do sends req and returns the response or an error.
	Do(req *http.Request) (*http.Response, error)
}
  1556  
// A githubRepoPoller updates the Corpus c to have the latest version of
// the GitHub repo gr, using the GitHub API clients githubCaching and
// githubDirect.
type githubRepoPoller struct {
	c             *Corpus // shortcut for gr.github.c
	gr            *GitHubRepo
	token         string
	lastUpdate    time.Time // modified by sync
	githubCaching *github.Client
	githubDirect  *github.Client // not caching
	client        httpClient     // the client used to poll github
}
  1568  
  1569  func (p *githubRepoPoller) Owner() string { return p.gr.id.Owner }
  1570  func (p *githubRepoPoller) Repo() string  { return p.gr.id.Repo }
  1571  
  1572  func (p *githubRepoPoller) logf(format string, args ...interface{}) {
  1573  	log.Printf("sync github "+p.gr.id.String()+": "+format, args...)
  1574  }
  1575  
  1576  func (p *githubRepoPoller) sync(ctx context.Context, expectChanges bool) error {
  1577  	p.logf("Beginning sync.")
  1578  	if err := p.syncIssues(ctx, expectChanges); err != nil {
  1579  		return err
  1580  	}
  1581  	if err := p.syncComments(ctx); err != nil {
  1582  		return err
  1583  	}
  1584  	if err := p.syncEvents(ctx); err != nil {
  1585  		return err
  1586  	}
  1587  	if err := p.syncReviews(ctx); err != nil {
  1588  		return err
  1589  	}
  1590  	return nil
  1591  }
  1592  
  1593  func (p *githubRepoPoller) syncMilestones(ctx context.Context) error {
  1594  	var mut *maintpb.GithubMutation // lazy init
  1595  	var changes int
  1596  	err := p.foreachItem(ctx, 1, p.getMilestonePage, func(e interface{}) error {
  1597  		ms := e.(*github.Milestone)
  1598  		id := int64(ms.GetID())
  1599  		p.c.mu.RLock()
  1600  		diff := p.gr.milestones[id].GenMutationDiff(ms)
  1601  		p.c.mu.RUnlock()
  1602  		if diff == nil {
  1603  			return nil
  1604  		}
  1605  		if mut == nil {
  1606  			mut = &maintpb.GithubMutation{
  1607  				Owner: p.Owner(),
  1608  				Repo:  p.Repo(),
  1609  			}
  1610  		}
  1611  		mut.Milestones = append(mut.Milestones, diff)
  1612  		changes++
  1613  		return nil
  1614  	})
  1615  	if err != nil {
  1616  		return err
  1617  	}
  1618  	p.logf("%d milestone changes.", changes)
  1619  	if changes == 0 {
  1620  		return nil
  1621  	}
  1622  	p.c.addMutation(&maintpb.Mutation{Github: mut})
  1623  	return nil
  1624  }
  1625  
  1626  func (p *githubRepoPoller) syncLabels(ctx context.Context) error {
  1627  	var mut *maintpb.GithubMutation // lazy init
  1628  	var changes int
  1629  	err := p.foreachItem(ctx, 1, p.getLabelPage, func(e interface{}) error {
  1630  		lb := e.(*github.Label)
  1631  		id := int64(lb.GetID())
  1632  		p.c.mu.RLock()
  1633  		diff := p.gr.labels[id].GenMutationDiff(lb)
  1634  		p.c.mu.RUnlock()
  1635  		if diff == nil {
  1636  			return nil
  1637  		}
  1638  		if mut == nil {
  1639  			mut = &maintpb.GithubMutation{
  1640  				Owner: p.Owner(),
  1641  				Repo:  p.Repo(),
  1642  			}
  1643  		}
  1644  		mut.Labels = append(mut.Labels, diff)
  1645  		changes++
  1646  		return nil
  1647  	})
  1648  	if err != nil {
  1649  		return err
  1650  	}
  1651  	p.logf("%d label changes.", changes)
  1652  	if changes == 0 {
  1653  		return nil
  1654  	}
  1655  	p.c.addMutation(&maintpb.Mutation{Github: mut})
  1656  	return nil
  1657  }
  1658  
  1659  func (p *githubRepoPoller) getMilestonePage(ctx context.Context, page int) ([]interface{}, *github.Response, error) {
  1660  	ms, res, err := p.githubCaching.Issues.ListMilestones(ctx, p.Owner(), p.Repo(), &github.MilestoneListOptions{
  1661  		State:       "all",
  1662  		ListOptions: github.ListOptions{Page: page},
  1663  	})
  1664  	if err != nil {
  1665  		return nil, nil, err
  1666  	}
  1667  	its := make([]interface{}, len(ms))
  1668  	for i, m := range ms {
  1669  		its[i] = m
  1670  	}
  1671  	return its, res, err
  1672  }
  1673  
  1674  func (p *githubRepoPoller) getLabelPage(ctx context.Context, page int) ([]interface{}, *github.Response, error) {
  1675  	ls, res, err := p.githubCaching.Issues.ListLabels(ctx, p.Owner(), p.Repo(), &github.ListOptions{
  1676  		Page: page,
  1677  	})
  1678  	if err != nil {
  1679  		return nil, nil, err
  1680  	}
  1681  	its := make([]interface{}, len(ls))
  1682  	for i, lb := range ls {
  1683  		its[i] = lb
  1684  	}
  1685  	return its, res, err
  1686  }
  1687  
  1688  // foreachItem walks over all pages of items from getPage and calls fn for each item.
  1689  // If the first page's response was cached, fn is never called.
  1690  func (p *githubRepoPoller) foreachItem(
  1691  	ctx context.Context,
  1692  	page int,
  1693  	getPage func(ctx context.Context, page int) ([]interface{}, *github.Response, error),
  1694  	fn func(interface{}) error) error {
  1695  	for {
  1696  		select {
  1697  		case <-ctx.Done():
  1698  			return ctx.Err()
  1699  		default:
  1700  		}
  1701  		items, res, err := getPage(ctx, page)
  1702  		if err != nil {
  1703  			if canRetry(ctx, err) {
  1704  				continue
  1705  			}
  1706  			return err
  1707  		}
  1708  		if len(items) == 0 {
  1709  			return nil
  1710  		}
  1711  		fromCache := page == 1 && res.Response.Header.Get(xFromCache) == "1"
  1712  		if fromCache {
  1713  			log.Printf("no new items of type %T", items[0])
  1714  			// No need to walk over these again.
  1715  			return nil
  1716  		}
  1717  		// TODO: use res.Rate (sleep until Reset if Limit == 0)
  1718  		for _, it := range items {
  1719  			if err := fn(it); err != nil {
  1720  				return err
  1721  			}
  1722  		}
  1723  		if res.NextPage == 0 {
  1724  			return nil
  1725  		}
  1726  		page = res.NextPage
  1727  	}
  1728  }
  1729  
// syncIssues brings the corpus's issues up to date with GitHub.
//
// It pages through the repo's issues, most recently updated first, and adds
// a mutation for every issue that differs from the corpus. Paging stops at
// the first page without changes, unless the corpus has 100 or more gaps in
// its issue numbers, in which case paging continues. Afterwards, every
// issue number still missing from the corpus is fetched individually;
// numbers GitHub reports as not found or gone are recorded as NotExist.
//
// If expectChanges is true, the list requests bypass the HTTP cache.
func (p *githubRepoPoller) syncIssues(ctx context.Context, expectChanges bool) error {
	page := 1
	seen := make(map[int64]bool) // issue IDs already processed in this sync
	keepGoing := true
	owner, repo := p.gr.id.Owner, p.gr.id.Repo
	for keepGoing {
		ghc := p.githubCaching
		if expectChanges {
			ghc = p.githubDirect
		}
		issues, res, err := ghc.Issues.ListByRepo(ctx, owner, repo, &github.IssueListByRepoOptions{
			State:     "all",
			Sort:      "updated",
			Direction: "desc",
			ListOptions: github.ListOptions{
				Page:    page,
				PerPage: 100,
			},
		})
		if err != nil {
			// Retry the same page when canRetry allows it.
			if canRetry(ctx, err) {
				continue
			}
			return err
		}
		// See https://developer.github.com/v3/activity/events/ for X-Poll-Interval:
		if pi := res.Response.Header.Get("X-Poll-Interval"); pi != "" {
			// Despite its name, nsec is treated as a number of seconds.
			nsec, _ := strconv.Atoi(pi)
			d := time.Duration(nsec) * time.Second
			p.logf("Requested to adjust poll interval to %v", d)
			// TODO: return an error type up that the sync loop can use
			// to adjust its default interval.
			// For now, ignore.
		}
		fromCache := res.Response.Header.Get(xFromCache) == "1"
		if len(issues) == 0 {
			p.logf("issues: reached end.")
			break
		}

		didMilestoneLabelSync := false
		changes := 0
		for _, is := range issues {
			id := int64(is.GetID())
			if seen[id] {
				// If an issue gets updated (and bumped to the top) while we
				// are paging, it's possible the last issue from page N can
				// appear as the first issue on page N+1. Don't process that
				// issue twice.
				// https://github.com/google/go-github/issues/566
				continue
			}
			seen[id] = true

			// Compute the diff against the in-memory issue under the
			// read lock.
			var mp *maintpb.Mutation
			p.c.mu.RLock()
			{
				gi := p.gr.issues[int32(*is.Number)]
				mp = p.gr.newMutationFromIssue(gi, is)
			}
			p.c.mu.RUnlock()

			if mp == nil {
				continue
			}

			// If there's something new (not a cached response),
			// then check for updated milestones and labels before
			// creating issue mutations below. Doesn't matter
			// much, but helps to have it all loaded.
			// This happens at most once per page.
			if !fromCache && !didMilestoneLabelSync {
				didMilestoneLabelSync = true
				group, ctx := errgroup.WithContext(ctx)
				group.Go(func() error { return p.syncMilestones(ctx) })
				group.Go(func() error { return p.syncLabels(ctx) })
				if err := group.Wait(); err != nil {
					return err
				}
			}

			changes++
			p.logf("changed issue %d: %s", is.GetNumber(), is.GetTitle())
			p.c.addMutation(mp)
			p.lastUpdate = time.Now()
		}

		if changes == 0 {
			// Nothing on this page was new. Stop unless the corpus still
			// has gaps in its issue numbers.
			missing := p.gr.missingIssues()
			if len(missing) == 0 {
				p.logf("no changed issues; cached=%v", fromCache)
				return nil
			}
			if len(missing) > 0 {
				p.logf("%d missing github issues.", len(missing))
			}
			// With fewer than 100 gaps, stop paging and fetch the missing
			// issues one by one below; otherwise keep paging.
			if len(missing) < 100 {
				keepGoing = false
			}
		}

		p.c.mu.RLock()
		num := len(p.gr.issues)
		p.c.mu.RUnlock()
		p.logf("After page %d: %v issues, %v changes, %v issues in memory", page, len(issues), changes, num)

		page++
	}

	// Fetch whatever issue numbers are still missing individually, always
	// bypassing the cache.
	missing := p.gr.missingIssues()
	if len(missing) > 0 {
		p.logf("remaining issues: %v", missing)
		for _, num := range missing {
			p.logf("getting issue %v ...", num)
			var issue *github.Issue
			var err error
			for {
				issue, _, err = p.githubDirect.Issues.Get(ctx, owner, repo, int(num))
				if canRetry(ctx, err) {
					continue
				}
				break
			}
			// A 404 or 410 means there is no such issue; record that so
			// the number no longer counts as missing.
			if ge, ok := err.(*github.ErrorResponse); ok && (ge.Response.StatusCode == http.StatusNotFound || ge.Response.StatusCode == http.StatusGone) {
				mut := &maintpb.Mutation{
					GithubIssue: &maintpb.GithubIssueMutation{
						Owner:    owner,
						Repo:     repo,
						Number:   num,
						NotExist: true,
					},
				}
				p.logf("issue %d is gone, marking as NotExist", num)
				p.c.addMutation(mut)
				continue
			} else if err != nil {
				return err
			}
			// The issue is not in the corpus, so diff against nil.
			mp := p.gr.newMutationFromIssue(nil, issue)
			if mp == nil {
				continue
			}
			p.logf("modified issue %d: %s", issue.GetNumber(), issue.GetTitle())
			p.c.addMutation(mp)
			p.lastUpdate = time.Now()
		}
	}

	return nil
}
  1879  
  1880  func (p *githubRepoPoller) issueNumbersWithStaleCommentSync() (issueNums []int32) {
  1881  	p.c.mu.RLock()
  1882  	defer p.c.mu.RUnlock()
  1883  
  1884  	for n, gi := range p.gr.issues {
  1885  		if !gi.commentsSynced() {
  1886  			issueNums = append(issueNums, n)
  1887  		}
  1888  	}
  1889  	sort.Slice(issueNums, func(i, j int) bool {
  1890  		return issueNums[i] < issueNums[j]
  1891  	})
  1892  	return issueNums
  1893  }
  1894  
  1895  func (p *githubRepoPoller) syncComments(ctx context.Context) error {
  1896  	for {
  1897  		nums := p.issueNumbersWithStaleCommentSync()
  1898  		if len(nums) == 0 {
  1899  			return nil
  1900  		}
  1901  		remain := len(nums)
  1902  		for _, num := range nums {
  1903  			p.logf("comment sync: %d issues remaining; syncing issue %v", remain, num)
  1904  			if err := p.syncCommentsOnIssue(ctx, num); err != nil {
  1905  				p.logf("comment sync on issue %d: %v", num, err)
  1906  				return err
  1907  			}
  1908  			remain--
  1909  		}
  1910  	}
  1911  }
  1912  
// syncCommentsOnIssue fetches the comments on issue issueNum that were
// updated since the issue's recorded commentsUpdatedTil time and passes
// them to p.c.addMutation as GithubIssue mutations, one mutation per
// response.
//
// It returns an error if issueNum is not present in p.gr.issues, if a
// non-retryable request error occurs, or if a response has an
// unparseable Date header. A 404 or 410 response is not treated as an
// error: a NotExist mutation is recorded for the issue and nil is
// returned.
func (p *githubRepoPoller) syncCommentsOnIssue(ctx context.Context, issueNum int32) error {
	// Look up the issue and its sync watermark under the corpus read lock.
	p.c.mu.RLock()
	issue := p.gr.issues[issueNum]
	if issue == nil {
		p.c.mu.RUnlock()
		return fmt.Errorf("unknown issue number %v", issueNum)
	}
	since := issue.commentsUpdatedTil
	p.c.mu.RUnlock()

	owner, repo := p.gr.id.Owner, p.gr.id.Repo
	morePages := true // at least try the first. might be empty.
	for morePages {
		// No page number is requested. Each round instead asks for
		// comments updated at or after since, which is advanced below to
		// the UpdatedAt of the last comment processed.
		ics, res, err := p.githubDirect.Issues.ListComments(ctx, owner, repo, int(issueNum), &github.IssueListCommentsOptions{
			Since:       since,
			Direction:   "asc",
			Sort:        "updated",
			ListOptions: github.ListOptions{PerPage: 100},
		})
		if canRetry(ctx, err) {
			// canRetry has already waited out the rate limit; repeat the
			// same request.
			continue
		} else if ge, ok := err.(*github.ErrorResponse); ok && (ge.Response.StatusCode == http.StatusNotFound || ge.Response.StatusCode == http.StatusGone) {
			mut := &maintpb.Mutation{
				GithubIssue: &maintpb.GithubIssueMutation{
					Owner:    owner,
					Repo:     repo,
					Number:   issueNum,
					NotExist: true,
				},
			}
			p.logf("issue %d comments are gone, marking as NotExist", issueNum)
			p.c.addMutation(mut)
			return nil
		} else if err != nil {
			return err
		}
		// The server's own clock (Date header) is what gets recorded as
		// the sync time once the final page has been seen.
		serverDate, err := http.ParseTime(res.Header.Get("Date"))
		if err != nil {
			return fmt.Errorf("invalid server Date response: %v", err)
		}
		serverDate = serverDate.UTC()
		p.logf("Number of comments on issue %d since %v: %v", issueNum, since, len(ics))

		mut := &maintpb.Mutation{
			GithubIssue: &maintpb.GithubIssueMutation{
				Owner:  owner,
				Repo:   repo,
				Number: issueNum,
			},
		}

		// issue.comments is read below, so hold the corpus read lock
		// while comparing fetched comments against the stored ones.
		p.c.mu.RLock()
		for _, ic := range ics {
			if ic.ID == nil || ic.Body == nil || ic.User == nil || ic.CreatedAt == nil || ic.UpdatedAt == nil {
				// Bogus: a required field is missing, so skip the comment.
				p.logf("bogus comment: %v", ic)
				continue
			}
			// Comments whose timestamps cannot be converted to protobuf
			// form are skipped silently.
			created, err := ptypes.TimestampProto(*ic.CreatedAt)
			if err != nil {
				continue
			}
			updated, err := ptypes.TimestampProto(*ic.UpdatedAt)
			if err != nil {
				continue
			}
			since = *ic.UpdatedAt // for next round

			id := int64(*ic.ID)
			cur := issue.comments[id]

			// TODO: does a reaction update a comment's UpdatedAt time?
			var cmut *maintpb.GithubIssueCommentMutation
			if cur == nil {
				// Comment not seen before: record every field.
				cmut = &maintpb.GithubIssueCommentMutation{
					Id: id,
					User: &maintpb.GithubUser{
						Id:    int64(*ic.User.ID),
						Login: *ic.User.Login,
					},
					Body:    *ic.Body,
					Created: created,
					Updated: updated,
				}
			} else if !cur.Updated.Equal(*ic.UpdatedAt) || cur.Body != *ic.Body {
				// Known comment that changed: record only the fields
				// that differ from the stored copy.
				cmut = &maintpb.GithubIssueCommentMutation{
					Id: id,
				}
				if !cur.Updated.Equal(*ic.UpdatedAt) {
					cmut.Updated = updated
				}
				if cur.Body != *ic.Body {
					cmut.Body = *ic.Body
				}
			}
			if cmut != nil {
				mut.GithubIssue.Comment = append(mut.GithubIssue.Comment, cmut)
			}
		}
		p.c.mu.RUnlock()

		// No next page means the listing is complete: stamp the mutation
		// with the server date and stop after this round.
		if res.NextPage == 0 {
			sdp, _ := ptypes.TimestampProto(serverDate)
			mut.GithubIssue.CommentStatus = &maintpb.GithubIssueSyncStatus{ServerDate: sdp}
			morePages = false
		}

		p.c.addMutation(mut)
	}
	return nil
}
  2024  
  2025  func (p *githubRepoPoller) issueNumbersWithStaleEventSync() (issueNums []int32) {
  2026  	p.c.mu.RLock()
  2027  	defer p.c.mu.RUnlock()
  2028  
  2029  	for n, gi := range p.gr.issues {
  2030  		if !gi.eventsSynced() {
  2031  			issueNums = append(issueNums, n)
  2032  		}
  2033  	}
  2034  	sort.Slice(issueNums, func(i, j int) bool {
  2035  		return issueNums[i] < issueNums[j]
  2036  	})
  2037  	return issueNums
  2038  }
  2039  
  2040  func (p *githubRepoPoller) syncEvents(ctx context.Context) error {
  2041  	for {
  2042  		nums := p.issueNumbersWithStaleEventSync()
  2043  		if len(nums) == 0 {
  2044  			return nil
  2045  		}
  2046  		remain := len(nums)
  2047  		for _, num := range nums {
  2048  			p.logf("event sync: %d issues remaining; syncing issue %v", remain, num)
  2049  			if err := p.syncEventsOnIssue(ctx, num); err != nil {
  2050  				p.logf("event sync on issue %d: %v", num, err)
  2051  				return err
  2052  			}
  2053  			remain--
  2054  		}
  2055  	}
  2056  }
  2057  
  2058  func (p *githubRepoPoller) syncEventsOnIssue(ctx context.Context, issueNum int32) error {
  2059  	const perPage = 100
  2060  	p.c.mu.RLock()
  2061  	gi := p.gr.issues[issueNum]
  2062  	if gi == nil {
  2063  		panic(fmt.Sprintf("bogus issue %v", issueNum))
  2064  	}
  2065  	have := len(gi.events)
  2066  	p.c.mu.RUnlock()
  2067  
  2068  	skipPages := have / perPage
  2069  
  2070  	mut := &maintpb.Mutation{
  2071  		GithubIssue: &maintpb.GithubIssueMutation{
  2072  			Owner:  p.Owner(),
  2073  			Repo:   p.Repo(),
  2074  			Number: issueNum,
  2075  		},
  2076  	}
  2077  
  2078  	err := p.foreachItem(ctx,
  2079  		1+skipPages,
  2080  		func(ctx context.Context, page int) ([]interface{}, *github.Response, error) {
  2081  			u := fmt.Sprintf("https://api.github.com/repos/%s/%s/issues/%v/events?per_page=%v&page=%v",
  2082  				p.Owner(), p.Repo(), issueNum, perPage, page)
  2083  			req, _ := http.NewRequest("GET", u, nil)
  2084  
  2085  			req.Header.Set("Authorization", "Bearer "+p.token)
  2086  			req.Header.Set("User-Agent", "golang-x-build-maintner/1.0")
  2087  			ctx, cancel := context.WithTimeout(ctx, time.Minute)
  2088  			defer cancel()
  2089  			req = req.WithContext(ctx)
  2090  			res, err := p.client.Do(req)
  2091  			if err != nil {
  2092  				log.Printf("Fetching %s: %v", u, err)
  2093  				return nil, nil, err
  2094  			}
  2095  			log.Printf("Fetching %s: %v", u, res.Status)
  2096  			ghResp := makeGithubResponse(res)
  2097  			if err := github.CheckResponse(res); err != nil {
  2098  				log.Printf("Fetching %s: %v: %+v", u, res.Status, res.Header)
  2099  				log.Printf("GitHub error %s: %v", u, ghResp)
  2100  				return nil, nil, err
  2101  			}
  2102  
  2103  			evts, err := parseGithubEvents(res.Body)
  2104  			if err != nil {
  2105  				return nil, nil, fmt.Errorf("%s: parse github events: %v", u, err)
  2106  			}
  2107  			is := make([]interface{}, len(evts))
  2108  			for i, v := range evts {
  2109  				is[i] = v
  2110  			}
  2111  			serverDate, err := http.ParseTime(res.Header.Get("Date"))
  2112  			if err != nil {
  2113  				return nil, nil, fmt.Errorf("invalid server Date response: %v", err)
  2114  			}
  2115  			sdp, _ := ptypes.TimestampProto(serverDate.UTC())
  2116  			mut.GithubIssue.EventStatus = &maintpb.GithubIssueSyncStatus{ServerDate: sdp}
  2117  
  2118  			return is, ghResp, err
  2119  		},
  2120  		func(v interface{}) error {
  2121  			ge := v.(*GitHubIssueEvent)
  2122  			p.c.mu.RLock()
  2123  			_, ok := gi.events[ge.ID]
  2124  			p.c.mu.RUnlock()
  2125  			if ok {
  2126  				// Already have it. And they're
  2127  				// assumed to be immutable, so the
  2128  				// copy we already have should be
  2129  				// good. Don't add to mutation log.
  2130  				return nil
  2131  			}
  2132  			mut.GithubIssue.Event = append(mut.GithubIssue.Event, ge.Proto())
  2133  			return nil
  2134  		})
  2135  	if err != nil {
  2136  		return err
  2137  	}
  2138  	p.c.addMutation(mut)
  2139  	return nil
  2140  }
  2141  
  2142  // parseGithubEvents parses the JSON array of GitHub issue events in r.
  2143  // It does this the very manual way (using map[string]interface{})
  2144  // instead of using nice types because https://golang.org/issue/15314
  2145  // isn't implemented yet and also because even if it were implemented,
  2146  // this code still wants to preserve any unknown fields to store in
  2147  // the "OtherJSON" field for future updates of the code to parse. (If
  2148  // GitHub adds new Event types in the future, we want to archive them,
  2149  // even if we don't understand them)
  2150  func parseGithubEvents(r io.Reader) ([]*GitHubIssueEvent, error) {
  2151  	var jevents []map[string]interface{}
  2152  	jd := json.NewDecoder(r)
  2153  	jd.UseNumber()
  2154  	if err := jd.Decode(&jevents); err != nil {
  2155  		return nil, err
  2156  	}
  2157  	var evts []*GitHubIssueEvent
  2158  	for _, em := range jevents {
  2159  		for k, v := range em {
  2160  			if v == nil {
  2161  				delete(em, k)
  2162  			}
  2163  		}
  2164  		delete(em, "url")
  2165  
  2166  		e := &GitHubIssueEvent{}
  2167  
  2168  		e.Type, _ = em["event"].(string)
  2169  		delete(em, "event")
  2170  
  2171  		e.ID = jint64(em["id"])
  2172  		delete(em, "id")
  2173  
  2174  		// TODO: store these two more compactly:
  2175  		e.CommitID, _ = em["commit_id"].(string) // "5383ecf5a0824649ffcc0349f00f0317575753d0"
  2176  		delete(em, "commit_id")
  2177  		e.CommitURL, _ = em["commit_url"].(string) // "https://api.github.com/repos/bradfitz/go-issue-mirror/commits/5383ecf5a0824649ffcc0349f00f0317575753d0"
  2178  		delete(em, "commit_url")
  2179  
  2180  		getUser := func(field string, gup **GitHubUser) {
  2181  			am, ok := em[field].(map[string]interface{})
  2182  			if !ok {
  2183  				return
  2184  			}
  2185  			delete(em, field)
  2186  			gu := &GitHubUser{ID: jint64(am["id"])}
  2187  			gu.Login, _ = am["login"].(string)
  2188  			*gup = gu
  2189  		}
  2190  
  2191  		getUser("actor", &e.Actor)
  2192  		getUser("assignee", &e.Assignee)
  2193  		getUser("assigner", &e.Assigner)
  2194  		getUser("requested_reviewer", &e.Reviewer)
  2195  		getUser("review_requester", &e.ReviewRequester)
  2196  
  2197  		if lm, ok := em["label"].(map[string]interface{}); ok {
  2198  			delete(em, "label")
  2199  			e.Label, _ = lm["name"].(string)
  2200  		}
  2201  
  2202  		if mm, ok := em["milestone"].(map[string]interface{}); ok {
  2203  			delete(em, "milestone")
  2204  			e.Milestone, _ = mm["title"].(string)
  2205  		}
  2206  
  2207  		if rm, ok := em["rename"].(map[string]interface{}); ok {
  2208  			delete(em, "rename")
  2209  			e.From, _ = rm["from"].(string)
  2210  			e.To, _ = rm["to"].(string)
  2211  		}
  2212  
  2213  		if createdStr, ok := em["created_at"].(string); ok {
  2214  			delete(em, "created_at")
  2215  			var err error
  2216  			e.Created, err = time.Parse(time.RFC3339, createdStr)
  2217  			if err != nil {
  2218  				return nil, err
  2219  			}
  2220  			e.Created = e.Created.UTC()
  2221  		}
  2222  		if dr, ok := em["dismissed_review"]; ok {
  2223  			delete(em, "dismissed_review")
  2224  			drm := dr.(map[string]interface{})
  2225  			dro := &GitHubDismissedReviewEvent{}
  2226  			dro.ReviewID = jint64(drm["review_id"])
  2227  			if state, ok := drm["state"].(string); ok {
  2228  				dro.State = state
  2229  			} else {
  2230  				log.Printf("got type %T for 'state' field, expected string in %+v", drm["state"], drm)
  2231  			}
  2232  			dro.DismissalMessage, _ = drm["dismissal_message"].(string)
  2233  			e.DismissedReview = dro
  2234  		}
  2235  		if rt, ok := em["requested_team"]; ok {
  2236  			delete(em, "requested_team")
  2237  			rtm, ok := rt.(map[string]interface{})
  2238  			if !ok {
  2239  				log.Printf("got value %+v for 'requested_team' field, wanted a map with 'id' and 'slug' fields", rt)
  2240  			} else {
  2241  				t := &GitHubTeam{}
  2242  				t.ID = jint64(rtm["id"])
  2243  				t.Slug, _ = rtm["slug"].(string)
  2244  				e.TeamReviewer = t
  2245  			}
  2246  		}
  2247  		delete(em, "node_id")     // GitHub API v4 Global Node ID; don't store it.
  2248  		delete(em, "lock_reason") // Not stored.
  2249  
  2250  		otherJSON, _ := json.Marshal(em)
  2251  		e.OtherJSON = string(otherJSON)
  2252  		if e.OtherJSON == "{}" {
  2253  			e.OtherJSON = ""
  2254  		}
  2255  		if e.OtherJSON != "" {
  2256  			log.Printf("warning: storing unknown field(s) in GitHub issue event: %s", e.OtherJSON)
  2257  		}
  2258  		evts = append(evts, e)
  2259  	}
  2260  	return evts, nil
  2261  }
  2262  
  2263  func (p *githubRepoPoller) issueNumbersWithStaleReviewsSync() (issueNums []int32) {
  2264  	p.c.mu.RLock()
  2265  	defer p.c.mu.RUnlock()
  2266  
  2267  	for n, gi := range p.gr.issues {
  2268  		if gi.PullRequest && !gi.reviewsSynced() {
  2269  			issueNums = append(issueNums, n)
  2270  		}
  2271  	}
  2272  	sort.Slice(issueNums, func(i, j int) bool {
  2273  		return issueNums[i] < issueNums[j]
  2274  	})
  2275  	return issueNums
  2276  }
  2277  
  2278  func (p *githubRepoPoller) syncReviews(ctx context.Context) error {
  2279  	for {
  2280  		nums := p.issueNumbersWithStaleReviewsSync()
  2281  		if len(nums) == 0 {
  2282  			return nil
  2283  		}
  2284  		remain := len(nums)
  2285  		for _, num := range nums {
  2286  			p.logf("reviews sync: %d issues remaining; syncing issue %v", remain, num)
  2287  			if err := p.syncReviewsOnPullRequest(ctx, num); err != nil {
  2288  				p.logf("review sync on issue %d: %v", num, err)
  2289  				return err
  2290  			}
  2291  			remain--
  2292  		}
  2293  	}
  2294  }
  2295  
  2296  func (p *githubRepoPoller) syncReviewsOnPullRequest(ctx context.Context, issueNum int32) error {
  2297  	const perPage = 100
  2298  	p.c.mu.RLock()
  2299  	gi := p.gr.issues[issueNum]
  2300  	if gi == nil {
  2301  		p.c.mu.RUnlock()
  2302  		panic(fmt.Sprintf("bogus issue %v", issueNum))
  2303  	}
  2304  
  2305  	if !gi.PullRequest {
  2306  		p.c.mu.RUnlock()
  2307  		return nil
  2308  	}
  2309  
  2310  	have := len(gi.reviews)
  2311  	p.c.mu.RUnlock()
  2312  
  2313  	skipPages := have / perPage
  2314  
  2315  	mut := &maintpb.Mutation{
  2316  		GithubIssue: &maintpb.GithubIssueMutation{
  2317  			Owner:  p.Owner(),
  2318  			Repo:   p.Repo(),
  2319  			Number: issueNum,
  2320  		},
  2321  	}
  2322  
  2323  	err := p.foreachItem(ctx,
  2324  		1+skipPages,
  2325  		func(ctx context.Context, page int) ([]interface{}, *github.Response, error) {
  2326  			u := fmt.Sprintf("https://api.github.com/repos/%s/%s/pulls/%v/reviews?per_page=%v&page=%v",
  2327  				p.Owner(), p.Repo(), issueNum, perPage, page)
  2328  			req, _ := http.NewRequest("GET", u, nil)
  2329  
  2330  			req.Header.Set("Authorization", "Bearer "+p.token)
  2331  			req.Header.Set("User-Agent", "golang-x-build-maintner/1.0")
  2332  			ctx, cancel := context.WithTimeout(ctx, time.Minute)
  2333  			defer cancel()
  2334  			req = req.WithContext(ctx)
  2335  			res, err := http.DefaultClient.Do(req)
  2336  			if err != nil {
  2337  				log.Printf("Fetching %s: %v", u, err)
  2338  				return nil, nil, err
  2339  			}
  2340  			log.Printf("Fetching %s: %v", u, res.Status)
  2341  			ghResp := makeGithubResponse(res)
  2342  			if err := github.CheckResponse(res); err != nil {
  2343  				log.Printf("Fetching %s: %v: %+v", u, res.Status, res.Header)
  2344  				log.Printf("GitHub error %s: %v", u, ghResp)
  2345  				return nil, nil, err
  2346  			}
  2347  			evts, err := parseGithubReviews(res.Body)
  2348  			if err != nil {
  2349  				return nil, nil, fmt.Errorf("%s: parse github pr reviews: %v", u, err)
  2350  			}
  2351  			is := make([]interface{}, len(evts))
  2352  			for i, v := range evts {
  2353  				is[i] = v
  2354  			}
  2355  			serverDate, err := http.ParseTime(res.Header.Get("Date"))
  2356  			if err != nil {
  2357  				return nil, nil, fmt.Errorf("invalid server Date response: %v", err)
  2358  			}
  2359  			sdp, _ := ptypes.TimestampProto(serverDate.UTC())
  2360  			mut.GithubIssue.ReviewStatus = &maintpb.GithubIssueSyncStatus{ServerDate: sdp}
  2361  
  2362  			return is, ghResp, err
  2363  		},
  2364  		func(v interface{}) error {
  2365  			ge := v.(*GitHubReview)
  2366  			p.c.mu.RLock()
  2367  			_, ok := gi.reviews[ge.ID]
  2368  			p.c.mu.RUnlock()
  2369  			if ok {
  2370  				// Already have it. And they're
  2371  				// assumed to be immutable, so the
  2372  				// copy we already have should be
  2373  				// good. Don't add to mutation log.
  2374  				return nil
  2375  			}
  2376  			mut.GithubIssue.Review = append(mut.GithubIssue.Review, ge.Proto())
  2377  			return nil
  2378  		})
  2379  	if err != nil {
  2380  		return err
  2381  	}
  2382  	p.c.addMutation(mut)
  2383  	return nil
  2384  }
  2385  
  2386  // parseGithubReviews parses the JSON array of GitHub reviews in r.
  2387  // It does this the very manual way (using map[string]interface{})
  2388  // instead of using nice types because https://golang.org/issue/15314
  2389  // isn't implemented yet and also because even if it were implemented,
  2390  // this code still wants to preserve any unknown fields to store in
  2391  // the "OtherJSON" field for future updates of the code to parse. (If
  2392  // GitHub adds new Event types in the future, we want to archive them,
  2393  // even if we don't understand them)
  2394  func parseGithubReviews(r io.Reader) ([]*GitHubReview, error) {
  2395  	var jevents []map[string]interface{}
  2396  	jd := json.NewDecoder(r)
  2397  	jd.UseNumber()
  2398  	if err := jd.Decode(&jevents); err != nil {
  2399  		return nil, err
  2400  	}
  2401  	var evts []*GitHubReview
  2402  	for _, em := range jevents {
  2403  		for k, v := range em {
  2404  			if v == nil {
  2405  				delete(em, k)
  2406  			}
  2407  		}
  2408  
  2409  		e := &GitHubReview{}
  2410  
  2411  		e.ID = jint64(em["id"])
  2412  		delete(em, "id")
  2413  
  2414  		e.Body, _ = em["body"].(string)
  2415  		delete(em, "body")
  2416  
  2417  		e.State, _ = em["state"].(string)
  2418  		delete(em, "state")
  2419  
  2420  		// TODO: store these two more compactly:
  2421  		e.CommitID, _ = em["commit_id"].(string) // "5383ecf5a0824649ffcc0349f00f0317575753d0"
  2422  		delete(em, "commit_id")
  2423  
  2424  		getUser := func(field string, gup **GitHubUser) {
  2425  			am, ok := em[field].(map[string]interface{})
  2426  			if !ok {
  2427  				return
  2428  			}
  2429  			delete(em, field)
  2430  			gu := &GitHubUser{ID: jint64(am["id"])}
  2431  			gu.Login, _ = am["login"].(string)
  2432  			*gup = gu
  2433  		}
  2434  
  2435  		getUser("user", &e.Actor)
  2436  
  2437  		e.ActorAssociation, _ = em["author_association"].(string)
  2438  		delete(em, "author_association")
  2439  
  2440  		if createdStr, ok := em["submitted_at"].(string); ok {
  2441  			delete(em, "submitted_at")
  2442  			var err error
  2443  			e.Created, err = time.Parse(time.RFC3339, createdStr)
  2444  			if err != nil {
  2445  				return nil, err
  2446  			}
  2447  			e.Created = e.Created.UTC()
  2448  		}
  2449  
  2450  		delete(em, "node_id")          // GitHub API v4 Global Node ID; don't store it.
  2451  		delete(em, "html_url")         // not needed.
  2452  		delete(em, "pull_request_url") // not needed.
  2453  		delete(em, "_links")           // not needed. (duplicate data of above two nodes)
  2454  
  2455  		otherJSON, _ := json.Marshal(em)
  2456  		e.OtherJSON = string(otherJSON)
  2457  		if e.OtherJSON == "{}" {
  2458  			e.OtherJSON = ""
  2459  		}
  2460  		if e.OtherJSON != "" {
  2461  			log.Printf("warning: storing unknown field(s) in GitHub review: %s", e.OtherJSON)
  2462  		}
  2463  		evts = append(evts, e)
  2464  	}
  2465  	return evts, nil
  2466  }
  2467  
  2468  // jint64 return an int64 from the provided JSON object value v.
  2469  func jint64(v interface{}) int64 {
  2470  	switch v := v.(type) {
  2471  	case nil:
  2472  		return 0
  2473  	case json.Number:
  2474  		n, _ := strconv.ParseInt(string(v), 10, 64)
  2475  		return n
  2476  	default:
  2477  		panic(fmt.Sprintf("unexpected type %T", v))
  2478  	}
  2479  }
  2480  
  2481  // copy of go-github's parseRate, basically.
  2482  func parseRate(r *http.Response) github.Rate {
  2483  	var rate github.Rate
  2484  	// Note: even though the header names below are not canonical (the
  2485  	// canonical form would be X-Ratelimit-Limit), this particular
  2486  	// casing is what GitHub returns. See headerRateRemaining in
  2487  	// package go-github.
  2488  	if limit := r.Header.Get("X-RateLimit-Limit"); limit != "" {
  2489  		rate.Limit, _ = strconv.Atoi(limit)
  2490  	}
  2491  	if remaining := r.Header.Get("X-RateLimit-Remaining"); remaining != "" {
  2492  		rate.Remaining, _ = strconv.Atoi(remaining)
  2493  	}
  2494  	if reset := r.Header.Get("X-RateLimit-Reset"); reset != "" {
  2495  		if v, _ := strconv.ParseInt(reset, 10, 64); v != 0 {
  2496  			rate.Reset = github.Timestamp{time.Unix(v, 0)}
  2497  		}
  2498  	}
  2499  	return rate
  2500  }
  2501  
  2502  // Copy of go-github's func newResponse, basically.
  2503  func makeGithubResponse(res *http.Response) *github.Response {
  2504  	gr := &github.Response{Response: res}
  2505  	gr.Rate = parseRate(res)
  2506  	for _, lv := range res.Header["Link"] {
  2507  		for _, link := range strings.Split(lv, ",") {
  2508  			segs := strings.Split(strings.TrimSpace(link), ";")
  2509  			if len(segs) < 2 {
  2510  				continue
  2511  			}
  2512  			// ensure href is properly formatted
  2513  			if !strings.HasPrefix(segs[0], "<") || !strings.HasSuffix(segs[0], ">") {
  2514  				continue
  2515  			}
  2516  
  2517  			// try to pull out page parameter
  2518  			u, err := url.Parse(segs[0][1 : len(segs[0])-1])
  2519  			if err != nil {
  2520  				continue
  2521  			}
  2522  			page := u.Query().Get("page")
  2523  			if page == "" {
  2524  				continue
  2525  			}
  2526  
  2527  			for _, seg := range segs[1:] {
  2528  				switch strings.TrimSpace(seg) {
  2529  				case `rel="next"`:
  2530  					gr.NextPage, _ = strconv.Atoi(page)
  2531  				case `rel="prev"`:
  2532  					gr.PrevPage, _ = strconv.Atoi(page)
  2533  				case `rel="first"`:
  2534  					gr.FirstPage, _ = strconv.Atoi(page)
  2535  				case `rel="last"`:
  2536  					gr.LastPage, _ = strconv.Atoi(page)
  2537  				}
  2538  			}
  2539  		}
  2540  	}
  2541  	return gr
  2542  }
  2543  
// rxReferences matches a GitHub issue reference of the form "#123" or
// "owner/repo#123". Submatch 1 is the owner and submatch 2 the repo
// (both empty when the prefix is absent); submatch 3 is the issue number.
var rxReferences = regexp.MustCompile(`(?:\b([\w\-]+)/([\w\-]+))?\#(\d+)\b`)
  2545  
  2546  // parseGithubRefs parses references to GitHub issues from commit message commitMsg.
  2547  // Multiple references to the same issue are deduplicated.
  2548  func (c *Corpus) parseGithubRefs(gerritProj string, commitMsg string) []GitHubIssueRef {
  2549  	// Use of rxReferences by itself caused this function to take 20% of the CPU time.
  2550  	// TODO(bradfitz): stop using regexps here.
  2551  	// But in the meantime, help the regexp engine with this one weird trick:
  2552  	// Reduce the length of the string given to FindAllStringSubmatch.
  2553  	// Discard all lines before the first line containing a '#'.
  2554  	// The "Fixes #nnnn" is usually at the end, so this discards most of the input.
  2555  	// Now CPU is only 2% instead of 20%.
  2556  	hash := strings.IndexByte(commitMsg, '#')
  2557  	if hash == -1 {
  2558  		return nil
  2559  	}
  2560  	nl := strings.LastIndexByte(commitMsg[:hash], '\n')
  2561  	commitMsg = commitMsg[nl+1:]
  2562  
  2563  	// TODO: use FindAllStringSubmatchIndex instead, so we can
  2564  	// back up and see what's behind it and ignore "#1", "#2",
  2565  	// "#3" 'references' which are actually bullets or ARM
  2566  	// disassembly, and only respect them as real if they have the
  2567  	// word "Fixes " or "Issue " or similar before them.
  2568  	ms := rxReferences.FindAllStringSubmatch(commitMsg, -1)
  2569  	if len(ms) == 0 {
  2570  		return nil
  2571  	}
  2572  	/* e.g.
  2573  	2017/03/30 21:42:07 matches: [["golang/go#9327" "golang" "go" "9327"]]
  2574  	2017/03/30 21:42:07 matches: [["golang/go#16512" "golang" "go" "16512"] ["golang/go#18404" "golang" "go" "18404"]]
  2575  	2017/03/30 21:42:07 matches: [["#1" "" "" "1"]]
  2576  	2017/03/30 21:42:07 matches: [["#10234" "" "" "10234"]]
  2577  	2017/03/30 21:42:31 matches: [["GoogleCloudPlatform/gcloud-golang#262" "GoogleCloudPlatform" "gcloud-golang" "262"]]
  2578  	2017/03/30 21:42:31 matches: [["GoogleCloudPlatform/google-cloud-go#481" "GoogleCloudPlatform" "google-cloud-go" "481"]]
  2579  	*/
  2580  	c.initGithub()
  2581  	github := c.GitHub()
  2582  	refs := make([]GitHubIssueRef, 0, len(ms))
  2583  	for _, m := range ms {
  2584  		owner, repo, numStr := strings.ToLower(m[1]), strings.ToLower(m[2]), m[3]
  2585  		num, err := strconv.ParseInt(numStr, 10, 32)
  2586  		if err != nil {
  2587  			continue
  2588  		}
  2589  		if owner == "" {
  2590  			if gerritProj == "go.googlesource.com/go" {
  2591  				owner, repo = "golang", "go"
  2592  			} else {
  2593  				continue
  2594  			}
  2595  		}
  2596  		ref := GitHubIssueRef{github.getOrCreateRepo(owner, repo), int32(num)}
  2597  		if contains(refs, ref) {
  2598  			continue
  2599  		}
  2600  		refs = append(refs, ref)
  2601  	}
  2602  	return refs
  2603  }
  2604  
  2605  // contains reports whether refs contains the reference ref.
  2606  func contains(refs []GitHubIssueRef, ref GitHubIssueRef) bool {
  2607  	for _, r := range refs {
  2608  		if r == ref {
  2609  			return true
  2610  		}
  2611  	}
  2612  	return false
  2613  }
  2614  
// limitTransport is an http.RoundTripper that waits on limiter, when it
// is non-nil, before handing each request to base.
type limitTransport struct {
	// limiter is waited on before every request; RoundTrip tolerates nil.
	limiter *rate.Limiter
	// base performs the actual round trip.
	base    http.RoundTripper
}
  2619  
  2620  func (t limitTransport) RoundTrip(r *http.Request) (*http.Response, error) {
  2621  	limiter := t.limiter
  2622  	// NOTE(cbro): limiter should not be nil, but check defensively.
  2623  	if limiter != nil {
  2624  		if err := limiter.Wait(r.Context()); err != nil {
  2625  			return nil, err
  2626  		}
  2627  	}
  2628  	return t.base.RoundTrip(r)
  2629  }
  2630  
  2631  // canRetry reports whether ctx hasn't been canceled and err is a non-nil retryable error.
  2632  // If so, it blocks until enough time passes so that it's acceptable to retry immediately.
  2633  func canRetry(ctx context.Context, err error) bool {
  2634  	switch e := err.(type) {
  2635  	case *github.RateLimitError:
  2636  		log.Printf("GitHub rate limit error: %s, waiting until %s", e.Message, e.Rate.Reset.Time)
  2637  		ctx, cancel := context.WithDeadline(ctx, e.Rate.Reset.Time)
  2638  		defer cancel()
  2639  		<-ctx.Done()
  2640  		return ctx.Err() != context.Canceled
  2641  	case *github.AbuseRateLimitError:
  2642  		if e.RetryAfter != nil {
  2643  			log.Printf("GitHub rate abuse error: %s, waiting for %s", e.Message, *e.RetryAfter)
  2644  			ctx, cancel := context.WithTimeout(ctx, *e.RetryAfter)
  2645  			defer cancel()
  2646  			<-ctx.Done()
  2647  			return ctx.Err() != context.Canceled
  2648  		}
  2649  		log.Printf("GitHub rate abuse error: %s", e.Message)
  2650  	}
  2651  	return false
  2652  }