golang.org/x/build@v0.0.0-20240506185731-218518f32b70/maintner/github.go (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package maintner 6 7 import ( 8 "context" 9 "encoding/json" 10 "fmt" 11 "io" 12 "log" 13 "net/http" 14 "net/url" 15 "reflect" 16 "regexp" 17 "runtime" 18 "slices" 19 "sort" 20 "strconv" 21 "strings" 22 "time" 23 24 "github.com/golang/protobuf/ptypes" 25 "github.com/golang/protobuf/ptypes/timestamp" 26 "github.com/google/go-github/github" 27 "github.com/gregjones/httpcache" 28 29 "golang.org/x/build/maintner/maintpb" 30 "golang.org/x/oauth2" 31 "golang.org/x/sync/errgroup" 32 "golang.org/x/time/rate" 33 ) 34 35 // xFromCache is the synthetic response header added by the httpcache 36 // package for responses fulfilled from cache due to a 304 from the server. 37 const xFromCache = "X-From-Cache" 38 39 // GitHubRepoID is a GitHub org & repo, lowercase. 40 type GitHubRepoID struct { 41 Owner, Repo string 42 } 43 44 func (id GitHubRepoID) String() string { return id.Owner + "/" + id.Repo } 45 46 func (id GitHubRepoID) valid() bool { 47 if id.Owner == "" || id.Repo == "" { 48 // TODO: more validation. whatever GitHub requires. 49 return false 50 } 51 return true 52 } 53 54 // GitHub holds data about a GitHub repo. 55 type GitHub struct { 56 c *Corpus 57 users map[int64]*GitHubUser 58 teams map[int64]*GitHubTeam 59 repos map[GitHubRepoID]*GitHubRepo 60 } 61 62 // ForeachRepo calls fn serially for each GitHubRepo, stopping if fn 63 // returns an error. The function is called with lexically increasing 64 // repo IDs. 65 func (g *GitHub) ForeachRepo(fn func(*GitHubRepo) error) error { 66 var ids []GitHubRepoID 67 for id := range g.repos { 68 ids = append(ids, id) 69 } 70 sort.Slice(ids, func(i, j int) bool { 71 if ids[i].Owner < ids[j].Owner { 72 return true 73 } 74 return ids[i].Owner == ids[j].Owner && ids[i].Repo < ids[j].Repo 75 }) 76 for _, id := range ids { 77 if err := fn(g.repos[id]); err != nil { 78 return err 79 } 80 } 81 return nil 82 } 83 84 // Repo returns the repo if it's known. Otherwise it returns nil. 85 func (g *GitHub) Repo(owner, repo string) *GitHubRepo { 86 return g.repos[GitHubRepoID{owner, repo}] 87 } 88 89 func (g *GitHub) getOrCreateRepo(owner, repo string) *GitHubRepo { 90 if g == nil { 91 panic("cannot call methods on nil GitHub") 92 } 93 id := GitHubRepoID{owner, repo} 94 if !id.valid() { 95 return nil 96 } 97 r, ok := g.repos[id] 98 if ok { 99 return r 100 } 101 r = &GitHubRepo{ 102 github: g, 103 id: id, 104 issues: map[int32]*GitHubIssue{}, 105 } 106 g.repos[id] = r 107 return r 108 } 109 110 type GitHubRepo struct { 111 github *GitHub 112 id GitHubRepoID 113 issues map[int32]*GitHubIssue // num -> issue 114 milestones map[int64]*GitHubMilestone 115 labels map[int64]*GitHubLabel 116 } 117 118 func (gr *GitHubRepo) ID() GitHubRepoID { return gr.id } 119 120 // Issue returns the provided issue number, or nil if it's not known. 121 func (gr *GitHubRepo) Issue(n int32) *GitHubIssue { return gr.issues[n] } 122 123 // ForeachLabel calls fn for each label in the repo, in unsorted order. 124 // 125 // Iteration ends if fn returns an error, with that error. 126 func (gr *GitHubRepo) ForeachLabel(fn func(*GitHubLabel) error) error { 127 for _, lb := range gr.labels { 128 if err := fn(lb); err != nil { 129 return err 130 } 131 } 132 return nil 133 } 134 135 // ForeachMilestone calls fn for each milestone in the repo, in unsorted order. 136 // 137 // Iteration ends if fn returns an error, with that error. 138 func (gr *GitHubRepo) ForeachMilestone(fn func(*GitHubMilestone) error) error { 139 for _, m := range gr.milestones { 140 if err := fn(m); err != nil { 141 return err 142 } 143 } 144 return nil 145 } 146 147 // ForeachIssue calls fn for each issue in the repo. 148 // 149 // If fn returns an error, iteration ends and ForeachIssue returns 150 // with that error. 151 // 152 // The fn function is called serially, with increasingly numbered 153 // issues. 154 func (gr *GitHubRepo) ForeachIssue(fn func(*GitHubIssue) error) error { 155 s := make([]*GitHubIssue, 0, len(gr.issues)) 156 for _, gi := range gr.issues { 157 s = append(s, gi) 158 } 159 sort.Slice(s, func(i, j int) bool { return s[i].Number < s[j].Number }) 160 for _, gi := range s { 161 if err := fn(gi); err != nil { 162 return err 163 } 164 } 165 return nil 166 } 167 168 // ForeachReview calls fn for each review event on the issue 169 // 170 // If the issue is not a PullRequest, then it returns early with no error. 171 // 172 // If fn returns an error, iteration ends and ForeachReview returns 173 // with that error. 174 // 175 // The fn function is called serially, in chronological order. 176 func (pr *GitHubIssue) ForeachReview(fn func(*GitHubReview) error) error { 177 if !pr.PullRequest { 178 return nil 179 } 180 s := make([]*GitHubReview, 0, len(pr.reviews)) 181 for _, rv := range pr.reviews { 182 s = append(s, rv) 183 } 184 sort.Slice(s, func(i, j int) bool { return s[i].Created.Before(s[j].Created) }) 185 for _, rv := range s { 186 if err := fn(rv); err != nil { 187 return err 188 } 189 } 190 191 return nil 192 } 193 194 func (g *GitHubRepo) getOrCreateMilestone(id int64) *GitHubMilestone { 195 if id == 0 { 196 panic("zero id") 197 } 198 m, ok := g.milestones[id] 199 if ok { 200 return m 201 } 202 if g.milestones == nil { 203 g.milestones = map[int64]*GitHubMilestone{} 204 } 205 m = &GitHubMilestone{ID: id} 206 g.milestones[id] = m 207 return m 208 } 209 210 func (g *GitHubRepo) getOrCreateLabel(id int64) *GitHubLabel { 211 if id == 0 { 212 panic("zero id") 213 } 214 lb, ok := g.labels[id] 215 if ok { 216 return lb 217 } 218 if g.labels == nil { 219 g.labels = map[int64]*GitHubLabel{} 220 } 221 lb = &GitHubLabel{ID: id} 222 g.labels[id] = lb 223 return lb 224 } 225 226 func (g *GitHubRepo) verbose() bool { 227 return g.github != nil && g.github.c != nil && g.github.c.verbose 228 } 229 230 // GitHubUser represents a GitHub user. 231 // It is a subset of https://developer.github.com/v3/users/#get-a-single-user 232 type GitHubUser struct { 233 ID int64 234 Login string 235 } 236 237 // GitHubTeam represents a GitHub team. 238 // It is a subset of https://developer.github.com/v3/orgs/teams/#get-team 239 type GitHubTeam struct { 240 ID int64 241 242 // Slug is a URL-friendly representation of the team name. 243 // It is unique across a GitHub organization. 244 Slug string 245 } 246 247 // GitHubIssueRef is a reference to an issue (or pull request) number 248 // in a repo. These are parsed from text making references such as 249 // "golang/go#1234" or just "#1234" (with an implicit Repo). 250 type GitHubIssueRef struct { 251 Repo *GitHubRepo // must be non-nil 252 Number int32 // GitHubIssue.Number 253 } 254 255 func (r GitHubIssueRef) String() string { return fmt.Sprintf("%s#%d", r.Repo.ID(), r.Number) } 256 257 // GitHubIssue represents a GitHub issue. 258 // This is maintner's in-memory representation. It differs slightly 259 // from the API's *github.Issue type, notably in the lack of pointers 260 // for all fields. 261 // See https://developer.github.com/v3/issues/#get-a-single-issue 262 type GitHubIssue struct { 263 ID int64 264 Number int32 265 NotExist bool // if true, rest of fields should be ignored. 266 Closed bool 267 Locked bool 268 PullRequest bool // if true, this issue is a Pull Request. All PRs are issues, but not all issues are PRs. 269 User *GitHubUser 270 Assignees []*GitHubUser 271 Created time.Time 272 Updated time.Time 273 ClosedAt time.Time 274 ClosedBy *GitHubUser // TODO(dmitshur): Implement (see golang.org/issue/28745). 275 Title string 276 Body string 277 Milestone *GitHubMilestone // nil for unknown, noMilestone for none 278 Labels map[int64]*GitHubLabel // label ID => label 279 280 commentsUpdatedTil time.Time // max comment modtime seen 281 commentsSyncedAsOf time.Time // as of server's Date header 282 comments map[int64]*GitHubComment // by comment.ID 283 eventMaxTime time.Time // latest time of any event in events map 284 eventsSyncedAsOf time.Time // as of server's Date header 285 reviewsSyncedAsOf time.Time // as of server's Date header 286 events map[int64]*GitHubIssueEvent // by event.ID 287 reviews map[int64]*GitHubReview // by event.ID 288 } 289 290 // LastModified reports the most recent time that any known metadata was updated. 291 // In contrast to the Updated field, LastModified includes comments and events. 292 // 293 // TODO(bradfitz): this seems to not be working, at least events 294 // aren't updating it. Investigate. 295 func (gi *GitHubIssue) LastModified() time.Time { 296 ret := gi.Updated 297 if gi.commentsUpdatedTil.After(ret) { 298 ret = gi.commentsUpdatedTil 299 } 300 if gi.eventMaxTime.After(ret) { 301 ret = gi.eventMaxTime 302 } 303 return ret 304 } 305 306 // HasEvent reports whether there's any GitHubIssueEvent in this 307 // issue's history of the given type. 308 func (gi *GitHubIssue) HasEvent(eventType string) bool { 309 for _, e := range gi.events { 310 if e.Type == eventType { 311 return true 312 } 313 } 314 return false 315 } 316 317 // ForeachEvent calls fn for each event on the issue. 318 // 319 // If fn returns an error, iteration ends and ForeachEvent returns 320 // with that error. 321 // 322 // The fn function is called serially, in order of the event's time. 323 func (gi *GitHubIssue) ForeachEvent(fn func(*GitHubIssueEvent) error) error { 324 // TODO: keep these sorted in the corpus 325 s := make([]*GitHubIssueEvent, 0, len(gi.events)) 326 for _, e := range gi.events { 327 s = append(s, e) 328 } 329 sort.Slice(s, func(i, j int) bool { 330 ci, cj := s[i].Created, s[j].Created 331 if ci.Before(cj) { 332 return true 333 } 334 return ci.Equal(cj) && s[i].ID < s[j].ID 335 }) 336 for _, e := range s { 337 if err := fn(e); err != nil { 338 return err 339 } 340 } 341 return nil 342 } 343 344 // ForeachComment calls fn for each event on the issue. 345 // 346 // If fn returns an error, iteration ends and ForeachComment returns 347 // with that error. 348 // 349 // The fn function is called serially, in order of the comment's time. 350 func (gi *GitHubIssue) ForeachComment(fn func(*GitHubComment) error) error { 351 // TODO: keep these sorted in the corpus 352 s := make([]*GitHubComment, 0, len(gi.comments)) 353 for _, e := range gi.comments { 354 s = append(s, e) 355 } 356 sort.Slice(s, func(i, j int) bool { 357 ci, cj := s[i].Created, s[j].Created 358 if ci.Before(cj) { 359 return true 360 } 361 return ci.Equal(cj) && s[i].ID < s[j].ID 362 }) 363 for _, e := range s { 364 if err := fn(e); err != nil { 365 return err 366 } 367 } 368 return nil 369 } 370 371 // HasLabel reports whether the issue is labeled with the given label. 372 func (gi *GitHubIssue) HasLabel(label string) bool { 373 for _, lb := range gi.Labels { 374 if lb.Name == label { 375 return true 376 } 377 } 378 return false 379 } 380 381 // HasLabelID returns whether the issue has a label with the given ID. 382 func (gi *GitHubIssue) HasLabelID(id int64) bool { 383 _, ok := gi.Labels[id] 384 return ok 385 } 386 387 func (gi *GitHubIssue) getCreatedAt() time.Time { 388 if gi == nil { 389 return time.Time{} 390 } 391 return gi.Created 392 } 393 394 func (gi *GitHubIssue) getUpdatedAt() time.Time { 395 if gi == nil { 396 return time.Time{} 397 } 398 return gi.Updated 399 } 400 401 func (gi *GitHubIssue) getClosedAt() time.Time { 402 if gi == nil { 403 return time.Time{} 404 } 405 return gi.ClosedAt 406 } 407 408 // noMilestone is a sentinel value to explicitly mean no milestone. 409 var noMilestone = new(GitHubMilestone) 410 411 type GitHubLabel struct { 412 ID int64 413 Name string 414 // TODO: color? 415 } 416 417 // GenMutationDiff generates a diff from in-memory state 'a' (which 418 // may be nil) to the current (non-nil) state b from GitHub. It 419 // returns nil if there's no difference. 420 func (a *GitHubLabel) GenMutationDiff(b *github.Label) *maintpb.GithubLabel { 421 id := int64(b.GetID()) 422 if a != nil && a.ID == id && a.Name == b.GetName() { 423 // No change. 424 return nil 425 } 426 return &maintpb.GithubLabel{Id: id, Name: b.GetName()} 427 } 428 429 func (lb *GitHubLabel) processMutation(mut maintpb.GithubLabel) { 430 if lb.ID == 0 { 431 panic("bogus label ID 0") 432 } 433 if lb.ID != mut.Id { 434 panic(fmt.Sprintf("label ID = %v != mutation ID = %v", lb.ID, mut.Id)) 435 } 436 if mut.Name != "" { 437 lb.Name = mut.Name 438 } 439 } 440 441 type GitHubMilestone struct { 442 ID int64 443 Title string 444 Number int32 445 Closed bool 446 } 447 448 // IsNone reports whether ms represents the sentinel "no milestone" milestone. 449 func (ms *GitHubMilestone) IsNone() bool { return ms == noMilestone } 450 451 // IsUnknown reports whether ms is nil, which represents the unknown 452 // state. Milestones should never be in this state, though. 453 func (ms *GitHubMilestone) IsUnknown() bool { return ms == nil } 454 455 // emptyMilestone is a non-nil *githubMilestone with zero values for 456 // all fields. 457 var emptyMilestone = new(GitHubMilestone) 458 459 // GenMutationDiff generates a diff from in-memory state 'a' (which 460 // may be nil) to the current (non-nil) state b from GitHub. It 461 // returns nil if there's no difference. 462 func (a *GitHubMilestone) GenMutationDiff(b *github.Milestone) *maintpb.GithubMilestone { 463 var ret *maintpb.GithubMilestone // lazily inited by diff 464 diff := func() *maintpb.GithubMilestone { 465 if ret == nil { 466 ret = &maintpb.GithubMilestone{Id: int64(b.GetID())} 467 } 468 return ret 469 } 470 if a == nil { 471 a = emptyMilestone 472 } 473 if a.Title != b.GetTitle() { 474 diff().Title = b.GetTitle() 475 } 476 if a.Number != int32(b.GetNumber()) { 477 diff().Number = int64(b.GetNumber()) 478 } 479 if closed := b.GetState() == "closed"; a.Closed != closed { 480 diff().Closed = &maintpb.BoolChange{Val: closed} 481 } 482 return ret 483 } 484 485 func (ms *GitHubMilestone) processMutation(mut maintpb.GithubMilestone) { 486 if ms.ID == 0 { 487 panic("bogus milestone ID 0") 488 } 489 if ms.ID != mut.Id { 490 panic(fmt.Sprintf("milestone ID = %v != mutation ID = %v", ms.ID, mut.Id)) 491 } 492 if mut.Title != "" { 493 ms.Title = mut.Title 494 } 495 if mut.Number != 0 { 496 ms.Number = int32(mut.Number) 497 } 498 if mut.Closed != nil { 499 ms.Closed = mut.Closed.Val 500 } 501 } 502 503 // GitHubReview represents a review on a Pull Request. 504 // For more details, see https://developer.github.com/v3/pulls/reviews/ 505 type GitHubReview struct { 506 ID int64 507 Actor *GitHubUser 508 Body string 509 State string // COMMENTED, APPROVED, CHANGES_REQUESTED 510 CommitID string 511 ActorAssociation string // CONTRIBUTOR 512 Created time.Time 513 OtherJSON string 514 } 515 516 // Proto converts GitHubReview to a protobuf 517 func (e *GitHubReview) Proto() *maintpb.GithubReview { 518 p := &maintpb.GithubReview{ 519 Id: e.ID, 520 Body: e.Body, 521 State: e.State, 522 CommitId: e.CommitID, 523 ActorAssociation: e.ActorAssociation, 524 } 525 if e.OtherJSON != "" { 526 p.OtherJson = []byte(e.OtherJSON) 527 } 528 if !e.Created.IsZero() { 529 if tp, err := ptypes.TimestampProto(e.Created); err == nil { 530 p.Created = tp 531 } 532 } 533 if e.Actor != nil { 534 p.ActorId = e.Actor.ID 535 } 536 537 return p 538 } 539 540 // r.github.c.mu must be held. 541 func (r *GitHubRepo) newGithubReview(p *maintpb.GithubReview) *GitHubReview { 542 g := r.github 543 e := &GitHubReview{ 544 ID: p.Id, 545 Actor: g.getOrCreateUserID(p.ActorId), 546 ActorAssociation: p.ActorAssociation, 547 CommitID: p.CommitId, 548 Body: p.Body, 549 State: p.State, 550 } 551 552 if p.Created != nil { 553 e.Created, _ = ptypes.Timestamp(p.Created) 554 } 555 if len(p.OtherJson) > 0 { 556 // TODO: parse it and see if we've since learned how 557 // to deal with it? 558 if r.verbose() { 559 log.Printf("newGithubReview: unknown JSON in log: %s", p.OtherJson) 560 } 561 e.OtherJSON = string(p.OtherJson) 562 } 563 564 return e 565 } 566 567 type GitHubComment struct { 568 ID int64 569 User *GitHubUser 570 Created time.Time 571 Updated time.Time 572 Body string 573 } 574 575 // GitHubDismissedReview is the contents of a dismissed review event. For more 576 // details, see https://developer.github.com/v3/issues/events/. 577 type GitHubDismissedReviewEvent struct { 578 ReviewID int64 579 State string // commented, approved, changes_requested 580 DismissalMessage string 581 } 582 583 type GitHubIssueEvent struct { 584 // TODO: this struct is a little wide. change it to an interface 585 // instead? Maybe later, if memory profiling suggests it would help. 586 587 // ID is the ID of the event. 588 ID int64 589 590 // Type is one of: 591 // * labeled, unlabeled 592 // * milestoned, demilestoned 593 // * assigned, unassigned 594 // * locked, unlocked 595 // * closed 596 // * referenced 597 // * renamed 598 // * reopened 599 // * comment_deleted 600 // * head_ref_restored 601 // * base_ref_changed 602 // * subscribed 603 // * mentioned 604 // * review_requested, review_request_removed, review_dismissed 605 Type string 606 607 // OtherJSON optionally contains a JSON object of GitHub's API 608 // response for any fields maintner was unable to extract at 609 // the time. It is empty if maintner supported all the fields 610 // when the mutation was created. 611 OtherJSON string 612 613 Created time.Time 614 Actor *GitHubUser 615 616 Label string // for type: "unlabeled", "labeled" 617 Assignee *GitHubUser // for type: "assigned", "unassigned" 618 Assigner *GitHubUser // for type: "assigned", "unassigned" 619 Milestone string // for type: "milestoned", "demilestoned" 620 From, To string // for type: "renamed" 621 CommitID, CommitURL string // for type: "closed", "referenced" ... ? 622 623 Reviewer *GitHubUser 624 TeamReviewer *GitHubTeam 625 ReviewRequester *GitHubUser 626 DismissedReview *GitHubDismissedReviewEvent 627 } 628 629 func (e *GitHubIssueEvent) Proto() *maintpb.GithubIssueEvent { 630 p := &maintpb.GithubIssueEvent{ 631 Id: e.ID, 632 EventType: e.Type, 633 RenameFrom: e.From, 634 RenameTo: e.To, 635 } 636 if e.OtherJSON != "" { 637 p.OtherJson = []byte(e.OtherJSON) 638 } 639 if !e.Created.IsZero() { 640 if tp, err := ptypes.TimestampProto(e.Created); err == nil { 641 p.Created = tp 642 } 643 } 644 if e.Actor != nil { 645 p.ActorId = e.Actor.ID 646 } 647 if e.Assignee != nil { 648 p.AssigneeId = e.Assignee.ID 649 } 650 if e.Assigner != nil { 651 p.AssignerId = e.Assigner.ID 652 } 653 if e.Label != "" { 654 p.Label = &maintpb.GithubLabel{Name: e.Label} 655 } 656 if e.Milestone != "" { 657 p.Milestone = &maintpb.GithubMilestone{Title: e.Milestone} 658 } 659 if e.CommitID != "" { 660 c := &maintpb.GithubCommit{CommitId: e.CommitID} 661 if m := rxGithubCommitURL.FindStringSubmatch(e.CommitURL); m != nil { 662 c.Owner = m[1] 663 c.Repo = m[2] 664 } 665 p.Commit = c 666 } 667 if e.Reviewer != nil { 668 p.ReviewerId = e.Reviewer.ID 669 } 670 if e.TeamReviewer != nil { 671 p.TeamReviewer = &maintpb.GithubTeam{ 672 Id: e.TeamReviewer.ID, 673 Slug: e.TeamReviewer.Slug, 674 } 675 } 676 if e.ReviewRequester != nil { 677 p.ReviewRequesterId = e.ReviewRequester.ID 678 } 679 if e.DismissedReview != nil { 680 p.DismissedReview = &maintpb.GithubDismissedReviewEvent{ 681 ReviewId: e.DismissedReview.ReviewID, 682 State: e.DismissedReview.State, 683 DismissalMessage: e.DismissedReview.DismissalMessage, 684 } 685 } 686 return p 687 } 688 689 var rxGithubCommitURL = regexp.MustCompile(`^https://api\.github\.com/repos/([^/]+)/([^/]+)/commits/`) 690 691 // r.github.c.mu must be held. 692 func (r *GitHubRepo) newGithubEvent(p *maintpb.GithubIssueEvent) *GitHubIssueEvent { 693 g := r.github 694 e := &GitHubIssueEvent{ 695 ID: p.Id, 696 Type: p.EventType, 697 Actor: g.getOrCreateUserID(p.ActorId), 698 Assignee: g.getOrCreateUserID(p.AssigneeId), 699 Assigner: g.getOrCreateUserID(p.AssignerId), 700 Reviewer: g.getOrCreateUserID(p.ReviewerId), 701 TeamReviewer: g.getTeam(p.TeamReviewer), 702 ReviewRequester: g.getOrCreateUserID(p.ReviewRequesterId), 703 From: p.RenameFrom, 704 To: p.RenameTo, 705 } 706 if p.Created != nil { 707 e.Created, _ = ptypes.Timestamp(p.Created) 708 } 709 if len(p.OtherJson) > 0 { 710 // TODO: parse it and see if we've since learned how 711 // to deal with it? 712 if r.verbose() { 713 log.Printf("newGithubEvent: unknown JSON in log: %s", p.OtherJson) 714 } 715 e.OtherJSON = string(p.OtherJson) 716 } 717 if p.Label != nil { 718 e.Label = g.c.str(p.Label.Name) 719 } 720 if p.Milestone != nil { 721 e.Milestone = g.c.str(p.Milestone.Title) 722 } 723 if c := p.Commit; c != nil { 724 e.CommitID = c.CommitId 725 if c.Owner != "" && c.Repo != "" { 726 // TODO: this field is dumb. break it down. 727 e.CommitURL = "https://api.github.com/repos/" + c.Owner + "/" + c.Repo + "/commits/" + c.CommitId 728 } 729 } 730 if d := p.DismissedReview; d != nil { 731 e.DismissedReview = &GitHubDismissedReviewEvent{ 732 ReviewID: d.ReviewId, 733 State: d.State, 734 DismissalMessage: d.DismissalMessage, 735 } 736 } 737 return e 738 } 739 740 // (requires corpus be locked for reads) 741 func (gi *GitHubIssue) commentsSynced() bool { 742 if gi.NotExist { 743 // Issue doesn't exist, so can't sync its non-issues, 744 // so consider it done. 745 return true 746 } 747 return gi.commentsSyncedAsOf.After(gi.Updated) 748 } 749 750 // (requires corpus be locked for reads) 751 func (gi *GitHubIssue) eventsSynced() bool { 752 if gi.NotExist { 753 // Issue doesn't exist, so can't sync its non-issues, 754 // so consider it done. 755 return true 756 } 757 return gi.eventsSyncedAsOf.After(gi.Updated) 758 } 759 760 // (requires corpus be locked for reads) 761 func (gi *GitHubIssue) reviewsSynced() bool { 762 if gi.NotExist { 763 // Issue doesn't exist, so can't sync its non-issues, 764 // so consider it done. 765 return true 766 } 767 return gi.reviewsSyncedAsOf.After(gi.Updated) 768 } 769 770 func (c *Corpus) initGithub() { 771 if c.github != nil { 772 return 773 } 774 c.github = &GitHub{ 775 c: c, 776 repos: map[GitHubRepoID]*GitHubRepo{}, 777 } 778 } 779 780 // SetGitHubLimiter sets a limiter that controls the rate of requests made 781 // to GitHub APIs. If nil, requests are not limited. Only valid in leader mode. 782 // The limiter must only be set before Sync or SyncLoop is called. 783 func (c *Corpus) SetGitHubLimiter(l *rate.Limiter) { 784 c.githubLimiter = l 785 } 786 787 // TrackGitHub registers the named GitHub repo as a repo to 788 // watch and append to the mutation log. Only valid in leader mode. 789 // The token is the auth token to use to make API calls. 790 func (c *Corpus) TrackGitHub(owner, repo, token string) { 791 if c.mutationLogger == nil { 792 panic("can't TrackGitHub in non-leader mode") 793 } 794 795 c.mu.Lock() 796 defer c.mu.Unlock() 797 c.initGithub() 798 gr := c.github.getOrCreateRepo(owner, repo) 799 if gr == nil { 800 log.Fatalf("invalid github owner/repo %q/%q", owner, repo) 801 } 802 c.watchedGithubRepos = append(c.watchedGithubRepos, watchedGithubRepo{ 803 gr: gr, 804 token: token, 805 }) 806 } 807 808 type watchedGithubRepo struct { 809 gr *GitHubRepo 810 token string 811 } 812 813 // g.c.mu must be held 814 func (g *GitHub) getUser(pu *maintpb.GithubUser) *GitHubUser { 815 if pu == nil { 816 return nil 817 } 818 if u := g.users[pu.Id]; u != nil { 819 if pu.Login != "" && pu.Login != u.Login { 820 u.Login = pu.Login 821 } 822 return u 823 } 824 if g.users == nil { 825 g.users = make(map[int64]*GitHubUser) 826 } 827 u := &GitHubUser{ 828 ID: pu.Id, 829 Login: pu.Login, 830 } 831 g.users[pu.Id] = u 832 return u 833 } 834 835 func (g *GitHub) getOrCreateUserID(id int64) *GitHubUser { 836 if id == 0 { 837 return nil 838 } 839 if u := g.users[id]; u != nil { 840 return u 841 } 842 if g.users == nil { 843 g.users = make(map[int64]*GitHubUser) 844 } 845 u := &GitHubUser{ID: id} 846 g.users[id] = u 847 return u 848 } 849 850 // g.c.mu must be held 851 func (g *GitHub) getTeam(pt *maintpb.GithubTeam) *GitHubTeam { 852 if pt == nil { 853 return nil 854 } 855 if g.teams == nil { 856 g.teams = make(map[int64]*GitHubTeam) 857 } 858 859 t := g.teams[pt.Id] 860 if t == nil { 861 t = &GitHubTeam{ 862 ID: pt.Id, 863 } 864 g.teams[pt.Id] = t 865 } 866 if pt.Slug != "" { 867 t.Slug = pt.Slug 868 } 869 return t 870 } 871 872 // newGithubUserProto creates a GithubUser with the minimum diff between 873 // existing and g. The return value is nil if there were no changes. existing 874 // may also be nil. 875 func newGithubUserProto(existing *GitHubUser, g *github.User) *maintpb.GithubUser { 876 if g == nil { 877 return nil 878 } 879 id := int64(g.GetID()) 880 if existing == nil { 881 return &maintpb.GithubUser{ 882 Id: id, 883 Login: g.GetLogin(), 884 } 885 } 886 hasChanges := false 887 u := &maintpb.GithubUser{Id: id} 888 if login := g.GetLogin(); existing.Login != login { 889 u.Login = login 890 hasChanges = true 891 } 892 // Add more fields here 893 if hasChanges { 894 return u 895 } 896 return nil 897 } 898 899 // deletedAssignees returns an array of user ID's that are present in existing 900 // but not present in new. 901 func deletedAssignees(existing []*GitHubUser, new []*github.User) []int64 { 902 mp := make(map[int64]bool, len(existing)) 903 for _, u := range new { 904 id := int64(u.GetID()) 905 mp[id] = true 906 } 907 toDelete := []int64{} 908 for _, u := range existing { 909 if _, ok := mp[u.ID]; !ok { 910 toDelete = append(toDelete, u.ID) 911 } 912 } 913 return toDelete 914 } 915 916 // newAssignees returns an array of diffs between existing and new. New users in 917 // new will be present in the returned array in their entirety. Modified users 918 // will appear containing only the ID field and changed fields. Unmodified users 919 // will not appear in the returned array. 920 func newAssignees(existing []*GitHubUser, new []*github.User) []*maintpb.GithubUser { 921 mp := make(map[int64]*GitHubUser, len(existing)) 922 for _, u := range existing { 923 mp[u.ID] = u 924 } 925 changes := []*maintpb.GithubUser{} 926 for _, u := range new { 927 if existingUser, ok := mp[int64(u.GetID())]; ok { 928 diffUser := &maintpb.GithubUser{ 929 Id: int64(u.GetID()), 930 } 931 hasDiff := false 932 if login := u.GetLogin(); existingUser.Login != login { 933 diffUser.Login = login 934 hasDiff = true 935 } 936 // check more User fields for diffs here, as we add them to the proto 937 938 if hasDiff { 939 changes = append(changes, diffUser) 940 } 941 } else { 942 changes = append(changes, &maintpb.GithubUser{ 943 Id: int64(u.GetID()), 944 Login: u.GetLogin(), 945 }) 946 } 947 } 948 return changes 949 } 950 951 // setAssigneesFromProto returns a new array of assignees according to the 952 // instructions in new (adds or modifies users in existing), and toDelete 953 // (deletes them). c.mu must be held. 954 func (g *GitHub) setAssigneesFromProto(existing []*GitHubUser, new []*maintpb.GithubUser, toDelete []int64) []*GitHubUser { 955 c := g.c 956 mp := make(map[int64]*GitHubUser) 957 for _, u := range existing { 958 mp[u.ID] = u 959 } 960 for _, u := range new { 961 if existingUser, ok := mp[u.Id]; ok { 962 if u.Login != "" { 963 existingUser.Login = u.Login 964 } 965 // TODO: add other fields here when we add them for user. 966 } else { 967 c.debugf("adding assignee %q", u.Login) 968 existing = append(existing, g.getUser(u)) 969 } 970 } 971 // this is quadratic but the number of assignees is very unlikely to exceed, 972 // say, 5. 973 existing = slices.DeleteFunc(existing, func(u *GitHubUser) bool { 974 return slices.Contains(toDelete, u.ID) 975 }) 976 return existing 977 } 978 979 // githubIssueDiffer generates a minimal diff (protobuf mutation) to 980 // get a GitHub Issue from its in-memory state 'a' to the current 981 // GitHub API state 'b'. 982 type githubIssueDiffer struct { 983 gr *GitHubRepo 984 a *GitHubIssue // may be nil if no current state 985 b *github.Issue // may NOT be nil 986 } 987 988 // returns nil if no changes. 989 func (d githubIssueDiffer) Diff() *maintpb.GithubIssueMutation { 990 var changed bool 991 m := &maintpb.GithubIssueMutation{ 992 Owner: d.gr.id.Owner, 993 Repo: d.gr.id.Repo, 994 Number: int32(d.b.GetNumber()), 995 PullRequest: d.b.IsPullRequest(), 996 } 997 for _, f := range issueDiffMethods { 998 if f(d, m) { 999 if d.gr.verbose() { 1000 fname := strings.TrimPrefix(runtime.FuncForPC(reflect.ValueOf(f).Pointer()).Name(), "golang.org/x/build/maintner.githubIssueDiffer.") 1001 log.Printf("Issue %d changed: %v", d.b.GetNumber(), fname) 1002 } 1003 changed = true 1004 } 1005 } 1006 if !changed { 1007 return nil 1008 } 1009 return m 1010 } 1011 1012 // issueDiffMethods are the different steps githubIssueDiffer.Diff 1013 // goes through to compute a diff. The methods should return true if 1014 // any change was made. The order is irrelevant unless otherwise 1015 // documented in comments in the list below. 1016 var issueDiffMethods = []func(githubIssueDiffer, *maintpb.GithubIssueMutation) bool{ 1017 githubIssueDiffer.diffCreatedAt, 1018 githubIssueDiffer.diffUpdatedAt, 1019 githubIssueDiffer.diffUser, 1020 githubIssueDiffer.diffBody, 1021 githubIssueDiffer.diffTitle, 1022 githubIssueDiffer.diffMilestone, 1023 githubIssueDiffer.diffAssignees, 1024 githubIssueDiffer.diffClosedState, 1025 githubIssueDiffer.diffClosedAt, 1026 githubIssueDiffer.diffClosedBy, 1027 githubIssueDiffer.diffLockedState, 1028 githubIssueDiffer.diffLabels, 1029 } 1030 1031 func (d githubIssueDiffer) diffCreatedAt(m *maintpb.GithubIssueMutation) bool { 1032 return d.diffTimeField(&m.Created, d.a.getCreatedAt(), d.b.GetCreatedAt()) 1033 } 1034 1035 func (d githubIssueDiffer) diffUpdatedAt(m *maintpb.GithubIssueMutation) bool { 1036 return d.diffTimeField(&m.Updated, d.a.getUpdatedAt(), d.b.GetUpdatedAt()) 1037 } 1038 1039 func (d githubIssueDiffer) diffClosedAt(m *maintpb.GithubIssueMutation) bool { 1040 return d.diffTimeField(&m.ClosedAt, d.a.getClosedAt(), d.b.GetClosedAt()) 1041 } 1042 1043 func (d githubIssueDiffer) diffTimeField(dst **timestamp.Timestamp, memTime, githubTime time.Time) bool { 1044 if githubTime.IsZero() || memTime.Equal(githubTime) { 1045 return false 1046 } 1047 tproto, err := ptypes.TimestampProto(githubTime) 1048 if err != nil { 1049 panic(err) 1050 } 1051 *dst = tproto 1052 return true 1053 } 1054 1055 func (d githubIssueDiffer) diffUser(m *maintpb.GithubIssueMutation) bool { 1056 var existing *GitHubUser 1057 if d.a != nil { 1058 existing = d.a.User 1059 } 1060 m.User = newGithubUserProto(existing, d.b.User) 1061 return m.User != nil 1062 } 1063 1064 func (d githubIssueDiffer) diffClosedBy(m *maintpb.GithubIssueMutation) bool { 1065 var existing *GitHubUser 1066 if d.a != nil { 1067 existing = d.a.ClosedBy 1068 } 1069 m.ClosedBy = newGithubUserProto(existing, d.b.ClosedBy) 1070 return m.ClosedBy != nil 1071 } 1072 1073 func (d githubIssueDiffer) diffBody(m *maintpb.GithubIssueMutation) bool { 1074 if d.a != nil && d.a.Body == d.b.GetBody() { 1075 return false 1076 } 1077 m.BodyChange = &maintpb.StringChange{Val: d.b.GetBody()} 1078 return true 1079 } 1080 1081 func (d githubIssueDiffer) diffTitle(m *maintpb.GithubIssueMutation) bool { 1082 if d.a != nil && d.a.Title == d.b.GetTitle() { 1083 return false 1084 } 1085 m.Title = d.b.GetTitle() 1086 // TODO: emit a StringChange if we ever have a problem that we 1087 // legitimately need real issues with no titles reflected in 1088 // maintner's model. For now just ignore such changes, if 1089 // GitHub even permits the. 1090 return m.Title != "" 1091 } 1092 1093 func (d githubIssueDiffer) diffMilestone(m *maintpb.GithubIssueMutation) bool { 1094 if d.a != nil && d.a.Milestone != nil { 1095 ma, mb := d.a.Milestone, d.b.Milestone 1096 if ma == noMilestone && d.b.Milestone == nil { 1097 // Unchanged. Still no milestone. 1098 return false 1099 } 1100 if mb != nil && ma.ID == int64(mb.GetID()) { 1101 // Unchanged. Same milestone. 1102 // TODO: detect milestone renames and emit mutation for that? 1103 return false 1104 } 1105 1106 } 1107 if mb := d.b.Milestone; mb != nil { 1108 m.MilestoneId = int64(mb.GetID()) 1109 m.MilestoneNum = int64(mb.GetNumber()) 1110 m.MilestoneTitle = mb.GetTitle() 1111 } else { 1112 m.NoMilestone = true 1113 } 1114 return true 1115 } 1116 1117 func (d githubIssueDiffer) diffAssignees(m *maintpb.GithubIssueMutation) bool { 1118 if d.a == nil { 1119 m.Assignees = newAssignees(nil, d.b.Assignees) 1120 return true 1121 } 1122 m.Assignees = newAssignees(d.a.Assignees, d.b.Assignees) 1123 m.DeletedAssignees = deletedAssignees(d.a.Assignees, d.b.Assignees) 1124 return len(m.Assignees) > 0 || len(m.DeletedAssignees) > 0 1125 } 1126 1127 func (d githubIssueDiffer) diffLabels(m *maintpb.GithubIssueMutation) bool { 1128 // Common case: no changes. Return false quickly without allocations. 1129 if d.a != nil && len(d.a.Labels) == len(d.b.Labels) { 1130 missing := false 1131 for _, gl := range d.b.Labels { 1132 if _, ok := d.a.Labels[int64(gl.GetID())]; !ok { 1133 missing = true 1134 break 1135 } 1136 } 1137 if !missing { 1138 return false 1139 } 1140 } 1141 1142 toAdd := map[int64]*maintpb.GithubLabel{} 1143 for _, gl := range d.b.Labels { 1144 id := int64(gl.GetID()) 1145 if id == 0 { 1146 panic("zero label ID") 1147 } 1148 toAdd[id] = &maintpb.GithubLabel{Id: id, Name: gl.GetName()} 1149 } 1150 1151 var toDelete []int64 1152 if d.a != nil { 1153 for id := range d.a.Labels { 1154 if _, ok := toAdd[id]; ok { 1155 // Already had it. 1156 delete(toAdd, id) 1157 } else { 1158 // We had it, but no longer. 1159 toDelete = append(toDelete, id) 1160 } 1161 } 1162 } 1163 1164 m.RemoveLabel = toDelete 1165 for _, labpb := range toAdd { 1166 m.AddLabel = append(m.AddLabel, labpb) 1167 } 1168 1169 return len(m.RemoveLabel) > 0 || len(m.AddLabel) > 0 1170 } 1171 1172 func (d githubIssueDiffer) diffClosedState(m *maintpb.GithubIssueMutation) bool { 1173 bclosed := d.b.GetState() == "closed" 1174 if d.a != nil && d.a.Closed == bclosed { 1175 return false 1176 } 1177 m.Closed = &maintpb.BoolChange{Val: bclosed} 1178 return true 1179 } 1180 1181 func (d githubIssueDiffer) diffLockedState(m *maintpb.GithubIssueMutation) bool { 1182 if d.a != nil && d.a.Locked == d.b.GetLocked() { 1183 return false 1184 } 1185 if d.a == nil && !d.b.GetLocked() { 1186 return false 1187 } 1188 m.Locked = &maintpb.BoolChange{Val: d.b.GetLocked()} 1189 return true 1190 } 1191 1192 // newMutationFromIssue generates a GithubIssueMutation using the 1193 // smallest possible diff between a (the state we have in memory in 1194 // the corpus) and b (the current GitHub API state). 1195 // 1196 // If newMutationFromIssue returns nil, the provided github.Issue is no newer 1197 // than the data we have in the corpus. 'a' may be nil. 1198 func (r *GitHubRepo) newMutationFromIssue(a *GitHubIssue, b *github.Issue) *maintpb.Mutation { 1199 if b == nil || b.Number == nil { 1200 panic(fmt.Sprintf("github issue with nil number: %#v", b)) 1201 } 1202 gim := githubIssueDiffer{gr: r, a: a, b: b}.Diff() 1203 if gim == nil { 1204 // No changes. 1205 return nil 1206 } 1207 return &maintpb.Mutation{GithubIssue: gim} 1208 } 1209 1210 func (r *GitHubRepo) missingIssues() []int32 { 1211 c := r.github.c 1212 c.mu.RLock() 1213 defer c.mu.RUnlock() 1214 1215 var maxNum int32 1216 for num := range r.issues { 1217 if num > maxNum { 1218 maxNum = num 1219 } 1220 } 1221 1222 var missing []int32 1223 for num := int32(1); num < maxNum; num++ { 1224 if _, ok := r.issues[num]; !ok { 1225 missing = append(missing, num) 1226 } 1227 } 1228 return missing 1229 } 1230 1231 // processGithubMutation updates the corpus with the information in m. 1232 func (c *Corpus) processGithubMutation(m *maintpb.GithubMutation) { 1233 if c == nil { 1234 panic("nil corpus") 1235 } 1236 c.initGithub() 1237 gr := c.github.getOrCreateRepo(m.Owner, m.Repo) 1238 if gr == nil { 1239 log.Printf("bogus Owner/Repo %q/%q in mutation: %v", m.Owner, m.Repo, m) 1240 return 1241 } 1242 for _, lp := range m.Labels { 1243 lb := gr.getOrCreateLabel(lp.Id) 1244 lb.processMutation(*lp) 1245 } 1246 for _, mp := range m.Milestones { 1247 ms := gr.getOrCreateMilestone(mp.Id) 1248 ms.processMutation(*mp) 1249 } 1250 } 1251 1252 // processGithubIssueMutation updates the corpus with the information in m. 1253 func (c *Corpus) processGithubIssueMutation(m *maintpb.GithubIssueMutation) { 1254 if c == nil { 1255 panic("nil corpus") 1256 } 1257 c.initGithub() 1258 gr := c.github.getOrCreateRepo(m.Owner, m.Repo) 1259 if gr == nil { 1260 log.Printf("bogus Owner/Repo %q/%q in mutation: %v", m.Owner, m.Repo, m) 1261 return 1262 } 1263 if m.Number == 0 { 1264 log.Printf("bogus zero Number in mutation: %v", m) 1265 return 1266 } 1267 gi, ok := gr.issues[m.Number] 1268 if !ok { 1269 gi = &GitHubIssue{ 1270 // User added below 1271 Number: m.Number, 1272 ID: m.Id, 1273 } 1274 if gr.issues == nil { 1275 gr.issues = make(map[int32]*GitHubIssue) 1276 } 1277 gr.issues[m.Number] = gi 1278 1279 if m.NotExist { 1280 gi.NotExist = true 1281 return 1282 } 1283 1284 var err error 1285 gi.Created, err = ptypes.Timestamp(m.Created) 1286 if err != nil { 1287 panic(err) 1288 } 1289 } 1290 if m.NotExist != gi.NotExist { 1291 gi.NotExist = m.NotExist 1292 } 1293 if gi.NotExist { 1294 return 1295 } 1296 1297 // Check Updated before all other fields so they don't update if this 1298 // Mutation is stale 1299 // (ignoring Created since it *should* never update) 1300 if m.Updated != nil { 1301 t, err := ptypes.Timestamp(m.Updated) 1302 if err != nil { 1303 panic(err) 1304 } 1305 gi.Updated = t 1306 } 1307 if m.ClosedAt != nil { 1308 t, err := ptypes.Timestamp(m.ClosedAt) 1309 if err != nil { 1310 panic(err) 1311 } 1312 gi.ClosedAt = t 1313 } 1314 if m.User != nil { 1315 gi.User = c.github.getUser(m.User) 1316 } 1317 if m.NoMilestone { 1318 gi.Milestone = noMilestone 1319 } else if m.MilestoneId != 0 { 1320 ms := gr.getOrCreateMilestone(m.MilestoneId) 1321 ms.processMutation(maintpb.GithubMilestone{ 1322 Id: m.MilestoneId, 1323 Title: m.MilestoneTitle, 1324 Number: m.MilestoneNum, 1325 }) 1326 gi.Milestone = ms 1327 } 1328 if m.ClosedBy != nil { 1329 gi.ClosedBy = c.github.getUser(m.ClosedBy) 1330 } 1331 if b := m.Closed; b != nil { 1332 gi.Closed = b.Val 1333 } 1334 if b := m.Locked; b != nil { 1335 gi.Locked = b.Val 1336 } 1337 if m.PullRequest { 1338 gi.PullRequest = true 1339 } 1340 1341 gi.Assignees = c.github.setAssigneesFromProto(gi.Assignees, m.Assignees, m.DeletedAssignees) 1342 1343 if m.Body != "" { 1344 gi.Body = m.Body 1345 } 1346 if m.BodyChange != nil { 1347 gi.Body = m.BodyChange.Val 1348 } 1349 if m.Title != "" { 1350 gi.Title = m.Title 1351 } 1352 if len(m.RemoveLabel) > 0 || len(m.AddLabel) > 0 { 1353 if gi.Labels == nil { 1354 gi.Labels = make(map[int64]*GitHubLabel) 1355 } 1356 for _, lid := range m.RemoveLabel { 1357 delete(gi.Labels, lid) 1358 } 1359 for _, lp := range m.AddLabel { 1360 lb := gr.getOrCreateLabel(lp.Id) 1361 lb.processMutation(*lp) 1362 gi.Labels[lp.Id] = lb 1363 } 1364 } 1365 1366 for _, cmut := range m.Comment { 1367 if cmut.Id == 0 { 1368 log.Printf("Ignoring bogus comment mutation lacking Id: %v", cmut) 1369 continue 1370 } 1371 gc, ok := gi.comments[cmut.Id] 1372 if !ok { 1373 if gi.comments == nil { 1374 gi.comments = make(map[int64]*GitHubComment) 1375 } 1376 gc = &GitHubComment{ID: cmut.Id} 1377 gi.comments[gc.ID] = gc 1378 } 1379 if cmut.User != nil { 1380 gc.User = c.github.getUser(cmut.User) 1381 } 1382 if cmut.Created != nil { 1383 gc.Created, _ = ptypes.Timestamp(cmut.Created) 1384 gc.Created = gc.Created.UTC() 1385 } 1386 if cmut.Updated != nil { 1387 gc.Updated, _ = ptypes.Timestamp(cmut.Updated) 1388 gc.Updated = gc.Updated.UTC() 1389 } 1390 if cmut.Body != "" { 1391 gc.Body = cmut.Body 1392 } 1393 } 1394 if m.CommentStatus != nil && m.CommentStatus.ServerDate != nil { 1395 if serverDate, err := ptypes.Timestamp(m.CommentStatus.ServerDate); err == nil { 1396 gi.commentsSyncedAsOf = serverDate.UTC() 1397 } 1398 } 1399 1400 for _, emut := range m.Event { 1401 if emut.Id == 0 { 1402 log.Printf("Ignoring bogus event mutation lacking Id: %v", emut) 1403 continue 1404 } 1405 if gi.events == nil { 1406 gi.events = make(map[int64]*GitHubIssueEvent) 1407 } 1408 gie := gr.newGithubEvent(emut) 1409 gi.events[emut.Id] = gie 1410 if gie.Created.After(gi.eventMaxTime) { 1411 gi.eventMaxTime = gie.Created 1412 } 1413 } 1414 if m.EventStatus != nil && m.EventStatus.ServerDate != nil { 1415 if serverDate, err := ptypes.Timestamp(m.EventStatus.ServerDate); err == nil { 1416 gi.eventsSyncedAsOf = serverDate.UTC() 1417 } 1418 } 1419 1420 for _, rmut := range m.Review { 1421 if rmut.Id == 0 { 1422 log.Printf("Ignoring bogus review mutation lacking Id: %v", rmut) 1423 continue 1424 } 1425 if gi.reviews == nil { 1426 gi.reviews = make(map[int64]*GitHubReview) 1427 } 1428 gre := gr.newGithubReview(rmut) 1429 gi.reviews[rmut.Id] = gre 1430 if gre.Created.After(gi.eventMaxTime) { 1431 gi.eventMaxTime = gre.Created 1432 } 1433 } 1434 if m.ReviewStatus != nil && m.ReviewStatus.ServerDate != nil { 1435 if serverDate, err := ptypes.Timestamp(m.ReviewStatus.ServerDate); err == nil { 1436 gi.reviewsSyncedAsOf = serverDate.UTC() 1437 } 1438 } 1439 } 1440 1441 // githubCache is an httpcache.Cache wrapper that only 1442 // stores responses for: 1443 // - https://api.github.com/repos/$OWNER/$REPO/issues?direction=desc&page=1&sort=updated 1444 // - https://api.github.com/repos/$OWNER/$REPO/milestones?page=1 1445 // - https://api.github.com/repos/$OWNER/$REPO/labels?page=1 1446 type githubCache struct { 1447 httpcache.Cache 1448 } 1449 1450 var rxGithubCacheURLs = regexp.MustCompile(`^https://api.github.com/repos/\w+/\w+/(issues|milestones|labels)\?(.+)`) 1451 1452 func cacheableURL(urlStr string) bool { 1453 m := rxGithubCacheURLs.FindStringSubmatch(urlStr) 1454 if m == nil { 1455 return false 1456 } 1457 v, _ := url.ParseQuery(m[2]) 1458 if v.Get("page") != "1" { 1459 return false 1460 } 1461 switch m[1] { 1462 case "issues": 1463 return v.Get("sort") == "updated" && v.Get("direction") == "desc" 1464 case "milestones", "labels": 1465 return true 1466 default: 1467 panic("unexpected cache key base " + m[1]) 1468 } 1469 } 1470 1471 func (c *githubCache) Set(urlKey string, res []byte) { 1472 // TODO: verify that the httpcache package guarantees that the 1473 // first string parameter to Set here is actually a 1474 // URL. Empirically they appear to be. 1475 if cacheableURL(urlKey) { 1476 c.Cache.Set(urlKey, res) 1477 } 1478 } 1479 1480 // sync checks for new changes on a single GitHub repository and 1481 // updates the Corpus with any changes. If loop is true, it runs 1482 // forever. 1483 func (gr *GitHubRepo) sync(ctx context.Context, token string, loop bool) error { 1484 ts := oauth2.StaticTokenSource(&oauth2.Token{AccessToken: token}) 1485 hc := oauth2.NewClient(ctx, ts) 1486 if tr, ok := hc.Transport.(*http.Transport); ok { 1487 defer tr.CloseIdleConnections() 1488 } 1489 directTransport := hc.Transport 1490 if gr.github.c.githubLimiter != nil { 1491 directTransport = limitTransport{gr.github.c.githubLimiter, hc.Transport} 1492 } 1493 cachingTransport := &httpcache.Transport{ 1494 Transport: directTransport, 1495 Cache: &githubCache{Cache: httpcache.NewMemoryCache()}, 1496 MarkCachedResponses: true, // adds "X-From-Cache: 1" response header. 1497 } 1498 1499 p := &githubRepoPoller{ 1500 c: gr.github.c, 1501 token: token, 1502 gr: gr, 1503 githubDirect: github.NewClient(&http.Client{Transport: directTransport}), 1504 githubCaching: github.NewClient(&http.Client{Transport: cachingTransport}), 1505 client: http.DefaultClient, 1506 } 1507 activityCh := gr.github.c.activityChan("github:" + gr.id.String()) 1508 var expectChanges bool // got webhook update, but haven't seen new data yet 1509 var sleepDelay time.Duration 1510 for { 1511 prevLastUpdate := p.lastUpdate 1512 err := p.sync(ctx, expectChanges) 1513 if err == context.Canceled || !loop { 1514 return err 1515 } 1516 sawChanges := !p.lastUpdate.Equal(prevLastUpdate) 1517 if sawChanges { 1518 expectChanges = false 1519 } 1520 // If we got woken up by a webhook, sometimes 1521 // immediately polling GitHub for the data results in 1522 // a cache hit saying nothing's changed. Don't believe 1523 // it. Polling quickly with exponential backoff until 1524 // we see what we're expecting. 1525 if expectChanges { 1526 if sleepDelay == 0 { 1527 sleepDelay = 1 * time.Second 1528 } else { 1529 sleepDelay *= 2 1530 if sleepDelay > 15*time.Minute { 1531 sleepDelay = 15 * time.Minute 1532 } 1533 } 1534 p.logf("expect changes; re-polling in %v", sleepDelay) 1535 } else { 1536 sleepDelay = 15 * time.Minute 1537 } 1538 p.logf("sync = %v; sleeping", err) 1539 timer := time.NewTimer(sleepDelay) 1540 select { 1541 case <-ctx.Done(): 1542 timer.Stop() 1543 return ctx.Err() 1544 case <-activityCh: 1545 timer.Stop() 1546 expectChanges = true 1547 sleepDelay = 0 1548 case <-timer.C: 1549 } 1550 } 1551 } 1552 1553 type httpClient interface { 1554 Do(req *http.Request) (*http.Response, error) 1555 } 1556 1557 // A githubRepoPoller updates the Corpus (gr.c) to have the latest 1558 // version of the GitHub repo rp, using the GitHub client ghc. 1559 type githubRepoPoller struct { 1560 c *Corpus // shortcut for gr.github.c 1561 gr *GitHubRepo 1562 token string 1563 lastUpdate time.Time // modified by sync 1564 githubCaching *github.Client 1565 githubDirect *github.Client // not caching 1566 client httpClient // the client used to poll github 1567 } 1568 1569 func (p *githubRepoPoller) Owner() string { return p.gr.id.Owner } 1570 func (p *githubRepoPoller) Repo() string { return p.gr.id.Repo } 1571 1572 func (p *githubRepoPoller) logf(format string, args ...interface{}) { 1573 log.Printf("sync github "+p.gr.id.String()+": "+format, args...) 1574 } 1575 1576 func (p *githubRepoPoller) sync(ctx context.Context, expectChanges bool) error { 1577 p.logf("Beginning sync.") 1578 if err := p.syncIssues(ctx, expectChanges); err != nil { 1579 return err 1580 } 1581 if err := p.syncComments(ctx); err != nil { 1582 return err 1583 } 1584 if err := p.syncEvents(ctx); err != nil { 1585 return err 1586 } 1587 if err := p.syncReviews(ctx); err != nil { 1588 return err 1589 } 1590 return nil 1591 } 1592 1593 func (p *githubRepoPoller) syncMilestones(ctx context.Context) error { 1594 var mut *maintpb.GithubMutation // lazy init 1595 var changes int 1596 err := p.foreachItem(ctx, 1, p.getMilestonePage, func(e interface{}) error { 1597 ms := e.(*github.Milestone) 1598 id := int64(ms.GetID()) 1599 p.c.mu.RLock() 1600 diff := p.gr.milestones[id].GenMutationDiff(ms) 1601 p.c.mu.RUnlock() 1602 if diff == nil { 1603 return nil 1604 } 1605 if mut == nil { 1606 mut = &maintpb.GithubMutation{ 1607 Owner: p.Owner(), 1608 Repo: p.Repo(), 1609 } 1610 } 1611 mut.Milestones = append(mut.Milestones, diff) 1612 changes++ 1613 return nil 1614 }) 1615 if err != nil { 1616 return err 1617 } 1618 p.logf("%d milestone changes.", changes) 1619 if changes == 0 { 1620 return nil 1621 } 1622 p.c.addMutation(&maintpb.Mutation{Github: mut}) 1623 return nil 1624 } 1625 1626 func (p *githubRepoPoller) syncLabels(ctx context.Context) error { 1627 var mut *maintpb.GithubMutation // lazy init 1628 var changes int 1629 err := p.foreachItem(ctx, 1, p.getLabelPage, func(e interface{}) error { 1630 lb := e.(*github.Label) 1631 id := int64(lb.GetID()) 1632 p.c.mu.RLock() 1633 diff := p.gr.labels[id].GenMutationDiff(lb) 1634 p.c.mu.RUnlock() 1635 if diff == nil { 1636 return nil 1637 } 1638 if mut == nil { 1639 mut = &maintpb.GithubMutation{ 1640 Owner: p.Owner(), 1641 Repo: p.Repo(), 1642 } 1643 } 1644 mut.Labels = append(mut.Labels, diff) 1645 changes++ 1646 return nil 1647 }) 1648 if err != nil { 1649 return err 1650 } 1651 p.logf("%d label changes.", changes) 1652 if changes == 0 { 1653 return nil 1654 } 1655 p.c.addMutation(&maintpb.Mutation{Github: mut}) 1656 return nil 1657 } 1658 1659 func (p *githubRepoPoller) getMilestonePage(ctx context.Context, page int) ([]interface{}, *github.Response, error) { 1660 ms, res, err := p.githubCaching.Issues.ListMilestones(ctx, p.Owner(), p.Repo(), &github.MilestoneListOptions{ 1661 State: "all", 1662 ListOptions: github.ListOptions{Page: page}, 1663 }) 1664 if err != nil { 1665 return nil, nil, err 1666 } 1667 its := make([]interface{}, len(ms)) 1668 for i, m := range ms { 1669 its[i] = m 1670 } 1671 return its, res, err 1672 } 1673 1674 func (p *githubRepoPoller) getLabelPage(ctx context.Context, page int) ([]interface{}, *github.Response, error) { 1675 ls, res, err := p.githubCaching.Issues.ListLabels(ctx, p.Owner(), p.Repo(), &github.ListOptions{ 1676 Page: page, 1677 }) 1678 if err != nil { 1679 return nil, nil, err 1680 } 1681 its := make([]interface{}, len(ls)) 1682 for i, lb := range ls { 1683 its[i] = lb 1684 } 1685 return its, res, err 1686 } 1687 1688 // foreachItem walks over all pages of items from getPage and calls fn for each item. 1689 // If the first page's response was cached, fn is never called. 1690 func (p *githubRepoPoller) foreachItem( 1691 ctx context.Context, 1692 page int, 1693 getPage func(ctx context.Context, page int) ([]interface{}, *github.Response, error), 1694 fn func(interface{}) error) error { 1695 for { 1696 select { 1697 case <-ctx.Done(): 1698 return ctx.Err() 1699 default: 1700 } 1701 items, res, err := getPage(ctx, page) 1702 if err != nil { 1703 if canRetry(ctx, err) { 1704 continue 1705 } 1706 return err 1707 } 1708 if len(items) == 0 { 1709 return nil 1710 } 1711 fromCache := page == 1 && res.Response.Header.Get(xFromCache) == "1" 1712 if fromCache { 1713 log.Printf("no new items of type %T", items[0]) 1714 // No need to walk over these again. 1715 return nil 1716 } 1717 // TODO: use res.Rate (sleep until Reset if Limit == 0) 1718 for _, it := range items { 1719 if err := fn(it); err != nil { 1720 return err 1721 } 1722 } 1723 if res.NextPage == 0 { 1724 return nil 1725 } 1726 page = res.NextPage 1727 } 1728 } 1729 1730 func (p *githubRepoPoller) syncIssues(ctx context.Context, expectChanges bool) error { 1731 page := 1 1732 seen := make(map[int64]bool) 1733 keepGoing := true 1734 owner, repo := p.gr.id.Owner, p.gr.id.Repo 1735 for keepGoing { 1736 ghc := p.githubCaching 1737 if expectChanges { 1738 ghc = p.githubDirect 1739 } 1740 issues, res, err := ghc.Issues.ListByRepo(ctx, owner, repo, &github.IssueListByRepoOptions{ 1741 State: "all", 1742 Sort: "updated", 1743 Direction: "desc", 1744 ListOptions: github.ListOptions{ 1745 Page: page, 1746 PerPage: 100, 1747 }, 1748 }) 1749 if err != nil { 1750 if canRetry(ctx, err) { 1751 continue 1752 } 1753 return err 1754 } 1755 // See https://developer.github.com/v3/activity/events/ for X-Poll-Interval: 1756 if pi := res.Response.Header.Get("X-Poll-Interval"); pi != "" { 1757 nsec, _ := strconv.Atoi(pi) 1758 d := time.Duration(nsec) * time.Second 1759 p.logf("Requested to adjust poll interval to %v", d) 1760 // TODO: return an error type up that the sync loop can use 1761 // to adjust its default interval. 1762 // For now, ignore. 1763 } 1764 fromCache := res.Response.Header.Get(xFromCache) == "1" 1765 if len(issues) == 0 { 1766 p.logf("issues: reached end.") 1767 break 1768 } 1769 1770 didMilestoneLabelSync := false 1771 changes := 0 1772 for _, is := range issues { 1773 id := int64(is.GetID()) 1774 if seen[id] { 1775 // If an issue gets updated (and bumped to the top) while we 1776 // are paging, it's possible the last issue from page N can 1777 // appear as the first issue on page N+1. Don't process that 1778 // issue twice. 1779 // https://github.com/google/go-github/issues/566 1780 continue 1781 } 1782 seen[id] = true 1783 1784 var mp *maintpb.Mutation 1785 p.c.mu.RLock() 1786 { 1787 gi := p.gr.issues[int32(*is.Number)] 1788 mp = p.gr.newMutationFromIssue(gi, is) 1789 } 1790 p.c.mu.RUnlock() 1791 1792 if mp == nil { 1793 continue 1794 } 1795 1796 // If there's something new (not a cached response), 1797 // then check for updated milestones and labels before 1798 // creating issue mutations below. Doesn't matter 1799 // much, but helps to have it all loaded. 1800 if !fromCache && !didMilestoneLabelSync { 1801 didMilestoneLabelSync = true 1802 group, ctx := errgroup.WithContext(ctx) 1803 group.Go(func() error { return p.syncMilestones(ctx) }) 1804 group.Go(func() error { return p.syncLabels(ctx) }) 1805 if err := group.Wait(); err != nil { 1806 return err 1807 } 1808 } 1809 1810 changes++ 1811 p.logf("changed issue %d: %s", is.GetNumber(), is.GetTitle()) 1812 p.c.addMutation(mp) 1813 p.lastUpdate = time.Now() 1814 } 1815 1816 if changes == 0 { 1817 missing := p.gr.missingIssues() 1818 if len(missing) == 0 { 1819 p.logf("no changed issues; cached=%v", fromCache) 1820 return nil 1821 } 1822 if len(missing) > 0 { 1823 p.logf("%d missing github issues.", len(missing)) 1824 } 1825 if len(missing) < 100 { 1826 keepGoing = false 1827 } 1828 } 1829 1830 p.c.mu.RLock() 1831 num := len(p.gr.issues) 1832 p.c.mu.RUnlock() 1833 p.logf("After page %d: %v issues, %v changes, %v issues in memory", page, len(issues), changes, num) 1834 1835 page++ 1836 } 1837 1838 missing := p.gr.missingIssues() 1839 if len(missing) > 0 { 1840 p.logf("remaining issues: %v", missing) 1841 for _, num := range missing { 1842 p.logf("getting issue %v ...", num) 1843 var issue *github.Issue 1844 var err error 1845 for { 1846 issue, _, err = p.githubDirect.Issues.Get(ctx, owner, repo, int(num)) 1847 if canRetry(ctx, err) { 1848 continue 1849 } 1850 break 1851 } 1852 if ge, ok := err.(*github.ErrorResponse); ok && (ge.Response.StatusCode == http.StatusNotFound || ge.Response.StatusCode == http.StatusGone) { 1853 mut := &maintpb.Mutation{ 1854 GithubIssue: &maintpb.GithubIssueMutation{ 1855 Owner: owner, 1856 Repo: repo, 1857 Number: num, 1858 NotExist: true, 1859 }, 1860 } 1861 p.logf("issue %d is gone, marking as NotExist", num) 1862 p.c.addMutation(mut) 1863 continue 1864 } else if err != nil { 1865 return err 1866 } 1867 mp := p.gr.newMutationFromIssue(nil, issue) 1868 if mp == nil { 1869 continue 1870 } 1871 p.logf("modified issue %d: %s", issue.GetNumber(), issue.GetTitle()) 1872 p.c.addMutation(mp) 1873 p.lastUpdate = time.Now() 1874 } 1875 } 1876 1877 return nil 1878 } 1879 1880 func (p *githubRepoPoller) issueNumbersWithStaleCommentSync() (issueNums []int32) { 1881 p.c.mu.RLock() 1882 defer p.c.mu.RUnlock() 1883 1884 for n, gi := range p.gr.issues { 1885 if !gi.commentsSynced() { 1886 issueNums = append(issueNums, n) 1887 } 1888 } 1889 sort.Slice(issueNums, func(i, j int) bool { 1890 return issueNums[i] < issueNums[j] 1891 }) 1892 return issueNums 1893 } 1894 1895 func (p *githubRepoPoller) syncComments(ctx context.Context) error { 1896 for { 1897 nums := p.issueNumbersWithStaleCommentSync() 1898 if len(nums) == 0 { 1899 return nil 1900 } 1901 remain := len(nums) 1902 for _, num := range nums { 1903 p.logf("comment sync: %d issues remaining; syncing issue %v", remain, num) 1904 if err := p.syncCommentsOnIssue(ctx, num); err != nil { 1905 p.logf("comment sync on issue %d: %v", num, err) 1906 return err 1907 } 1908 remain-- 1909 } 1910 } 1911 } 1912 1913 func (p *githubRepoPoller) syncCommentsOnIssue(ctx context.Context, issueNum int32) error { 1914 p.c.mu.RLock() 1915 issue := p.gr.issues[issueNum] 1916 if issue == nil { 1917 p.c.mu.RUnlock() 1918 return fmt.Errorf("unknown issue number %v", issueNum) 1919 } 1920 since := issue.commentsUpdatedTil 1921 p.c.mu.RUnlock() 1922 1923 owner, repo := p.gr.id.Owner, p.gr.id.Repo 1924 morePages := true // at least try the first. might be empty. 1925 for morePages { 1926 ics, res, err := p.githubDirect.Issues.ListComments(ctx, owner, repo, int(issueNum), &github.IssueListCommentsOptions{ 1927 Since: since, 1928 Direction: "asc", 1929 Sort: "updated", 1930 ListOptions: github.ListOptions{PerPage: 100}, 1931 }) 1932 if canRetry(ctx, err) { 1933 continue 1934 } else if ge, ok := err.(*github.ErrorResponse); ok && (ge.Response.StatusCode == http.StatusNotFound || ge.Response.StatusCode == http.StatusGone) { 1935 mut := &maintpb.Mutation{ 1936 GithubIssue: &maintpb.GithubIssueMutation{ 1937 Owner: owner, 1938 Repo: repo, 1939 Number: issueNum, 1940 NotExist: true, 1941 }, 1942 } 1943 p.logf("issue %d comments are gone, marking as NotExist", issueNum) 1944 p.c.addMutation(mut) 1945 return nil 1946 } else if err != nil { 1947 return err 1948 } 1949 serverDate, err := http.ParseTime(res.Header.Get("Date")) 1950 if err != nil { 1951 return fmt.Errorf("invalid server Date response: %v", err) 1952 } 1953 serverDate = serverDate.UTC() 1954 p.logf("Number of comments on issue %d since %v: %v", issueNum, since, len(ics)) 1955 1956 mut := &maintpb.Mutation{ 1957 GithubIssue: &maintpb.GithubIssueMutation{ 1958 Owner: owner, 1959 Repo: repo, 1960 Number: issueNum, 1961 }, 1962 } 1963 1964 p.c.mu.RLock() 1965 for _, ic := range ics { 1966 if ic.ID == nil || ic.Body == nil || ic.User == nil || ic.CreatedAt == nil || ic.UpdatedAt == nil { 1967 // Bogus. 1968 p.logf("bogus comment: %v", ic) 1969 continue 1970 } 1971 created, err := ptypes.TimestampProto(*ic.CreatedAt) 1972 if err != nil { 1973 continue 1974 } 1975 updated, err := ptypes.TimestampProto(*ic.UpdatedAt) 1976 if err != nil { 1977 continue 1978 } 1979 since = *ic.UpdatedAt // for next round 1980 1981 id := int64(*ic.ID) 1982 cur := issue.comments[id] 1983 1984 // TODO: does a reaction update a comment's UpdatedAt time? 1985 var cmut *maintpb.GithubIssueCommentMutation 1986 if cur == nil { 1987 cmut = &maintpb.GithubIssueCommentMutation{ 1988 Id: id, 1989 User: &maintpb.GithubUser{ 1990 Id: int64(*ic.User.ID), 1991 Login: *ic.User.Login, 1992 }, 1993 Body: *ic.Body, 1994 Created: created, 1995 Updated: updated, 1996 } 1997 } else if !cur.Updated.Equal(*ic.UpdatedAt) || cur.Body != *ic.Body { 1998 cmut = &maintpb.GithubIssueCommentMutation{ 1999 Id: id, 2000 } 2001 if !cur.Updated.Equal(*ic.UpdatedAt) { 2002 cmut.Updated = updated 2003 } 2004 if cur.Body != *ic.Body { 2005 cmut.Body = *ic.Body 2006 } 2007 } 2008 if cmut != nil { 2009 mut.GithubIssue.Comment = append(mut.GithubIssue.Comment, cmut) 2010 } 2011 } 2012 p.c.mu.RUnlock() 2013 2014 if res.NextPage == 0 { 2015 sdp, _ := ptypes.TimestampProto(serverDate) 2016 mut.GithubIssue.CommentStatus = &maintpb.GithubIssueSyncStatus{ServerDate: sdp} 2017 morePages = false 2018 } 2019 2020 p.c.addMutation(mut) 2021 } 2022 return nil 2023 } 2024 2025 func (p *githubRepoPoller) issueNumbersWithStaleEventSync() (issueNums []int32) { 2026 p.c.mu.RLock() 2027 defer p.c.mu.RUnlock() 2028 2029 for n, gi := range p.gr.issues { 2030 if !gi.eventsSynced() { 2031 issueNums = append(issueNums, n) 2032 } 2033 } 2034 sort.Slice(issueNums, func(i, j int) bool { 2035 return issueNums[i] < issueNums[j] 2036 }) 2037 return issueNums 2038 } 2039 2040 func (p *githubRepoPoller) syncEvents(ctx context.Context) error { 2041 for { 2042 nums := p.issueNumbersWithStaleEventSync() 2043 if len(nums) == 0 { 2044 return nil 2045 } 2046 remain := len(nums) 2047 for _, num := range nums { 2048 p.logf("event sync: %d issues remaining; syncing issue %v", remain, num) 2049 if err := p.syncEventsOnIssue(ctx, num); err != nil { 2050 p.logf("event sync on issue %d: %v", num, err) 2051 return err 2052 } 2053 remain-- 2054 } 2055 } 2056 } 2057 2058 func (p *githubRepoPoller) syncEventsOnIssue(ctx context.Context, issueNum int32) error { 2059 const perPage = 100 2060 p.c.mu.RLock() 2061 gi := p.gr.issues[issueNum] 2062 if gi == nil { 2063 panic(fmt.Sprintf("bogus issue %v", issueNum)) 2064 } 2065 have := len(gi.events) 2066 p.c.mu.RUnlock() 2067 2068 skipPages := have / perPage 2069 2070 mut := &maintpb.Mutation{ 2071 GithubIssue: &maintpb.GithubIssueMutation{ 2072 Owner: p.Owner(), 2073 Repo: p.Repo(), 2074 Number: issueNum, 2075 }, 2076 } 2077 2078 err := p.foreachItem(ctx, 2079 1+skipPages, 2080 func(ctx context.Context, page int) ([]interface{}, *github.Response, error) { 2081 u := fmt.Sprintf("https://api.github.com/repos/%s/%s/issues/%v/events?per_page=%v&page=%v", 2082 p.Owner(), p.Repo(), issueNum, perPage, page) 2083 req, _ := http.NewRequest("GET", u, nil) 2084 2085 req.Header.Set("Authorization", "Bearer "+p.token) 2086 req.Header.Set("User-Agent", "golang-x-build-maintner/1.0") 2087 ctx, cancel := context.WithTimeout(ctx, time.Minute) 2088 defer cancel() 2089 req = req.WithContext(ctx) 2090 res, err := p.client.Do(req) 2091 if err != nil { 2092 log.Printf("Fetching %s: %v", u, err) 2093 return nil, nil, err 2094 } 2095 log.Printf("Fetching %s: %v", u, res.Status) 2096 ghResp := makeGithubResponse(res) 2097 if err := github.CheckResponse(res); err != nil { 2098 log.Printf("Fetching %s: %v: %+v", u, res.Status, res.Header) 2099 log.Printf("GitHub error %s: %v", u, ghResp) 2100 return nil, nil, err 2101 } 2102 2103 evts, err := parseGithubEvents(res.Body) 2104 if err != nil { 2105 return nil, nil, fmt.Errorf("%s: parse github events: %v", u, err) 2106 } 2107 is := make([]interface{}, len(evts)) 2108 for i, v := range evts { 2109 is[i] = v 2110 } 2111 serverDate, err := http.ParseTime(res.Header.Get("Date")) 2112 if err != nil { 2113 return nil, nil, fmt.Errorf("invalid server Date response: %v", err) 2114 } 2115 sdp, _ := ptypes.TimestampProto(serverDate.UTC()) 2116 mut.GithubIssue.EventStatus = &maintpb.GithubIssueSyncStatus{ServerDate: sdp} 2117 2118 return is, ghResp, err 2119 }, 2120 func(v interface{}) error { 2121 ge := v.(*GitHubIssueEvent) 2122 p.c.mu.RLock() 2123 _, ok := gi.events[ge.ID] 2124 p.c.mu.RUnlock() 2125 if ok { 2126 // Already have it. And they're 2127 // assumed to be immutable, so the 2128 // copy we already have should be 2129 // good. Don't add to mutation log. 2130 return nil 2131 } 2132 mut.GithubIssue.Event = append(mut.GithubIssue.Event, ge.Proto()) 2133 return nil 2134 }) 2135 if err != nil { 2136 return err 2137 } 2138 p.c.addMutation(mut) 2139 return nil 2140 } 2141 2142 // parseGithubEvents parses the JSON array of GitHub issue events in r. 2143 // It does this the very manual way (using map[string]interface{}) 2144 // instead of using nice types because https://golang.org/issue/15314 2145 // isn't implemented yet and also because even if it were implemented, 2146 // this code still wants to preserve any unknown fields to store in 2147 // the "OtherJSON" field for future updates of the code to parse. (If 2148 // GitHub adds new Event types in the future, we want to archive them, 2149 // even if we don't understand them) 2150 func parseGithubEvents(r io.Reader) ([]*GitHubIssueEvent, error) { 2151 var jevents []map[string]interface{} 2152 jd := json.NewDecoder(r) 2153 jd.UseNumber() 2154 if err := jd.Decode(&jevents); err != nil { 2155 return nil, err 2156 } 2157 var evts []*GitHubIssueEvent 2158 for _, em := range jevents { 2159 for k, v := range em { 2160 if v == nil { 2161 delete(em, k) 2162 } 2163 } 2164 delete(em, "url") 2165 2166 e := &GitHubIssueEvent{} 2167 2168 e.Type, _ = em["event"].(string) 2169 delete(em, "event") 2170 2171 e.ID = jint64(em["id"]) 2172 delete(em, "id") 2173 2174 // TODO: store these two more compactly: 2175 e.CommitID, _ = em["commit_id"].(string) // "5383ecf5a0824649ffcc0349f00f0317575753d0" 2176 delete(em, "commit_id") 2177 e.CommitURL, _ = em["commit_url"].(string) // "https://api.github.com/repos/bradfitz/go-issue-mirror/commits/5383ecf5a0824649ffcc0349f00f0317575753d0" 2178 delete(em, "commit_url") 2179 2180 getUser := func(field string, gup **GitHubUser) { 2181 am, ok := em[field].(map[string]interface{}) 2182 if !ok { 2183 return 2184 } 2185 delete(em, field) 2186 gu := &GitHubUser{ID: jint64(am["id"])} 2187 gu.Login, _ = am["login"].(string) 2188 *gup = gu 2189 } 2190 2191 getUser("actor", &e.Actor) 2192 getUser("assignee", &e.Assignee) 2193 getUser("assigner", &e.Assigner) 2194 getUser("requested_reviewer", &e.Reviewer) 2195 getUser("review_requester", &e.ReviewRequester) 2196 2197 if lm, ok := em["label"].(map[string]interface{}); ok { 2198 delete(em, "label") 2199 e.Label, _ = lm["name"].(string) 2200 } 2201 2202 if mm, ok := em["milestone"].(map[string]interface{}); ok { 2203 delete(em, "milestone") 2204 e.Milestone, _ = mm["title"].(string) 2205 } 2206 2207 if rm, ok := em["rename"].(map[string]interface{}); ok { 2208 delete(em, "rename") 2209 e.From, _ = rm["from"].(string) 2210 e.To, _ = rm["to"].(string) 2211 } 2212 2213 if createdStr, ok := em["created_at"].(string); ok { 2214 delete(em, "created_at") 2215 var err error 2216 e.Created, err = time.Parse(time.RFC3339, createdStr) 2217 if err != nil { 2218 return nil, err 2219 } 2220 e.Created = e.Created.UTC() 2221 } 2222 if dr, ok := em["dismissed_review"]; ok { 2223 delete(em, "dismissed_review") 2224 drm := dr.(map[string]interface{}) 2225 dro := &GitHubDismissedReviewEvent{} 2226 dro.ReviewID = jint64(drm["review_id"]) 2227 if state, ok := drm["state"].(string); ok { 2228 dro.State = state 2229 } else { 2230 log.Printf("got type %T for 'state' field, expected string in %+v", drm["state"], drm) 2231 } 2232 dro.DismissalMessage, _ = drm["dismissal_message"].(string) 2233 e.DismissedReview = dro 2234 } 2235 if rt, ok := em["requested_team"]; ok { 2236 delete(em, "requested_team") 2237 rtm, ok := rt.(map[string]interface{}) 2238 if !ok { 2239 log.Printf("got value %+v for 'requested_team' field, wanted a map with 'id' and 'slug' fields", rt) 2240 } else { 2241 t := &GitHubTeam{} 2242 t.ID = jint64(rtm["id"]) 2243 t.Slug, _ = rtm["slug"].(string) 2244 e.TeamReviewer = t 2245 } 2246 } 2247 delete(em, "node_id") // GitHub API v4 Global Node ID; don't store it. 2248 delete(em, "lock_reason") // Not stored. 2249 2250 otherJSON, _ := json.Marshal(em) 2251 e.OtherJSON = string(otherJSON) 2252 if e.OtherJSON == "{}" { 2253 e.OtherJSON = "" 2254 } 2255 if e.OtherJSON != "" { 2256 log.Printf("warning: storing unknown field(s) in GitHub issue event: %s", e.OtherJSON) 2257 } 2258 evts = append(evts, e) 2259 } 2260 return evts, nil 2261 } 2262 2263 func (p *githubRepoPoller) issueNumbersWithStaleReviewsSync() (issueNums []int32) { 2264 p.c.mu.RLock() 2265 defer p.c.mu.RUnlock() 2266 2267 for n, gi := range p.gr.issues { 2268 if gi.PullRequest && !gi.reviewsSynced() { 2269 issueNums = append(issueNums, n) 2270 } 2271 } 2272 sort.Slice(issueNums, func(i, j int) bool { 2273 return issueNums[i] < issueNums[j] 2274 }) 2275 return issueNums 2276 } 2277 2278 func (p *githubRepoPoller) syncReviews(ctx context.Context) error { 2279 for { 2280 nums := p.issueNumbersWithStaleReviewsSync() 2281 if len(nums) == 0 { 2282 return nil 2283 } 2284 remain := len(nums) 2285 for _, num := range nums { 2286 p.logf("reviews sync: %d issues remaining; syncing issue %v", remain, num) 2287 if err := p.syncReviewsOnPullRequest(ctx, num); err != nil { 2288 p.logf("review sync on issue %d: %v", num, err) 2289 return err 2290 } 2291 remain-- 2292 } 2293 } 2294 } 2295 2296 func (p *githubRepoPoller) syncReviewsOnPullRequest(ctx context.Context, issueNum int32) error { 2297 const perPage = 100 2298 p.c.mu.RLock() 2299 gi := p.gr.issues[issueNum] 2300 if gi == nil { 2301 p.c.mu.RUnlock() 2302 panic(fmt.Sprintf("bogus issue %v", issueNum)) 2303 } 2304 2305 if !gi.PullRequest { 2306 p.c.mu.RUnlock() 2307 return nil 2308 } 2309 2310 have := len(gi.reviews) 2311 p.c.mu.RUnlock() 2312 2313 skipPages := have / perPage 2314 2315 mut := &maintpb.Mutation{ 2316 GithubIssue: &maintpb.GithubIssueMutation{ 2317 Owner: p.Owner(), 2318 Repo: p.Repo(), 2319 Number: issueNum, 2320 }, 2321 } 2322 2323 err := p.foreachItem(ctx, 2324 1+skipPages, 2325 func(ctx context.Context, page int) ([]interface{}, *github.Response, error) { 2326 u := fmt.Sprintf("https://api.github.com/repos/%s/%s/pulls/%v/reviews?per_page=%v&page=%v", 2327 p.Owner(), p.Repo(), issueNum, perPage, page) 2328 req, _ := http.NewRequest("GET", u, nil) 2329 2330 req.Header.Set("Authorization", "Bearer "+p.token) 2331 req.Header.Set("User-Agent", "golang-x-build-maintner/1.0") 2332 ctx, cancel := context.WithTimeout(ctx, time.Minute) 2333 defer cancel() 2334 req = req.WithContext(ctx) 2335 res, err := http.DefaultClient.Do(req) 2336 if err != nil { 2337 log.Printf("Fetching %s: %v", u, err) 2338 return nil, nil, err 2339 } 2340 log.Printf("Fetching %s: %v", u, res.Status) 2341 ghResp := makeGithubResponse(res) 2342 if err := github.CheckResponse(res); err != nil { 2343 log.Printf("Fetching %s: %v: %+v", u, res.Status, res.Header) 2344 log.Printf("GitHub error %s: %v", u, ghResp) 2345 return nil, nil, err 2346 } 2347 evts, err := parseGithubReviews(res.Body) 2348 if err != nil { 2349 return nil, nil, fmt.Errorf("%s: parse github pr reviews: %v", u, err) 2350 } 2351 is := make([]interface{}, len(evts)) 2352 for i, v := range evts { 2353 is[i] = v 2354 } 2355 serverDate, err := http.ParseTime(res.Header.Get("Date")) 2356 if err != nil { 2357 return nil, nil, fmt.Errorf("invalid server Date response: %v", err) 2358 } 2359 sdp, _ := ptypes.TimestampProto(serverDate.UTC()) 2360 mut.GithubIssue.ReviewStatus = &maintpb.GithubIssueSyncStatus{ServerDate: sdp} 2361 2362 return is, ghResp, err 2363 }, 2364 func(v interface{}) error { 2365 ge := v.(*GitHubReview) 2366 p.c.mu.RLock() 2367 _, ok := gi.reviews[ge.ID] 2368 p.c.mu.RUnlock() 2369 if ok { 2370 // Already have it. And they're 2371 // assumed to be immutable, so the 2372 // copy we already have should be 2373 // good. Don't add to mutation log. 2374 return nil 2375 } 2376 mut.GithubIssue.Review = append(mut.GithubIssue.Review, ge.Proto()) 2377 return nil 2378 }) 2379 if err != nil { 2380 return err 2381 } 2382 p.c.addMutation(mut) 2383 return nil 2384 } 2385 2386 // parseGithubReviews parses the JSON array of GitHub reviews in r. 2387 // It does this the very manual way (using map[string]interface{}) 2388 // instead of using nice types because https://golang.org/issue/15314 2389 // isn't implemented yet and also because even if it were implemented, 2390 // this code still wants to preserve any unknown fields to store in 2391 // the "OtherJSON" field for future updates of the code to parse. (If 2392 // GitHub adds new Event types in the future, we want to archive them, 2393 // even if we don't understand them) 2394 func parseGithubReviews(r io.Reader) ([]*GitHubReview, error) { 2395 var jevents []map[string]interface{} 2396 jd := json.NewDecoder(r) 2397 jd.UseNumber() 2398 if err := jd.Decode(&jevents); err != nil { 2399 return nil, err 2400 } 2401 var evts []*GitHubReview 2402 for _, em := range jevents { 2403 for k, v := range em { 2404 if v == nil { 2405 delete(em, k) 2406 } 2407 } 2408 2409 e := &GitHubReview{} 2410 2411 e.ID = jint64(em["id"]) 2412 delete(em, "id") 2413 2414 e.Body, _ = em["body"].(string) 2415 delete(em, "body") 2416 2417 e.State, _ = em["state"].(string) 2418 delete(em, "state") 2419 2420 // TODO: store these two more compactly: 2421 e.CommitID, _ = em["commit_id"].(string) // "5383ecf5a0824649ffcc0349f00f0317575753d0" 2422 delete(em, "commit_id") 2423 2424 getUser := func(field string, gup **GitHubUser) { 2425 am, ok := em[field].(map[string]interface{}) 2426 if !ok { 2427 return 2428 } 2429 delete(em, field) 2430 gu := &GitHubUser{ID: jint64(am["id"])} 2431 gu.Login, _ = am["login"].(string) 2432 *gup = gu 2433 } 2434 2435 getUser("user", &e.Actor) 2436 2437 e.ActorAssociation, _ = em["author_association"].(string) 2438 delete(em, "author_association") 2439 2440 if createdStr, ok := em["submitted_at"].(string); ok { 2441 delete(em, "submitted_at") 2442 var err error 2443 e.Created, err = time.Parse(time.RFC3339, createdStr) 2444 if err != nil { 2445 return nil, err 2446 } 2447 e.Created = e.Created.UTC() 2448 } 2449 2450 delete(em, "node_id") // GitHub API v4 Global Node ID; don't store it. 2451 delete(em, "html_url") // not needed. 2452 delete(em, "pull_request_url") // not needed. 2453 delete(em, "_links") // not needed. (duplicate data of above two nodes) 2454 2455 otherJSON, _ := json.Marshal(em) 2456 e.OtherJSON = string(otherJSON) 2457 if e.OtherJSON == "{}" { 2458 e.OtherJSON = "" 2459 } 2460 if e.OtherJSON != "" { 2461 log.Printf("warning: storing unknown field(s) in GitHub review: %s", e.OtherJSON) 2462 } 2463 evts = append(evts, e) 2464 } 2465 return evts, nil 2466 } 2467 2468 // jint64 return an int64 from the provided JSON object value v. 2469 func jint64(v interface{}) int64 { 2470 switch v := v.(type) { 2471 case nil: 2472 return 0 2473 case json.Number: 2474 n, _ := strconv.ParseInt(string(v), 10, 64) 2475 return n 2476 default: 2477 panic(fmt.Sprintf("unexpected type %T", v)) 2478 } 2479 } 2480 2481 // copy of go-github's parseRate, basically. 2482 func parseRate(r *http.Response) github.Rate { 2483 var rate github.Rate 2484 // Note: even though the header names below are not canonical (the 2485 // canonical form would be X-Ratelimit-Limit), this particular 2486 // casing is what GitHub returns. See headerRateRemaining in 2487 // package go-github. 2488 if limit := r.Header.Get("X-RateLimit-Limit"); limit != "" { 2489 rate.Limit, _ = strconv.Atoi(limit) 2490 } 2491 if remaining := r.Header.Get("X-RateLimit-Remaining"); remaining != "" { 2492 rate.Remaining, _ = strconv.Atoi(remaining) 2493 } 2494 if reset := r.Header.Get("X-RateLimit-Reset"); reset != "" { 2495 if v, _ := strconv.ParseInt(reset, 10, 64); v != 0 { 2496 rate.Reset = github.Timestamp{time.Unix(v, 0)} 2497 } 2498 } 2499 return rate 2500 } 2501 2502 // Copy of go-github's func newResponse, basically. 2503 func makeGithubResponse(res *http.Response) *github.Response { 2504 gr := &github.Response{Response: res} 2505 gr.Rate = parseRate(res) 2506 for _, lv := range res.Header["Link"] { 2507 for _, link := range strings.Split(lv, ",") { 2508 segs := strings.Split(strings.TrimSpace(link), ";") 2509 if len(segs) < 2 { 2510 continue 2511 } 2512 // ensure href is properly formatted 2513 if !strings.HasPrefix(segs[0], "<") || !strings.HasSuffix(segs[0], ">") { 2514 continue 2515 } 2516 2517 // try to pull out page parameter 2518 u, err := url.Parse(segs[0][1 : len(segs[0])-1]) 2519 if err != nil { 2520 continue 2521 } 2522 page := u.Query().Get("page") 2523 if page == "" { 2524 continue 2525 } 2526 2527 for _, seg := range segs[1:] { 2528 switch strings.TrimSpace(seg) { 2529 case `rel="next"`: 2530 gr.NextPage, _ = strconv.Atoi(page) 2531 case `rel="prev"`: 2532 gr.PrevPage, _ = strconv.Atoi(page) 2533 case `rel="first"`: 2534 gr.FirstPage, _ = strconv.Atoi(page) 2535 case `rel="last"`: 2536 gr.LastPage, _ = strconv.Atoi(page) 2537 } 2538 } 2539 } 2540 } 2541 return gr 2542 } 2543 2544 var rxReferences = regexp.MustCompile(`(?:\b([\w\-]+)/([\w\-]+))?\#(\d+)\b`) 2545 2546 // parseGithubRefs parses references to GitHub issues from commit message commitMsg. 2547 // Multiple references to the same issue are deduplicated. 2548 func (c *Corpus) parseGithubRefs(gerritProj string, commitMsg string) []GitHubIssueRef { 2549 // Use of rxReferences by itself caused this function to take 20% of the CPU time. 2550 // TODO(bradfitz): stop using regexps here. 2551 // But in the meantime, help the regexp engine with this one weird trick: 2552 // Reduce the length of the string given to FindAllStringSubmatch. 2553 // Discard all lines before the first line containing a '#'. 2554 // The "Fixes #nnnn" is usually at the end, so this discards most of the input. 2555 // Now CPU is only 2% instead of 20%. 2556 hash := strings.IndexByte(commitMsg, '#') 2557 if hash == -1 { 2558 return nil 2559 } 2560 nl := strings.LastIndexByte(commitMsg[:hash], '\n') 2561 commitMsg = commitMsg[nl+1:] 2562 2563 // TODO: use FindAllStringSubmatchIndex instead, so we can 2564 // back up and see what's behind it and ignore "#1", "#2", 2565 // "#3" 'references' which are actually bullets or ARM 2566 // disassembly, and only respect them as real if they have the 2567 // word "Fixes " or "Issue " or similar before them. 2568 ms := rxReferences.FindAllStringSubmatch(commitMsg, -1) 2569 if len(ms) == 0 { 2570 return nil 2571 } 2572 /* e.g. 2573 2017/03/30 21:42:07 matches: [["golang/go#9327" "golang" "go" "9327"]] 2574 2017/03/30 21:42:07 matches: [["golang/go#16512" "golang" "go" "16512"] ["golang/go#18404" "golang" "go" "18404"]] 2575 2017/03/30 21:42:07 matches: [["#1" "" "" "1"]] 2576 2017/03/30 21:42:07 matches: [["#10234" "" "" "10234"]] 2577 2017/03/30 21:42:31 matches: [["GoogleCloudPlatform/gcloud-golang#262" "GoogleCloudPlatform" "gcloud-golang" "262"]] 2578 2017/03/30 21:42:31 matches: [["GoogleCloudPlatform/google-cloud-go#481" "GoogleCloudPlatform" "google-cloud-go" "481"]] 2579 */ 2580 c.initGithub() 2581 github := c.GitHub() 2582 refs := make([]GitHubIssueRef, 0, len(ms)) 2583 for _, m := range ms { 2584 owner, repo, numStr := strings.ToLower(m[1]), strings.ToLower(m[2]), m[3] 2585 num, err := strconv.ParseInt(numStr, 10, 32) 2586 if err != nil { 2587 continue 2588 } 2589 if owner == "" { 2590 if gerritProj == "go.googlesource.com/go" { 2591 owner, repo = "golang", "go" 2592 } else { 2593 continue 2594 } 2595 } 2596 ref := GitHubIssueRef{github.getOrCreateRepo(owner, repo), int32(num)} 2597 if contains(refs, ref) { 2598 continue 2599 } 2600 refs = append(refs, ref) 2601 } 2602 return refs 2603 } 2604 2605 // contains reports whether refs contains the reference ref. 2606 func contains(refs []GitHubIssueRef, ref GitHubIssueRef) bool { 2607 for _, r := range refs { 2608 if r == ref { 2609 return true 2610 } 2611 } 2612 return false 2613 } 2614 2615 type limitTransport struct { 2616 limiter *rate.Limiter 2617 base http.RoundTripper 2618 } 2619 2620 func (t limitTransport) RoundTrip(r *http.Request) (*http.Response, error) { 2621 limiter := t.limiter 2622 // NOTE(cbro): limiter should not be nil, but check defensively. 2623 if limiter != nil { 2624 if err := limiter.Wait(r.Context()); err != nil { 2625 return nil, err 2626 } 2627 } 2628 return t.base.RoundTrip(r) 2629 } 2630 2631 // canRetry reports whether ctx hasn't been canceled and err is a non-nil retryable error. 2632 // If so, it blocks until enough time passes so that it's acceptable to retry immediately. 2633 func canRetry(ctx context.Context, err error) bool { 2634 switch e := err.(type) { 2635 case *github.RateLimitError: 2636 log.Printf("GitHub rate limit error: %s, waiting until %s", e.Message, e.Rate.Reset.Time) 2637 ctx, cancel := context.WithDeadline(ctx, e.Rate.Reset.Time) 2638 defer cancel() 2639 <-ctx.Done() 2640 return ctx.Err() != context.Canceled 2641 case *github.AbuseRateLimitError: 2642 if e.RetryAfter != nil { 2643 log.Printf("GitHub rate abuse error: %s, waiting for %s", e.Message, *e.RetryAfter) 2644 ctx, cancel := context.WithTimeout(ctx, *e.RetryAfter) 2645 defer cancel() 2646 <-ctx.Done() 2647 return ctx.Err() != context.Canceled 2648 } 2649 log.Printf("GitHub rate abuse error: %s", e.Message) 2650 } 2651 return false 2652 }