// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Logic to interact with a Gerrit server. Gerrit has an entire Git-based
// protocol for fetching metadata about CL's, reviewers, patch comments, which
// is used here - we don't use the x/build/gerrit client, which hits the API.
// TODO: write about Gerrit's Git API.

package maintner

import (
	"bufio"
	"bytes"
	"context"
	"errors"
	"fmt"
	"log"
	"net/url"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	"golang.org/x/build/internal/envutil"
	"golang.org/x/build/maintner/maintpb"
)

// Gerrit holds information about a number of Gerrit projects.
type Gerrit struct {
	// c is the Corpus this Gerrit state belongs to.
	c *Corpus
	// projects holds every known project.
	projects map[string]*GerritProject // keyed by "go.googlesource.com/build"

	// clsReferencingGithubIssue maps a GitHub issue reference to the CLs
	// that reference it. Entries are appended by
	// GerritCL.updateGithubIssueRefs.
	clsReferencingGithubIssue map[GitHubIssueRef][]*GerritCL
}

// normalizeGerritServer maps server onto the hostname form used in project
// keys. If server parses as a URL with a non-empty host, only the host is
// kept. A name ending in "-review.googlesource.com" then has the first
// occurrence of that string replaced by ".googlesource.com", so
// "https://go-review.googlesource.com" becomes "go.googlesource.com".
// Any other value is returned as is.
func normalizeGerritServer(server string) string {
	u, err := url.Parse(server)
	if err == nil && u.Host != "" {
		server = u.Host
	}
	if strings.HasSuffix(server, "-review.googlesource.com") {
		// special case: the review site is hosted at a different URL than the
		// Git checkout URL.
		return strings.Replace(server, "-review.googlesource.com", ".googlesource.com", 1)
	}
	return server
}

// Project returns the specified Gerrit project if it's known, otherwise
// it returns nil. Server is the Gerrit server's hostname, such as
// "go.googlesource.com".
57 func (g *Gerrit) Project(server, project string) *GerritProject { 58 server = normalizeGerritServer(server) 59 return g.projects[server+"/"+project] 60 } 61 62 // c.mu must be held 63 func (g *Gerrit) getOrCreateProject(gerritProj string) *GerritProject { 64 proj, ok := g.projects[gerritProj] 65 if ok { 66 return proj 67 } 68 proj = &GerritProject{ 69 gerrit: g, 70 proj: gerritProj, 71 cls: map[int32]*GerritCL{}, 72 remote: map[gerritCLVersion]GitHash{}, 73 ref: map[string]GitHash{}, 74 commit: map[GitHash]*GitCommit{}, 75 need: map[GitHash]bool{}, 76 } 77 g.projects[gerritProj] = proj 78 return proj 79 } 80 81 // ForeachProjectUnsorted calls fn for each known Gerrit project. 82 // Iteration ends if fn returns a non-nil value. 83 func (g *Gerrit) ForeachProjectUnsorted(fn func(*GerritProject) error) error { 84 for _, p := range g.projects { 85 if err := fn(p); err != nil { 86 return err 87 } 88 } 89 return nil 90 } 91 92 // GerritProject represents a single Gerrit project. 93 type GerritProject struct { 94 gerrit *Gerrit 95 proj string // "go.googlesource.com/net" 96 cls map[int32]*GerritCL 97 remote map[gerritCLVersion]GitHash 98 need map[GitHash]bool 99 commit map[GitHash]*GitCommit 100 numLabelChanges int // incremented (too many times) by meta commits with "Label:" updates 101 dirtyCL map[*GerritCL]struct{} 102 103 // ref are the non-change refs with keys like "HEAD", 104 // "refs/heads/master", "refs/tags/v0.8.0", etc. 105 // 106 // Notably, this excludes the "refs/changes/*" refs matched by 107 // rxChangeRef. Those are in the remote map. 108 ref map[string]GitHash 109 } 110 111 // Ref returns a non-change ref, such as "HEAD", "refs/heads/master", 112 // or "refs/tags/v0.8.0", 113 // Change refs of the form "refs/changes/*" are not supported. 114 // The returned hash is the zero value (an empty string) if the ref 115 // does not exist. 
116 func (gp *GerritProject) Ref(ref string) GitHash { 117 return gp.ref[ref] 118 } 119 120 func (gp *GerritProject) gitDir() string { 121 return filepath.Join(gp.gerrit.c.getDataDir(), url.PathEscape(gp.proj)) 122 } 123 124 // NumLabelChanges is an inaccurate count the number of times vote labels have 125 // changed in this project. This number is monotonically increasing. 126 // This is not guaranteed to be accurate; it definitely overcounts, but it 127 // at least increments when changes are made. 128 // It will not undercount. 129 func (gp *GerritProject) NumLabelChanges() int { 130 // TODO: rename this method. 131 return gp.numLabelChanges 132 } 133 134 // ServerSlashProject returns the server and project together, such as 135 // "go.googlesource.com/build". 136 func (gp *GerritProject) ServerSlashProject() string { return gp.proj } 137 138 // Server returns the Gerrit server, such as "go.googlesource.com". 139 func (gp *GerritProject) Server() string { 140 if i := strings.IndexByte(gp.proj, '/'); i != -1 { 141 return gp.proj[:i] 142 } 143 return "" 144 } 145 146 // Project returns the Gerrit project on the server, such as "go" or "crypto". 147 func (gp *GerritProject) Project() string { 148 if i := strings.IndexByte(gp.proj, '/'); i != -1 { 149 return gp.proj[i+1:] 150 } 151 return "" 152 } 153 154 // ForeachNonChangeRef calls fn for each git ref on the server that is 155 // not a change (code review) ref. In general, these correspond to 156 // submitted changes. 157 // fn is called serially with sorted ref names. 158 // Iteration stops with the first non-nil error returned by fn. 
159 func (gp *GerritProject) ForeachNonChangeRef(fn func(ref string, hash GitHash) error) error { 160 refs := make([]string, 0, len(gp.ref)) 161 for ref := range gp.ref { 162 refs = append(refs, ref) 163 } 164 sort.Strings(refs) 165 for _, ref := range refs { 166 if err := fn(ref, gp.ref[ref]); err != nil { 167 return err 168 } 169 } 170 return nil 171 } 172 173 // ForeachOpenCL calls fn for each open CL in the repo. 174 // 175 // If fn returns an error, iteration ends and ForeachOpenCL returns 176 // with that error. 177 // 178 // The fn function is called serially, with increasingly numbered 179 // CLs. 180 func (gp *GerritProject) ForeachOpenCL(fn func(*GerritCL) error) error { 181 var s []*GerritCL 182 for _, cl := range gp.cls { 183 if !cl.complete() || cl.Status != "new" || cl.Private { 184 continue 185 } 186 s = append(s, cl) 187 } 188 sort.Slice(s, func(i, j int) bool { return s[i].Number < s[j].Number }) 189 for _, cl := range s { 190 if err := fn(cl); err != nil { 191 return err 192 } 193 } 194 return nil 195 } 196 197 // ForeachCLUnsorted calls fn for each CL in the repo, in any order. 198 // 199 // If fn returns an error, iteration ends and ForeachCLUnsorted returns with 200 // that error. 201 func (gp *GerritProject) ForeachCLUnsorted(fn func(*GerritCL) error) error { 202 for _, cl := range gp.cls { 203 if !cl.complete() { 204 continue 205 } 206 if err := fn(cl); err != nil { 207 return err 208 } 209 } 210 return nil 211 } 212 213 // CL returns the GerritCL with the given number, or nil if it is not present. 214 // 215 // CL numbers are shared across all projects on a Gerrit server, so you can get 216 // nil unless you have the GerritProject containing that CL. 217 func (gp *GerritProject) CL(number int32) *GerritCL { 218 if cl := gp.cls[number]; cl != nil && cl.complete() { 219 return cl 220 } 221 return nil 222 } 223 224 // GitCommit returns the provided git commit. 
225 func (gp *GerritProject) GitCommit(hash string) (*GitCommit, error) { 226 if len(hash) != 40 { 227 // TODO: support prefix lookups. build a trie. But 228 // for now just avoid panicking in gitHashFromHexStr. 229 return nil, fmt.Errorf("git hash %q is not 40 characters", hash) 230 } 231 var buf [20]byte 232 _, err := decodeHexStr(buf[:], hash) 233 if err != nil { 234 return nil, fmt.Errorf("git hash %q is not a valid hex string: %w", hash, err) 235 } 236 c := gp.commit[GitHash(buf[:])] 237 if c == nil { 238 // TODO: return an error that the caller can unpack with errors.Is or 239 // errors.As to distinguish this case. 240 return nil, fmt.Errorf("git commit %s not found in project", hash) 241 } 242 return c, nil 243 } 244 245 func (gp *GerritProject) logf(format string, args ...interface{}) { 246 log.Printf("gerrit "+gp.proj+": "+format, args...) 247 } 248 249 // gerritCLVersion is a value type used as a map key to store a CL 250 // number and a patchset version. Its Version field is overloaded 251 // to reference the "meta" metadata commit if the Version is 0. 252 type gerritCLVersion struct { 253 CLNumber int32 254 Version int32 // version 0 is used for the "meta" ref. 255 } 256 257 // A GerritCL represents a single change in Gerrit. 258 type GerritCL struct { 259 // Project is the project this CL is part of. 260 Project *GerritProject 261 262 // Number is the CL number on the Gerrit server (e.g. 1, 2, 3). Gerrit CL 263 // numbers are sparse (CL N does not guarantee that CL N-1 exists) and 264 // Gerrit issues CL's out of order - it may issue CL N, then CL (N - 18), 265 // then CL (N - 40). 266 Number int32 267 268 // Created is the CL creation time. 269 Created time.Time 270 271 // Version is the number of versions of the patchset for this 272 // CL seen so far. It starts at 1. 273 Version int32 274 275 // Commit is the git commit of the latest version of this CL. 276 // Previous versions are available via CommitAtVersion. 277 // Commit is always non-nil. 
278 Commit *GitCommit 279 280 // branch is a cache of the latest "Branch: " value seen from 281 // MetaCommits' commit message values, stripped of any 282 // "refs/heads/" prefix. It's usually "master". 283 branch string 284 285 // Meta is the head of the most recent Gerrit "meta" commit 286 // for this CL. This is guaranteed to be a linear history 287 // back to a CL-specific root commit for this meta branch. 288 // Meta will always be non-nil. 289 Meta *GerritMeta 290 291 // Metas contains the history of Meta commits, from the oldest (root) 292 // to the most recent. The last item in the slice is the same 293 // value as the GerritCL.Meta field. 294 // The Metas slice will always contain at least 1 element. 295 Metas []*GerritMeta 296 297 // Status will be "merged", "abandoned", "new", or "draft". 298 Status string 299 300 // Private indicates whether this is a private CL. 301 // Empirically, it seems that one meta commit of private CLs is 302 // sometimes visible to everybody, even when the rest of the details 303 // and later meta commits are not. In general, if you see this 304 // being set to true, treat this CL as if it doesn't exist. 305 Private bool 306 307 // GitHubIssueRefs are parsed references to GitHub issues. 308 // Multiple references to the same issue are deduplicated. 309 GitHubIssueRefs []GitHubIssueRef 310 311 // Messages contains all of the messages for this CL, in sorted order. 312 Messages []*GerritMessage 313 } 314 315 // complete reports whether cl is complete. 316 // A CL is considered complete if its Meta and Commit fields are non-nil, 317 // and the Metas slice contains at least 1 element. 318 func (cl *GerritCL) complete() bool { 319 return cl.Meta != nil && 320 len(cl.Metas) >= 1 && 321 cl.Commit != nil 322 } 323 324 // GerritMessage is a Gerrit reply that is attached to the CL as a whole, and 325 // not to a file or line of a patch set. 326 // 327 // Maintner does very little parsing or formatting of a Message body. 
Messages 328 // are stored the same way they are stored in the API. 329 type GerritMessage struct { 330 // Meta is the commit containing the message. 331 Meta *GitCommit 332 333 // Version is the patch set version this message was sent on. 334 Version int32 335 336 // Message is the raw message contents from Gerrit (a subset 337 // of the raw git commit message), starting with "Patch Set 338 // nnnn". 339 Message string 340 341 // Date is when this message was stored (the commit time of 342 // the git commit). 343 Date time.Time 344 345 // Author returns the author of the commit. This takes the form "Gerrit User 346 // 13437 <13437@62eb7196-b449-3ce5-99f1-c037f21e1705>", where the number 347 // before the '@' sign is your Gerrit user ID, and the UUID after the '@' sign 348 // seems to be the same for all commits for the same Gerrit server, across 349 // projects. 350 // 351 // TODO: Merge the *GitPerson object here and for a person's Git commits 352 // (which use their real email) via the user ID, so they point to the same 353 // object. 354 Author *GitPerson 355 } 356 357 // References reports whether cl includes a commit message reference 358 // to the provided Github issue ref. 359 func (cl *GerritCL) References(ref GitHubIssueRef) bool { 360 for _, eref := range cl.GitHubIssueRefs { 361 if eref == ref { 362 return true 363 } 364 } 365 return false 366 } 367 368 // Branch returns the CL's branch, with any "refs/heads/" prefix removed. 369 func (cl *GerritCL) Branch() string { return cl.branch } 370 371 func (cl *GerritCL) updateBranch() { 372 for i := len(cl.Metas) - 1; i >= 0; i-- { 373 mc := cl.Metas[i] 374 branch := lineValue(mc.Commit.Msg, "Branch:") 375 if branch != "" { 376 cl.branch = strings.TrimPrefix(branch, "refs/heads/") 377 return 378 } 379 } 380 } 381 382 // lineValueOK extracts a value from an RFC 822-style "key: value" series of lines. 383 // If all is, 384 // 385 // foo: bar 386 // bar: baz 387 // 388 // lineValue(all, "foo:") returns "bar". 
It trims any whitespace. 389 // The prefix is case sensitive and must include the colon. 390 // The ok value reports whether a line with such a prefix is found, even if its 391 // value is empty. If ok is true, the rest value contains the subsequent lines. 392 func lineValueOK(all, prefix string) (value, rest string, ok bool) { 393 orig := all 394 consumed := 0 395 for { 396 i := strings.Index(all, prefix) 397 if i == -1 { 398 return "", "", false 399 } 400 if i > 0 && all[i-1] != '\n' && all[i-1] != '\r' { 401 all = all[i+len(prefix):] 402 consumed += i + len(prefix) 403 continue 404 } 405 val := all[i+len(prefix):] 406 consumed += i + len(prefix) 407 if nl := strings.IndexByte(val, '\n'); nl != -1 { 408 consumed += nl + 1 409 val = val[:nl+1] 410 } else { 411 consumed = len(orig) 412 } 413 return strings.TrimSpace(val), orig[consumed:], true 414 } 415 } 416 417 func lineValue(all, prefix string) string { 418 value, _, _ := lineValueOK(all, prefix) 419 return value 420 } 421 422 func lineValueRest(all, prefix string) (value, rest string) { 423 value, rest, _ = lineValueOK(all, prefix) 424 return 425 } 426 427 // WorkInProgress reports whether the CL has its Work-in-progress bit set, per 428 // https://gerrit-review.googlesource.com/Documentation/intro-user.html#wip 429 func (cl *GerritCL) WorkInProgress() bool { 430 var wip bool 431 for _, m := range cl.Metas { 432 switch lineValue(m.Commit.Msg, "Work-in-progress:") { 433 case "true": 434 wip = true 435 case "false": 436 wip = false 437 } 438 } 439 return wip 440 } 441 442 // ChangeID returns the Gerrit "Change-Id: Ixxxx" line's Ixxxx 443 // value from the cl.Msg, if any. 444 func (cl *GerritCL) ChangeID() string { 445 id := cl.Footer("Change-Id:") 446 if strings.HasPrefix(id, "I") && len(id) == 41 { 447 return id 448 } 449 return "" 450 } 451 452 // Footer returns the value of a line of the form <key>: value from 453 // the CL’s commit message. The key is case-sensitive and must end in 454 // a colon. 
455 // An empty string is returned if there is no value for key. 456 func (cl *GerritCL) Footer(key string) string { 457 if len(key) == 0 || key[len(key)-1] != ':' { 458 panic("Footer key does not end in colon") 459 } 460 // TODO: git footers are treated as multimaps. Account for this. 461 return lineValue(cl.Commit.Msg, key) 462 } 463 464 // OwnerID returns the ID of the CL’s owner. It will return -1 on error. 465 func (cl *GerritCL) OwnerID() int { 466 if !cl.complete() { 467 return -1 468 } 469 // Meta commits caused by the owner of a change have an email of the form 470 // <user id>@<uuid of gerrit server>. 471 email := cl.Metas[0].Commit.Author.Email() 472 idx := strings.Index(email, "@") 473 if idx == -1 { 474 return -1 475 } 476 id, err := strconv.Atoi(email[:idx]) 477 if err != nil { 478 return -1 479 } 480 return id 481 } 482 483 // Owner returns the author of the first commit to the CL. It returns nil on error. 484 func (cl *GerritCL) Owner() *GitPerson { 485 // The owner of a change is a numeric ID that can have more than one email 486 // associated with it, but the email associated with the very first upload is 487 // designated as the owner of the change by Gerrit. 488 hash, ok := cl.Project.remote[gerritCLVersion{CLNumber: cl.Number, Version: 1}] 489 if !ok { 490 return nil 491 } 492 commit, ok := cl.Project.commit[hash] 493 if !ok { 494 return nil 495 } 496 return commit.Author 497 } 498 499 // Subject returns the subject of the latest commit message. 500 // The subject is separated from the body by a blank line. 501 func (cl *GerritCL) Subject() string { 502 if i := strings.Index(cl.Commit.Msg, "\n\n"); i >= 0 { 503 return strings.Replace(cl.Commit.Msg[:i], "\n", " ", -1) 504 } 505 return strings.Replace(cl.Commit.Msg, "\n", " ", -1) 506 } 507 508 // CommitAtVersion returns the git commit of the specified version of this CL. 509 // It returns nil if version is not in the range [1, cl.Version]. 
510 func (cl *GerritCL) CommitAtVersion(version int32) *GitCommit { 511 if version < 1 || version > cl.Version { 512 return nil 513 } 514 hash, ok := cl.Project.remote[gerritCLVersion{CLNumber: cl.Number, Version: version}] 515 if !ok { 516 return nil 517 } 518 return cl.Project.commit[hash] 519 } 520 521 func (cl *GerritCL) updateGithubIssueRefs() { 522 gp := cl.Project 523 gerrit := gp.gerrit 524 gc := cl.Commit 525 526 oldRefs := cl.GitHubIssueRefs 527 newRefs := gerrit.c.parseGithubRefs(gp.proj, gc.Msg) 528 cl.GitHubIssueRefs = newRefs 529 for _, ref := range newRefs { 530 if !clSliceContains(gerrit.clsReferencingGithubIssue[ref], cl) { 531 // TODO: make this as small as 532 // possible? Most will have length 533 // 1. Care about default capacity of 534 // 2? 535 gerrit.clsReferencingGithubIssue[ref] = append(gerrit.clsReferencingGithubIssue[ref], cl) 536 } 537 } 538 for _, ref := range oldRefs { 539 if !cl.References(ref) { 540 // TODO: remove ref from gerrit.clsReferencingGithubIssue 541 // It could be a map of maps I suppose, but not as compact. 542 // So uses a slice as the second layer, since there will normally 543 // be one item. 544 } 545 } 546 } 547 548 // c.mu must be held 549 func (c *Corpus) initGerrit() { 550 if c.gerrit != nil { 551 return 552 } 553 c.gerrit = &Gerrit{ 554 c: c, 555 projects: map[string]*GerritProject{}, 556 clsReferencingGithubIssue: map[GitHubIssueRef][]*GerritCL{}, 557 } 558 } 559 560 type watchedGerritRepo struct { 561 project *GerritProject 562 } 563 564 // TrackGerrit registers the Gerrit project with the given project as a project 565 // to watch and append to the mutation log. Only valid in leader mode. 566 // The provided string should be of the form "hostname/project", without a scheme 567 // or trailing slash. 
568 func (c *Corpus) TrackGerrit(gerritProj string) { 569 if c.mutationLogger == nil { 570 panic("can't TrackGerrit in non-leader mode") 571 } 572 c.mu.Lock() 573 defer c.mu.Unlock() 574 575 if strings.Count(gerritProj, "/") != 1 { 576 panic(fmt.Sprintf("gerrit project argument %q expected to contain exactly 1 slash", gerritProj)) 577 } 578 c.initGerrit() 579 if _, dup := c.gerrit.projects[gerritProj]; dup { 580 panic("duplicated watched gerrit project " + gerritProj) 581 } 582 project := c.gerrit.getOrCreateProject(gerritProj) 583 if project == nil { 584 panic("gerrit project not created") 585 } 586 c.watchedGerritRepos = append(c.watchedGerritRepos, watchedGerritRepo{ 587 project: project, 588 }) 589 } 590 591 // called with c.mu Locked 592 func (c *Corpus) processGerritMutation(gm *maintpb.GerritMutation) { 593 if c.gerrit == nil { 594 // TODO: option to ignore mutation if user isn't interested. 595 c.initGerrit() 596 } 597 gp, ok := c.gerrit.projects[gm.Project] 598 if !ok { 599 // TODO: option to ignore mutation if user isn't interested. 600 // For now, always process the record. 601 gp = c.gerrit.getOrCreateProject(gm.Project) 602 } 603 gp.processMutation(gm) 604 } 605 606 var statusIndicator = "\nStatus: " 607 608 // The Go Gerrit site does not really use the "draft" status much, but if 609 // you need to test it, create a dummy commit and then run 610 // 611 // git push origin HEAD:refs/drafts/master 612 var statuses = []string{"merged", "abandoned", "draft", "new"} 613 614 // getGerritStatus returns a Gerrit status for a commit, or the empty string to 615 // indicate the commit did not show a status. 616 // 617 // getGerritStatus relies on the Gerrit code review convention of amending 618 // the meta commit to include the current status of the CL. The Gerrit search 619 // bar allows you to search for changes with the following statuses: "open", 620 // "reviewed", "closed", "abandoned", "merged", "draft", "pending". 
The REST API 621 // returns only "NEW", "DRAFT", "ABANDONED", "MERGED". Gerrit attaches "draft", 622 // "abandoned", "new", and "merged" statuses to some meta commits; you may have 623 // to search the current meta commit's parents to find the last good commit. 624 func getGerritStatus(commit *GitCommit) string { 625 idx := strings.Index(commit.Msg, statusIndicator) 626 if idx == -1 { 627 return "" 628 } 629 off := idx + len(statusIndicator) 630 for _, status := range statuses { 631 if strings.HasPrefix(commit.Msg[off:], status) { 632 return status 633 } 634 } 635 return "" 636 } 637 638 var errTooManyParents = errors.New("maintner: too many commit parents") 639 640 // foreachCommit walks an entire linear git history, starting at commit itself, 641 // and iterating over all of its parents. commit must be non-nil. 642 // f is called for each commit until an error is returned from f, or a commit has no parent. 643 // 644 // foreachCommit returns errTooManyParents (and stops processing) if a commit 645 // has more than one parent. 646 // An error is returned if a commit has a parent that cannot be found. 647 // 648 // Corpus.mu must be held. 649 func (gp *GerritProject) foreachCommit(commit *GitCommit, f func(*GitCommit) error) error { 650 c := gp.gerrit.c 651 for { 652 if err := f(commit); err != nil { 653 return err 654 } 655 if len(commit.Parents) == 0 { 656 // No parents, we're at the end of the linear history. 657 return nil 658 } 659 if len(commit.Parents) > 1 { 660 return errTooManyParents 661 } 662 parentHash := commit.Parents[0].Hash // meta tree has no merge commits 663 commit = c.gitCommit[parentHash] 664 if commit == nil { 665 return fmt.Errorf("parent commit %v not found", parentHash) 666 } 667 } 668 } 669 670 // getGerritMessage parses a Gerrit comment from the given commit or returns nil 671 // if there wasn't one. 672 // 673 // Corpus.mu must be held. 
func (gp *GerritProject) getGerritMessage(commit *GitCommit) *GerritMessage {
	const existVerPhrase = "\nPatch Set "
	const newVerPhrase = "\nUploaded patch set "

	// Use whichever of the two phrases occurs first in the message.
	startExist := strings.Index(commit.Msg, existVerPhrase)
	startNew := strings.Index(commit.Msg, newVerPhrase)
	var start int
	var phrase string
	switch {
	case startExist == -1 && startNew == -1:
		return nil
	case startExist == -1 || (startNew != -1 && startNew < startExist):
		phrase = newVerPhrase
		start = startNew
	case startNew == -1 || (startExist != -1 && startExist < startNew):
		phrase = existVerPhrase
		start = startExist
	}

	// The patch set number is the text between the phrase and the next ':'.
	numStart := start + len(phrase)
	colon := strings.IndexByte(commit.Msg[numStart:], ':')
	if colon == -1 {
		return nil
	}
	num := commit.Msg[numStart : numStart+colon]
	if strings.Contains(num, "\n") || strings.Contains(num, ".") {
		// Spanned lines. Didn't match expected comment form
		// we care about (comments with vote changes), like:
		//
		//    Uploaded patch set 5: Some-Vote=+2
		//
		// For now, treat such meta updates (new uploads only)
		// as not comments.
		return nil
	}
	version, err := strconv.ParseInt(num, 10, 32)
	if err != nil {
		gp.logf("for phrase %q at %d, unexpected patch set number in %s; err: %v, message: %s", phrase, start, commit.Hash, err, commit.Msg)
		return nil
	}
	// Both phrases begin with '\n'; step past it so the message text
	// starts at the phrase itself.
	start++
	v := commit.Msg[start:]
	// Scan line by line for the "Patch-set:" footer. l is the number of
	// bytes between start and the beginning of the line being examined.
	l := 0
	for {
		i := strings.IndexByte(v, '\n')
		if i < 0 {
			// Ran out of complete lines without finding the footer.
			return nil
		}
		if strings.HasPrefix(v[:i], "Patch-set:") {
			// two newlines before the Patch-set message
			v = commit.Msg[start : start+l-2]
			break
		}
		v = v[i+1:]
		l = l + i + 1
	}
	return &GerritMessage{
		Meta:    commit,
		Author:  commit.Author,
		Date:    commit.CommitTime,
		Message: v,
		Version: int32(version),
	}
}

// reverseGerritMessages reverses ss in place.
func reverseGerritMessages(ss []*GerritMessage) {
	for i := len(ss)/2 - 1; i >= 0; i-- {
		opp := len(ss) - 1 - i
		ss[i], ss[opp] = ss[opp], ss[i]
	}
}

// reverseGerritMetas reverses ss in place.
func reverseGerritMetas(ss []*GerritMeta) {
	for i := len(ss)/2 - 1; i >= 0; i-- {
		opp := len(ss) - 1 - i
		ss[i], ss[opp] = ss[opp], ss[i]
	}
}

// processMutation applies gm to the project in three steps: it records the
// mutation's commits, drops its deleted non-change refs, and then applies
// its ref updates. A change ref update creates or updates the matching CL.
//
// called with c.mu Locked
func (gp *GerritProject) processMutation(gm *maintpb.GerritMutation) {
	c := gp.gerrit.c

	for _, commitp := range gm.Commits {
		gc, err := c.processGitCommit(commitp)
		if err != nil {
			gp.logf("error processing commit %q: %v", commitp.Sha1, err)
			continue
		}
		gp.commit[gc.Hash] = gc
		delete(gp.need, gc.Hash)

		// Parents that are not in gp.commit yet are noted as needed.
		for _, p := range gc.Parents {
			gp.markNeededCommit(p.Hash)
		}
	}

	for _, refName := range gm.DeletedRefs {
		delete(gp.ref, refName)
		// TODO: this doesn't delete change refs (from
		// gp.remote) yet, mostly because those don't tend to
		// ever get deleted and we haven't yet needed it. If
		// we ever need it, the mutation generation side would
		// also need to be updated.
	}

	for _, refp := range gm.Refs {
		refName := refp.Ref
		hash := c.gitHashFromHexStr(refp.Sha1)
		m := rxChangeRef.FindStringSubmatch(refName)
		if m == nil {
			if strings.HasPrefix(refName, "refs/meta/") {
				// Some of these slipped in to the data
				// before we started ignoring them. So ignore them here.
				continue
			}
			// Misc ref, not a change ref.
			if _, ok := c.gitCommit[hash]; !ok {
				gp.logf("ERROR: non-change ref %v references unknown hash %v; ignoring", refp, hash)
				continue
			}
			gp.ref[refName] = hash
			continue
		}

		// m[1] is the CL number; m[2] is the patch set version or "meta".
		clNum64, err := strconv.ParseInt(m[1], 10, 32)
		version, ok := gerritVersionNumber(m[2])
		if !ok || err != nil {
			continue
		}
		gc, ok := c.gitCommit[hash]
		if !ok {
			gp.logf("ERROR: ref %v references unknown hash %v; ignoring", refp, hash)
			continue
		}
		clv := gerritCLVersion{int32(clNum64), version}
		gp.remote[clv] = hash
		cl := gp.getOrCreateCL(clv.CLNumber)

		if clv.Version == 0 { // is a meta commit
			cl.Meta = newGerritMeta(gc, cl)
			gp.noteDirtyCL(cl) // needs processing at end of sync
		} else {
			cl.Commit = gc
			cl.Version = clv.Version
			cl.updateGithubIssueRefs()
		}
		if c.didInit {
			gp.logf("Ref %+v => %v", clv, hash)
		}
	}
}

// noteDirtyCL notes a CL that needs further processing before the corpus
// is returned to the user.
// cl.Meta must be non-nil.
//
// called with Corpus.mu Locked
func (gp *GerritProject) noteDirtyCL(cl *GerritCL) {
	if cl.Meta == nil {
		panic("noteDirtyCL given a GerritCL with a nil Meta field")
	}
	// The set is allocated lazily; finishProcessing resets it to nil.
	if gp.dirtyCL == nil {
		gp.dirtyCL = make(map[*GerritCL]struct{})
	}
	gp.dirtyCL[cl] = struct{}{}
}

// finishProcessing runs finishProcessingCL on every CL noted as dirty and
// then clears the dirty set.
//
// called with Corpus.mu Locked
func (gp *GerritProject) finishProcessing() {
	for cl := range gp.dirtyCL {
		// All dirty CLs have non-nil Meta, so it's safe to call finishProcessingCL.
		gp.finishProcessingCL(cl)
	}
	gp.dirtyCL = nil
}

// finishProcessingCL fixes up invariants before the cl can be returned back to the user.
// cl.Meta must be non-nil.
//
// It walks the CL's meta history from the newest commit to the root and
// from it rebuilds Status, Messages, Metas, Created and the cached branch.
// If the newest meta commit is unknown or the walk fails, a warning is
// logged and those fields are not rebuilt.
//
// called with Corpus.mu Locked
func (gp *GerritProject) finishProcessingCL(cl *GerritCL) {
	c := gp.gerrit.c

	mostRecentMetaCommit, ok := c.gitCommit[cl.Meta.Commit.Hash]
	if !ok {
		log.Printf("WARNING: GerritProject(%q).finishProcessingCL failed to find CL %v hash %s",
			gp.ServerSlashProject(), cl.Number, cl.Meta.Commit.Hash)
		return
	}

	// foundStatus keeps the first status seen, i.e. the one from the
	// newest meta commit that carries a recognized status.
	foundStatus := ""

	// Walk from the newest meta commit backwards, so we store the messages
	// in reverse order and then flip the array before setting on the
	// GerritCL object.
	var backwardMessages []*GerritMessage
	var backwardMetas []*GerritMeta

	err := gp.foreachCommit(mostRecentMetaCommit, func(gc *GitCommit) error {
		if strings.Contains(gc.Msg, "\nLabel: ") {
			gp.numLabelChanges++
		}
		// Private is only ever set here, never cleared.
		if strings.Contains(gc.Msg, "\nPrivate: true\n") {
			cl.Private = true
		}
		if gc.GerritMeta == nil {
			gc.GerritMeta = newGerritMeta(gc, cl)
		}
		if foundStatus == "" {
			foundStatus = getGerritStatus(gc)
		}
		backwardMetas = append(backwardMetas, gc.GerritMeta)
		if message := gp.getGerritMessage(gc); message != nil {
			backwardMessages = append(backwardMessages, message)
		}
		return nil
	})
	if err != nil {
		log.Printf("WARNING: GerritProject(%q).finishProcessingCL failed to walk CL %v meta history: %v",
			gp.ServerSlashProject(), cl.Number, err)
		return
	}

	// Keep a previously known status when this walk found none, and fall
	// back to "new" when there is no status at all.
	if foundStatus != "" {
		cl.Status = foundStatus
	} else if cl.Status == "" {
		cl.Status = "new"
	}

	reverseGerritMessages(backwardMessages)
	cl.Messages = backwardMessages

	reverseGerritMetas(backwardMetas)
	cl.Metas = backwardMetas

	// After the reversal Metas[0] is the root (oldest) meta commit.
	cl.Created = cl.Metas[0].Commit.CommitTime

	cl.updateBranch()
}

// clSliceContains reports whether cls contains cl.
func clSliceContains(cls []*GerritCL, cl *GerritCL) bool {
	for _, v := range cls {
		if v == cl {
			return true
		}
	}
	return false
}

// markNeededCommit records hash as needed unless the project already has
// that commit.
//
// c.mu must be held
func (gp *GerritProject) markNeededCommit(hash GitHash) {
	if _, ok := gp.commit[hash]; ok {
		// Already have it.
		return
	}
	gp.need[hash] = true
}

// getOrCreateCL returns the project's CL with the given number, first
// registering a new CL with only Project and Number set if none exists.
//
// c.mu must be held
func (gp *GerritProject) getOrCreateCL(num int32) *GerritCL {
	cl, ok := gp.cls[num]
	if ok {
		return cl
	}
	cl = &GerritCL{
		Project: gp,
		Number:  num,
	}
	gp.cls[num] = cl
	return cl
}

// gerritVersionNumber parses the last component of a change ref. "meta"
// yields version 0; any other value must be a base-10 integer that fits in
// 32 bits. ok is false when s cannot be parsed.
func gerritVersionNumber(s string) (version int32, ok bool) {
	if s == "meta" {
		return 0, true
	}
	v, err := strconv.ParseInt(s, 10, 32)
	if err != nil {
		return 0, false
	}
	return int32(v), true
}

// rxRemoteRef matches "git ls-remote" lines.
//
// sample row:
// fd1e71f1594ce64941a85428ddef2fbb0ad1023e	refs/changes/99/30599/3
//
// Capture values:
//
//	$0: whole match
//	$1: "fd1e71f1594ce64941a85428ddef2fbb0ad1023e"
//	$2: "30599" (CL number)
//	$3: "1", "2" (patchset number) or "meta" (a special commit
//	    holding the comments for a commit)
//
// The "99" in the middle covers all CL's that end in "99", so
// refs/changes/99/99/1, refs/changes/99/199/meta.
974 var rxRemoteRef = regexp.MustCompile(`^([0-9a-f]{40,})\s+refs/changes/[0-9a-f]{2}/([0-9]+)/(.+)$`) 975 976 // $1: change num 977 // $2: version or "meta" 978 var rxChangeRef = regexp.MustCompile(`^refs/changes/[0-9a-f]{2}/([0-9]+)/(meta|(?:\d+))`) 979 980 func (gp *GerritProject) sync(ctx context.Context, loop bool) error { 981 if err := gp.init(ctx); err != nil { 982 gp.logf("init: %v", err) 983 return err 984 } 985 activityCh := gp.gerrit.c.activityChan("gerrit:" + gp.proj) 986 for { 987 if err := gp.syncOnce(ctx); err != nil { 988 if ee, ok := err.(*exec.ExitError); ok { 989 err = fmt.Errorf("%v; stderr=%q", err, ee.Stderr) 990 } 991 gp.logf("sync: %v", err) 992 return err 993 } 994 if !loop { 995 return nil 996 } 997 timer := time.NewTimer(5 * time.Minute) 998 select { 999 case <-ctx.Done(): 1000 timer.Stop() 1001 return ctx.Err() 1002 case <-activityCh: 1003 timer.Stop() 1004 case <-timer.C: 1005 } 1006 } 1007 } 1008 1009 // syncMissingCommits is a cleanup step to fix a previous maintner bug where 1010 // refs were updated without all their reachable commits being indexed and 1011 // recorded in the log. This should only ever run once, and only in Go's history. 1012 // If we restarted the log from the beginning this wouldn't be necessary. 
1013 func (gp *GerritProject) syncMissingCommits(ctx context.Context) error { 1014 c := gp.gerrit.c 1015 var hashes []GitHash 1016 c.mu.Lock() 1017 for hash := range gp.need { 1018 hashes = append(hashes, hash) 1019 } 1020 c.mu.Unlock() 1021 if len(hashes) == 0 { 1022 return nil 1023 } 1024 1025 gp.logf("fixing indexing of %d missing commits", len(hashes)) 1026 if err := gp.fetchHashes(ctx, hashes); err != nil { 1027 return err 1028 } 1029 1030 n, err := gp.syncCommits(ctx) 1031 if err != nil { 1032 return err 1033 } 1034 gp.logf("%d missing commits indexed", n) 1035 return nil 1036 } 1037 1038 func (gp *GerritProject) syncOnce(ctx context.Context) error { 1039 if err := gp.syncMissingCommits(ctx); err != nil { 1040 return err 1041 } 1042 1043 c := gp.gerrit.c 1044 gitDir := gp.gitDir() 1045 1046 t0 := time.Now() 1047 cmd := exec.CommandContext(ctx, "git", "fetch", "origin") 1048 envutil.SetDir(cmd, gitDir) 1049 // Enable extra Git tracing in case the fetch hangs. 1050 envutil.SetEnv(cmd, 1051 "GIT_TRACE2_EVENT=1", 1052 "GIT_TRACE_CURL_NO_DATA=1", 1053 ) 1054 cmd.Stdout = new(bytes.Buffer) 1055 cmd.Stderr = cmd.Stdout 1056 1057 // The 'git fetch' needs a timeout in case it hangs, but to avoid spurious 1058 // timeouts (and live-lock) the timeout should be (at least) an order of 1059 // magnitude longer than we expect the operation to actually take. Moreover, 1060 // exec.CommandContext sends SIGKILL, which may terminate the command without 1061 // giving it a chance to flush useful trace entries, so we'll terminate it 1062 // manually instead (see https://golang.org/issue/22757). 
1063 if err := cmd.Start(); err != nil { 1064 return fmt.Errorf("git fetch origin: %v", err) 1065 } 1066 timer := time.AfterFunc(10*time.Minute, func() { 1067 cmd.Process.Signal(os.Interrupt) 1068 }) 1069 err := cmd.Wait() 1070 fetchDuration := time.Since(t0).Round(time.Millisecond) 1071 timer.Stop() 1072 if err != nil { 1073 return fmt.Errorf("git fetch origin: %v after %v, %s", err, fetchDuration, cmd.Stdout) 1074 } 1075 gp.logf("ran git fetch origin in %v", fetchDuration) 1076 1077 t0 = time.Now() 1078 cmd = exec.CommandContext(ctx, "git", "ls-remote") 1079 envutil.SetDir(cmd, gitDir) 1080 out, err := cmd.CombinedOutput() 1081 lsRemoteDuration := time.Since(t0).Round(time.Millisecond) 1082 if err != nil { 1083 return fmt.Errorf("git ls-remote in %s: %v after %v, %s", gitDir, err, lsRemoteDuration, out) 1084 } 1085 gp.logf("ran git ls-remote in %v", lsRemoteDuration) 1086 1087 var changedRefs []*maintpb.GitRef 1088 var toFetch []GitHash 1089 1090 bs := bufio.NewScanner(bytes.NewReader(out)) 1091 1092 // Take the lock here to access gp.remote and call c.gitHashFromHex. 1093 // It's acceptable to take such a coarse-looking lock because 1094 // it's not actually around I/O: all the input from ls-remote has 1095 // already been slurped into memory. 
1096 c.mu.Lock() 1097 refExists := map[string]bool{} // whether ref is this ls-remote fetch 1098 for bs.Scan() { 1099 line := bs.Bytes() 1100 tab := bytes.IndexByte(line, '\t') 1101 if tab == -1 { 1102 if !strings.HasPrefix(bs.Text(), "From ") { 1103 gp.logf("bogus ls-remote line: %q", line) 1104 } 1105 continue 1106 } 1107 sha1 := string(line[:tab]) 1108 refName := strings.TrimSpace(string(line[tab+1:])) 1109 refExists[refName] = true 1110 hash := c.gitHashFromHexStr(sha1) 1111 1112 var needFetch bool 1113 1114 m := rxRemoteRef.FindSubmatch(line) 1115 if m != nil { 1116 clNum, err := strconv.ParseInt(string(m[2]), 10, 32) 1117 version, ok := gerritVersionNumber(string(m[3])) 1118 if err != nil || !ok { 1119 continue 1120 } 1121 curHash := gp.remote[gerritCLVersion{int32(clNum), version}] 1122 needFetch = curHash != hash 1123 } else if trackGerritRef(refName) && gp.ref[refName] != hash { 1124 needFetch = true 1125 gp.logf("ref %q = %q", refName, sha1) 1126 } 1127 1128 if needFetch { 1129 toFetch = append(toFetch, hash) 1130 changedRefs = append(changedRefs, &maintpb.GitRef{ 1131 Ref: refName, 1132 Sha1: string(sha1), 1133 }) 1134 } 1135 } 1136 var deletedRefs []string 1137 for n := range gp.ref { 1138 if !refExists[n] { 1139 gp.logf("ref %q now deleted", n) 1140 deletedRefs = append(deletedRefs, n) 1141 } 1142 } 1143 c.mu.Unlock() 1144 1145 if err := bs.Err(); err != nil { 1146 gp.logf("ls-remote scanning error: %v", err) 1147 return err 1148 } 1149 if len(deletedRefs) > 0 { 1150 c.addMutation(&maintpb.Mutation{ 1151 Gerrit: &maintpb.GerritMutation{ 1152 Project: gp.proj, 1153 DeletedRefs: deletedRefs, 1154 }, 1155 }) 1156 } 1157 if len(changedRefs) == 0 { 1158 return nil 1159 } 1160 gp.logf("%d new refs", len(changedRefs)) 1161 const batchSize = 250 1162 for len(toFetch) > 0 { 1163 batch := toFetch 1164 if len(batch) > batchSize { 1165 batch = batch[:batchSize] 1166 } 1167 if err := gp.fetchHashes(ctx, batch); err != nil { 1168 return err 1169 } 1170 1171 
c.mu.Lock() 1172 for _, hash := range batch { 1173 gp.markNeededCommit(hash) 1174 } 1175 c.mu.Unlock() 1176 1177 n, err := gp.syncCommits(ctx) 1178 if err != nil { 1179 return err 1180 } 1181 toFetch = toFetch[len(batch):] 1182 gp.logf("synced %v commits for %d new hashes, %d hashes remain", n, len(batch), len(toFetch)) 1183 1184 c.addMutation(&maintpb.Mutation{ 1185 Gerrit: &maintpb.GerritMutation{ 1186 Project: gp.proj, 1187 Refs: changedRefs[:len(batch)], 1188 }}) 1189 changedRefs = changedRefs[len(batch):] 1190 } 1191 1192 return nil 1193 } 1194 1195 func (gp *GerritProject) syncCommits(ctx context.Context) (n int, err error) { 1196 c := gp.gerrit.c 1197 lastLog := time.Now() 1198 for { 1199 hash := gp.commitToIndex() 1200 if hash == "" { 1201 return n, nil 1202 } 1203 now := time.Now() 1204 if lastLog.Before(now.Add(-1 * time.Second)) { 1205 lastLog = now 1206 gp.logf("parsing commits (%v done)", n) 1207 } 1208 commit, err := parseCommitFromGit(gp.gitDir(), hash) 1209 if err != nil { 1210 return n, err 1211 } 1212 c.addMutation(&maintpb.Mutation{ 1213 Gerrit: &maintpb.GerritMutation{ 1214 Project: gp.proj, 1215 Commits: []*maintpb.GitCommit{commit}, 1216 }, 1217 }) 1218 n++ 1219 } 1220 } 1221 1222 func (gp *GerritProject) commitToIndex() GitHash { 1223 c := gp.gerrit.c 1224 1225 c.mu.RLock() 1226 defer c.mu.RUnlock() 1227 for hash := range gp.need { 1228 return hash 1229 } 1230 return "" 1231 } 1232 1233 var ( 1234 statusSpace = []byte("Status: ") 1235 ) 1236 1237 func (gp *GerritProject) fetchHashes(ctx context.Context, hashes []GitHash) error { 1238 args := []string{"fetch", "--quiet", "origin"} 1239 for _, hash := range hashes { 1240 args = append(args, hash.String()) 1241 } 1242 gp.logf("fetching %v hashes...", len(hashes)) 1243 t0 := time.Now() 1244 cmd := exec.CommandContext(ctx, "git", args...) 
1245 envutil.SetDir(cmd, gp.gitDir()) 1246 out, err := cmd.CombinedOutput() 1247 d := time.Since(t0).Round(time.Millisecond) 1248 if err != nil { 1249 gp.logf("error fetching %d hashes after %v: %s", len(hashes), d, out) 1250 return err 1251 } 1252 gp.logf("fetched %v hashes in %v", len(hashes), d) 1253 return nil 1254 } 1255 1256 func formatExecError(err error) string { 1257 if ee, ok := err.(*exec.ExitError); ok { 1258 return fmt.Sprintf("%v; stderr=%q", err, ee.Stderr) 1259 } 1260 return fmt.Sprint(err) 1261 } 1262 1263 func (gp *GerritProject) init(ctx context.Context) error { 1264 gitDir := gp.gitDir() 1265 if err := os.MkdirAll(gitDir, 0755); err != nil { 1266 return err 1267 } 1268 // try to short circuit a git init error, since the init error matching is 1269 // brittle 1270 if _, err := exec.LookPath("git"); err != nil { 1271 return fmt.Errorf("looking for git binary: %v", err) 1272 } 1273 1274 if _, err := os.Stat(filepath.Join(gitDir, ".git", "config")); err == nil { 1275 cmd := exec.CommandContext(ctx, "git", "remote", "-v") 1276 envutil.SetDir(cmd, gitDir) 1277 remoteBytes, err := cmd.Output() 1278 if err != nil { 1279 return fmt.Errorf("running git remote -v in %v: %v", gitDir, formatExecError(err)) 1280 } 1281 if !strings.Contains(string(remoteBytes), "origin") && !strings.Contains(string(remoteBytes), "https://"+gp.proj) { 1282 return fmt.Errorf("didn't find origin & gp.url in remote output %s", string(remoteBytes)) 1283 } 1284 gp.logf("git directory exists.") 1285 return nil 1286 } 1287 1288 cmd := exec.CommandContext(ctx, "git", "init") 1289 buf := new(bytes.Buffer) 1290 cmd.Stdout = buf 1291 cmd.Stderr = buf 1292 envutil.SetDir(cmd, gitDir) 1293 if err := cmd.Run(); err != nil { 1294 log.Printf(`Error running "git init": %s`, buf.String()) 1295 return err 1296 } 1297 buf.Reset() 1298 cmd = exec.CommandContext(ctx, "git", "remote", "add", "origin", "https://"+gp.proj) 1299 cmd.Stdout = buf 1300 cmd.Stderr = buf 1301 envutil.SetDir(cmd, gitDir) 
1302 if err := cmd.Run(); err != nil { 1303 log.Printf(`Error running "git remote add origin": %s`, buf.String()) 1304 return err 1305 } 1306 1307 return nil 1308 } 1309 1310 // trackGerritRef reports whether we care to record changes about the 1311 // given ref. 1312 func trackGerritRef(ref string) bool { 1313 if strings.HasPrefix(ref, "refs/users/") { 1314 return false 1315 } 1316 if strings.HasPrefix(ref, "refs/meta/") { 1317 return false 1318 } 1319 if strings.HasPrefix(ref, "refs/cache-automerge/") { 1320 return false 1321 } 1322 return true 1323 } 1324 1325 func (g *Gerrit) check() error { 1326 for key, gp := range g.projects { 1327 if err := gp.check(); err != nil { 1328 return fmt.Errorf("%s: %v", key, err) 1329 } 1330 } 1331 return nil 1332 } 1333 1334 // called with its Corpus.mu locked. (called by 1335 // Corpus.finishProcessing; read comment there) 1336 func (g *Gerrit) finishProcessing() { 1337 if g == nil { 1338 return 1339 } 1340 for _, gp := range g.projects { 1341 gp.finishProcessing() 1342 } 1343 } 1344 1345 func (gp *GerritProject) check() error { 1346 if len(gp.need) != 0 { 1347 return fmt.Errorf("%d missing commits", len(gp.need)) 1348 } 1349 for hash, gc := range gp.commit { 1350 if gc.Committer == placeholderCommitter { 1351 return fmt.Errorf("git commit for key %q was placeholder", hash) 1352 } 1353 if gc.Hash != hash { 1354 return fmt.Errorf("git commit for key %q had GitCommit.Hash %q", hash, gc.Hash) 1355 } 1356 for _, pc := range gc.Parents { 1357 if _, ok := gp.commit[pc.Hash]; !ok { 1358 return fmt.Errorf("git commit %q exists but its parent %q does not", gc.Hash, pc.Hash) 1359 } 1360 } 1361 } 1362 return nil 1363 } 1364 1365 // GerritMeta represents a Git commit in the Gerrit NoteDb meta 1366 // format. 1367 type GerritMeta struct { 1368 // Commit points up to the git commit for this Gerrit NoteDB meta commit. 1369 Commit *GitCommit 1370 // CL is the Gerrit CL this metadata is for. 
1371 CL *GerritCL 1372 1373 flags gerritMetaFlags 1374 } 1375 1376 type gerritMetaFlags uint8 1377 1378 const ( 1379 // metaFlagHashtagEdit indicates that the meta commit edits the hashtags on the commit. 1380 metaFlagHashtagEdit gerritMetaFlags = 1 << iota 1381 ) 1382 1383 func newGerritMeta(gc *GitCommit, cl *GerritCL) *GerritMeta { 1384 m := &GerritMeta{Commit: gc, CL: cl} 1385 1386 if msg := m.Commit.Msg; strings.Contains(msg, "autogenerated:gerrit:setHashtag") && m.ActionTag() == "autogenerated:gerrit:setHashtag" { 1387 m.flags |= metaFlagHashtagEdit 1388 } 1389 return m 1390 } 1391 1392 // Footer returns the "key: value" lines at the base of the commit. 1393 func (m *GerritMeta) Footer() string { 1394 i := strings.LastIndex(m.Commit.Msg, "\n\n") 1395 if i == -1 { 1396 return "" 1397 } 1398 return m.Commit.Msg[i+2:] 1399 } 1400 1401 // Hashtags returns the set of hashtags on m's CL as of the time of m. 1402 func (m *GerritMeta) Hashtags() GerritHashtags { 1403 // If this GerritMeta set hashtags, use it. 1404 tags, _, ok := lineValueOK(m.Footer(), "Hashtags: ") 1405 if ok { 1406 return GerritHashtags(tags) 1407 } 1408 1409 // Otherwise, look at older metas (from most recent to oldest) 1410 // to find most recent value. Ignore anything that's newer 1411 // than m. 1412 sawThisMeta := false // whether we've seen 'm' 1413 metas := m.CL.Metas 1414 for i := len(metas) - 1; i >= 0; i-- { 1415 mp := metas[i] 1416 if mp.Commit.Hash == m.Commit.Hash { 1417 sawThisMeta = true 1418 continue 1419 } 1420 if !sawThisMeta { 1421 continue 1422 } 1423 if tags, _, ok := lineValueOK(mp.Footer(), "Hashtags: "); ok { 1424 return GerritHashtags(tags) 1425 } 1426 } 1427 return "" 1428 } 1429 1430 // ActionTag returns the Gerrit "Tag" value from the meta commit. 1431 // These are of the form "autogenerated:gerrit:setHashtag". 
1432 func (m *GerritMeta) ActionTag() string { 1433 return lineValue(m.Footer(), "Tag: ") 1434 } 1435 1436 // HashtagEdits returns the hashtags added and removed by this meta commit, 1437 // and whether this meta commit actually modified hashtags. 1438 func (m *GerritMeta) HashtagEdits() (added, removed GerritHashtags, ok bool) { 1439 // Return early for the majority of meta commits that don't edit hashtags. 1440 if m.flags&metaFlagHashtagEdit == 0 { 1441 return 1442 } 1443 1444 msg := m.Commit.Msg 1445 1446 // Parse lines of form: 1447 // 1448 // Hashtag removed: bar 1449 // Hashtags removed: foo, bar 1450 // Hashtag added: bar 1451 // Hashtags added: foo, bar 1452 for len(msg) > 0 { 1453 value, rest := lineValueRest(msg, "Hash") 1454 msg = rest 1455 colon := strings.IndexByte(value, ':') 1456 if colon != -1 { 1457 action := value[:colon] 1458 value := GerritHashtags(strings.TrimSpace(value[colon+1:])) 1459 switch action { 1460 case "tag added", "tags added": 1461 added = value 1462 case "tag removed", "tags removed": 1463 removed = value 1464 } 1465 } 1466 } 1467 ok = added != "" || removed != "" 1468 return 1469 } 1470 1471 // HashtagsAdded returns the hashtags added by this meta commit, if any. 1472 func (m *GerritMeta) HashtagsAdded() GerritHashtags { 1473 added, _, _ := m.HashtagEdits() 1474 return added 1475 } 1476 1477 // HashtagsRemoved returns the hashtags removed by this meta commit, if any. 1478 func (m *GerritMeta) HashtagsRemoved() GerritHashtags { 1479 _, removed, _ := m.HashtagEdits() 1480 return removed 1481 } 1482 1483 // LabelVotes returns a map from label name to voter email to their vote. 1484 // 1485 // This is relatively expensive to call compared to other methods in maintner. 1486 // It is not currently cached. 
1487 func (m *GerritMeta) LabelVotes() (map[string]map[string]int8, error) { 1488 if m.CL == nil { 1489 panic("GerritMeta has nil CL field") 1490 } 1491 // To calculate votes as the time of the 'm' meta commit, 1492 // we need to consider the meta commits before it. 1493 // Let's see which number in the (linear) meta history 1494 // we are. 1495 ourIndex := -1 1496 for i, mc := range m.CL.Metas { 1497 if mc == m { 1498 ourIndex = i 1499 break 1500 } 1501 } 1502 if ourIndex == -1 { 1503 panic("LabelVotes called on GerritMeta not in its m.CL.Metas slice") 1504 } 1505 labels := map[string]map[string]int8{} 1506 1507 history := m.CL.Metas[:ourIndex+1] 1508 var lastCommit *GitCommit 1509 for _, mc := range history { 1510 footer := mc.Footer() 1511 isNew := strings.Contains(footer, "\nTag: autogenerated:gerrit:newPatchSet\n") 1512 email := mc.Commit.Author.Email() 1513 if isNew { 1514 if commit := lineValue(footer, "Commit: "); commit != "" { 1515 // TODO: implement Gerrit's vote copying. For example, 1516 // label.Label-Name.copyAllScoresIfNoChange defaults to true (as it is with Go's server) 1517 // https://gerrit-review.googlesource.com/Documentation/config-labels.html#label_copyAllScoresIfNoChange 1518 // We don't have the information in Maintner to do this, though. 1519 // One approximation is: 1520 newCommit, err := m.CL.Project.GitCommit(commit) 1521 if err != nil { 1522 return nil, fmt.Errorf("LabelVotes: invalid Commit in footer on CL %v, meta-CL %x: %v", m.CL.Number, mc.Commit.Hash, err) 1523 } 1524 if lastCommit != nil { 1525 if !lastCommit.SameDiffStat(newCommit) { 1526 // TODO: this should really use 1527 // the Gerrit server's project 1528 // config, including the 1529 // All-Projects config, but 1530 // that's not in Maintner 1531 // either. 
1532 delete(labels, "Run-TryBot") 1533 delete(labels, "TryBot-Result") 1534 } 1535 } 1536 lastCommit = newCommit 1537 } 1538 } 1539 1540 remain := footer 1541 for len(remain) > 0 { 1542 var labelEqVal string 1543 labelEqVal, remain = lineValueRest(remain, "Label: ") 1544 if labelEqVal != "" { 1545 label, value, whose := parseGerritLabelValue(labelEqVal) 1546 if label != "" { 1547 if whose == "" { 1548 whose = email 1549 } 1550 if label[0] == '-' { 1551 label = label[1:] 1552 if m := labels[label]; m != nil { 1553 delete(m, whose) 1554 } 1555 } else { 1556 m := labels[label] 1557 if m == nil { 1558 m = make(map[string]int8) 1559 labels[label] = m 1560 } 1561 m[whose] = value 1562 1563 } 1564 } 1565 } 1566 } 1567 } 1568 1569 return labels, nil 1570 } 1571 1572 // parseGerritLabelValue parses a Gerrit NoteDb "Label: ..." value. 1573 // It can take forms and return values such as: 1574 // 1575 // "Run-TryBot=+1" => ("Run-TryBot", 1, "") 1576 // "-Run-TryBot" => ("-Run-TryBot", 0, "") 1577 // "-Run-TryBot " => ("-Run-TryBot", 0, "") 1578 // "Run-TryBot=+1 Brad Fitzpatrick <5065@62eb7196-b449-3ce5-99f1-c037f21e1705>" => 1579 // ("Run-TryBot", 1, "5065@62eb7196-b449-3ce5-99f1-c037f21e1705") 1580 // "-TryBot-Result Gobot Gobot <5976@62eb7196-b449-3ce5-99f1-c037f21e1705>" => 1581 // ("-TryBot-Result", 0, "5976@62eb7196-b449-3ce5-99f1-c037f21e1705") 1582 func parseGerritLabelValue(v string) (label string, value int8, whose string) { 1583 space := strings.IndexByte(v, ' ') 1584 if space != -1 { 1585 v, whose = v[:space], v[space+1:] 1586 if i := strings.IndexByte(whose, '<'); i == -1 { 1587 whose = "" 1588 } else { 1589 whose = whose[i+1:] 1590 if i := strings.IndexByte(whose, '>'); i == -1 { 1591 whose = "" 1592 } else { 1593 whose = whose[:i] 1594 } 1595 } 1596 } 1597 v = strings.TrimSpace(v) 1598 if eq := strings.IndexByte(v, '='); eq == -1 { 1599 label = v 1600 } else { 1601 label = v[:eq] 1602 if n, err := strconv.ParseInt(v[eq+1:], 10, 8); err == nil { 1603 value = 
int8(n) 1604 } 1605 } 1606 return 1607 } 1608 1609 // GerritHashtags represents a set of "hashtags" on a Gerrit CL. 1610 // 1611 // The representation is a comma-separated string, to match Gerrit's 1612 // internal representation in the meta commits. To support both 1613 // forms of Gerrit's internal representation, whitespace is optional 1614 // around the commas. 1615 type GerritHashtags string 1616 1617 // Contains reports whether the hashtag t is in the set of tags s. 1618 func (s GerritHashtags) Contains(t string) bool { 1619 for len(s) > 0 { 1620 comma := strings.IndexByte(string(s), ',') 1621 if comma == -1 { 1622 return strings.TrimSpace(string(s)) == t 1623 } 1624 if strings.TrimSpace(string(s[:comma])) == t { 1625 return true 1626 } 1627 s = s[comma+1:] 1628 } 1629 return false 1630 } 1631 1632 // Foreach calls fn for each tag in the set s. 1633 func (s GerritHashtags) Foreach(fn func(string)) { 1634 for len(s) > 0 { 1635 comma := strings.IndexByte(string(s), ',') 1636 if comma == -1 { 1637 fn(strings.TrimSpace(string(s))) 1638 return 1639 } 1640 fn(strings.TrimSpace(string(s[:comma]))) 1641 s = s[comma+1:] 1642 } 1643 } 1644 1645 // Match reports whether fn returns true for any tag in the set s. 1646 // If fn returns true, iteration stops and Match returns true. 1647 func (s GerritHashtags) Match(fn func(string) bool) bool { 1648 for len(s) > 0 { 1649 comma := strings.IndexByte(string(s), ',') 1650 if comma == -1 { 1651 return fn(strings.TrimSpace(string(s))) 1652 } 1653 if fn(strings.TrimSpace(string(s[:comma]))) { 1654 return true 1655 } 1656 s = s[comma+1:] 1657 } 1658 return false 1659 } 1660 1661 // Len returns the number of tags in the set s. 1662 func (s GerritHashtags) Len() int { 1663 if s == "" { 1664 return 0 1665 } 1666 return strings.Count(string(s), ",") + 1 1667 }