github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/scripts/release-notes.py (about)

     1  #! /usr/bin/env python3
     2  #
     3  # Show a compact release note summary of a range of Git commits.
     4  #
     5  # Example use: release-notes.py --help
     6  #
     7  # Note: the first commit in the range is excluded!
     8  #
     9  # Requires:
    10  #   - GitPython https://pypi.python.org/pypi/GitPython/
    11  #   - You need to configure your local repo to pull the PR refs from
    12  #     GitHub.  To do this, add a line like:
    13  #       fetch = +refs/pull/*/head:refs/pull/origin/*
    14  #     to the GitHub remote section of .git/config.
    15  #
    16  # Disclaimer: this program is provided without warranties of any kind,
    17  # including suitability for any purpose. The author(s) will not be
    18  # responsible if this script eats your left sock.
    19  #
    20  # Known limitations:
    21  #
    22  # - if different people with the same name contribute, this script
    23  #   will be confused. (it will merge their work under one entry).
    24  # - the list of aliases below must be manually modified when
    25  #   contributors change their git name and/or email address.
    26  #
    27  # Note: there are unit tests in the release-notes subdirectory!
    28  #
    29  # pylint: disable=line-too-long, invalid-name, missing-function-docstring, too-many-branches, redefined-outer-name
    30  
    31  import sys
    32  import itertools
    33  import re
    34  import datetime
    35  import time
    36  from gitdb import exc
    37  import subprocess
    38  import os.path
    39  
    40  from optparse import OptionParser
    41  from gitdb import exc
    42  from git import Repo
    43  from git.repo.fun import name_to_object
    44  from git.util import Stats
    45  import os.path
    46  
    47  #
    48  # Global behavior constants
    49  #
    50  
# Minimum SHA prefix length used when abbreviating commit hashes
# (see the `hexsha[:shamin]` slices further down).
shamin = 9

# Basic mailmap functionality using the AUTHORS file.
# mmre matches one AUTHORS entry: a canonical name, its <address>, and an
# optional trailing run of "alias <address>" pairs captured as 'aliases'.
mmre = re.compile(r'^(?P<name>.*?)\s+<(?P<addr>[^>]*)>(?P<aliases>(?:[^<]*<[^>]*>)*)$')
# mmare picks the individual (alias, address) pairs out of that trailing run.
mmare = re.compile('(?P<alias>[^<]*)<(?P<addr>[^>]*)>')
# P objects flagged as Cockroach Labs people (filled by P.__init__ and by the
# AUTHORS loader below).
crdb_folk = set()
    58  
    59  class P:
    60      def __init__(self, name, addr):
    61          self.name = name
    62          self.email = addr
    63          self.aliases = [(name, addr)]
    64          self.crdb = '@cockroachlabs.com' in addr
    65          if self.crdb:
    66              crdb_folk.add(self)
    67      def __repr__(self):
    68          return "%s <%s>" % (self.name, self.email)
    69      def __lt__(self, other):
    70          return self.name < other.name or (self.name == other.name and self.email < other.email)
    71  mmap_bycanon = {}
    72  mmap_byaddr = {}
    73  mmap_byname = {}
    74  def define_person(name, addr):
    75          p = P(name, addr)
    76          canon = (name, addr)
    77          if canon in mmap_bycanon:
    78              print('warning: duplicate person %r, ignoring', canon)
    79              return None
    80          mmap_bycanon[canon] = p
    81          byaddr = mmap_byaddr.get(addr, [])
    82          byaddr.append(p)
    83          mmap_byaddr[addr] = byaddr
    84          byname = mmap_byname.get(name, [])
    85          byname.append(p)
    86          mmap_byname[name] = byname
    87          return p
    88  
    89  if not os.path.exists('AUTHORS'):
    90      print('warning: AUTHORS missing in current directory.', file=sys.stderr)
    91      print('Maybe use "cd" to navigate to the working tree root.', file=sys.stderr)
    92  else:
    93      with open('AUTHORS', 'r') as f:
    94          for line in f.readlines():
    95              if line.strip().startswith('#'):
    96                  continue
    97              m = mmre.match(line)
    98              if m is None:
    99                  continue
   100              p = define_person(m.group('name'), m.group('addr'))
   101              if p is None:
   102                  continue
   103              p.crdb = '@cockroachlabs.com' in line
   104              if p.crdb:
   105                  crdb_folk.add(p)
   106              aliases = m.group('aliases')
   107              aliases = mmare.findall(aliases)
   108              for alias, addr in aliases:
   109                  name = alias.strip()
   110                  byaddr = mmap_byaddr.get(addr, [])
   111                  if p not in byaddr:
   112                      byaddr.append(p)
   113                  mmap_byaddr[addr] = byaddr
   114                  if name == '':
   115                      name = p.name
   116                  canon = (name, addr)
   117                  if canon in mmap_bycanon:
   118                      print('warning: duplicate alias %r, ignoring', canon)
   119                      continue
   120                  mmap_bycanon[canon] = p
   121                  p.aliases.append(canon)
   122                  byname = mmap_byname.get(name, [])
   123                  if p not in byname:
   124                      byname.append(p)
   125                  mmap_byname[name] = byname
   126  
   127  # lookup_person retrieves the main identity of a person given one of their
   128  # names or email aliases in the mailmap.
   129  def lookup_person(name, email):
   130      key = (name, email)
   131      if key in mmap_bycanon:
   132          # lucky case.
   133          return mmap_bycanon[key]
   134      # Name+email didn't work.
   135      # Let's see email next.
   136      if email in mmap_byaddr:
   137          candidates = mmap_byaddr[email]
   138          if len(candidates) > 1:
   139              print('warning: no direct name match for', (name, email),
   140                    'and addr', email, 'is ambiguous,',
   141                    'keeping as-is', file=sys.stderr)
   142              return define_person(name, email)
   143          return candidates[0]
   144      # Email didn't work either. That's not great.
   145      if name in mmap_byname:
   146          candidates = mmap_byname[name]
   147          if len(candidates) > 1:
   148              print('warning: no direct name match for', (name, email),
   149                    'and name', name, 'is ambiguous,',
   150                    'keeping as-is', file=sys.stderr)
   151              return define_person(name, email)
   152          return candidates[0]
   153      return define_person(name, email)
   154  
# Section titles for release notes: maps a canonical release note category
# (lower case, as produced by extract_release_notes) to its section heading.
relnotetitles = {
    'cli change': "Command-line changes",
    'sql change': "SQL language changes",
    'admin ui change': "Admin UI changes",
    'general change': "General changes",
    'build change': "Build changes",
    'enterprise change': "Enterprise edition changes",
    'backward-incompatible change': "Backward-incompatible changes",
    'performance improvement': "Performance improvements",
    'bug fix': "Bug fixes",
    'security update': "Security updates",
}
   168  
# Order in which to show the sections. Entries are keys of relnotetitles.
relnote_sec_order = [
    'backward-incompatible change',
    'security update',
    'general change',
    'enterprise change',
    'sql change',
    'cli change',
    'admin ui change',
    'bug fix',
    'performance improvement',
    'build change',
]
   182  
# Release note category common misspellings: maps a (lower-cased) category as
# written in a commit message to the canonical category it stands for.
cat_misspells = {
    'sql': 'sql change',
    'general': 'general change',
    'core change': 'general change',
    'bugfix': 'bug fix',
    'performance change': 'performance improvement',
    'performance': 'performance improvement',
    'ui': 'admin ui change',
    'backwards-incompatible change': 'backward-incompatible change',
    'enterprise': 'enterprise change',
    'security': 'security update',
    'security change': 'security update',
}
   197  
   198  #
   199  # Release note format
   200  #
   201  
   202  # The following release note formats have been seen in the wild:
   203  #
   204  # Release note (xxx): yyy    <- canonical
   205  # Release Notes: None
   206  # Release note (xxx): yyy
   207  # Release note (xxx) : yyy
   208  # Release note: (xxx): yyy
   209  # Release note: xxx: yyy
   210  # Release note: (xxx) yyy
   211  # Release note: yyy (no category)
   212  # Release note (xxx, zzz): yyy
# Matches an explicit "no release note" marker at the start of a line,
# e.g. "Release note: None" or "release notes: none".
norelnote = re.compile(r'^[rR]elease [nN]otes?: *[Nn]one', flags=re.M)
# Captures :? (xxx) ?: yyy
form1 = r':? *\((?P<cat1>[^)]*)\) *:?'
# Captures : xxx: yyy - this must be careful not to capture too much, we just accept one or two words
form2 = r': *(?P<cat2>[^ ]+(?: +[^ ]+)?) *:'
# Captures : yyy - no category
form3 = r':(?P<cat3>)'
# Full release note header: "Release note(s)" at the start of the text or
# right after a line break, followed by one of the three category forms; the
# rest is captured as 'note' (re.S lets it span line breaks).
relnote = re.compile(r'(?:^|[\n\r])[rR]elease [nN]otes? *(?:' + form1 + '|' + form2 + '|' + form3 + r') *(?P<note>.*)$', flags=re.S)

# "Co-authored-by: Name <email>" trailer lines.
coauthor = re.compile(r'^Co-authored-by: (?P<name>[^<]*) <(?P<email>.*)>', flags=re.M)
# Issue annotations such as "Fixes #", "fixed #", "Closes #" at line start.
fixannot = re.compile(r'^([fF]ix(es|ed)?|[cC]lose(d|s)?) #', flags=re.M)
   224  
   225  #
   226  # Merge commit format
   227  #
   228  
   229  # The following merge commits have been seen in the wild:
   230  #
   231  # Merge pull request #XXXXX from ...      <- GitHub merges
   232  # .... (#XXXX)                            <- GitHub merges (alt format)
   233  # Merge #XXXXX #XXXXX #XXXXX              <- Bors merges
# "Merge #1 #2 ..." (Bors) or "Merge pull request #1 ..." (GitHub) at the very
# start of the message; 'numbers' holds the space-separated "#NNN" list.
merge_numbers = re.compile(r'^Merge( pull request)?(?P<numbers>( #[0-9]+)+)')
# A line ending in "(#NNN)"; 'numbers' holds the single "#NNN".
simple_merge = re.compile(r'.*\((?P<numbers>#[0-9]+)\)$', re.M)
   236  
   237  #
   238  # Initialization / option parsing
   239  #
   240  
   241  parser = OptionParser()
   242  parser.add_option("-k", "--sort-key", dest="sort_key", default="title",
   243                    help="sort by KEY (pr, title, insertions, deletions, files, sha, date; default: title)", metavar="KEY")
   244  parser.add_option("-r", "--reverse", action="store_true", dest="reverse_sort", default=False,
   245                    help="reverse sort")
   246  parser.add_option("-f", "--from", dest="from_commit",
   247                    help="list history from COMMIT. Note: the first commit is excluded.", metavar="COMMIT")
   248  parser.add_option("-t", "--until", dest="until_commit", default="HEAD",
   249                    help="list history up and until COMMIT (default: HEAD)", metavar="COMMIT")
   250  parser.add_option("-p", "--pull-ref", dest="pull_ref_prefix", default="refs/pull/origin",
   251                    help="prefix for pull request refs (default: refs/pull/origin)", metavar="PREFIX")
   252  parser.add_option("--hide-unambiguous-shas", action="store_true", dest="hide_shas", default=False,
   253                    help="omit commit SHAs from the release notes and per-contributor sections")
   254  parser.add_option("--hide-per-contributor-section", action="store_true", dest="hide_per_contributor", default=False,
   255                    help="omit the per-contributor section")
   256  parser.add_option("--hide-downloads-section", action="store_true", dest="hide_downloads", default=False,
   257                    help="omit the email sign-up and downloads section")
   258  parser.add_option("--hide-header", action="store_true", dest="hide_header", default=False,
   259                    help="omit the title and date header")
   260  parser.add_option("--exclude-from", dest="exclude_from_commit",
   261                    help="exclude history starting after COMMIT. Note: COMMIT itself is excluded.", metavar="COMMIT")
   262  parser.add_option("--exclude-until", dest="exclude_until_commit",
   263                    help="exclude history ending at COMMIT", metavar="COMMIT")
   264  parser.add_option("--one-line", dest="one_line", action="store_true", default=False,
   265                    help="unwrap release notes on a single line")
   266  
   267  (options, args) = parser.parse_args()
   268  
   269  sortkey = options.sort_key
   270  revsort = options.reverse_sort
   271  pull_ref_prefix = options.pull_ref_prefix
   272  hideshas = options.hide_shas
   273  hidepercontributor = options.hide_per_contributor
   274  hidedownloads = options.hide_downloads
   275  hideheader = options.hide_header
   276  
   277  repo = Repo('.')
   278  heads = repo.heads
   279  
   280  
   281  def reformat_note(note_lines):
   282      sep = '\n'
   283      if options.one_line:
   284          sep = ' '
   285      return sep.join(note_lines).strip()
   286  
   287  
   288  # Check that pull_ref_prefix is valid
   289  testrefname = "%s/1" % pull_ref_prefix
   290  
   291  try:
   292      repo.commit(testrefname)
   293  except exc.ODBError:
   294      print("Unable to find pull request refs at %s." % pull_ref_prefix, file=sys.stderr)
   295      print("Is your repo set up to fetch them?  Try adding", file=sys.stderr)
   296      print("  fetch = +refs/pull/*/head:%s/*" % pull_ref_prefix, file=sys.stderr)
   297      print("to the GitHub remote section of .git/config.", file=sys.stderr)
   298      sys.exit(1)
   299  
   300  
   301  
   302  def find_commits(from_commit_ref, until_commit_ref):
   303      try:
   304          firstCommit = repo.commit(from_commit_ref)
   305      except exc.ODBError:
   306          print("Unable to find the first commit of the range.", file=sys.stderr)
   307          print("No ref named %s." % from_commit_ref, file=sys.stderr)
   308          sys.exit(1)
   309  
   310      try:
   311          finalCommit = repo.commit(until_commit_ref)
   312      except exc.ODBError:
   313          print("Unable to find the last commit of the range.", file=sys.stderr)
   314          print("No ref named %s." % until_commit_ref, file=sys.stderr)
   315          sys.exit(1)
   316  
   317      return firstCommit, finalCommit
   318  
   319  
   320  if not options.until_commit:
   321      print("no value specified with --until, try --until=xxxxx (without space after =)", file=sys.stderr)
   322      sys.exit(1)
   323  if not options.from_commit:
   324      print("no value specified with --from, try --from=xxxx (without space after =)", file=sys.stderr)
   325      sys.exit(1)
   326  
   327  firstCommit, commit = find_commits(options.from_commit, options.until_commit)
   328  if commit == firstCommit:
   329      print("Commit range is empty!", file=sys.stderr)
   330      print(parser.get_usage(), file=sys.stderr)
   331      print("Example use:", file=sys.stderr)
   332      print("  %s --help" % sys.argv[0], file=sys.stderr)
   333      print("  %s --from xxx >output.md" % sys.argv[0], file=sys.stderr)
   334      print("  %s --from xxx --until yyy >output.md" % sys.argv[0], file=sys.stderr)
   335      print("Note: the first commit is excluded. Use e.g.: --from <prev-release-tag> --until <new-release-candidate-sha>", file=sys.stderr)
   336      sys.exit(0)
   337  
   338  excludedFirst, excludedLast = None, None
   339  if options.exclude_from_commit or options.exclude_until_commit:
   340      if not options.exclude_from_commit or not options.exclude_until_commit:
   341          print("Both -xf and -xt must be specified, or not at all.")
   342          sys.exit(1)
   343      excludedFirst, excludedLast = find_commits(options.exclude_from_commit, options.exclude_until_commit)
   344  
   345  #
   346  # Reading data from repository
   347  #
   348  
   349  
   350  def identify_commit(c):
   351      return '%s ("%s", %s)' % (
   352          c.hexsha, c.message.split('\n', 1)[0],
   353          datetime.datetime.fromtimestamp(c.committed_date).ctime())
   354  
   355  
   356  def check_reachability(start, end):
   357      # Is the first commit reachable from the current one?
   358      base = repo.merge_base(start, end)
   359      if len(base) == 0:
   360          print("error: %s:%s\nand %s:%s\nhave no common ancestor" % (
   361              options.from_commit, identify_commit(start),
   362              options.until_commit, identify_commit(end)), file=sys.stderr)
   363          sys.exit(1)
   364      commonParent = base[0]
   365      if start != commonParent:
   366          print("warning: %s:%s\nis not an ancestor of %s:%s!" % (
   367              options.from_commit, identify_commit(start),
   368              options.until_commit, identify_commit(end)), file=sys.stderr)
   369          print(file=sys.stderr)
   370          ageindays = int((start.committed_date - commonParent.committed_date) / 86400)
   371          prevlen = sum((1 for x in repo.iter_commits(commonParent.hexsha + '...' + start.hexsha)))
   372          print("The first common ancestor is %s" % identify_commit(commonParent), file=sys.stderr)
   373          print("which is %d commits older than %s:%s\nand %d days older. Using that as origin." %\
   374                (prevlen, options.from_commit, identify_commit(start), ageindays), file=sys.stderr)
   375          print(file=sys.stderr)
   376          start = commonParent
   377      return start, end
   378  
   379  
# Normalize the range: when --from is not an ancestor of --until,
# check_reachability substitutes their common ancestor. Store the resulting
# SHA back in the options so that later uses of options.from_commit see the
# effective origin.
firstCommit, commit = check_reachability(firstCommit, commit)
options.from_commit = firstCommit.hexsha
   382  
   383  
   384  def extract_release_notes(currentCommit):
   385      msglines = currentCommit.message.split('\n')
   386      curnote = []
   387      innote = False
   388      foundnote = False
   389      cat = None
   390      notes = []
   391      for line in msglines:
   392          m = coauthor.search(line)
   393          if m is not None:
   394              # A Co-authored-line finishes the parsing of the commit message,
   395              # because it's included at the end only.
   396              break
   397  
   398          m = fixannot.search(line)
   399          if m is not None:
   400              # Fix/Close etc. Ignore.
   401              continue
   402  
   403          m = norelnote.search(line)
   404          if m is not None:
   405              # Release note: None
   406              #
   407              # Remember we found a note (so the commit is not marked as "missing
   408              # a release note"), but we won't collect it.
   409              foundnote = True
   410              continue
   411  
   412          m = relnote.search(line)
   413          if m is None:
   414              # Current line does not contain a release note separator.
   415              # If we were already collecting a note, continue collecting it.
   416              if innote:
   417                  curnote.append(line)
   418              continue
   419  
   420          # We have a release note boundary. If we were collecting a
   421          # note already, complete it.
   422          if innote:
   423              notes.append((cat, reformat_note(curnote)))
   424              curnote = []
   425              innote = False
   426  
   427          # Start a new release note.
   428  
   429          firstline = m.group('note').strip()
   430          if firstline.lower() == 'none':
   431              # Release note: none - there's no note yet.
   432              continue
   433          foundnote = True
   434          innote = True
   435  
   436          # Capitalize the first line.
   437          if firstline != "":
   438              firstline = firstline[0].upper() + firstline[1:]
   439  
   440          curnote = [firstline]
   441          cat = m.group('cat1')
   442          if cat is None:
   443              cat = m.group('cat2')
   444          if cat is None:
   445              cat = 'missing category'
   446          # Normalize to tolerate various capitalizations.
   447          cat = cat.lower()
   448          # If there are multiple categories separated by commas or slashes, use the first as grouping key.
   449          cat = cat.split(',', 1)[0]
   450          cat = cat.split('/', 1)[0]
   451          # If there is any misspell, correct it.
   452          if cat in cat_misspells:
   453              cat = cat_misspells[cat]
   454  
   455      if innote:
   456          notes.append((cat, reformat_note(curnote)))
   457  
   458      return foundnote, notes
   459  
   460  
   461  spinner = itertools.cycle(['/', '-', '\\', '|'])
   462  spin_counter = 0
   463  
   464  
   465  def spin():
   466      global spin_counter
   467      # Display a progress bar
   468      spin_counter += 1
   469      if spin_counter % 10 == 0:
   470          if spin_counter % 100 == 0:
   471              print("\b..", end='', file=sys.stderr)
   472          print("\b", end='', file=sys.stderr)
   473          print(next(spinner), end='', file=sys.stderr)
   474          sys.stderr.flush()
   475  
   476  
   477  def get_direct_history(startCommit, lastCommit):
   478      history = []
   479      for c in repo.iter_commits(startCommit.hexsha + '..' + lastCommit.hexsha, first_parent=True):
   480          history.append(c)
   481      return history
   482  
   483  
   484  excluded_notes = set()
   485  if excludedFirst is not None:
   486      #
   487      # Collect all the notes to exclude during collection below.
   488      #
   489      print("Collecting EXCLUDED release notes from\n%s\nuntil\n%s" %
   490            (identify_commit(excludedFirst), identify_commit(excludedLast)), file=sys.stderr)
   491  
   492      # First ensure that the loop below will terminate.
   493      excludedFirst, excludedLast = check_reachability(excludedFirst, excludedLast)
   494      # Collect all the merge points, so we can measure progress.
   495      mergepoints = get_direct_history(excludedFirst, excludedLast)
   496  
   497      # Now collect all commits.
   498      print("Collecting EXCLUDED release notes...", file=sys.stderr)
   499      i = 0
   500      progress = 0
   501      lastTime = time.time()
   502      for c in repo.iter_commits(excludedFirst.hexsha + '..' + excludedLast.hexsha):
   503          progress = int(100. * float(i) / len(mergepoints))
   504          newTime = time.time()
   505          if newTime >= lastTime + 5:
   506              print("\b%d%%.." % progress, file=sys.stderr, end='')
   507              lastTime = newTime
   508          i += 1
   509  
   510          spin()
   511          # Collect the release notes in that commit.
   512          _, notes = extract_release_notes(c)
   513          for cat, note in notes:
   514              excluded_notes.add((cat, note))
   515  
   516      print("\b100%\n", file=sys.stderr)
   517  
print("Collecting release notes from\n%s\nuntil\n%s" % (identify_commit(firstCommit), identify_commit(commit)), file=sys.stderr)

# Maps a release note category to the list of note items collected for it
# (filled by completenote).
release_notes = {}
# Items describing commits/PRs for which no release note was found.
missing_release_notes = []
   522  
   523  
   524  def collect_authors(commit):
   525      authors = set()
   526      author = lookup_person(commit.author.name, commit.author.email)
   527      if author.name != 'GitHub':
   528          authors.add(author)
   529      author = lookup_person(commit.committer.name, commit.committer.email)
   530      if author.name != 'GitHub':
   531          authors.add(author)
   532      for m in coauthor.finditer(commit.message):
   533          aname = m.group('name').strip()
   534          amail = m.group('email').strip()
   535          author = lookup_person(aname, amail)
   536          authors.add(author)
   537      return authors
   538  
   539  
   540  def process_release_notes(pr, title, commit):
   541      authors = collect_authors(commit)
   542  
   543      foundnote, notes = extract_release_notes(commit)
   544  
   545      # At the end the notes will be presented in reverse order, because
   546      # we explore the commits in reverse order. However within 1 commit
   547      # the notes are in the correct order. So reverse them upfront here,
   548      # so that the 2nd reverse gets them in the right order again.
   549      for cat, note in reversed(notes):
   550          if (cat, note) not in excluded_notes:
   551              completenote(commit, cat, note, authors, pr, title)
   552  
   553      missing_item = None
   554      if not foundnote:
   555          # Missing release note. Keep track for later.
   556          missing_item = makeitem(pr, title, commit.hexsha[:shamin], authors)
   557      return missing_item, authors
   558  
   559  
   560  def makeitem(pr, prtitle, sha, authors):
   561      return {'authors': authors,
   562              'sha': sha,
   563              'pr': pr,
   564              'title': prtitle,
   565              'note': None}
   566  
   567  
   568  def completenote(commit, cat, notemsg, authors, pr, title):
   569      item = makeitem(pr, title, commit.hexsha[:shamin], authors)
   570      item['note'] = notemsg
   571  
   572      # Now collect per category.
   573      catnotes = release_notes.get(cat, [])
   574      catnotes.append(item)
   575      release_notes[cat] = catnotes
   576  
   577  
# Maps str(sorted(authors)) to a tuple (authors, [items]); see collect_item.
per_group_history = {}
# Every author passed to collect_item.
individual_authors = set()
# PR identifiers seen by analyze_pr, in "#NNNN" form.
allprs = set()
   581  
   582  
   583  # This function groups and counts all the commits that belong to a particular PR.
   584  # Some description is in order regarding the logic here: it should visit all
   585  # commits that are on the PR and only on the PR. If there's some secondary
   586  # branch merge included on the PR, as long as those commits don't otherwise end
   587  # up reachable from the target branch, they'll be included.  If there's a back-
   588  # merge from the target branch, that should be excluded.
   589  #
   590  # Examples:
   591  #
   592  # ### secondary branch merged into PR
   593  #
   594  # Dev branched off of K, made a commit J, made a commit G while someone else
   595  # committed H, merged H from the secondary branch to the topic branch in E,
   596  # made a final commit in C, then merged to master in A.
   597  #
   598  #     A <-- master
   599  #     |\
   600  #     | \
   601  #     B  C <-- PR tip
   602  #     |  |
   603  #     |  |
   604  #     D  E <-- secondary merge
   605  #     |  |\
   606  #     |  | \
   607  #     F  G  H <-- secondary branch
   608  #     |  | /
   609  #     |  |/
   610  #     I  J
   611  #     | /
   612  #     |/
   613  #     K <-- merge base
   614  #
   615  # C, E, G, H, and J will each be checked.  None of them are ancestors of B,
# so they will all be visited. E will not be counted because the message
   617  # starts with "Merge", so in the end C, G, H, and J will be included.
   618  #
   619  # ### back-merge from target branch
   620  #
   621  # Dev branched off H, made one commit G, merged the latest F from master in E,
   622  # made one final commit in C, then merged the PR.
   623  #
   624  #     A <-- master
   625  #     |\
   626  #     | \
   627  #     B  C <-- PR tip
   628  #     |  |
   629  #     |  |
   630  #     D  E <-- back-merge
   631  #     | /|
   632  #     |/ |
   633  #     F  G
   634  #     | /
   635  #     |/
   636  #     H <-- merge base
   637  #
   638  # C, E, F, and G will each be checked. F is an ancestor of B, so it will be
   639  # excluded. E starts with "Merge", so it will not be counted. Only C and G will
   640  # have statistics included.
   641  def analyze_pr(merge, pr):
   642      allprs.add(pr)
   643  
   644      refname = pull_ref_prefix + "/" + pr[1:]
   645      tip = name_to_object(repo, refname)
   646  
   647      noteexpr = re.compile("^%s: (?P<message>.*) r=.* a=.*" % pr[1:], flags=re.M)
   648      m = noteexpr.search(merge.message)
   649      title = ''
   650      if m is None:
   651          # GitHub merge
   652          title = merge.message.split('\n', 3)[2]
   653      else:
   654          # Bors merge
   655          title = m.group('message')
   656      title = title.strip()
   657  
   658      merge_base_result = repo.merge_base(merge.parents[0], tip)
   659      if len(merge_base_result) == 0:
   660          print("uh-oh!  can't find merge base!  pr", pr, file=sys.stderr)
   661          sys.exit(-1)
   662  
   663      merge_base = merge_base_result[0]
   664  
   665      seen_commits = set()
   666  
   667      missing_items = []
   668      authors = set()
   669      ncommits = 0
   670      for commit in repo.iter_commits(merge_base.hexsha + '..' + tip.hexsha):
   671          spin()
   672  
   673          if commit in seen_commits:
   674              # We may be seeing the same commit twice if a feature branch has
   675              # been forked in sub-branches. Just skip over what we've seen
   676              # already.
   677              continue
   678          seen_commits.add(commit)
   679  
   680          if not commit.message.startswith("Merge"):
   681              missing_item, prauthors = process_release_notes(pr, title, commit)
   682              authors.update(prauthors)
   683              ncommits += 1
   684              if missing_item is not None:
   685                  missing_items.append(missing_item)
   686  
   687      if ncommits == len(missing_items):
   688          # None of the commits found had a release note. List them.
   689          for item in missing_items:
   690              missing_release_notes.append(item)
   691  
   692      text = repo.git.diff(merge_base.hexsha, tip.hexsha, '--', numstat=True)
   693      stats = Stats._list_from_string(repo, text)
   694  
   695      collect_item(pr, title, merge.hexsha[:shamin], ncommits, authors, stats.total, merge.committed_date)
   696  
   697  
   698  
   699  def collect_item(pr, prtitle, sha, ncommits, authors, stats, prts):
   700      individual_authors.update(authors)
   701      if len(authors) == 0:
   702          authors.add("Unknown Author")
   703      item = makeitem(pr, prtitle, sha, authors)
   704      item.update({'ncommits': ncommits,
   705                   'insertions': stats['insertions'],
   706                   'deletions': stats['deletions'],
   707                   'files': stats['files'],
   708                   'lines': stats['lines'],
   709                   'date': datetime.date.fromtimestamp(prts).isoformat(),
   710                   })
   711  
   712      al = item['authors']
   713      k = str(sorted(al))
   714      history = per_group_history.get(k, (al, []))
   715      history[1].append(item)
   716      per_group_history[k] = history
   717  
   718  
   719  
   720  def analyze_standalone_commit(commit):
   721      # Some random out-of-branch commit. Let's not forget them.
   722      authors = collect_authors(commit)
   723      title = commit.message.split('\n', 1)[0].strip()
   724      item = makeitem('#unknown', title, commit.hexsha[:shamin], authors)
   725      missing_release_notes.append(item)
   726      collect_item('#unknown', title, commit.hexsha[:shamin], 1, authors, commit.stats.total, commit.committed_date)
   727  
   728  
   729  # Collect all the merge points so we can report progress.
   730  mergepoints = get_direct_history(firstCommit, commit)
   731  i = 0
   732  progress = 0
   733  lastTime = time.time()
   734  for commit in mergepoints:
   735      progress = int(100. * float(i) / len(mergepoints))
   736      newTime = time.time()
   737      if newTime >= lastTime + 5:
   738          print("\b.%d%%\n." % progress, file=sys.stderr, end='')
   739          lastTime = newTime
   740      i += 1
   741      spin()
   742  
   743      ctime = datetime.datetime.fromtimestamp(commit.committed_date).ctime()
   744      numbermatch = merge_numbers.search(commit.message)
   745      if numbermatch is None:
   746          # Try again with the alternate format.
   747          firstline = commit.message.split('\n', 1)[0]
   748          numbermatch = simple_merge.search(firstline)
   749      # Analyze the commit
   750      if numbermatch is not None:
   751          prs = numbermatch.group("numbers").strip().split(" ")
   752          for pr in prs:
   753              print("                                \r%s (%s) " % (pr, ctime), end='', file=sys.stderr)
   754              analyze_pr(commit, pr)
   755      else:
   756          print("                                \r%s (%s) " % (commit.hexsha[:shamin], ctime), end='', file=sys.stderr)
   757          analyze_standalone_commit(commit)
   758  
   759  
   760  print("\b\nAnalyzing authors...", file=sys.stderr)
   761  sys.stderr.flush()
   762  
   763  allgroups = list(per_group_history.keys())
   764  allgroups.sort(key=lambda x: x.lower())
   765  
   766  print("\b\nComputing first-time contributors...", end='', file=sys.stderr)
   767  
   768  ext_contributors = individual_authors - crdb_folk
   769  firsttime_contributors = []
   770  for a in individual_authors:
   771      # Find all aliases known for this person
   772      aliases = a.aliases
   773      # Collect the history for every alias
   774      hist = b''
   775      for al in aliases:
   776          spin()
   777          cmd = subprocess.run(["git", "log", "--author=%s <%s>" % al, options.from_commit, '-n', '1'], stdout=subprocess.PIPE, check=True)
   778          hist += cmd.stdout
   779      if len(hist) == 0:
   780          # No commit from that author older than the first commit
   781          # selected, so that's a first-time author.
   782          firsttime_contributors.append(a)
   783  
   784  print("\b\n", file=sys.stderr)
   785  sys.stderr.flush()
   786  
   787  #
   788  # Presentation of results.
   789  #
   790  
   791  # Print the release notes.
   792  
   793  # Start with known sections.
   794  
   795  current_version = subprocess.check_output(["git", "describe", "--tags", "--match=v[0-9]*", options.until_commit], universal_newlines=True).strip()
   796  previous_version = subprocess.check_output(["git", "describe", "--tags", "--match=v[0-9]*", options.from_commit], universal_newlines=True).strip()
   797  
   798  if not hideheader:
   799      print("---")
   800      print("title: What&#39;s New in", current_version)
   801      print("toc: true")
   802      print("summary: Additions and changes in CockroachDB version", current_version, "since version", previous_version)
   803      print("---")
   804      print()
   805      print("## " + time.strftime("%B %d, %Y"))
   806      print()
   807  
   808  # Print the release notes sign-up and Downloads section.
   809  
   810  if not hidedownloads:
   811      print("""Get future release notes emailed to you:
   812  
   813  <div class="hubspot-install-form install-form-1 clearfix">
   814      <script>
   815          hbspt.forms.create({
   816              css: '',
   817              cssClass: 'install-form',
   818              portalId: '1753393',
   819              formId: '39686297-81d2-45e7-a73f-55a596a8d5ff',
   820              formInstanceId: 1,
   821              target: '.install-form-1'
   822          });
   823      </script>
   824  </div>""")
   825      print()
   826  
   827      print("""### Downloads
   828  
   829  <div id="os-tabs" class="clearfix">
   830      <a href="https://binaries.cockroachdb.com/cockroach-""" + current_version + """.darwin-10.9-amd64.tgz"><button id="mac" data-eventcategory="mac-binary-release-notes">Mac</button></a>
   831      <a href="https://binaries.cockroachdb.com/cockroach-""" + current_version + """.linux-amd64.tgz"><button id="linux" data-eventcategory="linux-binary-release-notes">Linux</button></a>
   832      <a href="https://binaries.cockroachdb.com/cockroach-""" + current_version + """.windows-6.2-amd64.zip"><button id="windows" data-eventcategory="windows-binary-release-notes">Windows</button></a>
   833      <a href="https://binaries.cockroachdb.com/cockroach-""" + current_version + """.src.tgz"><button id="source" data-eventcategory="source-release-notes">Source</button></a>
   834  </div>
   835  """)
   836  
   837      print("""### Docker image
   838  
   839  {% include copy-clipboard.html %}
   840  ~~~shell
   841  $ docker pull cockroachdb/cockroach""" + ("-unstable:" if "-" in current_version else ":") + current_version + """
   842  ~~~
   843  """)
   844      print()
   845  
   846  seenshas = set()
   847  seenprs = set()
   848  
   849  
   850  def renderlinks(item):
   851      ret = '[%(pr)s][%(pr)s]' % item
   852      seenprs.add(item['pr'])
   853      if not hideshas:
   854          ret += ' [%(sha)s][%(sha)s]' % item
   855          seenshas.add(item['sha'])
   856      return ret
   857  
   858  
   859  for sec in relnote_sec_order:
   860      r = release_notes.get(sec, None)
   861      if r is None:
   862          # No change in this section, nothing to print.
   863          continue
   864      sectitle = relnotetitles[sec]
   865      print("###", sectitle)
   866      print()
   867  
   868      for item in reversed(r):
   869          print("-", item['note'].replace('\n', '\n  '), renderlinks(item))
   870  
   871      print()
   872  
   873  extrasec = set()
   874  for sec in release_notes:
   875      if sec in relnote_sec_order:
   876          # already handled above, don't do anything.
   877          continue
   878      extrasec.add(sec)
   879  if len(extrasec) > 0 or len(missing_release_notes) > 0:
   880      print("### Miscellaneous")
   881      print()
   882  if len(extrasec) > 0:
   883      extrasec_sorted = sorted(list(extrasec))
   884      for extrasec in extrasec_sorted:
   885          print("#### %s" % extrasec.capitalize())
   886          print()
   887          for item in release_notes[extrasec]:
   888              print("-", item['note'].replace('\n', '\n  '), renderlinks(item))
   889          print()
   890  
   891  if len(missing_release_notes) > 0:
   892      print("#### Changes without release note annotation")
   893      print()
   894      for item in missing_release_notes:
   895          authors = ', '.join(str(x) for x in sorted(item['authors']))
   896          print("- [%(pr)s][%(pr)s] [%(sha)s][%(sha)s] %(title)s" % item, "(%s)" % authors)
   897          seenshas.add(item['sha'])
   898          seenprs.add(item['pr'])
   899      print()
   900  
   901  # Print the Doc Updates section.
   902  print("### Doc updates")
   903  print()
   904  print("Docs team: Please add these manually.")
   905  print()
   906  
   907  # Print the Contributors section.
   908  print("### Contributors")
   909  print()
   910  print("This release includes %d merged PR%s by %s author%s." %
   911        (len(allprs), len(allprs) != 1 and "s" or "",
   912         len(individual_authors), (len(individual_authors) != 1 and "s" or "")))
   913  
   914  ext_contributors = individual_authors - crdb_folk
   915  
   916  notified_authors = sorted(set(ext_contributors) | set(firsttime_contributors))
   917  if len(notified_authors) > 0:
   918      print("We would like to thank the following contributors from the CockroachDB community:")
   919      print()
   920      for person in notified_authors:
   921          print("-", person.name, end='')
   922          if person in firsttime_contributors:
   923              annot = ""
   924              if person.crdb:
   925                  annot = ", CockroachDB team member"
   926              print(" (first-time contributor%s)" % annot, end='')
   927          print()
   928  print()
   929  
   930  # Print the per-author contribution list.
   931  if not hidepercontributor:
   932      print("### PRs merged by contributors")
   933      print()
   934      if not hideshas:
   935          fmt = "  - %(date)s [%(pr)-6s][%(pr)-6s] [%(sha)s][%(sha)s] (+%(insertions)4d -%(deletions)4d ~%(lines)4d/%(files)2d) %(title)s"
   936      else:
   937          fmt = "  - %(date)s [%(pr)-6s][%(pr)-6s] (+%(insertions)4d -%(deletions)4d ~%(lines)4d/%(files)2d) %(title)s"
   938  
   939      for group in allgroups:
   940          al, items = per_group_history[group]
   941          items.sort(key=lambda x: x[sortkey], reverse=not revsort)
   942          print("- %s:" % ', '.join(a.name for a in sorted(al)))
   943          for item in items:
   944              print(fmt % item, end='')
   945              if not hideshas:
   946                  seenshas.add(item['sha'])
   947              seenprs.add(item['pr'])
   948  
   949              ncommits = item['ncommits']
   950              if ncommits > 1:
   951                  print(" (", end='')
   952                  print("%d commits" % ncommits, end='')
   953                  print(")", end='')
   954              print()
   955          print()
   956      print()
   957  
   958  # Link the PRs and SHAs
   959  for pr in sorted(seenprs):
   960      print("[%s]: https://github.com/cockroachdb/cockroach/pull/%s" % (pr, pr[1:]))
   961  for sha in sorted(seenshas):
   962      print("[%s]: https://github.com/cockroachdb/cockroach/commit/%s" % (sha, sha))
   963  print()