github.com/containers/podman/v5@v5.1.0-rc1/hack/branch_commits.rb (about)

     1  #!/usr/bin/ruby
     2  
     3  require 'set'
     4  
     5  # Get commits in one branch, but not in another, accounting for cherry-picks.
     6  # Accepts two arguments: base branch and old branch. Commits in base branch that
     7  # are not in old branch will be reported.
     8  
     9  # Preface: I know exactly enough ruby to be dangerous with it.
    10  # For anyone reading this who is actually skilled at writing Ruby, I can only
    11  # say I'm very, very sorry.
    12  
    13  # Utility functions:
    14  
    15  # Check if a given Git branch exists
    16  def CheckBranchExists(branch)
    17    return `git branch --list #{branch}`.rstrip.empty?
    18  end
    19  
    20  # Returns author (email) and commit subject for the given hash
    21  def GetCommitInfo(hash)
    22    info = `git log -n 1 --format='%ae%n%s' #{hash}`.split("\n")
    23    if info.length != 2
    24      puts("Badly-formatted commit with hash #{hash}")
    25      exit(127)
    26    end
    27    return info[0], info[1]
    28  end
    29  
    30  # Actual script begins here
    31  
    32  if ARGV.length != 2
    33    puts("Must provide exactly 2 arguments, base branch and old branch")
    34    exit(127)
    35  end
    36  
    37  # Both branches must exist
    38  ARGV.each do |branch|
    39    if !CheckBranchExists(branch)
    40      puts("Branch #{branch} does not exist")
    41      exit(127)
    42    end
    43  end
    44  
    45  base = ARGV[0]
    46  old = ARGV[1]
    47  
    48  # Get a base list of commits
    49  commits = `git log --no-merges --format=%H #{base} ^#{old}`.split("\n")
    50  
    51  # Alright, now for the hacky bit.
    52  # We want to remove every commit with a shortlog precisely matching something in
    53  # the old branch. This is an effort to catch cherry-picks, where commit ID has
    54  # almost certainly changed because the committer is different (and possibly
    55  # conflicts needed to be resolved).
    56  # We will also try to match author, but not committer (which is reset to
    57  # whoever did the cherry-pick). We will *not* match full commit body - I
    58  # routinely edit these when I fix cherry-pick conflicts to indicate that I made
    59  # changes. A more ambitious future committer could attempt to see if the body of
    60  # the commit message in the old branch is a subset of the full commit message
    61  # from the base branch, but there are potential performance implications in that
    62  # due to the size of the string comparison that would be needed.
    63  # This will not catch commits where the shortlog is deliberately altered as part
    64  # of the cherry pick... But we can just ask folks not to do that, I guess?
    65  # (A classic example of something this wouldn't catch: cherry-picking a commit
    66  # to a branch and then prepending the branch name to the commit subject. I see
    67  # this a lot in Github PR subjects, but fortunately not much at all in actual
    68  # commit subjects).
    69  
    70  # Begin by fetching commit author + subject for each commit in old branch.
    71  # Map each author to an array of potential commit subjects.
    72  oldIndex = {}
    73  
    74  # TODO: This could probably be made a whole lot more efficient by unifying the
    75  # GetCommitInfo bits into two big `git log --format` calls.
    76  # But I'm not really ambitious enough to do that...
    77  oldCommits = `git log --no-merges --format=%H #{old}`.split("\n")
    78  oldCommits.each do |hash|
    79    name, subject = GetCommitInfo(hash)
    80    if oldIndex[name] == nil
    81      oldIndex[name] = Set[]
    82    end
    83    oldIndex[name].add(subject)
    84  end
    85  
    86  # Go through our earlier commits list and check for matches.
    87  filtered = commits.reject do |hash|
    88    name, subject = GetCommitInfo(hash)
    89    oldIndex[name] != nil && oldIndex[name].include?(subject)
    90  end
    91  
    92  # We have now filtered out all commits we want to filter.
    93  # Now we just have to print all remaining commits.
    94  # This breaks the default pager, but we can just pipe to less.
    95  filtered.each do |hash|
    96    puts `git log -n 1 #{hash}`
    97    puts "\n"
    98  end