#!/usr/bin/ruby

require 'set'

# Get commits in one branch, but not in another, accounting for cherry-picks.
# Accepts two arguments: base branch and old branch. Commits in base branch that
# are not in old branch will be reported.

# Preface: I know exactly enough ruby to be dangerous with it.
# For anyone reading this who is actually skilled at writing Ruby, I can only
# say I'm very, very sorry.

# Utility functions:

# Check if a given Git branch exists.
#
# Returns true when `branch` resolves to a commit in the current repository and
# false otherwise. The name is resolved with `git rev-parse --verify`, so local
# branches and remote-tracking refs (e.g. upstream/main) are both accepted.
def CheckBranchExists(branch)
  # A leading dash would be parsed by git as an option, not as a revision.
  return false if branch.start_with?('-')

  # system returns true on exit status 0, false on failure and nil if git could
  # not be executed at all; normalise the nil case to false.
  system('git', 'rev-parse', '--verify', '--quiet', "#{branch}^{commit}",
         out: File::NULL, err: File::NULL) || false
end

# Returns author (email) and commit subject for the given hash.
#
# The result is a two-element array: [author_email, subject]. If git does not
# report exactly two lines for the commit (for example when the hash is unknown
# or the subject is empty), an error is printed and the script exits with
# status 127.
def GetCommitInfo(hash)
  # Arguments are passed as an array so that no shell is involved.
  info = IO.popen(['git', 'log', '-n', '1', '--format=%ae%n%s', hash], &:read).split("\n")
  if info.length != 2
    puts("Badly-formatted commit with hash #{hash}")
    exit(127)
  end
  info
end

# Actual script begins here

if ARGV.length != 2
  puts("Must provide exactly 2 arguments, base branch and old branch")
  exit(127)
end

# Both branches must exist
ARGV.each do |branch|
  unless CheckBranchExists(branch)
    puts("Branch #{branch} does not exist")
    exit(127)
  end
end

base = ARGV[0]
old = ARGV[1]

# Get a base list of commits: everything reachable from base but not from old.
# The trailing '--' makes git treat both names as revisions, never as paths.
commits = IO.popen(['git', 'log', '--no-merges', '--format=%H', base, "^#{old}", '--'], &:read).split("\n")

# Alright, now for the hacky bit.
# We want to remove every commit with a shortlog precisely matching something in
# the old branch. This is an effort to catch cherry-picks, where commit ID has
# almost certainly changed because the committer is different (and possibly
# conflicts needed to be resolved).
# We will also try and match author, but not committer (which is reset to
# whoever did the cherry-pick). We will *not* match full commit body - I
# routinely edit these when I fix cherry-pick conflicts to indicate that I made
# changes.
# A more ambitious future committer could attempt to see if the body of
# the commit message in the old branch is a subset of the full commit message
# from the base branch, but there are potential performance implications in that
# due to the size of the string comparison that would be needed.
# This will not catch commits where the shortlog is deliberately altered as part
# of the cherry pick... But we can just ask folks not to do that, I guess?
# (A classic example of something this wouldn't catch: cherry-picking a commit
# to a branch and then prepending the branch name to the commit subject. I see
# this a lot in Github PR subjects, but fortunately not much at all in actual
# commit subjects).

# Begin by fetching commit author + subject for each commit in old branch.
# Map each author to a set of potential commit subjects.
old_index = {}

# The whole old branch is read with a single `git log` call rather than one git
# process per commit. Each output line is "<author email>\0<subject>"; NUL is
# used as the separator because it cannot appear in either field.
# Unlike a per-commit GetCommitInfo lookup, a commit with an empty subject does
# not abort the script here; it is simply indexed with an empty subject.
old_log = IO.popen(['git', 'log', '--no-merges', '--format=%ae%x00%s', old, '--'], &:read)
old_log.split("\n").each do |line|
  author, subject = line.split("\0", 2)
  # Skip anything that is not a well-formed "<author>\0<subject>" record.
  next if subject.nil?

  (old_index[author] ||= Set[]) << subject
end

# Go through our earlier commits list and check for matches.
# A commit is dropped when the old branch has a commit by the same author with
# exactly the same subject.
filtered = commits.reject do |hash|
  author, subject = GetCommitInfo(hash)
  old_index.key?(author) && old_index[author].include?(subject)
end

# We have now filtered out all commits we want to filter.
# Now we just have to print all remaining commits.
# This breaks the default pager, but we can just pipe to less.
filtered.each do |hash|
  puts `git log -n 1 #{hash}`
  puts "\n"
end