github.com/anchore/syft@v1.38.2/.github/scripts/labeler.py (about) 1 #!/usr/bin/env python3 2 3 from __future__ import annotations 4 5 import sys 6 import glob 7 import subprocess 8 import os 9 import re 10 11 DRY_RUN = False 12 13 JSON_SCHEMA_LABEL = "json-schema" 14 15 # note: we can't use "breaking-change" as the label since that might be applied manually by a user. This is a 16 # distinct label that we can use to indicate that the label was applied (or removed) by automation. 17 BREAKING_CHANGE_LABEL = "detected-breaking-change" 18 19 20 def main(changed_files: str | None = None, merge_base_schema_files: str | None = None): 21 global DRY_RUN 22 23 pr_number = os.environ.get("GITHUB_PR_NUMBER") 24 comment_file_path = os.environ.get("CI_COMMENT_FILE") 25 26 if not comment_file_path: 27 print("CI_COMMENT_FILE not set") 28 sys.exit(1) 29 30 if not pr_number: 31 DRY_RUN = True 32 33 if changed_files: 34 DRY_RUN = True 35 36 # read lines from file... this is useful for local testing 37 with open(changed_files) as f: 38 pr_changed_files = f.read().splitlines() 39 40 with open(merge_base_schema_files) as f: 41 og_json_schema_files = sort_json_schema_files(f.read().splitlines()) 42 43 else: 44 if not is_ci(): 45 print("Not in CI") 46 sys.exit(1) 47 48 if not pr_number: 49 print("Not a PR") 50 sys.exit(1) 51 52 pr_changed_files = get_pr_changed_files(pr_number) 53 # since we are running this in the context of the pull_request_target, the checkout is the merge base.. 54 # that is the main branch of the original repo, NOT the branch in the forked repo (or branch in the target 55 # repo for non-forked PRs). This means we just need to list the current checkedout files to get a sense of 56 # the changes before a merge. 57 og_json_schema_files = list_json_schema_files() 58 59 pr_json_schema_files = filter_to_schema_files(pr_changed_files) 60 61 pr_labels = get_pr_labels(pr_number) 62 63 # print("schema files in pr: ", summarize_schema_files(pr_json_schema_files)) 64 # print("og schema files: ", summarize_schema_files(og_json_schema_files)) 65 66 if not og_json_schema_files: 67 print("No schema files found in merge base") 68 sys.exit(1) 69 70 # pr_json_schema_files = set of PR files are added, removed, and changed files 71 new_schema_files = set(pr_json_schema_files) - set(og_json_schema_files) 72 removed_or_modified_schema_files = set(pr_json_schema_files) - set(new_schema_files) 73 74 print("new schemas: ", summarize_schema_files(new_schema_files)) 75 print("removed or modified schemas:", summarize_schema_files(removed_or_modified_schema_files)) 76 77 # if there is a new or modified schema, we should add the "json-schema" label to the PR... 78 if new_schema_files or removed_or_modified_schema_files: 79 print("\nAdding json-schema label...") 80 add_label(pr_number, JSON_SCHEMA_LABEL) 81 82 else: 83 if JSON_SCHEMA_LABEL in pr_labels: 84 remove_label(pr_number, JSON_SCHEMA_LABEL) 85 86 # new schema files should be scrutinized, comparing the latest and added versions to see if it's a breaking 87 # change (major version bump). Warn about it on the PR via adding a breaking-change label... 88 if is_breaking_change(new_schema_files, og_json_schema_files[-1]): 89 print("\nBreaking change detected...") 90 add_label(pr_number, BREAKING_CHANGE_LABEL) 91 else: 92 if BREAKING_CHANGE_LABEL in pr_labels: 93 remove_label(pr_number, BREAKING_CHANGE_LABEL) 94 95 # modifying an existing schema could be a breaking change, we should warn about it on the PR via a comment... 96 # removing schema files should never be allowed, we should warn about it on the PR via a comment... 97 if removed_or_modified_schema_files: 98 print("\nRemoved or modified schema detected...") 99 schemas = sort_json_schema_files(list(removed_or_modified_schema_files)) 100 schemas_str = "\n".join([f" - {schema}" for schema in schemas]) 101 add_comment(comment_file_path, f"Detected modification or removal of existing json schemas:\n{schemas_str}", warning=True) 102 103 104 def add_comment(comment_file_path: str, comment: str, warning: bool = False, important: bool = False): 105 if warning or important: 106 comment_lines = comment.splitlines() 107 comment = "\n".join([f"> {line}" for line in comment_lines]) 108 109 if warning: 110 comment = f"> [!WARNING]\n{comment}" 111 elif important: 112 comment = f"> [!IMPORTANT]\n{comment}" 113 114 # create any parent directories if they don't exist 115 os.makedirs(os.path.dirname(comment_file_path), exist_ok=True) 116 117 with open(comment_file_path, "w") as f: 118 f.write(comment) 119 120 print(f"Comment file contents: {comment_file_path}") 121 print(comment) 122 123 124 def add_label(pr_number: str, label: str): 125 # run "gh pr edit --add-label <label>" 126 result = run(f"gh pr edit {pr_number} --add-label {label}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 127 if result.returncode != 0: 128 print(f"Unable to add '{label!r}' label to PR, error:") 129 print(str(result.stderr)) 130 sys.exit(1) 131 132 133 def remove_label(pr_number: str, label: str): 134 # run "gh pr edit --remove-label <label>" 135 result = run(f"gh pr edit {pr_number} --remove-label {label}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 136 if result.returncode != 0: 137 print(f"Unable to remove '{label!r}' label from PR, error:") 138 print(str(result.stderr)) 139 sys.exit(1) 140 141 142 def major_version(semver: str) -> int: 143 return int(semver.split(".")[0]) 144 145 146 def is_breaking_change(new_schema_files: set[str], latest_schema_file: str) -> bool: 147 latest_major_version = major_version(get_semver(latest_schema_file)) 148 for file in new_schema_files: 149 change_major_version = major_version(get_semver(file)) 150 if change_major_version > latest_major_version: 151 return True 152 return False 153 154 155 def summarize_schema_files(files: list[str]) -> list[str]: 156 return [get_semver(file) for file in files] 157 158 159 def is_ci() -> bool: 160 return "CI" in os.environ 161 162 163 def get_pr_changed_files(pr_number: str) -> list[str]: 164 result = run(f"gh pr view {pr_number} --json files --jq '.files.[].path'", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) 165 if result.returncode != 0: 166 print("Unable to get list of changed files in PR") 167 print(str(result.stderr)) 168 sys.exit(1) 169 170 list_of_files = result.stdout.splitlines() 171 return list_of_files 172 173 174 def get_pr_labels(pr_number: str) -> list[str]: 175 result = run(f"gh pr view {pr_number} --json labels --jq '.labels[].name'", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) 176 if result.returncode != 0: 177 print("Unable to get list of labels on PR") 178 print(str(result.stderr)) 179 sys.exit(1) 180 181 list_of_labels = result.stdout.splitlines() 182 return list_of_labels 183 184 185 def filter_to_schema_files(list_of_files: list[str]) -> list[str]: 186 # get files matching "schema/json/schema-*.json" 187 files = [] 188 for file in list_of_files: 189 if re.match(r"^schema/json/schema-\d+\.\d+\.\d+\.json$", file): 190 files.append(file) 191 return sort_json_schema_files(files) 192 193 194 def list_json_schema_files() -> list[str]: 195 # list files in "schema/json" directory matching the pattern of "schema-*.json" 196 # special case: always ignore the "latest" schema file 197 return sort_json_schema_files([f for f in glob.glob("schema/json/schema-*.json") if "latest" not in f]) 198 199 200 def run(command: str, **kwargs) -> subprocess.CompletedProcess: 201 if DRY_RUN: 202 print(f"[DRY RUN] {command}") 203 return subprocess.CompletedProcess(args=[command], returncode=0) 204 print(f"[RUN] {command}") 205 return subprocess.run(command, **kwargs) 206 207 208 def get_semver(input_file: str) -> str: 209 return input_file.split("-")[1].split(".json")[0] 210 211 212 def sort_json_schema_files(files: list[str]) -> list[str]: 213 # sort files by schema version, where the input looks like "schema/json/schema-1.12.1.json" 214 # we should sort by the semantic version embedded within the basename, not the string 215 # so that "schema/json/schema-1.2.1.json" comes before "schema/json/schema-1.12.1.json". 216 versions = [get_semver(file) for file in files if file] 217 218 versions = sorted(versions, key=lambda s: [int(u) for u in s.split('.') if "." in s]) 219 220 return [f"schema/json/schema-{version}.json" for version in versions] 221 222 223 # allow for test files that have line-by-line list of files: 224 225 # .binny.yaml 226 # .github/actions/bootstrap/action.yaml 227 # .github/scripts/goreleaser-install.sh 228 # .github/workflows/release.yaml 229 # .github/workflows/update-bootstrap-tools.yml 230 # .github/workflows/update-cpe-dictionary-index.yml 231 # .github/workflows/update-stereoscope-release.yml 232 # .github/workflows/validations.yaml 233 # .gitignore 234 # .goreleaser.yaml 235 # Makefile 236 # Taskfile.yaml 237 # schema/cyclonedx/Makefile 238 239 if __name__ == "__main__": 240 # these are variables for a single file name that contains a list of files (line separated) 241 changed_files = None 242 merge_base_schema_files = None 243 244 if len(sys.argv) > 2: 245 changed_files = sys.argv[1] 246 merge_base_schema_files = sys.argv[2] 247 248 main(changed_files, merge_base_schema_files) 249