github.com/anchore/syft@v1.38.2/.github/scripts/labeler.py (about)

     1  #!/usr/bin/env python3
     2  
     3  from __future__ import annotations
     4  
     5  import sys
     6  import glob
     7  import subprocess
     8  import os
     9  import re
    10  
# when True, run() only prints the command it was given instead of executing it. main() switches this on
# when GITHUB_PR_NUMBER is not set or when a changed-files list is passed in for local testing.
DRY_RUN = False

# label that main() adds when the PR touches versioned JSON schema files (and removes when it does not)
JSON_SCHEMA_LABEL = "json-schema"

# note: we can't use "breaking-change" as the label since that might be applied manually by a user. This is a
# distinct label that we can use to indicate that the label was applied (or removed) by automation.
BREAKING_CHANGE_LABEL = "detected-breaking-change"
    18  
    19  
    20  def main(changed_files: str | None = None, merge_base_schema_files: str | None = None):
    21      global DRY_RUN
    22  
    23      pr_number = os.environ.get("GITHUB_PR_NUMBER")
    24      comment_file_path = os.environ.get("CI_COMMENT_FILE")
    25  
    26      if not comment_file_path:
    27          print("CI_COMMENT_FILE not set")
    28          sys.exit(1)
    29  
    30      if not pr_number:
    31          DRY_RUN = True
    32  
    33      if changed_files:
    34          DRY_RUN = True
    35  
    36          # read lines from file... this is useful for local testing
    37          with open(changed_files) as f:
    38              pr_changed_files = f.read().splitlines()
    39  
    40          with open(merge_base_schema_files) as f:
    41              og_json_schema_files = sort_json_schema_files(f.read().splitlines())
    42  
    43      else:
    44          if not is_ci():
    45              print("Not in CI")
    46              sys.exit(1)
    47  
    48          if not pr_number:
    49              print("Not a PR")
    50              sys.exit(1)
    51  
    52          pr_changed_files = get_pr_changed_files(pr_number)
    53          # since we are running this in the context of the pull_request_target, the checkout is the merge base..
    54          # that is the main branch of the original repo, NOT the branch in the forked repo (or branch in the target 
    55          # repo for non-forked PRs). This means we just need to list the current checkedout files to get a sense of
    56          # the changes before a merge.
    57          og_json_schema_files = list_json_schema_files()
    58  
    59      pr_json_schema_files = filter_to_schema_files(pr_changed_files)
    60  
    61      pr_labels = get_pr_labels(pr_number)
    62  
    63      # print("schema files in pr:   ", summarize_schema_files(pr_json_schema_files))
    64      # print("og schema files:      ", summarize_schema_files(og_json_schema_files))
    65  
    66      if not og_json_schema_files:
    67          print("No schema files found in merge base")
    68          sys.exit(1)
    69  
    70      # pr_json_schema_files = set of PR files are added, removed, and changed files
    71      new_schema_files = set(pr_json_schema_files) - set(og_json_schema_files)
    72      removed_or_modified_schema_files = set(pr_json_schema_files) - set(new_schema_files)
    73  
    74      print("new schemas:                ", summarize_schema_files(new_schema_files))
    75      print("removed or modified schemas:", summarize_schema_files(removed_or_modified_schema_files))
    76  
    77      # if there is a new or modified schema, we should add the "json-schema" label to the PR...
    78      if new_schema_files or removed_or_modified_schema_files:
    79          print("\nAdding json-schema label...")
    80          add_label(pr_number, JSON_SCHEMA_LABEL)
    81  
    82      else:
    83          if JSON_SCHEMA_LABEL in pr_labels:
    84              remove_label(pr_number, JSON_SCHEMA_LABEL)
    85  
    86      # new schema files should be scrutinized, comparing the latest and added versions to see if it's a breaking
    87      # change (major version bump). Warn about it on the PR via adding a breaking-change label...
    88      if is_breaking_change(new_schema_files, og_json_schema_files[-1]):
    89          print("\nBreaking change detected...")
    90          add_label(pr_number, BREAKING_CHANGE_LABEL)
    91      else:
    92          if BREAKING_CHANGE_LABEL in pr_labels:        
    93              remove_label(pr_number, BREAKING_CHANGE_LABEL)
    94  
    95      # modifying an existing schema could be a breaking change, we should warn about it on the PR via a comment...
    96      # removing schema files should never be allowed, we should warn about it on the PR via a comment...
    97      if removed_or_modified_schema_files:
    98          print("\nRemoved or modified schema detected...")
    99          schemas = sort_json_schema_files(list(removed_or_modified_schema_files))
   100          schemas_str = "\n".join([f" - {schema}" for schema in schemas])
   101          add_comment(comment_file_path, f"Detected modification or removal of existing json schemas:\n{schemas_str}", warning=True)
   102  
   103  
   104  def add_comment(comment_file_path: str, comment: str, warning: bool = False, important: bool = False):
   105      if warning or important:
   106          comment_lines = comment.splitlines()
   107          comment = "\n".join([f"> {line}" for line in comment_lines])
   108  
   109      if warning:
   110          comment = f"> [!WARNING]\n{comment}"    
   111      elif important:
   112          comment = f"> [!IMPORTANT]\n{comment}"
   113  
   114      # create any parent directories if they don't exist
   115      os.makedirs(os.path.dirname(comment_file_path), exist_ok=True)
   116  
   117      with open(comment_file_path, "w") as f:
   118          f.write(comment)
   119  
   120      print(f"Comment file contents: {comment_file_path}")
   121      print(comment)
   122  
   123  
   124  def add_label(pr_number: str, label: str):
   125      # run "gh pr edit --add-label <label>"
   126      result = run(f"gh pr edit {pr_number} --add-label {label}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
   127      if result.returncode != 0:
   128          print(f"Unable to add '{label!r}' label to PR, error:")
   129          print(str(result.stderr))
   130          sys.exit(1)
   131  
   132  
   133  def remove_label(pr_number: str, label: str):
   134      # run "gh pr edit --remove-label <label>"
   135      result = run(f"gh pr edit {pr_number} --remove-label {label}", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
   136      if result.returncode != 0:
   137          print(f"Unable to remove '{label!r}' label from PR, error:")
   138          print(str(result.stderr))
   139          sys.exit(1)
   140  
   141  
   142  def major_version(semver: str) -> int:
   143      return int(semver.split(".")[0])
   144  
   145  
   146  def is_breaking_change(new_schema_files: set[str], latest_schema_file: str) -> bool:
   147      latest_major_version = major_version(get_semver(latest_schema_file))
   148      for file in new_schema_files:
   149          change_major_version = major_version(get_semver(file))
   150          if change_major_version > latest_major_version:
   151              return True
   152      return False
   153  
   154  
   155  def summarize_schema_files(files: list[str]) -> list[str]:
   156      return [get_semver(file) for file in files]
   157  
   158  
   159  def is_ci() -> bool:
   160      return "CI" in os.environ
   161  
   162  
   163  def get_pr_changed_files(pr_number: str) -> list[str]:
   164      result = run(f"gh pr view {pr_number} --json files --jq '.files.[].path'", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
   165      if result.returncode != 0:
   166          print("Unable to get list of changed files in PR")
   167          print(str(result.stderr))
   168          sys.exit(1)
   169      
   170      list_of_files = result.stdout.splitlines()
   171      return list_of_files
   172  
   173  
   174  def get_pr_labels(pr_number: str) -> list[str]:
   175      result = run(f"gh pr view {pr_number} --json labels --jq '.labels[].name'", shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
   176      if result.returncode != 0:
   177          print("Unable to get list of labels on PR")
   178          print(str(result.stderr))
   179          sys.exit(1)
   180      
   181      list_of_labels = result.stdout.splitlines()
   182      return list_of_labels
   183  
   184  
   185  def filter_to_schema_files(list_of_files: list[str]) -> list[str]:
   186      # get files matching "schema/json/schema-*.json"
   187      files = []
   188      for file in list_of_files:
   189          if re.match(r"^schema/json/schema-\d+\.\d+\.\d+\.json$", file):
   190              files.append(file)
   191      return sort_json_schema_files(files)
   192  
   193  
   194  def list_json_schema_files() -> list[str]:
   195      # list files in "schema/json" directory matching the pattern of "schema-*.json"
   196      # special case: always ignore the "latest" schema file
   197      return sort_json_schema_files([f for f in glob.glob("schema/json/schema-*.json") if "latest" not in f])
   198  
   199  
   200  def run(command: str,  **kwargs) -> subprocess.CompletedProcess:
   201      if DRY_RUN:
   202          print(f"[DRY RUN] {command}")
   203          return subprocess.CompletedProcess(args=[command], returncode=0)
   204      print(f"[RUN] {command}")
   205      return subprocess.run(command, **kwargs)
   206  
   207  
   208  def get_semver(input_file: str) -> str:
   209      return input_file.split("-")[1].split(".json")[0]
   210  
   211  
   212  def sort_json_schema_files(files: list[str]) -> list[str]:
   213      # sort files by schema version, where the input looks like "schema/json/schema-1.12.1.json"
   214      # we should sort by the semantic version embedded within the basename, not the string
   215      # so that "schema/json/schema-1.2.1.json" comes before "schema/json/schema-1.12.1.json".
   216      versions = [get_semver(file) for file in files if file]
   217      
   218      versions = sorted(versions, key=lambda s: [int(u) for u in s.split('.') if "." in s])
   219  
   220      return [f"schema/json/schema-{version}.json" for version in versions]
   221  
   222  
# example of a test input file for local runs (a list of changed files, one path per line):
   224  
   225  # .binny.yaml
   226  # .github/actions/bootstrap/action.yaml
   227  # .github/scripts/goreleaser-install.sh
   228  # .github/workflows/release.yaml
   229  # .github/workflows/update-bootstrap-tools.yml
   230  # .github/workflows/update-cpe-dictionary-index.yml
   231  # .github/workflows/update-stereoscope-release.yml
   232  # .github/workflows/validations.yaml
   233  # .gitignore
   234  # .goreleaser.yaml
   235  # Makefile
   236  # Taskfile.yaml
   237  # schema/cyclonedx/Makefile
   238  
   239  if __name__ == "__main__":
   240      # these are variables for a single file name that contains a list of files (line separated)
   241      changed_files = None
   242      merge_base_schema_files = None
   243  
   244      if len(sys.argv) > 2:
   245          changed_files = sys.argv[1]
   246          merge_base_schema_files = sys.argv[2]
   247  
   248      main(changed_files, merge_base_schema_files)
   249