github.com/anchore/syft@v1.38.2/.github/scripts/find_cache_paths.py (about)

     1  #!/usr/bin/env python3
     2  from __future__ import annotations
     3  
     4  import os
     5  import glob
     6  import sys
     7  import json
     8  import hashlib
     9  
    10  
    11  IGNORED_PREFIXES = []
    12  
    13  
    14  def find_fingerprints_and_check_dirs(base_dir):
    15      all_fingerprints = set(glob.glob(os.path.join(base_dir, '**', 'test*', '**', '*.fingerprint'), recursive=True))
    16  
    17      all_fingerprints = {os.path.relpath(fp) for fp in all_fingerprints
    18                          if not any(fp.startswith(prefix) for prefix in IGNORED_PREFIXES)}
    19  
    20      if not all_fingerprints:
    21          show("No .fingerprint files or cache directories found.")
    22          exit(1)
    23  
    24      missing_content = []
    25      valid_paths = set()
    26      fingerprint_contents = []
    27  
    28      for fingerprint in all_fingerprints:
    29          path = fingerprint.replace('.fingerprint', '')
    30  
    31          if not os.path.exists(path):
    32              missing_content.append(path)
    33              continue
    34  
    35          if not os.path.isdir(path):
    36              valid_paths.add(path)
    37              continue
    38  
    39          if os.listdir(path):
    40              valid_paths.add(path)
    41          else:
    42              missing_content.append(path)
    43  
    44          with open(fingerprint, 'r') as f:
    45              content = f.read().strip()
    46              fingerprint_contents.append((fingerprint, content))
    47  
    48      return sorted(valid_paths), missing_content, fingerprint_contents
    49  
    50  
    51  def parse_fingerprint_contents(fingerprint_content):
    52      input_map = {}
    53      for line in fingerprint_content.splitlines():
    54          digest, path = line.split()
    55          input_map[path] = digest
    56      return input_map
    57  
    58  
    59  def calculate_sha256(fingerprint_contents):
    60      sorted_fingerprint_contents = sorted(fingerprint_contents, key=lambda x: x[0])
    61  
    62      concatenated_contents = ''.join(content for _, content in sorted_fingerprint_contents)
    63  
    64      sha256_hash = hashlib.sha256(concatenated_contents.encode()).hexdigest()
    65  
    66      return sha256_hash
    67  
    68  
    69  def calculate_file_sha256(file_path):
    70      sha256_hash = hashlib.sha256()
    71      with open(file_path, 'rb') as f:
    72          for byte_block in iter(lambda: f.read(4096), b""):
    73              sha256_hash.update(byte_block)
    74      return sha256_hash.hexdigest()
    75  
    76  
    77  def show(*s: str):
    78      print(*s, file=sys.stderr)
    79  
    80  
    81  def main(file_path: str | None):
    82      base_dir = '.'
    83      valid_paths, missing_content, fingerprint_contents = find_fingerprints_and_check_dirs(base_dir)
    84  
    85      if missing_content:
    86          show("The following paths are missing or have no content, but have corresponding .fingerprint files:")
    87          for path in sorted(missing_content):
    88              show(f"- {path}")
    89          # when adding new cache directories there is a time where it is not possible to have this directory without
    90          # running the tests first... but this step is a prerequisite for running the tests. We should not block on this.
    91          # show("Please ensure these paths exist and have content if they are directories.")
    92          # exit(1)
    93  
    94      sha256_hash = calculate_sha256(fingerprint_contents)
    95  
    96      paths_with_digests = []
    97      for path in sorted(valid_paths):
    98          fingerprint_file = f"{path}.fingerprint"
    99          try:
   100              if os.path.exists(fingerprint_file):
   101                  file_digest = calculate_file_sha256(fingerprint_file)
   102  
   103                  # Parse the fingerprint file to get the digest/path tuples
   104                  with open(fingerprint_file, 'r') as f:
   105                      fingerprint_content = f.read().strip()
   106                      input_map = parse_fingerprint_contents(fingerprint_content)
   107  
   108                  paths_with_digests.append({
   109                      "path": path,
   110                      "digest": file_digest,
   111                      "input": input_map
   112                  })
   113  
   114          except Exception as e:
   115              show(f"Error processing {fingerprint_file}: {e}")
   116              raise e
   117  
   118  
   119      output = {
   120          "digest": sha256_hash,
   121          "paths": paths_with_digests
   122      }
   123  
   124      content = json.dumps(output, indent=2, sort_keys=True)
   125  
   126      if file_path:
   127          with open(file_path, 'w') as f:
   128              f.write(content)
   129  
   130      print(content)
   131  
   132  
   133  if __name__ == "__main__":
   134      file_path = None
   135      if len(sys.argv) > 1:
   136          file_path = sys.argv[1]
   137      main(file_path)