github.com/anchore/syft@v1.38.2/.github/scripts/find_cache_paths.py (about) 1 #!/usr/bin/env python3 2 from __future__ import annotations 3 4 import os 5 import glob 6 import sys 7 import json 8 import hashlib 9 10 11 IGNORED_PREFIXES = [] 12 13 14 def find_fingerprints_and_check_dirs(base_dir): 15 all_fingerprints = set(glob.glob(os.path.join(base_dir, '**', 'test*', '**', '*.fingerprint'), recursive=True)) 16 17 all_fingerprints = {os.path.relpath(fp) for fp in all_fingerprints 18 if not any(fp.startswith(prefix) for prefix in IGNORED_PREFIXES)} 19 20 if not all_fingerprints: 21 show("No .fingerprint files or cache directories found.") 22 exit(1) 23 24 missing_content = [] 25 valid_paths = set() 26 fingerprint_contents = [] 27 28 for fingerprint in all_fingerprints: 29 path = fingerprint.replace('.fingerprint', '') 30 31 if not os.path.exists(path): 32 missing_content.append(path) 33 continue 34 35 if not os.path.isdir(path): 36 valid_paths.add(path) 37 continue 38 39 if os.listdir(path): 40 valid_paths.add(path) 41 else: 42 missing_content.append(path) 43 44 with open(fingerprint, 'r') as f: 45 content = f.read().strip() 46 fingerprint_contents.append((fingerprint, content)) 47 48 return sorted(valid_paths), missing_content, fingerprint_contents 49 50 51 def parse_fingerprint_contents(fingerprint_content): 52 input_map = {} 53 for line in fingerprint_content.splitlines(): 54 digest, path = line.split() 55 input_map[path] = digest 56 return input_map 57 58 59 def calculate_sha256(fingerprint_contents): 60 sorted_fingerprint_contents = sorted(fingerprint_contents, key=lambda x: x[0]) 61 62 concatenated_contents = ''.join(content for _, content in sorted_fingerprint_contents) 63 64 sha256_hash = hashlib.sha256(concatenated_contents.encode()).hexdigest() 65 66 return sha256_hash 67 68 69 def calculate_file_sha256(file_path): 70 sha256_hash = hashlib.sha256() 71 with open(file_path, 'rb') as f: 72 for byte_block in iter(lambda: f.read(4096), b""): 73 sha256_hash.update(byte_block) 74 return sha256_hash.hexdigest() 75 76 77 def show(*s: str): 78 print(*s, file=sys.stderr) 79 80 81 def main(file_path: str | None): 82 base_dir = '.' 83 valid_paths, missing_content, fingerprint_contents = find_fingerprints_and_check_dirs(base_dir) 84 85 if missing_content: 86 show("The following paths are missing or have no content, but have corresponding .fingerprint files:") 87 for path in sorted(missing_content): 88 show(f"- {path}") 89 # when adding new cache directories there is a time where it is not possible to have this directory without 90 # running the tests first... but this step is a prerequisite for running the tests. We should not block on this. 91 # show("Please ensure these paths exist and have content if they are directories.") 92 # exit(1) 93 94 sha256_hash = calculate_sha256(fingerprint_contents) 95 96 paths_with_digests = [] 97 for path in sorted(valid_paths): 98 fingerprint_file = f"{path}.fingerprint" 99 try: 100 if os.path.exists(fingerprint_file): 101 file_digest = calculate_file_sha256(fingerprint_file) 102 103 # Parse the fingerprint file to get the digest/path tuples 104 with open(fingerprint_file, 'r') as f: 105 fingerprint_content = f.read().strip() 106 input_map = parse_fingerprint_contents(fingerprint_content) 107 108 paths_with_digests.append({ 109 "path": path, 110 "digest": file_digest, 111 "input": input_map 112 }) 113 114 except Exception as e: 115 show(f"Error processing {fingerprint_file}: {e}") 116 raise e 117 118 119 output = { 120 "digest": sha256_hash, 121 "paths": paths_with_digests 122 } 123 124 content = json.dumps(output, indent=2, sort_keys=True) 125 126 if file_path: 127 with open(file_path, 'w') as f: 128 f.write(content) 129 130 print(content) 131 132 133 if __name__ == "__main__": 134 file_path = None 135 if len(sys.argv) > 1: 136 file_path = sys.argv[1] 137 main(file_path)