# github.com/anchore/syft@v1.38.2/cmd/syft/internal/test/integration/test-fixtures/image-java-virtualpath-regression/extract.py
import os
import zipfile
import io

ARCHIVE_EXTENSIONS = ('.jar', '.war', '.ear', '.hpi', '.sar', '.nar', '.par')
METADATA_FILES = ('pom.xml', 'pom.properties', 'MANIFEST.MF')


def slim_archive(archive, output_dir, base_path="", archive_name=""):
    """
    extracts metadata files from the archive and creates a slim archive
    containing only these files. nested archives are kept, but are slimmed
    recursively in the same way.

    archive: anything zipfile.ZipFile can open for reading (a filesystem path
        or a seekable binary file object).
    output_dir: either a directory path (str), in which case the slim archive
        is written to <output_dir>/<base_path>/<archive_name>, or a writable
        binary buffer, in which case the slim archive bytes are written at the
        start of that buffer and base_path / archive_name are not used.
    base_path: directory of the archive relative to the output root.
    archive_name: file name to give the slim archive.
    """
    slim_buffer = io.BytesIO()
    with zipfile.ZipFile(archive, 'r') as zip_file, \
            zipfile.ZipFile(slim_buffer, 'w', zipfile.ZIP_DEFLATED) as slim_zip:
        for file_name in zip_file.namelist():
            # NOTE: entries are matched by name suffix, so e.g. "my-pom.xml"
            # is also treated as a metadata file.
            if file_name.endswith(METADATA_FILES):
                # add metadata files directly to the slimmed archive
                slim_zip.writestr(file_name, zip_file.read(file_name))
            elif file_name.endswith(ARCHIVE_EXTENSIONS):
                # if it's a nested archive, recursively slim it in memory
                nested_archive = io.BytesIO(zip_file.read(file_name))
                nested_slim_buffer = io.BytesIO()
                slim_archive(
                    nested_archive,
                    nested_slim_buffer,
                    base_path=os.path.join(base_path, os.path.dirname(file_name)),
                    archive_name=os.path.basename(file_name),
                )
                # add the slimmed nested archive back to the parent archive
                slim_zip.writestr(file_name, nested_slim_buffer.getvalue())

    # both ZipFile objects are closed at this point, so slim_buffer holds a
    # complete archive (central directory included)
    if isinstance(output_dir, str):
        # write out the slimmed archive below the output directory
        output_path = os.path.join(output_dir, base_path, archive_name)
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with open(output_path, 'wb') as f:
            f.write(slim_buffer.getvalue())
    else:
        # output_dir is a buffer (used for nested archives), just write to it
        output_dir.seek(0)
        output_dir.write(slim_buffer.getvalue())


def walk_directory_and_slim_jars(base_dir, output_dir):
    """
    recursively walks through a directory tree looking for archives (any file
    whose name ends with one of ARCHIVE_EXTENSIONS) and slims them down by
    keeping only metadata files. results are written below output_dir using
    the same relative layout as under base_dir.

    output_dir may live inside base_dir: it is skipped while walking so that
    freshly written slim archives are not picked up and slimmed again into
    <output_dir>/<output_dir>/... on this or any later run.
    """
    output_root = os.path.abspath(output_dir)
    for dirpath, dirnames, filenames in os.walk(base_dir):
        # prune in place: with a top-down walk, os.walk only descends into the
        # directories that remain in dirnames
        dirnames[:] = [
            name for name in dirnames
            if os.path.abspath(os.path.join(dirpath, name)) != output_root
        ]
        for filename in filenames:
            if filename.endswith(ARCHIVE_EXTENSIONS):
                archive_path = os.path.join(dirpath, filename)
                print(f"Processing {archive_path}")
                slim_archive(archive_path, output_dir, os.path.relpath(dirpath, base_dir), filename)


# a helper script for slimming down JAR files by keeping only metadata files but still keeping the jar packaging,
# including nested JARs! Useful for testing purposes.
if __name__ == "__main__":
    BASE_DIR = "."
    OUTPUT_DIR = "./slim"
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    walk_directory_and_slim_jars(BASE_DIR, OUTPUT_DIR)