github.com/anchore/syft@v1.38.2/cmd/syft/internal/test/integration/test-fixtures/image-test-java-purls/extract.py (about)

     1  import os
     2  import zipfile
     3  import io
     4  
     5  ARCHIVE_EXTENSIONS = ('.jar', '.war', '.ear', '.hpi', '.war', '.sar', '.nar', '.par')
     6  METADATA_FILES = ('pom.xml', 'pom.properties', 'MANIFEST.MF')
     7  
     8  
     9  def slim_archive(archive, output_dir, base_path="", archive_name=""):
    10      """
    11      extracts metadata files from the archive and creates a slim JAR file
    12      containing only these files. handles nested JARs by preserving them.
    13      """
    14      slim_buffer = io.BytesIO()
    15      with zipfile.ZipFile(archive, 'r') as zip_file:
    16          with zipfile.ZipFile(slim_buffer, 'w', zipfile.ZIP_DEFLATED) as slim_zip:
    17              for file_name in zip_file.namelist():
    18                  # check for metadata files or nested JARs
    19                  if file_name.endswith(METADATA_FILES):
    20                      # add metadata files directly to the slimmed archive
    21                      file_data = zip_file.read(file_name)
    22                      slim_zip.writestr(file_name, file_data)
    23                  elif file_name.endswith(ARCHIVE_EXTENSIONS):
    24                      # if it's a nested archive, recursively slim it
    25                      nested_archive = io.BytesIO(zip_file.read(file_name))
    26                      nested_slim_buffer = io.BytesIO()
    27                      slim_archive(
    28                          nested_archive,
    29                          nested_slim_buffer,
    30                          base_path=os.path.join(base_path, os.path.dirname(file_name)),
    31                          archive_name=os.path.basename(file_name)
    32                      )
    33                      # add the slimmed nested archive back to the parent archive
    34                      nested_slim_buffer.seek(0)
    35                      slim_zip.writestr(file_name, nested_slim_buffer.read())
    36  
    37      # write out the slimmed JAR to the output directory if output_dir is a directory
    38      if isinstance(output_dir, str):
    39          output_path = os.path.join(output_dir, base_path, archive_name)
    40          os.makedirs(os.path.dirname(output_path), exist_ok=True)
    41          with open(output_path, 'wb') as f:
    42              slim_buffer.seek(0)
    43              f.write(slim_buffer.read())
    44      else:
    45          # if output_dir is a BytesIO buffer (for nested archives), just write to it
    46          output_dir.seek(0)
    47          output_dir.write(slim_buffer.getvalue())
    48  
    49  
    50  def walk_directory_and_slim_jars(base_dir, output_dir):
    51      """
    52      recursively walks through a directory tree looking for .jar, .war, .ear,
    53      .hpi files and slims them down by keeping only metadata files.
    54      """
    55      for dirpath, _, filenames in os.walk(base_dir):
    56          for filename in filenames:
    57              if filename.endswith(ARCHIVE_EXTENSIONS):
    58                  archive_path = os.path.join(dirpath, filename)
    59                  print(f"Processing {archive_path}")
    60                  slim_archive(archive_path, output_dir, os.path.relpath(dirpath, base_dir), filename)
    61  
    62  
    63  # a helper script for slimming down JAR files by keeping only metadata files but still keeping the jar packaging,
    64  # including nested JARs! Useful for testing purposes.
    65  if __name__ == "__main__":
    66      BASE_DIR = "."
    67      OUTPUT_DIR = "./slim"
    68      os.makedirs(OUTPUT_DIR, exist_ok=True)
    69      walk_directory_and_slim_jars(BASE_DIR, OUTPUT_DIR)