github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/internal/file/zip_file_traversal.go (about)

     1  package file
     2  
     3  import (
     4  	"archive/zip"
     5  	"bytes"
     6  	"fmt"
     7  	"os"
     8  	"path/filepath"
     9  	"strings"
    10  
    11  	"github.com/anchore/syft/internal/log"
    12  )
    13  
    14  const (
    15  	// represents the order of bytes
    16  	_  = iota
    17  	KB = 1 << (10 * iota)
    18  	MB
    19  	GB
    20  )
    21  
    22  type errZipSlipDetected struct {
    23  	Prefix   string
    24  	JoinArgs []string
    25  }
    26  
    27  func (e *errZipSlipDetected) Error() string {
    28  	return fmt.Sprintf("paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs)
    29  }
    30  
    31  type zipTraversalRequest map[string]struct{}
    32  
    33  func newZipTraverseRequest(paths ...string) zipTraversalRequest {
    34  	results := make(zipTraversalRequest)
    35  	for _, p := range paths {
    36  		results[p] = struct{}{}
    37  	}
    38  	return results
    39  }
    40  
    41  // TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern.
    42  func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths ...string) error {
    43  	request := newZipTraverseRequest(paths...)
    44  
    45  	zipReader, err := OpenZip(archivePath)
    46  	if err != nil {
    47  		return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err)
    48  	}
    49  	defer func() {
    50  		err = zipReader.Close()
    51  		if err != nil {
    52  			log.Errorf("unable to close zip archive (%s): %+v", archivePath, err)
    53  		}
    54  	}()
    55  
    56  	for _, file := range zipReader.Reader.File {
    57  		// if no paths are given then assume that all files should be traversed
    58  		if len(paths) > 0 {
    59  			if _, ok := request[file.Name]; !ok {
    60  				// this file path is not of interest
    61  				continue
    62  			}
    63  		}
    64  
    65  		if err = visitor(file); err != nil {
    66  			return err
    67  		}
    68  	}
    69  	return nil
    70  }
    71  
    72  // ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted.
    73  func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) {
    74  	results := make(map[string]Opener)
    75  
    76  	// don't allow for full traversal, only select traversal from given paths
    77  	if len(paths) == 0 {
    78  		return results, nil
    79  	}
    80  
    81  	visitor := func(file *zip.File) error {
    82  		tempfilePrefix := filepath.Base(filepath.Clean(file.Name)) + "-"
    83  
    84  		tempFile, err := os.CreateTemp(dir, tempfilePrefix)
    85  		if err != nil {
    86  			return fmt.Errorf("unable to create temp file: %w", err)
    87  		}
    88  		// we shouldn't try and keep the tempfile open as the returned result may have several files, which takes up
    89  		// resources (leading to "too many open files"). Instead we'll return a file opener to the caller which
    90  		// provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
    91  		defer tempFile.Close()
    92  
    93  		zippedFile, err := file.Open()
    94  		if err != nil {
    95  			return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
    96  		}
    97  		defer func() {
    98  			err := zippedFile.Close()
    99  			if err != nil {
   100  				log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err)
   101  			}
   102  		}()
   103  
   104  		if file.FileInfo().IsDir() {
   105  			return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
   106  		}
   107  
   108  		if err := safeCopy(tempFile, zippedFile); err != nil {
   109  			return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
   110  		}
   111  
   112  		results[file.Name] = Opener{path: tempFile.Name()}
   113  
   114  		return nil
   115  	}
   116  
   117  	return results, TraverseFilesInZip(archivePath, visitor, paths...)
   118  }
   119  
   120  // ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path.
   121  func ContentsFromZip(archivePath string, paths ...string) (map[string]string, error) {
   122  	results := make(map[string]string)
   123  
   124  	// don't allow for full traversal, only select traversal from given paths
   125  	if len(paths) == 0 {
   126  		return results, nil
   127  	}
   128  
   129  	visitor := func(file *zip.File) error {
   130  		zippedFile, err := file.Open()
   131  		if err != nil {
   132  			return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
   133  		}
   134  
   135  		if file.FileInfo().IsDir() {
   136  			return fmt.Errorf("unable to extract directories, only files: %s", file.Name)
   137  		}
   138  
   139  		var buffer bytes.Buffer
   140  		if err := safeCopy(&buffer, zippedFile); err != nil {
   141  			return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err)
   142  		}
   143  
   144  		results[file.Name] = buffer.String()
   145  
   146  		err = zippedFile.Close()
   147  		if err != nil {
   148  			return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
   149  		}
   150  		return nil
   151  	}
   152  
   153  	return results, TraverseFilesInZip(archivePath, visitor, paths...)
   154  }
   155  
   156  // UnzipToDir extracts a zip archive to a target directory.
   157  func UnzipToDir(archivePath, targetDir string) error {
   158  	visitor := func(file *zip.File) error {
   159  		joinedPath, err := safeJoin(targetDir, file.Name)
   160  		if err != nil {
   161  			return err
   162  		}
   163  
   164  		return extractSingleFile(file, joinedPath, archivePath)
   165  	}
   166  
   167  	return TraverseFilesInZip(archivePath, visitor)
   168  }
   169  
   170  // safeJoin ensures that any destinations do not resolve to a path above the prefix path.
   171  func safeJoin(prefix string, dest ...string) (string, error) {
   172  	joinResult := filepath.Join(append([]string{prefix}, dest...)...)
   173  	cleanJoinResult := filepath.Clean(joinResult)
   174  	if !strings.HasPrefix(cleanJoinResult, filepath.Clean(prefix)) {
   175  		return "", &errZipSlipDetected{
   176  			Prefix:   prefix,
   177  			JoinArgs: dest,
   178  		}
   179  	}
   180  	// why not return the clean path? the called may not be expected it from what should only be a join operation.
   181  	return joinResult, nil
   182  }
   183  
   184  func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) error {
   185  	zippedFile, err := file.Open()
   186  	if err != nil {
   187  		return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err)
   188  	}
   189  
   190  	if file.FileInfo().IsDir() {
   191  		err = os.MkdirAll(expandedFilePath, file.Mode())
   192  		if err != nil {
   193  			return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err)
   194  		}
   195  	} else {
   196  		// Open an output file for writing
   197  		outputFile, err := os.OpenFile(
   198  			expandedFilePath,
   199  			os.O_WRONLY|os.O_CREATE|os.O_TRUNC,
   200  			file.Mode(),
   201  		)
   202  		if err != nil {
   203  			return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err)
   204  		}
   205  
   206  		if err := safeCopy(outputFile, zippedFile); err != nil {
   207  			return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.Name, outputFile.Name(), archivePath, err)
   208  		}
   209  
   210  		err = outputFile.Close()
   211  		if err != nil {
   212  			return fmt.Errorf("unable to close dest file=%q from zip=%q: %w", outputFile.Name(), archivePath, err)
   213  		}
   214  	}
   215  
   216  	err = zippedFile.Close()
   217  	if err != nil {
   218  		return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err)
   219  	}
   220  	return nil
   221  }