github.com/anchore/syft@v1.38.2/internal/file/zip_file_traversal.go (about)

     1  package file
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"os"
     8  	"path/filepath"
     9  	"strings"
    10  
    11  	"github.com/mholt/archives"
    12  
    13  	"github.com/anchore/syft/internal/log"
    14  )
    15  
    16  const (
    17  	// represents the order of bytes
    18  	_  = iota
    19  	KB = 1 << (10 * iota)
    20  	MB
    21  	GB
    22  )
    23  
    24  type errZipSlipDetected struct {
    25  	Prefix   string
    26  	JoinArgs []string
    27  }
    28  
    29  func (e *errZipSlipDetected) Error() string {
    30  	return fmt.Sprintf("path traversal detected: paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs)
    31  }
    32  
    33  type zipTraversalRequest map[string]struct{}
    34  
    35  func newZipTraverseRequest(paths ...string) zipTraversalRequest {
    36  	results := make(zipTraversalRequest)
    37  	for _, p := range paths {
    38  		results[p] = struct{}{}
    39  	}
    40  	return results
    41  }
    42  
    43  // TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern.
    44  func TraverseFilesInZip(ctx context.Context, archivePath string, visitor archives.FileHandler, paths ...string) error {
    45  	request := newZipTraverseRequest(paths...)
    46  
    47  	zipReader, err := os.Open(archivePath)
    48  	if err != nil {
    49  		return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err)
    50  	}
    51  	defer func() {
    52  		if err := zipReader.Close(); err != nil {
    53  			log.Errorf("unable to close zip archive (%s): %+v", archivePath, err)
    54  		}
    55  	}()
    56  
    57  	return archives.Zip{}.Extract(ctx, zipReader, func(ctx context.Context, file archives.FileInfo) error {
    58  		// if no paths are given then assume that all files should be traversed
    59  		if len(paths) > 0 {
    60  			if _, ok := request[file.NameInArchive]; !ok {
    61  				// this file path is not of interest
    62  				return nil
    63  			}
    64  		}
    65  
    66  		return visitor(ctx, file)
    67  	})
    68  }
    69  
    70  // ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted.
    71  func ExtractFromZipToUniqueTempFile(ctx context.Context, archivePath, dir string, paths ...string) (map[string]Opener, error) {
    72  	results := make(map[string]Opener)
    73  
    74  	// don't allow for full traversal, only select traversal from given paths
    75  	if len(paths) == 0 {
    76  		return results, nil
    77  	}
    78  
    79  	visitor := func(_ context.Context, file archives.FileInfo) error {
    80  		tempfilePrefix := filepath.Base(filepath.Clean(file.NameInArchive)) + "-"
    81  		tempFile, err := os.CreateTemp(dir, tempfilePrefix)
    82  		if err != nil {
    83  			return fmt.Errorf("unable to create temp file: %w", err)
    84  		}
    85  		// we shouldn't try and keep the tempfile open as the returned result may have several files, which takes up
    86  		// resources (leading to "too many open files"). Instead we'll return a file opener to the caller which
    87  		// provides a ReadCloser. It is up to the caller to handle closing the file explicitly.
    88  		defer tempFile.Close()
    89  
    90  		zippedFile, err := file.Open()
    91  		if err != nil {
    92  			return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
    93  		}
    94  		defer func() {
    95  			if err := zippedFile.Close(); err != nil {
    96  				log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
    97  			}
    98  		}()
    99  
   100  		if file.IsDir() {
   101  			return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
   102  		}
   103  
   104  		if err := safeCopy(tempFile, zippedFile); err != nil {
   105  			return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
   106  		}
   107  
   108  		results[file.NameInArchive] = Opener{path: tempFile.Name()}
   109  
   110  		return nil
   111  	}
   112  
   113  	return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
   114  }
   115  
   116  // ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path.
   117  func ContentsFromZip(ctx context.Context, archivePath string, paths ...string) (map[string]string, error) {
   118  	results := make(map[string]string)
   119  
   120  	// don't allow for full traversal, only select traversal from given paths
   121  	if len(paths) == 0 {
   122  		return results, nil
   123  	}
   124  
   125  	visitor := func(_ context.Context, file archives.FileInfo) error {
   126  		zippedFile, err := file.Open()
   127  		if err != nil {
   128  			return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
   129  		}
   130  		defer func() {
   131  			if err := zippedFile.Close(); err != nil {
   132  				log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
   133  			}
   134  		}()
   135  
   136  		if file.IsDir() {
   137  			return fmt.Errorf("unable to extract directories, only files: %s", file.NameInArchive)
   138  		}
   139  
   140  		var buffer bytes.Buffer
   141  		if err := safeCopy(&buffer, zippedFile); err != nil {
   142  			return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.NameInArchive, archivePath, err)
   143  		}
   144  
   145  		results[file.NameInArchive] = buffer.String()
   146  
   147  		return nil
   148  	}
   149  
   150  	return results, TraverseFilesInZip(ctx, archivePath, visitor, paths...)
   151  }
   152  
   153  // UnzipToDir extracts a zip archive to a target directory.
   154  func UnzipToDir(ctx context.Context, archivePath, targetDir string) error {
   155  	visitor := func(_ context.Context, file archives.FileInfo) error {
   156  		joinedPath, err := SafeJoin(targetDir, file.NameInArchive)
   157  		if err != nil {
   158  			return err
   159  		}
   160  
   161  		return extractSingleFile(file, joinedPath, archivePath)
   162  	}
   163  
   164  	return TraverseFilesInZip(ctx, archivePath, visitor)
   165  }
   166  
   167  // SafeJoin ensures that any destinations do not resolve to a path above the prefix path.
   168  func SafeJoin(prefix string, dest ...string) (string, error) {
   169  	joinResult := filepath.Join(append([]string{prefix}, dest...)...)
   170  	cleanJoinResult := filepath.Clean(joinResult)
   171  	if !strings.HasPrefix(cleanJoinResult, filepath.Clean(prefix)) {
   172  		return "", &errZipSlipDetected{
   173  			Prefix:   prefix,
   174  			JoinArgs: dest,
   175  		}
   176  	}
   177  	// why not return the clean path? the called may not be expected it from what should only be a join operation.
   178  	return joinResult, nil
   179  }
   180  
   181  func extractSingleFile(file archives.FileInfo, expandedFilePath, archivePath string) error {
   182  	zippedFile, err := file.Open()
   183  	if err != nil {
   184  		return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.NameInArchive, archivePath, err)
   185  	}
   186  	defer func() {
   187  		if err := zippedFile.Close(); err != nil {
   188  			log.Errorf("unable to close source file=%q from zip=%q: %+v", file.NameInArchive, archivePath, err)
   189  		}
   190  	}()
   191  
   192  	if file.IsDir() {
   193  		err = os.MkdirAll(expandedFilePath, file.Mode())
   194  		if err != nil {
   195  			return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err)
   196  		}
   197  	} else {
   198  		// Open an output file for writing
   199  		outputFile, err := os.OpenFile(
   200  			expandedFilePath,
   201  			os.O_WRONLY|os.O_CREATE|os.O_TRUNC,
   202  			file.Mode(),
   203  		)
   204  		if err != nil {
   205  			return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err)
   206  		}
   207  		defer func() {
   208  			if err := outputFile.Close(); err != nil {
   209  				log.Errorf("unable to close dest file=%q from zip=%q: %+v", outputFile.Name(), archivePath, err)
   210  			}
   211  		}()
   212  
   213  		if err := safeCopy(outputFile, zippedFile); err != nil {
   214  			return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.NameInArchive, outputFile.Name(), archivePath, err)
   215  		}
   216  	}
   217  
   218  	return nil
   219  }