github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/internal/file/zip_file_traversal.go (about) 1 package file 2 3 import ( 4 "archive/zip" 5 "bytes" 6 "fmt" 7 "os" 8 "path/filepath" 9 "strings" 10 11 "github.com/anchore/syft/internal/log" 12 ) 13 14 const ( 15 // represents the order of bytes 16 _ = iota 17 KB = 1 << (10 * iota) 18 MB 19 GB 20 ) 21 22 type errZipSlipDetected struct { 23 Prefix string 24 JoinArgs []string 25 } 26 27 func (e *errZipSlipDetected) Error() string { 28 return fmt.Sprintf("paths are not allowed to resolve outside of the root prefix (%q). Destination: %q", e.Prefix, e.JoinArgs) 29 } 30 31 type zipTraversalRequest map[string]struct{} 32 33 func newZipTraverseRequest(paths ...string) zipTraversalRequest { 34 results := make(zipTraversalRequest) 35 for _, p := range paths { 36 results[p] = struct{}{} 37 } 38 return results 39 } 40 41 // TraverseFilesInZip enumerates all paths stored within a zip archive using the visitor pattern. 42 func TraverseFilesInZip(archivePath string, visitor func(*zip.File) error, paths ...string) error { 43 request := newZipTraverseRequest(paths...) 44 45 zipReader, err := OpenZip(archivePath) 46 if err != nil { 47 return fmt.Errorf("unable to open zip archive (%s): %w", archivePath, err) 48 } 49 defer func() { 50 err = zipReader.Close() 51 if err != nil { 52 log.Errorf("unable to close zip archive (%s): %+v", archivePath, err) 53 } 54 }() 55 56 for _, file := range zipReader.Reader.File { 57 // if no paths are given then assume that all files should be traversed 58 if len(paths) > 0 { 59 if _, ok := request[file.Name]; !ok { 60 // this file path is not of interest 61 continue 62 } 63 } 64 65 if err = visitor(file); err != nil { 66 return err 67 } 68 } 69 return nil 70 } 71 72 // ExtractFromZipToUniqueTempFile extracts select paths for the given archive to a temporary directory, returning file openers for each file extracted. 73 func ExtractFromZipToUniqueTempFile(archivePath, dir string, paths ...string) (map[string]Opener, error) { 74 results := make(map[string]Opener) 75 76 // don't allow for full traversal, only select traversal from given paths 77 if len(paths) == 0 { 78 return results, nil 79 } 80 81 visitor := func(file *zip.File) error { 82 tempfilePrefix := filepath.Base(filepath.Clean(file.Name)) + "-" 83 84 tempFile, err := os.CreateTemp(dir, tempfilePrefix) 85 if err != nil { 86 return fmt.Errorf("unable to create temp file: %w", err) 87 } 88 // we shouldn't try and keep the tempfile open as the returned result may have several files, which takes up 89 // resources (leading to "too many open files"). Instead we'll return a file opener to the caller which 90 // provides a ReadCloser. It is up to the caller to handle closing the file explicitly. 91 defer tempFile.Close() 92 93 zippedFile, err := file.Open() 94 if err != nil { 95 return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) 96 } 97 defer func() { 98 err := zippedFile.Close() 99 if err != nil { 100 log.Errorf("unable to close source file=%q from zip=%q: %+v", file.Name, archivePath, err) 101 } 102 }() 103 104 if file.FileInfo().IsDir() { 105 return fmt.Errorf("unable to extract directories, only files: %s", file.Name) 106 } 107 108 if err := safeCopy(tempFile, zippedFile); err != nil { 109 return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err) 110 } 111 112 results[file.Name] = Opener{path: tempFile.Name()} 113 114 return nil 115 } 116 117 return results, TraverseFilesInZip(archivePath, visitor, paths...) 118 } 119 120 // ContentsFromZip extracts select paths for the given archive and returns a set of string contents for each path. 121 func ContentsFromZip(archivePath string, paths ...string) (map[string]string, error) { 122 results := make(map[string]string) 123 124 // don't allow for full traversal, only select traversal from given paths 125 if len(paths) == 0 { 126 return results, nil 127 } 128 129 visitor := func(file *zip.File) error { 130 zippedFile, err := file.Open() 131 if err != nil { 132 return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) 133 } 134 135 if file.FileInfo().IsDir() { 136 return fmt.Errorf("unable to extract directories, only files: %s", file.Name) 137 } 138 139 var buffer bytes.Buffer 140 if err := safeCopy(&buffer, zippedFile); err != nil { 141 return fmt.Errorf("unable to copy source=%q for zip=%q: %w", file.Name, archivePath, err) 142 } 143 144 results[file.Name] = buffer.String() 145 146 err = zippedFile.Close() 147 if err != nil { 148 return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err) 149 } 150 return nil 151 } 152 153 return results, TraverseFilesInZip(archivePath, visitor, paths...) 154 } 155 156 // UnzipToDir extracts a zip archive to a target directory. 157 func UnzipToDir(archivePath, targetDir string) error { 158 visitor := func(file *zip.File) error { 159 joinedPath, err := safeJoin(targetDir, file.Name) 160 if err != nil { 161 return err 162 } 163 164 return extractSingleFile(file, joinedPath, archivePath) 165 } 166 167 return TraverseFilesInZip(archivePath, visitor) 168 } 169 170 // safeJoin ensures that any destinations do not resolve to a path above the prefix path. 171 func safeJoin(prefix string, dest ...string) (string, error) { 172 joinResult := filepath.Join(append([]string{prefix}, dest...)...) 173 cleanJoinResult := filepath.Clean(joinResult) 174 if !strings.HasPrefix(cleanJoinResult, filepath.Clean(prefix)) { 175 return "", &errZipSlipDetected{ 176 Prefix: prefix, 177 JoinArgs: dest, 178 } 179 } 180 // why not return the clean path? the called may not be expected it from what should only be a join operation. 181 return joinResult, nil 182 } 183 184 func extractSingleFile(file *zip.File, expandedFilePath, archivePath string) error { 185 zippedFile, err := file.Open() 186 if err != nil { 187 return fmt.Errorf("unable to read file=%q from zip=%q: %w", file.Name, archivePath, err) 188 } 189 190 if file.FileInfo().IsDir() { 191 err = os.MkdirAll(expandedFilePath, file.Mode()) 192 if err != nil { 193 return fmt.Errorf("unable to create dir=%q from zip=%q: %w", expandedFilePath, archivePath, err) 194 } 195 } else { 196 // Open an output file for writing 197 outputFile, err := os.OpenFile( 198 expandedFilePath, 199 os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 200 file.Mode(), 201 ) 202 if err != nil { 203 return fmt.Errorf("unable to create dest file=%q from zip=%q: %w", expandedFilePath, archivePath, err) 204 } 205 206 if err := safeCopy(outputFile, zippedFile); err != nil { 207 return fmt.Errorf("unable to copy source=%q to dest=%q for zip=%q: %w", file.Name, outputFile.Name(), archivePath, err) 208 } 209 210 err = outputFile.Close() 211 if err != nil { 212 return fmt.Errorf("unable to close dest file=%q from zip=%q: %w", outputFile.Name(), archivePath, err) 213 } 214 } 215 216 err = zippedFile.Close() 217 if err != nil { 218 return fmt.Errorf("unable to close source file=%q from zip=%q: %w", file.Name, archivePath, err) 219 } 220 return nil 221 }