github.com/anchore/syft@v1.38.2/syft/source/filesource/file_source.go (about) 1 package filesource 2 3 import ( 4 "context" 5 "crypto" 6 "fmt" 7 "io" 8 "os" 9 "path" 10 "path/filepath" 11 "sync" 12 13 "github.com/mholt/archives" 14 "github.com/opencontainers/go-digest" 15 16 stereoFile "github.com/anchore/stereoscope/pkg/file" 17 intFile "github.com/anchore/syft/internal/file" 18 "github.com/anchore/syft/internal/log" 19 "github.com/anchore/syft/syft/artifact" 20 "github.com/anchore/syft/syft/file" 21 "github.com/anchore/syft/syft/internal/fileresolver" 22 "github.com/anchore/syft/syft/source" 23 "github.com/anchore/syft/syft/source/directorysource" 24 "github.com/anchore/syft/syft/source/internal" 25 ) 26 27 var _ source.Source = (*fileSource)(nil) 28 29 type Config struct { 30 Path string 31 Exclude source.ExcludeConfig 32 DigestAlgorithms []crypto.Hash 33 Alias source.Alias 34 SkipExtractArchive bool 35 } 36 37 type fileSource struct { 38 id artifact.ID 39 digestForVersion string 40 config Config 41 resolver file.Resolver 42 mutex *sync.Mutex 43 closer func() error 44 digests []file.Digest 45 mimeType string 46 analysisPath string 47 } 48 49 func NewFromPath(path string) (source.Source, error) { 50 return New(Config{Path: path}) 51 } 52 53 func New(cfg Config) (source.Source, error) { 54 f, err := os.Open(cfg.Path) 55 if err != nil { 56 return nil, fmt.Errorf("unable to open file=%q: %w", cfg.Path, err) 57 } 58 defer f.Close() 59 60 fileMeta, err := f.Stat() 61 if err != nil { 62 return nil, fmt.Errorf("unable to stat path=%q: %w", cfg.Path, err) 63 } 64 65 if fileMeta.IsDir() { 66 return nil, fmt.Errorf("given path is a directory: %q", cfg.Path) 67 } 68 69 var digests []file.Digest 70 if len(cfg.DigestAlgorithms) > 0 { 71 digests, err = intFile.NewDigestsFromFile(context.TODO(), f, cfg.DigestAlgorithms) 72 if err != nil { 73 return nil, fmt.Errorf("unable to calculate digests for file=%q: %w", cfg.Path, err) 74 } 75 } 76 77 analysisPath, cleanupFn, err := fileAnalysisPath(cfg.Path, cfg.SkipExtractArchive) 78 if err != nil { 79 return nil, fmt.Errorf("unable to extract file analysis path=%q: %w", cfg.Path, err) 80 } 81 82 id, versionDigest := deriveIDFromFile(cfg) 83 84 return &fileSource{ 85 id: id, 86 config: cfg, 87 mutex: &sync.Mutex{}, 88 closer: cleanupFn, 89 analysisPath: analysisPath, 90 digestForVersion: versionDigest, 91 digests: digests, 92 mimeType: stereoFile.MIMEType(f), 93 }, nil 94 } 95 96 func (s fileSource) ID() artifact.ID { 97 return s.id 98 } 99 100 func (s fileSource) Describe() source.Description { 101 name := path.Base(s.config.Path) 102 version := s.digestForVersion 103 supplier := "" 104 if !s.config.Alias.IsEmpty() { 105 a := s.config.Alias 106 if a.Name != "" { 107 name = a.Name 108 } 109 110 if a.Version != "" { 111 version = a.Version 112 } 113 114 if a.Supplier != "" { 115 supplier = a.Supplier 116 } 117 } 118 return source.Description{ 119 ID: string(s.id), 120 Name: name, 121 Version: version, 122 Supplier: supplier, 123 Metadata: source.FileMetadata{ 124 Path: s.config.Path, 125 Digests: s.digests, 126 MIMEType: s.mimeType, 127 }, 128 } 129 } 130 131 func (s fileSource) FileResolver(_ source.Scope) (file.Resolver, error) { 132 s.mutex.Lock() 133 defer s.mutex.Unlock() 134 135 if s.resolver != nil { 136 return s.resolver, nil 137 } 138 139 exclusionFunctions, err := directorysource.GetDirectoryExclusionFunctions(s.analysisPath, s.config.Exclude.Paths) 140 if err != nil { 141 return nil, err 142 } 143 144 fi, err := os.Stat(s.analysisPath) 145 if err != nil { 146 return nil, fmt.Errorf("unable to stat path=%q: %w", s.analysisPath, err) 147 } 148 149 if isArchiveAnalysis := fi.IsDir(); isArchiveAnalysis { 150 // this is an analysis of an archive file... we should scan the directory where the archive contents 151 res, err := fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...) 152 if err != nil { 153 return nil, fmt.Errorf("unable to create directory resolver: %w", err) 154 } 155 156 s.resolver = res 157 return s.resolver, nil 158 } 159 160 // This is analysis of a single file. Use file indexer. 161 res, err := fileresolver.NewFromFile(s.analysisPath, exclusionFunctions...) 162 if err != nil { 163 return nil, fmt.Errorf("unable to create file resolver: %w", err) 164 } 165 166 s.resolver = res 167 return s.resolver, nil 168 } 169 170 func (s *fileSource) Close() error { 171 s.mutex.Lock() 172 defer s.mutex.Unlock() 173 174 if s.closer == nil { 175 return nil 176 } 177 178 s.resolver = nil 179 return s.closer() 180 } 181 182 // deriveIDFromFile derives an artifact ID from the contents of a file. If an alias is provided, it will be included 183 // in the ID derivation (along with contents). This way if the user scans the same item but is considered to be 184 // logically different, then ID will express that. 185 func deriveIDFromFile(cfg Config) (artifact.ID, string) { 186 d := digestOfFileContents(cfg.Path) 187 info := d 188 189 if !cfg.Alias.IsEmpty() { 190 // if the user provided an alias, we want to consider that in the artifact ID. This way if the user 191 // scans the same item but is considered to be logically different, then ID will express that. 192 info += fmt.Sprintf(":%s@%s", cfg.Alias.Name, cfg.Alias.Version) 193 } 194 195 return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String()), d 196 } 197 198 // fileAnalysisPath returns the path given, or in the case the path is an archive, the location where the archive 199 // contents have been made available. A cleanup function is provided for any temp files created (if any). 200 // Users can disable unpacking archives, allowing individual cataloguers to extract them instead (where 201 // supported) 202 func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() error, error) { 203 var cleanupFn = func() error { return nil } 204 var analysisPath = path 205 206 if skipExtractArchive { 207 return analysisPath, cleanupFn, nil 208 } 209 210 envelopedUnarchiver, _, err := intFile.IdentifyArchive(context.Background(), path, nil) 211 if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok { 212 analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver) 213 if err != nil { 214 return "", nil, fmt.Errorf("unable to unarchive source file: %w", err) 215 } 216 217 log.Debugf("source path is an archive") 218 } 219 220 return analysisPath, cleanupFn, nil 221 } 222 223 func digestOfFileContents(path string) string { 224 f, err := os.Open(path) 225 if err != nil { 226 return digest.SHA256.FromString(path).String() 227 } 228 defer f.Close() 229 230 di, err := digest.SHA256.FromReader(f) 231 if err != nil { 232 return digest.SHA256.FromString(path).String() 233 } 234 235 return di.String() 236 } 237 238 func unarchiveToTmp(path string, unarchiver archives.Extractor) (string, func() error, error) { 239 var cleanupFn = func() error { return nil } 240 archive, err := os.Open(path) 241 if err != nil { 242 return "", cleanupFn, fmt.Errorf("unable to open archive: %v", err) 243 } 244 defer archive.Close() 245 246 tempDir, err := os.MkdirTemp("", "syft-archive-contents-") 247 if err != nil { 248 return "", cleanupFn, fmt.Errorf("unable to create tempdir for archive processing: %w", err) 249 } 250 251 visitor := func(_ context.Context, file archives.FileInfo) error { 252 // Protect against symlink attacks by ensuring path doesn't escape tempDir 253 destPath, err := intFile.SafeJoin(tempDir, file.NameInArchive) 254 if err != nil { 255 return err 256 } 257 258 if file.IsDir() { 259 return os.MkdirAll(destPath, file.Mode()) 260 } 261 262 if err = os.MkdirAll(filepath.Dir(destPath), os.ModeDir|0755); err != nil { 263 return fmt.Errorf("failed to create parent directory: %w", err) 264 } 265 266 rc, err := file.Open() 267 if err != nil { 268 return fmt.Errorf("failed to open file in archive: %w", err) 269 } 270 defer rc.Close() 271 272 destFile, err := os.Create(destPath) 273 if err != nil { 274 return fmt.Errorf("failed to create file in destination: %w", err) 275 } 276 defer destFile.Close() 277 278 if err := destFile.Chmod(file.Mode()); err != nil { 279 return fmt.Errorf("failed to change mode of destination file: %w", err) 280 } 281 282 if _, err := io.Copy(destFile, rc); err != nil { 283 return fmt.Errorf("failed to copy file contents: %w", err) 284 } 285 286 return nil 287 } 288 289 return tempDir, func() error { 290 return os.RemoveAll(tempDir) 291 }, unarchiver.Extract(context.Background(), archive, visitor) 292 }