github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/source/filesource/file_source.go

package filesource

import (
    "crypto"
    "fmt"
    "io/fs"
    "os"
    "path"
    "path/filepath"
    "sync"

    "github.com/mholt/archiver/v3"
    "github.com/opencontainers/go-digest"

    stereoFile "github.com/anchore/stereoscope/pkg/file"
    intFile "github.com/anchore/syft/internal/file"
    "github.com/anchore/syft/internal/log"
    "github.com/anchore/syft/syft/artifact"
    "github.com/anchore/syft/syft/file"
    "github.com/anchore/syft/syft/internal/fileresolver"
    "github.com/anchore/syft/syft/source"
    "github.com/anchore/syft/syft/source/directorysource"
    "github.com/anchore/syft/syft/source/internal"
)

var _ source.Source = (*fileSource)(nil)

type Config struct {
    Path             string
    Exclude          source.ExcludeConfig
    DigestAlgorithms []crypto.Hash
    Alias            source.Alias
}

type fileSource struct {
    id               artifact.ID
    digestForVersion string
    config           Config
    resolver         *fileresolver.Directory
    mutex            *sync.Mutex
    closer           func() error
    digests          []file.Digest
    mimeType         string
    analysisPath     string
}

func NewFromPath(path string) (source.Source, error) {
    return New(Config{Path: path})
}

func New(cfg Config) (source.Source, error) {
    fileMeta, err := os.Stat(cfg.Path)
    if err != nil {
        return nil, fmt.Errorf("unable to stat path=%q: %w", cfg.Path, err)
    }

    if fileMeta.IsDir() {
        return nil, fmt.Errorf("given path is a directory: %q", cfg.Path)
    }

    analysisPath, cleanupFn := fileAnalysisPath(cfg.Path)

    var digests []file.Digest
    if len(cfg.DigestAlgorithms) > 0 {
        fh, err := os.Open(cfg.Path)
        if err != nil {
            return nil, fmt.Errorf("unable to open file=%q: %w", cfg.Path, err)
        }

        defer fh.Close()

        digests, err = intFile.NewDigestsFromFile(fh, cfg.DigestAlgorithms)
        if err != nil {
            return nil, fmt.Errorf("unable to calculate digests for file=%q: %w", cfg.Path, err)
        }
    }

    fh, err := os.Open(cfg.Path)
    if err != nil {
        return nil, fmt.Errorf("unable to open file=%q: %w", cfg.Path, err)
    }

    defer fh.Close()

    id, versionDigest := deriveIDFromFile(cfg)

    return &fileSource{
        id:               id,
        config:           cfg,
        mutex:            &sync.Mutex{},
        closer:           cleanupFn,
        analysisPath:     analysisPath,
        digestForVersion: versionDigest,
        digests:          digests,
        mimeType:         stereoFile.MIMEType(fh),
    }, nil
}

// deriveIDFromFile derives an artifact ID from the contents of a file. If an alias is provided, it is included
// in the ID derivation (along with the contents). This way, if the user scans the same item but considers it
// logically different, the ID will express that.
func deriveIDFromFile(cfg Config) (artifact.ID, string) {
    d := digestOfFileContents(cfg.Path)
    info := d

    if !cfg.Alias.IsEmpty() {
        // if the user provided an alias, we want to consider it in the artifact ID. This way, if the user
        // scans the same item but considers it logically different, the ID will express that.
        info += fmt.Sprintf(":%s@%s", cfg.Alias.Name, cfg.Alias.Version)
    }

    return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String()), d
}

func (s fileSource) ID() artifact.ID {
    return s.id
}

func (s fileSource) Describe() source.Description {
    name := path.Base(s.config.Path)
    version := s.digestForVersion
    if !s.config.Alias.IsEmpty() {
        a := s.config.Alias
        if a.Name != "" {
            name = a.Name
        }

        if a.Version != "" {
            version = a.Version
        }
    }
    return source.Description{
        ID:      string(s.id),
        Name:    name,
        Version: version,
        Metadata: source.FileMetadata{
            Path:     s.config.Path,
            Digests:  s.digests,
            MIMEType: s.mimeType,
        },
    }
}

func (s fileSource) FileResolver(_ source.Scope) (file.Resolver, error) {
    s.mutex.Lock()
    defer s.mutex.Unlock()

    if s.resolver != nil {
        return s.resolver, nil
    }

    exclusionFunctions, err := directorysource.GetDirectoryExclusionFunctions(s.analysisPath, s.config.Exclude.Paths)
    if err != nil {
        return nil, err
    }

    fi, err := os.Stat(s.analysisPath)
    if err != nil {
        return nil, fmt.Errorf("unable to stat path=%q: %w", s.analysisPath, err)
    }
    isArchiveAnalysis := fi.IsDir()

    absParentDir, err := absoluteSymlinkFreePathToParent(s.analysisPath)
    if err != nil {
        return nil, err
    }

    var res *fileresolver.Directory
    if isArchiveAnalysis {
        // this is an analysis of an archive file... we should scan the directory where the archive contents have been extracted
        res, err = fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...)
        if err != nil {
            return nil, fmt.Errorf("unable to create directory resolver: %w", err)
        }
    } else {
        // this is an analysis of a single file. We want to ultimately scan the directory that the file is in, but we
        // don't want to include any files other than the given file.
        exclusionFunctions = append([]fileresolver.PathIndexVisitor{

            // note: we should exclude these kinds of paths first before considering any other user-provided exclusions
            func(_, p string, _ os.FileInfo, _ error) error {
                if p == absParentDir {
                    // this is the root directory... always include it
                    return nil
                }

                if filepath.Dir(p) != absParentDir {
                    // we are no longer in the root directory containing the single file we want to scan...
                    // we should skip the directory this path resides in entirely!
                    return fs.SkipDir
                }

                if filepath.Base(p) != filepath.Base(s.config.Path) {
                    // we're in the root directory, but this is not the file we want to scan...
                    // we should selectively skip this file (not the directory we're in).
                    return fileresolver.ErrSkipPath
                }
                return nil
            },
        }, exclusionFunctions...)

        res, err = fileresolver.NewFromDirectory(absParentDir, absParentDir, exclusionFunctions...)
        if err != nil {
            return nil, fmt.Errorf("unable to create directory resolver: %w", err)
        }
    }

    s.resolver = res

    return s.resolver, nil
}

func absoluteSymlinkFreePathToParent(path string) (string, error) {
    absAnalysisPath, err := filepath.Abs(path)
    if err != nil {
        return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err)
    }
    dereferencedAbsAnalysisPath, err := filepath.EvalSymlinks(absAnalysisPath)
    if err != nil {
        return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err)
    }
    return filepath.Dir(dereferencedAbsAnalysisPath), nil
}

func (s *fileSource) Close() error {
    if s.closer == nil {
        return nil
    }
    s.resolver = nil
    return s.closer()
}

// fileAnalysisPath returns the given path or, when the path is an archive, the location where the archive
// contents have been made available. A cleanup function is provided for any temp files created (if any).
func fileAnalysisPath(path string) (string, func() error) {
    var analysisPath = path
    var cleanupFn = func() error { return nil }

    // if the given file is an archive (as indicated by the file extension, not the MIME type) then unarchive it and
    // use the contents as the source. Note: this does NOT recursively unarchive contents; only the given path is
    // unarchived.
    envelopedUnarchiver, err := archiver.ByExtension(path)
    if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok {
        if tar, ok := unarchiver.(*archiver.Tar); ok {
            // when tar files are extracted, if there are multiple entries at the same
            // location, the last entry wins
            // NOTE: this currently does not display any messages if an overwrite happens
            tar.OverwriteExisting = true
        }
        unarchivedPath, tmpCleanup, err := unarchiveToTmp(path, unarchiver)
        if err != nil {
            log.Warnf("file could not be unarchived: %+v", err)
        } else {
            log.Debugf("source path is an archive")
            analysisPath = unarchivedPath
        }
        if tmpCleanup != nil {
            cleanupFn = tmpCleanup
        }
    }

    return analysisPath, cleanupFn
}

func digestOfFileContents(path string) string {
    file, err := os.Open(path)
    if err != nil {
        return digest.SHA256.FromString(path).String()
    }
    defer file.Close()
    di, err := digest.SHA256.FromReader(file)
    if err != nil {
        return digest.SHA256.FromString(path).String()
    }
    return di.String()
}

func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func() error, error) {
    tempDir, err := os.MkdirTemp("", "syft-archive-contents-")
    if err != nil {
        return "", func() error { return nil }, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
    }

    cleanupFn := func() error {
        return os.RemoveAll(tempDir)
    }

    return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir)
}
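What follows is not part of the upstream file: a minimal usage sketch, assuming an external caller of the filesource package, showing how the constructor, Describe, and FileResolver fit together. The package layout, the scanOneFile helper, the example path, the scope value, and the glob pattern are illustrative assumptions; as seen above, the file source ignores the scope argument entirely.

package main

import (
    "crypto"
    _ "crypto/sha256" // ensure the SHA-256 implementation is registered for digesting
    "fmt"

    "github.com/anchore/syft/syft/source"
    "github.com/anchore/syft/syft/source/filesource"
)

// scanOneFile is a hypothetical helper: it builds a file source over the given path,
// prints its description, and lists the locations its resolver can see.
func scanOneFile(path string) error {
    src, err := filesource.New(filesource.Config{
        Path:             path,
        DigestAlgorithms: []crypto.Hash{crypto.SHA256},
    })
    if err != nil {
        return err
    }
    defer src.Close()

    // Describe reports the basename (or alias name) and the content digest (or alias version).
    desc := src.Describe()
    fmt.Println(desc.Name, desc.Version)

    // The scope argument is ignored by file sources; any value will do.
    resolver, err := src.FileResolver(source.SquashedScope)
    if err != nil {
        return err
    }

    // For an archive this walks the unpacked contents; for a plain file, only that file is indexed.
    locations, err := resolver.FilesByGlob("**/*")
    if err != nil {
        return err
    }
    for _, loc := range locations {
        fmt.Println(loc.RealPath)
    }
    return nil
}

func main() {
    if err := scanOneFile("./example.tar"); err != nil { // hypothetical archive path
        fmt.Println("scan failed:", err)
    }
}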