github.com/anchore/syft@v1.38.2/syft/internal/fileresolver/file_indexer.go (about) 1 package fileresolver 2 3 import ( 4 "fmt" 5 "os" 6 "path/filepath" 7 8 "github.com/wagoodman/go-progress" 9 10 "github.com/anchore/stereoscope/pkg/file" 11 "github.com/anchore/stereoscope/pkg/filetree" 12 "github.com/anchore/syft/internal/bus" 13 "github.com/anchore/syft/internal/log" 14 "github.com/anchore/syft/syft/internal/windows" 15 ) 16 17 type fileIndexer struct { 18 path string 19 base string 20 pathIndexVisitors []PathIndexVisitor 21 errPaths map[string]error 22 tree filetree.ReadWriter 23 index filetree.Index 24 } 25 26 func newFileIndexer(path, base string, visitors ...PathIndexVisitor) *fileIndexer { 27 i := &fileIndexer{ 28 path: path, 29 base: base, 30 tree: filetree.New(), 31 index: filetree.NewIndex(), 32 pathIndexVisitors: append( 33 []PathIndexVisitor{ 34 requireFileInfo, 35 disallowByFileType, 36 skipPathsByMountTypeAndName(path), 37 }, 38 visitors..., 39 ), 40 errPaths: make(map[string]error), 41 } 42 43 return i 44 } 45 46 // Build the indexer 47 func (r *fileIndexer) build() (filetree.Reader, filetree.IndexReader, error) { 48 return r.tree, r.index, index(r.path, r.indexPath) 49 } 50 51 // Index file at the given path 52 // A file indexer simply indexes the file and its directory. 53 func index(path string, indexer func(string, *progress.AtomicStage) error) error { 54 // We want to index the file at the provided path and its parent directory. 55 // We need to probably check that we have file access 56 // We also need to determine what to do when the file itself is a symlink. 57 prog := bus.StartIndexingFiles(path) 58 defer prog.SetCompleted() 59 60 err := indexer(path, prog.AtomicStage) 61 if err != nil { 62 return fmt.Errorf("unable to index filesystem path=%q: %w", path, err) 63 } 64 65 return nil 66 } 67 68 // indexPath will index the file at the provided path as well as its parent directory. 69 // It expects path to be a file, not a directory. 70 // If a directory is provided then an error will be returned. Additionally, any IO or 71 // permissions errors on the file at path or its parent directory will return an error. 72 // Filter functions provided to the indexer are honoured, so if the path provided (or its parent 73 // directory) is filtered by a filter function, an error is returned. 74 func (r *fileIndexer) indexPath(path string, stager *progress.AtomicStage) error { 75 log.WithFields("path", path).Trace("indexing file path") 76 77 absPath, err := filepath.Abs(path) 78 if err != nil { 79 return err 80 } 81 82 // Protect against callers trying to call file_indexer with directories 83 fi, err := os.Stat(absPath) 84 // The directory indexer ignores stat errors, however this file indexer won't ignore them 85 if err != nil { 86 return fmt.Errorf("unable to stat path=%q: %w", path, err) 87 } 88 if fi.IsDir() { 89 return fmt.Errorf("unable to index file, given path was a directory=%q", path) 90 } 91 92 absSymlinkFreeFilePath, err := absoluteSymlinkFreePathToFile(path) 93 if err != nil { 94 return err 95 } 96 97 // Now index the file and its parent directory 98 // We try to index the parent directory first, because if the parent directory 99 // is ignored by any filter function, then we must ensure we also ignore the file. 100 absSymlinkFreeParent, err := absoluteSymlinkFreePathToParent(absSymlinkFreeFilePath) 101 if err != nil { 102 return err 103 } 104 parentFi, err := os.Stat(absSymlinkFreeParent) 105 if err != nil { 106 return fmt.Errorf("unable to stat parent of file=%q: %w", absSymlinkFreeParent, err) 107 } 108 109 stager.Set(absSymlinkFreeParent) 110 indexParentErr := r.filterAndIndex(absSymlinkFreeParent, parentFi) 111 if indexParentErr != nil { 112 return indexParentErr 113 } 114 115 // We have indexed the parent successfully, now attempt to index the file. 116 stager.Set(absSymlinkFreeFilePath) 117 indexFileErr := r.filterAndIndex(absSymlinkFreeFilePath, fi) 118 if indexFileErr != nil { 119 return indexFileErr 120 } 121 122 return nil 123 } 124 125 func (r *fileIndexer) filterAndIndex(path string, info os.FileInfo) error { 126 // check if any of the filters want us to ignore this path 127 for _, filterFn := range r.pathIndexVisitors { 128 if filterFn == nil { 129 continue 130 } 131 132 if filterErr := filterFn(r.base, path, info, nil); filterErr != nil { 133 // A filter function wants us to ignore this path, honour it 134 return filterErr 135 } 136 } 137 138 // here we check to see if we need to normalize paths to posix on the way in coming from windows 139 if windows.HostRunningOnWindows() { 140 path = windows.ToPosix(path) 141 } 142 143 err := r.addPathToIndex(path, info) 144 // If we hit file access errors, isFileAccessErr will handle logging & adding 145 // the path to the errPaths map. 146 // While the directory_indexer does not let these cause the indexer to throw 147 // we will here, as not having access to the file we index for a file source 148 // probably makes the file source creation useless? I need to check with Syft maintainers. 149 // This also poses the question, is errPaths worthwhile for file_indexer? 150 if r.isFileAccessErr(path, err) { 151 return err 152 } 153 154 return nil 155 } 156 157 // Add path to index. File indexer doesn't need to support symlink, as we should have abs symlink free path. 158 // If we somehow get a symlink here, report as an error. 159 func (r *fileIndexer) addPathToIndex(path string, info os.FileInfo) error { 160 switch t := file.TypeFromMode(info.Mode()); t { 161 case file.TypeDirectory: 162 return r.addDirectoryToIndex(path, info) 163 case file.TypeRegular: 164 return r.addFileToIndex(path, info) 165 default: 166 return fmt.Errorf("unsupported file type: %s", t) 167 } 168 } 169 170 func (r *fileIndexer) addDirectoryToIndex(path string, info os.FileInfo) error { 171 ref, err := r.tree.AddDir(file.Path(path)) 172 if err != nil { 173 return err 174 } 175 176 metadata := NewMetadataFromPath(path, info) 177 r.index.Add(*ref, metadata) 178 179 return nil 180 } 181 182 func (r *fileIndexer) addFileToIndex(path string, info os.FileInfo) error { 183 ref, err := r.tree.AddFile(file.Path(path)) 184 if err != nil { 185 return err 186 } 187 188 metadata := NewMetadataFromPath(path, info) 189 r.index.Add(*ref, metadata) 190 191 return nil 192 } 193 194 // Get absolute symlink free path to parent of the file 195 func absoluteSymlinkFreePathToParent(path string) (string, error) { 196 absFilePath, err := absoluteSymlinkFreePathToFile(path) 197 if err != nil { 198 return "", err 199 } 200 201 return filepath.Dir(absFilePath), nil 202 } 203 204 // Get absolute symlink free path to the file 205 func absoluteSymlinkFreePathToFile(path string) (string, error) { 206 absAnalysisPath, err := filepath.Abs(path) 207 if err != nil { 208 return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err) 209 } 210 dereferencedAbsAnalysisPath, err := filepath.EvalSymlinks(absAnalysisPath) 211 if err != nil { 212 return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err) 213 } 214 return dereferencedAbsAnalysisPath, nil 215 } 216 217 func (r *fileIndexer) isFileAccessErr(path string, err error) bool { 218 // don't allow for errors to stop indexing, keep track of the paths and continue. 219 if err != nil { 220 log.Warnf("unable to access path=%q: %+v", path, err) 221 r.errPaths[path] = err 222 return true 223 } 224 return false 225 }