github.com/anchore/syft@v1.38.2/syft/internal/fileresolver/directory_indexer.go (about) 1 package fileresolver 2 3 import ( 4 "errors" 5 "fmt" 6 "io/fs" 7 "os" 8 "path" 9 "path/filepath" 10 "strings" 11 12 "github.com/wagoodman/go-progress" 13 14 "github.com/anchore/stereoscope/pkg/file" 15 "github.com/anchore/stereoscope/pkg/filetree" 16 "github.com/anchore/syft/internal/bus" 17 "github.com/anchore/syft/internal/log" 18 "github.com/anchore/syft/syft/internal/windows" 19 ) 20 21 type PathIndexVisitor func(string, string, os.FileInfo, error) error 22 23 type directoryIndexer struct { 24 path string 25 base string 26 pathIndexVisitors []PathIndexVisitor 27 errPaths map[string]error 28 tree filetree.ReadWriter 29 index filetree.Index 30 } 31 32 func newDirectoryIndexer(path, base string, visitors ...PathIndexVisitor) *directoryIndexer { 33 i := &directoryIndexer{ 34 path: path, 35 base: base, 36 tree: filetree.New(), 37 index: filetree.NewIndex(), 38 pathIndexVisitors: append( 39 []PathIndexVisitor{ 40 requireFileInfo, 41 disallowByFileType, 42 skipPathsByMountTypeAndName(path), 43 }, 44 visitors..., 45 ), 46 errPaths: make(map[string]error), 47 } 48 49 // these additional stateful visitors should be the first thing considered when walking / indexing 50 i.pathIndexVisitors = append( 51 []PathIndexVisitor{ 52 i.disallowRevisitingVisitor, 53 i.disallowFileAccessErr, 54 }, 55 i.pathIndexVisitors..., 56 ) 57 58 return i 59 } 60 61 func (r *directoryIndexer) build() (filetree.Reader, filetree.IndexReader, error) { 62 return r.tree, r.index, indexAllRoots(r.path, r.indexTree) 63 } 64 65 func indexAllRoots(root string, indexer func(string, *progress.AtomicStage) ([]string, error)) error { 66 // why account for multiple roots? To cover cases when there is a symlink that references above the root path, 67 // in which case we need to additionally index where the link resolves to. it's for this reason why the filetree 68 // must be relative to the root of the filesystem (and not just relative to the given path). 69 pathsToIndex := []string{root} 70 fullPathsMap := map[string]struct{}{} 71 72 prog := bus.StartIndexingFiles(root) 73 defer prog.SetCompleted() 74 loop: 75 for { 76 var currentPath string 77 switch len(pathsToIndex) { 78 case 0: 79 break loop 80 case 1: 81 currentPath, pathsToIndex = pathsToIndex[0], nil 82 default: 83 currentPath, pathsToIndex = pathsToIndex[0], pathsToIndex[1:] 84 } 85 86 additionalRoots, err := indexer(currentPath, prog.AtomicStage) 87 if err != nil { 88 return fmt.Errorf("unable to index filesystem path=%q: %w", currentPath, err) 89 } 90 91 for _, newRoot := range additionalRoots { 92 if _, ok := fullPathsMap[newRoot]; !ok { 93 fullPathsMap[newRoot] = struct{}{} 94 pathsToIndex = append(pathsToIndex, newRoot) 95 } 96 } 97 } 98 99 return nil 100 } 101 102 func (r *directoryIndexer) indexTree(root string, stager *progress.AtomicStage) ([]string, error) { 103 log.WithFields("path", root).Trace("indexing filetree") 104 105 var roots []string 106 var err error 107 108 root, err = filepath.Abs(root) 109 if err != nil { 110 return nil, err 111 } 112 113 // we want to be able to index single files with the directory resolver. However, we should also allow for attempting 114 // to index paths that do not exist (that is, a root that does not exist is not an error case that should stop indexing). 115 // For this reason we look for an opportunity to discover if the given root is a file, and if so add a single root, 116 // but continue forth with index regardless if the given root path exists or not. 117 fi, err := os.Stat(root) 118 if err != nil && fi != nil && !fi.IsDir() { 119 // note: we want to index the path regardless of an error stat-ing the path 120 newRoot, _ := r.indexPath(root, fi, nil) 121 if newRoot != "" { 122 roots = append(roots, newRoot) 123 } 124 return roots, nil 125 } 126 127 shouldIndexFullTree, err := isRealPath(root) 128 if err != nil { 129 return nil, err 130 } 131 132 if !shouldIndexFullTree { 133 newRoots, err := r.indexBranch(root, stager) 134 if err != nil { 135 return nil, fmt.Errorf("unable to index branch=%q: %w", root, err) 136 } 137 138 roots = append(roots, newRoots...) 139 140 return roots, nil 141 } 142 143 err = filepath.Walk(root, 144 func(path string, info os.FileInfo, err error) error { 145 stager.Set(path) 146 147 newRoot, err := r.indexPath(path, info, err) 148 149 if err != nil { 150 return err 151 } 152 153 if newRoot != "" { 154 roots = append(roots, newRoot) 155 } 156 157 return nil 158 }) 159 160 if err != nil { 161 return nil, fmt.Errorf("unable to index root=%q: %w", root, err) 162 } 163 164 return roots, nil 165 } 166 167 func isRealPath(root string) (bool, error) { 168 rootParent := filepath.Clean(filepath.Dir(root)) 169 170 realRootParent, err := filepath.EvalSymlinks(rootParent) 171 if err != nil { 172 return false, err 173 } 174 175 realRootParent = filepath.Clean(realRootParent) 176 177 return rootParent == realRootParent, nil 178 } 179 180 func (r *directoryIndexer) indexBranch(root string, stager *progress.AtomicStage) ([]string, error) { 181 rootRealPath, err := filepath.EvalSymlinks(root) 182 if err != nil { 183 var pathErr *os.PathError 184 if errors.As(err, &pathErr) { 185 // we can't index the path, but we shouldn't consider this to be fatal 186 // TODO: known-unknowns 187 log.WithFields("root", root, "error", err).Trace("unable to evaluate symlink while indexing branch") 188 return nil, nil 189 } 190 return nil, err 191 } 192 193 // there is a symlink within the path to the root, we need to index the real root parent first 194 // then capture the symlinks to the root path 195 roots, err := r.indexTree(rootRealPath, stager) 196 if err != nil { 197 return nil, fmt.Errorf("unable to index real root=%q: %w", rootRealPath, err) 198 } 199 200 // walk down all ancestor paths and shallow-add non-existing elements to the tree 201 for idx, p := range allContainedPaths(root) { 202 var targetPath string 203 if idx != 0 { 204 parent := path.Dir(p) 205 cleanParent, err := filepath.EvalSymlinks(parent) 206 if err != nil { 207 return nil, fmt.Errorf("unable to evaluate symlink for contained path parent=%q: %w", parent, err) 208 } 209 targetPath = filepath.Join(cleanParent, filepath.Base(p)) 210 } else { 211 targetPath = p 212 } 213 214 stager.Set(targetPath) 215 216 lstat, err := os.Lstat(targetPath) 217 newRoot, err := r.indexPath(targetPath, lstat, err) 218 if err != nil && !errors.Is(err, ErrSkipPath) && !errors.Is(err, fs.SkipDir) { 219 return nil, fmt.Errorf("unable to index ancestor path=%q: %w", targetPath, err) 220 } 221 if newRoot != "" { 222 roots = append(roots, newRoot) 223 } 224 } 225 226 return roots, nil 227 } 228 229 func allContainedPaths(p string) []string { 230 var all []string 231 var currentPath string 232 233 cleanPath := strings.TrimSpace(p) 234 235 if cleanPath == "" { 236 return nil 237 } 238 239 // iterate through all parts of the path, replacing path elements with link resolutions where possible. 240 for idx, part := range strings.Split(filepath.Clean(cleanPath), file.DirSeparator) { 241 if idx == 0 && part == "" { 242 currentPath = file.DirSeparator 243 continue 244 } 245 246 // cumulatively gather where we are currently at and provide a rich object 247 currentPath = path.Join(currentPath, part) 248 all = append(all, currentPath) 249 } 250 return all 251 } 252 253 func (r *directoryIndexer) indexPath(givenPath string, info os.FileInfo, err error) (string, error) { 254 // ignore any path which a filter function returns true 255 for _, filterFn := range r.pathIndexVisitors { 256 if filterFn == nil { 257 continue 258 } 259 260 if filterErr := filterFn(r.base, givenPath, info, err); filterErr != nil { 261 if errors.Is(filterErr, fs.SkipDir) { 262 // signal to walk() to skip this directory entirely (even if we're processing a file) 263 return "", filterErr 264 } 265 // skip this path but don't affect walk() trajectory 266 return "", nil 267 } 268 } 269 270 if info == nil { 271 // walk may not be able to provide a FileInfo object, don't allow for this to stop indexing; keep track of the paths and continue. 272 r.errPaths[givenPath] = fmt.Errorf("no file info observable at path=%q", givenPath) 273 return "", nil 274 } 275 276 // here we check to see if we need to normalize paths to posix on the way in coming from windows 277 if windows.HostRunningOnWindows() { 278 givenPath = windows.ToPosix(givenPath) 279 } 280 281 newRoot, err := r.addPathToIndex(givenPath, info) 282 if r.isFileAccessErr(givenPath, err) { 283 return "", nil 284 } 285 286 return newRoot, nil 287 } 288 289 func (r *directoryIndexer) disallowFileAccessErr(_, path string, _ os.FileInfo, err error) error { 290 if r.isFileAccessErr(path, err) { 291 return ErrSkipPath 292 } 293 return nil 294 } 295 296 func (r *directoryIndexer) isFileAccessErr(path string, err error) bool { 297 // don't allow for errors to stop indexing, keep track of the paths and continue. 298 if err != nil { 299 log.Warnf("unable to access path=%q: %+v", path, err) 300 r.errPaths[path] = err 301 return true 302 } 303 return false 304 } 305 306 func (r directoryIndexer) addPathToIndex(p string, info os.FileInfo) (string, error) { 307 switch t := file.TypeFromMode(info.Mode()); t { 308 case file.TypeSymLink: 309 return r.addSymlinkToIndex(p, info) 310 case file.TypeDirectory: 311 return "", r.addDirectoryToIndex(p, info) 312 case file.TypeRegular: 313 return "", r.addFileToIndex(p, info) 314 default: 315 return "", fmt.Errorf("unsupported file type: %s", t) 316 } 317 } 318 319 func (r directoryIndexer) addDirectoryToIndex(p string, info os.FileInfo) error { 320 ref, err := r.tree.AddDir(file.Path(p)) 321 if err != nil { 322 return err 323 } 324 325 metadata := NewMetadataFromPath(p, info) 326 r.index.Add(*ref, metadata) 327 328 return nil 329 } 330 331 func (r directoryIndexer) addFileToIndex(p string, info os.FileInfo) error { 332 ref, err := r.tree.AddFile(file.Path(p)) 333 if err != nil { 334 return err 335 } 336 337 metadata := NewMetadataFromPath(p, info) 338 r.index.Add(*ref, metadata) 339 340 return nil 341 } 342 343 func (r directoryIndexer) addSymlinkToIndex(p string, info os.FileInfo) (string, error) { 344 linkTarget, err := os.Readlink(p) 345 if err != nil { 346 isOnWindows := windows.HostRunningOnWindows() 347 if isOnWindows { 348 p = windows.FromPosix(p) 349 } 350 351 linkTarget, err = filepath.EvalSymlinks(p) 352 353 if isOnWindows { 354 p = windows.ToPosix(p) 355 } 356 357 if err != nil { 358 return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err) 359 } 360 } 361 362 if filepath.IsAbs(linkTarget) { 363 linkTarget = filepath.Clean(linkTarget) 364 // if the link is absolute (e.g, /bin/ls -> /bin/busybox) we need to 365 // resolve relative to the root of the base directory, if it is not already 366 // prefixed with a volume name 367 if filepath.VolumeName(linkTarget) == "" { 368 linkTarget = filepath.Join(r.base, filepath.Clean(linkTarget)) 369 } 370 } else { 371 // if the link is not absolute (e.g, /dev/stderr -> fd/2 ) we need to 372 // resolve it relative to the directory in question (e.g. resolve to 373 // /dev/fd/2) 374 if r.base == "" { 375 linkTarget = filepath.Join(filepath.Dir(p), linkTarget) 376 } else { 377 // if the base is set, then we first need to resolve the link, 378 // before finding it's location in the base 379 dir, err := filepath.Rel(r.base, filepath.Dir(p)) 380 // if the relative path to the base contains "..",i.e. p is the parent or ancestor of the base 381 // For example: 382 // dir: "/root/asymlink" -> "/root/realdir" (linkTarget:"realdir") 383 // base: "/root/asymlink" 384 // so the relative path of /root to the "/root/asymlink" is ".." 385 // we cannot directly concatenate ".." to "/root/symlink",however, 386 // the parent directory of linkTarget should be "/root" 387 for strings.HasPrefix(dir, "..") { 388 if strings.HasPrefix(dir, "../") { 389 dir = strings.TrimPrefix(dir, "../") 390 } else { 391 dir = strings.TrimPrefix(dir, "..") 392 } 393 lastSlash := strings.LastIndex(r.base, "/") 394 if lastSlash != -1 { 395 r.base = r.base[:lastSlash] 396 } 397 // In case of the root directory 398 if r.base == "" { 399 r.base = "/" 400 } 401 } 402 if err != nil { 403 return "", fmt.Errorf("unable to resolve relative path for path=%q: %w", p, err) 404 } 405 linkTarget = filepath.Join(r.base, filepath.Clean(filepath.Join("/", dir, linkTarget))) 406 } 407 } 408 409 ref, err := r.tree.AddSymLink(file.Path(p), file.Path(linkTarget)) 410 if err != nil { 411 return "", err 412 } 413 414 targetAbsPath := linkTarget 415 if !filepath.IsAbs(targetAbsPath) { 416 targetAbsPath = filepath.Clean(filepath.Join(path.Dir(p), linkTarget)) 417 } 418 419 metadata := NewMetadataFromPath(p, info) 420 metadata.LinkDestination = linkTarget 421 r.index.Add(*ref, metadata) 422 423 // if the target path does not exist, then do not report it as a new root, or try to send 424 // syft parsing there. 425 if _, err := os.Stat(targetAbsPath); err != nil && errors.Is(err, os.ErrNotExist) { 426 log.Debugf("link %s points to unresolved path %s, ignoring target as new root", p, targetAbsPath) 427 targetAbsPath = "" 428 } 429 430 return targetAbsPath, nil 431 } 432 433 func (r directoryIndexer) hasBeenIndexed(p string) (bool, *file.Metadata) { 434 filePath := file.Path(p) 435 if !r.tree.HasPath(filePath) { 436 return false, nil 437 } 438 439 exists, ref, err := r.tree.File(filePath) 440 if err != nil || !exists || !ref.HasReference() { 441 return false, nil 442 } 443 444 // cases like "/" will be in the tree, but not been indexed yet (a special case). We want to capture 445 // these cases as new paths to index. 446 if !ref.HasReference() { 447 return false, nil 448 } 449 450 entry, err := r.index.Get(*ref.Reference) 451 if err != nil { 452 return false, nil 453 } 454 455 return true, &entry.Metadata 456 } 457 458 func (r *directoryIndexer) disallowRevisitingVisitor(_, path string, _ os.FileInfo, _ error) error { 459 // this prevents visiting: 460 // - link destinations twice, once for the real file and another through the virtual path 461 // - infinite link cycles 462 if indexed, metadata := r.hasBeenIndexed(path); indexed { 463 if metadata.IsDir() { 464 // signal to walk() that we should skip this directory entirely 465 return fs.SkipDir 466 } 467 return ErrSkipPath 468 } 469 return nil 470 } 471 472 func disallowByFileType(_, _ string, info os.FileInfo, _ error) error { 473 if info == nil { 474 // we can't filter out by filetype for non-existent files 475 return nil 476 } 477 switch file.TypeFromMode(info.Mode()) { 478 case file.TypeCharacterDevice, file.TypeSocket, file.TypeBlockDevice, file.TypeFIFO, file.TypeIrregular: 479 return ErrSkipPath 480 // note: symlinks that point to these files may still get by. 481 // We handle this later in processing to help prevent against infinite links traversal. 482 } 483 484 return nil 485 } 486 487 func requireFileInfo(_, _ string, info os.FileInfo, _ error) error { 488 if info == nil { 489 return ErrSkipPath 490 } 491 return nil 492 }