github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/internal/fileresolver/directory_indexer.go (about) 1 package fileresolver 2 3 import ( 4 "errors" 5 "fmt" 6 "io/fs" 7 "os" 8 "path" 9 "path/filepath" 10 "strings" 11 12 "github.com/wagoodman/go-partybus" 13 "github.com/wagoodman/go-progress" 14 15 "github.com/anchore/stereoscope/pkg/file" 16 "github.com/anchore/stereoscope/pkg/filetree" 17 "github.com/anchore/syft/internal" 18 "github.com/anchore/syft/internal/bus" 19 "github.com/anchore/syft/internal/log" 20 "github.com/anchore/syft/syft/event" 21 "github.com/anchore/syft/syft/internal/windows" 22 ) 23 24 type PathIndexVisitor func(string, os.FileInfo, error) error 25 26 type directoryIndexer struct { 27 path string 28 base string 29 pathIndexVisitors []PathIndexVisitor 30 errPaths map[string]error 31 tree filetree.ReadWriter 32 index filetree.Index 33 } 34 35 func newDirectoryIndexer(path, base string, visitors ...PathIndexVisitor) *directoryIndexer { 36 i := &directoryIndexer{ 37 path: path, 38 base: base, 39 tree: filetree.New(), 40 index: filetree.NewIndex(), 41 pathIndexVisitors: append([]PathIndexVisitor{requireFileInfo, disallowByFileType, disallowUnixSystemRuntimePath}, visitors...), 42 errPaths: make(map[string]error), 43 } 44 45 // these additional stateful visitors should be the first thing considered when walking / indexing 46 i.pathIndexVisitors = append( 47 []PathIndexVisitor{ 48 i.disallowRevisitingVisitor, 49 i.disallowFileAccessErr, 50 }, 51 i.pathIndexVisitors..., 52 ) 53 54 return i 55 } 56 57 func (r *directoryIndexer) build() (filetree.Reader, filetree.IndexReader, error) { 58 return r.tree, r.index, indexAllRoots(r.path, r.indexTree) 59 } 60 61 func indexAllRoots(root string, indexer func(string, *progress.Stage) ([]string, error)) error { 62 // why account for multiple roots? To cover cases when there is a symlink that references above the root path, 63 // in which case we need to additionally index where the link resolves to. it's for this reason why the filetree 64 // must be relative to the root of the filesystem (and not just relative to the given path). 65 pathsToIndex := []string{root} 66 fullPathsMap := map[string]struct{}{} 67 68 stager, prog := indexingProgress(root) 69 defer prog.SetCompleted() 70 loop: 71 for { 72 var currentPath string 73 switch len(pathsToIndex) { 74 case 0: 75 break loop 76 case 1: 77 currentPath, pathsToIndex = pathsToIndex[0], nil 78 default: 79 currentPath, pathsToIndex = pathsToIndex[0], pathsToIndex[1:] 80 } 81 82 additionalRoots, err := indexer(currentPath, stager) 83 if err != nil { 84 return fmt.Errorf("unable to index filesystem path=%q: %w", currentPath, err) 85 } 86 87 for _, newRoot := range additionalRoots { 88 if _, ok := fullPathsMap[newRoot]; !ok { 89 fullPathsMap[newRoot] = struct{}{} 90 pathsToIndex = append(pathsToIndex, newRoot) 91 } 92 } 93 } 94 95 return nil 96 } 97 98 func (r *directoryIndexer) indexTree(root string, stager *progress.Stage) ([]string, error) { 99 log.WithFields("path", root).Trace("indexing filetree") 100 101 var roots []string 102 var err error 103 104 root, err = filepath.Abs(root) 105 if err != nil { 106 return nil, err 107 } 108 109 // we want to be able to index single files with the directory resolver. However, we should also allow for attempting 110 // to index paths that do not exist (that is, a root that does not exist is not an error case that should stop indexing). 111 // For this reason we look for an opportunity to discover if the given root is a file, and if so add a single root, 112 // but continue forth with index regardless if the given root path exists or not. 113 fi, err := os.Stat(root) 114 if err != nil && fi != nil && !fi.IsDir() { 115 // note: we want to index the path regardless of an error stat-ing the path 116 newRoot, _ := r.indexPath(root, fi, nil) 117 if newRoot != "" { 118 roots = append(roots, newRoot) 119 } 120 return roots, nil 121 } 122 123 shouldIndexFullTree, err := isRealPath(root) 124 if err != nil { 125 return nil, err 126 } 127 128 if !shouldIndexFullTree { 129 newRoots, err := r.indexBranch(root, stager) 130 if err != nil { 131 return nil, fmt.Errorf("unable to index branch=%q: %w", root, err) 132 } 133 134 roots = append(roots, newRoots...) 135 136 return roots, nil 137 } 138 139 err = filepath.Walk(root, 140 func(path string, info os.FileInfo, err error) error { 141 stager.Current = path 142 143 newRoot, err := r.indexPath(path, info, err) 144 145 if err != nil { 146 return err 147 } 148 149 if newRoot != "" { 150 roots = append(roots, newRoot) 151 } 152 153 return nil 154 }) 155 156 if err != nil { 157 return nil, fmt.Errorf("unable to index root=%q: %w", root, err) 158 } 159 160 return roots, nil 161 } 162 163 func isRealPath(root string) (bool, error) { 164 rootParent := filepath.Clean(filepath.Dir(root)) 165 166 realRootParent, err := filepath.EvalSymlinks(rootParent) 167 if err != nil { 168 return false, err 169 } 170 171 realRootParent = filepath.Clean(realRootParent) 172 173 return rootParent == realRootParent, nil 174 } 175 176 func (r *directoryIndexer) indexBranch(root string, stager *progress.Stage) ([]string, error) { 177 rootRealPath, err := filepath.EvalSymlinks(root) 178 if err != nil { 179 return nil, err 180 } 181 182 // there is a symlink within the path to the root, we need to index the real root parent first 183 // then capture the symlinks to the root path 184 roots, err := r.indexTree(rootRealPath, stager) 185 if err != nil { 186 return nil, fmt.Errorf("unable to index real root=%q: %w", rootRealPath, err) 187 } 188 189 // walk down all ancestor paths and shallow-add non-existing elements to the tree 190 for idx, p := range allContainedPaths(root) { 191 var targetPath string 192 if idx != 0 { 193 parent := path.Dir(p) 194 cleanParent, err := filepath.EvalSymlinks(parent) 195 if err != nil { 196 return nil, fmt.Errorf("unable to evaluate symlink for contained path parent=%q: %w", parent, err) 197 } 198 targetPath = filepath.Join(cleanParent, filepath.Base(p)) 199 } else { 200 targetPath = p 201 } 202 203 stager.Current = targetPath 204 205 lstat, err := os.Lstat(targetPath) 206 newRoot, err := r.indexPath(targetPath, lstat, err) 207 if err != nil && !errors.Is(err, ErrSkipPath) && !errors.Is(err, fs.SkipDir) { 208 return nil, fmt.Errorf("unable to index ancestor path=%q: %w", targetPath, err) 209 } 210 if newRoot != "" { 211 roots = append(roots, newRoot) 212 } 213 } 214 215 return roots, nil 216 } 217 218 func allContainedPaths(p string) []string { 219 var all []string 220 var currentPath string 221 222 cleanPath := strings.TrimSpace(p) 223 224 if cleanPath == "" { 225 return nil 226 } 227 228 // iterate through all parts of the path, replacing path elements with link resolutions where possible. 229 for idx, part := range strings.Split(filepath.Clean(cleanPath), file.DirSeparator) { 230 if idx == 0 && part == "" { 231 currentPath = file.DirSeparator 232 continue 233 } 234 235 // cumulatively gather where we are currently at and provide a rich object 236 currentPath = path.Join(currentPath, part) 237 all = append(all, currentPath) 238 } 239 return all 240 } 241 242 func (r *directoryIndexer) indexPath(path string, info os.FileInfo, err error) (string, error) { 243 // ignore any path which a filter function returns true 244 for _, filterFn := range r.pathIndexVisitors { 245 if filterFn == nil { 246 continue 247 } 248 249 if filterErr := filterFn(path, info, err); filterErr != nil { 250 if errors.Is(filterErr, fs.SkipDir) { 251 // signal to walk() to skip this directory entirely (even if we're processing a file) 252 return "", filterErr 253 } 254 // skip this path but don't affect walk() trajectory 255 return "", nil 256 } 257 } 258 259 if info == nil { 260 // walk may not be able to provide a FileInfo object, don't allow for this to stop indexing; keep track of the paths and continue. 261 r.errPaths[path] = fmt.Errorf("no file info observable at path=%q", path) 262 return "", nil 263 } 264 265 // here we check to see if we need to normalize paths to posix on the way in coming from windows 266 if windows.HostRunningOnWindows() { 267 path = windows.ToPosix(path) 268 } 269 270 newRoot, err := r.addPathToIndex(path, info) 271 if r.isFileAccessErr(path, err) { 272 return "", nil 273 } 274 275 return newRoot, nil 276 } 277 278 func (r *directoryIndexer) disallowFileAccessErr(path string, _ os.FileInfo, err error) error { 279 if r.isFileAccessErr(path, err) { 280 return ErrSkipPath 281 } 282 return nil 283 } 284 285 func (r *directoryIndexer) isFileAccessErr(path string, err error) bool { 286 // don't allow for errors to stop indexing, keep track of the paths and continue. 287 if err != nil { 288 log.Warnf("unable to access path=%q: %+v", path, err) 289 r.errPaths[path] = err 290 return true 291 } 292 return false 293 } 294 295 func (r directoryIndexer) addPathToIndex(p string, info os.FileInfo) (string, error) { 296 switch t := file.TypeFromMode(info.Mode()); t { 297 case file.TypeSymLink: 298 return r.addSymlinkToIndex(p, info) 299 case file.TypeDirectory: 300 return "", r.addDirectoryToIndex(p, info) 301 case file.TypeRegular: 302 return "", r.addFileToIndex(p, info) 303 default: 304 return "", fmt.Errorf("unsupported file type: %s", t) 305 } 306 } 307 308 func (r directoryIndexer) addDirectoryToIndex(p string, info os.FileInfo) error { 309 ref, err := r.tree.AddDir(file.Path(p)) 310 if err != nil { 311 return err 312 } 313 314 metadata := file.NewMetadataFromPath(p, info) 315 r.index.Add(*ref, metadata) 316 317 return nil 318 } 319 320 func (r directoryIndexer) addFileToIndex(p string, info os.FileInfo) error { 321 ref, err := r.tree.AddFile(file.Path(p)) 322 if err != nil { 323 return err 324 } 325 326 metadata := file.NewMetadataFromPath(p, info) 327 r.index.Add(*ref, metadata) 328 329 return nil 330 } 331 332 func (r directoryIndexer) addSymlinkToIndex(p string, info os.FileInfo) (string, error) { 333 linkTarget, err := os.Readlink(p) 334 if err != nil { 335 isOnWindows := windows.HostRunningOnWindows() 336 if isOnWindows { 337 p = windows.FromPosix(p) 338 } 339 340 linkTarget, err = filepath.EvalSymlinks(p) 341 342 if isOnWindows { 343 p = windows.ToPosix(p) 344 } 345 346 if err != nil { 347 return "", fmt.Errorf("unable to readlink for path=%q: %w", p, err) 348 } 349 } 350 351 if filepath.IsAbs(linkTarget) { 352 linkTarget = filepath.Clean(linkTarget) 353 // if the link is absolute (e.g, /bin/ls -> /bin/busybox) we need to 354 // resolve relative to the root of the base directory, if it is not already 355 // prefixed with a volume name 356 if filepath.VolumeName(linkTarget) == "" { 357 linkTarget = filepath.Join(r.base, filepath.Clean(linkTarget)) 358 } 359 } else { 360 // if the link is not absolute (e.g, /dev/stderr -> fd/2 ) we need to 361 // resolve it relative to the directory in question (e.g. resolve to 362 // /dev/fd/2) 363 if r.base == "" { 364 linkTarget = filepath.Join(filepath.Dir(p), linkTarget) 365 } else { 366 // if the base is set, then we first need to resolve the link, 367 // before finding it's location in the base 368 dir, err := filepath.Rel(r.base, filepath.Dir(p)) 369 if err != nil { 370 return "", fmt.Errorf("unable to resolve relative path for path=%q: %w", p, err) 371 } 372 linkTarget = filepath.Join(r.base, filepath.Clean(filepath.Join("/", dir, linkTarget))) 373 } 374 } 375 376 ref, err := r.tree.AddSymLink(file.Path(p), file.Path(linkTarget)) 377 if err != nil { 378 return "", err 379 } 380 381 targetAbsPath := linkTarget 382 if !filepath.IsAbs(targetAbsPath) { 383 targetAbsPath = filepath.Clean(filepath.Join(path.Dir(p), linkTarget)) 384 } 385 386 metadata := file.NewMetadataFromPath(p, info) 387 metadata.LinkDestination = linkTarget 388 r.index.Add(*ref, metadata) 389 390 // if the target path does not exist, then do not report it as a new root, or try to send 391 // syft parsing there. 392 if _, err := os.Stat(targetAbsPath); err != nil && errors.Is(err, os.ErrNotExist) { 393 log.Debugf("link %s points to unresolved path %s, ignoring target as new root", p, targetAbsPath) 394 targetAbsPath = "" 395 } 396 397 return targetAbsPath, nil 398 } 399 400 func (r directoryIndexer) hasBeenIndexed(p string) (bool, *file.Metadata) { 401 filePath := file.Path(p) 402 if !r.tree.HasPath(filePath) { 403 return false, nil 404 } 405 406 exists, ref, err := r.tree.File(filePath) 407 if err != nil || !exists || !ref.HasReference() { 408 return false, nil 409 } 410 411 // cases like "/" will be in the tree, but not been indexed yet (a special case). We want to capture 412 // these cases as new paths to index. 413 if !ref.HasReference() { 414 return false, nil 415 } 416 417 entry, err := r.index.Get(*ref.Reference) 418 if err != nil { 419 return false, nil 420 } 421 422 return true, &entry.Metadata 423 } 424 425 func (r *directoryIndexer) disallowRevisitingVisitor(path string, _ os.FileInfo, _ error) error { 426 // this prevents visiting: 427 // - link destinations twice, once for the real file and another through the virtual path 428 // - infinite link cycles 429 if indexed, metadata := r.hasBeenIndexed(path); indexed { 430 if metadata.IsDir() { 431 // signal to walk() that we should skip this directory entirely 432 return fs.SkipDir 433 } 434 return ErrSkipPath 435 } 436 return nil 437 } 438 439 func disallowUnixSystemRuntimePath(path string, _ os.FileInfo, _ error) error { 440 if internal.HasAnyOfPrefixes(path, unixSystemRuntimePrefixes...) { 441 return fs.SkipDir 442 } 443 return nil 444 } 445 446 func disallowByFileType(_ string, info os.FileInfo, _ error) error { 447 if info == nil { 448 // we can't filter out by filetype for non-existent files 449 return nil 450 } 451 switch file.TypeFromMode(info.Mode()) { 452 case file.TypeCharacterDevice, file.TypeSocket, file.TypeBlockDevice, file.TypeFIFO, file.TypeIrregular: 453 return ErrSkipPath 454 // note: symlinks that point to these files may still get by. 455 // We handle this later in processing to help prevent against infinite links traversal. 456 } 457 458 return nil 459 } 460 461 func requireFileInfo(_ string, info os.FileInfo, _ error) error { 462 if info == nil { 463 return ErrSkipPath 464 } 465 return nil 466 } 467 468 func indexingProgress(path string) (*progress.Stage, *progress.Manual) { 469 stage := &progress.Stage{} 470 prog := progress.NewManual(-1) 471 472 bus.Publish(partybus.Event{ 473 Type: event.FileIndexingStarted, 474 Source: path, 475 Value: struct { 476 progress.Stager 477 progress.Progressable 478 }{ 479 Stager: progress.Stager(stage), 480 Progressable: prog, 481 }, 482 }) 483 484 return stage, prog 485 }