github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/internal/fileresolver/directory.go (about)

     1  package fileresolver
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"os"
     8  	"path"
     9  	"path/filepath"
    10  	"runtime"
    11  	"strings"
    12  
    13  	"github.com/nextlinux/gosbom/gosbom/file"
    14  	"github.com/nextlinux/gosbom/internal/log"
    15  
    16  	stereoscopeFile "github.com/anchore/stereoscope/pkg/file"
    17  	"github.com/anchore/stereoscope/pkg/filetree"
    18  )
    19  
    20  const WindowsOS = "windows"
    21  
    22  var unixSystemRuntimePrefixes = []string{
    23  	"/proc",
    24  	"/dev",
    25  	"/sys",
    26  }
    27  
    28  var ErrSkipPath = errors.New("skip path")
    29  
    30  var _ file.Resolver = (*Directory)(nil)
    31  
// Directory implements path and content access for the directory data source.
// Instances are created by NewFromDirectory (or newFromDirectoryWithoutIndex, which
// leaves the tree and index empty until buildIndex is called).
type Directory struct {
	// path is the resolver root after symlink evaluation.
	path                    string
	// base is the symlink-evaluated, absolute base directory, or "" when no base was given.
	// When set, responsePath strips this prefix instead of the root prefix.
	base                    string
	// currentWdRelativeToRoot is the root expressed relative to the process CWD when the
	// root is absolute; otherwise it is the cleaned root as given.
	currentWdRelativeToRoot string
	// currentWd is the process working directory captured at construction time.
	currentWd               string
	// tree and index start out empty and are replaced by buildIndex.
	tree                    filetree.Reader
	index                   filetree.IndexReader
	// searchContext is only populated by buildIndex (it is nil before indexing).
	searchContext           filetree.Searcher
	// indexer builds the tree and index for the root; configured with the path filters
	// passed to the constructor.
	indexer                 *directoryIndexer
}
    43  
    44  func NewFromDirectory(root string, base string, pathFilters ...PathIndexVisitor) (*Directory, error) {
    45  	r, err := newFromDirectoryWithoutIndex(root, base, pathFilters...)
    46  	if err != nil {
    47  		return nil, err
    48  	}
    49  
    50  	return r, r.buildIndex()
    51  }
    52  
    53  func newFromDirectoryWithoutIndex(root string, base string, pathFilters ...PathIndexVisitor) (*Directory, error) {
    54  	currentWD, err := os.Getwd()
    55  	if err != nil {
    56  		return nil, fmt.Errorf("could not get CWD: %w", err)
    57  	}
    58  
    59  	cleanRoot, err := filepath.EvalSymlinks(root)
    60  	if err != nil {
    61  		return nil, fmt.Errorf("could not evaluate root=%q symlinks: %w", root, err)
    62  	}
    63  
    64  	cleanBase := ""
    65  	if base != "" {
    66  		cleanBase, err = filepath.EvalSymlinks(base)
    67  		if err != nil {
    68  			return nil, fmt.Errorf("could not evaluate base=%q symlinks: %w", base, err)
    69  		}
    70  		cleanBase, err = filepath.Abs(cleanBase)
    71  		if err != nil {
    72  			return nil, err
    73  		}
    74  	}
    75  
    76  	var currentWdRelRoot string
    77  	if path.IsAbs(cleanRoot) {
    78  		currentWdRelRoot, err = filepath.Rel(currentWD, cleanRoot)
    79  		if err != nil {
    80  			return nil, fmt.Errorf("could not determine given root path to CWD: %w", err)
    81  		}
    82  	} else {
    83  		currentWdRelRoot = filepath.Clean(cleanRoot)
    84  	}
    85  
    86  	return &Directory{
    87  		path:                    cleanRoot,
    88  		base:                    cleanBase,
    89  		currentWd:               currentWD,
    90  		currentWdRelativeToRoot: currentWdRelRoot,
    91  		tree:                    filetree.New(),
    92  		index:                   filetree.NewIndex(),
    93  		indexer:                 newDirectoryIndexer(cleanRoot, cleanBase, pathFilters...),
    94  	}, nil
    95  }
    96  
    97  func (r *Directory) buildIndex() error {
    98  	if r.indexer == nil {
    99  		return fmt.Errorf("no directory indexer configured")
   100  	}
   101  	tree, index, err := r.indexer.build()
   102  	if err != nil {
   103  		return err
   104  	}
   105  
   106  	r.tree = tree
   107  	r.index = index
   108  	r.searchContext = filetree.NewSearchContext(tree, index)
   109  
   110  	return nil
   111  }
   112  
   113  func (r Directory) requestPath(userPath string) (string, error) {
   114  	if filepath.IsAbs(userPath) {
   115  		// don't allow input to potentially hop above root path
   116  		userPath = path.Join(r.path, userPath)
   117  	} else {
   118  		// ensure we take into account any relative difference between the root path and the CWD for relative requests
   119  		userPath = path.Join(r.currentWdRelativeToRoot, userPath)
   120  	}
   121  
   122  	var err error
   123  	userPath, err = filepath.Abs(userPath)
   124  	if err != nil {
   125  		return "", err
   126  	}
   127  	return userPath, nil
   128  }
   129  
   130  // responsePath takes a path from the underlying fs domain and converts it to a path that is relative to the root of the directory resolver.
   131  func (r Directory) responsePath(path string) string {
   132  	// check to see if we need to encode back to Windows from posix
   133  	if runtime.GOOS == WindowsOS {
   134  		path = posixToWindows(path)
   135  	}
   136  
   137  	// clean references to the request path (either the root, or the base if set)
   138  	if filepath.IsAbs(path) {
   139  		var prefix string
   140  		if r.base != "" {
   141  			prefix = r.base
   142  		} else {
   143  			// we need to account for the cwd relative to the running process and the given root for the directory resolver
   144  			prefix = filepath.Clean(filepath.Join(r.currentWd, r.currentWdRelativeToRoot))
   145  			prefix += string(filepath.Separator)
   146  		}
   147  		path = strings.TrimPrefix(path, prefix)
   148  	}
   149  
   150  	return path
   151  }
   152  
   153  // HasPath indicates if the given path exists in the underlying source.
   154  func (r *Directory) HasPath(userPath string) bool {
   155  	requestPath, err := r.requestPath(userPath)
   156  	if err != nil {
   157  		return false
   158  	}
   159  	return r.tree.HasPath(stereoscopeFile.Path(requestPath))
   160  }
   161  
   162  // Stringer to represent a directory path data source
   163  func (r Directory) String() string {
   164  	return fmt.Sprintf("dir:%s", r.path)
   165  }
   166  
   167  // FilesByPath returns all file.References that match the given paths from the directory.
   168  func (r Directory) FilesByPath(userPaths ...string) ([]file.Location, error) {
   169  	var references = make([]file.Location, 0)
   170  
   171  	for _, userPath := range userPaths {
   172  		userStrPath, err := r.requestPath(userPath)
   173  		if err != nil {
   174  			log.Warnf("unable to get file by path=%q : %+v", userPath, err)
   175  			continue
   176  		}
   177  
   178  		// we should be resolving symlinks and preserving this information as a VirtualPath to the real file
   179  		ref, err := r.searchContext.SearchByPath(userStrPath, filetree.FollowBasenameLinks)
   180  		if err != nil {
   181  			log.Tracef("unable to evaluate symlink for path=%q : %+v", userPath, err)
   182  			continue
   183  		}
   184  
   185  		if !ref.HasReference() {
   186  			continue
   187  		}
   188  
   189  		entry, err := r.index.Get(*ref.Reference)
   190  		if err != nil {
   191  			log.Warnf("unable to get file by path=%q : %+v", userPath, err)
   192  			continue
   193  		}
   194  
   195  		// don't consider directories
   196  		if entry.Metadata.IsDir() {
   197  			continue
   198  		}
   199  
   200  		if runtime.GOOS == WindowsOS {
   201  			userStrPath = windowsToPosix(userStrPath)
   202  		}
   203  
   204  		if ref.HasReference() {
   205  			references = append(references,
   206  				file.NewVirtualLocationFromDirectory(
   207  					r.responsePath(string(ref.RealPath)), // the actual path relative to the resolver root
   208  					r.responsePath(userStrPath),          // the path used to access this file, relative to the resolver root
   209  					*ref.Reference,
   210  				),
   211  			)
   212  		}
   213  	}
   214  
   215  	return references, nil
   216  }
   217  
   218  // FilesByGlob returns all file.References that match the given path glob pattern from any layer in the image.
   219  func (r Directory) FilesByGlob(patterns ...string) ([]file.Location, error) {
   220  	uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
   221  	uniqueLocations := make([]file.Location, 0)
   222  
   223  	for _, pattern := range patterns {
   224  		refVias, err := r.searchContext.SearchByGlob(pattern, filetree.FollowBasenameLinks)
   225  		if err != nil {
   226  			return nil, err
   227  		}
   228  		for _, refVia := range refVias {
   229  			if !refVia.HasReference() || uniqueFileIDs.Contains(*refVia.Reference) {
   230  				continue
   231  			}
   232  			entry, err := r.index.Get(*refVia.Reference)
   233  			if err != nil {
   234  				return nil, fmt.Errorf("unable to get file metadata for reference %s: %w", refVia.Reference.RealPath, err)
   235  			}
   236  
   237  			// don't consider directories
   238  			if entry.Metadata.IsDir() {
   239  				continue
   240  			}
   241  
   242  			loc := file.NewVirtualLocationFromDirectory(
   243  				r.responsePath(string(refVia.Reference.RealPath)), // the actual path relative to the resolver root
   244  				r.responsePath(string(refVia.RequestPath)),        // the path used to access this file, relative to the resolver root
   245  				*refVia.Reference,
   246  			)
   247  			uniqueFileIDs.Add(*refVia.Reference)
   248  			uniqueLocations = append(uniqueLocations, loc)
   249  		}
   250  	}
   251  
   252  	return uniqueLocations, nil
   253  }
   254  
   255  // RelativeFileByPath fetches a single file at the given path relative to the layer squash of the given reference.
   256  // This is helpful when attempting to find a file that is in the same layer or lower as another file. For the
   257  // Directory, this is a simple path lookup.
   258  func (r *Directory) RelativeFileByPath(_ file.Location, path string) *file.Location {
   259  	paths, err := r.FilesByPath(path)
   260  	if err != nil {
   261  		return nil
   262  	}
   263  	if len(paths) == 0 {
   264  		return nil
   265  	}
   266  
   267  	return &paths[0]
   268  }
   269  
   270  // FileContentsByLocation fetches file contents for a single file reference relative to a directory.
   271  // If the path does not exist an error is returned.
   272  func (r Directory) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
   273  	if location.RealPath == "" {
   274  		return nil, errors.New("empty path given")
   275  	}
   276  
   277  	entry, err := r.index.Get(location.Reference())
   278  	if err != nil {
   279  		return nil, err
   280  	}
   281  
   282  	// don't consider directories
   283  	if entry.Type == stereoscopeFile.TypeDirectory {
   284  		return nil, fmt.Errorf("cannot read contents of non-file %q", location.Reference().RealPath)
   285  	}
   286  
   287  	// RealPath is posix so for windows directory resolver we need to translate
   288  	// to its true on disk path.
   289  	filePath := string(location.Reference().RealPath)
   290  	if runtime.GOOS == WindowsOS {
   291  		filePath = posixToWindows(filePath)
   292  	}
   293  
   294  	return stereoscopeFile.NewLazyReadCloser(filePath), nil
   295  }
   296  
   297  func (r *Directory) AllLocations() <-chan file.Location {
   298  	results := make(chan file.Location)
   299  	go func() {
   300  		defer close(results)
   301  		for _, ref := range r.tree.AllFiles(stereoscopeFile.AllTypes()...) {
   302  			results <- file.NewLocationFromDirectory(r.responsePath(string(ref.RealPath)), ref)
   303  		}
   304  	}()
   305  	return results
   306  }
   307  
   308  func (r *Directory) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
   309  	entry, err := r.index.Get(location.Reference())
   310  	if err != nil {
   311  		return file.Metadata{}, fmt.Errorf("location: %+v : %w", location, os.ErrNotExist)
   312  	}
   313  
   314  	return entry.Metadata, nil
   315  }
   316  
   317  func (r *Directory) FilesByMIMEType(types ...string) ([]file.Location, error) {
   318  	uniqueFileIDs := stereoscopeFile.NewFileReferenceSet()
   319  	uniqueLocations := make([]file.Location, 0)
   320  
   321  	refVias, err := r.searchContext.SearchByMIMEType(types...)
   322  	if err != nil {
   323  		return nil, err
   324  	}
   325  	for _, refVia := range refVias {
   326  		if !refVia.HasReference() {
   327  			continue
   328  		}
   329  		if uniqueFileIDs.Contains(*refVia.Reference) {
   330  			continue
   331  		}
   332  		location := file.NewLocationFromDirectory(
   333  			r.responsePath(string(refVia.Reference.RealPath)),
   334  			*refVia.Reference,
   335  		)
   336  		uniqueFileIDs.Add(*refVia.Reference)
   337  		uniqueLocations = append(uniqueLocations, location)
   338  	}
   339  
   340  	return uniqueLocations, nil
   341  }
   342  
   343  func windowsToPosix(windowsPath string) (posixPath string) {
   344  	// volume should be encoded at the start (e.g /c/<path>) where c is the volume
   345  	volumeName := filepath.VolumeName(windowsPath)
   346  	pathWithoutVolume := strings.TrimPrefix(windowsPath, volumeName)
   347  	volumeLetter := strings.ToLower(strings.TrimSuffix(volumeName, ":"))
   348  
   349  	// translate non-escaped backslash to forwardslash
   350  	translatedPath := strings.ReplaceAll(pathWithoutVolume, "\\", "/")
   351  
   352  	// always have `/` as the root... join all components, e.g.:
   353  	// convert: C:\\some\windows\Place
   354  	// into: /c/some/windows/Place
   355  	return path.Clean("/" + strings.Join([]string{volumeLetter, translatedPath}, "/"))
   356  }
   357  
   358  func posixToWindows(posixPath string) (windowsPath string) {
   359  	// decode the volume (e.g. /c/<path> --> C:\\) - There should always be a volume name.
   360  	pathFields := strings.Split(posixPath, "/")
   361  	volumeName := strings.ToUpper(pathFields[1]) + `:\\`
   362  
   363  	// translate non-escaped forward slashes into backslashes
   364  	remainingTranslatedPath := strings.Join(pathFields[2:], "\\")
   365  
   366  	// combine volume name and backslash components
   367  	return filepath.Clean(volumeName + remainingTranslatedPath)
   368  }