github.com/anchore/syft@v1.38.2/syft/source/directorysource/directory_source.go (about)

     1  package directorysource
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"path/filepath"
     7  	"strings"
     8  	"sync"
     9  
    10  	"github.com/bmatcuk/doublestar/v4"
    11  	"github.com/opencontainers/go-digest"
    12  
    13  	"github.com/anchore/syft/internal/log"
    14  	"github.com/anchore/syft/syft/artifact"
    15  	"github.com/anchore/syft/syft/file"
    16  	"github.com/anchore/syft/syft/internal/fileresolver"
    17  	"github.com/anchore/syft/syft/source"
    18  	"github.com/anchore/syft/syft/source/internal"
    19  )
    20  
// compile-time assertion that *directorySource implements the source.Source interface.
var _ source.Source = (*directorySource)(nil)
    22  
// Config holds the options used to construct a directory source.
type Config struct {
	// Path is the directory to scan; New rejects it if it cannot be stat'ed or is not a directory.
	Path string
	// Base is an optional base directory; when set, the source name is derived from Path relative to Base
	// (when both can be normalized) and it is handed to the directory file resolver.
	Base string
	// Exclude carries the exclusion configuration; its Paths are turned into path visitors for the resolver.
	Exclude source.ExcludeConfig
	// Alias optionally overrides the name, version, and supplier reported by Describe, and, when not empty,
	// is used instead of the path to derive the artifact ID.
	Alias source.Alias
}
    29  
// directorySource is the source.Source implementation backed by a directory on disk.
type directorySource struct {
	// id is the artifact ID derived from the config at construction time.
	id artifact.ID
	// config is the configuration this source was created with.
	config Config
	// resolver is created lazily by FileResolver and reset to nil by Close.
	resolver file.Resolver
	// mutex guards access to resolver in FileResolver and Close.
	mutex *sync.Mutex
}
    36  
    37  func NewFromPath(path string) (source.Source, error) {
    38  	return New(Config{Path: path})
    39  }
    40  
    41  func New(cfg Config) (source.Source, error) {
    42  	fileMeta, err := os.Stat(cfg.Path)
    43  	if err != nil {
    44  		return nil, fmt.Errorf("unable to stat path=%q: %w", cfg.Path, err)
    45  	}
    46  
    47  	if !fileMeta.IsDir() {
    48  		return nil, fmt.Errorf("given path is not a directory: %q", cfg.Path)
    49  	}
    50  
    51  	return &directorySource{
    52  		id:     deriveIDFromDirectory(cfg),
    53  		config: cfg,
    54  		mutex:  &sync.Mutex{},
    55  	}, nil
    56  }
    57  
// ID returns the artifact ID that was derived for this source when it was constructed.
func (s directorySource) ID() artifact.ID {
	return s.id
}
    61  
    62  func (s directorySource) Describe() source.Description {
    63  	name := cleanDirPath(s.config.Path, s.config.Base)
    64  	version := ""
    65  	supplier := ""
    66  	if !s.config.Alias.IsEmpty() {
    67  		a := s.config.Alias
    68  		if a.Name != "" {
    69  			name = a.Name
    70  		}
    71  
    72  		if a.Version != "" {
    73  			version = a.Version
    74  		}
    75  
    76  		if a.Supplier != "" {
    77  			supplier = a.Supplier
    78  		}
    79  	}
    80  	return source.Description{
    81  		ID:       string(s.id),
    82  		Name:     name,
    83  		Version:  version,
    84  		Supplier: supplier,
    85  		Metadata: source.DirectoryMetadata{
    86  			Path: s.config.Path,
    87  			Base: s.config.Base,
    88  		},
    89  	}
    90  }
    91  
    92  func (s *directorySource) FileResolver(_ source.Scope) (file.Resolver, error) {
    93  	s.mutex.Lock()
    94  	defer s.mutex.Unlock()
    95  
    96  	if s.resolver != nil {
    97  		return s.resolver, nil
    98  	}
    99  
   100  	exclusionFunctions, err := GetDirectoryExclusionFunctions(s.config.Path, s.config.Exclude.Paths)
   101  	if err != nil {
   102  		return nil, err
   103  	}
   104  
   105  	// this should be the only file resolver that might have overlap with where files are cached
   106  	exclusionFunctions = append(exclusionFunctions, excludeCachePathVisitors()...)
   107  
   108  	res, err := fileresolver.NewFromDirectory(s.config.Path, s.config.Base, exclusionFunctions...)
   109  	if err != nil {
   110  		return nil, fmt.Errorf("unable to create directory resolver: %w", err)
   111  	}
   112  
   113  	s.resolver = res
   114  	return s.resolver, nil
   115  }
   116  
   117  func (s *directorySource) Close() error {
   118  	s.mutex.Lock()
   119  	defer s.mutex.Unlock()
   120  
   121  	s.resolver = nil
   122  	return nil
   123  }
   124  
   125  func GetDirectoryExclusionFunctions(root string, exclusions []string) ([]fileresolver.PathIndexVisitor, error) {
   126  	if len(exclusions) == 0 {
   127  		return nil, nil
   128  	}
   129  
   130  	// this is what directoryResolver.indexTree is doing to get the absolute path:
   131  	root, err := filepath.Abs(root)
   132  	if err != nil {
   133  		return nil, err
   134  	}
   135  
   136  	// this handles Windows file paths by converting them to C:/something/else format
   137  	root = filepath.ToSlash(root)
   138  
   139  	if !strings.HasSuffix(root, "/") {
   140  		root += "/"
   141  	}
   142  
   143  	var errors []string
   144  	for idx, exclusion := range exclusions {
   145  		// check exclusions for supported paths, these are all relative to the "scan root"
   146  		if strings.HasPrefix(exclusion, "./") || strings.HasPrefix(exclusion, "*/") || strings.HasPrefix(exclusion, "**/") {
   147  			exclusion = strings.TrimPrefix(exclusion, "./")
   148  			exclusions[idx] = root + exclusion
   149  		} else {
   150  			errors = append(errors, exclusion)
   151  		}
   152  	}
   153  
   154  	if errors != nil {
   155  		return nil, fmt.Errorf("invalid exclusion pattern(s): '%s' (must start with one of: './', '*/', or '**/')", strings.Join(errors, "', '"))
   156  	}
   157  
   158  	return []fileresolver.PathIndexVisitor{
   159  		func(_, path string, info os.FileInfo, _ error) error {
   160  			for _, exclusion := range exclusions {
   161  				// this is required to handle Windows filepaths
   162  				path = filepath.ToSlash(path)
   163  				matches, err := doublestar.Match(exclusion, path)
   164  				if err != nil {
   165  					return nil
   166  				}
   167  				if matches {
   168  					if info != nil && info.IsDir() {
   169  						return filepath.SkipDir
   170  					}
   171  					return fileresolver.ErrSkipPath
   172  				}
   173  			}
   174  			return nil
   175  		},
   176  	}, nil
   177  }
   178  
   179  // deriveIDFromDirectory generates an artifact ID from the given directory config. If an alias is provided, then
   180  // the artifact ID is derived exclusively from the alias name and version. Otherwise, the artifact ID is derived
   181  // from the path provided with an attempt to prune a prefix if a base is given. Since the contents of the directory
   182  // are not considered, there is no semantic meaning to the artifact ID -- this is why the alias is preferred without
   183  // consideration for the path.
   184  func deriveIDFromDirectory(cfg Config) artifact.ID {
   185  	var info string
   186  	if !cfg.Alias.IsEmpty() {
   187  		// don't use any of the path information -- instead use the alias name and version as the artifact ID.
   188  		// why? this allows the user to set a dependable stable value for the artifact ID in case the
   189  		// scanning root changes (e.g. a user scans a directory, then moves it to a new location and scans again).
   190  		info = fmt.Sprintf("%s@%s", cfg.Alias.Name, cfg.Alias.Version)
   191  	} else {
   192  		log.Warn("no explicit name and version provided for directory source, deriving artifact ID from the given path (which is not ideal)")
   193  		info = cleanDirPath(cfg.Path, cfg.Base)
   194  	}
   195  
   196  	return internal.ArtifactIDFromDigest(digest.SHA256.FromString(filepath.Clean(info)).String())
   197  }
   198  
   199  func cleanDirPath(path, base string) string {
   200  	if path == base {
   201  		return path
   202  	}
   203  
   204  	if base != "" {
   205  		cleanRoot, rootErr := fileresolver.NormalizeRootDirectory(path)
   206  		cleanBase, baseErr := fileresolver.NormalizeBaseDirectory(base)
   207  
   208  		if rootErr == nil && baseErr == nil {
   209  			// allows for normalizing inputs:
   210  			//   cleanRoot: /var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/TestDirectoryEncoder1121632790/001/some/path
   211  			//   cleanBase: /var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/TestDirectoryEncoder1121632790/001
   212  			//   normalized: some/path
   213  
   214  			relPath, err := filepath.Rel(cleanBase, cleanRoot)
   215  			if err == nil {
   216  				path = relPath
   217  			}
   218  			// this is odd, but this means we can't use base
   219  		}
   220  		// if the base is not a valid chroot, then just use the path as-is
   221  	}
   222  
   223  	return path
   224  }