github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/source/directorysource/directory_source.go (about)

     1  package directorysource
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"path/filepath"
     7  	"strings"
     8  	"sync"
     9  
    10  	"github.com/bmatcuk/doublestar/v4"
    11  	"github.com/opencontainers/go-digest"
    12  
    13  	"github.com/anchore/syft/internal/log"
    14  	"github.com/anchore/syft/syft/artifact"
    15  	"github.com/anchore/syft/syft/file"
    16  	"github.com/anchore/syft/syft/internal/fileresolver"
    17  	"github.com/anchore/syft/syft/source"
    18  	"github.com/anchore/syft/syft/source/internal"
    19  )
    20  
    21  var _ source.Source = (*directorySource)(nil)
    22  
    23  type Config struct {
    24  	Path    string
    25  	Base    string
    26  	Exclude source.ExcludeConfig
    27  	Alias   source.Alias
    28  }
    29  
    30  type directorySource struct {
    31  	id       artifact.ID
    32  	config   Config
    33  	resolver *fileresolver.Directory
    34  	mutex    *sync.Mutex
    35  }
    36  
    37  func NewFromPath(path string) (source.Source, error) {
    38  	cfg := Config{
    39  		Path: path,
    40  	}
    41  	return New(cfg)
    42  }
    43  
    44  func New(cfg Config) (source.Source, error) {
    45  	fi, err := os.Stat(cfg.Path)
    46  	if err != nil {
    47  		return nil, fmt.Errorf("unable to stat path=%q: %w", cfg.Path, err)
    48  	}
    49  
    50  	if !fi.IsDir() {
    51  		return nil, fmt.Errorf("given path is not a directory: %q", cfg.Path)
    52  	}
    53  
    54  	return &directorySource{
    55  		id:     deriveIDFromDirectory(cfg),
    56  		config: cfg,
    57  		mutex:  &sync.Mutex{},
    58  	}, nil
    59  }
    60  
    61  // deriveIDFromDirectory generates an artifact ID from the given directory config. If an alias is provided, then
    62  // the artifact ID is derived exclusively from the alias name and version. Otherwise, the artifact ID is derived
    63  // from the path provided with an attempt to prune a prefix if a base is given. Since the contents of the directory
    64  // are not considered, there is no semantic meaning to the artifact ID -- this is why the alias is preferred without
    65  // consideration for the path.
    66  func deriveIDFromDirectory(cfg Config) artifact.ID {
    67  	var info string
    68  	if !cfg.Alias.IsEmpty() {
    69  		// don't use any of the path information -- instead use the alias name and version as the artifact ID.
    70  		// why? this allows the user to set a dependable stable value for the artifact ID in case the
    71  		// scanning root changes (e.g. a user scans a directory, then moves it to a new location and scans again).
    72  		info = fmt.Sprintf("%s@%s", cfg.Alias.Name, cfg.Alias.Version)
    73  	} else {
    74  		log.Warn("no explicit name and version provided for directory source, deriving artifact ID from the given path (which is not ideal)")
    75  		info = cleanDirPath(cfg.Path, cfg.Base)
    76  	}
    77  
    78  	return internal.ArtifactIDFromDigest(digest.SHA256.FromString(filepath.Clean(info)).String())
    79  }
    80  
    81  func cleanDirPath(path, base string) string {
    82  	if path == base {
    83  		return path
    84  	}
    85  
    86  	if base != "" {
    87  		cleanRoot, rootErr := fileresolver.NormalizeRootDirectory(path)
    88  		cleanBase, baseErr := fileresolver.NormalizeBaseDirectory(base)
    89  
    90  		if rootErr == nil && baseErr == nil {
    91  			// allows for normalizing inputs:
    92  			//   cleanRoot: /var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/TestDirectoryEncoder1121632790/001/some/path
    93  			//   cleanBase: /var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/TestDirectoryEncoder1121632790/001
    94  			//   normalized: some/path
    95  
    96  			relPath, err := filepath.Rel(cleanBase, cleanRoot)
    97  			if err == nil {
    98  				path = relPath
    99  			}
   100  			// this is odd, but this means we can't use base
   101  		}
   102  		// if the base is not a valid chroot, then just use the path as-is
   103  	}
   104  
   105  	return path
   106  }
   107  
   108  func (s directorySource) ID() artifact.ID {
   109  	return s.id
   110  }
   111  
   112  func (s directorySource) Describe() source.Description {
   113  	name := cleanDirPath(s.config.Path, s.config.Base)
   114  	version := ""
   115  	if !s.config.Alias.IsEmpty() {
   116  		a := s.config.Alias
   117  		if a.Name != "" {
   118  			name = a.Name
   119  		}
   120  		if a.Version != "" {
   121  			version = a.Version
   122  		}
   123  	}
   124  	return source.Description{
   125  		ID:      string(s.id),
   126  		Name:    name,
   127  		Version: version,
   128  		Metadata: source.DirectoryMetadata{
   129  			Path: s.config.Path,
   130  			Base: s.config.Base,
   131  		},
   132  	}
   133  }
   134  
   135  func (s *directorySource) FileResolver(_ source.Scope) (file.Resolver, error) {
   136  	s.mutex.Lock()
   137  	defer s.mutex.Unlock()
   138  
   139  	if s.resolver == nil {
   140  		exclusionFunctions, err := GetDirectoryExclusionFunctions(s.config.Path, s.config.Exclude.Paths)
   141  		if err != nil {
   142  			return nil, err
   143  		}
   144  
   145  		res, err := fileresolver.NewFromDirectory(s.config.Path, s.config.Base, exclusionFunctions...)
   146  		if err != nil {
   147  			return nil, fmt.Errorf("unable to create directory resolver: %w", err)
   148  		}
   149  
   150  		s.resolver = res
   151  	}
   152  
   153  	return s.resolver, nil
   154  }
   155  
   156  func (s *directorySource) Close() error {
   157  	s.mutex.Lock()
   158  	defer s.mutex.Unlock()
   159  	s.resolver = nil
   160  	return nil
   161  }
   162  
   163  func GetDirectoryExclusionFunctions(root string, exclusions []string) ([]fileresolver.PathIndexVisitor, error) {
   164  	if len(exclusions) == 0 {
   165  		return nil, nil
   166  	}
   167  
   168  	// this is what directoryResolver.indexTree is doing to get the absolute path:
   169  	root, err := filepath.Abs(root)
   170  	if err != nil {
   171  		return nil, err
   172  	}
   173  
   174  	// this handles Windows file paths by converting them to C:/something/else format
   175  	root = filepath.ToSlash(root)
   176  
   177  	if !strings.HasSuffix(root, "/") {
   178  		root += "/"
   179  	}
   180  
   181  	var errors []string
   182  	for idx, exclusion := range exclusions {
   183  		// check exclusions for supported paths, these are all relative to the "scan root"
   184  		if strings.HasPrefix(exclusion, "./") || strings.HasPrefix(exclusion, "*/") || strings.HasPrefix(exclusion, "**/") {
   185  			exclusion = strings.TrimPrefix(exclusion, "./")
   186  			exclusions[idx] = root + exclusion
   187  		} else {
   188  			errors = append(errors, exclusion)
   189  		}
   190  	}
   191  
   192  	if errors != nil {
   193  		return nil, fmt.Errorf("invalid exclusion pattern(s): '%s' (must start with one of: './', '*/', or '**/')", strings.Join(errors, "', '"))
   194  	}
   195  
   196  	return []fileresolver.PathIndexVisitor{
   197  		func(_, path string, info os.FileInfo, _ error) error {
   198  			for _, exclusion := range exclusions {
   199  				// this is required to handle Windows filepaths
   200  				path = filepath.ToSlash(path)
   201  				matches, err := doublestar.Match(exclusion, path)
   202  				if err != nil {
   203  					return nil
   204  				}
   205  				if matches {
   206  					if info != nil && info.IsDir() {
   207  						return filepath.SkipDir
   208  					}
   209  					return fileresolver.ErrSkipPath
   210  				}
   211  			}
   212  			return nil
   213  		},
   214  	}, nil
   215  }