github.com/devseccon/trivy@v0.47.1-0.20231123133102-bd902a0bd996/pkg/fanal/analyzer/language/nodejs/yarn/yarn.go (about)

     1  package yarn
     2  
     3  import (
     4  	"archive/zip"
     5  	"context"
     6  	"errors"
     7  	"io"
     8  	"io/fs"
     9  	"os"
    10  	"path"
    11  	"path/filepath"
    12  	"sort"
    13  	"strings"
    14  
    15  	"github.com/hashicorp/go-multierror"
    16  	"github.com/samber/lo"
    17  	"golang.org/x/exp/maps"
    18  	"golang.org/x/xerrors"
    19  
    20  	"github.com/aquasecurity/go-dep-parser/pkg/nodejs/packagejson"
    21  	"github.com/aquasecurity/go-dep-parser/pkg/nodejs/yarn"
    22  	godeptypes "github.com/aquasecurity/go-dep-parser/pkg/types"
    23  	"github.com/devseccon/trivy/pkg/detector/library/compare/npm"
    24  	"github.com/devseccon/trivy/pkg/fanal/analyzer"
    25  	"github.com/devseccon/trivy/pkg/fanal/analyzer/language"
    26  	"github.com/devseccon/trivy/pkg/fanal/analyzer/language/nodejs/license"
    27  	"github.com/devseccon/trivy/pkg/fanal/types"
    28  	"github.com/devseccon/trivy/pkg/log"
    29  	"github.com/devseccon/trivy/pkg/utils/fsutils"
    30  	xio "github.com/devseccon/trivy/pkg/x/io"
    31  )
    32  
    33  func init() {
    34  	analyzer.RegisterPostAnalyzer(analyzer.TypeYarn, newYarnAnalyzer)
    35  }
    36  
    // version is the analyzer version number reported by yarnAnalyzer.Version.
    const version = 2
    38  
    39  type yarnAnalyzer struct {
    40  	packageJsonParser *packagejson.Parser
    41  	lockParser        godeptypes.Parser
    42  	comparer          npm.Comparer
    43  	license           *license.License
    44  }
    45  
    46  func newYarnAnalyzer(opt analyzer.AnalyzerOptions) (analyzer.PostAnalyzer, error) {
    47  	return &yarnAnalyzer{
    48  		packageJsonParser: packagejson.NewParser(),
    49  		lockParser:        yarn.NewParser(),
    50  		comparer:          npm.Comparer{},
    51  		license:           license.NewLicense(opt.LicenseScannerOption.ClassifierConfidenceLevel),
    52  	}, nil
    53  }
    54  
    55  func (a yarnAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysisInput) (*analyzer.AnalysisResult, error) {
    56  	var apps []types.Application
    57  
    58  	required := func(path string, d fs.DirEntry) bool {
    59  		return filepath.Base(path) == types.YarnLock
    60  	}
    61  
    62  	err := fsutils.WalkDir(input.FS, ".", required, func(filePath string, d fs.DirEntry, r io.Reader) error {
    63  		// Parse yarn.lock
    64  		app, err := a.parseYarnLock(filePath, r)
    65  		if err != nil {
    66  			return xerrors.Errorf("parse error: %w", err)
    67  		} else if app == nil {
    68  			return nil
    69  		}
    70  
    71  		licenses, err := a.traverseLicenses(input.FS, filePath)
    72  		if err != nil {
    73  			log.Logger.Debugf("Unable to traverse licenses: %s", err)
    74  		}
    75  
    76  		// Parse package.json alongside yarn.lock to find direct deps and mark dev deps
    77  		if err = a.analyzeDependencies(input.FS, path.Dir(filePath), app); err != nil {
    78  			log.Logger.Warnf("Unable to parse %q to remove dev dependencies: %s", path.Join(path.Dir(filePath), types.NpmPkg), err)
    79  		}
    80  
    81  		// Fill licenses
    82  		for i, lib := range app.Libraries {
    83  			if l, ok := licenses[lib.ID]; ok {
    84  				app.Libraries[i].Licenses = l
    85  			}
    86  		}
    87  
    88  		apps = append(apps, *app)
    89  
    90  		return nil
    91  	})
    92  	if err != nil {
    93  		return nil, xerrors.Errorf("yarn walk error: %w", err)
    94  	}
    95  
    96  	return &analyzer.AnalysisResult{
    97  		Applications: apps,
    98  	}, nil
    99  }
   100  
   101  func (a yarnAnalyzer) Required(filePath string, _ os.FileInfo) bool {
   102  	dirs, fileName := splitPath(filePath)
   103  
   104  	if fileName == types.YarnLock &&
   105  		// skipping yarn.lock in cache folders
   106  		lo.Some(dirs, []string{
   107  			"node_modules",
   108  			".yarn",
   109  		}) {
   110  		return false
   111  	}
   112  
   113  	if fileName == types.YarnLock ||
   114  		fileName == types.NpmPkg ||
   115  		strings.HasPrefix(strings.ToLower(fileName), "license") {
   116  		return true
   117  	}
   118  
   119  	// The path is slashed in analyzers.
   120  	l := len(dirs)
   121  	// Valid path to the zip file - **/.yarn/cache/*.zip
   122  	if l > 1 && dirs[l-2] == ".yarn" && dirs[l-1] == "cache" && path.Ext(fileName) == ".zip" {
   123  		return true
   124  	}
   125  
   126  	return false
   127  }
   128  
   // splitPath breaks a slash-separated filePath into its directory components
   // and its final element. A path without a directory part yields dirs == ["."].
   func splitPath(filePath string) (dirs []string, fileName string) {
   	// The path is slashed in analyzers, so the directory part is split on "/".
   	parent := path.Dir(filePath)
   	return strings.Split(parent, "/"), filepath.Base(filePath)
   }
   135  
   136  func (a yarnAnalyzer) Type() analyzer.Type {
   137  	return analyzer.TypeYarn
   138  }
   139  
   140  func (a yarnAnalyzer) Version() int {
   141  	return version
   142  }
   143  
   144  func (a yarnAnalyzer) parseYarnLock(filePath string, r io.Reader) (*types.Application, error) {
   145  	return language.Parse(types.Yarn, filePath, r, a.lockParser)
   146  }
   147  
   148  // analyzeDependencies analyzes the package.json file next to yarn.lock,
   149  // distinguishing between direct and transitive dependencies as well as production and development dependencies.
   150  func (a yarnAnalyzer) analyzeDependencies(fsys fs.FS, dir string, app *types.Application) error {
   151  	packageJsonPath := path.Join(dir, types.NpmPkg)
   152  	directDeps, directDevDeps, err := a.parsePackageJsonDependencies(fsys, packageJsonPath)
   153  	if errors.Is(err, fs.ErrNotExist) {
   154  		log.Logger.Debugf("Yarn: %s not found", packageJsonPath)
   155  		return nil
   156  	} else if err != nil {
   157  		return xerrors.Errorf("unable to parse %s: %w", dir, err)
   158  	}
   159  
   160  	// yarn.lock file can contain same libraries with different versions
   161  	// save versions separately for version comparison by comparator
   162  	pkgIDs := lo.SliceToMap(app.Libraries, func(pkg types.Package) (string, types.Package) {
   163  		return pkg.ID, pkg
   164  	})
   165  
   166  	// Walk prod dependencies
   167  	pkgs, err := a.walkDependencies(app.Libraries, pkgIDs, directDeps, false)
   168  	if err != nil {
   169  		return xerrors.Errorf("unable to walk dependencies: %w", err)
   170  	}
   171  
   172  	// Walk dev dependencies
   173  	devPkgs, err := a.walkDependencies(app.Libraries, pkgIDs, directDevDeps, true)
   174  	if err != nil {
   175  		return xerrors.Errorf("unable to walk dependencies: %w", err)
   176  	}
   177  
   178  	// Merge prod and dev dependencies.
   179  	// If the same package is found in both prod and dev dependencies, use the one in prod.
   180  	pkgs = lo.Assign(devPkgs, pkgs)
   181  
   182  	pkgSlice := maps.Values(pkgs)
   183  	sort.Sort(types.Packages(pkgSlice))
   184  
   185  	// Save libraries
   186  	app.Libraries = pkgSlice
   187  	return nil
   188  }
   189  
   190  func (a yarnAnalyzer) walkDependencies(libs []types.Package, pkgIDs map[string]types.Package,
   191  	directDeps map[string]string, dev bool) (map[string]types.Package, error) {
   192  
   193  	// Identify direct dependencies
   194  	pkgs := make(map[string]types.Package)
   195  	for _, pkg := range libs {
   196  		if constraint, ok := directDeps[pkg.Name]; ok {
   197  			// npm has own comparer to compare versions
   198  			if match, err := a.comparer.MatchVersion(pkg.Version, constraint); err != nil {
   199  				return nil, xerrors.Errorf("unable to match version for %s", pkg.Name)
   200  			} else if match {
   201  				// Mark as a direct dependency
   202  				pkg.Indirect = false
   203  				pkg.Dev = dev
   204  				pkgs[pkg.ID] = pkg
   205  			}
   206  		}
   207  	}
   208  
   209  	// Walk indirect dependencies
   210  	for _, pkg := range pkgs {
   211  		a.walkIndirectDependencies(pkg, pkgIDs, pkgs)
   212  	}
   213  
   214  	return pkgs, nil
   215  }
   216  
   217  func (a yarnAnalyzer) walkIndirectDependencies(pkg types.Package, pkgIDs, deps map[string]types.Package) {
   218  	for _, pkgID := range pkg.DependsOn {
   219  		if _, ok := deps[pkgID]; ok {
   220  			continue
   221  		}
   222  
   223  		dep, ok := pkgIDs[pkgID]
   224  		if !ok {
   225  			continue
   226  		}
   227  
   228  		dep.Indirect = true
   229  		dep.Dev = pkg.Dev
   230  		deps[dep.ID] = dep
   231  		a.walkIndirectDependencies(dep, pkgIDs, deps)
   232  	}
   233  }
   234  
   235  func (a yarnAnalyzer) parsePackageJsonDependencies(fsys fs.FS, filePath string) (map[string]string, map[string]string, error) {
   236  	// Parse package.json
   237  	f, err := fsys.Open(filePath)
   238  	if err != nil {
   239  		return nil, nil, xerrors.Errorf("file open error: %w", err)
   240  	}
   241  	defer func() { _ = f.Close() }()
   242  
   243  	rootPkg, err := a.packageJsonParser.Parse(f)
   244  	if err != nil {
   245  		return nil, nil, xerrors.Errorf("parse error: %w", err)
   246  	}
   247  
   248  	// Merge dependencies and optionalDependencies
   249  	dependencies := lo.Assign(rootPkg.Dependencies, rootPkg.OptionalDependencies)
   250  	devDependencies := rootPkg.DevDependencies
   251  
   252  	if len(rootPkg.Workspaces) > 0 {
   253  		pkgs, err := a.traverseWorkspaces(fsys, rootPkg.Workspaces)
   254  		if err != nil {
   255  			return nil, nil, xerrors.Errorf("traverse workspaces error: %w", err)
   256  		}
   257  		for _, pkg := range pkgs {
   258  			dependencies = lo.Assign(dependencies, pkg.Dependencies, pkg.OptionalDependencies)
   259  			devDependencies = lo.Assign(devDependencies, pkg.DevDependencies)
   260  		}
   261  	}
   262  
   263  	return dependencies, devDependencies, nil
   264  }
   265  
   266  func (a yarnAnalyzer) traverseWorkspaces(fsys fs.FS, workspaces []string) ([]packagejson.Package, error) {
   267  	var pkgs []packagejson.Package
   268  
   269  	required := func(path string, _ fs.DirEntry) bool {
   270  		return filepath.Base(path) == types.NpmPkg
   271  	}
   272  
   273  	walkDirFunc := func(path string, d fs.DirEntry, r io.Reader) error {
   274  		pkg, err := a.packageJsonParser.Parse(r)
   275  		if err != nil {
   276  			return xerrors.Errorf("unable to parse %q: %w", path, err)
   277  		}
   278  		pkgs = append(pkgs, pkg)
   279  		return nil
   280  	}
   281  
   282  	for _, workspace := range workspaces {
   283  		matches, err := fs.Glob(fsys, workspace)
   284  		if err != nil {
   285  			return nil, err
   286  		}
   287  		for _, match := range matches {
   288  			if err := fsutils.WalkDir(fsys, match, required, walkDirFunc); err != nil {
   289  				return nil, xerrors.Errorf("walk error: %w", err)
   290  			}
   291  		}
   292  
   293  	}
   294  	return pkgs, nil
   295  }
   296  
   297  func (a yarnAnalyzer) traverseLicenses(fsys fs.FS, lockPath string) (map[string][]string, error) {
   298  	sub, err := fs.Sub(fsys, path.Dir(lockPath))
   299  	if err != nil {
   300  		return nil, xerrors.Errorf("fs error: %w", err)
   301  	}
   302  	var errs error
   303  
   304  	// Yarn v1
   305  	licenses, err := a.traverseYarnClassicPkgs(sub)
   306  	if err == nil {
   307  		return licenses, nil
   308  	}
   309  	errs = multierror.Append(errs, err)
   310  
   311  	// Yarn v2+
   312  	licenses, err = a.traverseYarnModernPkgs(sub)
   313  	if err == nil {
   314  		return licenses, nil
   315  	}
   316  	errs = multierror.Append(errs, err)
   317  
   318  	return nil, errs
   319  }
   320  
   321  func (a yarnAnalyzer) traverseYarnClassicPkgs(fsys fs.FS) (map[string][]string, error) {
   322  	return a.license.Traverse(fsys, "node_modules")
   323  }
   324  
   325  func (a yarnAnalyzer) traverseYarnModernPkgs(fsys fs.FS) (map[string][]string, error) {
   326  	sub, err := fs.Sub(fsys, ".yarn")
   327  	if err != nil {
   328  		return nil, xerrors.Errorf("fs error: %w", err)
   329  	}
   330  
   331  	var errs error
   332  	licenses := make(map[string][]string)
   333  
   334  	if ll, err := a.traverseUnpluggedDir(sub); err != nil {
   335  		errs = multierror.Append(errs, err)
   336  	} else {
   337  		licenses = lo.Assign(licenses, ll)
   338  	}
   339  
   340  	if ll, err := a.traverseCacheDir(sub); err != nil {
   341  		errs = multierror.Append(errs, err)
   342  	} else {
   343  		licenses = lo.Assign(licenses, ll)
   344  	}
   345  
   346  	if len(licenses) == 0 {
   347  		return nil, errs
   348  	}
   349  
   350  	return licenses, nil
   351  }
   352  
   353  func (a yarnAnalyzer) traverseUnpluggedDir(fsys fs.FS) (map[string][]string, error) {
   354  	// `unplugged` hold machine-specific build artifacts
   355  	// Traverse .yarn/unplugged dir
   356  	return a.license.Traverse(fsys, "unplugged")
   357  }
   358  
   359  func (a yarnAnalyzer) traverseCacheDir(fsys fs.FS) (map[string][]string, error) {
   360  	// Traverse .yarn/cache dir
   361  	licenses := make(map[string][]string)
   362  	err := fsutils.WalkDir(fsys, "cache", fsutils.RequiredExt(".zip"),
   363  		func(filePath string, d fs.DirEntry, r io.Reader) error {
   364  			fi, err := d.Info()
   365  			if err != nil {
   366  				return xerrors.Errorf("file stat error: %w", err)
   367  			}
   368  
   369  			rr, err := xio.NewReadSeekerAt(r)
   370  			if err != nil {
   371  				return xerrors.Errorf("reader error: %w", err)
   372  			}
   373  
   374  			zr, err := zip.NewReader(rr, fi.Size())
   375  			if err != nil {
   376  				return xerrors.Errorf("zip reader error: %w", err)
   377  			}
   378  
   379  			if l, err := a.license.Traverse(zr, "node_modules"); err != nil {
   380  				return xerrors.Errorf("license traverse error: %w", err)
   381  			} else {
   382  				licenses = lo.Assign(licenses, l)
   383  			}
   384  			return nil
   385  		})
   386  
   387  	if err != nil {
   388  		return nil, xerrors.Errorf("walk error: %w", err)
   389  	}
   390  
   391  	return licenses, nil
   392  }