github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/javascript/parse_yarn_lock.go (about)

     1  package javascript
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"fmt"
     7  	"regexp"
     8  
     9  	"github.com/scylladb/go-set/strset"
    10  
    11  	"github.com/anchore/syft/syft/artifact"
    12  	"github.com/anchore/syft/syft/file"
    13  	"github.com/anchore/syft/syft/pkg"
    14  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    15  )
    16  
    17  var (
    18  	// packageNameExp matches the name of the dependency in yarn.lock
    19  	// including scope/namespace prefix if found.
    20  	// For example: "aws-sdk@2.706.0" returns "aws-sdk"
    21  	//              "@babel/code-frame@^7.0.0" returns "@babel/code-frame"
    22  	packageNameExp = regexp.MustCompile(`^"?((?:@\w[\w-_.]*\/)?\w[\w-_.]*)@`)
    23  
    24  	// versionExp matches the "version" line of a yarn.lock entry and captures the version value.
    25  	// For example: version "4.10.1" (...and the value "4.10.1" is captured)
    26  	versionExp = regexp.MustCompile(`^\W+version(?:\W+"|:\W+)([\w-_.]+)"?`)
    27  
    28  	// packageURLExp matches the name and version of the dependency in yarn.lock
    29  	// from the resolved URL, including scope/namespace prefix if any.
    30  	// For example:
    31  	//		`resolved "https://registry.yarnpkg.com/async/-/async-3.2.3.tgz#ac53dafd3f4720ee9e8a160628f18ea91df196c9"`
    32  	//			would return "async" and "3.2.3"
    33  	//
    34  	//		`resolved "https://registry.yarnpkg.com/@4lolo/resize-observer-polyfill/-/resize-observer-polyfill-1.5.2.tgz#58868fc7224506236b5550d0c68357f0a874b84b"`
    35  	//			would return "@4lolo/resize-observer-polyfill" and "1.5.2"
    36  	packageURLExp = regexp.MustCompile(`^\s+resolved\s+"https://registry\.(?:yarnpkg\.com|npmjs\.org)/(.+?)/-/(?:.+?)-(\d+\..+?)\.tgz`)
    37  
    38  	// resolvedExp matches the resolved of the dependency in yarn.lock
    39  	// For example:
    40  	// 		resolved "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-3.0.3.tgz#3dca0e3f33b200fc7d1139c0cd96c1268cadfd9d"
    41  	// 			would return "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-3.0.3.tgz#3dca0e3f33b200fc7d1139c0cd96c1268cadfd9d"
    42  	resolvedExp = regexp.MustCompile(`^\s+resolved\s+"(.+?)"`)
    43  
    44  	// integrityExp matches the integrity of the dependency in yarn.lock
    45  	// For example:
    46  	//		integrity sha512-tHq6qdbT9U1IRSGf14CL0pUlULksvY9OZ+5eEgl1N7t+OA3tGvNpxJCzuKQlsNgCVwbAs670L1vcVQi8j9HjnA==
    47  	// 			would return "sha512-tHq6qdbT9U1IRSGf14CL0pUlULksvY9OZ+5eEgl1N7t+OA3tGvNpxJCzuKQlsNgCVwbAs670L1vcVQi8j9HjnA==""
    48  	integrityExp = regexp.MustCompile(`^\s+integrity\s+([^\s]+)`)
    49  )
    50  
    51  type genericYarnLockAdapter struct {
    52  	cfg CatalogerConfig
    53  }
    54  
    55  func newGenericYarnLockAdapter(cfg CatalogerConfig) genericYarnLockAdapter {
    56  	return genericYarnLockAdapter{
    57  		cfg: cfg,
    58  	}
    59  }
    60  
    61  func (a genericYarnLockAdapter) parseYarnLock(_ context.Context, resolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    62  	// in the case we find yarn.lock files in the node_modules directories, skip those
    63  	// as the whole purpose of the lock file is for the specific dependencies of the project
    64  	if pathContainsNodeModulesDirectory(reader.Path()) {
    65  		return nil, nil, nil
    66  	}
    67  
    68  	var pkgs []pkg.Package
    69  	var currentPackage, currentVersion, currentResolved, currentIntegrity string
    70  
    71  	scanner := bufio.NewScanner(reader)
    72  	parsedPackages := strset.New()
    73  
    74  	for scanner.Scan() {
    75  		line := scanner.Text()
    76  
    77  		if packageName := findPackageName(line); packageName != "" {
    78  			// When we find a new package, check if we have unsaved identifiers
    79  			if currentPackage != "" && currentVersion != "" && !parsedPackages.Has(currentPackage+"@"+currentVersion) {
    80  				pkgs = append(pkgs, newYarnLockPackage(a.cfg, resolver, reader.Location, currentPackage, currentVersion, currentResolved, currentIntegrity))
    81  				parsedPackages.Add(currentPackage + "@" + currentVersion)
    82  			}
    83  
    84  			currentPackage = packageName
    85  		} else if version := findPackageVersion(line); version != "" {
    86  			currentVersion = version
    87  		} else if packageName, version, resolved := findResolvedPackageAndVersion(line); packageName != "" && version != "" && resolved != "" {
    88  			currentResolved = resolved
    89  			currentPackage = packageName
    90  			currentVersion = version
    91  		} else if integrity := findIntegrity(line); integrity != "" && !parsedPackages.Has(currentPackage+"@"+currentVersion) {
    92  			pkgs = append(pkgs, newYarnLockPackage(a.cfg, resolver, reader.Location, currentPackage, currentVersion, currentResolved, integrity))
    93  			parsedPackages.Add(currentPackage + "@" + currentVersion)
    94  
    95  			// Cleanup to indicate no unsaved identifiers
    96  			currentPackage = ""
    97  			currentVersion = ""
    98  			currentResolved = ""
    99  			currentIntegrity = ""
   100  		}
   101  	}
   102  
   103  	// check if we have valid unsaved data after end-of-file has reached
   104  	if currentPackage != "" && currentVersion != "" && !parsedPackages.Has(currentPackage+"@"+currentVersion) {
   105  		pkgs = append(pkgs, newYarnLockPackage(a.cfg, resolver, reader.Location, currentPackage, currentVersion, currentResolved, currentIntegrity))
   106  		parsedPackages.Add(currentPackage + "@" + currentVersion)
   107  	}
   108  
   109  	if err := scanner.Err(); err != nil {
   110  		return nil, nil, fmt.Errorf("failed to parse yarn.lock file: %w", err)
   111  	}
   112  
   113  	pkg.Sort(pkgs)
   114  
   115  	return pkgs, nil, nil
   116  }
   117  
   118  func findPackageName(line string) string {
   119  	if matches := packageNameExp.FindStringSubmatch(line); len(matches) >= 2 {
   120  		return matches[1]
   121  	}
   122  
   123  	return ""
   124  }
   125  
   126  func findPackageVersion(line string) string {
   127  	if matches := versionExp.FindStringSubmatch(line); len(matches) >= 2 {
   128  		return matches[1]
   129  	}
   130  
   131  	return ""
   132  }
   133  
   134  func findResolvedPackageAndVersion(line string) (string, string, string) {
   135  	var resolved string
   136  	if matches := resolvedExp.FindStringSubmatch(line); len(matches) >= 2 {
   137  		resolved = matches[1]
   138  	}
   139  	if matches := packageURLExp.FindStringSubmatch(line); len(matches) >= 2 {
   140  		return matches[1], matches[2], resolved
   141  	}
   142  
   143  	return "", "", ""
   144  }
   145  
   146  func findIntegrity(line string) string {
   147  	if matches := integrityExp.FindStringSubmatch(line); len(matches) >= 2 {
   148  		return matches[1]
   149  	}
   150  
   151  	return ""
   152  }