github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/javascript/yarnlock/yarnlock.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package yarnlock extracts NPM yarn.lock files.
    16  package yarnlock
    17  
    18  import (
    19  	"bufio"
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"path/filepath"
    24  	"regexp"
    25  	"slices"
    26  	"strings"
    27  
    28  	"github.com/google/osv-scalibr/extractor"
    29  	"github.com/google/osv-scalibr/extractor/filesystem"
    30  	"github.com/google/osv-scalibr/extractor/filesystem/language/javascript/internal/commitextractor"
    31  	"github.com/google/osv-scalibr/inventory"
    32  	"github.com/google/osv-scalibr/log"
    33  	"github.com/google/osv-scalibr/plugin"
    34  	"github.com/google/osv-scalibr/purl"
    35  )
    36  
    37  const (
    38  	// Name is the unique name of this extractor.
    39  	Name = "javascript/yarnlock"
    40  )
    41  
    42  var (
    43  	// Version matcher regex.
    44  	// Format for yarn.lock v1: `version "0.0.1"`
    45  	// Format for yarn.lock v2: `version: 0.0.1`
    46  	yarnPackageVersionRe = regexp.MustCompile(`^ {2}"?version"?:? "?([\w-.+]+)"?$`)
    47  	// Package resolution matcher regex. Might contain commit hashes.
    48  	// Format for yarn.lock v1: `resolved "git+ssh://git@github.com:G-Rath/repo-2#hash"`
    49  	// Format for yarn.lock v2: `resolution: "@my-scope/my-first-package@https://github.com/my-org/my-first-pkg.git#commit=hash"`
    50  	yarnPackageResolutionRe = regexp.MustCompile(`^ {2}"?(?:resolution:|resolved)"? "([^ '"]+)"$`)
    51  )
    52  
    53  func shouldSkipYarnLine(line string) bool {
    54  	line = strings.TrimSpace(line)
    55  	return line == "" || strings.HasPrefix(line, "#")
    56  }
    57  
    58  // yaml.lock files define packages as follows:
    59  //
    60  //	header
    61  //	  prop1 value1
    62  //	  prop2 value2
    63  //
    64  //	header2
    65  //	  prop3 value3
    66  type packageDescription struct {
    67  	header string
    68  	props  []string
    69  }
    70  
    71  func groupYarnPackageDescriptions(ctx context.Context, scanner *bufio.Scanner) ([]*packageDescription, error) {
    72  	result := []*packageDescription{}
    73  
    74  	var current *packageDescription
    75  	for scanner.Scan() {
    76  		if err := ctx.Err(); err != nil {
    77  			return result, err
    78  		}
    79  		if err := scanner.Err(); err != nil {
    80  			return result, err
    81  		}
    82  
    83  		line := scanner.Text()
    84  
    85  		if shouldSkipYarnLine(line) {
    86  			continue
    87  		}
    88  
    89  		// represents the start of a new dependency
    90  		if !strings.HasPrefix(line, " ") {
    91  			// Add previous descriptor if it's for a package.
    92  			if current != nil {
    93  				result = append(result, current)
    94  			}
    95  			current = &packageDescription{header: line}
    96  		} else if current == nil {
    97  			return nil, errors.New("malformed yarn.lock")
    98  		} else {
    99  			current.props = append(current.props, line)
   100  		}
   101  	}
   102  	// Add trailing descriptor.
   103  	if current != nil {
   104  		result = append(result, current)
   105  	}
   106  
   107  	return result, nil
   108  }
   109  
   110  func extractYarnPackageName(header string) string {
   111  	// Header format: @my-scope/my-first-package@my-scope/my-first-package#commit=hash
   112  	str := strings.TrimPrefix(header, "\"")
   113  	str = strings.TrimSuffix(str, ":")
   114  	str, _, _ = strings.Cut(str, ",")
   115  
   116  	isScoped := strings.HasPrefix(str, "@")
   117  
   118  	if isScoped {
   119  		str = strings.TrimPrefix(str, "@")
   120  	}
   121  	name, right, _ := strings.Cut(str, "@")
   122  
   123  	// Packages can also contain an NPM entry, e.g. @nicolo-ribaudo/chokidar-2@npm:2.1.8-no-fsevents.3
   124  	if strings.HasPrefix(right, "npm:") && strings.Contains(right, "@") {
   125  		return extractYarnPackageName(strings.TrimPrefix(right, "npm:"))
   126  	}
   127  
   128  	if isScoped {
   129  		name = "@" + name
   130  	}
   131  	return name
   132  }
   133  
   134  func determineYarnPackageVersion(props []string) string {
   135  	for _, s := range props {
   136  		matched := yarnPackageVersionRe.FindStringSubmatch(s)
   137  
   138  		if matched != nil {
   139  			return matched[1]
   140  		}
   141  	}
   142  	return ""
   143  }
   144  
   145  func determineYarnPackageResolution(props []string) string {
   146  	for _, s := range props {
   147  		matched := yarnPackageResolutionRe.FindStringSubmatch(s)
   148  		if matched != nil {
   149  			return matched[1]
   150  		}
   151  	}
   152  	return ""
   153  }
   154  
   155  func parseYarnPackageGroup(desc *packageDescription) *extractor.Package {
   156  	name := extractYarnPackageName(desc.header)
   157  	version := determineYarnPackageVersion(desc.props)
   158  	resolution := determineYarnPackageResolution(desc.props)
   159  
   160  	if version == "" {
   161  		log.Errorf("Failed to determine version of %s while parsing a yarn.lock", name)
   162  	}
   163  
   164  	return &extractor.Package{
   165  		Name:     name,
   166  		Version:  version,
   167  		PURLType: purl.TypeNPM,
   168  		SourceCode: &extractor.SourceCodeIdentifier{
   169  			Commit: commitextractor.TryExtractCommit(resolution),
   170  		},
   171  	}
   172  }
   173  
   174  // Extractor extracts NPM yarn.lock files.
   175  type Extractor struct{}
   176  
   177  // New returns a new instance of the extractor.
   178  func New() filesystem.Extractor { return &Extractor{} }
   179  
   180  // Name of the extractor
   181  func (e Extractor) Name() string { return Name }
   182  
   183  // Version of the extractor
   184  func (e Extractor) Version() int { return 0 }
   185  
   186  // Requirements of the extractor
   187  func (e Extractor) Requirements() *plugin.Capabilities {
   188  	return &plugin.Capabilities{}
   189  }
   190  
   191  // FileRequired returns true if the specified file is an NPM yarn.lock file.
   192  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
   193  	path := api.Path()
   194  	if filepath.Base(path) != "yarn.lock" {
   195  		return false
   196  	}
   197  	// Skip lockfiles inside node_modules directories since the packages they list aren't
   198  	// necessarily installed by the root project. We instead use the more specific top-level
   199  	// lockfile for the root project dependencies.
   200  	dir := filepath.ToSlash(filepath.Dir(path))
   201  	return !slices.Contains(strings.Split(dir, "/"), "node_modules")
   202  }
   203  
   204  // Extract extracts packages from NPM yarn.lock files passed through the scan input.
   205  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   206  	scanner := bufio.NewScanner(input.Reader)
   207  
   208  	packageGroups, err := groupYarnPackageDescriptions(ctx, scanner)
   209  	if err != nil {
   210  		return inventory.Inventory{}, fmt.Errorf("error while scanning: %w", err)
   211  	}
   212  
   213  	packages := make([]*extractor.Package, 0, len(packageGroups))
   214  
   215  	for _, group := range packageGroups {
   216  		if group.header == "__metadata:" {
   217  			// This group doesn't describe a package.
   218  			continue
   219  		}
   220  		if strings.HasSuffix(group.header, "@workspace:.\":") {
   221  			// This is the root package itself.
   222  			continue
   223  		}
   224  		pkg := parseYarnPackageGroup(group)
   225  		pkg.Locations = []string{input.Path}
   226  		packages = append(packages, pkg)
   227  	}
   228  
   229  	return inventory.Inventory{Packages: packages}, nil
   230  }
   231  
   232  var _ filesystem.Extractor = Extractor{}