github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/javascript/pnpmlock/pnpmlock.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package pnpmlock extracts pnpm-lock.yaml files.
    16  package pnpmlock
    17  
    18  import (
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"io"
    23  	"path/filepath"
    24  	"regexp"
    25  	"slices"
    26  	"strconv"
    27  	"strings"
    28  
    29  	"github.com/google/osv-scalibr/extractor"
    30  	"github.com/google/osv-scalibr/extractor/filesystem"
    31  	"github.com/google/osv-scalibr/extractor/filesystem/osv"
    32  	"github.com/google/osv-scalibr/inventory"
    33  	"github.com/google/osv-scalibr/log"
    34  	"github.com/google/osv-scalibr/plugin"
    35  	"github.com/google/osv-scalibr/purl"
    36  	"gopkg.in/yaml.v3"
    37  )
    38  
    39  const (
    40  	// Name is the unique name of this extractor.
    41  	Name = "javascript/pnpmlock"
    42  )
    43  
    44  type pnpmLockPackageResolution struct {
    45  	Tarball string `yaml:"tarball"`
    46  	Commit  string `yaml:"commit"`
    47  	Repo    string `yaml:"repo"`
    48  	Type    string `yaml:"type"`
    49  }
    50  
    51  type pnpmLockPackage struct {
    52  	Resolution pnpmLockPackageResolution `yaml:"resolution"`
    53  	Name       string                    `yaml:"name"`
    54  	Version    string                    `yaml:"version"`
    55  	Dev        bool                      `yaml:"dev"`
    56  }
    57  
    58  type pnpmLockfile struct {
    59  	Version  float64                    `yaml:"lockfileVersion"`
    60  	Packages map[string]pnpmLockPackage `yaml:"packages,omitempty"`
    61  }
    62  
    63  type pnpmLockfileV6 struct {
    64  	Version  string                     `yaml:"lockfileVersion"`
    65  	Packages map[string]pnpmLockPackage `yaml:"packages,omitempty"`
    66  }
    67  
    68  // UnmarshalYAML is a custom unmarshalling function for handling v6 lockfiles.
    69  func (l *pnpmLockfile) UnmarshalYAML(unmarshal func(any) error) error {
    70  	var lockfileV6 pnpmLockfileV6
    71  
    72  	if err := unmarshal(&lockfileV6); err != nil {
    73  		return err
    74  	}
    75  
    76  	parsedVersion, err := strconv.ParseFloat(lockfileV6.Version, 64)
    77  
    78  	if err != nil {
    79  		return err
    80  	}
    81  
    82  	l.Version = parsedVersion
    83  	l.Packages = lockfileV6.Packages
    84  
    85  	return nil
    86  }
    87  
    88  var (
    89  	numberMatcher = regexp.MustCompile(`^\d`)
    90  	// Looks for the pattern "name@version", where name is allowed to contain zero or more "@"
    91  	nameVersionRegexp = regexp.MustCompile(`^(.+)@([\w.-]+)(?:\(|$)`)
    92  
    93  	codeLoadURLRegexp = regexp.MustCompile(`https://codeload\.github\.com(?:/[\w-.]+){2}/tar\.gz/(\w+)$`)
    94  )
    95  
    96  // extractPnpmPackageNameAndVersion parses a dependency path, attempting to
    97  // extract the name and version of the package it represents
    98  func extractPnpmPackageNameAndVersion(dependencyPath string, lockfileVersion float64) (string, string, error) {
    99  	// file dependencies must always have a name property to be installed,
   100  	// and their dependency path never has the version encoded, so we can
   101  	// skip trying to extract either from their dependency path
   102  	if strings.HasPrefix(dependencyPath, "file:") {
   103  		return "", "", nil
   104  	}
   105  
   106  	// v9.0 specifies the dependencies as <package>@<version> rather than as a path
   107  	if lockfileVersion >= 9.0 {
   108  		dependencyPath = strings.Trim(dependencyPath, "'")
   109  		dependencyPath, isScoped := strings.CutPrefix(dependencyPath, "@")
   110  
   111  		name, version, _ := strings.Cut(dependencyPath, "@")
   112  
   113  		if isScoped {
   114  			name = "@" + name
   115  		}
   116  
   117  		return name, version, nil
   118  	}
   119  
   120  	parts := strings.Split(dependencyPath, "/")
   121  	if len(parts) < 2 {
   122  		return "", "", fmt.Errorf("invalid dependency path: %v", dependencyPath)
   123  	}
   124  	var name string
   125  
   126  	parts = parts[1:]
   127  
   128  	if strings.HasPrefix(parts[0], "@") {
   129  		name = strings.Join(parts[:2], "/")
   130  		parts = parts[2:]
   131  	} else {
   132  		name = parts[0]
   133  		parts = parts[1:]
   134  	}
   135  
   136  	version := ""
   137  
   138  	if len(parts) != 0 {
   139  		version = parts[0]
   140  	}
   141  
   142  	if version == "" {
   143  		name, version = parseNameAtVersion(name)
   144  	}
   145  
   146  	if version == "" || !numberMatcher.MatchString(version) {
   147  		return "", "", nil
   148  	}
   149  
   150  	underscoreIndex := strings.Index(version, "_")
   151  
   152  	if underscoreIndex != -1 {
   153  		version = strings.Split(version, "_")[0]
   154  	}
   155  
   156  	return name, version, nil
   157  }
   158  
   159  func parseNameAtVersion(value string) (name string, version string) {
   160  	matches := nameVersionRegexp.FindStringSubmatch(value)
   161  
   162  	if len(matches) != 3 {
   163  		return name, ""
   164  	}
   165  
   166  	return matches[1], matches[2]
   167  }
   168  
   169  func parsePnpmLock(lockfile pnpmLockfile) ([]*extractor.Package, error) {
   170  	packages := make([]*extractor.Package, 0, len(lockfile.Packages))
   171  	errs := []error{}
   172  
   173  	for s, pkg := range lockfile.Packages {
   174  		name, version, err := extractPnpmPackageNameAndVersion(s, lockfile.Version)
   175  		if err != nil {
   176  			errs = append(errs, err)
   177  			log.Errorf("failed to extract package version from %v: %v", pkg, err)
   178  			continue
   179  		}
   180  
   181  		// "name" is only present if it's not in the dependency path and takes
   182  		// priority over whatever name we think we've extracted (if any)
   183  		if pkg.Name != "" {
   184  			name = pkg.Name
   185  		}
   186  
   187  		// "version" is only present if it's not in the dependency path and takes
   188  		// priority over whatever version we think we've extracted (if any)
   189  		if pkg.Version != "" {
   190  			version = pkg.Version
   191  		}
   192  
   193  		if name == "" || version == "" {
   194  			continue
   195  		}
   196  
   197  		commit := pkg.Resolution.Commit
   198  
   199  		if strings.HasPrefix(pkg.Resolution.Tarball, "https://codeload.github.com") {
   200  			matched := codeLoadURLRegexp.FindStringSubmatch(pkg.Resolution.Tarball)
   201  
   202  			if matched != nil {
   203  				commit = matched[1]
   204  			}
   205  		}
   206  
   207  		depGroups := []string{}
   208  		if pkg.Dev {
   209  			depGroups = append(depGroups, "dev")
   210  		}
   211  
   212  		packages = append(packages, &extractor.Package{
   213  			Name:     name,
   214  			Version:  version,
   215  			PURLType: purl.TypeNPM,
   216  			SourceCode: &extractor.SourceCodeIdentifier{
   217  				Commit: commit,
   218  			},
   219  			Metadata: osv.DepGroupMetadata{
   220  				DepGroupVals: depGroups,
   221  			},
   222  		})
   223  	}
   224  
   225  	return packages, errors.Join(errs...)
   226  }
   227  
   228  // Extractor extracts pnpm-lock.yaml files.
   229  type Extractor struct{}
   230  
   231  // New returns a new instance of the extractor.
   232  func New() filesystem.Extractor { return &Extractor{} }
   233  
   234  // Name of the extractor
   235  func (e Extractor) Name() string { return Name }
   236  
   237  // Version of the extractor
   238  func (e Extractor) Version() int { return 0 }
   239  
   240  // Requirements of the extractor.
   241  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
   242  
   243  // FileRequired returns true if the specified file matches pnpm-lock.yaml files.
   244  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
   245  	path := api.Path()
   246  	if filepath.Base(path) != "pnpm-lock.yaml" {
   247  		return false
   248  	}
   249  	// Skip lockfiles inside node_modules directories since the packages they list aren't
   250  	// necessarily installed by the root project. We instead use the more specific top-level
   251  	// lockfile for the root project dependencies.
   252  	dir := filepath.ToSlash(filepath.Dir(path))
   253  	return !slices.Contains(strings.Split(dir, "/"), "node_modules")
   254  }
   255  
   256  // Extract extracts packages from a pnpm-lock.yaml file passed through the scan input.
   257  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   258  	var parsedLockfile *pnpmLockfile
   259  
   260  	err := yaml.NewDecoder(input.Reader).Decode(&parsedLockfile)
   261  
   262  	if err != nil && !errors.Is(err, io.EOF) {
   263  		return inventory.Inventory{}, fmt.Errorf("could not extract: %w", err)
   264  	}
   265  
   266  	// this will happen if the file is empty
   267  	if parsedLockfile == nil {
   268  		parsedLockfile = &pnpmLockfile{}
   269  	}
   270  
   271  	packages, err := parsePnpmLock(*parsedLockfile)
   272  	for i := range packages {
   273  		packages[i].Locations = []string{input.Path}
   274  	}
   275  
   276  	return inventory.Inventory{Packages: packages}, err
   277  }
   278  
   279  var _ filesystem.Extractor = Extractor{}