github.com/google/osv-scalibr@v0.4.1/annotator/misc/npmsource/npmsource.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
// Package npmsource implements an annotator that determines where NPM packages were installed from.
// This is used to tell whether an NPM package is a locally-published package or not, in order to
// identify package name collisions on the NPM registry.
    18  package npmsource
    19  
    20  import (
    21  	"context"
    22  	"encoding/json"
    23  	"errors"
    24  	"fmt"
    25  	"io/fs"
    26  	"maps"
    27  	"path"
    28  	"path/filepath"
    29  	"strings"
    30  
    31  	"github.com/google/osv-scalibr/annotator"
    32  	"github.com/google/osv-scalibr/extractor"
    33  	"github.com/google/osv-scalibr/extractor/filesystem/language/javascript/packagejson/metadata"
    34  	scalibrfs "github.com/google/osv-scalibr/fs"
    35  	"github.com/google/osv-scalibr/internal/dependencyfile/packagelockjson"
    36  	"github.com/google/osv-scalibr/inventory"
    37  	"github.com/google/osv-scalibr/plugin"
    38  	"github.com/google/osv-scalibr/purl"
    39  )
    40  
const (
	// Name is the unique name of this Annotator.
	Name = "misc/npm-source"
	// nodeModulesDirectory is the name of the directory NPM installs packages into. It is used
	// to locate the hidden lockfile and to recognise the locations of installed packages.
	nodeModulesDirectory = "node_modules"
	// npmRegistryURL is the URL prefix of the public NPM registry. Lockfile entries whose
	// "resolved" value starts with this prefix are classified as coming from the public registry.
	npmRegistryURL = "https://registry.npmjs.org/"
)
    49  
var (
	// lockfilesByPriority lists the lockfile names to try, highest priority first.
	// If npm-shrinkwrap.json exists, npm uses it to install dependencies. When the shrinkwrap file
	// is not present, npm looks for package-lock.json, the default lockfile of modern npm versions.
	// The hidden .package-lock.json is generated from the root-level package-lock.json; unlike the
	// other two it is looked up inside the node_modules directory.
	lockfilesByPriority = []string{"npm-shrinkwrap.json", "package-lock.json", ".package-lock.json"}
)
    57  
    58  // Annotator adds annotations to NPM packages that are installed from the NPM repositories.
    59  // This is used to determine if NPM package is a locally-published package or not to
    60  // identify package name collisions on the NPM registry.
    61  type Annotator struct{}
    62  
    63  // New returns a new Annotator.
    64  func New() annotator.Annotator { return &Annotator{} }
    65  
    66  // Name of the annotator.
    67  func (Annotator) Name() string { return "misc/npm-source" }
    68  
    69  // Version of the annotator.
    70  func (Annotator) Version() int { return 1 }
    71  
    72  // Requirements of the annotator.
    73  func (Annotator) Requirements() *plugin.Capabilities {
    74  	return &plugin.Capabilities{}
    75  }
    76  
    77  // Annotate adds annotations to NPM packages from /node_modules/../package.json that are installed from the NPM repositories.
    78  func (a *Annotator) Annotate(ctx context.Context, input *annotator.ScanInput, results *inventory.Inventory) error {
    79  	rootDirToPackages := MapNPMProjectRootsToPackages(results.Packages)
    80  	var errs []error
    81  	for rootDir, pkgs := range rootDirToPackages {
    82  		fullPath := rootDir
    83  		var relError error
    84  		if filepath.IsAbs(fullPath) {
    85  			fullPath, relError = filepath.Rel(input.ScanRoot.Path, fullPath)
    86  		}
    87  		if relError != nil {
    88  			errs = append(errs, fmt.Errorf("%s failed to get relative path for %q from base %q: %w", a.Name(), fullPath, input.ScanRoot.Path, relError))
    89  			continue
    90  		}
    91  		registryResolvedMap, err := ResolvedFromLockfile(fullPath, input.ScanRoot.FS)
    92  		if err != nil {
    93  			// If no lockfile is found, we want to annotate the packages as locally published packages.
    94  			errs = append(errs, fmt.Errorf("%s failed to resolve lockfile in %q: %w", a.Name(), rootDir, err))
    95  		}
    96  		for _, pkg := range pkgs {
    97  			if pkg.Metadata == nil {
    98  				pkg.Metadata = &metadata.JavascriptPackageJSONMetadata{}
    99  			}
   100  			castedMetadata, ok := pkg.Metadata.(*metadata.JavascriptPackageJSONMetadata)
   101  			if !ok {
   102  				errs = append(errs, fmt.Errorf("%s expected type *metadata.JavascriptPackageJSONMetadata but got %T for package %q", a.Name(), pkg.Metadata, pkg.Name))
   103  				continue
   104  			}
   105  			if source, ok := registryResolvedMap[pkg.Name]; ok {
   106  				castedMetadata.Source = source
   107  			} else {
   108  				castedMetadata.Source = metadata.Unknown
   109  			}
   110  		}
   111  	}
   112  	return errors.Join(errs...)
   113  }
   114  
   115  // ResolvedFromLockfile looks for lockfiles in the given root directory and returns a map of package
   116  // names in the lockfile and the source of the package.
   117  // If no lockfile is found, it returns an error.
   118  // The first non-empty lockfile it finds per the priority list gets parsed and returned.
   119  // For example, when given /tmp as root, it will look through the following lockfiles in this order:
   120  // 1. /tmp/npm-shrinkwrap.json
   121  // 2. /tmp/package-lock.json
   122  // 3. /tmp/node_modules/.package-lock.json
   123  func ResolvedFromLockfile(root string, fsys scalibrfs.FS) (map[string]metadata.NPMPackageSource, error) {
   124  	var errs []error
   125  	for _, lockfile := range lockfilesByPriority {
   126  		lockfilePath := filepath.Join(root, lockfile)
   127  		if lockfile == ".package-lock.json" {
   128  			lockfilePath = filepath.Join(root, nodeModulesDirectory, ".package-lock.json")
   129  		}
   130  
   131  		parsedLockfile, err := npmLockfile(filepath.ToSlash(lockfilePath), fsys)
   132  		if err != nil {
   133  			errs = append(errs, fmt.Errorf("failed to resolve lockfile: %w", err))
   134  			continue
   135  		}
   136  
   137  		if parsedLockfile == nil {
   138  			continue
   139  		}
   140  
   141  		return registryResolvedPackages(parsedLockfile), nil
   142  	}
   143  	return nil, errors.Join(errs...)
   144  }
   145  
   146  // NPMPackageSource returns the source of the NPM package based on the resolved field in the lockfile.
   147  func NPMPackageSource(resolved string) metadata.NPMPackageSource {
   148  	if resolved == "" {
   149  		return metadata.Unknown
   150  	}
   151  	if strings.HasPrefix(resolved, npmRegistryURL) {
   152  		return metadata.PublicRegistry
   153  	}
   154  	if strings.HasPrefix(resolved, "file:") {
   155  		return metadata.Local
   156  	}
   157  	return metadata.Other
   158  }
   159  
   160  func registryResolvedPackages(lockfile *packagelockjson.LockFile) map[string]metadata.NPMPackageSource {
   161  	registryResolvedMap := make(map[string]metadata.NPMPackageSource)
   162  
   163  	if lockfile.Packages != nil {
   164  		registryResolvedMap = lockfilePackages(lockfile.Packages)
   165  	}
   166  	if lockfile.Dependencies != nil {
   167  		maps.Copy(registryResolvedMap, lockfileDependencies(lockfile.Dependencies))
   168  	}
   169  	return registryResolvedMap
   170  }
   171  
   172  func lockfilePackages(packages map[string]packagelockjson.Package) map[string]metadata.NPMPackageSource {
   173  	packagesResolvedMap := make(map[string]metadata.NPMPackageSource)
   174  	for namePath, pkg := range packages {
   175  		if namePath == "" {
   176  			continue
   177  		}
   178  		pkgName := pkg.Name
   179  		if pkgName == "" {
   180  			pkgName = packageName(namePath)
   181  		}
   182  		packagesResolvedMap[pkgName] = NPMPackageSource(pkg.Resolved)
   183  	}
   184  	return packagesResolvedMap
   185  }
   186  
   187  func lockfileDependencies(dependencies map[string]packagelockjson.Dependency) map[string]metadata.NPMPackageSource {
   188  	resolvedMap := make(map[string]metadata.NPMPackageSource)
   189  	resolvedLockfileDependencies(dependencies, resolvedMap)
   190  	return resolvedMap
   191  }
   192  
   193  func resolvedLockfileDependencies(dependencies map[string]packagelockjson.Dependency, dependenciesResolvedMap map[string]metadata.NPMPackageSource) {
   194  	for name, detail := range dependencies {
   195  		identifier := dependencyName(name, detail.Version)
   196  		dependenciesResolvedMap[identifier] = NPMPackageSource(detail.Resolved)
   197  		if detail.Dependencies != nil {
   198  			resolvedLockfileDependencies(detail.Dependencies, dependenciesResolvedMap)
   199  		}
   200  	}
   201  }
   202  
   203  func dependencyName(name string, version string) string {
   204  	prefix := "npm:"
   205  	if strings.HasPrefix(version, prefix) {
   206  		i := strings.LastIndex(version, "@")
   207  		if i < len(prefix)+1 {
   208  			return name
   209  		}
   210  		return version[len(prefix):i]
   211  	}
   212  	return name
   213  }
   214  
   215  func packageName(name string) string {
   216  	maybeScope := path.Base(path.Dir(name))
   217  	pkgName := path.Base(name)
   218  
   219  	if strings.HasPrefix(maybeScope, "@") {
   220  		pkgName = maybeScope + "/" + pkgName
   221  	}
   222  
   223  	return pkgName
   224  }
   225  
   226  func npmLockfile(lockfile string, fsys scalibrfs.FS) (*packagelockjson.LockFile, error) {
   227  	data, err := fs.ReadFile(fsys, lockfile)
   228  	if err != nil {
   229  		if errors.Is(err, fs.ErrNotExist) {
   230  			return nil, nil
   231  		}
   232  
   233  		return nil, err
   234  	}
   235  
   236  	parsedLockfile := &packagelockjson.LockFile{}
   237  	if err := json.Unmarshal(data, parsedLockfile); err != nil {
   238  		return nil, err
   239  	}
   240  
   241  	if parsedLockfile.Packages == nil && parsedLockfile.Dependencies == nil {
   242  		return nil, fmt.Errorf("lockfile %q is empty", lockfile)
   243  	}
   244  
   245  	return parsedLockfile, nil
   246  }
   247  
   248  // MapNPMProjectRootsToPackages maps the root-level directories to packages where they were installed from.
   249  // Note that only NPM packages from root/node_modules/../package.json are considered.
   250  // For example, if package @foo/bar was installed from root/node_modules/foo/bar/package.json,
   251  // then the map will contain root as the key and package @foo/bar as the value.
   252  func MapNPMProjectRootsToPackages(packages []*extractor.Package) map[string][]*extractor.Package {
   253  	rootsToPackages := map[string][]*extractor.Package{}
   254  	for _, pkg := range packages {
   255  		if len(pkg.Locations) == 0 || pkg.PURLType != purl.TypeNPM {
   256  			continue
   257  		}
   258  
   259  		for _, loc := range pkg.Locations {
   260  			root := npmProjectRootDirectory(loc)
   261  			if root == "" {
   262  				continue
   263  			}
   264  			rootsToPackages[root] = append(rootsToPackages[root], pkg)
   265  			break
   266  		}
   267  	}
   268  	return rootsToPackages
   269  }
   270  
   271  func npmProjectRootDirectory(path string) string {
   272  	// Only consider packages from root/node_modules/../package.json.
   273  	if !(filepath.Base(path) == "package.json" && strings.Contains(path, nodeModulesDirectory)) {
   274  		// We are silently dropping packages that are outside of root/node_modules/../package.json.
   275  		return ""
   276  	}
   277  
   278  	nodeModulesIndex := strings.Index(filepath.ToSlash(path), "/node_modules/")
   279  	if nodeModulesIndex == -1 {
   280  		return ""
   281  	}
   282  
   283  	return path[:nodeModulesIndex]
   284  }