github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/javascript/packagelockjson/packagelockjson.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package packagelockjson extracts package-lock.json files.
    16  package packagelockjson
    17  
    18  import (
    19  	"context"
    20  	"encoding/json"
    21  	"fmt"
    22  	"maps"
    23  	"path"
    24  	"path/filepath"
    25  	"slices"
    26  	"strings"
    27  
    28  	"github.com/google/osv-scalibr/extractor"
    29  	"github.com/google/osv-scalibr/extractor/filesystem"
    30  	"github.com/google/osv-scalibr/extractor/filesystem/language/javascript/internal/commitextractor"
    31  	"github.com/google/osv-scalibr/extractor/filesystem/osv"
    32  	"github.com/google/osv-scalibr/internal/dependencyfile/packagelockjson"
    33  	"github.com/google/osv-scalibr/inventory"
    34  	"github.com/google/osv-scalibr/plugin"
    35  	"github.com/google/osv-scalibr/purl"
    36  	"github.com/google/osv-scalibr/stats"
    37  )
    38  
    39  const (
    40  	// Name is the unique name of this extractor.
    41  	Name = "javascript/packagelockjson"
    42  )
    43  
// packageDetails is the intermediate representation of a single package read
// from a lockfile, before it is converted into an extractor.Package.
type packageDetails struct {
	// Name is the package name. For aliased dependencies ("npm:<name>@<version>")
	// this is the name of the alias target rather than the lockfile key.
	Name      string
	// Version is the package version. It is left blank for "file:" dependencies
	// and for entries of the dependencies tree that resolve to a commit.
	Version   string
	// Commit is the commit extracted from the version or resolved string, or
	// empty if none was found.
	Commit    string
	// DepGroups lists the dependency groups the package belongs to. An empty
	// list means the package is implicitly a production dependency.
	DepGroups []string
}
    50  
// npmPackageDetailsMap collects package details under a deduplication key of
// the form "<name>@<version>" (or "<name>@<commit>" when a commit is known).
type npmPackageDetailsMap map[string]packageDetails
    52  
    53  // mergeNpmDepsGroups handles merging the dependency groups of packages within the
    54  // NPM ecosystem, since they can appear multiple times in the same dependency tree
    55  //
    56  // the merge happens almost as you'd expect, except that if either given packages
    57  // belong to no groups, then that is the result since it indicates the package
    58  // is implicitly a production dependency.
    59  func mergeNpmDepsGroups(a, b packageDetails) []string {
    60  	// if either group includes no groups, then the package is in the "production" group
    61  	if len(a.DepGroups) == 0 || len(b.DepGroups) == 0 {
    62  		return nil
    63  	}
    64  
    65  	combined := make([]string, 0, len(a.DepGroups)+len(b.DepGroups))
    66  	combined = append(combined, a.DepGroups...)
    67  	combined = append(combined, b.DepGroups...)
    68  
    69  	slices.Sort(combined)
    70  
    71  	return slices.Compact(combined)
    72  }
    73  
    74  func (pdm npmPackageDetailsMap) add(key string, details packageDetails) {
    75  	existing, ok := pdm[key]
    76  
    77  	if ok {
    78  		details.DepGroups = mergeNpmDepsGroups(existing, details)
    79  	}
    80  
    81  	pdm[key] = details
    82  }
    83  
    84  func parseNpmLockDependencies(dependencies map[string]packagelockjson.Dependency) map[string]packageDetails {
    85  	details := npmPackageDetailsMap{}
    86  
    87  	for name, detail := range dependencies {
    88  		if detail.Dependencies != nil {
    89  			nestedDeps := parseNpmLockDependencies(detail.Dependencies)
    90  			for k, v := range nestedDeps {
    91  				details.add(k, v)
    92  			}
    93  		}
    94  
    95  		version := detail.Version
    96  		finalVersion := version
    97  		commit := ""
    98  
    99  		// If the package is aliased, get the name and version
   100  		// E.g. npm:string-width@^4.2.0
   101  		if strings.HasPrefix(detail.Version, "npm:") {
   102  			i := strings.LastIndex(detail.Version, "@")
   103  			name = detail.Version[4:i]
   104  			finalVersion = detail.Version[i+1:]
   105  		}
   106  
   107  		// we can't resolve a version from a "file:" dependency
   108  		if strings.HasPrefix(detail.Version, "file:") {
   109  			finalVersion = ""
   110  		} else {
   111  			commit = commitextractor.TryExtractCommit(detail.Version)
   112  
   113  			// if there is a commit, we want to deduplicate based on that rather than
   114  			// the version (the versions must match anyway for the commits to match)
   115  			//
   116  			// we also don't actually know what the "version" is, so blank it
   117  			if commit != "" {
   118  				finalVersion = ""
   119  				version = commit
   120  			}
   121  		}
   122  
   123  		details.add(name+"@"+version, packageDetails{
   124  			Name:      name,
   125  			Version:   finalVersion,
   126  			Commit:    commit,
   127  			DepGroups: detail.DepGroups(),
   128  		})
   129  	}
   130  
   131  	return details
   132  }
   133  
   134  func extractNpmPackageName(name string) string {
   135  	maybeScope := path.Base(path.Dir(name))
   136  	pkgName := path.Base(name)
   137  
   138  	if strings.HasPrefix(maybeScope, "@") {
   139  		pkgName = maybeScope + "/" + pkgName
   140  	}
   141  
   142  	return pkgName
   143  }
   144  
   145  func parseNpmLockPackages(packages map[string]packagelockjson.Package) map[string]packageDetails {
   146  	details := npmPackageDetailsMap{}
   147  
   148  	for namePath, detail := range packages {
   149  		if namePath == "" {
   150  			continue
   151  		}
   152  
   153  		finalName := detail.Name
   154  		if finalName == "" {
   155  			finalName = extractNpmPackageName(namePath)
   156  		}
   157  
   158  		finalVersion := detail.Version
   159  
   160  		commit := commitextractor.TryExtractCommit(detail.Resolved)
   161  
   162  		// if there is a commit, we want to deduplicate based on that rather than
   163  		// the version (the versions must match anyway for the commits to match)
   164  		if commit != "" {
   165  			finalVersion = commit
   166  		}
   167  
   168  		details.add(finalName+"@"+finalVersion, packageDetails{
   169  			Name:      finalName,
   170  			Version:   detail.Version,
   171  			Commit:    commit,
   172  			DepGroups: detail.DepGroups(),
   173  		})
   174  	}
   175  
   176  	return details
   177  }
   178  
   179  func parseNpmLock(lockfile packagelockjson.LockFile) map[string]packageDetails {
   180  	if lockfile.Packages != nil {
   181  		return parseNpmLockPackages(lockfile.Packages)
   182  	}
   183  
   184  	return parseNpmLockDependencies(lockfile.Dependencies)
   185  }
   186  
// Config is the configuration for the Extractor.
type Config struct {
	// Stats is a stats collector for reporting metrics. It may be nil, in which
	// case no metrics are reported.
	Stats stats.Collector
	// MaxFileSizeBytes is the maximum file size this extractor will unmarshal. If
	// `FileRequired` gets a bigger file, it will return false. A value of zero or
	// less disables the size check.
	MaxFileSizeBytes int64
}
   195  
   196  // DefaultConfig returns the default configuration for the extractor.
   197  func DefaultConfig() Config {
   198  	return Config{
   199  		Stats:            nil,
   200  		MaxFileSizeBytes: 0,
   201  	}
   202  }
   203  
// Extractor extracts npm packages from package-lock.json files.
type Extractor struct {
	// stats receives file-required and file-extracted metrics; nil disables
	// reporting.
	stats            stats.Collector
	// maxFileSizeBytes is the size above which FileRequired rejects a file; the
	// check is skipped when the value is zero or less.
	maxFileSizeBytes int64
}
   209  
   210  // New returns a package-lock.json extractor.
   211  //
   212  // For most use cases, initialize with:
   213  // ```
   214  // e := New(DefaultConfig())
   215  // ```
   216  func New(cfg Config) *Extractor {
   217  	return &Extractor{
   218  		stats:            cfg.Stats,
   219  		maxFileSizeBytes: cfg.MaxFileSizeBytes,
   220  	}
   221  }
   222  
   223  // NewDefault returns an extractor with the default config settings.
   224  func NewDefault() filesystem.Extractor { return New(DefaultConfig()) }
   225  
   226  // Name of the extractor.
   227  func (e Extractor) Name() string { return Name }
   228  
   229  // Version of the extractor.
   230  func (e Extractor) Version() int { return 0 }
   231  
   232  // Requirements of the extractor.
   233  func (e Extractor) Requirements() *plugin.Capabilities {
   234  	return &plugin.Capabilities{}
   235  }
   236  
   237  // FileRequired returns true if the specified file matches npm lockfile patterns.
   238  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
   239  	path := api.Path()
   240  	if !slices.Contains([]string{"package-lock.json", "npm-shrinkwrap.json"}, filepath.Base(path)) {
   241  		return false
   242  	}
   243  	// Skip lockfiles inside node_modules directories since the packages they list aren't
   244  	// necessarily installed by the root project. We instead use the more specific top-level
   245  	// lockfile for the root project dependencies.
   246  	dir := filepath.ToSlash(filepath.Dir(path))
   247  	if slices.Contains(strings.Split(dir, "/"), "node_modules") {
   248  		return false
   249  	}
   250  
   251  	fileInfo, err := api.Stat()
   252  	if err != nil {
   253  		return false
   254  	}
   255  	if e.maxFileSizeBytes > 0 && fileInfo.Size() > e.maxFileSizeBytes {
   256  		e.reportFileRequired(path, fileInfo.Size(), stats.FileRequiredResultSizeLimitExceeded)
   257  		return false
   258  	}
   259  
   260  	e.reportFileRequired(path, fileInfo.Size(), stats.FileRequiredResultOK)
   261  	return true
   262  }
   263  
   264  func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) {
   265  	if e.stats == nil {
   266  		return
   267  	}
   268  	e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{
   269  		Path:          path,
   270  		Result:        result,
   271  		FileSizeBytes: fileSizeBytes,
   272  	})
   273  }
   274  
   275  // Extract extracts packages from package-lock.json files passed through the scan input.
   276  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   277  	packages, err := e.extractPkgLock(ctx, input)
   278  
   279  	if e.stats != nil {
   280  		var fileSizeBytes int64
   281  		if input.Info != nil {
   282  			fileSizeBytes = input.Info.Size()
   283  		}
   284  		e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{
   285  			Path:          input.Path,
   286  			Result:        filesystem.ExtractorErrorToFileExtractedResult(err),
   287  			FileSizeBytes: fileSizeBytes,
   288  		})
   289  	}
   290  
   291  	return inventory.Inventory{Packages: packages}, err
   292  }
   293  
   294  func (e Extractor) extractPkgLock(_ context.Context, input *filesystem.ScanInput) ([]*extractor.Package, error) {
   295  	// If both package-lock.json and npm-shrinkwrap.json are present in the root of a project,
   296  	// npm-shrinkwrap.json will take precedence and package-lock.json will be ignored.
   297  	if filepath.Base(input.Path) == "package-lock.json" {
   298  		npmShrinkwrapPath := path.Join(filepath.ToSlash(filepath.Dir(input.Path)), "npm-shrinkwrap.json")
   299  		_, err := input.FS.Open(npmShrinkwrapPath)
   300  		if err == nil {
   301  			return nil, nil
   302  		}
   303  	}
   304  
   305  	var parsedLockfile *packagelockjson.LockFile
   306  
   307  	err := json.NewDecoder(input.Reader).Decode(&parsedLockfile)
   308  
   309  	if err != nil {
   310  		return nil, fmt.Errorf("could not extract: %w", err)
   311  	}
   312  
   313  	packages := slices.Collect(maps.Values(parseNpmLock(*parsedLockfile)))
   314  	result := make([]*extractor.Package, len(packages))
   315  
   316  	for i, pkg := range packages {
   317  		if pkg.DepGroups == nil {
   318  			pkg.DepGroups = []string{}
   319  		}
   320  
   321  		result[i] = &extractor.Package{
   322  			Name: pkg.Name,
   323  			SourceCode: &extractor.SourceCodeIdentifier{
   324  				Commit: pkg.Commit,
   325  			},
   326  			Version:  pkg.Version,
   327  			PURLType: purl.TypeNPM,
   328  			Metadata: osv.DepGroupMetadata{
   329  				DepGroupVals: pkg.DepGroups,
   330  			},
   331  			Locations: []string{input.Path},
   332  		}
   333  	}
   334  
   335  	return result, nil
   336  }