github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/javascript/packagejson/packagejson.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package packagejson extracts package.json files.
    16  package packagejson
    17  
    18  import (
    19  	"context"
    20  	"encoding/json"
    21  	"fmt"
    22  	"io"
    23  	"io/fs"
    24  	"path/filepath"
    25  
    26  	"deps.dev/util/semver"
    27  	"github.com/google/osv-scalibr/extractor"
    28  	"github.com/google/osv-scalibr/extractor/filesystem"
    29  	"github.com/google/osv-scalibr/extractor/filesystem/internal/units"
    30  	"github.com/google/osv-scalibr/extractor/filesystem/language/javascript/packagejson/metadata"
    31  	"github.com/google/osv-scalibr/inventory"
    32  	"github.com/google/osv-scalibr/log"
    33  	"github.com/google/osv-scalibr/plugin"
    34  	"github.com/google/osv-scalibr/purl"
    35  	"github.com/google/osv-scalibr/stats"
    36  )
    37  
    38  const (
    39  	// Name is the unique name of this extractor.
    40  	Name = "javascript/packagejson"
    41  
    42  	// defaultMaxFileSizeBytes is the default maximum file size the extractor will
    43  	// attempt to extract. If a file is encountered that is larger than this
    44  	// limit, the file is ignored by `FileRequired`.
    45  	defaultMaxFileSizeBytes = 100 * units.MiB
    46  )
    47  
    48  type packageJSON struct {
    49  	Version      string             `json:"version"`
    50  	Name         string             `json:"name"`
    51  	Engines      any                `json:"engines"`
    52  	Author       *metadata.Person   `json:"author"`
    53  	Maintainers  []*metadata.Person `json:"maintainers"`
    54  	Contributors []*metadata.Person `json:"contributors"`
    55  	// Not an NPM field but present for VSCode Extension Manifest files.
    56  	Contributes *struct {
    57  	} `json:"contributes"`
    58  	// Not an NPM field but present for Unity package files.
    59  	Unity        string            `json:"unity"`
    60  	Dependencies map[string]string `json:"dependencies"`
    61  }
    62  
    63  // Config is the configuration for the Extractor.
    64  type Config struct {
    65  	// Stats is a stats collector for reporting metrics.
    66  	Stats stats.Collector
    67  	// MaxFileSizeBytes is the maximum size of a file that can be extracted.
    68  	// If this limit is greater than zero and a file is encountered that is larger
    69  	// than this limit, the file is ignored by returning false for `FileRequired`.
    70  	MaxFileSizeBytes int64
    71  	// IncludeDependencies specifies whether to extract dependencies.
    72  	IncludeDependencies bool
    73  }
    74  
    75  // DefaultConfig returns the default configuration for the package.json extractor.
    76  func DefaultConfig() Config {
    77  	return Config{
    78  		Stats:               nil,
    79  		MaxFileSizeBytes:    defaultMaxFileSizeBytes,
    80  		IncludeDependencies: false,
    81  	}
    82  }
    83  
    84  // Extractor extracts javascript packages from package.json files.
    85  type Extractor struct {
    86  	stats               stats.Collector
    87  	maxFileSizeBytes    int64
    88  	includeDependencies bool
    89  }
    90  
    91  // New returns a package.json extractor.
    92  //
    93  // For most use cases, initialize with:
    94  // ```
    95  // e := New(DefaultConfig())
    96  // ```
    97  func New(cfg Config) *Extractor {
    98  	return &Extractor{
    99  		stats:               cfg.Stats,
   100  		maxFileSizeBytes:    cfg.MaxFileSizeBytes,
   101  		includeDependencies: cfg.IncludeDependencies,
   102  	}
   103  }
   104  
   105  // NewDefault returns an extractor with the default config settings.
   106  func NewDefault() filesystem.Extractor { return New(DefaultConfig()) }
   107  
   108  // Name of the extractor.
   109  func (e Extractor) Name() string { return Name }
   110  
   111  // Version of the extractor.
   112  func (e Extractor) Version() int { return 0 }
   113  
   114  // Requirements of the extractor.
   115  func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} }
   116  
   117  // FileRequired returns true if the specified file matches javascript Metadata file
   118  // patterns.
   119  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
   120  	path := api.Path()
   121  	if filepath.Base(path) != "package.json" {
   122  		return false
   123  	}
   124  
   125  	fileinfo, err := api.Stat()
   126  	if err != nil {
   127  		return false
   128  	}
   129  	if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes {
   130  		e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded)
   131  		return false
   132  	}
   133  
   134  	e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK)
   135  	return true
   136  }
   137  
   138  func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) {
   139  	if e.stats == nil {
   140  		return
   141  	}
   142  	e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{
   143  		Path:          path,
   144  		Result:        result,
   145  		FileSizeBytes: fileSizeBytes,
   146  	})
   147  }
   148  
   149  // Extract extracts packages from package.json files passed through the scan input.
   150  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   151  	pkgs, err := parse(input.Path, input.Reader, e.includeDependencies)
   152  	if err != nil {
   153  		e.reportFileExtracted(input.Path, input.Info, err)
   154  		return inventory.Inventory{}, fmt.Errorf("packagejson.parse: %w", err)
   155  	}
   156  
   157  	for _, p := range pkgs {
   158  		p.Locations = []string{input.Path}
   159  	}
   160  
   161  	e.reportFileExtracted(input.Path, input.Info, nil)
   162  	return inventory.Inventory{Packages: pkgs}, nil
   163  }
   164  
   165  func (e Extractor) reportFileExtracted(path string, fileinfo fs.FileInfo, err error) {
   166  	if e.stats == nil {
   167  		return
   168  	}
   169  	var fileSizeBytes int64
   170  	if fileinfo != nil {
   171  		fileSizeBytes = fileinfo.Size()
   172  	}
   173  	e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{
   174  		Path:          path,
   175  		Result:        filesystem.ExtractorErrorToFileExtractedResult(err),
   176  		FileSizeBytes: fileSizeBytes,
   177  	})
   178  }
   179  
   180  func parse(path string, r io.Reader, includeDependencies bool) ([]*extractor.Package, error) {
   181  	dec := json.NewDecoder(r)
   182  
   183  	var p packageJSON
   184  	if err := dec.Decode(&p); err != nil {
   185  		log.Debugf("package.json file %s json decode failed: %v", path, err)
   186  		// TODO(b/281023532): We should not mark the overall SCALIBR scan as failed if we can't parse a file.
   187  		return nil, fmt.Errorf("failed to parse package.json file: %w", err)
   188  	}
   189  
   190  	if !p.hasNameAndVersionValues() {
   191  		log.Debugf("package.json file %s does not have a version and/or name", path)
   192  		return nil, nil
   193  	}
   194  	if p.isVSCodeExtension() {
   195  		log.Debugf("package.json file %s is a Visual Studio Code Extension Manifest, not an NPM package", path)
   196  		return nil, nil
   197  	}
   198  	if p.isUnityPackage() {
   199  		log.Debugf("package.json file %s is a Unity package, not an NPM package", path)
   200  		return nil, nil
   201  	}
   202  
   203  	var pkgs []*extractor.Package
   204  	pkgs = append(pkgs, &extractor.Package{
   205  		Name:     p.Name,
   206  		Version:  p.Version,
   207  		PURLType: purl.TypeNPM,
   208  		Metadata: &metadata.JavascriptPackageJSONMetadata{
   209  			Author:       p.Author,
   210  			Maintainers:  removeEmptyPersons(p.Maintainers),
   211  			Contributors: removeEmptyPersons(p.Contributors),
   212  		},
   213  	})
   214  
   215  	if includeDependencies {
   216  		for name, version := range p.Dependencies {
   217  			c, err := semver.NPM.ParseConstraint(version)
   218  			if err != nil {
   219  				log.Debugf("failed to parse NPM version constraint %s for dependency %s in %s: %v", version, name, path, err)
   220  				continue
   221  			}
   222  			v, err := c.CalculateMinVersion()
   223  			if err != nil {
   224  				log.Debugf("failed to calculate min NPM version for dependency %s in %s with constraint %s: %v", name, path, version, err)
   225  				continue
   226  			}
   227  			pkgs = append(pkgs, &extractor.Package{
   228  				Name: name,
   229  				// Need to use Canon() to rebuild the string with the changes from CalculateMinVersion.
   230  				// Ignoring the build value, which isn't relevant for version comparison.
   231  				// TODO(b/444684673): Include the build value in the version string. Currently deps.dev
   232  				// does not parse out the build value, so that need to be fixed first.
   233  				Version:  v.Canon(false),
   234  				PURLType: purl.TypeNPM,
   235  			})
   236  		}
   237  	}
   238  
   239  	return pkgs, nil
   240  }
   241  
   242  func (p packageJSON) hasNameAndVersionValues() bool {
   243  	return p.Name != "" && p.Version != ""
   244  }
   245  
   246  // isVSCodeExtension returns true if p is a VSCode Extension Manifest.
   247  //
   248  // Visual Studio Code uses package.lock files as manifest files for extensions:
   249  // https://code.visualstudio.com/api/references/extension-manifest
   250  // These files are similar to NPM package.lock:
   251  // https://docs.npmjs.com/cli/v10/configuring-npm/package.jsonn
   252  // The `engine` field exists in both but is required to contain `vscode` in the extension.
   253  // The `contributes` field is not required but only exists for VSCode extensions.
   254  func (p packageJSON) isVSCodeExtension() bool {
   255  	if e, ok := p.Engines.(map[string]any); ok {
   256  		if _, ok := e["vscode"]; ok {
   257  			return true
   258  		}
   259  	}
   260  	return p.Contributes != nil
   261  }
   262  
   263  // isUnityPackage returns true if p is a Unity package.
   264  //
   265  // Unity (https://docs.unity3d.com/Manual/upm-manifestPkg.html) packages
   266  // are similar to NPM packages in that they use the same filename share some of
   267  // the core fields such as name and version.
   268  // They also have a "unity" field that lists the Unity version. we can use
   269  // this to differentiate them from NPM packages.
   270  func (p packageJSON) isUnityPackage() bool {
   271  	return p.Unity != ""
   272  }
   273  
   274  func removeEmptyPersons(persons []*metadata.Person) []*metadata.Person {
   275  	var result []*metadata.Person
   276  	for _, p := range persons {
   277  		if p.Name != "" {
   278  			result = append(result, p)
   279  		}
   280  	}
   281  	return result
   282  }