github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/javascript/packagejson/packagejson.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package packagejson extracts package.json files. 16 package packagejson 17 18 import ( 19 "context" 20 "encoding/json" 21 "fmt" 22 "io" 23 "io/fs" 24 "path/filepath" 25 26 "deps.dev/util/semver" 27 "github.com/google/osv-scalibr/extractor" 28 "github.com/google/osv-scalibr/extractor/filesystem" 29 "github.com/google/osv-scalibr/extractor/filesystem/internal/units" 30 "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/packagejson/metadata" 31 "github.com/google/osv-scalibr/inventory" 32 "github.com/google/osv-scalibr/log" 33 "github.com/google/osv-scalibr/plugin" 34 "github.com/google/osv-scalibr/purl" 35 "github.com/google/osv-scalibr/stats" 36 ) 37 38 const ( 39 // Name is the unique name of this extractor. 40 Name = "javascript/packagejson" 41 42 // defaultMaxFileSizeBytes is the default maximum file size the extractor will 43 // attempt to extract. If a file is encountered that is larger than this 44 // limit, the file is ignored by `FileRequired`. 45 defaultMaxFileSizeBytes = 100 * units.MiB 46 ) 47 48 type packageJSON struct { 49 Version string `json:"version"` 50 Name string `json:"name"` 51 Engines any `json:"engines"` 52 Author *metadata.Person `json:"author"` 53 Maintainers []*metadata.Person `json:"maintainers"` 54 Contributors []*metadata.Person `json:"contributors"` 55 // Not an NPM field but present for VSCode Extension Manifest files. 56 Contributes *struct { 57 } `json:"contributes"` 58 // Not an NPM field but present for Unity package files. 59 Unity string `json:"unity"` 60 Dependencies map[string]string `json:"dependencies"` 61 } 62 63 // Config is the configuration for the Extractor. 64 type Config struct { 65 // Stats is a stats collector for reporting metrics. 66 Stats stats.Collector 67 // MaxFileSizeBytes is the maximum size of a file that can be extracted. 68 // If this limit is greater than zero and a file is encountered that is larger 69 // than this limit, the file is ignored by returning false for `FileRequired`. 70 MaxFileSizeBytes int64 71 // IncludeDependencies specifies whether to extract dependencies. 72 IncludeDependencies bool 73 } 74 75 // DefaultConfig returns the default configuration for the package.json extractor. 76 func DefaultConfig() Config { 77 return Config{ 78 Stats: nil, 79 MaxFileSizeBytes: defaultMaxFileSizeBytes, 80 IncludeDependencies: false, 81 } 82 } 83 84 // Extractor extracts javascript packages from package.json files. 85 type Extractor struct { 86 stats stats.Collector 87 maxFileSizeBytes int64 88 includeDependencies bool 89 } 90 91 // New returns a package.json extractor. 92 // 93 // For most use cases, initialize with: 94 // ``` 95 // e := New(DefaultConfig()) 96 // ``` 97 func New(cfg Config) *Extractor { 98 return &Extractor{ 99 stats: cfg.Stats, 100 maxFileSizeBytes: cfg.MaxFileSizeBytes, 101 includeDependencies: cfg.IncludeDependencies, 102 } 103 } 104 105 // NewDefault returns an extractor with the default config settings. 106 func NewDefault() filesystem.Extractor { return New(DefaultConfig()) } 107 108 // Name of the extractor. 109 func (e Extractor) Name() string { return Name } 110 111 // Version of the extractor. 112 func (e Extractor) Version() int { return 0 } 113 114 // Requirements of the extractor. 115 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } 116 117 // FileRequired returns true if the specified file matches javascript Metadata file 118 // patterns. 119 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 120 path := api.Path() 121 if filepath.Base(path) != "package.json" { 122 return false 123 } 124 125 fileinfo, err := api.Stat() 126 if err != nil { 127 return false 128 } 129 if e.maxFileSizeBytes > 0 && fileinfo.Size() > e.maxFileSizeBytes { 130 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultSizeLimitExceeded) 131 return false 132 } 133 134 e.reportFileRequired(path, fileinfo.Size(), stats.FileRequiredResultOK) 135 return true 136 } 137 138 func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) { 139 if e.stats == nil { 140 return 141 } 142 e.stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{ 143 Path: path, 144 Result: result, 145 FileSizeBytes: fileSizeBytes, 146 }) 147 } 148 149 // Extract extracts packages from package.json files passed through the scan input. 150 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 151 pkgs, err := parse(input.Path, input.Reader, e.includeDependencies) 152 if err != nil { 153 e.reportFileExtracted(input.Path, input.Info, err) 154 return inventory.Inventory{}, fmt.Errorf("packagejson.parse: %w", err) 155 } 156 157 for _, p := range pkgs { 158 p.Locations = []string{input.Path} 159 } 160 161 e.reportFileExtracted(input.Path, input.Info, nil) 162 return inventory.Inventory{Packages: pkgs}, nil 163 } 164 165 func (e Extractor) reportFileExtracted(path string, fileinfo fs.FileInfo, err error) { 166 if e.stats == nil { 167 return 168 } 169 var fileSizeBytes int64 170 if fileinfo != nil { 171 fileSizeBytes = fileinfo.Size() 172 } 173 e.stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{ 174 Path: path, 175 Result: filesystem.ExtractorErrorToFileExtractedResult(err), 176 FileSizeBytes: fileSizeBytes, 177 }) 178 } 179 180 func parse(path string, r io.Reader, includeDependencies bool) ([]*extractor.Package, error) { 181 dec := json.NewDecoder(r) 182 183 var p packageJSON 184 if err := dec.Decode(&p); err != nil { 185 log.Debugf("package.json file %s json decode failed: %v", path, err) 186 // TODO(b/281023532): We should not mark the overall SCALIBR scan as failed if we can't parse a file. 187 return nil, fmt.Errorf("failed to parse package.json file: %w", err) 188 } 189 190 if !p.hasNameAndVersionValues() { 191 log.Debugf("package.json file %s does not have a version and/or name", path) 192 return nil, nil 193 } 194 if p.isVSCodeExtension() { 195 log.Debugf("package.json file %s is a Visual Studio Code Extension Manifest, not an NPM package", path) 196 return nil, nil 197 } 198 if p.isUnityPackage() { 199 log.Debugf("package.json file %s is a Unity package, not an NPM package", path) 200 return nil, nil 201 } 202 203 var pkgs []*extractor.Package 204 pkgs = append(pkgs, &extractor.Package{ 205 Name: p.Name, 206 Version: p.Version, 207 PURLType: purl.TypeNPM, 208 Metadata: &metadata.JavascriptPackageJSONMetadata{ 209 Author: p.Author, 210 Maintainers: removeEmptyPersons(p.Maintainers), 211 Contributors: removeEmptyPersons(p.Contributors), 212 }, 213 }) 214 215 if includeDependencies { 216 for name, version := range p.Dependencies { 217 c, err := semver.NPM.ParseConstraint(version) 218 if err != nil { 219 log.Debugf("failed to parse NPM version constraint %s for dependency %s in %s: %v", version, name, path, err) 220 continue 221 } 222 v, err := c.CalculateMinVersion() 223 if err != nil { 224 log.Debugf("failed to calculate min NPM version for dependency %s in %s with constraint %s: %v", name, path, version, err) 225 continue 226 } 227 pkgs = append(pkgs, &extractor.Package{ 228 Name: name, 229 // Need to use Canon() to rebuild the string with the changes from CalculateMinVersion. 230 // Ignoring the build value, which isn't relevant for version comparison. 231 // TODO(b/444684673): Include the build value in the version string. Currently deps.dev 232 // does not parse out the build value, so that need to be fixed first. 233 Version: v.Canon(false), 234 PURLType: purl.TypeNPM, 235 }) 236 } 237 } 238 239 return pkgs, nil 240 } 241 242 func (p packageJSON) hasNameAndVersionValues() bool { 243 return p.Name != "" && p.Version != "" 244 } 245 246 // isVSCodeExtension returns true if p is a VSCode Extension Manifest. 247 // 248 // Visual Studio Code uses package.lock files as manifest files for extensions: 249 // https://code.visualstudio.com/api/references/extension-manifest 250 // These files are similar to NPM package.lock: 251 // https://docs.npmjs.com/cli/v10/configuring-npm/package.jsonn 252 // The `engine` field exists in both but is required to contain `vscode` in the extension. 253 // The `contributes` field is not required but only exists for VSCode extensions. 254 func (p packageJSON) isVSCodeExtension() bool { 255 if e, ok := p.Engines.(map[string]any); ok { 256 if _, ok := e["vscode"]; ok { 257 return true 258 } 259 } 260 return p.Contributes != nil 261 } 262 263 // isUnityPackage returns true if p is a Unity package. 264 // 265 // Unity (https://docs.unity3d.com/Manual/upm-manifestPkg.html) packages 266 // are similar to NPM packages in that they use the same filename share some of 267 // the core fields such as name and version. 268 // They also have a "unity" field that lists the Unity version. we can use 269 // this to differentiate them from NPM packages. 270 func (p packageJSON) isUnityPackage() bool { 271 return p.Unity != "" 272 } 273 274 func removeEmptyPersons(persons []*metadata.Person) []*metadata.Person { 275 var result []*metadata.Person 276 for _, p := range persons { 277 if p.Name != "" { 278 result = append(result, p) 279 } 280 } 281 return result 282 }