github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/javascript/pnpmlock/pnpmlock.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package pnpmlock extracts pnpm-lock.yaml files. 16 package pnpmlock 17 18 import ( 19 "context" 20 "errors" 21 "fmt" 22 "io" 23 "path/filepath" 24 "regexp" 25 "slices" 26 "strconv" 27 "strings" 28 29 "github.com/google/osv-scalibr/extractor" 30 "github.com/google/osv-scalibr/extractor/filesystem" 31 "github.com/google/osv-scalibr/extractor/filesystem/osv" 32 "github.com/google/osv-scalibr/inventory" 33 "github.com/google/osv-scalibr/log" 34 "github.com/google/osv-scalibr/plugin" 35 "github.com/google/osv-scalibr/purl" 36 "gopkg.in/yaml.v3" 37 ) 38 39 const ( 40 // Name is the unique name of this extractor. 41 Name = "javascript/pnpmlock" 42 ) 43 44 type pnpmLockPackageResolution struct { 45 Tarball string `yaml:"tarball"` 46 Commit string `yaml:"commit"` 47 Repo string `yaml:"repo"` 48 Type string `yaml:"type"` 49 } 50 51 type pnpmLockPackage struct { 52 Resolution pnpmLockPackageResolution `yaml:"resolution"` 53 Name string `yaml:"name"` 54 Version string `yaml:"version"` 55 Dev bool `yaml:"dev"` 56 } 57 58 type pnpmLockfile struct { 59 Version float64 `yaml:"lockfileVersion"` 60 Packages map[string]pnpmLockPackage `yaml:"packages,omitempty"` 61 } 62 63 type pnpmLockfileV6 struct { 64 Version string `yaml:"lockfileVersion"` 65 Packages map[string]pnpmLockPackage `yaml:"packages,omitempty"` 66 } 67 68 // UnmarshalYAML is a custom unmarshalling function for handling v6 lockfiles. 69 func (l *pnpmLockfile) UnmarshalYAML(unmarshal func(any) error) error { 70 var lockfileV6 pnpmLockfileV6 71 72 if err := unmarshal(&lockfileV6); err != nil { 73 return err 74 } 75 76 parsedVersion, err := strconv.ParseFloat(lockfileV6.Version, 64) 77 78 if err != nil { 79 return err 80 } 81 82 l.Version = parsedVersion 83 l.Packages = lockfileV6.Packages 84 85 return nil 86 } 87 88 var ( 89 numberMatcher = regexp.MustCompile(`^\d`) 90 // Looks for the pattern "name@version", where name is allowed to contain zero or more "@" 91 nameVersionRegexp = regexp.MustCompile(`^(.+)@([\w.-]+)(?:\(|$)`) 92 93 codeLoadURLRegexp = regexp.MustCompile(`https://codeload\.github\.com(?:/[\w-.]+){2}/tar\.gz/(\w+)$`) 94 ) 95 96 // extractPnpmPackageNameAndVersion parses a dependency path, attempting to 97 // extract the name and version of the package it represents 98 func extractPnpmPackageNameAndVersion(dependencyPath string, lockfileVersion float64) (string, string, error) { 99 // file dependencies must always have a name property to be installed, 100 // and their dependency path never has the version encoded, so we can 101 // skip trying to extract either from their dependency path 102 if strings.HasPrefix(dependencyPath, "file:") { 103 return "", "", nil 104 } 105 106 // v9.0 specifies the dependencies as <package>@<version> rather than as a path 107 if lockfileVersion >= 9.0 { 108 dependencyPath = strings.Trim(dependencyPath, "'") 109 dependencyPath, isScoped := strings.CutPrefix(dependencyPath, "@") 110 111 name, version, _ := strings.Cut(dependencyPath, "@") 112 113 if isScoped { 114 name = "@" + name 115 } 116 117 return name, version, nil 118 } 119 120 parts := strings.Split(dependencyPath, "/") 121 if len(parts) < 2 { 122 return "", "", fmt.Errorf("invalid dependency path: %v", dependencyPath) 123 } 124 var name string 125 126 parts = parts[1:] 127 128 if strings.HasPrefix(parts[0], "@") { 129 name = strings.Join(parts[:2], "/") 130 parts = parts[2:] 131 } else { 132 name = parts[0] 133 parts = parts[1:] 134 } 135 136 version := "" 137 138 if len(parts) != 0 { 139 version = parts[0] 140 } 141 142 if version == "" { 143 name, version = parseNameAtVersion(name) 144 } 145 146 if version == "" || !numberMatcher.MatchString(version) { 147 return "", "", nil 148 } 149 150 underscoreIndex := strings.Index(version, "_") 151 152 if underscoreIndex != -1 { 153 version = strings.Split(version, "_")[0] 154 } 155 156 return name, version, nil 157 } 158 159 func parseNameAtVersion(value string) (name string, version string) { 160 matches := nameVersionRegexp.FindStringSubmatch(value) 161 162 if len(matches) != 3 { 163 return name, "" 164 } 165 166 return matches[1], matches[2] 167 } 168 169 func parsePnpmLock(lockfile pnpmLockfile) ([]*extractor.Package, error) { 170 packages := make([]*extractor.Package, 0, len(lockfile.Packages)) 171 errs := []error{} 172 173 for s, pkg := range lockfile.Packages { 174 name, version, err := extractPnpmPackageNameAndVersion(s, lockfile.Version) 175 if err != nil { 176 errs = append(errs, err) 177 log.Errorf("failed to extract package version from %v: %v", pkg, err) 178 continue 179 } 180 181 // "name" is only present if it's not in the dependency path and takes 182 // priority over whatever name we think we've extracted (if any) 183 if pkg.Name != "" { 184 name = pkg.Name 185 } 186 187 // "version" is only present if it's not in the dependency path and takes 188 // priority over whatever version we think we've extracted (if any) 189 if pkg.Version != "" { 190 version = pkg.Version 191 } 192 193 if name == "" || version == "" { 194 continue 195 } 196 197 commit := pkg.Resolution.Commit 198 199 if strings.HasPrefix(pkg.Resolution.Tarball, "https://codeload.github.com") { 200 matched := codeLoadURLRegexp.FindStringSubmatch(pkg.Resolution.Tarball) 201 202 if matched != nil { 203 commit = matched[1] 204 } 205 } 206 207 depGroups := []string{} 208 if pkg.Dev { 209 depGroups = append(depGroups, "dev") 210 } 211 212 packages = append(packages, &extractor.Package{ 213 Name: name, 214 Version: version, 215 PURLType: purl.TypeNPM, 216 SourceCode: &extractor.SourceCodeIdentifier{ 217 Commit: commit, 218 }, 219 Metadata: osv.DepGroupMetadata{ 220 DepGroupVals: depGroups, 221 }, 222 }) 223 } 224 225 return packages, errors.Join(errs...) 226 } 227 228 // Extractor extracts pnpm-lock.yaml files. 229 type Extractor struct{} 230 231 // New returns a new instance of the extractor. 232 func New() filesystem.Extractor { return &Extractor{} } 233 234 // Name of the extractor 235 func (e Extractor) Name() string { return Name } 236 237 // Version of the extractor 238 func (e Extractor) Version() int { return 0 } 239 240 // Requirements of the extractor. 241 func (e Extractor) Requirements() *plugin.Capabilities { return &plugin.Capabilities{} } 242 243 // FileRequired returns true if the specified file matches pnpm-lock.yaml files. 244 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 245 path := api.Path() 246 if filepath.Base(path) != "pnpm-lock.yaml" { 247 return false 248 } 249 // Skip lockfiles inside node_modules directories since the packages they list aren't 250 // necessarily installed by the root project. We instead use the more specific top-level 251 // lockfile for the root project dependencies. 252 dir := filepath.ToSlash(filepath.Dir(path)) 253 return !slices.Contains(strings.Split(dir, "/"), "node_modules") 254 } 255 256 // Extract extracts packages from a pnpm-lock.yaml file passed through the scan input. 257 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 258 var parsedLockfile *pnpmLockfile 259 260 err := yaml.NewDecoder(input.Reader).Decode(&parsedLockfile) 261 262 if err != nil && !errors.Is(err, io.EOF) { 263 return inventory.Inventory{}, fmt.Errorf("could not extract: %w", err) 264 } 265 266 // this will happen if the file is empty 267 if parsedLockfile == nil { 268 parsedLockfile = &pnpmLockfile{} 269 } 270 271 packages, err := parsePnpmLock(*parsedLockfile) 272 for i := range packages { 273 packages[i].Locations = []string{input.Path} 274 } 275 276 return inventory.Inventory{Packages: packages}, err 277 } 278 279 var _ filesystem.Extractor = Extractor{}