github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/javascript/yarnlock/yarnlock.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package yarnlock extracts NPC yarn.lock files. 16 package yarnlock 17 18 import ( 19 "bufio" 20 "context" 21 "errors" 22 "fmt" 23 "path/filepath" 24 "regexp" 25 "slices" 26 "strings" 27 28 "github.com/google/osv-scalibr/extractor" 29 "github.com/google/osv-scalibr/extractor/filesystem" 30 "github.com/google/osv-scalibr/extractor/filesystem/language/javascript/internal/commitextractor" 31 "github.com/google/osv-scalibr/inventory" 32 "github.com/google/osv-scalibr/log" 33 "github.com/google/osv-scalibr/plugin" 34 "github.com/google/osv-scalibr/purl" 35 ) 36 37 const ( 38 // Name is the unique name of this extractor. 39 Name = "javascript/yarnlock" 40 ) 41 42 var ( 43 // Version matcher regex. 44 // Format for yarn.lock v1: `version "0.0.1"` 45 // Format for yarn.lock v2: `version: 0.0.1` 46 yarnPackageVersionRe = regexp.MustCompile(`^ {2}"?version"?:? "?([\w-.+]+)"?$`) 47 // Package resolution matcher regex. Might contain commit hashes. 48 // Format for yarn.lock v1: `resolved "git+ssh://git@github.com:G-Rath/repo-2#hash"` 49 // Format for yarn.lock v2: `resolution: "@my-scope/my-first-package@https://github.com/my-org/my-first-pkg.git#commit=hash"` 50 yarnPackageResolutionRe = regexp.MustCompile(`^ {2}"?(?:resolution:|resolved)"? "([^ '"]+)"$`) 51 ) 52 53 func shouldSkipYarnLine(line string) bool { 54 line = strings.TrimSpace(line) 55 return line == "" || strings.HasPrefix(line, "#") 56 } 57 58 // yaml.lock files define packages as follows: 59 // 60 // header 61 // prop1 value1 62 // prop2 value2 63 // 64 // header2 65 // prop3 value3 66 type packageDescription struct { 67 header string 68 props []string 69 } 70 71 func groupYarnPackageDescriptions(ctx context.Context, scanner *bufio.Scanner) ([]*packageDescription, error) { 72 result := []*packageDescription{} 73 74 var current *packageDescription 75 for scanner.Scan() { 76 if err := ctx.Err(); err != nil { 77 return result, err 78 } 79 if err := scanner.Err(); err != nil { 80 return result, err 81 } 82 83 line := scanner.Text() 84 85 if shouldSkipYarnLine(line) { 86 continue 87 } 88 89 // represents the start of a new dependency 90 if !strings.HasPrefix(line, " ") { 91 // Add previous descriptor if it's for a package. 92 if current != nil { 93 result = append(result, current) 94 } 95 current = &packageDescription{header: line} 96 } else if current == nil { 97 return nil, errors.New("malformed yarn.lock") 98 } else { 99 current.props = append(current.props, line) 100 } 101 } 102 // Add trailing descriptor. 103 if current != nil { 104 result = append(result, current) 105 } 106 107 return result, nil 108 } 109 110 func extractYarnPackageName(header string) string { 111 // Header format: @my-scope/my-first-package@my-scope/my-first-package#commit=hash 112 str := strings.TrimPrefix(header, "\"") 113 str = strings.TrimSuffix(str, ":") 114 str, _, _ = strings.Cut(str, ",") 115 116 isScoped := strings.HasPrefix(str, "@") 117 118 if isScoped { 119 str = strings.TrimPrefix(str, "@") 120 } 121 name, right, _ := strings.Cut(str, "@") 122 123 // Packages can also contain an NPM entry, e.g. @nicolo-ribaudo/chokidar-2@npm:2.1.8-no-fsevents.3 124 if strings.HasPrefix(right, "npm:") && strings.Contains(right, "@") { 125 return extractYarnPackageName(strings.TrimPrefix(right, "npm:")) 126 } 127 128 if isScoped { 129 name = "@" + name 130 } 131 return name 132 } 133 134 func determineYarnPackageVersion(props []string) string { 135 for _, s := range props { 136 matched := yarnPackageVersionRe.FindStringSubmatch(s) 137 138 if matched != nil { 139 return matched[1] 140 } 141 } 142 return "" 143 } 144 145 func determineYarnPackageResolution(props []string) string { 146 for _, s := range props { 147 matched := yarnPackageResolutionRe.FindStringSubmatch(s) 148 if matched != nil { 149 return matched[1] 150 } 151 } 152 return "" 153 } 154 155 func parseYarnPackageGroup(desc *packageDescription) *extractor.Package { 156 name := extractYarnPackageName(desc.header) 157 version := determineYarnPackageVersion(desc.props) 158 resolution := determineYarnPackageResolution(desc.props) 159 160 if version == "" { 161 log.Errorf("Failed to determine version of %s while parsing a yarn.lock", name) 162 } 163 164 return &extractor.Package{ 165 Name: name, 166 Version: version, 167 PURLType: purl.TypeNPM, 168 SourceCode: &extractor.SourceCodeIdentifier{ 169 Commit: commitextractor.TryExtractCommit(resolution), 170 }, 171 } 172 } 173 174 // Extractor extracts NPM yarn.lock files. 175 type Extractor struct{} 176 177 // New returns a new instance of the extractor. 178 func New() filesystem.Extractor { return &Extractor{} } 179 180 // Name of the extractor 181 func (e Extractor) Name() string { return Name } 182 183 // Version of the extractor 184 func (e Extractor) Version() int { return 0 } 185 186 // Requirements of the extractor 187 func (e Extractor) Requirements() *plugin.Capabilities { 188 return &plugin.Capabilities{} 189 } 190 191 // FileRequired returns true if the specified file is an NPM yarn.lock file. 192 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 193 path := api.Path() 194 if filepath.Base(path) != "yarn.lock" { 195 return false 196 } 197 // Skip lockfiles inside node_modules directories since the packages they list aren't 198 // necessarily installed by the root project. We instead use the more specific top-level 199 // lockfile for the root project dependencies. 200 dir := filepath.ToSlash(filepath.Dir(path)) 201 return !slices.Contains(strings.Split(dir, "/"), "node_modules") 202 } 203 204 // Extract extracts packages from NPM yarn.lock files passed through the scan input. 205 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 206 scanner := bufio.NewScanner(input.Reader) 207 208 packageGroups, err := groupYarnPackageDescriptions(ctx, scanner) 209 if err != nil { 210 return inventory.Inventory{}, fmt.Errorf("error while scanning: %w", err) 211 } 212 213 packages := make([]*extractor.Package, 0, len(packageGroups)) 214 215 for _, group := range packageGroups { 216 if group.header == "__metadata:" { 217 // This group doesn't describe a package. 218 continue 219 } 220 if strings.HasSuffix(group.header, "@workspace:.\":") { 221 // This is the root package itself. 222 continue 223 } 224 pkg := parseYarnPackageGroup(group) 225 pkg.Locations = []string{input.Path} 226 packages = append(packages, pkg) 227 } 228 229 return inventory.Inventory{Packages: packages}, nil 230 } 231 232 var _ filesystem.Extractor = Extractor{}