github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/javascript/parser/yarn/parse.go (about) 1 package yarn 2 3 import ( 4 "bufio" 5 "bytes" 6 "errors" 7 "fmt" 8 "io" 9 "regexp" 10 "strings" 11 12 "github.com/anchore/syft/internal/log" 13 "github.com/anchore/syft/syft/pkg/cataloger/javascript/key" 14 ) 15 16 var ( 17 yarnPatternRegexp = regexp.MustCompile(`^\s?\\?"?(?P<package>\S+?)@(?:(?P<protocol>\S+?):)?(?P<version>.+?)\\?"?:?$`) 18 yarnPatternHTTPRegexp = regexp.MustCompile(`^\s?\\?"?(?P<package>\S+?)@https:\/\/[^#]+#(?P<version>.+?)\\?"?:?$`) 19 20 yarnVersionRegexp = regexp.MustCompile(`^"?version:?"?\s+"?(?P<version>[^"]+)"?`) 21 yarnDependencyRegexp = regexp.MustCompile(`\s{4,}"?(?P<package>.+?)"?:?\s"?(?P<version>[^"]+)"?`) 22 yarnIntegrityRegexp = regexp.MustCompile(`^"?integrity:?"?\s+"?(?P<integrity>[^"]+)"?`) 23 yarnResolvedRegexp = regexp.MustCompile(`^"?resolved:?"?\s+"?(?P<resolved>[^"]+)"?`) 24 // yarnPackageURLExp matches the name and version of the dependency in yarn.lock 25 // from the resolved URL, including scope/namespace prefix if any. 26 // For example: 27 // `"https://registry.yarnpkg.com/async/-/async-3.2.3.tgz#ac53dafd3f4720ee9e8a160628f18ea91df196c9"` 28 // would return "async" and "3.2.3" 29 // 30 // `"https://registry.yarnpkg.com/@4lolo/resize-observer-polyfill/-/resize-observer-polyfill-1.5.2.tgz#58868fc7224506236b5550d0c68357f0a874b84b"` 31 // would return "@4lolo/resize-observer-polyfill" and "1.5.2" 32 yarnPackageURLExp = regexp.MustCompile(`^https://registry\.(?:yarnpkg\.com|npmjs\.org)/(.+?)/-/(?:.+?)-(\d+\..+?)\.tgz`) 33 ) 34 35 type PkgRef struct { 36 Name string 37 Version string 38 Integrity string 39 Resolved string 40 Patterns []string 41 Dependencies map[string]string 42 } 43 44 type LineScanner struct { 45 *bufio.Scanner 46 lineCount int 47 } 48 49 func newLineScanner(r io.Reader) *LineScanner { 50 return &LineScanner{ 51 Scanner: bufio.NewScanner(r), 52 } 53 } 54 55 func (s *LineScanner) Scan() bool { 56 scan := s.Scanner.Scan() 57 if scan { 58 s.lineCount++ 59 } 60 return scan 61 } 62 63 func (s *LineScanner) LineNum(prevNum int) int { 64 return prevNum + s.lineCount - 1 65 } 66 67 func parseDependencies(scanner *LineScanner) map[string]string { 68 deps := map[string]string{} 69 for scanner.Scan() { 70 line := scanner.Text() 71 name, version, err := parseDependency(line) 72 if err != nil { 73 // finished dependencies block 74 return deps 75 } 76 deps[name] = version 77 } 78 79 return deps 80 } 81 82 func getDependency(target string) (name, version string, err error) { 83 capture := yarnDependencyRegexp.FindStringSubmatch(target) 84 if len(capture) < 3 { 85 return "", "", errors.New("not dependency") 86 } 87 return capture[1], capture[2], nil 88 } 89 90 func getIntegrity(target string) (integrity string, err error) { 91 capture := yarnIntegrityRegexp.FindStringSubmatch(target) 92 if len(capture) < 2 { 93 return "", errors.New("not integrity") 94 } 95 return capture[1], nil 96 } 97 98 func getResolved(target string) (resolved string, err error) { 99 capture := yarnResolvedRegexp.FindStringSubmatch(target) 100 if len(capture) < 2 { 101 return "", errors.New("not resolved") 102 } 103 return capture[1], nil 104 } 105 106 func parseDependency(line string) (string, string, error) { 107 name, version, err := getDependency(line) 108 if err != nil { 109 return "", "", err 110 } 111 return name, version, nil 112 } 113 114 func getVersion(target string) (version string, err error) { 115 capture := yarnVersionRegexp.FindStringSubmatch(target) 116 if len(capture) < 2 { 117 return "", fmt.Errorf("failed to parse version: '%s", target) 118 } 119 return capture[len(capture)-1], nil 120 } 121 122 func getPackageNameFromResolved(resolution string) (pkgName string) { 123 if matches := yarnPackageURLExp.FindStringSubmatch(resolution); len(matches) >= 2 { 124 return matches[1] 125 } 126 return "" 127 } 128 129 func parsePattern(target string) (packagename, protocol, version string, err error) { 130 var capture []string 131 var names []string 132 133 if strings.Contains(target, "https://") { 134 capture = yarnPatternHTTPRegexp.FindStringSubmatch(target) 135 protocol = "https" 136 names = yarnPatternHTTPRegexp.SubexpNames() 137 } else { 138 capture = yarnPatternRegexp.FindStringSubmatch(target) 139 names = yarnPatternRegexp.SubexpNames() 140 } 141 142 if len(capture) < 3 { 143 return "", "", "", errors.New("not package format") 144 } 145 for i, group := range names { 146 switch group { 147 case "package": 148 packagename = capture[i] 149 case "protocol": 150 protocol = capture[i] 151 case "version": 152 version = capture[i] 153 } 154 } 155 return 156 } 157 158 func parsePackagePatterns(target string) (packagename, protocol string, patterns []string, err error) { 159 patternsSplit := strings.Split(target, ", ") 160 packagename, protocol, _, err = parsePattern(patternsSplit[0]) 161 if err != nil { 162 return "", "", nil, err 163 } 164 165 var resultPatterns []string 166 for _, pattern := range patternsSplit { 167 _, _, version, _ := parsePattern(pattern) 168 resultPatterns = append(resultPatterns, key.NpmPackageKey(packagename, version)) 169 } 170 patterns = resultPatterns 171 return 172 } 173 174 func validProtocol(protocol string) bool { 175 switch protocol { 176 // example: "jhipster-core@npm:7.3.4": 177 case "npm", "": 178 return true 179 // example: "my-pkg@workspace:." 180 case "workspace": 181 return true 182 // example: "should-type@https://github.com/shouldjs/type.git#1.3.0" 183 case "https": 184 return true 185 } 186 return false 187 } 188 189 func ignoreProtocol(protocol string) bool { 190 switch protocol { 191 case "patch", "file", "link", "portal", "github", "git", "git+ssh", "git+http", "git+https", "git+file": 192 return true 193 } 194 return false 195 } 196 197 func handleEmptyLinesAndComments(line string, skipBlock bool) (int, bool) { 198 if len(line) == 0 { 199 return 1, skipBlock 200 } 201 202 if line[0] == '#' || skipBlock { 203 return 0, skipBlock 204 } 205 206 if strings.HasPrefix(line, "__metadata") { 207 return 0, true 208 } 209 210 return 0, skipBlock 211 } 212 213 func handleLinePrefixes(line string, pkg *PkgRef, scanner *LineScanner) (err error) { 214 switch { 215 case strings.HasPrefix(line, "version"): 216 pkg.Version, err = getVersion(line) 217 case strings.HasPrefix(line, "integrity"): 218 pkg.Integrity, err = getIntegrity(line) 219 case strings.HasPrefix(line, "resolved"): 220 pkg.Resolved, err = getResolved(line) 221 case strings.HasPrefix(line, "dependencies:"): 222 pkg.Dependencies = parseDependencies(scanner) 223 } 224 return 225 } 226 227 func ParseBlock(block []byte, lineNum int) (pkg PkgRef, lineNumber int, err error) { 228 var ( 229 emptyLines int // lib can start with empty lines first 230 skipBlock bool 231 ) 232 233 scanner := newLineScanner(bytes.NewReader(block)) 234 for scanner.Scan() { 235 line := scanner.Text() 236 237 var increment int 238 increment, skipBlock = handleEmptyLinesAndComments(line, skipBlock) 239 emptyLines += increment 240 241 line = strings.TrimPrefix(strings.TrimSpace(line), "\"") 242 243 if err := handleLinePrefixes(line, &pkg, scanner); err != nil { 244 skipBlock = true 245 } 246 247 // try parse package patterns 248 if name, protocol, patterns, patternErr := parsePackagePatterns(line); patternErr == nil { 249 if patterns == nil || !validProtocol(protocol) { 250 skipBlock = true 251 if !ignoreProtocol(protocol) { 252 // we need to calculate the last line of the block in order to correctly determine the line numbers of the next blocks 253 // store the error. we will handle it later 254 err = fmt.Errorf("unknown protocol: '%s', line: %s", protocol, line) 255 continue 256 } 257 continue 258 } 259 pkg.Name = name 260 pkg.Patterns = patterns 261 continue 262 } 263 } 264 265 // handles the case of namespaces packages like @4lolo/resize-observer-polyfill 266 // where the name might not be present in the name field, but only in the 267 // resolved field 268 resolvedPkgName := getPackageNameFromResolved(pkg.Resolved) 269 if resolvedPkgName != "" { 270 pkg.Name = resolvedPkgName 271 } 272 273 // in case an unsupported protocol is detected 274 // show warning and continue parsing 275 if err != nil { 276 log.Debugf("failed to parse block: %s", err) 277 return pkg, scanner.LineNum(lineNum), nil 278 } 279 280 if scanErr := scanner.Err(); scanErr != nil { 281 err = scanErr 282 } 283 284 return pkg, scanner.LineNum(lineNum), err 285 } 286 287 func ScanBlocks(data []byte, atEOF bool) (advance int, token []byte, err error) { 288 if atEOF && len(data) == 0 { 289 return 0, nil, nil 290 } 291 if i := bytes.Index(data, []byte("\n\n")); i >= 0 { 292 // We have a full newline-terminated line. 293 return i + 2, data[0:i], nil 294 } else if i := bytes.Index(data, []byte("\r\n\r\n")); i >= 0 { 295 return i + 4, data[0:i], nil 296 } 297 298 // If we're at EOF, we have a final, non-terminated line. Return it. 299 if atEOF { 300 return len(data), data, nil 301 } 302 // Request more data. 303 return 0, nil, nil 304 }