github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/javascript/parser/yarn/parse.go (about)

     1  package yarn
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"regexp"
    10  	"strings"
    11  
    12  	"github.com/anchore/syft/internal/log"
    13  	"github.com/anchore/syft/syft/pkg/cataloger/javascript/key"
    14  )
    15  
var (
	// yarnPatternRegexp and yarnPatternHTTPRegexp each match one package pattern
	// from a block's header line. Both tolerate one optional leading whitespace
	// character, optional (backslash-escaped) double quotes around the pattern
	// and an optional trailing colon.
	//   - yarnPatternRegexp captures `package`, an optional `protocol` prefix that
	//     is terminated by `:`, and `version` (everything after `@` / the protocol).
	//   - yarnPatternHTTPRegexp matches `package@https://...#version` patterns and
	//     captures `package` and the `version` that follows the `#`.
	yarnPatternRegexp     = regexp.MustCompile(`^\s?\\?"?(?P<package>\S+?)@(?:(?P<protocol>\S+?):)?(?P<version>.+?)\\?"?:?$`)
	yarnPatternHTTPRegexp = regexp.MustCompile(`^\s?\\?"?(?P<package>\S+?)@https:\/\/[^#]+#(?P<version>.+?)\\?"?:?$`)

	// The expressions below match the individual lines inside a package block.
	// yarnVersionRegexp, yarnIntegrityRegexp and yarnResolvedRegexp are anchored at
	// the start of the line and accept the keyword with or without surrounding
	// quotes and trailing colon, followed by whitespace and an optionally quoted
	// value, which is the single capture group.
	// yarnDependencyRegexp is not anchored: it looks for a run of at least four
	// whitespace characters followed by an optionally quoted name, an optional
	// colon, one whitespace character and an optionally quoted version.
	yarnVersionRegexp    = regexp.MustCompile(`^"?version:?"?\s+"?(?P<version>[^"]+)"?`)
	yarnDependencyRegexp = regexp.MustCompile(`\s{4,}"?(?P<package>.+?)"?:?\s"?(?P<version>[^"]+)"?`)
	yarnIntegrityRegexp  = regexp.MustCompile(`^"?integrity:?"?\s+"?(?P<integrity>[^"]+)"?`)
	yarnResolvedRegexp   = regexp.MustCompile(`^"?resolved:?"?\s+"?(?P<resolved>[^"]+)"?`)
	// yarnPackageURLExp matches the name and version of the dependency in yarn.lock
	// from the resolved URL, including scope/namespace prefix if any.
	// For example:
	//		`"https://registry.yarnpkg.com/async/-/async-3.2.3.tgz#ac53dafd3f4720ee9e8a160628f18ea91df196c9"`
	//			would return "async" and "3.2.3"
	//
	//		`"https://registry.yarnpkg.com/@4lolo/resize-observer-polyfill/-/resize-observer-polyfill-1.5.2.tgz#58868fc7224506236b5550d0c68357f0a874b84b"`
	//			would return "@4lolo/resize-observer-polyfill" and "1.5.2"
	yarnPackageURLExp = regexp.MustCompile(`^https://registry\.(?:yarnpkg\.com|npmjs\.org)/(.+?)/-/(?:.+?)-(\d+\..+?)\.tgz`)
)
    34  
// PkgRef holds the data collected from one package block of a yarn.lock file.
//
//   - Name: package name taken from the block's pattern line; ParseBlock
//     overwrites it with the name extracted from Resolved when that URL matches
//     yarnPackageURLExp.
//   - Version: value of the block's `version` line.
//   - Integrity: value of the block's `integrity` line.
//   - Resolved: value of the block's `resolved` line.
//   - Patterns: one key per pattern on the block's header line, each built with
//     key.NpmPackageKey from the package name and that pattern's version.
//   - Dependencies: name -> version pairs read from the `dependencies:` section.
type PkgRef struct {
	Name         string
	Version      string
	Integrity    string
	Resolved     string
	Patterns     []string
	Dependencies map[string]string
}
    43  
    44  type LineScanner struct {
    45  	*bufio.Scanner
    46  	lineCount int
    47  }
    48  
    49  func newLineScanner(r io.Reader) *LineScanner {
    50  	return &LineScanner{
    51  		Scanner: bufio.NewScanner(r),
    52  	}
    53  }
    54  
    55  func (s *LineScanner) Scan() bool {
    56  	scan := s.Scanner.Scan()
    57  	if scan {
    58  		s.lineCount++
    59  	}
    60  	return scan
    61  }
    62  
    63  func (s *LineScanner) LineNum(prevNum int) int {
    64  	return prevNum + s.lineCount - 1
    65  }
    66  
    67  func parseDependencies(scanner *LineScanner) map[string]string {
    68  	deps := map[string]string{}
    69  	for scanner.Scan() {
    70  		line := scanner.Text()
    71  		name, version, err := parseDependency(line)
    72  		if err != nil {
    73  			// finished dependencies block
    74  			return deps
    75  		}
    76  		deps[name] = version
    77  	}
    78  
    79  	return deps
    80  }
    81  
    82  func getDependency(target string) (name, version string, err error) {
    83  	capture := yarnDependencyRegexp.FindStringSubmatch(target)
    84  	if len(capture) < 3 {
    85  		return "", "", errors.New("not dependency")
    86  	}
    87  	return capture[1], capture[2], nil
    88  }
    89  
    90  func getIntegrity(target string) (integrity string, err error) {
    91  	capture := yarnIntegrityRegexp.FindStringSubmatch(target)
    92  	if len(capture) < 2 {
    93  		return "", errors.New("not integrity")
    94  	}
    95  	return capture[1], nil
    96  }
    97  
    98  func getResolved(target string) (resolved string, err error) {
    99  	capture := yarnResolvedRegexp.FindStringSubmatch(target)
   100  	if len(capture) < 2 {
   101  		return "", errors.New("not resolved")
   102  	}
   103  	return capture[1], nil
   104  }
   105  
   106  func parseDependency(line string) (string, string, error) {
   107  	name, version, err := getDependency(line)
   108  	if err != nil {
   109  		return "", "", err
   110  	}
   111  	return name, version, nil
   112  }
   113  
   114  func getVersion(target string) (version string, err error) {
   115  	capture := yarnVersionRegexp.FindStringSubmatch(target)
   116  	if len(capture) < 2 {
   117  		return "", fmt.Errorf("failed to parse version: '%s", target)
   118  	}
   119  	return capture[len(capture)-1], nil
   120  }
   121  
   122  func getPackageNameFromResolved(resolution string) (pkgName string) {
   123  	if matches := yarnPackageURLExp.FindStringSubmatch(resolution); len(matches) >= 2 {
   124  		return matches[1]
   125  	}
   126  	return ""
   127  }
   128  
   129  func parsePattern(target string) (packagename, protocol, version string, err error) {
   130  	var capture []string
   131  	var names []string
   132  
   133  	if strings.Contains(target, "https://") {
   134  		capture = yarnPatternHTTPRegexp.FindStringSubmatch(target)
   135  		protocol = "https"
   136  		names = yarnPatternHTTPRegexp.SubexpNames()
   137  	} else {
   138  		capture = yarnPatternRegexp.FindStringSubmatch(target)
   139  		names = yarnPatternRegexp.SubexpNames()
   140  	}
   141  
   142  	if len(capture) < 3 {
   143  		return "", "", "", errors.New("not package format")
   144  	}
   145  	for i, group := range names {
   146  		switch group {
   147  		case "package":
   148  			packagename = capture[i]
   149  		case "protocol":
   150  			protocol = capture[i]
   151  		case "version":
   152  			version = capture[i]
   153  		}
   154  	}
   155  	return
   156  }
   157  
   158  func parsePackagePatterns(target string) (packagename, protocol string, patterns []string, err error) {
   159  	patternsSplit := strings.Split(target, ", ")
   160  	packagename, protocol, _, err = parsePattern(patternsSplit[0])
   161  	if err != nil {
   162  		return "", "", nil, err
   163  	}
   164  
   165  	var resultPatterns []string
   166  	for _, pattern := range patternsSplit {
   167  		_, _, version, _ := parsePattern(pattern)
   168  		resultPatterns = append(resultPatterns, key.NpmPackageKey(packagename, version))
   169  	}
   170  	patterns = resultPatterns
   171  	return
   172  }
   173  
   174  func validProtocol(protocol string) bool {
   175  	switch protocol {
   176  	// example: "jhipster-core@npm:7.3.4":
   177  	case "npm", "":
   178  		return true
   179  	// example: "my-pkg@workspace:."
   180  	case "workspace":
   181  		return true
   182  	// example: "should-type@https://github.com/shouldjs/type.git#1.3.0"
   183  	case "https":
   184  		return true
   185  	}
   186  	return false
   187  }
   188  
   189  func ignoreProtocol(protocol string) bool {
   190  	switch protocol {
   191  	case "patch", "file", "link", "portal", "github", "git", "git+ssh", "git+http", "git+https", "git+file":
   192  		return true
   193  	}
   194  	return false
   195  }
   196  
   197  func handleEmptyLinesAndComments(line string, skipBlock bool) (int, bool) {
   198  	if len(line) == 0 {
   199  		return 1, skipBlock
   200  	}
   201  
   202  	if line[0] == '#' || skipBlock {
   203  		return 0, skipBlock
   204  	}
   205  
   206  	if strings.HasPrefix(line, "__metadata") {
   207  		return 0, true
   208  	}
   209  
   210  	return 0, skipBlock
   211  }
   212  
   213  func handleLinePrefixes(line string, pkg *PkgRef, scanner *LineScanner) (err error) {
   214  	switch {
   215  	case strings.HasPrefix(line, "version"):
   216  		pkg.Version, err = getVersion(line)
   217  	case strings.HasPrefix(line, "integrity"):
   218  		pkg.Integrity, err = getIntegrity(line)
   219  	case strings.HasPrefix(line, "resolved"):
   220  		pkg.Resolved, err = getResolved(line)
   221  	case strings.HasPrefix(line, "dependencies:"):
   222  		pkg.Dependencies = parseDependencies(scanner)
   223  	}
   224  	return
   225  }
   226  
   227  func ParseBlock(block []byte, lineNum int) (pkg PkgRef, lineNumber int, err error) {
   228  	var (
   229  		emptyLines int // lib can start with empty lines first
   230  		skipBlock  bool
   231  	)
   232  
   233  	scanner := newLineScanner(bytes.NewReader(block))
   234  	for scanner.Scan() {
   235  		line := scanner.Text()
   236  
   237  		var increment int
   238  		increment, skipBlock = handleEmptyLinesAndComments(line, skipBlock)
   239  		emptyLines += increment
   240  
   241  		line = strings.TrimPrefix(strings.TrimSpace(line), "\"")
   242  
   243  		if err := handleLinePrefixes(line, &pkg, scanner); err != nil {
   244  			skipBlock = true
   245  		}
   246  
   247  		// try parse package patterns
   248  		if name, protocol, patterns, patternErr := parsePackagePatterns(line); patternErr == nil {
   249  			if patterns == nil || !validProtocol(protocol) {
   250  				skipBlock = true
   251  				if !ignoreProtocol(protocol) {
   252  					// we need to calculate the last line of the block in order to correctly determine the line numbers of the next blocks
   253  					// store the error. we will handle it later
   254  					err = fmt.Errorf("unknown protocol: '%s', line: %s", protocol, line)
   255  					continue
   256  				}
   257  				continue
   258  			}
   259  			pkg.Name = name
   260  			pkg.Patterns = patterns
   261  			continue
   262  		}
   263  	}
   264  
   265  	// handles the case of namespaces packages like @4lolo/resize-observer-polyfill
   266  	// where the name might not be present in the name field, but only in the
   267  	// resolved field
   268  	resolvedPkgName := getPackageNameFromResolved(pkg.Resolved)
   269  	if resolvedPkgName != "" {
   270  		pkg.Name = resolvedPkgName
   271  	}
   272  
   273  	// in case an unsupported protocol is detected
   274  	// show warning and continue parsing
   275  	if err != nil {
   276  		log.Debugf("failed to parse block: %s", err)
   277  		return pkg, scanner.LineNum(lineNum), nil
   278  	}
   279  
   280  	if scanErr := scanner.Err(); scanErr != nil {
   281  		err = scanErr
   282  	}
   283  
   284  	return pkg, scanner.LineNum(lineNum), err
   285  }
   286  
   287  func ScanBlocks(data []byte, atEOF bool) (advance int, token []byte, err error) {
   288  	if atEOF && len(data) == 0 {
   289  		return 0, nil, nil
   290  	}
   291  	if i := bytes.Index(data, []byte("\n\n")); i >= 0 {
   292  		// We have a full newline-terminated line.
   293  		return i + 2, data[0:i], nil
   294  	} else if i := bytes.Index(data, []byte("\r\n\r\n")); i >= 0 {
   295  		return i + 4, data[0:i], nil
   296  	}
   297  
   298  	// If we're at EOF, we have a final, non-terminated line. Return it.
   299  	if atEOF {
   300  		return len(data), data, nil
   301  	}
   302  	// Request more data.
   303  	return 0, nil, nil
   304  }