github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/javascript/parse_package_json.go (about)

     1  package javascript
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"regexp"
    10  
    11  	"github.com/mitchellh/mapstructure"
    12  
    13  	"github.com/anchore/syft/internal"
    14  	"github.com/anchore/syft/internal/log"
    15  	"github.com/anchore/syft/syft/artifact"
    16  	"github.com/anchore/syft/syft/file"
    17  	"github.com/anchore/syft/syft/pkg"
    18  	"github.com/anchore/syft/syft/pkg/cataloger/generic"
    19  )
    20  
// compile-time check: parsePackageJSON must be assignable to generic.Parser,
// otherwise the package fails to build.
var _ generic.Parser = parsePackageJSON
    23  
// packageJSON represents a JavaScript package.json file. Each field is filled
// from the JSON key named in its struct tag.
//
// Notes on the fields whose JSON shape is not fixed:
//   - Author and Repository have custom UnmarshalJSON methods and accept
//     either a plain string or an object.
//   - License and Licenses are kept as raw JSON at decode time and are only
//     interpreted later by the licensesFromJSON method.
type packageJSON struct {
	Version      string            `json:"version"`
	Latest       []string          `json:"latest"`
	Author       author            `json:"author"`
	License      json.RawMessage   `json:"license"`
	Licenses     json.RawMessage   `json:"licenses"`
	Name         string            `json:"name"`
	Homepage     string            `json:"homepage"`
	Description  string            `json:"description"`
	Dependencies map[string]string `json:"dependencies"`
	Repository   repository        `json:"repository"`
	Private      bool              `json:"private"`
}
    38  
    39  type author struct {
    40  	Name  string `json:"name" mapstruct:"name"`
    41  	Email string `json:"email" mapstruct:"email"`
    42  	URL   string `json:"url" mapstruct:"url"`
    43  }
    44  
// repository holds the type and URL of a package.json "repository" entry.
// The mapstructure tags are used by (*repository).UnmarshalJSON when the
// entry is given as an object; when it is given as a plain string only URL
// is populated.
type repository struct {
	Type string `json:"type" mapstructure:"type"`
	URL  string `json:"url" mapstructure:"url"`
}
    49  
// authorPattern splits the single-string form of an author entry into the
// named capture groups "name", "email" and "url".
//
// match example: "author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)"
// ---> name: "Isaac Z. Schlueter" email: "i@izs.me" url: "http://blog.izs.me"
//
// The name is everything before the first '<' or '('; the "<email>" and
// "(url)" parts are each optional.
var authorPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*)(\s+<(?P<email>.*)>)?(\s\((?P<url>.*)\))?\s*$`)
    53  
    54  // parsePackageJSON parses a package.json and returns the discovered JavaScript packages.
    55  func parsePackageJSON(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    56  	var pkgs []pkg.Package
    57  	dec := json.NewDecoder(reader)
    58  
    59  	for {
    60  		var p packageJSON
    61  		if err := dec.Decode(&p); errors.Is(err, io.EOF) {
    62  			break
    63  		} else if err != nil {
    64  			return nil, nil, fmt.Errorf("failed to parse package.json file: %w", err)
    65  		}
    66  
    67  		if !p.hasNameAndVersionValues() {
    68  			log.Debugf("encountered package.json file without a name and/or version field, ignoring (path=%q)", reader.Path())
    69  			return nil, nil, nil
    70  		}
    71  
    72  		pkgs = append(
    73  			pkgs,
    74  			newPackageJSONPackage(p, reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
    75  		)
    76  	}
    77  
    78  	pkg.Sort(pkgs)
    79  
    80  	return pkgs, nil, nil
    81  }
    82  
    83  func (a *author) UnmarshalJSON(b []byte) error {
    84  	var authorStr string
    85  	var fields map[string]string
    86  	var auth author
    87  
    88  	if err := json.Unmarshal(b, &authorStr); err != nil {
    89  		// string parsing did not work, assume a map was given
    90  		// for more information: https://docs.npmjs.com/files/package.json#people-fields-author-contributors
    91  		if err := json.Unmarshal(b, &fields); err != nil {
    92  			return fmt.Errorf("unable to parse package.json author: %w", err)
    93  		}
    94  	} else {
    95  		// parse out "name <email> (url)" into an author struct
    96  		fields = internal.MatchNamedCaptureGroups(authorPattern, authorStr)
    97  	}
    98  
    99  	// translate the map into a structure
   100  	if err := mapstructure.Decode(fields, &auth); err != nil {
   101  		return fmt.Errorf("unable to decode package.json author: %w", err)
   102  	}
   103  
   104  	*a = auth
   105  
   106  	return nil
   107  }
   108  
   109  func (a *author) AuthorString() string {
   110  	result := a.Name
   111  	if a.Email != "" {
   112  		result += fmt.Sprintf(" <%s>", a.Email)
   113  	}
   114  	if a.URL != "" {
   115  		result += fmt.Sprintf(" (%s)", a.URL)
   116  	}
   117  	return result
   118  }
   119  
   120  func (r *repository) UnmarshalJSON(b []byte) error {
   121  	var repositoryStr string
   122  	var fields map[string]string
   123  	var repo repository
   124  
   125  	if err := json.Unmarshal(b, &repositoryStr); err != nil {
   126  		// string parsing did not work, assume a map was given
   127  		// for more information: https://docs.npmjs.com/files/package.json#people-fields-author-contributors
   128  		if err := json.Unmarshal(b, &fields); err != nil {
   129  			return fmt.Errorf("unable to parse package.json author: %w", err)
   130  		}
   131  		// translate the map into a structure
   132  		if err := mapstructure.Decode(fields, &repo); err != nil {
   133  			return fmt.Errorf("unable to decode package.json author: %w", err)
   134  		}
   135  
   136  		*r = repo
   137  	} else {
   138  		r.URL = repositoryStr
   139  	}
   140  
   141  	return nil
   142  }
   143  
// npmPackageLicense is the object form of a license entry, with "type" and
// "url" keys. It is used by licenseFromJSON for a single "license" object and
// by licensesFromJSON for the elements of a "licenses" array; in both cases
// only Type is reported as the license value.
type npmPackageLicense struct {
	Type string `json:"type"`
	URL  string `json:"url"`
}
   148  
   149  func licenseFromJSON(b []byte) (string, error) {
   150  	// first try as string
   151  	var licenseString string
   152  	err := json.Unmarshal(b, &licenseString)
   153  	if err == nil {
   154  		return licenseString, nil
   155  	}
   156  
   157  	// then try as object (this format is deprecated)
   158  	var licenseObject npmPackageLicense
   159  	err = json.Unmarshal(b, &licenseObject)
   160  	if err == nil {
   161  		return licenseObject.Type, nil
   162  	}
   163  
   164  	return "", errors.New("unable to unmarshal license field as either string or object")
   165  }
   166  
   167  func (p packageJSON) licensesFromJSON() ([]string, error) {
   168  	if p.License == nil && p.Licenses == nil {
   169  		// This package.json doesn't specify any licenses whatsoever
   170  		return []string{}, nil
   171  	}
   172  
   173  	singleLicense, err := licenseFromJSON(p.License)
   174  	if err == nil {
   175  		return []string{singleLicense}, nil
   176  	}
   177  
   178  	multiLicense, err := licensesFromJSON(p.Licenses)
   179  
   180  	// The "licenses" field is deprecated. It should be inspected as a last resort.
   181  	if multiLicense != nil && err == nil {
   182  		mapLicenses := func(licenses []npmPackageLicense) []string {
   183  			mappedLicenses := make([]string, len(licenses))
   184  			for i, l := range licenses {
   185  				mappedLicenses[i] = l.Type
   186  			}
   187  			return mappedLicenses
   188  		}
   189  
   190  		return mapLicenses(multiLicense), nil
   191  	}
   192  
   193  	return nil, err
   194  }
   195  
   196  func licensesFromJSON(b []byte) ([]npmPackageLicense, error) {
   197  	var licenseObject []npmPackageLicense
   198  	err := json.Unmarshal(b, &licenseObject)
   199  	if err == nil {
   200  		return licenseObject, nil
   201  	}
   202  
   203  	return nil, errors.New("unmarshal failed")
   204  }
   205  
   206  func (p packageJSON) hasNameAndVersionValues() bool {
   207  	return p.Name != "" && p.Version != ""
   208  }
   209  
   210  // this supports both windows and unix paths
   211  var filepathSeparator = regexp.MustCompile(`[\\/]`)
   212  
   213  func pathContainsNodeModulesDirectory(p string) bool {
   214  	for _, subPath := range filepathSeparator.Split(p, -1) {
   215  		if subPath == "node_modules" {
   216  			return true
   217  		}
   218  	}
   219  	return false
   220  }