github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/javascript/parse_package_json.go (about)

     1  package javascript
     2  
     3  import (
     4  	"encoding/json"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"regexp"
     9  
    10  	"github.com/mitchellh/mapstructure"
    11  	"github.com/nextlinux/gosbom/gosbom/artifact"
    12  	"github.com/nextlinux/gosbom/gosbom/file"
    13  	"github.com/nextlinux/gosbom/gosbom/pkg"
    14  	"github.com/nextlinux/gosbom/gosbom/pkg/cataloger/generic"
    15  	"github.com/nextlinux/gosbom/internal"
    16  	"github.com/nextlinux/gosbom/internal/log"
    17  )
    18  
// integrity check: compile-time assertion that parsePackageJSON can be
// assigned to a generic.Parser (the build fails if its signature drifts).
var _ generic.Parser = parsePackageJSON
    21  
// packageJSON represents a JavaScript package.json file.
//
// Author and Repository use custom UnmarshalJSON methods because each may be
// given either as a plain string or as an object. License and Licenses are
// kept as raw JSON and interpreted later by licensesFromJSON, since their
// shape (string, object, or list of objects) is not known up front.
type packageJSON struct {
	Version      string            `json:"version"`
	Latest       []string          `json:"latest"`
	Author       author            `json:"author"`
	License      json.RawMessage   `json:"license"`
	Licenses     json.RawMessage   `json:"licenses"`
	Name         string            `json:"name"`
	Homepage     string            `json:"homepage"`
	Description  string            `json:"description"`
	Dependencies map[string]string `json:"dependencies"`
	Repository   repository        `json:"repository"`
	Private      bool              `json:"private"`
}
    36  
    37  type author struct {
    38  	Name  string `json:"name" mapstruct:"name"`
    39  	Email string `json:"email" mapstruct:"email"`
    40  	URL   string `json:"url" mapstruct:"url"`
    41  }
    42  
// repository describes the "repository" entry of a package.json file. It is
// populated by (*repository).UnmarshalJSON: a plain string value is stored in
// URL, while an object value is decoded through mapstructure using the
// `mapstructure` tags below.
type repository struct {
	Type string `json:"type" mapstructure:"type"`
	URL  string `json:"url" mapstructure:"url"`
}
    47  
// authorPattern splits the single-string form of a package.json author into
// named capture groups:
//   - name:  leading run of characters other than '<' and '('
//   - email: optional, enclosed in angle brackets and preceded by whitespace
//   - url:   optional, enclosed in parentheses and preceded by one whitespace character
//
// match example: "author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)"
// ---> name: "Isaac Z. Schlueter" email: "i@izs.me" url: "http://blog.izs.me"
var authorPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*)(\s+<(?P<email>.*)>)?(\s\((?P<url>.*)\))?\s*$`)
    51  
    52  // parsePackageJSON parses a package.json and returns the discovered JavaScript packages.
    53  func parsePackageJSON(_ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
    54  	var pkgs []pkg.Package
    55  	dec := json.NewDecoder(reader)
    56  
    57  	for {
    58  		var p packageJSON
    59  		if err := dec.Decode(&p); errors.Is(err, io.EOF) {
    60  			break
    61  		} else if err != nil {
    62  			return nil, nil, fmt.Errorf("failed to parse package.json file: %w", err)
    63  		}
    64  
    65  		if !p.hasNameAndVersionValues() {
    66  			log.Debugf("encountered package.json file without a name and/or version field, ignoring (path=%q)", reader.AccessPath())
    67  			return nil, nil, nil
    68  		}
    69  
    70  		pkgs = append(
    71  			pkgs,
    72  			newPackageJSONPackage(p, reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
    73  		)
    74  	}
    75  
    76  	pkg.Sort(pkgs)
    77  
    78  	return pkgs, nil, nil
    79  }
    80  
    81  func (a *author) UnmarshalJSON(b []byte) error {
    82  	var authorStr string
    83  	var fields map[string]string
    84  	var auth author
    85  
    86  	if err := json.Unmarshal(b, &authorStr); err != nil {
    87  		// string parsing did not work, assume a map was given
    88  		// for more information: https://docs.npmjs.com/files/package.json#people-fields-author-contributors
    89  		if err := json.Unmarshal(b, &fields); err != nil {
    90  			return fmt.Errorf("unable to parse package.json author: %w", err)
    91  		}
    92  	} else {
    93  		// parse out "name <email> (url)" into an author struct
    94  		fields = internal.MatchNamedCaptureGroups(authorPattern, authorStr)
    95  	}
    96  
    97  	// translate the map into a structure
    98  	if err := mapstructure.Decode(fields, &auth); err != nil {
    99  		return fmt.Errorf("unable to decode package.json author: %w", err)
   100  	}
   101  
   102  	*a = auth
   103  
   104  	return nil
   105  }
   106  
   107  func (a *author) AuthorString() string {
   108  	result := a.Name
   109  	if a.Email != "" {
   110  		result += fmt.Sprintf(" <%s>", a.Email)
   111  	}
   112  	if a.URL != "" {
   113  		result += fmt.Sprintf(" (%s)", a.URL)
   114  	}
   115  	return result
   116  }
   117  
   118  func (r *repository) UnmarshalJSON(b []byte) error {
   119  	var repositoryStr string
   120  	var fields map[string]string
   121  	var repo repository
   122  
   123  	if err := json.Unmarshal(b, &repositoryStr); err != nil {
   124  		// string parsing did not work, assume a map was given
   125  		// for more information: https://docs.npmjs.com/files/package.json#people-fields-author-contributors
   126  		if err := json.Unmarshal(b, &fields); err != nil {
   127  			return fmt.Errorf("unable to parse package.json author: %w", err)
   128  		}
   129  		// translate the map into a structure
   130  		if err := mapstructure.Decode(fields, &repo); err != nil {
   131  			return fmt.Errorf("unable to decode package.json author: %w", err)
   132  		}
   133  
   134  		*r = repo
   135  	} else {
   136  		r.URL = repositoryStr
   137  	}
   138  
   139  	return nil
   140  }
   141  
// npmPackageLicense is the object form of a package.json license entry. It is
// used both for a single "license" object and for each element of the
// "licenses" array; only Type is read by the license helpers in this file.
type npmPackageLicense struct {
	Type string `json:"type"`
	URL  string `json:"url"`
}
   146  
   147  func licenseFromJSON(b []byte) (string, error) {
   148  	// first try as string
   149  	var licenseString string
   150  	err := json.Unmarshal(b, &licenseString)
   151  	if err == nil {
   152  		return licenseString, nil
   153  	}
   154  
   155  	// then try as object (this format is deprecated)
   156  	var licenseObject npmPackageLicense
   157  	err = json.Unmarshal(b, &licenseObject)
   158  	if err == nil {
   159  		return licenseObject.Type, nil
   160  	}
   161  
   162  	return "", errors.New("unable to unmarshal license field as either string or object")
   163  }
   164  
   165  func (p packageJSON) licensesFromJSON() ([]string, error) {
   166  	if p.License == nil && p.Licenses == nil {
   167  		// This package.json doesn't specify any licenses whatsoever
   168  		return []string{}, nil
   169  	}
   170  
   171  	singleLicense, err := licenseFromJSON(p.License)
   172  	if err == nil {
   173  		return []string{singleLicense}, nil
   174  	}
   175  
   176  	multiLicense, err := licensesFromJSON(p.Licenses)
   177  
   178  	// The "licenses" field is deprecated. It should be inspected as a last resort.
   179  	if multiLicense != nil && err == nil {
   180  		mapLicenses := func(licenses []npmPackageLicense) []string {
   181  			mappedLicenses := make([]string, len(licenses))
   182  			for i, l := range licenses {
   183  				mappedLicenses[i] = l.Type
   184  			}
   185  			return mappedLicenses
   186  		}
   187  
   188  		return mapLicenses(multiLicense), nil
   189  	}
   190  
   191  	return nil, err
   192  }
   193  
   194  func licensesFromJSON(b []byte) ([]npmPackageLicense, error) {
   195  	var licenseObject []npmPackageLicense
   196  	err := json.Unmarshal(b, &licenseObject)
   197  	if err == nil {
   198  		return licenseObject, nil
   199  	}
   200  
   201  	return nil, errors.New("unmarshal failed")
   202  }
   203  
   204  func (p packageJSON) hasNameAndVersionValues() bool {
   205  	return p.Name != "" && p.Version != ""
   206  }
   207  
   208  // this supports both windows and unix paths
   209  var filepathSeparator = regexp.MustCompile(`[\\/]`)
   210  
   211  func pathContainsNodeModulesDirectory(p string) bool {
   212  	for _, subPath := range filepathSeparator.Split(p, -1) {
   213  		if subPath == "node_modules" {
   214  			return true
   215  		}
   216  	}
   217  	return false
   218  }