github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/javascript/packagejson/metadata/metadata.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package metadata defines a metadata struct for Javascript packages.
    16  package metadata
    17  
    18  import (
    19  	"encoding/json"
    20  	"fmt"
    21  	"regexp"
    22  	"strings"
    23  
    24  	"github.com/google/osv-scalibr/extractor/filesystem/internal"
    25  
    26  	pb "github.com/google/osv-scalibr/binary/proto/scan_result_go_proto"
    27  )
    28  
// Person represents one entry of a "people" field (such as author or
// contributors) in a javascript package.json file.
type Person struct {
	// Name is the person's name; serialized under the JSON key "name".
	Name string `json:"name"`
	// Email is the person's email address; serialized under the JSON key "email".
	Email string `json:"email"`
	// URL is the person's URL; serialized under the JSON key "url".
	URL string `json:"url"`
}
    35  
// NPMPackageSource identifies where an NPM package was obtained from.
// It is a string-backed enum; the valid values are the constants below.
type NPMPackageSource string

const (
	// Unknown means the source of the NPM package could not be determined
	// because the lockfile was not found.
	Unknown NPMPackageSource = "UNKNOWN"
	// PublicRegistry means the package comes from the public NPM registry.
	PublicRegistry NPMPackageSource = "PUBLIC_REGISTRY"
	// Other covers any other remote or private source (e.g. Github).
	// It is used for packages that are not found in the public NPM registry.
	Other NPMPackageSource = "OTHER"
	// Local means the package versions are stored on the local filesystem.
	// It is used when the package is locally developed or locally installed.
	Local NPMPackageSource = "LOCAL"
)
    51  
// personPattern matches the single-string form of a package.json person,
// "name <email> (url)", exposing the parts as the named capture groups
// "name", "email" and "url". The "<email>" and "(url)" parts are optional,
// and the name may not contain '<' or '('.
//
// Example: "author": "Isaac Z. Schlueter <i@izs.me> (http://blog.izs.me)"
// ---> name: "Isaac Z. Schlueter" email: "i@izs.me" url: "http://blog.izs.me"
var personPattern = regexp.MustCompile(`^\s*(?P<name>[^<(]*)(\s+<(?P<email>.*)>)?(\s\((?P<url>.*)\))?\s*$`)
    55  
    56  // UnmarshalJSON parses a JSON object or string into a Person struct.
    57  func (p *Person) UnmarshalJSON(b []byte) error {
    58  	var personStr string
    59  	var fields map[string]string
    60  
    61  	if err := json.Unmarshal(b, &personStr); err != nil {
    62  		// string parsing did not work, assume a map was given
    63  		// for more information: https://docs.npmjs.com/files/package.json#people-fields-author-contributors
    64  		var rawJSON map[string]any
    65  		if err := json.Unmarshal(b, &rawJSON); err != nil {
    66  			return fmt.Errorf("unable to parse package.json person: %w", err)
    67  		}
    68  		fields = rawToPerson(rawJSON)
    69  	} else {
    70  		// parse out "name <email> (url)" into a person struct
    71  		fields = internal.MatchNamedCaptureGroups(personPattern, personStr)
    72  	}
    73  
    74  	if _, ok := fields["name"]; ok {
    75  		// translate the map into a structure
    76  		*p = Person{
    77  			Name:  fields["name"],
    78  			Email: fields["email"],
    79  			URL:   fields["url"],
    80  		}
    81  	}
    82  
    83  	return nil
    84  }
    85  
    86  // PersonString produces a string format of Person struct in the format of "name <email> (url)"
    87  func (p *Person) PersonString() string {
    88  	if p == nil || p.Name == "" {
    89  		return ""
    90  	}
    91  	result := p.Name
    92  	if p.Email != "" {
    93  		result += fmt.Sprintf(" <%s>", p.Email)
    94  	}
    95  	if p.URL != "" {
    96  		result += fmt.Sprintf(" (%s)", p.URL)
    97  	}
    98  	return result
    99  }
   100  
   101  // PersonFromString parses a string of the form "name <email> (url)" into a Person struct.
   102  func PersonFromString(s string) *Person {
   103  	if s == "" {
   104  		return nil
   105  	}
   106  	fields := internal.MatchNamedCaptureGroups(personPattern, s)
   107  	for name, field := range fields {
   108  		fields[name] = strings.TrimSpace(field)
   109  	}
   110  	return &Person{
   111  		Name:  fields["name"],
   112  		Email: fields["email"],
   113  		URL:   fields["url"],
   114  	}
   115  }
   116  
// JavascriptPackageJSONMetadata holds parsing information for a javascript package.json file.
type JavascriptPackageJSONMetadata struct {
	// Author is the package's "author" person entry; may be nil.
	Author *Person `json:"author"`
	// Maintainers holds the package's "maintainers" person entries.
	Maintainers []*Person `json:"maintainers"`
	// Contributors holds the package's "contributors" person entries.
	Contributors []*Person `json:"contributors"`

	// Source records where this package was resolved from (see the
	// NPMPackageSource constants). It is annotated by the misc/from-npm
	// annotator by parsing the lockfile of the root-level directory. It
	// distinguishes packages resolved from the official NPM registry from
	// packages installed from a local path, a git repository, or another
	// private registry.
	// This is to identify name collisions between locally published packages and official NPM packages.
	Source NPMPackageSource
}
   130  
   131  // SetProto sets the JavascriptMetadata field in the Package proto.
   132  func (m *JavascriptPackageJSONMetadata) SetProto(p *pb.Package) {
   133  	if m == nil {
   134  		return
   135  	}
   136  	if p == nil {
   137  		return
   138  	}
   139  
   140  	p.Metadata = &pb.Package_JavascriptMetadata{
   141  		JavascriptMetadata: &pb.JavascriptPackageJSONMetadata{
   142  			Author:       m.Author.PersonString(),
   143  			Contributors: personsToProto(m.Contributors),
   144  			Maintainers:  personsToProto(m.Maintainers),
   145  			Source:       m.Source.ToProto(),
   146  		},
   147  	}
   148  }
   149  
   150  // ToStruct converts the JavascriptPackageJSONMetadata proto to a Metadata struct.
   151  func ToStruct(m *pb.JavascriptPackageJSONMetadata) *JavascriptPackageJSONMetadata {
   152  	if m == nil {
   153  		return nil
   154  	}
   155  
   156  	var author *Person
   157  	if m.GetAuthor() != "" {
   158  		author = PersonFromString(m.GetAuthor())
   159  	}
   160  
   161  	return &JavascriptPackageJSONMetadata{
   162  		Author:       author,
   163  		Maintainers:  personsToStruct(m.GetMaintainers()),
   164  		Contributors: personsToStruct(m.GetContributors()),
   165  		Source:       packageSourceToStruct(m.GetSource()),
   166  	}
   167  }
   168  
   169  // ToProto converts the NPMPackageSource to the proto enum.
   170  func (source NPMPackageSource) ToProto() pb.PackageSource {
   171  	switch source {
   172  	case PublicRegistry:
   173  		return pb.PackageSource_PUBLIC_REGISTRY
   174  	case Local:
   175  		return pb.PackageSource_LOCAL
   176  	case Other:
   177  		return pb.PackageSource_OTHER
   178  	default:
   179  		return pb.PackageSource_UNKNOWN
   180  	}
   181  }
   182  
   183  func packageSourceToStruct(ps pb.PackageSource) NPMPackageSource {
   184  	switch ps {
   185  	case pb.PackageSource_PUBLIC_REGISTRY:
   186  		return PublicRegistry
   187  	case pb.PackageSource_OTHER:
   188  		return Other
   189  	case pb.PackageSource_LOCAL:
   190  		return Local
   191  	default:
   192  		return Unknown
   193  	}
   194  }
   195  
   196  func personsToProto(persons []*Person) []string {
   197  	var personStrings []string
   198  	for _, p := range persons {
   199  		personStrings = append(personStrings, p.PersonString())
   200  	}
   201  	return personStrings
   202  }
   203  
   204  func personsToStruct(personStrings []string) []*Person {
   205  	var persons []*Person
   206  	for _, s := range personStrings {
   207  		persons = append(persons, PersonFromString(s))
   208  	}
   209  	return persons
   210  }
   211  
   212  func rawToPerson(rawJSON map[string]any) map[string]string {
   213  	personMap := make(map[string]string)
   214  	for key := range rawJSON {
   215  		if val, ok := rawJSON[key].(string); ok {
   216  			personMap[key] = val
   217  		}
   218  	}
   219  	return personMap
   220  }