github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/format/internal/spdxutil/helpers/originator_supplier.go (about)

     1  package helpers
     2  
     3  import (
     4  	"fmt"
     5  	"regexp"
     6  	"strings"
     7  
     8  	"github.com/anchore/syft/internal"
     9  	"github.com/anchore/syft/syft/pkg"
    10  )
    11  
// SPDX entity type labels returned as the "typ" result of Originator and Supplier.
const (
	// orgType is used when the metadata field being read names an organization
	// (e.g. a company or vendor field).
	orgType    = "Organization"
	// personType is the fallback applied when an author value was found but no
	// explicit type was chosen.
	personType = "Person"
)
    16  
    17  // Originator needs to conform to the SPDX spec here:
    18  // https://spdx.github.io/spdx-spec/v2.2.2/package-information/#76-package-originator-field
    19  //
    20  // Definition:
    21  //
    22  //	If the package identified in the SPDX document originated from a different person or
    23  //	organization than identified as Package Supplier (see 7.5 above), this field identifies from
    24  //	where or whom the package originally came. In some cases, a package may be created and
    25  //	originally distributed by a different third party than the Package Supplier of the package.
    26  //	For example, the SPDX document identifies the package as glibc and the Package Supplier as
    27  //	Red Hat, but the Free Software Foundation is the Package Originator.
    28  //
    29  // Use NOASSERTION if:
    30  //
    31  //   - the SPDX document creator has attempted to but cannot reach a reasonable objective determination;
    32  //   - the SPDX document creator has made no attempt to determine this field; or
    33  //   - the SPDX document creator has intentionally provided no information (no meaning should be implied by doing so).
    34  //
    35  // Available options are: <omit>, NOASSERTION, Person: <person>, Organization: <org>
    36  // return values are: <type>, <value>
    37  func Originator(p pkg.Package) (typ string, author string) { // nolint: funlen
    38  	if !hasMetadata(p) {
    39  		return typ, author
    40  	}
    41  
    42  	switch metadata := p.Metadata.(type) {
    43  	case pkg.ApkDBEntry:
    44  		author = metadata.Maintainer
    45  
    46  	case pkg.DotnetPortableExecutableEntry:
    47  		typ = orgType
    48  		author = metadata.CompanyName
    49  
    50  	case pkg.DpkgDBEntry:
    51  		author = metadata.Maintainer
    52  
    53  	case pkg.JavaArchive:
    54  		if metadata.Manifest != nil {
    55  			author = metadata.Manifest.Main.MustGet("Specification-Vendor")
    56  			if author == "" {
    57  				author = metadata.Manifest.Main.MustGet("Implementation-Vendor")
    58  			}
    59  		}
    60  
    61  	case pkg.LinuxKernelModule:
    62  		author = metadata.Author
    63  
    64  	case pkg.PhpComposerLockEntry:
    65  		if len(metadata.Authors) > 0 {
    66  			entry := metadata.Authors[0]
    67  			author = formatPersonOrOrg(entry.Name, entry.Email)
    68  		}
    69  
    70  	case pkg.PhpComposerInstalledEntry:
    71  		if len(metadata.Authors) > 0 {
    72  			entry := metadata.Authors[0]
    73  			author = formatPersonOrOrg(entry.Name, entry.Email)
    74  		}
    75  
    76  	case pkg.RDescription:
    77  		// this is most likely to have a name and email
    78  		author = metadata.Maintainer
    79  
    80  		if author == "" {
    81  			author = metadata.Author
    82  		}
    83  
    84  	case pkg.NpmPackage:
    85  		author = metadata.Author
    86  
    87  	case pkg.PythonPackage:
    88  		author = formatPersonOrOrg(metadata.Author, metadata.AuthorEmail)
    89  
    90  	case pkg.RubyGemspec:
    91  		if len(metadata.Authors) > 0 {
    92  			author = metadata.Authors[0]
    93  		}
    94  	case pkg.RpmDBEntry:
    95  		typ = orgType
    96  		author = metadata.Vendor
    97  
    98  	case pkg.RpmArchive:
    99  		typ = orgType
   100  		author = metadata.Vendor
   101  
   102  	case pkg.WordpressPluginEntry:
   103  		// it seems that the vast majority of the time the author is an org, not a person
   104  		typ = orgType
   105  		author = metadata.Author
   106  	}
   107  
   108  	if typ == "" && author != "" {
   109  		typ = personType
   110  	}
   111  
   112  	return typ, parseAndFormatPersonOrOrg(author)
   113  }
   114  
   115  // Supplier needs to conform to the SPDX spec here:
   116  // https://spdx.github.io/spdx-spec/v2.2.2/package-information/#75-package-supplier-field
   117  //
   118  // Definition:
   119  //
   120  //	Identify the actual distribution source for the package/directory identified in the SPDX document. This might
   121  //	or might not be different from the originating distribution source for the package. The name of the Package Supplier
   122  //	shall be an organization or recognized author and not a web site. For example, SourceForge is a host website, not a
   123  //	supplier, the supplier for https://sourceforge.net/projects/bridge/ is “The Linux Foundation.”
   124  //
   125  // Use NOASSERTION if:
   126  //
   127  //   - the SPDX document creator has attempted to but cannot reach a reasonable objective determination;
   128  //   - the SPDX document creator has made no attempt to determine this field; or
   129  //   - the SPDX document creator has intentionally provided no information (no meaning should be implied by doing so).
   130  //
   131  // Available options are: <omit>, NOASSERTION, Person: <person>, Organization: <org>
   132  // return values are: <type>, <value>
   133  func Supplier(p pkg.Package) (typ string, author string) {
   134  	if !hasMetadata(p) {
   135  		return
   136  	}
   137  
   138  	if metadata, ok := p.Metadata.(pkg.AlpmDBEntry); ok {
   139  		// most indications here are that this is the person that is simply packaging the upstream software. Most
   140  		// of the time this is not the original author of the upstream software (which would be the originator).
   141  		// Though it is possible for users to be both the packager and the author, this code cannot distinct this
   142  		// case and sticks to the semantically correct interpretation of the "packager" (which says nothing about the
   143  		// authorship of the upstream software).
   144  		author = metadata.Packager
   145  	}
   146  
   147  	if author == "" {
   148  		// TODO: this uses the Originator function for now until a better distinction can be made for supplier
   149  		return Originator(p)
   150  	}
   151  
   152  	if typ == "" && author != "" {
   153  		typ = personType
   154  	}
   155  
   156  	return typ, parseAndFormatPersonOrOrg(author)
   157  }
   158  
   159  var nameEmailURLPattern = regexp.MustCompile(`^(?P<name>[^<>()]*)( <(?P<email>[^@]+@\w+\.\w+)>)?( \((?P<url>.*)\))?$`)
   160  
   161  func parseAndFormatPersonOrOrg(s string) string {
   162  	name, email, _ := parseNameEmailURL(s)
   163  	return formatPersonOrOrg(name, email)
   164  }
   165  
   166  func parseNameEmailURL(s string) (name, email, url string) {
   167  	fields := internal.MatchNamedCaptureGroups(nameEmailURLPattern, s)
   168  	name = strings.TrimSpace(fields["name"])
   169  	email = strings.TrimSpace(fields["email"])
   170  	url = strings.TrimSpace(fields["url"])
   171  
   172  	if email == "" {
   173  		if approximatesAsEmail(url) {
   174  			email = url
   175  			url = ""
   176  		} else if approximatesAsEmail(name) {
   177  			email = name
   178  			name = ""
   179  		}
   180  	}
   181  	return name, email, url
   182  }
   183  
   184  func approximatesAsEmail(s string) bool {
   185  	atIndex := strings.Index(s, "@")
   186  	if atIndex == -1 {
   187  		return false
   188  	}
   189  	dotIndex := strings.Index(s[atIndex:], ".")
   190  	return dotIndex != -1
   191  }
   192  
   193  func formatPersonOrOrg(name, email string) string {
   194  	name = strings.TrimSpace(name)
   195  	email = strings.TrimSpace(email)
   196  
   197  	blankName := name == ""
   198  	blankEmail := email == ""
   199  
   200  	if !blankEmail && !blankName {
   201  		return fmt.Sprintf("%s (%s)", name, email)
   202  	}
   203  	if !blankName && blankEmail {
   204  		return name
   205  	}
   206  	if blankName && !blankEmail {
   207  		return email
   208  	}
   209  	return ""
   210  }