github.com/anchore/syft@v1.38.2/syft/format/internal/spdxutil/helpers/originator_supplier.go (about)

     1  package helpers
     2  
     3  import (
     4  	"fmt"
     5  	"regexp"
     6  	"strings"
     7  
     8  	"github.com/anchore/syft/internal"
     9  	"github.com/anchore/syft/syft/pkg"
    10  )
    11  
// Entity kinds returned as the <type> half of the (<type>, <value>) pairs produced by
// Originator and Supplier.
const (
	// orgType marks the value as the name of an organization (for example a vendor or company).
	orgType = "Organization"
	// personType marks the value as an individual; it is also the kind used when a value was
	// found but no more specific kind was assigned.
	personType = "Person"
)
    16  
    17  // Originator needs to conform to the SPDX spec here:
    18  // https://spdx.github.io/spdx-spec/v2.2.2/package-information/#76-package-originator-field
    19  //
    20  // Definition:
    21  //
    22  //	If the package identified in the SPDX document originated from a different person or
    23  //	organization than identified as Package Supplier (see 7.5 above), this field identifies from
    24  //	where or whom the package originally came. In some cases, a package may be created and
    25  //	originally distributed by a different third party than the Package Supplier of the package.
    26  //	For example, the SPDX document identifies the package as glibc and the Package Supplier as
    27  //	Red Hat, but the Free Software Foundation is the Package Originator.
    28  //
    29  // Use NOASSERTION if:
    30  //
    31  //   - the SPDX document creator has attempted to but cannot reach a reasonable objective determination;
    32  //   - the SPDX document creator has made no attempt to determine this field; or
    33  //   - the SPDX document creator has intentionally provided no information (no meaning should be implied by doing so).
    34  //
    35  // Available options are: <omit>, NOASSERTION, Person: <person>, Organization: <org>
    36  // return values are: <type>, <value>
    37  func Originator(p pkg.Package) (typ string, author string) { //nolint: gocyclo,funlen
    38  	if !hasMetadata(p) {
    39  		return typ, author
    40  	}
    41  
    42  	switch metadata := p.Metadata.(type) {
    43  	case pkg.ApkDBEntry:
    44  		author = metadata.Maintainer
    45  
    46  	case pkg.BitnamiSBOMEntry:
    47  		typ = orgType
    48  		author = "Bitnami"
    49  
    50  	case pkg.DotnetPortableExecutableEntry:
    51  		typ = orgType
    52  		author = metadata.CompanyName
    53  	case pkg.PEBinary:
    54  		// this is a known common keyword used in version resources
    55  		// for more info see: https://learn.microsoft.com/en-us/windows/win32/menurc/versioninfo-resource
    56  		val, ok := metadata.VersionResources.Get("CompanyName")
    57  		if ok {
    58  			typ = orgType
    59  			author = val
    60  		}
    61  
    62  	case pkg.DpkgDBEntry:
    63  		author = metadata.Maintainer
    64  
    65  	case pkg.DpkgArchiveEntry:
    66  		author = metadata.Maintainer
    67  
    68  	case pkg.GitHubActionsUseStatement:
    69  		typ = orgType
    70  		org := strings.Split(metadata.Value, "/")[0]
    71  		if org == "actions" {
    72  			// this is a GitHub action, so the org is GitHub
    73  			org = "GitHub"
    74  		}
    75  		author = org
    76  
    77  	case pkg.JavaArchive:
    78  		if metadata.Manifest != nil {
    79  			author = metadata.Manifest.Main.MustGet("Specification-Vendor")
    80  			if author == "" {
    81  				author = metadata.Manifest.Main.MustGet("Implementation-Vendor")
    82  			}
    83  			// Vendor is specified, hence set 'Organization' as the PackageSupplier
    84  			if author != "" {
    85  				typ = orgType
    86  			}
    87  		}
    88  
    89  	case pkg.JavaVMInstallation:
    90  		typ = orgType
    91  		author = metadata.Release.Implementor
    92  
    93  	case pkg.LinuxKernelModule:
    94  		author = metadata.Author
    95  
    96  	case pkg.PhpComposerLockEntry:
    97  		if len(metadata.Authors) > 0 {
    98  			entry := metadata.Authors[0]
    99  			author = formatPersonOrOrg(entry.Name, entry.Email)
   100  		}
   101  
   102  	case pkg.PhpComposerInstalledEntry:
   103  		if len(metadata.Authors) > 0 {
   104  			entry := metadata.Authors[0]
   105  			author = formatPersonOrOrg(entry.Name, entry.Email)
   106  		}
   107  
   108  	case pkg.RDescription:
   109  		// this is most likely to have a name and email
   110  		author = metadata.Maintainer
   111  
   112  		if author == "" {
   113  			author = metadata.Author
   114  		}
   115  
   116  	case pkg.NpmPackage:
   117  		author = metadata.Author
   118  
   119  	case pkg.PythonPackage:
   120  		author = formatPersonOrOrg(metadata.Author, metadata.AuthorEmail)
   121  
   122  	case pkg.RubyGemspec:
   123  		if len(metadata.Authors) > 0 {
   124  			author = metadata.Authors[0]
   125  		}
   126  	case pkg.RpmDBEntry:
   127  		typ = orgType
   128  		author = metadata.Vendor
   129  
   130  	case pkg.RpmArchive:
   131  		typ = orgType
   132  		author = metadata.Vendor
   133  
   134  	case pkg.WordpressPluginEntry:
   135  		// it seems that the vast majority of the time the author is an org, not a person
   136  		typ = orgType
   137  		author = metadata.Author
   138  
   139  	case pkg.SwiplPackEntry:
   140  		author = formatPersonOrOrg(metadata.Author, metadata.AuthorEmail)
   141  	}
   142  
   143  	if typ == "" && author != "" {
   144  		typ = personType
   145  	}
   146  
   147  	return typ, parseAndFormatPersonOrOrg(author)
   148  }
   149  
   150  // Supplier needs to conform to the SPDX spec here:
   151  // https://spdx.github.io/spdx-spec/v2.2.2/package-information/#75-package-supplier-field
   152  //
   153  // Definition:
   154  //
   155  //	Identify the actual distribution source for the package/directory identified in the SPDX document. This might
   156  //	or might not be different from the originating distribution source for the package. The name of the Package Supplier
   157  //	shall be an organization or recognized author and not a web site. For example, SourceForge is a host website, not a
   158  //	supplier, the supplier for https://sourceforge.net/projects/bridge/ is “The Linux Foundation.”
   159  //
   160  // Use NOASSERTION if:
   161  //
   162  //   - the SPDX document creator has attempted to but cannot reach a reasonable objective determination;
   163  //   - the SPDX document creator has made no attempt to determine this field; or
   164  //   - the SPDX document creator has intentionally provided no information (no meaning should be implied by doing so).
   165  //
   166  // Available options are: <omit>, NOASSERTION, Person: <person>, Organization: <org>
   167  // return values are: <type>, <value>
   168  func Supplier(p pkg.Package) (typ string, author string) {
   169  	if !hasMetadata(p) {
   170  		return
   171  	}
   172  
   173  	if metadata, ok := p.Metadata.(pkg.AlpmDBEntry); ok {
   174  		// most indications here are that this is the person that is simply packaging the upstream software. Most
   175  		// of the time this is not the original author of the upstream software (which would be the originator).
   176  		// Though it is possible for users to be both the packager and the author, this code cannot distinct this
   177  		// case and sticks to the semantically correct interpretation of the "packager" (which says nothing about the
   178  		// authorship of the upstream software).
   179  		author = metadata.Packager
   180  	}
   181  
   182  	if metadata, ok := p.Metadata.(pkg.SwiplPackEntry); ok {
   183  		author = formatPersonOrOrg(metadata.Packager, metadata.PackagerEmail)
   184  	}
   185  
   186  	if author == "" {
   187  		// TODO: this uses the Originator function for now until a better distinction can be made for supplier
   188  		return Originator(p)
   189  	}
   190  
   191  	if typ == "" && author != "" {
   192  		typ = personType
   193  	}
   194  
   195  	return typ, parseAndFormatPersonOrOrg(author)
   196  }
   197  
   198  var nameEmailURLPattern = regexp.MustCompile(`^(?P<name>[^<>()]*)( <(?P<email>[^@]+@\w+\.\w+)>)?( \((?P<url>.*)\))?$`)
   199  
   200  func parseAndFormatPersonOrOrg(s string) string {
   201  	name, email, _ := parseNameEmailURL(s)
   202  	return formatPersonOrOrg(name, email)
   203  }
   204  
   205  func parseNameEmailURL(s string) (name, email, url string) {
   206  	fields := internal.MatchNamedCaptureGroups(nameEmailURLPattern, s)
   207  	name = strings.TrimSpace(fields["name"])
   208  	email = strings.TrimSpace(fields["email"])
   209  	url = strings.TrimSpace(fields["url"])
   210  
   211  	if email == "" {
   212  		if approximatesAsEmail(url) {
   213  			email = url
   214  			url = ""
   215  		} else if approximatesAsEmail(name) {
   216  			email = name
   217  			name = ""
   218  		}
   219  	}
   220  	return name, email, url
   221  }
   222  
   223  func approximatesAsEmail(s string) bool {
   224  	atIndex := strings.Index(s, "@")
   225  	if atIndex == -1 {
   226  		return false
   227  	}
   228  	dotIndex := strings.Index(s[atIndex:], ".")
   229  	return dotIndex != -1
   230  }
   231  
   232  func formatPersonOrOrg(name, email string) string {
   233  	name = strings.TrimSpace(name)
   234  	email = strings.TrimSpace(email)
   235  
   236  	blankName := name == ""
   237  	blankEmail := email == ""
   238  
   239  	if !blankEmail && !blankName {
   240  		return fmt.Sprintf("%s (%s)", name, email)
   241  	}
   242  	if !blankName && blankEmail {
   243  		return name
   244  	}
   245  	if blankName && !blankEmail {
   246  		return email
   247  	}
   248  	return ""
   249  }