github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/common/cpe/java.go (about)

     1  package cpe
     2  
     3  import (
     4  	"strings"
     5  
     6  	"github.com/scylladb/go-set/strset"
     7  
     8  	"github.com/anchore/syft/internal"
     9  	"github.com/anchore/syft/syft/pkg"
    10  )
    11  
var (
	// Group ID fields that are skipped when deriving product and vendor candidates, respectively.
	// Callers lowercase each field before looking it up in these sets.
	forbiddenProductGroupIDFields = strset.New("plugin", "plugins", "client")
	forbiddenVendorGroupIDFields  = strset.New("plugin", "plugins")

	// domains lists the prefixes that startsWithTopLevelDomain checks a value against.
	domains = []string{
		"com",
		"org",
		"net",
		"io",
		"be",
	}

	// PrimaryJavaManifestGroupIDFields are the manifest fields that groupIDsFromJavaManifest consults
	// first when looking for group IDs.
	PrimaryJavaManifestGroupIDFields = []string{
		"Bundle-SymbolicName",
		"Extension-Name",
		"Specification-Vendor",
		"Implementation-Vendor",
		"Implementation-Vendor-Id",
		"Implementation-Title",
		"Bundle-Activator",
	}
	// SecondaryJavaManifestGroupIDFields are the manifest fields that groupIDsFromJavaManifest falls back
	// to when none of the primary fields produced a group ID.
	SecondaryJavaManifestGroupIDFields = []string{
		"Automatic-Module-Name",
		"Main-Class",
		"Package",
	}
	// javaManifestNameFields are the manifest fields whose values vendorsFromJavaManifestNames considers
	// as vendor name candidates.
	javaManifestNameFields = []string{
		"Specification-Vendor",
		"Implementation-Vendor",
	}
)
    43  
    44  func candidateProductsForJava(p pkg.Package) []string {
    45  	return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), GroupIDsFromJavaPackage(p))
    46  }
    47  
    48  func candidateVendorsForJava(p pkg.Package) fieldCandidateSet {
    49  	gidVendors := vendorsFromGroupIDs(GroupIDsFromJavaPackage(p))
    50  	nameVendors := vendorsFromJavaManifestNames(p)
    51  	return newFieldCandidateSetFromSets(gidVendors, nameVendors)
    52  }
    53  
    54  func vendorsFromJavaManifestNames(p pkg.Package) fieldCandidateSet {
    55  	vendors := newFieldCandidateSet()
    56  
    57  	metadata, ok := p.Metadata.(pkg.JavaMetadata)
    58  	if !ok {
    59  		return vendors
    60  	}
    61  
    62  	if metadata.Manifest == nil {
    63  		return vendors
    64  	}
    65  
    66  	for _, name := range javaManifestNameFields {
    67  		if metadata.Manifest.Main != nil {
    68  			if value, exists := metadata.Manifest.Main[name]; exists {
    69  				if !startsWithTopLevelDomain(value) {
    70  					vendors.add(fieldCandidate{
    71  						value:                 normalizePersonName(value),
    72  						disallowSubSelections: true,
    73  					})
    74  				}
    75  			}
    76  		}
    77  		if metadata.Manifest.NamedSections != nil {
    78  			for _, section := range metadata.Manifest.NamedSections {
    79  				if section == nil {
    80  					continue
    81  				}
    82  				if value, exists := section[name]; exists {
    83  					if !startsWithTopLevelDomain(value) {
    84  						vendors.add(fieldCandidate{
    85  							value:                 normalizePersonName(value),
    86  							disallowSubSelections: true,
    87  						})
    88  					}
    89  				}
    90  			}
    91  		}
    92  	}
    93  
    94  	return vendors
    95  }
    96  
    97  func vendorsFromGroupIDs(groupIDs []string) fieldCandidateSet {
    98  	vendors := newFieldCandidateSet()
    99  	for _, groupID := range groupIDs {
   100  		for i, field := range strings.Split(groupID, ".") {
   101  			field = strings.TrimSpace(field)
   102  
   103  			if len(field) == 0 {
   104  				continue
   105  			}
   106  
   107  			if forbiddenVendorGroupIDFields.Has(strings.ToLower(field)) {
   108  				continue
   109  			}
   110  
   111  			if i == 0 {
   112  				continue
   113  			}
   114  
   115  			vendors.addValue(field)
   116  		}
   117  	}
   118  
   119  	return vendors
   120  }
   121  
   122  func productsFromArtifactAndGroupIDs(artifactID string, groupIDs []string) []string {
   123  	products := strset.New()
   124  	if artifactID != "" {
   125  		products.Add(artifactID)
   126  	}
   127  
   128  	for _, groupID := range groupIDs {
   129  		isPlugin := strings.Contains(artifactID, "plugin") || strings.Contains(groupID, "plugin")
   130  
   131  		for i, field := range strings.Split(groupID, ".") {
   132  			field = strings.TrimSpace(field)
   133  
   134  			if len(field) == 0 {
   135  				continue
   136  			}
   137  
   138  			// don't add this field as a name if the name is implying the package is a plugin or client
   139  			if forbiddenProductGroupIDFields.Has(strings.ToLower(field)) {
   140  				continue
   141  			}
   142  
   143  			if i <= 1 {
   144  				continue
   145  			}
   146  
   147  			// umbrella projects tend to have sub components that either start or end with the project name. We expect
   148  			// to identify fields that may represent the umbrella project, and not fields that indicate auxiliary
   149  			// information about the package.
   150  			couldBeProjectName := strings.HasPrefix(artifactID, field) || strings.HasSuffix(artifactID, field)
   151  			if artifactID == "" || (couldBeProjectName && !isPlugin) {
   152  				products.Add(field)
   153  			}
   154  		}
   155  	}
   156  
   157  	return products.List()
   158  }
   159  
   160  func artifactIDFromJavaPackage(p pkg.Package) string {
   161  	metadata, ok := p.Metadata.(pkg.JavaMetadata)
   162  	if !ok {
   163  		return ""
   164  	}
   165  
   166  	if metadata.PomProperties == nil {
   167  		return ""
   168  	}
   169  
   170  	artifactID := strings.TrimSpace(metadata.PomProperties.ArtifactID)
   171  	if looksLikeGroupID(artifactID) && len(strings.Split(artifactID, ".")) > 1 {
   172  		// there is a strong indication that the artifact ID is really a group ID, don't use it
   173  		return ""
   174  	}
   175  	return artifactID
   176  }
   177  
   178  func GroupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) {
   179  	metadata, ok := p.Metadata.(pkg.JavaMetadata)
   180  	if !ok {
   181  		return nil
   182  	}
   183  
   184  	return GroupIDsFromJavaMetadata(p.Name, metadata)
   185  }
   186  
   187  // GroupIDsFromJavaMetadata returns the possible group IDs for a Java package
   188  // This function is similar to GroupIDFromJavaPackage, but returns all possible group IDs and is less strict
   189  // It is used as a way to generate possible candidates for CPE matching.
   190  func GroupIDsFromJavaMetadata(pkgName string, metadata pkg.JavaMetadata) (groupIDs []string) {
   191  	groupIDs = append(groupIDs, groupIDsFromPomProperties(metadata.PomProperties)...)
   192  	groupIDs = append(groupIDs, groupIDsFromPomProject(metadata.PomProject)...)
   193  	groupIDs = append(groupIDs, groupIDsFromJavaManifest(pkgName, metadata.Manifest)...)
   194  
   195  	return groupIDs
   196  }
   197  
   198  func groupIDsFromPomProperties(properties *pkg.PomProperties) (groupIDs []string) {
   199  	if properties == nil {
   200  		return nil
   201  	}
   202  
   203  	if startsWithTopLevelDomain(properties.GroupID) {
   204  		groupIDs = append(groupIDs, cleanGroupID(properties.GroupID))
   205  	}
   206  
   207  	// sometimes the publisher puts the group ID in the artifact ID field unintentionally
   208  	if startsWithTopLevelDomain(properties.ArtifactID) && len(strings.Split(properties.ArtifactID, ".")) > 1 {
   209  		// there is a strong indication that the artifact ID is really a group ID
   210  		groupIDs = append(groupIDs, cleanGroupID(properties.ArtifactID))
   211  	}
   212  
   213  	return groupIDs
   214  }
   215  
   216  func groupIDsFromPomProject(project *pkg.PomProject) (groupIDs []string) {
   217  	if project == nil {
   218  		return nil
   219  	}
   220  
   221  	// extract the project info...
   222  	groupIDs = addGroupIDsFromGroupIDsAndArtifactID(project.GroupID, project.ArtifactID)
   223  
   224  	if project.Parent == nil {
   225  		return groupIDs
   226  	}
   227  
   228  	// extract the parent project info...
   229  	groupIDs = append(groupIDs, addGroupIDsFromGroupIDsAndArtifactID(project.Parent.GroupID, project.Parent.ArtifactID)...)
   230  
   231  	return groupIDs
   232  }
   233  
   234  func addGroupIDsFromGroupIDsAndArtifactID(groupID, artifactID string) (groupIDs []string) {
   235  	if startsWithTopLevelDomain(groupID) {
   236  		groupIDs = append(groupIDs, cleanGroupID(groupID))
   237  	}
   238  
   239  	// sometimes the publisher puts the group ID in the artifact ID field unintentionally
   240  	if startsWithTopLevelDomain(artifactID) && len(strings.Split(artifactID, ".")) > 1 {
   241  		// there is a strong indication that the artifact ID is really a group ID
   242  		groupIDs = append(groupIDs, cleanGroupID(artifactID))
   243  	}
   244  	return groupIDs
   245  }
   246  
   247  func groupIDsFromJavaManifest(pkgName string, manifest *pkg.JavaManifest) []string {
   248  	if groupID, ok := DefaultArtifactIDToGroupID[pkgName]; ok {
   249  		return []string{groupID}
   250  	}
   251  
   252  	if manifest == nil {
   253  		return nil
   254  	}
   255  
   256  	// try the common manifest fields first for a set of candidates
   257  	groupIDs := GetManifestFieldGroupIDs(manifest, PrimaryJavaManifestGroupIDFields)
   258  
   259  	if len(groupIDs) != 0 {
   260  		return groupIDs
   261  	}
   262  
   263  	// if we haven't found anything yet, let's try a last ditch effort:
   264  	// attempt to get group-id-like info from the MANIFEST.MF "Automatic-Module-Name" and "Extension-Name" field.
   265  	// for more info see pkg:maven/commons-io/commons-io@2.8.0 within cloudbees/cloudbees-core-mm:2.263.4.2
   266  	// at /usr/share/jenkins/jenkins.war:WEB-INF/plugins/analysis-model-api.hpi:WEB-INF/lib/commons-io-2.8.0.jar
   267  	// as well as the ant package from cloudbees/cloudbees-core-mm:2.277.2.4-ra.
   268  	return GetManifestFieldGroupIDs(manifest, SecondaryJavaManifestGroupIDFields)
   269  }
   270  
   271  func GetManifestFieldGroupIDs(manifest *pkg.JavaManifest, fields []string) (groupIDs []string) {
   272  	if manifest == nil {
   273  		return nil
   274  	}
   275  
   276  	for _, name := range fields {
   277  		if value, exists := manifest.Main[name]; exists {
   278  			if startsWithTopLevelDomain(value) {
   279  				groupIDs = append(groupIDs, cleanGroupID(value))
   280  			}
   281  		}
   282  		for _, section := range manifest.NamedSections {
   283  			if value, exists := section[name]; exists {
   284  				if startsWithTopLevelDomain(value) {
   285  					groupIDs = append(groupIDs, cleanGroupID(value))
   286  				}
   287  			}
   288  		}
   289  	}
   290  
   291  	return groupIDs
   292  }
   293  
   294  func cleanGroupID(groupID string) string {
   295  	return strings.TrimSpace(removeOSCIDirectives(groupID))
   296  }
   297  
   298  func removeOSCIDirectives(groupID string) string {
   299  	// for example:
   300  	// 		org.bar;uses:=“org.foo”		-> 	org.bar
   301  	// more about OSGI directives see https://spring.io/blog/2008/10/20/understanding-the-osgi-uses-directive/
   302  	return strings.Split(groupID, ";")[0]
   303  }
   304  
// startsWithTopLevelDomain reports the result of checking value against the entries of domains via
// internal.HasAnyOfPrefixes.
// NOTE(review): the entries in domains carry no trailing dot, so presumably a value such as
// "commons-io" matches the "com" entry as well — confirm that this is intended.
func startsWithTopLevelDomain(value string) bool {
	return internal.HasAnyOfPrefixes(value, domains...)
}
   308  
   309  func looksLikeGroupID(value string) bool {
   310  	return strings.Contains(value, ".")
   311  }