github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/internal/cpegenerate/java.go (about)

     1  package cpegenerate
     2  
     3  import (
     4  	"sort"
     5  	"strings"
     6  
     7  	"github.com/scylladb/go-set/strset"
     8  
     9  	"github.com/anchore/syft/internal"
    10  	"github.com/anchore/syft/syft/pkg"
    11  )
    12  
var (
	// forbiddenProductGroupIDFields lists group ID segments (matched in lower case) that
	// productsFromArtifactAndGroupIDs never emits as product candidates.
	forbiddenProductGroupIDFields = strset.New("plugin", "plugins", "client")
	// forbiddenVendorGroupIDFields lists group ID segments (matched in lower case) that
	// vendorsFromGroupIDs never emits as vendor candidates.
	forbiddenVendorGroupIDFields  = strset.New("plugin", "plugins")

	// domains holds the prefixes that startsWithTopLevelDomain checks a value against.
	domains = []string{
		"com",
		"org",
		"net",
		"io",
		"be",
	}

	// PrimaryJavaManifestGroupIDFields are the manifest fields that groupIDsFromJavaManifest consults
	// first when looking for group ID candidates.
	PrimaryJavaManifestGroupIDFields = []string{
		"Group-Id",
		"Bundle-SymbolicName",
		"Extension-Name",
		"Specification-Vendor",
		"Implementation-Vendor",
		"Implementation-Vendor-Id",
		"Implementation-Title",
		"Bundle-Activator",
	}
	// SecondaryJavaManifestGroupIDFields are the manifest fields that groupIDsFromJavaManifest falls
	// back to only when the primary fields yield no group ID candidates.
	SecondaryJavaManifestGroupIDFields = []string{
		"Automatic-Module-Name",
		"Main-Class",
		"Package",
	}
	// javaManifestNameFields are the manifest fields that vendorsFromJavaManifestNames reads to find
	// vendor names.
	javaManifestNameFields = []string{
		"Specification-Vendor",
		"Implementation-Vendor",
	}
)
    45  
    46  func candidateProductsForJava(p pkg.Package) []string {
    47  	return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), GroupIDsFromJavaPackage(p))
    48  }
    49  
    50  func candidateVendorsForJava(p pkg.Package) fieldCandidateSet {
    51  	gidVendors := vendorsFromGroupIDs(GroupIDsFromJavaPackage(p))
    52  	nameVendors := vendorsFromJavaManifestNames(p)
    53  	return newFieldCandidateSetFromSets(gidVendors, nameVendors)
    54  }
    55  
    56  func vendorsFromJavaManifestNames(p pkg.Package) fieldCandidateSet {
    57  	vendors := newFieldCandidateSet()
    58  
    59  	metadata, ok := p.Metadata.(pkg.JavaArchive)
    60  	if !ok {
    61  		return vendors
    62  	}
    63  
    64  	if metadata.Manifest == nil {
    65  		return vendors
    66  	}
    67  
    68  	for _, name := range javaManifestNameFields {
    69  		if metadata.Manifest.Main != nil {
    70  			if value, exists := metadata.Manifest.Main.Get(name); exists {
    71  				if !startsWithTopLevelDomain(value) {
    72  					vendors.add(fieldCandidate{
    73  						value:                 normalizePersonName(value),
    74  						disallowSubSelections: true,
    75  					})
    76  				}
    77  			}
    78  		}
    79  		if metadata.Manifest.Sections != nil {
    80  			for _, section := range metadata.Manifest.Sections {
    81  				if section == nil {
    82  					continue
    83  				}
    84  				if value, exists := section.Get(name); exists {
    85  					if !startsWithTopLevelDomain(value) {
    86  						vendors.add(fieldCandidate{
    87  							value:                 normalizePersonName(value),
    88  							disallowSubSelections: true,
    89  						})
    90  					}
    91  				}
    92  			}
    93  		}
    94  	}
    95  
    96  	return vendors
    97  }
    98  
    99  func vendorsFromGroupIDs(groupIDs []string) fieldCandidateSet {
   100  	vendors := newFieldCandidateSet()
   101  	for _, groupID := range groupIDs {
   102  		// always include the groupId as a vendor -- the Grype database may include alternate matches with these
   103  		vendors.add(fieldCandidate{
   104  			value:                       groupID,
   105  			disallowSubSelections:       true,
   106  			disallowDelimiterVariations: true,
   107  		})
   108  
   109  		for i, field := range strings.Split(groupID, ".") {
   110  			field = strings.TrimSpace(field)
   111  
   112  			if len(field) == 0 {
   113  				continue
   114  			}
   115  
   116  			if forbiddenVendorGroupIDFields.Has(strings.ToLower(field)) {
   117  				continue
   118  			}
   119  
   120  			if i == 0 {
   121  				continue
   122  			}
   123  
   124  			vendors.addValue(field)
   125  		}
   126  	}
   127  
   128  	return vendors
   129  }
   130  
   131  func productsFromArtifactAndGroupIDs(artifactID string, groupIDs []string) []string {
   132  	products := strset.New()
   133  	if artifactID != "" {
   134  		products.Add(artifactID)
   135  	}
   136  
   137  	for _, groupID := range groupIDs {
   138  		isPlugin := strings.Contains(artifactID, "plugin") || strings.Contains(groupID, "plugin")
   139  
   140  		for i, field := range strings.Split(groupID, ".") {
   141  			field = strings.TrimSpace(field)
   142  
   143  			if len(field) == 0 {
   144  				continue
   145  			}
   146  
   147  			// don't add this field as a name if the name is implying the package is a plugin or client
   148  			if forbiddenProductGroupIDFields.Has(strings.ToLower(field)) {
   149  				continue
   150  			}
   151  
   152  			if i <= 1 {
   153  				continue
   154  			}
   155  
   156  			// umbrella projects tend to have sub components that either start or end with the project name. We expect
   157  			// to identify fields that may represent the umbrella project, and not fields that indicate auxiliary
   158  			// information about the package.
   159  			couldBeProjectName := strings.HasPrefix(artifactID, field) || strings.HasSuffix(artifactID, field)
   160  			if artifactID == "" || (couldBeProjectName && !isPlugin) {
   161  				products.Add(field)
   162  			}
   163  		}
   164  	}
   165  
   166  	return products.List()
   167  }
   168  
   169  func artifactIDFromJavaPackage(p pkg.Package) string {
   170  	metadata, ok := p.Metadata.(pkg.JavaArchive)
   171  	if !ok {
   172  		return ""
   173  	}
   174  
   175  	if metadata.PomProperties == nil {
   176  		return ""
   177  	}
   178  
   179  	artifactID := strings.TrimSpace(metadata.PomProperties.ArtifactID)
   180  	if looksLikeGroupID(artifactID) && len(strings.Split(artifactID, ".")) > 1 {
   181  		// there is a strong indication that the artifact ID is really a group ID, don't use it
   182  		return ""
   183  	}
   184  	return artifactID
   185  }
   186  
   187  func GroupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) {
   188  	metadata, ok := p.Metadata.(pkg.JavaArchive)
   189  	if !ok {
   190  		return nil
   191  	}
   192  
   193  	return GroupIDsFromJavaMetadata(p.Name, metadata)
   194  }
   195  
   196  // GroupIDsFromJavaMetadata returns the possible group IDs for a Java package
   197  // This function is similar to GroupIDFromJavaPackage, but returns all possible group IDs and is less strict
   198  // It is used as a way to generate possible candidates for CPE matching.
   199  func GroupIDsFromJavaMetadata(pkgName string, metadata pkg.JavaArchive) (groupIDs []string) {
   200  	groupIDs = append(groupIDs, groupIDsFromPomProperties(metadata.PomProperties)...)
   201  	groupIDs = append(groupIDs, groupIDsFromPomProject(metadata.PomProject)...)
   202  	groupIDs = append(groupIDs, groupIDsFromJavaManifest(pkgName, metadata.Manifest)...)
   203  
   204  	return groupIDs
   205  }
   206  
   207  func groupIDsFromPomProperties(properties *pkg.JavaPomProperties) (groupIDs []string) {
   208  	if properties == nil {
   209  		return nil
   210  	}
   211  
   212  	if startsWithTopLevelDomain(properties.GroupID) {
   213  		groupIDs = append(groupIDs, cleanGroupID(properties.GroupID))
   214  	}
   215  
   216  	// sometimes the publisher puts the group ID in the artifact ID field unintentionally
   217  	if startsWithTopLevelDomain(properties.ArtifactID) && len(strings.Split(properties.ArtifactID, ".")) > 1 {
   218  		// there is a strong indication that the artifact ID is really a group ID
   219  		groupIDs = append(groupIDs, cleanGroupID(properties.ArtifactID))
   220  	}
   221  
   222  	return groupIDs
   223  }
   224  
   225  func groupIDsFromPomProject(project *pkg.JavaPomProject) (groupIDs []string) {
   226  	if project == nil {
   227  		return nil
   228  	}
   229  
   230  	// extract the project info...
   231  	groupIDs = addGroupIDsFromGroupIDsAndArtifactID(project.GroupID, project.ArtifactID)
   232  
   233  	if project.Parent == nil {
   234  		return groupIDs
   235  	}
   236  
   237  	// extract the parent project info...
   238  	groupIDs = append(groupIDs, addGroupIDsFromGroupIDsAndArtifactID(project.Parent.GroupID, project.Parent.ArtifactID)...)
   239  
   240  	return groupIDs
   241  }
   242  
   243  func addGroupIDsFromGroupIDsAndArtifactID(groupID, artifactID string) (groupIDs []string) {
   244  	if startsWithTopLevelDomain(groupID) {
   245  		groupIDs = append(groupIDs, cleanGroupID(groupID))
   246  	}
   247  
   248  	// sometimes the publisher puts the group ID in the artifact ID field unintentionally
   249  	if startsWithTopLevelDomain(artifactID) && len(strings.Split(artifactID, ".")) > 1 {
   250  		// there is a strong indication that the artifact ID is really a group ID
   251  		groupIDs = append(groupIDs, cleanGroupID(artifactID))
   252  	}
   253  	return groupIDs
   254  }
   255  
   256  func groupIDsFromJavaManifest(pkgName string, manifest *pkg.JavaManifest) []string {
   257  	if groupID, ok := DefaultArtifactIDToGroupID[pkgName]; ok {
   258  		return []string{groupID}
   259  	}
   260  
   261  	if manifest == nil {
   262  		return nil
   263  	}
   264  
   265  	// try the common manifest fields first for a set of candidates
   266  	groupIDs := GetManifestFieldGroupIDs(manifest, PrimaryJavaManifestGroupIDFields)
   267  
   268  	if len(groupIDs) != 0 {
   269  		return groupIDs
   270  	}
   271  
   272  	// if we haven't found anything yet, let's try a last ditch effort:
   273  	// attempt to get group-id-like info from the MANIFEST.MF "Automatic-Module-Name" and "Extension-Name" field.
   274  	// for more info see pkg:maven/commons-io/commons-io@2.8.0 within cloudbees/cloudbees-core-mm:2.263.4.2
   275  	// at /usr/share/jenkins/jenkins.war:WEB-INF/plugins/analysis-model-api.hpi:WEB-INF/lib/commons-io-2.8.0.jar
   276  	// as well as the ant package from cloudbees/cloudbees-core-mm:2.277.2.4-ra.
   277  	return GetManifestFieldGroupIDs(manifest, SecondaryJavaManifestGroupIDFields)
   278  }
   279  
   280  func GetManifestFieldGroupIDs(manifest *pkg.JavaManifest, fields []string) (groupIDs []string) {
   281  	if manifest == nil {
   282  		return nil
   283  	}
   284  
   285  	for _, name := range fields {
   286  		if value, exists := manifest.Main.Get(name); exists {
   287  			if startsWithTopLevelDomain(value) {
   288  				groupIDs = append(groupIDs, cleanGroupID(value))
   289  			}
   290  		}
   291  		for _, section := range manifest.Sections {
   292  			if value, exists := section.Get(name); exists {
   293  				if startsWithTopLevelDomain(value) {
   294  					groupIDs = append(groupIDs, cleanGroupID(value))
   295  				}
   296  			}
   297  		}
   298  	}
   299  	sort.Strings(groupIDs)
   300  
   301  	return groupIDs
   302  }
   303  
   304  func cleanGroupID(groupID string) string {
   305  	return strings.TrimSpace(strings.Split(removeOSCIDirectives(groupID), "#")[0])
   306  }
   307  
   308  func removeOSCIDirectives(groupID string) string {
   309  	// for example:
   310  	// 		org.bar;uses:=“org.foo”		-> 	org.bar
   311  	// more about OSGI directives see https://spring.io/blog/2008/10/20/understanding-the-osgi-uses-directive/
   312  	return strings.Split(groupID, ";")[0]
   313  }
   314  
   315  func startsWithTopLevelDomain(value string) bool {
   316  	return internal.HasAnyOfPrefixes(value, domains...)
   317  }
   318  
   319  func looksLikeGroupID(value string) bool {
   320  	return strings.Contains(value, ".")
   321  }