github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/common/cpe/java.go (about)

     1  package cpe
     2  
     3  import (
     4  	"sort"
     5  	"strings"
     6  
     7  	"github.com/scylladb/go-set/strset"
     8  
     9  	"github.com/anchore/syft/syft/pkg"
    10  	"github.com/lineaje-labs/syft/internal"
    11  )
    12  
    13  var (
    14  	forbiddenProductGroupIDFields = strset.New("plugin", "plugins", "client")
    15  	forbiddenVendorGroupIDFields  = strset.New("plugin", "plugins")
    16  
    17  	domains = []string{
    18  		"com",
    19  		"org",
    20  		"net",
    21  		"io",
    22  		"be",
    23  	}
    24  
    25  	PrimaryJavaManifestGroupIDFields = []string{
    26  		"Bundle-SymbolicName",
    27  		"Extension-Name",
    28  		"Specification-Vendor",
    29  		"Implementation-Vendor",
    30  		"Implementation-Vendor-Id",
    31  		"Implementation-Title",
    32  		"Bundle-Activator",
    33  	}
    34  	SecondaryJavaManifestGroupIDFields = []string{
    35  		"Automatic-Module-Name",
    36  		"Main-Class",
    37  		"Package",
    38  	}
    39  	javaManifestNameFields = []string{
    40  		"Specification-Vendor",
    41  		"Implementation-Vendor",
    42  	}
    43  )
    44  
    45  func candidateProductsForJava(p pkg.Package) []string {
    46  	return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), GroupIDsFromJavaPackage(p))
    47  }
    48  
    49  func candidateVendorsForJava(p pkg.Package) fieldCandidateSet {
    50  	gidVendors := vendorsFromGroupIDs(GroupIDsFromJavaPackage(p))
    51  	nameVendors := vendorsFromJavaManifestNames(p)
    52  	return newFieldCandidateSetFromSets(gidVendors, nameVendors)
    53  }
    54  
    55  func vendorsFromJavaManifestNames(p pkg.Package) fieldCandidateSet {
    56  	vendors := newFieldCandidateSet()
    57  
    58  	metadata, ok := p.Metadata.(pkg.JavaArchive)
    59  	if !ok {
    60  		return vendors
    61  	}
    62  
    63  	if metadata.Manifest == nil {
    64  		return vendors
    65  	}
    66  
    67  	for _, name := range javaManifestNameFields {
    68  		if metadata.Manifest.Main != nil {
    69  			if value, exists := metadata.Manifest.Main[name]; exists {
    70  				if !startsWithTopLevelDomain(value) {
    71  					vendors.add(fieldCandidate{
    72  						value:                 normalizePersonName(value),
    73  						disallowSubSelections: true,
    74  					})
    75  				}
    76  			}
    77  		}
    78  		if metadata.Manifest.NamedSections != nil {
    79  			for _, section := range metadata.Manifest.NamedSections {
    80  				if section == nil {
    81  					continue
    82  				}
    83  				if value, exists := section[name]; exists {
    84  					if !startsWithTopLevelDomain(value) {
    85  						vendors.add(fieldCandidate{
    86  							value:                 normalizePersonName(value),
    87  							disallowSubSelections: true,
    88  						})
    89  					}
    90  				}
    91  			}
    92  		}
    93  	}
    94  
    95  	return vendors
    96  }
    97  
    98  func vendorsFromGroupIDs(groupIDs []string) fieldCandidateSet {
    99  	vendors := newFieldCandidateSet()
   100  	for _, groupID := range groupIDs {
   101  		for i, field := range strings.Split(groupID, ".") {
   102  			field = strings.TrimSpace(field)
   103  
   104  			if len(field) == 0 {
   105  				continue
   106  			}
   107  
   108  			if forbiddenVendorGroupIDFields.Has(strings.ToLower(field)) {
   109  				continue
   110  			}
   111  
   112  			if i == 0 {
   113  				continue
   114  			}
   115  
   116  			vendors.addValue(field)
   117  		}
   118  	}
   119  
   120  	return vendors
   121  }
   122  
   123  func productsFromArtifactAndGroupIDs(artifactID string, groupIDs []string) []string {
   124  	products := strset.New()
   125  	if artifactID != "" {
   126  		products.Add(artifactID)
   127  	}
   128  
   129  	for _, groupID := range groupIDs {
   130  		isPlugin := strings.Contains(artifactID, "plugin") || strings.Contains(groupID, "plugin")
   131  
   132  		for i, field := range strings.Split(groupID, ".") {
   133  			field = strings.TrimSpace(field)
   134  
   135  			if len(field) == 0 {
   136  				continue
   137  			}
   138  
   139  			// don't add this field as a name if the name is implying the package is a plugin or client
   140  			if forbiddenProductGroupIDFields.Has(strings.ToLower(field)) {
   141  				continue
   142  			}
   143  
   144  			if i <= 1 {
   145  				continue
   146  			}
   147  
   148  			// umbrella projects tend to have sub components that either start or end with the project name. We expect
   149  			// to identify fields that may represent the umbrella project, and not fields that indicate auxiliary
   150  			// information about the package.
   151  			couldBeProjectName := strings.HasPrefix(artifactID, field) || strings.HasSuffix(artifactID, field)
   152  			if artifactID == "" || (couldBeProjectName && !isPlugin) {
   153  				products.Add(field)
   154  			}
   155  		}
   156  	}
   157  
   158  	return products.List()
   159  }
   160  
   161  func artifactIDFromJavaPackage(p pkg.Package) string {
   162  	metadata, ok := p.Metadata.(pkg.JavaArchive)
   163  	if !ok {
   164  		return ""
   165  	}
   166  
   167  	if metadata.PomProperties == nil {
   168  		return ""
   169  	}
   170  
   171  	artifactID := strings.TrimSpace(metadata.PomProperties.ArtifactID)
   172  	if looksLikeGroupID(artifactID) && len(strings.Split(artifactID, ".")) > 1 {
   173  		// there is a strong indication that the artifact ID is really a group ID, don't use it
   174  		return ""
   175  	}
   176  	return artifactID
   177  }
   178  
   179  func GroupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) {
   180  	metadata, ok := p.Metadata.(pkg.JavaArchive)
   181  	if !ok {
   182  		return nil
   183  	}
   184  
   185  	return GroupIDsFromJavaMetadata(p.Name, metadata)
   186  }
   187  
   188  // GroupIDsFromJavaMetadata returns the possible group IDs for a Java package
   189  // This function is similar to GroupIDFromJavaPackage, but returns all possible group IDs and is less strict
   190  // It is used as a way to generate possible candidates for CPE matching.
   191  func GroupIDsFromJavaMetadata(pkgName string, metadata pkg.JavaArchive) (groupIDs []string) {
   192  	groupIDs = append(groupIDs, groupIDsFromPomProperties(metadata.PomProperties)...)
   193  	groupIDs = append(groupIDs, groupIDsFromPomProject(metadata.PomProject)...)
   194  	groupIDs = append(groupIDs, groupIDsFromJavaManifest(pkgName, metadata.Manifest)...)
   195  
   196  	return groupIDs
   197  }
   198  
   199  func groupIDsFromPomProperties(properties *pkg.JavaPomProperties) (groupIDs []string) {
   200  	if properties == nil {
   201  		return nil
   202  	}
   203  
   204  	if startsWithTopLevelDomain(properties.GroupID) {
   205  		groupIDs = append(groupIDs, cleanGroupID(properties.GroupID))
   206  	}
   207  
   208  	// sometimes the publisher puts the group ID in the artifact ID field unintentionally
   209  	if startsWithTopLevelDomain(properties.ArtifactID) && len(strings.Split(properties.ArtifactID, ".")) > 1 {
   210  		// there is a strong indication that the artifact ID is really a group ID
   211  		groupIDs = append(groupIDs, cleanGroupID(properties.ArtifactID))
   212  	}
   213  
   214  	return groupIDs
   215  }
   216  
   217  func groupIDsFromPomProject(project *pkg.JavaPomProject) (groupIDs []string) {
   218  	if project == nil {
   219  		return nil
   220  	}
   221  
   222  	// extract the project info...
   223  	groupIDs = addGroupIDsFromGroupIDsAndArtifactID(project.GroupID, project.ArtifactID)
   224  
   225  	if project.Parent == nil {
   226  		return groupIDs
   227  	}
   228  
   229  	// extract the parent project info...
   230  	groupIDs = append(groupIDs, addGroupIDsFromGroupIDsAndArtifactID(project.Parent.GroupID, project.Parent.ArtifactID)...)
   231  
   232  	return groupIDs
   233  }
   234  
   235  func addGroupIDsFromGroupIDsAndArtifactID(groupID, artifactID string) (groupIDs []string) {
   236  	if startsWithTopLevelDomain(groupID) {
   237  		groupIDs = append(groupIDs, cleanGroupID(groupID))
   238  	}
   239  
   240  	// sometimes the publisher puts the group ID in the artifact ID field unintentionally
   241  	if startsWithTopLevelDomain(artifactID) && len(strings.Split(artifactID, ".")) > 1 {
   242  		// there is a strong indication that the artifact ID is really a group ID
   243  		groupIDs = append(groupIDs, cleanGroupID(artifactID))
   244  	}
   245  	return groupIDs
   246  }
   247  
   248  func groupIDsFromJavaManifest(pkgName string, manifest *pkg.JavaManifest) []string {
   249  	if groupID, ok := DefaultArtifactIDToGroupID[pkgName]; ok {
   250  		return []string{groupID}
   251  	}
   252  
   253  	if manifest == nil {
   254  		return nil
   255  	}
   256  
   257  	// try the common manifest fields first for a set of candidates
   258  	groupIDs := GetManifestFieldGroupIDs(manifest, PrimaryJavaManifestGroupIDFields)
   259  
   260  	if len(groupIDs) != 0 {
   261  		return groupIDs
   262  	}
   263  
   264  	// if we haven't found anything yet, let's try a last ditch effort:
   265  	// attempt to get group-id-like info from the MANIFEST.MF "Automatic-Module-Name" and "Extension-Name" field.
   266  	// for more info see pkg:maven/commons-io/commons-io@2.8.0 within cloudbees/cloudbees-core-mm:2.263.4.2
   267  	// at /usr/share/jenkins/jenkins.war:WEB-INF/plugins/analysis-model-api.hpi:WEB-INF/lib/commons-io-2.8.0.jar
   268  	// as well as the ant package from cloudbees/cloudbees-core-mm:2.277.2.4-ra.
   269  	return GetManifestFieldGroupIDs(manifest, SecondaryJavaManifestGroupIDFields)
   270  }
   271  
   272  func GetManifestFieldGroupIDs(manifest *pkg.JavaManifest, fields []string) (groupIDs []string) {
   273  	if manifest == nil {
   274  		return nil
   275  	}
   276  
   277  	for _, name := range fields {
   278  		if value, exists := manifest.Main[name]; exists {
   279  			if startsWithTopLevelDomain(value) {
   280  				groupIDs = append(groupIDs, cleanGroupID(value))
   281  			}
   282  		}
   283  		for _, section := range manifest.NamedSections {
   284  			if value, exists := section[name]; exists {
   285  				if startsWithTopLevelDomain(value) {
   286  					groupIDs = append(groupIDs, cleanGroupID(value))
   287  				}
   288  			}
   289  		}
   290  	}
   291  	sort.Strings(groupIDs)
   292  
   293  	return groupIDs
   294  }
   295  
   296  func cleanGroupID(groupID string) string {
   297  	return strings.TrimSpace(removeOSCIDirectives(groupID))
   298  }
   299  
   300  func removeOSCIDirectives(groupID string) string {
   301  	// for example:
   302  	// 		org.bar;uses:=“org.foo”		-> 	org.bar
   303  	// more about OSGI directives see https://spring.io/blog/2008/10/20/understanding-the-osgi-uses-directive/
   304  	return strings.Split(groupID, ";")[0]
   305  }
   306  
   307  func startsWithTopLevelDomain(value string) bool {
   308  	return internal.HasAnyOfPrefixes(value, domains...)
   309  }
   310  
   311  func looksLikeGroupID(value string) bool {
   312  	return strings.Contains(value, ".")
   313  }