github.com/kastenhq/syft@v0.0.0-20230821225854-0710af25cdbe/syft/pkg/cataloger/common/cpe/java.go (about)

     1  package cpe
     2  
     3  import (
     4  	"strings"
     5  
     6  	"github.com/scylladb/go-set/strset"
     7  
     8  	"github.com/kastenhq/syft/internal"
     9  	"github.com/kastenhq/syft/syft/pkg"
    10  )
    11  
    12  var (
    13  	forbiddenProductGroupIDFields = strset.New("plugin", "plugins", "client")
    14  	forbiddenVendorGroupIDFields  = strset.New("plugin", "plugins")
    15  
    16  	domains = []string{
    17  		"com",
    18  		"org",
    19  		"net",
    20  		"io",
    21  		"be",
    22  	}
    23  
    24  	primaryJavaManifestGroupIDFields = []string{
    25  		"Extension-Name",
    26  		"Specification-Vendor",
    27  		"Implementation-Vendor",
    28  		"Bundle-SymbolicName",
    29  		"Implementation-Vendor-Id",
    30  		"Implementation-Title",
    31  		"Bundle-Activator",
    32  	}
    33  	secondaryJavaManifestGroupIDFields = []string{
    34  		"Automatic-Module-Name",
    35  		"Main-Class",
    36  		"Package",
    37  	}
    38  	javaManifestNameFields = []string{
    39  		"Specification-Vendor",
    40  		"Implementation-Vendor",
    41  	}
    42  )
    43  
    44  func candidateProductsForJava(p pkg.Package) []string {
    45  	return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), GroupIDsFromJavaPackage(p))
    46  }
    47  
    48  func candidateVendorsForJava(p pkg.Package) fieldCandidateSet {
    49  	gidVendors := vendorsFromGroupIDs(GroupIDsFromJavaPackage(p))
    50  	nameVendors := vendorsFromJavaManifestNames(p)
    51  	return newFieldCandidateSetFromSets(gidVendors, nameVendors)
    52  }
    53  
    54  func vendorsFromJavaManifestNames(p pkg.Package) fieldCandidateSet {
    55  	vendors := newFieldCandidateSet()
    56  
    57  	metadata, ok := p.Metadata.(pkg.JavaMetadata)
    58  	if !ok {
    59  		return vendors
    60  	}
    61  
    62  	if metadata.Manifest == nil {
    63  		return vendors
    64  	}
    65  
    66  	for _, name := range javaManifestNameFields {
    67  		if metadata.Manifest.Main != nil {
    68  			if value, exists := metadata.Manifest.Main[name]; exists {
    69  				if !startsWithTopLevelDomain(value) {
    70  					vendors.add(fieldCandidate{
    71  						value:                 normalizePersonName(value),
    72  						disallowSubSelections: true,
    73  					})
    74  				}
    75  			}
    76  		}
    77  		if metadata.Manifest.NamedSections != nil {
    78  			for _, section := range metadata.Manifest.NamedSections {
    79  				if section == nil {
    80  					continue
    81  				}
    82  				if value, exists := section[name]; exists {
    83  					if !startsWithTopLevelDomain(value) {
    84  						vendors.add(fieldCandidate{
    85  							value:                 normalizePersonName(value),
    86  							disallowSubSelections: true,
    87  						})
    88  					}
    89  				}
    90  			}
    91  		}
    92  	}
    93  
    94  	return vendors
    95  }
    96  
    97  func vendorsFromGroupIDs(groupIDs []string) fieldCandidateSet {
    98  	vendors := newFieldCandidateSet()
    99  	for _, groupID := range groupIDs {
   100  		for i, field := range strings.Split(groupID, ".") {
   101  			field = strings.TrimSpace(field)
   102  
   103  			if len(field) == 0 {
   104  				continue
   105  			}
   106  
   107  			if forbiddenVendorGroupIDFields.Has(strings.ToLower(field)) {
   108  				continue
   109  			}
   110  
   111  			if i == 0 {
   112  				continue
   113  			}
   114  
   115  			vendors.addValue(field)
   116  		}
   117  	}
   118  
   119  	return vendors
   120  }
   121  
   122  func productsFromArtifactAndGroupIDs(artifactID string, groupIDs []string) []string {
   123  	products := strset.New()
   124  	if artifactID != "" {
   125  		products.Add(artifactID)
   126  	}
   127  
   128  	for _, groupID := range groupIDs {
   129  		isPlugin := strings.Contains(artifactID, "plugin") || strings.Contains(groupID, "plugin")
   130  
   131  		for i, field := range strings.Split(groupID, ".") {
   132  			field = strings.TrimSpace(field)
   133  
   134  			if len(field) == 0 {
   135  				continue
   136  			}
   137  
   138  			// don't add this field as a name if the name is implying the package is a plugin or client
   139  			if forbiddenProductGroupIDFields.Has(strings.ToLower(field)) {
   140  				continue
   141  			}
   142  
   143  			if i <= 1 {
   144  				continue
   145  			}
   146  
   147  			// umbrella projects tend to have sub components that either start or end with the project name. We expect
   148  			// to identify fields that may represent the umbrella project, and not fields that indicate auxiliary
   149  			// information about the package.
   150  			couldBeProjectName := strings.HasPrefix(artifactID, field) || strings.HasSuffix(artifactID, field)
   151  			if artifactID == "" || (couldBeProjectName && !isPlugin) {
   152  				products.Add(field)
   153  			}
   154  		}
   155  	}
   156  
   157  	return products.List()
   158  }
   159  
   160  func artifactIDFromJavaPackage(p pkg.Package) string {
   161  	metadata, ok := p.Metadata.(pkg.JavaMetadata)
   162  	if !ok {
   163  		return ""
   164  	}
   165  
   166  	if metadata.PomProperties == nil {
   167  		return ""
   168  	}
   169  
   170  	artifactID := strings.TrimSpace(metadata.PomProperties.ArtifactID)
   171  	if startsWithTopLevelDomain(artifactID) && len(strings.Split(artifactID, ".")) > 1 {
   172  		// there is a strong indication that the artifact ID is really a group ID, don't use it
   173  		return ""
   174  	}
   175  	return artifactID
   176  }
   177  
   178  func GroupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) {
   179  	metadata, ok := p.Metadata.(pkg.JavaMetadata)
   180  	if !ok {
   181  		return nil
   182  	}
   183  
   184  	return GroupIDsFromJavaMetadata(metadata)
   185  }
   186  
   187  func GroupIDsFromJavaMetadata(metadata pkg.JavaMetadata) (groupIDs []string) {
   188  	groupIDs = append(groupIDs, groupIDsFromPomProperties(metadata.PomProperties)...)
   189  	groupIDs = append(groupIDs, groupIDsFromPomProject(metadata.PomProject)...)
   190  	groupIDs = append(groupIDs, groupIDsFromJavaManifest(metadata.Manifest)...)
   191  
   192  	return groupIDs
   193  }
   194  
   195  func groupIDsFromPomProperties(properties *pkg.PomProperties) (groupIDs []string) {
   196  	if properties == nil {
   197  		return nil
   198  	}
   199  
   200  	if startsWithTopLevelDomain(properties.GroupID) {
   201  		groupIDs = append(groupIDs, cleanGroupID(properties.GroupID))
   202  	}
   203  
   204  	// sometimes the publisher puts the group ID in the artifact ID field unintentionally
   205  	if startsWithTopLevelDomain(properties.ArtifactID) && len(strings.Split(properties.ArtifactID, ".")) > 1 {
   206  		// there is a strong indication that the artifact ID is really a group ID
   207  		groupIDs = append(groupIDs, cleanGroupID(properties.ArtifactID))
   208  	}
   209  
   210  	return groupIDs
   211  }
   212  
   213  func groupIDsFromPomProject(project *pkg.PomProject) (groupIDs []string) {
   214  	if project == nil {
   215  		return nil
   216  	}
   217  
   218  	// extract the project info...
   219  	groupIDs = addGroupIDsFromGroupIDsAndArtifactID(project.GroupID, project.ArtifactID)
   220  
   221  	if project.Parent == nil {
   222  		return groupIDs
   223  	}
   224  
   225  	// extract the parent project info...
   226  	groupIDs = append(groupIDs, addGroupIDsFromGroupIDsAndArtifactID(project.Parent.GroupID, project.Parent.ArtifactID)...)
   227  
   228  	return groupIDs
   229  }
   230  
   231  func addGroupIDsFromGroupIDsAndArtifactID(groupID, artifactID string) (groupIDs []string) {
   232  	if startsWithTopLevelDomain(groupID) {
   233  		groupIDs = append(groupIDs, cleanGroupID(groupID))
   234  	}
   235  
   236  	// sometimes the publisher puts the group ID in the artifact ID field unintentionally
   237  	if startsWithTopLevelDomain(artifactID) && len(strings.Split(artifactID, ".")) > 1 {
   238  		// there is a strong indication that the artifact ID is really a group ID
   239  		groupIDs = append(groupIDs, cleanGroupID(artifactID))
   240  	}
   241  	return groupIDs
   242  }
   243  
   244  func groupIDsFromJavaManifest(manifest *pkg.JavaManifest) []string {
   245  	if manifest == nil {
   246  		return nil
   247  	}
   248  
   249  	// try the common manifest fields first for a set of candidates
   250  	groupIDs := getManifestFieldGroupIDs(manifest, primaryJavaManifestGroupIDFields)
   251  
   252  	if len(groupIDs) != 0 {
   253  		return groupIDs
   254  	}
   255  
   256  	// if we haven't found anything yet, let's try a last ditch effort:
   257  	// attempt to get group-id-like info from the MANIFEST.MF "Automatic-Module-Name" and "Extension-Name" field.
   258  	// for more info see pkg:maven/commons-io/commons-io@2.8.0 within cloudbees/cloudbees-core-mm:2.263.4.2
   259  	// at /usr/share/jenkins/jenkins.war:WEB-INF/plugins/analysis-model-api.hpi:WEB-INF/lib/commons-io-2.8.0.jar
   260  	// as well as the ant package from cloudbees/cloudbees-core-mm:2.277.2.4-ra.
   261  	return getManifestFieldGroupIDs(manifest, secondaryJavaManifestGroupIDFields)
   262  }
   263  
   264  func getManifestFieldGroupIDs(manifest *pkg.JavaManifest, fields []string) (groupIDs []string) {
   265  	if manifest == nil {
   266  		return nil
   267  	}
   268  
   269  	for _, name := range fields {
   270  		if value, exists := manifest.Main[name]; exists {
   271  			if startsWithTopLevelDomain(value) {
   272  				groupIDs = append(groupIDs, cleanGroupID(value))
   273  			}
   274  		}
   275  		for _, section := range manifest.NamedSections {
   276  			if value, exists := section[name]; exists {
   277  				if startsWithTopLevelDomain(value) {
   278  					groupIDs = append(groupIDs, cleanGroupID(value))
   279  				}
   280  			}
   281  		}
   282  	}
   283  
   284  	return groupIDs
   285  }
   286  
   287  func cleanGroupID(groupID string) string {
   288  	return strings.TrimSpace(removeOSCIDirectives(groupID))
   289  }
   290  
   291  func removeOSCIDirectives(groupID string) string {
   292  	// for example:
   293  	// 		org.bar;uses:=“org.foo”		-> 	org.bar
   294  	// more about OSGI directives see https://spring.io/blog/2008/10/20/understanding-the-osgi-uses-directive/
   295  	return strings.Split(groupID, ";")[0]
   296  }
   297  
   298  func startsWithTopLevelDomain(value string) bool {
   299  	return internal.HasAnyOfPrefixes(value, domains...)
   300  }