github.com/nextlinux/gosbom@v0.81.1-0.20230627115839-1ff50c281391/gosbom/pkg/cataloger/common/cpe/java.go (about)

     1  package cpe
     2  
     3  import (
     4  	"strings"
     5  
     6  	"github.com/nextlinux/gosbom/gosbom/pkg"
     7  	"github.com/nextlinux/gosbom/internal"
     8  	"github.com/scylladb/go-set/strset"
     9  )
    10  
    11  var (
    12  	forbiddenProductGroupIDFields = strset.New("plugin", "plugins", "client")
    13  	forbiddenVendorGroupIDFields  = strset.New("plugin", "plugins")
    14  
    15  	domains = []string{
    16  		"com",
    17  		"org",
    18  		"net",
    19  		"io",
    20  		"be",
    21  	}
    22  
    23  	primaryJavaManifestGroupIDFields = []string{
    24  		"Extension-Name",
    25  		"Specification-Vendor",
    26  		"Implementation-Vendor",
    27  		"Bundle-SymbolicName",
    28  		"Implementation-Vendor-Id",
    29  		"Implementation-Title",
    30  		"Bundle-Activator",
    31  	}
    32  	secondaryJavaManifestGroupIDFields = []string{
    33  		"Automatic-Module-Name",
    34  		"Main-Class",
    35  		"Package",
    36  	}
    37  	javaManifestNameFields = []string{
    38  		"Specification-Vendor",
    39  		"Implementation-Vendor",
    40  	}
    41  )
    42  
    43  func candidateProductsForJava(p pkg.Package) []string {
    44  	return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), GroupIDsFromJavaPackage(p))
    45  }
    46  
    47  func candidateVendorsForJava(p pkg.Package) fieldCandidateSet {
    48  	gidVendors := vendorsFromGroupIDs(GroupIDsFromJavaPackage(p))
    49  	nameVendors := vendorsFromJavaManifestNames(p)
    50  	return newFieldCandidateSetFromSets(gidVendors, nameVendors)
    51  }
    52  
    53  func vendorsFromJavaManifestNames(p pkg.Package) fieldCandidateSet {
    54  	vendors := newFieldCandidateSet()
    55  
    56  	metadata, ok := p.Metadata.(pkg.JavaMetadata)
    57  	if !ok {
    58  		return vendors
    59  	}
    60  
    61  	if metadata.Manifest == nil {
    62  		return vendors
    63  	}
    64  
    65  	for _, name := range javaManifestNameFields {
    66  		if metadata.Manifest.Main != nil {
    67  			if value, exists := metadata.Manifest.Main[name]; exists {
    68  				if !startsWithTopLevelDomain(value) {
    69  					vendors.add(fieldCandidate{
    70  						value:                 normalizePersonName(value),
    71  						disallowSubSelections: true,
    72  					})
    73  				}
    74  			}
    75  		}
    76  		if metadata.Manifest.NamedSections != nil {
    77  			for _, section := range metadata.Manifest.NamedSections {
    78  				if section == nil {
    79  					continue
    80  				}
    81  				if value, exists := section[name]; exists {
    82  					if !startsWithTopLevelDomain(value) {
    83  						vendors.add(fieldCandidate{
    84  							value:                 normalizePersonName(value),
    85  							disallowSubSelections: true,
    86  						})
    87  					}
    88  				}
    89  			}
    90  		}
    91  	}
    92  
    93  	return vendors
    94  }
    95  
    96  func vendorsFromGroupIDs(groupIDs []string) fieldCandidateSet {
    97  	vendors := newFieldCandidateSet()
    98  	for _, groupID := range groupIDs {
    99  		for i, field := range strings.Split(groupID, ".") {
   100  			field = strings.TrimSpace(field)
   101  
   102  			if len(field) == 0 {
   103  				continue
   104  			}
   105  
   106  			if forbiddenVendorGroupIDFields.Has(strings.ToLower(field)) {
   107  				continue
   108  			}
   109  
   110  			if i == 0 {
   111  				continue
   112  			}
   113  
   114  			vendors.addValue(field)
   115  		}
   116  	}
   117  
   118  	return vendors
   119  }
   120  
   121  func productsFromArtifactAndGroupIDs(artifactID string, groupIDs []string) []string {
   122  	products := strset.New()
   123  	if artifactID != "" {
   124  		products.Add(artifactID)
   125  	}
   126  
   127  	for _, groupID := range groupIDs {
   128  		isPlugin := strings.Contains(artifactID, "plugin") || strings.Contains(groupID, "plugin")
   129  
   130  		for i, field := range strings.Split(groupID, ".") {
   131  			field = strings.TrimSpace(field)
   132  
   133  			if len(field) == 0 {
   134  				continue
   135  			}
   136  
   137  			// don't add this field as a name if the name is implying the package is a plugin or client
   138  			if forbiddenProductGroupIDFields.Has(strings.ToLower(field)) {
   139  				continue
   140  			}
   141  
   142  			if i <= 1 {
   143  				continue
   144  			}
   145  
   146  			// umbrella projects tend to have sub components that either start or end with the project name. We expect
   147  			// to identify fields that may represent the umbrella project, and not fields that indicate auxiliary
   148  			// information about the package.
   149  			couldBeProjectName := strings.HasPrefix(artifactID, field) || strings.HasSuffix(artifactID, field)
   150  			if artifactID == "" || (couldBeProjectName && !isPlugin) {
   151  				products.Add(field)
   152  			}
   153  		}
   154  	}
   155  
   156  	return products.List()
   157  }
   158  
   159  func artifactIDFromJavaPackage(p pkg.Package) string {
   160  	metadata, ok := p.Metadata.(pkg.JavaMetadata)
   161  	if !ok {
   162  		return ""
   163  	}
   164  
   165  	if metadata.PomProperties == nil {
   166  		return ""
   167  	}
   168  
   169  	artifactID := strings.TrimSpace(metadata.PomProperties.ArtifactID)
   170  	if startsWithTopLevelDomain(artifactID) && len(strings.Split(artifactID, ".")) > 1 {
   171  		// there is a strong indication that the artifact ID is really a group ID, don't use it
   172  		return ""
   173  	}
   174  	return artifactID
   175  }
   176  
   177  func GroupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) {
   178  	metadata, ok := p.Metadata.(pkg.JavaMetadata)
   179  	if !ok {
   180  		return nil
   181  	}
   182  
   183  	return GroupIDsFromJavaMetadata(metadata)
   184  }
   185  
   186  func GroupIDsFromJavaMetadata(metadata pkg.JavaMetadata) (groupIDs []string) {
   187  	groupIDs = append(groupIDs, groupIDsFromPomProperties(metadata.PomProperties)...)
   188  	groupIDs = append(groupIDs, groupIDsFromPomProject(metadata.PomProject)...)
   189  	groupIDs = append(groupIDs, groupIDsFromJavaManifest(metadata.Manifest)...)
   190  
   191  	return groupIDs
   192  }
   193  
   194  func groupIDsFromPomProperties(properties *pkg.PomProperties) (groupIDs []string) {
   195  	if properties == nil {
   196  		return nil
   197  	}
   198  
   199  	if startsWithTopLevelDomain(properties.GroupID) {
   200  		groupIDs = append(groupIDs, cleanGroupID(properties.GroupID))
   201  	}
   202  
   203  	// sometimes the publisher puts the group ID in the artifact ID field unintentionally
   204  	if startsWithTopLevelDomain(properties.ArtifactID) && len(strings.Split(properties.ArtifactID, ".")) > 1 {
   205  		// there is a strong indication that the artifact ID is really a group ID
   206  		groupIDs = append(groupIDs, cleanGroupID(properties.ArtifactID))
   207  	}
   208  
   209  	return groupIDs
   210  }
   211  
   212  func groupIDsFromPomProject(project *pkg.PomProject) (groupIDs []string) {
   213  	if project == nil {
   214  		return nil
   215  	}
   216  
   217  	// extract the project info...
   218  	groupIDs = addGroupIDsFromGroupIDsAndArtifactID(project.GroupID, project.ArtifactID)
   219  
   220  	if project.Parent == nil {
   221  		return groupIDs
   222  	}
   223  
   224  	// extract the parent project info...
   225  	groupIDs = append(groupIDs, addGroupIDsFromGroupIDsAndArtifactID(project.Parent.GroupID, project.Parent.ArtifactID)...)
   226  
   227  	return groupIDs
   228  }
   229  
   230  func addGroupIDsFromGroupIDsAndArtifactID(groupID, artifactID string) (groupIDs []string) {
   231  	if startsWithTopLevelDomain(groupID) {
   232  		groupIDs = append(groupIDs, cleanGroupID(groupID))
   233  	}
   234  
   235  	// sometimes the publisher puts the group ID in the artifact ID field unintentionally
   236  	if startsWithTopLevelDomain(artifactID) && len(strings.Split(artifactID, ".")) > 1 {
   237  		// there is a strong indication that the artifact ID is really a group ID
   238  		groupIDs = append(groupIDs, cleanGroupID(artifactID))
   239  	}
   240  	return groupIDs
   241  }
   242  
   243  func groupIDsFromJavaManifest(manifest *pkg.JavaManifest) []string {
   244  	if manifest == nil {
   245  		return nil
   246  	}
   247  
   248  	// try the common manifest fields first for a set of candidates
   249  	groupIDs := getManifestFieldGroupIDs(manifest, primaryJavaManifestGroupIDFields)
   250  
   251  	if len(groupIDs) != 0 {
   252  		return groupIDs
   253  	}
   254  
   255  	// if we haven't found anything yet, let's try a last ditch effort:
   256  	// attempt to get group-id-like info from the MANIFEST.MF "Automatic-Module-Name" and "Extension-Name" field.
   257  	// for more info see pkg:maven/commons-io/commons-io@2.8.0 within cloudbees/cloudbees-core-mm:2.263.4.2
   258  	// at /usr/share/jenkins/jenkins.war:WEB-INF/plugins/analysis-model-api.hpi:WEB-INF/lib/commons-io-2.8.0.jar
   259  	// as well as the ant package from cloudbees/cloudbees-core-mm:2.277.2.4-ra.
   260  	return getManifestFieldGroupIDs(manifest, secondaryJavaManifestGroupIDFields)
   261  }
   262  
   263  func getManifestFieldGroupIDs(manifest *pkg.JavaManifest, fields []string) (groupIDs []string) {
   264  	if manifest == nil {
   265  		return nil
   266  	}
   267  
   268  	for _, name := range fields {
   269  		if value, exists := manifest.Main[name]; exists {
   270  			if startsWithTopLevelDomain(value) {
   271  				groupIDs = append(groupIDs, cleanGroupID(value))
   272  			}
   273  		}
   274  		for _, section := range manifest.NamedSections {
   275  			if value, exists := section[name]; exists {
   276  				if startsWithTopLevelDomain(value) {
   277  					groupIDs = append(groupIDs, cleanGroupID(value))
   278  				}
   279  			}
   280  		}
   281  	}
   282  
   283  	return groupIDs
   284  }
   285  
   286  func cleanGroupID(groupID string) string {
   287  	return strings.TrimSpace(removeOSCIDirectives(groupID))
   288  }
   289  
   290  func removeOSCIDirectives(groupID string) string {
   291  	// for example:
   292  	// 		org.bar;uses:=“org.foo”		-> 	org.bar
   293  	// more about OSGI directives see https://spring.io/blog/2008/10/20/understanding-the-osgi-uses-directive/
   294  	return strings.Split(groupID, ";")[0]
   295  }
   296  
   297  func startsWithTopLevelDomain(value string) bool {
   298  	return internal.HasAnyOfPrefixes(value, domains...)
   299  }