github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/common/cpe/java.go (about) 1 package cpe 2 3 import ( 4 "sort" 5 "strings" 6 7 "github.com/scylladb/go-set/strset" 8 9 "github.com/anchore/syft/syft/pkg" 10 "github.com/lineaje-labs/syft/internal" 11 ) 12 13 var ( 14 forbiddenProductGroupIDFields = strset.New("plugin", "plugins", "client") 15 forbiddenVendorGroupIDFields = strset.New("plugin", "plugins") 16 17 domains = []string{ 18 "com", 19 "org", 20 "net", 21 "io", 22 "be", 23 } 24 25 PrimaryJavaManifestGroupIDFields = []string{ 26 "Bundle-SymbolicName", 27 "Extension-Name", 28 "Specification-Vendor", 29 "Implementation-Vendor", 30 "Implementation-Vendor-Id", 31 "Implementation-Title", 32 "Bundle-Activator", 33 } 34 SecondaryJavaManifestGroupIDFields = []string{ 35 "Automatic-Module-Name", 36 "Main-Class", 37 "Package", 38 } 39 javaManifestNameFields = []string{ 40 "Specification-Vendor", 41 "Implementation-Vendor", 42 } 43 ) 44 45 func candidateProductsForJava(p pkg.Package) []string { 46 return productsFromArtifactAndGroupIDs(artifactIDFromJavaPackage(p), GroupIDsFromJavaPackage(p)) 47 } 48 49 func candidateVendorsForJava(p pkg.Package) fieldCandidateSet { 50 gidVendors := vendorsFromGroupIDs(GroupIDsFromJavaPackage(p)) 51 nameVendors := vendorsFromJavaManifestNames(p) 52 return newFieldCandidateSetFromSets(gidVendors, nameVendors) 53 } 54 55 func vendorsFromJavaManifestNames(p pkg.Package) fieldCandidateSet { 56 vendors := newFieldCandidateSet() 57 58 metadata, ok := p.Metadata.(pkg.JavaArchive) 59 if !ok { 60 return vendors 61 } 62 63 if metadata.Manifest == nil { 64 return vendors 65 } 66 67 for _, name := range javaManifestNameFields { 68 if metadata.Manifest.Main != nil { 69 if value, exists := metadata.Manifest.Main[name]; exists { 70 if !startsWithTopLevelDomain(value) { 71 vendors.add(fieldCandidate{ 72 value: normalizePersonName(value), 73 disallowSubSelections: true, 74 }) 75 } 76 } 77 } 78 if metadata.Manifest.NamedSections != nil { 79 for _, section := range metadata.Manifest.NamedSections { 80 if section == nil { 81 continue 82 } 83 if value, exists := section[name]; exists { 84 if !startsWithTopLevelDomain(value) { 85 vendors.add(fieldCandidate{ 86 value: normalizePersonName(value), 87 disallowSubSelections: true, 88 }) 89 } 90 } 91 } 92 } 93 } 94 95 return vendors 96 } 97 98 func vendorsFromGroupIDs(groupIDs []string) fieldCandidateSet { 99 vendors := newFieldCandidateSet() 100 for _, groupID := range groupIDs { 101 for i, field := range strings.Split(groupID, ".") { 102 field = strings.TrimSpace(field) 103 104 if len(field) == 0 { 105 continue 106 } 107 108 if forbiddenVendorGroupIDFields.Has(strings.ToLower(field)) { 109 continue 110 } 111 112 if i == 0 { 113 continue 114 } 115 116 vendors.addValue(field) 117 } 118 } 119 120 return vendors 121 } 122 123 func productsFromArtifactAndGroupIDs(artifactID string, groupIDs []string) []string { 124 products := strset.New() 125 if artifactID != "" { 126 products.Add(artifactID) 127 } 128 129 for _, groupID := range groupIDs { 130 isPlugin := strings.Contains(artifactID, "plugin") || strings.Contains(groupID, "plugin") 131 132 for i, field := range strings.Split(groupID, ".") { 133 field = strings.TrimSpace(field) 134 135 if len(field) == 0 { 136 continue 137 } 138 139 // don't add this field as a name if the name is implying the package is a plugin or client 140 if forbiddenProductGroupIDFields.Has(strings.ToLower(field)) { 141 continue 142 } 143 144 if i <= 1 { 145 continue 146 } 147 148 // umbrella projects tend to have sub components that either start or end with the project name. We expect 149 // to identify fields that may represent the umbrella project, and not fields that indicate auxiliary 150 // information about the package. 151 couldBeProjectName := strings.HasPrefix(artifactID, field) || strings.HasSuffix(artifactID, field) 152 if artifactID == "" || (couldBeProjectName && !isPlugin) { 153 products.Add(field) 154 } 155 } 156 } 157 158 return products.List() 159 } 160 161 func artifactIDFromJavaPackage(p pkg.Package) string { 162 metadata, ok := p.Metadata.(pkg.JavaArchive) 163 if !ok { 164 return "" 165 } 166 167 if metadata.PomProperties == nil { 168 return "" 169 } 170 171 artifactID := strings.TrimSpace(metadata.PomProperties.ArtifactID) 172 if looksLikeGroupID(artifactID) && len(strings.Split(artifactID, ".")) > 1 { 173 // there is a strong indication that the artifact ID is really a group ID, don't use it 174 return "" 175 } 176 return artifactID 177 } 178 179 func GroupIDsFromJavaPackage(p pkg.Package) (groupIDs []string) { 180 metadata, ok := p.Metadata.(pkg.JavaArchive) 181 if !ok { 182 return nil 183 } 184 185 return GroupIDsFromJavaMetadata(p.Name, metadata) 186 } 187 188 // GroupIDsFromJavaMetadata returns the possible group IDs for a Java package 189 // This function is similar to GroupIDFromJavaPackage, but returns all possible group IDs and is less strict 190 // It is used as a way to generate possible candidates for CPE matching. 191 func GroupIDsFromJavaMetadata(pkgName string, metadata pkg.JavaArchive) (groupIDs []string) { 192 groupIDs = append(groupIDs, groupIDsFromPomProperties(metadata.PomProperties)...) 193 groupIDs = append(groupIDs, groupIDsFromPomProject(metadata.PomProject)...) 194 groupIDs = append(groupIDs, groupIDsFromJavaManifest(pkgName, metadata.Manifest)...) 195 196 return groupIDs 197 } 198 199 func groupIDsFromPomProperties(properties *pkg.JavaPomProperties) (groupIDs []string) { 200 if properties == nil { 201 return nil 202 } 203 204 if startsWithTopLevelDomain(properties.GroupID) { 205 groupIDs = append(groupIDs, cleanGroupID(properties.GroupID)) 206 } 207 208 // sometimes the publisher puts the group ID in the artifact ID field unintentionally 209 if startsWithTopLevelDomain(properties.ArtifactID) && len(strings.Split(properties.ArtifactID, ".")) > 1 { 210 // there is a strong indication that the artifact ID is really a group ID 211 groupIDs = append(groupIDs, cleanGroupID(properties.ArtifactID)) 212 } 213 214 return groupIDs 215 } 216 217 func groupIDsFromPomProject(project *pkg.JavaPomProject) (groupIDs []string) { 218 if project == nil { 219 return nil 220 } 221 222 // extract the project info... 223 groupIDs = addGroupIDsFromGroupIDsAndArtifactID(project.GroupID, project.ArtifactID) 224 225 if project.Parent == nil { 226 return groupIDs 227 } 228 229 // extract the parent project info... 230 groupIDs = append(groupIDs, addGroupIDsFromGroupIDsAndArtifactID(project.Parent.GroupID, project.Parent.ArtifactID)...) 231 232 return groupIDs 233 } 234 235 func addGroupIDsFromGroupIDsAndArtifactID(groupID, artifactID string) (groupIDs []string) { 236 if startsWithTopLevelDomain(groupID) { 237 groupIDs = append(groupIDs, cleanGroupID(groupID)) 238 } 239 240 // sometimes the publisher puts the group ID in the artifact ID field unintentionally 241 if startsWithTopLevelDomain(artifactID) && len(strings.Split(artifactID, ".")) > 1 { 242 // there is a strong indication that the artifact ID is really a group ID 243 groupIDs = append(groupIDs, cleanGroupID(artifactID)) 244 } 245 return groupIDs 246 } 247 248 func groupIDsFromJavaManifest(pkgName string, manifest *pkg.JavaManifest) []string { 249 if groupID, ok := DefaultArtifactIDToGroupID[pkgName]; ok { 250 return []string{groupID} 251 } 252 253 if manifest == nil { 254 return nil 255 } 256 257 // try the common manifest fields first for a set of candidates 258 groupIDs := GetManifestFieldGroupIDs(manifest, PrimaryJavaManifestGroupIDFields) 259 260 if len(groupIDs) != 0 { 261 return groupIDs 262 } 263 264 // if we haven't found anything yet, let's try a last ditch effort: 265 // attempt to get group-id-like info from the MANIFEST.MF "Automatic-Module-Name" and "Extension-Name" field. 266 // for more info see pkg:maven/commons-io/commons-io@2.8.0 within cloudbees/cloudbees-core-mm:2.263.4.2 267 // at /usr/share/jenkins/jenkins.war:WEB-INF/plugins/analysis-model-api.hpi:WEB-INF/lib/commons-io-2.8.0.jar 268 // as well as the ant package from cloudbees/cloudbees-core-mm:2.277.2.4-ra. 269 return GetManifestFieldGroupIDs(manifest, SecondaryJavaManifestGroupIDFields) 270 } 271 272 func GetManifestFieldGroupIDs(manifest *pkg.JavaManifest, fields []string) (groupIDs []string) { 273 if manifest == nil { 274 return nil 275 } 276 277 for _, name := range fields { 278 if value, exists := manifest.Main[name]; exists { 279 if startsWithTopLevelDomain(value) { 280 groupIDs = append(groupIDs, cleanGroupID(value)) 281 } 282 } 283 for _, section := range manifest.NamedSections { 284 if value, exists := section[name]; exists { 285 if startsWithTopLevelDomain(value) { 286 groupIDs = append(groupIDs, cleanGroupID(value)) 287 } 288 } 289 } 290 } 291 sort.Strings(groupIDs) 292 293 return groupIDs 294 } 295 296 func cleanGroupID(groupID string) string { 297 return strings.TrimSpace(removeOSCIDirectives(groupID)) 298 } 299 300 func removeOSCIDirectives(groupID string) string { 301 // for example: 302 // org.bar;uses:=“org.foo” -> org.bar 303 // more about OSGI directives see https://spring.io/blog/2008/10/20/understanding-the-osgi-uses-directive/ 304 return strings.Split(groupID, ";")[0] 305 } 306 307 func startsWithTopLevelDomain(value string) bool { 308 return internal.HasAnyOfPrefixes(value, domains...) 309 } 310 311 func looksLikeGroupID(value string) bool { 312 return strings.Contains(value, ".") 313 }