github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/java/archive_parser.go (about) 1 package java 2 3 import ( 4 "context" 5 "crypto" 6 "fmt" 7 "os" 8 "path" 9 "strings" 10 11 intFile "github.com/anchore/syft/internal/file" 12 "github.com/anchore/syft/internal/licenses" 13 "github.com/anchore/syft/internal/log" 14 "github.com/anchore/syft/syft/artifact" 15 "github.com/anchore/syft/syft/file" 16 "github.com/anchore/syft/syft/pkg" 17 "github.com/anchore/syft/syft/pkg/cataloger/generic" 18 ) 19 20 var archiveFormatGlobs = []string{ 21 "**/*.jar", 22 "**/*.war", 23 "**/*.ear", 24 "**/*.par", 25 "**/*.sar", 26 "**/*.nar", 27 "**/*.jpi", 28 "**/*.hpi", 29 "**/*.lpkg", // Zip-compressed package used to deploy applications 30 // (aka plugins) to Liferay Portal server. Those files contains .JAR(s) and a .PROPERTIES file, the latter 31 // has information about the application and installation requirements. 32 // NOTE(jonasagx): If you would like to test it with lpkg file, 33 // use: https://web.liferay.com/marketplace/-/mp/download/25019275/7403 34 // LifeRay makes it pretty cumbersome to make a such plugins; their docs are 35 // out of date, and they charge for their IDE. If you find an example 36 // project that we can build in CI feel free to include it 37 } 38 39 // javaArchiveHashes are all the current hash algorithms used to calculate archive digests 40 var javaArchiveHashes = []crypto.Hash{ 41 crypto.SHA1, 42 } 43 44 type archiveParser struct { 45 fileManifest intFile.ZipFileManifest 46 location file.Location 47 archivePath string 48 contentPath string 49 fileInfo archiveFilename 50 detectNested bool 51 cfg ArchiveCatalogerConfig 52 } 53 54 type genericArchiveParserAdapter struct { 55 cfg ArchiveCatalogerConfig 56 } 57 58 func newGenericArchiveParserAdapter(cfg ArchiveCatalogerConfig) genericArchiveParserAdapter { 59 return genericArchiveParserAdapter{cfg: cfg} 60 } 61 62 // parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives. 63 func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { 64 parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg) 65 // note: even on error, we should always run cleanup functions 66 defer cleanupFn() 67 if err != nil { 68 return nil, nil, err 69 } 70 return parser.parse(ctx) 71 } 72 73 // uniquePkgKey creates a unique string to identify the given package. 74 func uniquePkgKey(groupID string, p *pkg.Package) string { 75 if p == nil { 76 return "" 77 } 78 return fmt.Sprintf("%s|%s|%s", groupID, p.Name, p.Version) 79 } 80 81 // newJavaArchiveParser returns a new java archive parser object for the given archive. Can be configured to discover 82 // and parse nested archives or ignore them. 83 func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg ArchiveCatalogerConfig) (*archiveParser, func(), error) { 84 // fetch the last element of the virtual path 85 virtualElements := strings.Split(reader.Path(), ":") 86 currentFilepath := virtualElements[len(virtualElements)-1] 87 88 contentPath, archivePath, cleanupFn, err := saveArchiveToTmp(currentFilepath, reader) 89 if err != nil { 90 return nil, cleanupFn, fmt.Errorf("unable to process java archive: %w", err) 91 } 92 93 fileManifest, err := intFile.NewZipFileManifest(archivePath) 94 if err != nil { 95 return nil, cleanupFn, fmt.Errorf("unable to read files from java archive: %w", err) 96 } 97 98 return &archiveParser{ 99 fileManifest: fileManifest, 100 location: reader.Location, 101 archivePath: archivePath, 102 contentPath: contentPath, 103 fileInfo: newJavaArchiveFilename(currentFilepath), 104 detectNested: detectNested, 105 cfg: cfg, 106 }, cleanupFn, nil 107 } 108 109 // parse the loaded archive and return all packages found. 110 func (j *archiveParser) parse(ctx context.Context) ([]pkg.Package, []artifact.Relationship, error) { 111 var pkgs []pkg.Package 112 var relationships []artifact.Relationship 113 114 // find the parent package from the java manifest 115 parentPkg, err := j.discoverMainPackage(ctx) 116 if err != nil { 117 return nil, nil, fmt.Errorf("could not generate package from %s: %w", j.location, err) 118 } 119 120 // find aux packages from pom.properties/pom.xml and potentially modify the existing parentPkg 121 // NOTE: we cannot generate sha1 digests from packages discovered via pom.properties/pom.xml 122 auxPkgs, err := j.discoverPkgsFromAllMavenFiles(ctx, parentPkg) 123 if err != nil { 124 return nil, nil, err 125 } 126 pkgs = append(pkgs, auxPkgs...) 127 128 if j.detectNested { 129 // find nested java archive packages 130 nestedPkgs, nestedRelationships, err := j.discoverPkgsFromNestedArchives(ctx, parentPkg) 131 if err != nil { 132 return nil, nil, err 133 } 134 pkgs = append(pkgs, nestedPkgs...) 135 relationships = append(relationships, nestedRelationships...) 136 } 137 138 // lastly, add the parent package to the list (assuming the parent exists) 139 if parentPkg != nil { 140 pkgs = append([]pkg.Package{*parentPkg}, pkgs...) 141 } 142 143 // add pURLs to all packages found 144 // note: since package information may change after initial creation when parsing multiple locations within the 145 // jar, we wait until the conclusion of the parsing process before synthesizing pURLs. 146 for i := range pkgs { 147 p := &pkgs[i] 148 if m, ok := p.Metadata.(pkg.JavaArchive); ok { 149 p.PURL = packageURL(p.Name, p.Version, m) 150 } else { 151 log.WithFields("package", p.String()).Warn("unable to extract java metadata to generate purl") 152 } 153 p.SetID() 154 } 155 156 return pkgs, relationships, nil 157 } 158 159 // discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages. 160 func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package, error) { 161 // search and parse java manifest files 162 manifestMatches := j.fileManifest.GlobMatch(false, manifestGlob) 163 if len(manifestMatches) > 1 { 164 return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches) 165 } else if len(manifestMatches) == 0 { 166 // we did not find any manifests, but that may not be a problem (there may be other information to generate packages for) 167 return nil, nil 168 } 169 170 // fetch the manifest file 171 contents, err := intFile.ContentsFromZip(j.archivePath, manifestMatches...) 172 if err != nil { 173 return nil, fmt.Errorf("unable to extract java manifests (%s): %w", j.location, err) 174 } 175 176 // parse the manifest file into a rich object 177 manifestContents := contents[manifestMatches[0]] 178 manifest, err := parseJavaManifest(j.archivePath, strings.NewReader(manifestContents)) 179 if err != nil { 180 log.Warnf("failed to parse java manifest (%s): %+v", j.location, err) 181 return nil, nil 182 } 183 184 // check for existence of Weave-Classes manifest key in order to exclude jars getting misrepresented as 185 // their targeted counterparts, e.g. newrelic spring and tomcat instrumentation 186 if _, ok := manifest.Main.Get("Weave-Classes"); ok { 187 log.Debugf("excluding archive due to Weave-Classes manifest entry: %s", j.location) 188 return nil, nil 189 } 190 191 // grab and assign digest for the entire archive 192 digests, err := getDigestsFromArchive(j.archivePath) 193 if err != nil { 194 return nil, err 195 } 196 197 licenses, name, version, err := j.parseLicenses(ctx, manifest) 198 if err != nil { 199 return nil, err 200 } 201 202 return &pkg.Package{ 203 // TODO: maybe select name should just have a pom properties in it? 204 Name: name, 205 Version: version, 206 Language: pkg.Java, 207 Licenses: pkg.NewLicenseSet(licenses...), 208 Locations: file.NewLocationSet( 209 j.location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 210 ), 211 Type: j.fileInfo.pkgType(), 212 Metadata: pkg.JavaArchive{ 213 VirtualPath: j.location.Path(), 214 Manifest: manifest, 215 ArchiveDigests: digests, 216 }, 217 }, nil 218 } 219 220 func (j *archiveParser) parseLicenses(ctx context.Context, manifest *pkg.JavaManifest) ([]pkg.License, string, string, error) { 221 // we use j.location because we want to associate the license declaration with where we discovered the contents in the manifest 222 // TODO: when we support locations of paths within archives we should start passing the specific manifest location object instead of the top jar 223 licenses := pkg.NewLicensesFromLocation(j.location, selectLicenses(manifest)...) 224 /* 225 We should name and version from, in this order: 226 1. pom.properties if we find exactly 1 227 2. pom.xml if we find exactly 1 228 3. manifest 229 4. filename 230 */ 231 name, version, pomLicenses := j.guessMainPackageNameAndVersionFromPomInfo(ctx) 232 if name == "" { 233 name = selectName(manifest, j.fileInfo) 234 } 235 if version == "" { 236 version = selectVersion(manifest, j.fileInfo) 237 } 238 if len(licenses) == 0 { 239 // Today we don't have a way to distinguish between licenses from the manifest and licenses from the pom.xml 240 // until the file.Location object can support sub-paths (i.e. paths within archives, recursively; issue https://github.com/anchore/syft/issues/2211). 241 // Until then it's less confusing to use the licenses from the pom.xml only if the manifest did not list any. 242 licenses = append(licenses, pomLicenses...) 243 } 244 245 if len(licenses) == 0 { 246 fileLicenses, err := j.getLicenseFromFileInArchive() 247 if err != nil { 248 return nil, "", "", err 249 } 250 if fileLicenses != nil { 251 licenses = append(licenses, fileLicenses...) 252 } 253 } 254 255 // If we didn't find any licenses in the archive so far, we'll try again in Maven Central using groupIDFromJavaMetadata 256 if len(licenses) == 0 && j.cfg.UseNetwork { 257 licenses = findLicenseFromJavaMetadata(ctx, name, manifest, version, j, licenses) 258 } 259 260 return licenses, name, version, nil 261 } 262 263 func findLicenseFromJavaMetadata(ctx context.Context, name string, manifest *pkg.JavaManifest, version string, j *archiveParser, licenses []pkg.License) []pkg.License { 264 var groupID = name 265 if gID := groupIDFromJavaMetadata(name, pkg.JavaArchive{Manifest: manifest}); gID != "" { 266 groupID = gID 267 } 268 pomLicenses := recursivelyFindLicensesFromParentPom(ctx, groupID, name, version, j.cfg) 269 270 if len(pomLicenses) == 0 { 271 // Try removing the last part of the groupId, as sometimes it duplicates the artifactId 272 packages := strings.Split(groupID, ".") 273 groupID = strings.Join(packages[:len(packages)-1], ".") 274 pomLicenses = recursivelyFindLicensesFromParentPom(ctx, groupID, name, version, j.cfg) 275 } 276 277 if len(pomLicenses) > 0 { 278 pkgLicenses := pkg.NewLicensesFromLocation(j.location, pomLicenses...) 279 if pkgLicenses != nil { 280 licenses = append(licenses, pkgLicenses...) 281 } 282 } 283 return licenses 284 } 285 286 type parsedPomProject struct { 287 *pkg.JavaPomProject 288 Licenses []pkg.License 289 } 290 291 func (j *archiveParser) guessMainPackageNameAndVersionFromPomInfo(ctx context.Context) (name, version string, licenses []pkg.License) { 292 pomPropertyMatches := j.fileManifest.GlobMatch(false, pomPropertiesGlob) 293 pomMatches := j.fileManifest.GlobMatch(false, pomXMLGlob) 294 var pomPropertiesObject pkg.JavaPomProperties 295 var pomProjectObject *parsedPomProject 296 297 // Find the pom.properties/pom.xml if the names seem like a plausible match 298 properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, pomPropertyMatches) 299 projects, _ := pomProjectByParentPath(j.archivePath, j.location, pomMatches) 300 301 for parentPath, propertiesObj := range properties { 302 if artifactIDMatchesFilename(propertiesObj.ArtifactID, j.fileInfo.name) { 303 pomPropertiesObject = propertiesObj 304 if proj, exists := projects[parentPath]; exists { 305 pomProjectObject = proj 306 break 307 } 308 } 309 } 310 311 name = pomPropertiesObject.ArtifactID 312 if name == "" && pomProjectObject != nil { 313 name = pomProjectObject.ArtifactID 314 } 315 version = pomPropertiesObject.Version 316 if version == "" && pomProjectObject != nil { 317 version = pomProjectObject.Version 318 } 319 if j.cfg.UseNetwork { 320 if pomProjectObject == nil { 321 // If we have no pom.xml, check maven central using pom.properties 322 parentLicenses := recursivelyFindLicensesFromParentPom(ctx, pomPropertiesObject.GroupID, pomPropertiesObject.ArtifactID, pomPropertiesObject.Version, j.cfg) 323 if len(parentLicenses) > 0 { 324 for _, licenseName := range parentLicenses { 325 licenses = append(licenses, pkg.NewLicenseFromFields(licenseName, "", nil)) 326 } 327 } 328 } else { 329 findPomLicenses(ctx, pomProjectObject, j.cfg) 330 } 331 } 332 333 if pomProjectObject != nil { 334 licenses = pomProjectObject.Licenses 335 } 336 337 return name, version, licenses 338 } 339 340 func artifactIDMatchesFilename(artifactID, fileName string) bool { 341 if artifactID == "" || fileName == "" { 342 return false 343 } 344 return strings.HasPrefix(artifactID, fileName) || strings.HasSuffix(fileName, artifactID) 345 } 346 347 func findPomLicenses(ctx context.Context, pomProjectObject *parsedPomProject, cfg ArchiveCatalogerConfig) { 348 // If we don't have any licenses until now, and if we have a parent Pom, then we'll check the parent pom in maven central for licenses. 349 if pomProjectObject != nil && pomProjectObject.Parent != nil && len(pomProjectObject.Licenses) == 0 { 350 parentLicenses := recursivelyFindLicensesFromParentPom( 351 ctx, 352 pomProjectObject.Parent.GroupID, 353 pomProjectObject.Parent.ArtifactID, 354 pomProjectObject.Parent.Version, 355 cfg) 356 357 if len(parentLicenses) > 0 { 358 for _, licenseName := range parentLicenses { 359 pomProjectObject.Licenses = append(pomProjectObject.Licenses, pkg.NewLicenseFromFields(licenseName, "", nil)) 360 } 361 } 362 } 363 } 364 365 // discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given 366 // parent package, returning all listed Java packages found for each pom 367 // properties discovered and potentially updating the given parentPkg with new 368 // data. 369 func (j *archiveParser) discoverPkgsFromAllMavenFiles(ctx context.Context, parentPkg *pkg.Package) ([]pkg.Package, error) { 370 if parentPkg == nil { 371 return nil, nil 372 } 373 374 var pkgs []pkg.Package 375 376 // pom.properties 377 properties, err := pomPropertiesByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomPropertiesGlob)) 378 if err != nil { 379 return nil, err 380 } 381 382 // pom.xml 383 projects, err := pomProjectByParentPath(j.archivePath, j.location, j.fileManifest.GlobMatch(false, pomXMLGlob)) 384 if err != nil { 385 return nil, err 386 } 387 388 for parentPath, propertiesObj := range properties { 389 var pomProject *parsedPomProject 390 if proj, exists := projects[parentPath]; exists { 391 pomProject = proj 392 } 393 394 pkgFromPom := newPackageFromMavenData(ctx, propertiesObj, pomProject, parentPkg, j.location, j.cfg) 395 if pkgFromPom != nil { 396 pkgs = append(pkgs, *pkgFromPom) 397 } 398 } 399 400 return pkgs, nil 401 } 402 403 func getDigestsFromArchive(archivePath string) ([]file.Digest, error) { 404 archiveCloser, err := os.Open(archivePath) 405 if err != nil { 406 return nil, fmt.Errorf("unable to open archive path (%s): %w", archivePath, err) 407 } 408 defer archiveCloser.Close() 409 410 // grab and assign digest for the entire archive 411 digests, err := intFile.NewDigestsFromFile(archiveCloser, javaArchiveHashes) 412 if err != nil { 413 log.Warnf("failed to create digest for file=%q: %+v", archivePath, err) 414 } 415 416 return digests, nil 417 } 418 419 func (j *archiveParser) getLicenseFromFileInArchive() ([]pkg.License, error) { 420 var fileLicenses []pkg.License 421 for _, filename := range licenses.FileNames() { 422 licenseMatches := j.fileManifest.GlobMatch(true, "/META-INF/"+filename) 423 if len(licenseMatches) == 0 { 424 // Try the root directory if it's not in META-INF 425 licenseMatches = j.fileManifest.GlobMatch(true, "/"+filename) 426 } 427 428 if len(licenseMatches) > 0 { 429 contents, err := intFile.ContentsFromZip(j.archivePath, licenseMatches...) 430 if err != nil { 431 return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err) 432 } 433 434 for _, licenseMatch := range licenseMatches { 435 licenseContents := contents[licenseMatch] 436 parsed, err := licenses.Parse(strings.NewReader(licenseContents), j.location) 437 if err != nil { 438 return nil, err 439 } 440 441 if len(parsed) > 0 { 442 fileLicenses = append(fileLicenses, parsed...) 443 } 444 } 445 } 446 } 447 448 return fileLicenses, nil 449 } 450 451 func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { 452 // we know that all java archives are zip formatted files, so we can use the shared zip helper 453 return discoverPkgsFromZip(ctx, j.location, j.archivePath, j.contentPath, j.fileManifest, parentPkg, j.cfg) 454 } 455 456 // discoverPkgsFromZip finds Java archives within Java archives, returning all listed Java packages found and 457 // associating each discovered package to the given parent package. 458 func discoverPkgsFromZip(ctx context.Context, location file.Location, archivePath, contentPath string, fileManifest intFile.ZipFileManifest, parentPkg *pkg.Package, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { 459 // search and parse pom.properties files & fetch the contents 460 openers, err := intFile.ExtractFromZipToUniqueTempFile(archivePath, contentPath, fileManifest.GlobMatch(false, archiveFormatGlobs...)...) 461 if err != nil { 462 return nil, nil, fmt.Errorf("unable to extract files from zip: %w", err) 463 } 464 465 return discoverPkgsFromOpeners(ctx, location, openers, parentPkg, cfg) 466 } 467 468 // discoverPkgsFromOpeners finds Java archives within the given files and associates them with the given parent package. 469 func discoverPkgsFromOpeners(ctx context.Context, location file.Location, openers map[string]intFile.Opener, parentPkg *pkg.Package, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { 470 var pkgs []pkg.Package 471 var relationships []artifact.Relationship 472 473 for pathWithinArchive, archiveOpener := range openers { 474 nestedPkgs, nestedRelationships, err := discoverPkgsFromOpener(ctx, location, pathWithinArchive, archiveOpener, cfg) 475 if err != nil { 476 log.WithFields("location", location.Path()).Warnf("unable to discover java packages from opener: %+v", err) 477 continue 478 } 479 480 // attach the parent package to all discovered packages that are not already associated with a java archive 481 for _, p := range nestedPkgs { 482 if metadata, ok := p.Metadata.(pkg.JavaArchive); ok { 483 if metadata.Parent == nil { 484 metadata.Parent = parentPkg 485 } 486 p.Metadata = metadata 487 } 488 pkgs = append(pkgs, p) 489 } 490 491 relationships = append(relationships, nestedRelationships...) 492 } 493 494 return pkgs, relationships, nil 495 } 496 497 // discoverPkgsFromOpener finds Java archives within the given file. 498 func discoverPkgsFromOpener(ctx context.Context, location file.Location, pathWithinArchive string, archiveOpener intFile.Opener, cfg ArchiveCatalogerConfig) ([]pkg.Package, []artifact.Relationship, error) { 499 archiveReadCloser, err := archiveOpener.Open() 500 if err != nil { 501 return nil, nil, fmt.Errorf("unable to open archived file from tempdir: %w", err) 502 } 503 defer func() { 504 if closeErr := archiveReadCloser.Close(); closeErr != nil { 505 log.Warnf("unable to close archived file from tempdir: %+v", closeErr) 506 } 507 }() 508 509 nestedPath := fmt.Sprintf("%s:%s", location.Path(), pathWithinArchive) 510 nestedLocation := file.NewLocationFromCoordinates(location.Coordinates) 511 nestedLocation.AccessPath = nestedPath 512 gap := newGenericArchiveParserAdapter(cfg) 513 nestedPkgs, nestedRelationships, err := gap.parseJavaArchive(ctx, nil, nil, file.LocationReadCloser{ 514 Location: nestedLocation, 515 ReadCloser: archiveReadCloser, 516 }) 517 if err != nil { 518 return nil, nil, fmt.Errorf("unable to process nested java archive (%s): %w", pathWithinArchive, err) 519 } 520 521 return nestedPkgs, nestedRelationships, nil 522 } 523 524 func pomPropertiesByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]pkg.JavaPomProperties, error) { 525 contentsOfMavenPropertiesFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...) 526 if err != nil { 527 return nil, fmt.Errorf("unable to extract maven files: %w", err) 528 } 529 530 propertiesByParentPath := make(map[string]pkg.JavaPomProperties) 531 for filePath, fileContents := range contentsOfMavenPropertiesFiles { 532 pomProperties, err := parsePomProperties(filePath, strings.NewReader(fileContents)) 533 if err != nil { 534 log.WithFields("contents-path", filePath, "location", location.Path()).Warnf("failed to parse pom.properties: %+v", err) 535 continue 536 } 537 538 if pomProperties == nil { 539 continue 540 } 541 542 if pomProperties.Version == "" || pomProperties.ArtifactID == "" { 543 // TODO: if there is no parentPkg (no java manifest) one of these poms could be the parent. We should discover the right parent and attach the correct info accordingly to each discovered package 544 continue 545 } 546 547 propertiesByParentPath[path.Dir(filePath)] = *pomProperties 548 } 549 550 return propertiesByParentPath, nil 551 } 552 553 func pomProjectByParentPath(archivePath string, location file.Location, extractPaths []string) (map[string]*parsedPomProject, error) { 554 contentsOfMavenProjectFiles, err := intFile.ContentsFromZip(archivePath, extractPaths...) 555 if err != nil { 556 return nil, fmt.Errorf("unable to extract maven files: %w", err) 557 } 558 559 projectByParentPath := make(map[string]*parsedPomProject) 560 for filePath, fileContents := range contentsOfMavenProjectFiles { 561 // TODO: when we support locations of paths within archives we should start passing the specific pom.xml location object instead of the top jar 562 pomProject, err := parsePomXMLProject(filePath, strings.NewReader(fileContents), location) 563 if err != nil { 564 log.WithFields("contents-path", filePath, "location", location.Path()).Warnf("failed to parse pom.xml: %+v", err) 565 continue 566 } 567 568 if pomProject == nil { 569 continue 570 } 571 572 // If we don't have a version, then maybe the parent pom has it... 573 if (pomProject.Parent == nil && pomProject.Version == "") || pomProject.ArtifactID == "" { 574 // TODO: if there is no parentPkg (no java manifest) one of these poms could be the parent. We should discover the right parent and attach the correct info accordingly to each discovered package 575 continue 576 } 577 578 projectByParentPath[path.Dir(filePath)] = pomProject 579 } 580 return projectByParentPath, nil 581 } 582 583 // newPackageFromMavenData processes a single Maven POM properties for a given parent package, returning all listed Java packages found and 584 // associating each discovered package to the given parent package. Note the pom.xml is optional, the pom.properties is not. 585 func newPackageFromMavenData(ctx context.Context, pomProperties pkg.JavaPomProperties, parsedPomProject *parsedPomProject, parentPkg *pkg.Package, location file.Location, cfg ArchiveCatalogerConfig) *pkg.Package { 586 // keep the artifact name within the virtual path if this package does not match the parent package 587 vPathSuffix := "" 588 groupID := "" 589 if parentMetadata, ok := parentPkg.Metadata.(pkg.JavaArchive); ok { 590 groupID = groupIDFromJavaMetadata(parentPkg.Name, parentMetadata) 591 } 592 593 parentKey := fmt.Sprintf("%s:%s:%s", groupID, parentPkg.Name, parentPkg.Version) 594 // Since we don't have a package yet, it's important to use the same `field: value` association that we used when creating the parent package 595 // See below where Name => pomProperties.ArtifactID and Version => pomProperties.Version. We want to check for potentially nested identical 596 // packages and create equal virtual paths so they are de duped in the future 597 pomProjectKey := fmt.Sprintf("%s:%s:%s", pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version) 598 if parentKey != pomProjectKey { 599 // build a new virtual path suffix for the package that is different from the parent package 600 // we want to use the GroupID and ArtifactID here to preserve uniqueness 601 // Some packages have the same name but different group IDs (e.g. "org.glassfish.jaxb/jaxb-core", "com.sun.xml.bind/jaxb-core") 602 // https://github.com/anchore/syft/issues/1944 603 vPathSuffix += ":" + pomProperties.GroupID + ":" + pomProperties.ArtifactID 604 } 605 virtualPath := location.Path() + vPathSuffix 606 607 var pkgPomProject *pkg.JavaPomProject 608 licenses := make([]pkg.License, 0) 609 610 if cfg.UseNetwork { 611 if parsedPomProject == nil { 612 // If we have no pom.xml, check maven central using pom.properties 613 parentLicenses := recursivelyFindLicensesFromParentPom(ctx, pomProperties.GroupID, pomProperties.ArtifactID, pomProperties.Version, cfg) 614 if len(parentLicenses) > 0 { 615 for _, licenseName := range parentLicenses { 616 licenses = append(licenses, pkg.NewLicenseFromFields(licenseName, "", nil)) 617 } 618 } 619 } else { 620 findPomLicenses(ctx, parsedPomProject, cfg) 621 } 622 } 623 624 if parsedPomProject != nil { 625 pkgPomProject = parsedPomProject.JavaPomProject 626 licenses = append(licenses, parsedPomProject.Licenses...) 627 } 628 629 p := pkg.Package{ 630 Name: pomProperties.ArtifactID, 631 Version: pomProperties.Version, 632 Locations: file.NewLocationSet( 633 location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 634 ), 635 Licenses: pkg.NewLicenseSet(licenses...), 636 Language: pkg.Java, 637 Type: pomProperties.PkgTypeIndicated(), 638 Metadata: pkg.JavaArchive{ 639 VirtualPath: virtualPath, 640 PomProperties: &pomProperties, 641 PomProject: pkgPomProject, 642 Parent: parentPkg, 643 }, 644 } 645 646 if packageIdentitiesMatch(p, parentPkg) { 647 updateParentPackage(p, parentPkg) 648 return nil 649 } 650 651 return &p 652 } 653 654 func packageIdentitiesMatch(p pkg.Package, parentPkg *pkg.Package) bool { 655 metadata, ok := p.Metadata.(pkg.JavaArchive) 656 parentMetadata, parentOk := parentPkg.Metadata.(pkg.JavaArchive) 657 if !ok || !parentOk { 658 switch { 659 case !ok: 660 log.WithFields("package", p.String()).Trace("unable to extract java metadata to check for matching package identity for package: %s", p.Name) 661 case !parentOk: 662 log.WithFields("package", parentPkg.String()).Trace("unable to extract java metadata to check for matching package identity for package: %s", parentPkg.Name) 663 } 664 // if we can't extract metadata, we can check for matching identities via the package name 665 // this is not ideal, but it's better than nothing - this should not be used if we have Metadata 666 667 return uniquePkgKey("", &p) == uniquePkgKey("", parentPkg) 668 } 669 670 // try to determine identity with the metadata 671 groupID := groupIDFromJavaMetadata(p.Name, metadata) 672 parentGroupID := groupIDFromJavaMetadata(parentPkg.Name, parentMetadata) 673 if uniquePkgKey(groupID, &p) == uniquePkgKey(parentGroupID, parentPkg) { 674 return true 675 } 676 677 // the virtual path matches... 678 if parentMetadata.VirtualPath == metadata.VirtualPath { 679 return true 680 } 681 682 // the pom artifactId is the parent name 683 // note: you CANNOT use name-is-subset-of-artifact-id or vice versa --this is too generic. Shaded jars are a good 684 // example of this: where the package name is "cloudbees-analytics-segment-driver" and a child is "analytics", but 685 // they do not indicate the same package. 686 // NOTE: artifactId might not be a good indicator of uniqueness since archives can contain forks with the same name 687 // from different groups (e.g. "org.glassfish.jaxb.jaxb-core" and "com.sun.xml.bind.jaxb-core") 688 // we will use this check as a last resort 689 if metadata.PomProperties != nil { 690 if metadata.PomProperties.ArtifactID != "" && parentPkg.Name == metadata.PomProperties.ArtifactID { 691 return true 692 } 693 } 694 return false 695 } 696 697 func updateParentPackage(p pkg.Package, parentPkg *pkg.Package) { 698 // we've run across more information about our parent package, add this info to the parent package metadata 699 // the pom properties is typically a better source of information for name and version than the manifest 700 parentPkg.Name = p.Name 701 parentPkg.Version = p.Version 702 703 // we may have learned more about the type via data in the pom properties 704 parentPkg.Type = p.Type 705 706 metadata, ok := p.Metadata.(pkg.JavaArchive) 707 if !ok { 708 return 709 } 710 pomPropertiesCopy := *metadata.PomProperties 711 712 // keep the pom properties, but don't overwrite existing pom properties 713 parentMetadata, ok := parentPkg.Metadata.(pkg.JavaArchive) 714 if ok && parentMetadata.PomProperties == nil { 715 parentMetadata.PomProperties = &pomPropertiesCopy 716 parentPkg.Metadata = parentMetadata 717 } 718 }