github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/java/parse_pom_xml.go (about)

     1  package java
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"strings"
     7  
     8  	"github.com/anchore/syft/internal"
     9  	"github.com/anchore/syft/internal/log"
    10  	"github.com/anchore/syft/internal/unknown"
    11  	"github.com/anchore/syft/syft/artifact"
    12  	"github.com/anchore/syft/syft/file"
    13  	"github.com/anchore/syft/syft/pkg"
    14  	"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
    15  	"github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven"
    16  )
    17  
// pomXMLGlob is a glob for any path whose file name ends in "pom.xml".
// NOTE(review): nothing visible in this file references pomXMLGlob (Catalog passes the literal "**/pom.xml"
// instead) — presumably it is consumed elsewhere in the package; confirm before removing or reusing it.
//
// pomCatalogerName is the identifier reported by pomXMLCataloger.Name and recorded as FoundBy on the
// packages this file creates.
const (
	pomXMLGlob       = "**/*pom.xml"
	pomCatalogerName = "java-pom-cataloger"
)
    22  
// pomXMLCataloger catalogs Maven projects found as pom.xml files.
type pomXMLCataloger struct {
	// cfg supplies the maven resolver configuration (via mavenConfig) and the
	// ResolveTransitiveDependencies switch read by Catalog.
	cfg ArchiveCatalogerConfig
}
    26  
// Name returns the fixed identifier of this cataloger (pomCatalogerName).
func (p pomXMLCataloger) Name() string {
	return pomCatalogerName
}
    30  
    31  func (p pomXMLCataloger) Catalog(ctx context.Context, fileResolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
    32  	locations, err := fileResolver.FilesByGlob("**/pom.xml")
    33  	if err != nil {
    34  		return nil, nil, err
    35  	}
    36  
    37  	r := maven.NewResolver(fileResolver, p.cfg.mavenConfig())
    38  
    39  	var errs error
    40  	var poms []*maven.Project
    41  	pomLocations := map[*maven.Project]file.Location{}
    42  	for _, pomLocation := range locations {
    43  		pom, err := readPomFromLocation(fileResolver, pomLocation)
    44  		if err != nil || pom == nil {
    45  			log.WithFields("error", err, "pomLocation", pomLocation).Debug("error while reading pom")
    46  			errs = unknown.Appendf(errs, pomLocation, "error reading pom.xml: %w", err)
    47  			continue
    48  		}
    49  
    50  		poms = append(poms, pom)
    51  		pomLocations[pom] = pomLocation
    52  		r.AddPom(ctx, pom, pomLocation)
    53  	}
    54  
    55  	var pkgs []pkg.Package
    56  	var relationships []artifact.Relationship
    57  	resolved := map[maven.ID]*pkg.Package{}
    58  
    59  	// catalog all the main packages first so these can be referenced later when building the dependency graph
    60  	for _, pom := range poms {
    61  		location := pomLocations[pom] // should always exist
    62  
    63  		id := r.ResolveID(ctx, pom)
    64  		mainPkg := newPackageFromMavenPom(ctx, r, pom, location)
    65  		if mainPkg == nil {
    66  			continue
    67  		}
    68  		resolved[id] = mainPkg
    69  		pkgs = append(pkgs, licenses.RelativeToPackage(ctx, fileResolver, *mainPkg))
    70  	}
    71  
    72  	// catalog all dependencies
    73  	for _, pom := range poms {
    74  		location := pomLocations[pom] // should always exist
    75  
    76  		id := r.ResolveID(ctx, pom)
    77  		mainPkg := resolved[id]
    78  
    79  		newPkgs, newRelationships, newErrs := collectDependencies(ctx, r, resolved, mainPkg, pom, location, p.cfg.ResolveTransitiveDependencies)
    80  		pkgs = append(pkgs, newPkgs...)
    81  		relationships = append(relationships, newRelationships...)
    82  		errs = unknown.Join(errs, newErrs)
    83  	}
    84  
    85  	return pkgs, relationships, errs
    86  }
    87  
    88  func readPomFromLocation(fileResolver file.Resolver, pomLocation file.Location) (*maven.Project, error) {
    89  	contents, err := fileResolver.FileContentsByLocation(pomLocation)
    90  	if err != nil {
    91  		return nil, err
    92  	}
    93  	defer internal.CloseAndLogError(contents, pomLocation.RealPath)
    94  	return maven.ParsePomXML(contents)
    95  }
    96  
    97  // newPackageFromMavenPom processes a single Maven POM for a given parent package, returning only the main package from the pom
    98  func newPackageFromMavenPom(ctx context.Context, r *maven.Resolver, pom *maven.Project, location file.Location) *pkg.Package {
    99  	id := r.ResolveID(ctx, pom)
   100  	parent, err := r.ResolveParent(ctx, pom)
   101  	if err != nil {
   102  		// this is expected in many cases, there will be no network access and the maven resolver is unable to
   103  		// look up information, so we can continue with what little information we have
   104  		log.Tracef("unable to resolve parent due to: %v", err)
   105  	}
   106  
   107  	var javaPomParent *pkg.JavaPomParent
   108  	if parent != nil { // parent is returned in both cases: when it is resolved or synthesized from the pom.parent info
   109  		parentID := r.ResolveID(ctx, parent)
   110  		javaPomParent = &pkg.JavaPomParent{
   111  			GroupID:    parentID.GroupID,
   112  			ArtifactID: parentID.ArtifactID,
   113  			Version:    parentID.Version,
   114  		}
   115  	}
   116  
   117  	pomLicenses, err := r.ResolveLicenses(ctx, pom)
   118  	if err != nil {
   119  		log.Tracef("error resolving licenses: %v", err)
   120  	}
   121  	pkgLicenses := toPkgLicenses(ctx, &location, pomLicenses)
   122  
   123  	m := pkg.JavaArchive{
   124  		PomProject: &pkg.JavaPomProject{
   125  			Parent:      javaPomParent,
   126  			GroupID:     id.GroupID,
   127  			ArtifactID:  id.ArtifactID,
   128  			Version:     id.Version,
   129  			Name:        r.ResolveProperty(ctx, pom, pom.Name),
   130  			Description: r.ResolveProperty(ctx, pom, pom.Description),
   131  			URL:         r.ResolveProperty(ctx, pom, pom.URL),
   132  		},
   133  	}
   134  
   135  	p := &pkg.Package{
   136  		Name:    id.ArtifactID,
   137  		Version: id.Version,
   138  		Locations: file.NewLocationSet(
   139  			location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   140  		),
   141  		Licenses: pkg.NewLicenseSet(pkgLicenses...),
   142  		Language: pkg.Java,
   143  		Type:     pkg.JavaPkg,
   144  		FoundBy:  pomCatalogerName,
   145  		PURL:     packageURL(id.ArtifactID, id.Version, m),
   146  		Metadata: m,
   147  	}
   148  
   149  	finalizePackage(p)
   150  
   151  	return p
   152  }
   153  
   154  func collectDependencies(ctx context.Context, r *maven.Resolver, resolved map[maven.ID]*pkg.Package, parentPkg *pkg.Package, pom *maven.Project, loc file.Location, includeTransitiveDependencies bool) ([]pkg.Package, []artifact.Relationship, error) {
   155  	var errs error
   156  	var pkgs []pkg.Package
   157  	var relationships []artifact.Relationship
   158  
   159  	pomID := r.ResolveID(ctx, pom)
   160  	for _, dep := range maven.DirectPomDependencies(pom) {
   161  		depID := r.ResolveDependencyID(ctx, pom, dep)
   162  		log.WithFields("pomLocation", loc, "mavenID", pomID, "dependencyID", depID).Trace("adding maven pom dependency")
   163  
   164  		// we may have a reference to a package pointing to an existing pom on the filesystem, but we don't want to duplicate these entries
   165  		depPkg := resolved[depID]
   166  		if depPkg == nil {
   167  			p, err := newPackageFromDependency(
   168  				ctx,
   169  				r,
   170  				pom,
   171  				dep,
   172  				loc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
   173  			)
   174  			if err != nil {
   175  				log.WithFields("error", err, "pomLocation", loc, "mavenID", pomID, "dependencyID", depID).Debugf("error adding dependency")
   176  			}
   177  
   178  			if p == nil {
   179  				// we don't have a valid package, just continue to the next dependency
   180  				continue
   181  			}
   182  			depPkg = p
   183  			resolved[depID] = depPkg
   184  
   185  			// only resolve transitive dependencies if we're not already looking these up for the specific package
   186  			if includeTransitiveDependencies && depID.Valid() {
   187  				depPom, err := r.FindPom(ctx, depID.GroupID, depID.ArtifactID, depID.Version)
   188  				if err != nil {
   189  					log.WithFields("mavenID", depID, "error", err).Debug("error finding pom")
   190  				}
   191  				if depPom != nil {
   192  					transitivePkgs, transitiveRelationships, transitiveErrs := collectDependencies(ctx, r, resolved, depPkg, depPom, loc, includeTransitiveDependencies)
   193  					pkgs = append(pkgs, transitivePkgs...)
   194  					relationships = append(relationships, transitiveRelationships...)
   195  					errs = unknown.Join(errs, transitiveErrs)
   196  				}
   197  			}
   198  		}
   199  
   200  		pkgs = append(pkgs, *depPkg)
   201  		if parentPkg != nil {
   202  			relationships = append(relationships, artifact.Relationship{
   203  				From: *depPkg,
   204  				To:   *parentPkg,
   205  				Type: artifact.DependencyOfRelationship,
   206  			})
   207  		}
   208  	}
   209  
   210  	return pkgs, relationships, errs
   211  }
   212  
   213  func newPomProject(ctx context.Context, r *maven.Resolver, path string, pom *maven.Project) *pkg.JavaPomProject {
   214  	id := r.ResolveID(ctx, pom)
   215  	name := r.ResolveProperty(ctx, pom, pom.Name)
   216  	projectURL := r.ResolveProperty(ctx, pom, pom.URL)
   217  
   218  	log.WithFields("path", path, "artifactID", id.ArtifactID, "name", name, "projectURL", projectURL).Trace("parsing pom.xml")
   219  	return &pkg.JavaPomProject{
   220  		Path:        path,
   221  		Parent:      pomParent(ctx, r, pom),
   222  		GroupID:     id.GroupID,
   223  		ArtifactID:  id.ArtifactID,
   224  		Version:     id.Version,
   225  		Name:        name,
   226  		Description: cleanDescription(r.ResolveProperty(ctx, pom, pom.Description)),
   227  		URL:         projectURL,
   228  	}
   229  }
   230  
   231  func newPackageFromDependency(ctx context.Context, r *maven.Resolver, pom *maven.Project, dep maven.Dependency, locations ...file.Location) (*pkg.Package, error) {
   232  	id := r.ResolveDependencyID(ctx, pom, dep)
   233  
   234  	var err error
   235  	var pkgLicenses []pkg.License
   236  	dependencyPom, depErr := r.FindPom(ctx, id.GroupID, id.ArtifactID, id.Version)
   237  	if depErr != nil {
   238  		err = errors.Join(err, depErr)
   239  	}
   240  
   241  	var pomProject *pkg.JavaPomProject
   242  	if dependencyPom != nil {
   243  		depLicenses, _ := r.ResolveLicenses(ctx, dependencyPom)
   244  		pkgLicenses = append(pkgLicenses, toPkgLicenses(ctx, nil, depLicenses)...)
   245  		pomProject = &pkg.JavaPomProject{
   246  			Parent:      pomParent(ctx, r, dependencyPom),
   247  			GroupID:     id.GroupID,
   248  			ArtifactID:  id.ArtifactID,
   249  			Version:     id.Version,
   250  			Name:        r.ResolveProperty(ctx, pom, pom.Name),
   251  			Description: r.ResolveProperty(ctx, pom, pom.Description),
   252  			URL:         r.ResolveProperty(ctx, pom, pom.URL),
   253  		}
   254  	}
   255  
   256  	m := pkg.JavaArchive{
   257  		PomProperties: &pkg.JavaPomProperties{
   258  			GroupID:    id.GroupID,
   259  			ArtifactID: id.ArtifactID,
   260  			Scope:      r.ResolveProperty(ctx, pom, dep.Scope),
   261  		},
   262  		PomProject: pomProject,
   263  	}
   264  
   265  	p := &pkg.Package{
   266  		Name:      id.ArtifactID,
   267  		Version:   id.Version,
   268  		Locations: file.NewLocationSet(locations...),
   269  		Licenses:  pkg.NewLicenseSet(pkgLicenses...),
   270  		PURL:      packageURL(id.ArtifactID, id.Version, m),
   271  		Language:  pkg.Java,
   272  		Type:      pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet?
   273  		FoundBy:   pomCatalogerName,
   274  		Metadata:  m,
   275  	}
   276  
   277  	finalizePackage(p)
   278  
   279  	return p, err
   280  }
   281  
   282  func pomParent(ctx context.Context, r *maven.Resolver, pom *maven.Project) *pkg.JavaPomParent {
   283  	if pom == nil || pom.Parent == nil {
   284  		return nil
   285  	}
   286  
   287  	groupID := r.ResolveProperty(ctx, pom, pom.Parent.GroupID)
   288  	artifactID := r.ResolveProperty(ctx, pom, pom.Parent.ArtifactID)
   289  	version := r.ResolveProperty(ctx, pom, pom.Parent.Version)
   290  
   291  	if groupID == "" && artifactID == "" && version == "" {
   292  		return nil
   293  	}
   294  
   295  	return &pkg.JavaPomParent{
   296  		GroupID:    groupID,
   297  		ArtifactID: artifactID,
   298  		Version:    version,
   299  	}
   300  }
   301  
   302  func cleanDescription(original string) (cleaned string) {
   303  	descriptionLines := strings.Split(original, "\n")
   304  	for _, line := range descriptionLines {
   305  		line = strings.TrimSpace(line)
   306  		if len(line) == 0 {
   307  			continue
   308  		}
   309  		cleaned += line + " "
   310  	}
   311  	return strings.TrimSpace(cleaned)
   312  }