github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/java/parse_pom_xml.go (about) 1 package java 2 3 import ( 4 "context" 5 "errors" 6 "strings" 7 8 "github.com/anchore/syft/internal" 9 "github.com/anchore/syft/internal/log" 10 "github.com/anchore/syft/internal/unknown" 11 "github.com/anchore/syft/syft/artifact" 12 "github.com/anchore/syft/syft/file" 13 "github.com/anchore/syft/syft/pkg" 14 "github.com/anchore/syft/syft/pkg/cataloger/internal/licenses" 15 "github.com/anchore/syft/syft/pkg/cataloger/java/internal/maven" 16 ) 17 18 const ( 19 pomXMLGlob = "**/*pom.xml" 20 pomCatalogerName = "java-pom-cataloger" 21 ) 22 23 type pomXMLCataloger struct { 24 cfg ArchiveCatalogerConfig 25 } 26 27 func (p pomXMLCataloger) Name() string { 28 return pomCatalogerName 29 } 30 31 func (p pomXMLCataloger) Catalog(ctx context.Context, fileResolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) { 32 locations, err := fileResolver.FilesByGlob("**/pom.xml") 33 if err != nil { 34 return nil, nil, err 35 } 36 37 r := maven.NewResolver(fileResolver, p.cfg.mavenConfig()) 38 39 var errs error 40 var poms []*maven.Project 41 pomLocations := map[*maven.Project]file.Location{} 42 for _, pomLocation := range locations { 43 pom, err := readPomFromLocation(fileResolver, pomLocation) 44 if err != nil || pom == nil { 45 log.WithFields("error", err, "pomLocation", pomLocation).Debug("error while reading pom") 46 errs = unknown.Appendf(errs, pomLocation, "error reading pom.xml: %w", err) 47 continue 48 } 49 50 poms = append(poms, pom) 51 pomLocations[pom] = pomLocation 52 r.AddPom(ctx, pom, pomLocation) 53 } 54 55 var pkgs []pkg.Package 56 var relationships []artifact.Relationship 57 resolved := map[maven.ID]*pkg.Package{} 58 59 // catalog all the main packages first so these can be referenced later when building the dependency graph 60 for _, pom := range poms { 61 location := pomLocations[pom] // should always exist 62 63 id := r.ResolveID(ctx, pom) 64 mainPkg := newPackageFromMavenPom(ctx, r, pom, location) 65 if mainPkg == nil { 66 continue 67 } 68 resolved[id] = mainPkg 69 pkgs = append(pkgs, licenses.RelativeToPackage(ctx, fileResolver, *mainPkg)) 70 } 71 72 // catalog all dependencies 73 for _, pom := range poms { 74 location := pomLocations[pom] // should always exist 75 76 id := r.ResolveID(ctx, pom) 77 mainPkg := resolved[id] 78 79 newPkgs, newRelationships, newErrs := collectDependencies(ctx, r, resolved, mainPkg, pom, location, p.cfg.ResolveTransitiveDependencies) 80 pkgs = append(pkgs, newPkgs...) 81 relationships = append(relationships, newRelationships...) 82 errs = unknown.Join(errs, newErrs) 83 } 84 85 return pkgs, relationships, errs 86 } 87 88 func readPomFromLocation(fileResolver file.Resolver, pomLocation file.Location) (*maven.Project, error) { 89 contents, err := fileResolver.FileContentsByLocation(pomLocation) 90 if err != nil { 91 return nil, err 92 } 93 defer internal.CloseAndLogError(contents, pomLocation.RealPath) 94 return maven.ParsePomXML(contents) 95 } 96 97 // newPackageFromMavenPom processes a single Maven POM for a given parent package, returning only the main package from the pom 98 func newPackageFromMavenPom(ctx context.Context, r *maven.Resolver, pom *maven.Project, location file.Location) *pkg.Package { 99 id := r.ResolveID(ctx, pom) 100 parent, err := r.ResolveParent(ctx, pom) 101 if err != nil { 102 // this is expected in many cases, there will be no network access and the maven resolver is unable to 103 // look up information, so we can continue with what little information we have 104 log.Tracef("unable to resolve parent due to: %v", err) 105 } 106 107 var javaPomParent *pkg.JavaPomParent 108 if parent != nil { // parent is returned in both cases: when it is resolved or synthesized from the pom.parent info 109 parentID := r.ResolveID(ctx, parent) 110 javaPomParent = &pkg.JavaPomParent{ 111 GroupID: parentID.GroupID, 112 ArtifactID: parentID.ArtifactID, 113 Version: parentID.Version, 114 } 115 } 116 117 pomLicenses, err := r.ResolveLicenses(ctx, pom) 118 if err != nil { 119 log.Tracef("error resolving licenses: %v", err) 120 } 121 pkgLicenses := toPkgLicenses(ctx, &location, pomLicenses) 122 123 m := pkg.JavaArchive{ 124 PomProject: &pkg.JavaPomProject{ 125 Parent: javaPomParent, 126 GroupID: id.GroupID, 127 ArtifactID: id.ArtifactID, 128 Version: id.Version, 129 Name: r.ResolveProperty(ctx, pom, pom.Name), 130 Description: r.ResolveProperty(ctx, pom, pom.Description), 131 URL: r.ResolveProperty(ctx, pom, pom.URL), 132 }, 133 } 134 135 p := &pkg.Package{ 136 Name: id.ArtifactID, 137 Version: id.Version, 138 Locations: file.NewLocationSet( 139 location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 140 ), 141 Licenses: pkg.NewLicenseSet(pkgLicenses...), 142 Language: pkg.Java, 143 Type: pkg.JavaPkg, 144 FoundBy: pomCatalogerName, 145 PURL: packageURL(id.ArtifactID, id.Version, m), 146 Metadata: m, 147 } 148 149 finalizePackage(p) 150 151 return p 152 } 153 154 func collectDependencies(ctx context.Context, r *maven.Resolver, resolved map[maven.ID]*pkg.Package, parentPkg *pkg.Package, pom *maven.Project, loc file.Location, includeTransitiveDependencies bool) ([]pkg.Package, []artifact.Relationship, error) { 155 var errs error 156 var pkgs []pkg.Package 157 var relationships []artifact.Relationship 158 159 pomID := r.ResolveID(ctx, pom) 160 for _, dep := range maven.DirectPomDependencies(pom) { 161 depID := r.ResolveDependencyID(ctx, pom, dep) 162 log.WithFields("pomLocation", loc, "mavenID", pomID, "dependencyID", depID).Trace("adding maven pom dependency") 163 164 // we may have a reference to a package pointing to an existing pom on the filesystem, but we don't want to duplicate these entries 165 depPkg := resolved[depID] 166 if depPkg == nil { 167 p, err := newPackageFromDependency( 168 ctx, 169 r, 170 pom, 171 dep, 172 loc.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), 173 ) 174 if err != nil { 175 log.WithFields("error", err, "pomLocation", loc, "mavenID", pomID, "dependencyID", depID).Debugf("error adding dependency") 176 } 177 178 if p == nil { 179 // we don't have a valid package, just continue to the next dependency 180 continue 181 } 182 depPkg = p 183 resolved[depID] = depPkg 184 185 // only resolve transitive dependencies if we're not already looking these up for the specific package 186 if includeTransitiveDependencies && depID.Valid() { 187 depPom, err := r.FindPom(ctx, depID.GroupID, depID.ArtifactID, depID.Version) 188 if err != nil { 189 log.WithFields("mavenID", depID, "error", err).Debug("error finding pom") 190 } 191 if depPom != nil { 192 transitivePkgs, transitiveRelationships, transitiveErrs := collectDependencies(ctx, r, resolved, depPkg, depPom, loc, includeTransitiveDependencies) 193 pkgs = append(pkgs, transitivePkgs...) 194 relationships = append(relationships, transitiveRelationships...) 195 errs = unknown.Join(errs, transitiveErrs) 196 } 197 } 198 } 199 200 pkgs = append(pkgs, *depPkg) 201 if parentPkg != nil { 202 relationships = append(relationships, artifact.Relationship{ 203 From: *depPkg, 204 To: *parentPkg, 205 Type: artifact.DependencyOfRelationship, 206 }) 207 } 208 } 209 210 return pkgs, relationships, errs 211 } 212 213 func newPomProject(ctx context.Context, r *maven.Resolver, path string, pom *maven.Project) *pkg.JavaPomProject { 214 id := r.ResolveID(ctx, pom) 215 name := r.ResolveProperty(ctx, pom, pom.Name) 216 projectURL := r.ResolveProperty(ctx, pom, pom.URL) 217 218 log.WithFields("path", path, "artifactID", id.ArtifactID, "name", name, "projectURL", projectURL).Trace("parsing pom.xml") 219 return &pkg.JavaPomProject{ 220 Path: path, 221 Parent: pomParent(ctx, r, pom), 222 GroupID: id.GroupID, 223 ArtifactID: id.ArtifactID, 224 Version: id.Version, 225 Name: name, 226 Description: cleanDescription(r.ResolveProperty(ctx, pom, pom.Description)), 227 URL: projectURL, 228 } 229 } 230 231 func newPackageFromDependency(ctx context.Context, r *maven.Resolver, pom *maven.Project, dep maven.Dependency, locations ...file.Location) (*pkg.Package, error) { 232 id := r.ResolveDependencyID(ctx, pom, dep) 233 234 var err error 235 var pkgLicenses []pkg.License 236 dependencyPom, depErr := r.FindPom(ctx, id.GroupID, id.ArtifactID, id.Version) 237 if depErr != nil { 238 err = errors.Join(err, depErr) 239 } 240 241 var pomProject *pkg.JavaPomProject 242 if dependencyPom != nil { 243 depLicenses, _ := r.ResolveLicenses(ctx, dependencyPom) 244 pkgLicenses = append(pkgLicenses, toPkgLicenses(ctx, nil, depLicenses)...) 245 pomProject = &pkg.JavaPomProject{ 246 Parent: pomParent(ctx, r, dependencyPom), 247 GroupID: id.GroupID, 248 ArtifactID: id.ArtifactID, 249 Version: id.Version, 250 Name: r.ResolveProperty(ctx, pom, pom.Name), 251 Description: r.ResolveProperty(ctx, pom, pom.Description), 252 URL: r.ResolveProperty(ctx, pom, pom.URL), 253 } 254 } 255 256 m := pkg.JavaArchive{ 257 PomProperties: &pkg.JavaPomProperties{ 258 GroupID: id.GroupID, 259 ArtifactID: id.ArtifactID, 260 Scope: r.ResolveProperty(ctx, pom, dep.Scope), 261 }, 262 PomProject: pomProject, 263 } 264 265 p := &pkg.Package{ 266 Name: id.ArtifactID, 267 Version: id.Version, 268 Locations: file.NewLocationSet(locations...), 269 Licenses: pkg.NewLicenseSet(pkgLicenses...), 270 PURL: packageURL(id.ArtifactID, id.Version, m), 271 Language: pkg.Java, 272 Type: pkg.JavaPkg, // TODO: should we differentiate between packages from jar/war/zip versus packages from a pom.xml that were not installed yet? 273 FoundBy: pomCatalogerName, 274 Metadata: m, 275 } 276 277 finalizePackage(p) 278 279 return p, err 280 } 281 282 func pomParent(ctx context.Context, r *maven.Resolver, pom *maven.Project) *pkg.JavaPomParent { 283 if pom == nil || pom.Parent == nil { 284 return nil 285 } 286 287 groupID := r.ResolveProperty(ctx, pom, pom.Parent.GroupID) 288 artifactID := r.ResolveProperty(ctx, pom, pom.Parent.ArtifactID) 289 version := r.ResolveProperty(ctx, pom, pom.Parent.Version) 290 291 if groupID == "" && artifactID == "" && version == "" { 292 return nil 293 } 294 295 return &pkg.JavaPomParent{ 296 GroupID: groupID, 297 ArtifactID: artifactID, 298 Version: version, 299 } 300 } 301 302 func cleanDescription(original string) (cleaned string) { 303 descriptionLines := strings.Split(original, "\n") 304 for _, line := range descriptionLines { 305 line = strings.TrimSpace(line) 306 if len(line) == 0 { 307 continue 308 } 309 cleaned += line + " " 310 } 311 return strings.TrimSpace(cleaned) 312 }